ddr-extraction 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +33 -56
- data/lib/ddr/extraction.rb +15 -3
- data/lib/ddr/extraction/adapters.rb +43 -9
- data/lib/ddr/extraction/adapters/adapter.rb +55 -0
- data/lib/ddr/extraction/adapters/fits_adapter.rb +12 -8
- data/lib/ddr/extraction/adapters/null_adapter.rb +21 -0
- data/lib/ddr/extraction/adapters/registry.rb +42 -0
- data/lib/ddr/extraction/adapters/tika_adapter.rb +22 -10
- data/lib/ddr/extraction/configuration.rb +1 -7
- data/lib/ddr/extraction/defaults.rb +1 -2
- data/lib/ddr/extraction/extractor.rb +26 -12
- data/lib/ddr/extraction/version.rb +1 -1
- data/spec/unit/extractor_spec.rb +6 -4
- metadata +5 -3
- data/lib/ddr/extraction/adapter.rb +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2975444782fb458e450a8acb9ec54a690d457241
|
4
|
+
data.tar.gz: 5d2cedbf73284f32b7c5497d6a49d56aaaa1c0ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f72b6e7224081f193cbf4b8977167d84f995e467346d60590c48133746e74cd29622ee8d1d6fb3dbc426ec37339ec5e7985c08d5e3755268b1a46dac02bb6f07
|
7
|
+
data.tar.gz: ec401d6d43201348ac80bf5ce13dfcfd854918ed39cb49bdc990c86fd3b682ced86db425387e481f57e9f39bdbd6e7623ee88b44d8f5674052e5e8d07cabb8d5
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Pluggable file text and metadata extraction service.
|
|
6
6
|
|
7
7
|
Add this line to your application's Gemfile:
|
8
8
|
|
9
|
-
gem 'ddr-
|
9
|
+
gem 'ddr-extraction'
|
10
10
|
|
11
11
|
And then execute:
|
12
12
|
|
@@ -14,68 +14,45 @@ And then execute:
|
|
14
14
|
|
15
15
|
Or install it yourself as:
|
16
16
|
|
17
|
-
$ gem install ddr-
|
17
|
+
$ gem install ddr-extraction
|
18
18
|
|
19
|
-
##
|
19
|
+
## Dependencies
|
20
|
+
|
21
|
+
The gem has no external dependencies of its own. Consult the documentation for each extraction tool used by your configuration.
|
22
|
+
|
23
|
+
## Configuration
|
24
|
+
|
25
|
+
`Ddr::Extraction` includes default configurations for [Apache Tika](http://tika.apache.org/) (text and metadata extraction) and [FITS](http://fitstool.org/) (metadata only). Tika is set as the default adapter when one is not specified to the builder.
|
20
26
|
|
27
|
+
```ruby
|
28
|
+
require "ddr-extraction"
|
29
|
+
Ddr::Extraction.load_defaults!
|
21
30
|
```
|
22
|
-
>> extractor = Ddr::Extraction::Extractor.new
|
23
|
-
=> #<Ddr::Extraction::Extractor:0x007fc2851dcfa0>
|
24
31
|
|
25
|
-
|
26
|
-
|
32
|
+
There are rake tasks for downloading Tika and FITS to expected locations.
|
33
|
+
|
34
|
+
```sh
|
35
|
+
rake tika:download
|
36
|
+
rake fits:download
|
37
|
+
```
|
38
|
+
|
39
|
+
Configuration Example
|
27
40
|
|
41
|
+
```ruby
|
42
|
+
Ddr::Extraction.configure do |config|
|
43
|
+
config.adapters.default = :tika # Use Tika as the default adapter
|
44
|
+
config.adapters.tika.path = "/path/to/tika-app.jar"
|
45
|
+
config.adapters.fits.path = "/path/to/fits.sh"
|
46
|
+
end
|
47
|
+
```
|
48
|
+
|
49
|
+
## Usage
|
50
|
+
|
51
|
+
```
|
52
|
+
>> extractor = Ddr::Extraction.build_extractor
|
53
|
+
>> text = extractor.extract(:text, "spec/fixtures/sample.docx")
|
28
54
|
>> puts text.read
|
29
55
|
This is a sample document.
|
30
|
-
|
31
|
-
>> metadata = extractor.extract(:metadata, "spec/fixtures/blue-devil.png")
|
32
|
-
=> #<IO:fd 12>
|
33
|
-
|
34
|
-
>> puts metadata.read
|
35
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
36
|
-
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.3" timestamp="11/12/14 12:36 PM">
|
37
|
-
<identification>
|
38
|
-
<identity format="Portable Network Graphics" mimetype="image/png" toolname="FITS" toolversion="0.8.3">
|
39
|
-
<tool toolname="Exiftool" toolversion="9.13" />
|
40
|
-
<tool toolname="Droid" toolversion="6.1.3" />
|
41
|
-
<tool toolname="ffident" toolversion="0.2" />
|
42
|
-
<tool toolname="Tika" toolversion="1.3" />
|
43
|
-
<version toolname="Droid" toolversion="6.1.3">1.0</version>
|
44
|
-
<externalIdentifier toolname="Droid" toolversion="6.1.3" type="puid">fmt/11</externalIdentifier>
|
45
|
-
</identity>
|
46
|
-
</identification>
|
47
|
-
<fileinfo>
|
48
|
-
<lastmodified toolname="Exiftool" toolversion="9.13" status="SINGLE_RESULT">2014:11:12 12:24:18-05:00</lastmodified>
|
49
|
-
<filepath toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">/path/to/spec/fixtures/blue-devil.png</filepath>
|
50
|
-
<filename toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">blue-devil.png</filename>
|
51
|
-
<size toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">75005</size>
|
52
|
-
<md5checksum toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">e6a5d16da2fbe65311952e2d8b04f069</md5checksum>
|
53
|
-
<fslastmodified toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">1415813058000</fslastmodified>
|
54
|
-
</fileinfo>
|
55
|
-
<filestatus />
|
56
|
-
<metadata>
|
57
|
-
<image>
|
58
|
-
<compressionScheme toolname="Exiftool" toolversion="9.13" status="CONFLICT">Deflate/Inflate</compressionScheme>
|
59
|
-
<compressionScheme toolname="Tika" toolversion="1.3" status="CONFLICT">Deflate</compressionScheme>
|
60
|
-
<imageWidth toolname="Exiftool" toolversion="9.13">200</imageWidth>
|
61
|
-
<imageHeight toolname="Exiftool" toolversion="9.13">200</imageHeight>
|
62
|
-
<orientation toolname="Tika" toolversion="1.3" status="SINGLE_RESULT">normal*</orientation>
|
63
|
-
</image>
|
64
|
-
</metadata>
|
65
|
-
<statistics fitsExecutionTime="791">
|
66
|
-
<tool toolname="OIS Audio Information" toolversion="0.1" status="did not run" />
|
67
|
-
<tool toolname="ADL Tool" toolversion="0.1" status="did not run" />
|
68
|
-
<tool toolname="Jhove" toolversion="1.5" executionTime="556" />
|
69
|
-
<tool toolname="file utility" toolversion="5.04" executionTime="623" />
|
70
|
-
<tool toolname="Exiftool" toolversion="9.13" executionTime="664" />
|
71
|
-
<tool toolname="Droid" toolversion="6.1.3" executionTime="147" />
|
72
|
-
<tool toolname="NLNZ Metadata Extractor" toolversion="3.4GA" executionTime="366" />
|
73
|
-
<tool toolname="OIS File Information" toolversion="0.2" executionTime="142" />
|
74
|
-
<tool toolname="OIS XML Metadata" toolversion="0.2" status="did not run" />
|
75
|
-
<tool toolname="ffident" toolversion="0.2" executionTime="369" />
|
76
|
-
<tool toolname="Tika" toolversion="1.3" executionTime="356" />
|
77
|
-
</statistics>
|
78
|
-
</fits>
|
79
56
|
```
|
80
57
|
|
81
58
|
## Contributing
|
data/lib/ddr/extraction.rb
CHANGED
@@ -1,26 +1,38 @@
|
|
1
1
|
require_relative "extraction/version"
|
2
2
|
require_relative "extraction/configuration"
|
3
3
|
require_relative "extraction/extractor"
|
4
|
+
require_relative "extraction/adapters"
|
4
5
|
|
5
6
|
module Ddr
|
6
7
|
#
|
7
|
-
# Ddr::Extraction - A
|
8
|
+
# Ddr::Extraction - A pluggable content extraction service.
|
8
9
|
#
|
9
10
|
module Extraction
|
10
11
|
|
11
12
|
class << self
|
12
13
|
|
14
|
+
# Returns the service configuration
|
13
15
|
def config
|
14
16
|
@config ||= Configuration.new
|
15
17
|
end
|
16
18
|
|
17
|
-
# Yields
|
19
|
+
# Yields the service configuration to a block
|
18
20
|
def configure
|
19
21
|
yield config
|
20
22
|
end
|
21
|
-
|
23
|
+
|
24
|
+
# Loads default configuration settings
|
25
|
+
def load_defaults!
|
26
|
+
require_relative "extraction/defaults"
|
27
|
+
end
|
28
|
+
|
29
|
+
def build_extractor(adapter_name = nil)
|
30
|
+
Extractor.build(adapter_name)
|
31
|
+
end
|
32
|
+
|
22
33
|
end
|
23
34
|
|
24
35
|
end
|
25
36
|
end
|
26
37
|
|
38
|
+
Dir[File.join(__dir__, "extraction", "adapters", "*_adapter.rb")].each { |adapter| require(adapter) }
|
@@ -1,25 +1,59 @@
|
|
1
|
+
require_relative "adapters/registry"
|
2
|
+
|
1
3
|
module Ddr
|
2
4
|
module Extraction
|
3
5
|
module Adapters
|
4
6
|
|
5
|
-
KNOWN_ADAPTERS = [:fits, :tika]
|
6
|
-
|
7
7
|
class << self
|
8
|
+
|
9
|
+
# Accessor for the name of the default adapter
|
10
|
+
attr_accessor :default
|
11
|
+
|
12
|
+
# Return the requested adapter by name.
|
13
|
+
# If a name is not supplied, return the default adapter.
|
14
|
+
# @see .get_default_adapter
|
15
|
+
#
|
16
|
+
# @param adapter_name [Symbol] the name of the requested adapter.
|
17
|
+
# @return [Class] the adapter class requested.
|
18
|
+
def get_adapter(adapter_name = nil)
|
19
|
+
if adapter_name
|
20
|
+
Registry.instance.adapters[adapter_name.to_sym]
|
21
|
+
else
|
22
|
+
get_default_adapter
|
23
|
+
end
|
24
|
+
end
|
8
25
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
26
|
+
# Return the default adapter.
|
27
|
+
# Raises an exception if the default adapter has not been configured.
|
28
|
+
def get_default_adapter
|
29
|
+
raise "The default adapter has not been configured." unless default
|
30
|
+
get_adapter(default)
|
13
31
|
end
|
14
32
|
|
15
|
-
|
16
|
-
|
17
|
-
|
33
|
+
# Registers an adapter.
|
34
|
+
# @see Registry#register
|
35
|
+
#
|
36
|
+
# @param name [Symbol] the name of the adapter.
|
37
|
+
# @param adapter [Class] the adapter class to register.
|
38
|
+
def register(name, adapter)
|
39
|
+
Registry.instance.register(name, adapter)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Creates methods to access each adapter.
|
43
|
+
Registry.instance.adapters.each do |name, adapter|
|
44
|
+
define_method(name) do
|
45
|
+
adapter
|
18
46
|
end
|
19
47
|
end
|
20
48
|
|
49
|
+
def method_missing(name, *args)
|
50
|
+
return get_adapter(name) if Registry.instance.adapters.key?(name.to_sym)
|
51
|
+
super
|
52
|
+
end
|
21
53
|
end
|
22
54
|
|
23
55
|
end
|
24
56
|
end
|
25
57
|
end
|
58
|
+
|
59
|
+
Dir[File.join(__dir__, "adapters", "*_adapter.rb")].each { |adapter| require(adapter) }
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Ddr
|
2
|
+
module Extraction
|
3
|
+
module Adapters
|
4
|
+
class Adapter
|
5
|
+
|
6
|
+
# Supported extraction output types
|
7
|
+
OUTPUT_TYPES = [:text, :metadata]
|
8
|
+
|
9
|
+
class << self
|
10
|
+
# Register the adapter
|
11
|
+
def register(adapter_name)
|
12
|
+
Ddr::Extraction::Adapters.register(adapter_name, self)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
# Extract a kind of output from the file path
|
17
|
+
#
|
18
|
+
# @param output [Symbol] the kind of output, `:text` or `:metadata`
|
19
|
+
# @param file_path [String] path to the file to be processed
|
20
|
+
# @return [IO] the result of the extraction
|
21
|
+
# @api public
|
22
|
+
def extract(output, file_path)
|
23
|
+
raise ArgumentError, "Output type must be one of #{OUTPUT_TYPES}." unless OUTPUT_TYPES.include?(output)
|
24
|
+
raise IOError, "File not found: #{file_path}" unless File.exist?(file_path)
|
25
|
+
execute(command(output, file_path))
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
# Returns the command to be executed
|
31
|
+
#
|
32
|
+
# @param output [Symbol] the kind of output.
|
33
|
+
# @param file_path [String] path to the file to be processed.
|
34
|
+
# @return [String, Array] the command as a String or Array
|
35
|
+
# @see #extract
|
36
|
+
# @see #execute
|
37
|
+
# @api private
|
38
|
+
def command(output, file_path)
|
39
|
+
raise NotImplementedError, "The `command' instance method must be implemented by the adapter."
|
40
|
+
end
|
41
|
+
|
42
|
+
# Executes the command in a subprocess.
|
43
|
+
#
|
44
|
+
# @param cmd [String, Array] the command as a String or Array
|
45
|
+
# @see Ruby documentation for IO.popen
|
46
|
+
# @return [IO] the output of the command.
|
47
|
+
# @api private
|
48
|
+
def execute(cmd)
|
49
|
+
IO.popen(cmd)
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -1,22 +1,26 @@
|
|
1
|
+
require_relative "adapter"
|
2
|
+
|
1
3
|
module Ddr
|
2
4
|
module Extraction
|
3
5
|
module Adapters
|
4
|
-
class FitsAdapter
|
6
|
+
class FitsAdapter < Adapter
|
5
7
|
|
6
|
-
|
7
|
-
#
|
8
|
-
# @param file [String] the file from which to extract metadata.
|
9
|
-
# @return [IO] the output
|
10
|
-
def extract_metadata(file)
|
11
|
-
IO.popen([self.class.path, "-i", file])
|
12
|
-
end
|
8
|
+
register :fits
|
13
9
|
|
14
10
|
class << self
|
15
11
|
# Path to FITS executable (fits.sh or fits.bat)
|
16
12
|
attr_accessor :path
|
17
13
|
end
|
18
14
|
|
15
|
+
private
|
16
|
+
|
17
|
+
def command(output, file_path)
|
18
|
+
raise "This adapter only supports :metadata output." unless output == :metadata
|
19
|
+
[self.class.path, "-i", file_path]
|
20
|
+
end
|
21
|
+
|
19
22
|
end
|
20
23
|
end
|
21
24
|
end
|
22
25
|
end
|
26
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "singleton"
|
2
|
+
require_relative "adapter"
|
3
|
+
|
4
|
+
module Ddr
|
5
|
+
module Extraction
|
6
|
+
module Adapters
|
7
|
+
#
|
8
|
+
# Registry of adapter names and classes
|
9
|
+
#
|
10
|
+
class Registry
|
11
|
+
include Singleton
|
12
|
+
|
13
|
+
attr_reader :adapters
|
14
|
+
|
15
|
+
def initialize
|
16
|
+
@adapters = {}
|
17
|
+
end
|
18
|
+
|
19
|
+
# Registers an adapter
|
20
|
+
#
|
21
|
+
# @param name [Symbol] the name of the adapter.
|
22
|
+
# @param adapter [Class] the adapter to be registered.
|
23
|
+
def register(name, adapter)
|
24
|
+
name = name.to_sym
|
25
|
+
validate!(name, adapter)
|
26
|
+
adapters[name] = adapter
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def validate!(name, adapter)
|
32
|
+
raise "Another adapter is registered under the name :#{name}." if adapters.key?(name)
|
33
|
+
unless adapter < Adapter
|
34
|
+
raise ArgumentError, "Only subclasses of Ddr::Extraction::Adapters::Adapter may be registered."
|
35
|
+
end
|
36
|
+
raise "The adapter #{adapter.to_s} is already registered." if adapters.value?(adapter)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -1,15 +1,11 @@
|
|
1
|
+
require_relative "adapter"
|
2
|
+
|
1
3
|
module Ddr
|
2
4
|
module Extraction
|
3
5
|
module Adapters
|
4
|
-
class TikaAdapter
|
5
|
-
|
6
|
-
|
7
|
-
#
|
8
|
-
# @param file [String] path to file from which to extract text
|
9
|
-
# @return [IO] the output
|
10
|
-
def extract_text(file)
|
11
|
-
IO.popen(["java", "-jar", self.class.path, "--text", file])
|
12
|
-
end
|
6
|
+
class TikaAdapter < Adapter
|
7
|
+
|
8
|
+
register :tika
|
13
9
|
|
14
10
|
class << self
|
15
11
|
# Path to tika-app.jar
|
@@ -17,9 +13,25 @@ module Ddr
|
|
17
13
|
|
18
14
|
# Tika server port (optional, required for server)
|
19
15
|
attr_accessor :port
|
20
|
-
end
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def command(output, file_path)
|
21
|
+
["java", "-jar", self.class.path, output_options(output), file_path].flatten
|
22
|
+
end
|
23
|
+
|
24
|
+
def output_options(output)
|
25
|
+
case output
|
26
|
+
when :text
|
27
|
+
"--text"
|
28
|
+
when :metadata
|
29
|
+
["--metadata", "--xml"]
|
30
|
+
end
|
31
|
+
end
|
21
32
|
|
22
33
|
end
|
34
|
+
|
23
35
|
end
|
24
36
|
end
|
25
37
|
end
|
@@ -1,22 +1,16 @@
|
|
1
|
-
require_relative "adapter"
|
2
1
|
require_relative "adapters"
|
3
2
|
|
4
3
|
module Ddr
|
5
4
|
module Extraction
|
6
5
|
class Configuration
|
7
6
|
|
7
|
+
# Returns an object having settable attributes for adapters.
|
8
8
|
def adapters
|
9
9
|
config = Adapters
|
10
10
|
yield config if block_given?
|
11
11
|
config
|
12
12
|
end
|
13
13
|
|
14
|
-
def adapter
|
15
|
-
config = Adapter
|
16
|
-
yield config if block_given?
|
17
|
-
config
|
18
|
-
end
|
19
|
-
|
20
14
|
end
|
21
15
|
end
|
22
16
|
end
|
@@ -3,8 +3,7 @@ require "ddr-extraction"
|
|
3
3
|
bin_dir = File.expand_path("../../../../bin", __FILE__)
|
4
4
|
|
5
5
|
Ddr::Extraction.configure do |config|
|
6
|
-
config.
|
7
|
-
config.adapter.metadata = :fits
|
6
|
+
config.adapters.default = :tika
|
8
7
|
config.adapters.tika.path = File.join(bin_dir, "tika-app.jar")
|
9
8
|
config.adapters.fits.path = File.join(bin_dir, "fits", "fits.sh")
|
10
9
|
end
|
@@ -1,22 +1,36 @@
|
|
1
|
+
require "delegate"
|
1
2
|
require_relative "adapters"
|
2
3
|
|
3
4
|
module Ddr
|
4
5
|
module Extraction
|
5
|
-
|
6
|
+
#
|
7
|
+
# The Extractor is the main public class.
|
8
|
+
#
|
9
|
+
# It works by delegating to an adapter that does the real work.
|
10
|
+
#
|
11
|
+
# extractor = Ddr::Extraction::Extractor.build(:tika)
|
12
|
+
# text = extractor.extract(:text, "/path/to/text/file")
|
13
|
+
# puts text.read
|
14
|
+
# ...
|
15
|
+
#
|
16
|
+
class Extractor < ::SimpleDelegator
|
6
17
|
|
7
|
-
|
8
|
-
#
|
9
|
-
# @param type [Symbol] the type of content to extract, `:text` or `:metadata`.
|
10
|
-
# @param file [String] path to file from which to extract content.
|
11
|
-
# @return [IO] the output
|
12
|
-
def extract(type, file)
|
13
|
-
adapter(type).send("extract_#{type}", file)
|
14
|
-
end
|
18
|
+
class << self
|
15
19
|
|
16
|
-
|
20
|
+
# Returns/yields an extractor instance
|
21
|
+
#
|
22
|
+
# @param adapter_name [Symbol] the name of the adapter to plug in.
|
23
|
+
# If not given, a default adapter will be used, if
|
24
|
+
# Ddr::Extraction::Adapters.default has been set with
|
25
|
+
# the name of the default adapter.
|
26
|
+
#
|
27
|
+
def build(adapter_name = nil)
|
28
|
+
adapter = Adapters.get_adapter(adapter_name)
|
29
|
+
extractor = new(adapter.new)
|
30
|
+
yield extractor if block_given?
|
31
|
+
extractor
|
32
|
+
end
|
17
33
|
|
18
|
-
def adapter(type)
|
19
|
-
Adapter.build_adapter(type)
|
20
34
|
end
|
21
35
|
|
22
36
|
end
|
data/spec/unit/extractor_spec.rb
CHANGED
@@ -3,16 +3,18 @@ module Ddr
|
|
3
3
|
RSpec.describe Extractor do
|
4
4
|
|
5
5
|
describe "extracting text" do
|
6
|
-
|
6
|
+
subject { described_class.build(:tika) }
|
7
|
+
let(:file_path) { File.expand_path("../../fixtures/sample.docx", __FILE__) }
|
7
8
|
it "should extract the text content of the file" do
|
8
|
-
expect(subject.extract(:text,
|
9
|
+
expect(subject.extract(:text, file_path).read).to match(/This is a sample document./)
|
9
10
|
end
|
10
11
|
end
|
11
12
|
|
12
13
|
describe "extracting metadata" do
|
13
|
-
|
14
|
+
subject { described_class.build(:tika) }
|
15
|
+
let(:file_path) { File.expand_path("../../fixtures/blue-devil.png", __FILE__) }
|
14
16
|
it "should extract technical metadata from the file" do
|
15
|
-
expect(subject.extract(:metadata,
|
17
|
+
expect(subject.extract(:metadata, file_path).read.length).to_not eq(0)
|
16
18
|
end
|
17
19
|
end
|
18
20
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-extraction
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Chandek-Stark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,9 +71,11 @@ files:
|
|
71
71
|
- ddr-extraction.gemspec
|
72
72
|
- lib/ddr-extraction.rb
|
73
73
|
- lib/ddr/extraction.rb
|
74
|
-
- lib/ddr/extraction/adapter.rb
|
75
74
|
- lib/ddr/extraction/adapters.rb
|
75
|
+
- lib/ddr/extraction/adapters/adapter.rb
|
76
76
|
- lib/ddr/extraction/adapters/fits_adapter.rb
|
77
|
+
- lib/ddr/extraction/adapters/null_adapter.rb
|
78
|
+
- lib/ddr/extraction/adapters/registry.rb
|
77
79
|
- lib/ddr/extraction/adapters/tika_adapter.rb
|
78
80
|
- lib/ddr/extraction/configuration.rb
|
79
81
|
- lib/ddr/extraction/defaults.rb
|
@@ -1,21 +0,0 @@
|
|
1
|
-
require "delegate"
|
2
|
-
require_relative "adapters"
|
3
|
-
|
4
|
-
module Ddr
|
5
|
-
module Extraction
|
6
|
-
class Adapter < ::SimpleDelegator
|
7
|
-
|
8
|
-
class << self
|
9
|
-
# Accessors for adapter types
|
10
|
-
attr_accessor :text, :metadata
|
11
|
-
|
12
|
-
def build_adapter(type)
|
13
|
-
adapter_name = send(type)
|
14
|
-
adapter = Adapters.get_adapter(adapter_name)
|
15
|
-
new(adapter.new)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|