ddr-extraction 0.2.1 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +33 -56
- data/lib/ddr/extraction.rb +15 -3
- data/lib/ddr/extraction/adapters.rb +43 -9
- data/lib/ddr/extraction/adapters/adapter.rb +55 -0
- data/lib/ddr/extraction/adapters/fits_adapter.rb +12 -8
- data/lib/ddr/extraction/adapters/null_adapter.rb +21 -0
- data/lib/ddr/extraction/adapters/registry.rb +42 -0
- data/lib/ddr/extraction/adapters/tika_adapter.rb +22 -10
- data/lib/ddr/extraction/configuration.rb +1 -7
- data/lib/ddr/extraction/defaults.rb +1 -2
- data/lib/ddr/extraction/extractor.rb +26 -12
- data/lib/ddr/extraction/version.rb +1 -1
- data/spec/unit/extractor_spec.rb +6 -4
- metadata +5 -3
- data/lib/ddr/extraction/adapter.rb +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2975444782fb458e450a8acb9ec54a690d457241
|
4
|
+
data.tar.gz: 5d2cedbf73284f32b7c5497d6a49d56aaaa1c0ee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f72b6e7224081f193cbf4b8977167d84f995e467346d60590c48133746e74cd29622ee8d1d6fb3dbc426ec37339ec5e7985c08d5e3755268b1a46dac02bb6f07
|
7
|
+
data.tar.gz: ec401d6d43201348ac80bf5ce13dfcfd854918ed39cb49bdc990c86fd3b682ced86db425387e481f57e9f39bdbd6e7623ee88b44d8f5674052e5e8d07cabb8d5
|
data/README.md
CHANGED
@@ -6,7 +6,7 @@ Pluggable file text and metadata extraction service.
|
|
6
6
|
|
7
7
|
Add this line to your application's Gemfile:
|
8
8
|
|
9
|
-
gem 'ddr-
|
9
|
+
gem 'ddr-extraction'
|
10
10
|
|
11
11
|
And then execute:
|
12
12
|
|
@@ -14,68 +14,45 @@ And then execute:
|
|
14
14
|
|
15
15
|
Or install it yourself as:
|
16
16
|
|
17
|
-
$ gem install ddr-
|
17
|
+
$ gem install ddr-extraction
|
18
18
|
|
19
|
-
##
|
19
|
+
## Dependencies
|
20
|
+
|
21
|
+
The gem has no external dependencies of its own. Consult the documentation for each extraction tool used by your configuration.
|
22
|
+
|
23
|
+
## Configuration
|
24
|
+
|
25
|
+
`Ddr::Extraction` includes default configurations for [Apache Tika](http://tika.apache.org/) (text and metadata extraction) and [FITS](http://fitstool.org/) (metadata only). Tika is set as the default adapter when one is not specified to the builder.
|
20
26
|
|
27
|
+
```ruby
|
28
|
+
require "ddr-extraction"
|
29
|
+
Ddr::Extraction.load_defaults!
|
21
30
|
```
|
22
|
-
>> extractor = Ddr::Extraction::Extractor.new
|
23
|
-
=> #<Ddr::Extraction::Extractor:0x007fc2851dcfa0>
|
24
31
|
|
25
|
-
|
26
|
-
|
32
|
+
There are rake tasks for downloading Tika and FITS to expected locations.
|
33
|
+
|
34
|
+
```sh
|
35
|
+
rake tika:download
|
36
|
+
rake fits:download
|
37
|
+
```
|
38
|
+
|
39
|
+
Configuration Example
|
27
40
|
|
41
|
+
```ruby
|
42
|
+
Ddr::Extraction.configure do |config|
|
43
|
+
config.adapters.default = :tika # Use Tika as the default adapter
|
44
|
+
config.adapters.tika.path = "/path/to/tika-app.jar"
|
45
|
+
config.adapters.fits.path = "/path/to/fits.sh"
|
46
|
+
end
|
47
|
+
```
|
48
|
+
|
49
|
+
## Usage
|
50
|
+
|
51
|
+
```
|
52
|
+
>> extractor = Ddr::Extraction.build_extractor
|
53
|
+
>> text = extractor.extract(:text, "spec/fixtures/sample.docx")
|
28
54
|
>> puts text.read
|
29
55
|
This is a sample document.
|
30
|
-
|
31
|
-
>> metadata = extractor.extract(:metadata, "spec/fixtures/blue-devil.png")
|
32
|
-
=> #<IO:fd 12>
|
33
|
-
|
34
|
-
>> puts metadata.read
|
35
|
-
<?xml version="1.0" encoding="UTF-8"?>
|
36
|
-
<fits xmlns="http://hul.harvard.edu/ois/xml/ns/fits/fits_output" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/fits/fits_output http://hul.harvard.edu/ois/xml/xsd/fits/fits_output.xsd" version="0.8.3" timestamp="11/12/14 12:36 PM">
|
37
|
-
<identification>
|
38
|
-
<identity format="Portable Network Graphics" mimetype="image/png" toolname="FITS" toolversion="0.8.3">
|
39
|
-
<tool toolname="Exiftool" toolversion="9.13" />
|
40
|
-
<tool toolname="Droid" toolversion="6.1.3" />
|
41
|
-
<tool toolname="ffident" toolversion="0.2" />
|
42
|
-
<tool toolname="Tika" toolversion="1.3" />
|
43
|
-
<version toolname="Droid" toolversion="6.1.3">1.0</version>
|
44
|
-
<externalIdentifier toolname="Droid" toolversion="6.1.3" type="puid">fmt/11</externalIdentifier>
|
45
|
-
</identity>
|
46
|
-
</identification>
|
47
|
-
<fileinfo>
|
48
|
-
<lastmodified toolname="Exiftool" toolversion="9.13" status="SINGLE_RESULT">2014:11:12 12:24:18-05:00</lastmodified>
|
49
|
-
<filepath toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">/path/to/spec/fixtures/blue-devil.png</filepath>
|
50
|
-
<filename toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">blue-devil.png</filename>
|
51
|
-
<size toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">75005</size>
|
52
|
-
<md5checksum toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">e6a5d16da2fbe65311952e2d8b04f069</md5checksum>
|
53
|
-
<fslastmodified toolname="OIS File Information" toolversion="0.2" status="SINGLE_RESULT">1415813058000</fslastmodified>
|
54
|
-
</fileinfo>
|
55
|
-
<filestatus />
|
56
|
-
<metadata>
|
57
|
-
<image>
|
58
|
-
<compressionScheme toolname="Exiftool" toolversion="9.13" status="CONFLICT">Deflate/Inflate</compressionScheme>
|
59
|
-
<compressionScheme toolname="Tika" toolversion="1.3" status="CONFLICT">Deflate</compressionScheme>
|
60
|
-
<imageWidth toolname="Exiftool" toolversion="9.13">200</imageWidth>
|
61
|
-
<imageHeight toolname="Exiftool" toolversion="9.13">200</imageHeight>
|
62
|
-
<orientation toolname="Tika" toolversion="1.3" status="SINGLE_RESULT">normal*</orientation>
|
63
|
-
</image>
|
64
|
-
</metadata>
|
65
|
-
<statistics fitsExecutionTime="791">
|
66
|
-
<tool toolname="OIS Audio Information" toolversion="0.1" status="did not run" />
|
67
|
-
<tool toolname="ADL Tool" toolversion="0.1" status="did not run" />
|
68
|
-
<tool toolname="Jhove" toolversion="1.5" executionTime="556" />
|
69
|
-
<tool toolname="file utility" toolversion="5.04" executionTime="623" />
|
70
|
-
<tool toolname="Exiftool" toolversion="9.13" executionTime="664" />
|
71
|
-
<tool toolname="Droid" toolversion="6.1.3" executionTime="147" />
|
72
|
-
<tool toolname="NLNZ Metadata Extractor" toolversion="3.4GA" executionTime="366" />
|
73
|
-
<tool toolname="OIS File Information" toolversion="0.2" executionTime="142" />
|
74
|
-
<tool toolname="OIS XML Metadata" toolversion="0.2" status="did not run" />
|
75
|
-
<tool toolname="ffident" toolversion="0.2" executionTime="369" />
|
76
|
-
<tool toolname="Tika" toolversion="1.3" executionTime="356" />
|
77
|
-
</statistics>
|
78
|
-
</fits>
|
79
56
|
```
|
80
57
|
|
81
58
|
## Contributing
|
data/lib/ddr/extraction.rb
CHANGED
@@ -1,26 +1,38 @@
|
|
1
1
|
require_relative "extraction/version"
|
2
2
|
require_relative "extraction/configuration"
|
3
3
|
require_relative "extraction/extractor"
|
4
|
+
require_relative "extraction/adapters"
|
4
5
|
|
5
6
|
module Ddr
|
6
7
|
#
|
7
|
-
# Ddr::Extraction - A
|
8
|
+
# Ddr::Extraction - A pluggable content extraction service.
|
8
9
|
#
|
9
10
|
module Extraction
|
10
11
|
|
11
12
|
class << self
|
12
13
|
|
14
|
+
# Returns the service configuration
|
13
15
|
def config
|
14
16
|
@config ||= Configuration.new
|
15
17
|
end
|
16
18
|
|
17
|
-
# Yields
|
19
|
+
# Yields the service configuration to a block
|
18
20
|
def configure
|
19
21
|
yield config
|
20
22
|
end
|
21
|
-
|
23
|
+
|
24
|
+
# Loads default configuration settings
|
25
|
+
def load_defaults!
|
26
|
+
require_relative "extraction/defaults"
|
27
|
+
end
|
28
|
+
|
29
|
+
def build_extractor(adapter_name = nil)
|
30
|
+
Extractor.build(adapter_name)
|
31
|
+
end
|
32
|
+
|
22
33
|
end
|
23
34
|
|
24
35
|
end
|
25
36
|
end
|
26
37
|
|
38
|
+
Dir[File.join(__dir__, "extraction", "adapters", "*_adapter.rb")].each { |adapter| require(adapter) }
|
@@ -1,25 +1,59 @@
|
|
1
|
+
require_relative "adapters/registry"
|
2
|
+
|
1
3
|
module Ddr
|
2
4
|
module Extraction
|
3
5
|
module Adapters
|
4
6
|
|
5
|
-
KNOWN_ADAPTERS = [:fits, :tika]
|
6
|
-
|
7
7
|
class << self
|
8
|
+
|
9
|
+
# Accessor for the name of the default adapter
|
10
|
+
attr_accessor :default
|
11
|
+
|
12
|
+
# Return the requested adapter by name.
|
13
|
+
# If a name is not supplied, return the default adapter.
|
14
|
+
# @see .get_default_adapter
|
15
|
+
#
|
16
|
+
# @param adapter_name [Symbol] the name of the requested adapter.
|
17
|
+
# @return [Class] the adapter class requested.
|
18
|
+
def get_adapter(adapter_name = nil)
|
19
|
+
if adapter_name
|
20
|
+
Registry.instance.adapters[adapter_name.to_sym]
|
21
|
+
else
|
22
|
+
get_default_adapter
|
23
|
+
end
|
24
|
+
end
|
8
25
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
26
|
+
# Return the default adapter.
|
27
|
+
# Raises an exception if the default adapter has not been configured.
|
28
|
+
def get_default_adapter
|
29
|
+
raise "The default adapter has not been configured." unless default
|
30
|
+
get_adapter(default)
|
13
31
|
end
|
14
32
|
|
15
|
-
|
16
|
-
|
17
|
-
|
33
|
+
# Registers an adapter.
|
34
|
+
# @see Registry#register
|
35
|
+
#
|
36
|
+
# @param name [Symbol] the name of the adapter.
|
37
|
+
# @param adapter [Class] the adapter class to register.
|
38
|
+
def register(name, adapter)
|
39
|
+
Registry.instance.register(name, adapter)
|
40
|
+
end
|
41
|
+
|
42
|
+
# Creates methods to access each adapter.
|
43
|
+
Registry.instance.adapters.each do |name, adapter|
|
44
|
+
define_method(name) do
|
45
|
+
adapter
|
18
46
|
end
|
19
47
|
end
|
20
48
|
|
49
|
+
def method_missing(name, *args)
|
50
|
+
return get_adapter(name) if Registry.instance.adapters.key?(name.to_sym)
|
51
|
+
super
|
52
|
+
end
|
21
53
|
end
|
22
54
|
|
23
55
|
end
|
24
56
|
end
|
25
57
|
end
|
58
|
+
|
59
|
+
Dir[File.join(__dir__, "adapters", "*_adapter.rb")].each { |adapter| require(adapter) }
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Ddr
|
2
|
+
module Extraction
|
3
|
+
module Adapters
|
4
|
+
class Adapter
|
5
|
+
|
6
|
+
# Supported extraction output types
|
7
|
+
OUTPUT_TYPES = [:text, :metadata]
|
8
|
+
|
9
|
+
class << self
|
10
|
+
# Register the adapter
|
11
|
+
def register(adapter_name)
|
12
|
+
Ddr::Extraction::Adapters.register(adapter_name, self)
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
# Extract a kind of output from the file path
|
17
|
+
#
|
18
|
+
# @param output [Symbol] the kind of output, `:text` or `:metadata`
|
19
|
+
# @param file_path [String] path to the file to be processed
|
20
|
+
# @return [IO] the result of the extraction
|
21
|
+
# @api public
|
22
|
+
def extract(output, file_path)
|
23
|
+
raise ArgumentError, "Output type must be one of #{OUTPUT_TYPES}." unless OUTPUT_TYPES.include?(output)
|
24
|
+
raise IOError, "File not found: #{file_path}" unless File.exist?(file_path)
|
25
|
+
execute(command(output, file_path))
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
# Returns the command to be executed
|
31
|
+
#
|
32
|
+
# @param output [Symbol] the kind of output.
|
33
|
+
# @param file_path [String] path to the file to be processed.
|
34
|
+
# @return [String, Array] the command as a String or Array
|
35
|
+
# @see #extract
|
36
|
+
# @see #execute
|
37
|
+
# @api private
|
38
|
+
def command(output, file_path)
|
39
|
+
raise NotImplementedError, "The `command' instance method must be implemented by the adapter."
|
40
|
+
end
|
41
|
+
|
42
|
+
# Executes the command in a subprocess.
|
43
|
+
#
|
44
|
+
# @param cmd [String, Array] the command as a String or Array
|
45
|
+
# @see Ruby documentation for IO.popen
|
46
|
+
# @return [IO] the output of the command.
|
47
|
+
# @api private
|
48
|
+
def execute(cmd)
|
49
|
+
IO.popen(cmd)
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
@@ -1,22 +1,26 @@
|
|
1
|
+
require_relative "adapter"
|
2
|
+
|
1
3
|
module Ddr
|
2
4
|
module Extraction
|
3
5
|
module Adapters
|
4
|
-
class FitsAdapter
|
6
|
+
class FitsAdapter < Adapter
|
5
7
|
|
6
|
-
|
7
|
-
#
|
8
|
-
# @param file [String] the file from which to extract metadata.
|
9
|
-
# @return [IO] the output
|
10
|
-
def extract_metadata(file)
|
11
|
-
IO.popen([self.class.path, "-i", file])
|
12
|
-
end
|
8
|
+
register :fits
|
13
9
|
|
14
10
|
class << self
|
15
11
|
# Path to FITS executable (fits.sh or fits.bat)
|
16
12
|
attr_accessor :path
|
17
13
|
end
|
18
14
|
|
15
|
+
private
|
16
|
+
|
17
|
+
def command(output, file_path)
|
18
|
+
raise "This adapter only supports :metadata output." unless output == :metadata
|
19
|
+
[self.class.path, "-i", file_path]
|
20
|
+
end
|
21
|
+
|
19
22
|
end
|
20
23
|
end
|
21
24
|
end
|
22
25
|
end
|
26
|
+
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require "singleton"
|
2
|
+
require_relative "adapter"
|
3
|
+
|
4
|
+
module Ddr
|
5
|
+
module Extraction
|
6
|
+
module Adapters
|
7
|
+
#
|
8
|
+
# Registry of adapter names and classes
|
9
|
+
#
|
10
|
+
class Registry
|
11
|
+
include Singleton
|
12
|
+
|
13
|
+
attr_reader :adapters
|
14
|
+
|
15
|
+
def initialize
|
16
|
+
@adapters = {}
|
17
|
+
end
|
18
|
+
|
19
|
+
# Registers an adapter
|
20
|
+
#
|
21
|
+
# @param name [Symbol] the name of the adapter.
|
22
|
+
# @param adapter [Class] the adapter to be registered.
|
23
|
+
def register(name, adapter)
|
24
|
+
name = name.to_sym
|
25
|
+
validate!(name, adapter)
|
26
|
+
adapters[name] = adapter
|
27
|
+
end
|
28
|
+
|
29
|
+
private
|
30
|
+
|
31
|
+
def validate!(name, adapter)
|
32
|
+
raise "Another adapter is registered under the name :#{name}." if adapters.key?(name)
|
33
|
+
unless adapter < Adapter
|
34
|
+
raise ArgumentError, "Only subclasses of Ddr::Extraction::Adapters::Adapter may be registered."
|
35
|
+
end
|
36
|
+
raise "The adapter #{adapter.to_s} is already registered." if adapters.value?(adapter)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
@@ -1,15 +1,11 @@
|
|
1
|
+
require_relative "adapter"
|
2
|
+
|
1
3
|
module Ddr
|
2
4
|
module Extraction
|
3
5
|
module Adapters
|
4
|
-
class TikaAdapter
|
5
|
-
|
6
|
-
|
7
|
-
#
|
8
|
-
# @param file [String] path to file from which to extract text
|
9
|
-
# @return [IO] the output
|
10
|
-
def extract_text(file)
|
11
|
-
IO.popen(["java", "-jar", self.class.path, "--text", file])
|
12
|
-
end
|
6
|
+
class TikaAdapter < Adapter
|
7
|
+
|
8
|
+
register :tika
|
13
9
|
|
14
10
|
class << self
|
15
11
|
# Path to tika-app.jar
|
@@ -17,9 +13,25 @@ module Ddr
|
|
17
13
|
|
18
14
|
# Tika server port (optional, required for server)
|
19
15
|
attr_accessor :port
|
20
|
-
end
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
def command(output, file_path)
|
21
|
+
["java", "-jar", self.class.path, output_options(output), file_path].flatten
|
22
|
+
end
|
23
|
+
|
24
|
+
def output_options(output)
|
25
|
+
case output
|
26
|
+
when :text
|
27
|
+
"--text"
|
28
|
+
when :metadata
|
29
|
+
["--metadata", "--xml"]
|
30
|
+
end
|
31
|
+
end
|
21
32
|
|
22
33
|
end
|
34
|
+
|
23
35
|
end
|
24
36
|
end
|
25
37
|
end
|
@@ -1,22 +1,16 @@
|
|
1
|
-
require_relative "adapter"
|
2
1
|
require_relative "adapters"
|
3
2
|
|
4
3
|
module Ddr
|
5
4
|
module Extraction
|
6
5
|
class Configuration
|
7
6
|
|
7
|
+
# Returns an object having settable attributes for adapters.
|
8
8
|
def adapters
|
9
9
|
config = Adapters
|
10
10
|
yield config if block_given?
|
11
11
|
config
|
12
12
|
end
|
13
13
|
|
14
|
-
def adapter
|
15
|
-
config = Adapter
|
16
|
-
yield config if block_given?
|
17
|
-
config
|
18
|
-
end
|
19
|
-
|
20
14
|
end
|
21
15
|
end
|
22
16
|
end
|
@@ -3,8 +3,7 @@ require "ddr-extraction"
|
|
3
3
|
bin_dir = File.expand_path("../../../../bin", __FILE__)
|
4
4
|
|
5
5
|
Ddr::Extraction.configure do |config|
|
6
|
-
config.
|
7
|
-
config.adapter.metadata = :fits
|
6
|
+
config.adapters.default = :tika
|
8
7
|
config.adapters.tika.path = File.join(bin_dir, "tika-app.jar")
|
9
8
|
config.adapters.fits.path = File.join(bin_dir, "fits", "fits.sh")
|
10
9
|
end
|
@@ -1,22 +1,36 @@
|
|
1
|
+
require "delegate"
|
1
2
|
require_relative "adapters"
|
2
3
|
|
3
4
|
module Ddr
|
4
5
|
module Extraction
|
5
|
-
|
6
|
+
#
|
7
|
+
# The Extractor is the main public class.
|
8
|
+
#
|
9
|
+
# It works by delegating to an adapter that does the real work.
|
10
|
+
#
|
11
|
+
# extractor = Ddr::Extraction::Extractor.build(:tika)
|
12
|
+
# text = extractor.extract(:text, "/path/to/text/file")
|
13
|
+
# puts text.read
|
14
|
+
# ...
|
15
|
+
#
|
16
|
+
class Extractor < ::SimpleDelegator
|
6
17
|
|
7
|
-
|
8
|
-
#
|
9
|
-
# @param type [Symbol] the type of content to extract, `:text` or `:metadata`.
|
10
|
-
# @param file [String] path to file from which to extract content.
|
11
|
-
# @return [IO] the output
|
12
|
-
def extract(type, file)
|
13
|
-
adapter(type).send("extract_#{type}", file)
|
14
|
-
end
|
18
|
+
class << self
|
15
19
|
|
16
|
-
|
20
|
+
# Returns/yields an extractor instance
|
21
|
+
#
|
22
|
+
# @param adapter_name [Symbol] the name of the adapter to plug in.
|
23
|
+
# If not given, a default adapter will be used, if
|
24
|
+
# Ddr::Extraction::Adapters.default has been set with
|
25
|
+
# the name of the default adapter.
|
26
|
+
#
|
27
|
+
def build(adapter_name = nil)
|
28
|
+
adapter = Adapters.get_adapter(adapter_name)
|
29
|
+
extractor = new(adapter.new)
|
30
|
+
yield extractor if block_given?
|
31
|
+
extractor
|
32
|
+
end
|
17
33
|
|
18
|
-
def adapter(type)
|
19
|
-
Adapter.build_adapter(type)
|
20
34
|
end
|
21
35
|
|
22
36
|
end
|
data/spec/unit/extractor_spec.rb
CHANGED
@@ -3,16 +3,18 @@ module Ddr
|
|
3
3
|
RSpec.describe Extractor do
|
4
4
|
|
5
5
|
describe "extracting text" do
|
6
|
-
|
6
|
+
subject { described_class.build(:tika) }
|
7
|
+
let(:file_path) { File.expand_path("../../fixtures/sample.docx", __FILE__) }
|
7
8
|
it "should extract the text content of the file" do
|
8
|
-
expect(subject.extract(:text,
|
9
|
+
expect(subject.extract(:text, file_path).read).to match(/This is a sample document./)
|
9
10
|
end
|
10
11
|
end
|
11
12
|
|
12
13
|
describe "extracting metadata" do
|
13
|
-
|
14
|
+
subject { described_class.build(:tika) }
|
15
|
+
let(:file_path) { File.expand_path("../../fixtures/blue-devil.png", __FILE__) }
|
14
16
|
it "should extract technical metadata from the file" do
|
15
|
-
expect(subject.extract(:metadata,
|
17
|
+
expect(subject.extract(:metadata, file_path).read.length).to_not eq(0)
|
16
18
|
end
|
17
19
|
end
|
18
20
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ddr-extraction
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.1
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David Chandek-Stark
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,9 +71,11 @@ files:
|
|
71
71
|
- ddr-extraction.gemspec
|
72
72
|
- lib/ddr-extraction.rb
|
73
73
|
- lib/ddr/extraction.rb
|
74
|
-
- lib/ddr/extraction/adapter.rb
|
75
74
|
- lib/ddr/extraction/adapters.rb
|
75
|
+
- lib/ddr/extraction/adapters/adapter.rb
|
76
76
|
- lib/ddr/extraction/adapters/fits_adapter.rb
|
77
|
+
- lib/ddr/extraction/adapters/null_adapter.rb
|
78
|
+
- lib/ddr/extraction/adapters/registry.rb
|
77
79
|
- lib/ddr/extraction/adapters/tika_adapter.rb
|
78
80
|
- lib/ddr/extraction/configuration.rb
|
79
81
|
- lib/ddr/extraction/defaults.rb
|
@@ -1,21 +0,0 @@
|
|
1
|
-
require "delegate"
|
2
|
-
require_relative "adapters"
|
3
|
-
|
4
|
-
module Ddr
|
5
|
-
module Extraction
|
6
|
-
class Adapter < ::SimpleDelegator
|
7
|
-
|
8
|
-
class << self
|
9
|
-
# Accessors for adapter types
|
10
|
-
attr_accessor :text, :metadata
|
11
|
-
|
12
|
-
def build_adapter(type)
|
13
|
-
adapter_name = send(type)
|
14
|
-
adapter = Adapters.get_adapter(adapter_name)
|
15
|
-
new(adapter.new)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|