ddr-filetools 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rspec +3 -0
- data/.travis.yml +9 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +12 -0
- data/README.md +44 -0
- data/Rakefile +9 -0
- data/bin/.keep +0 -0
- data/ddr-filetools.gemspec +24 -0
- data/lib/ddr-extraction.rb +1 -0
- data/lib/ddr/filetools.rb +29 -0
- data/lib/ddr/filetools/client.rb +12 -0
- data/lib/ddr/filetools/command.rb +17 -0
- data/lib/ddr/filetools/defaults.rb +10 -0
- data/lib/ddr/filetools/fits.rb +27 -0
- data/lib/ddr/filetools/metadata_command.rb +15 -0
- data/lib/ddr/filetools/ocr_command.rb +15 -0
- data/lib/ddr/filetools/provider.rb +15 -0
- data/lib/ddr/filetools/result.rb +33 -0
- data/lib/ddr/filetools/text_command.rb +15 -0
- data/lib/ddr/filetools/tika.rb +30 -0
- data/lib/ddr/filetools/tool.rb +24 -0
- data/lib/ddr/filetools/version.rb +5 -0
- data/lib/tasks/ddr_filetools.rake +56 -0
- data/spec/fixtures/blue-devil.png +0 -0
- data/spec/fixtures/sample.docx +0 -0
- data/spec/fixtures/sample.pdf +0 -0
- data/spec/spec_helper.rb +83 -0
- data/spec/unit/client_spec.rb +21 -0
- metadata +121 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: cd1c3088ed857301a2108f8921c4784538374cc4
|
4
|
+
data.tar.gz: 4004b38451534990699d74f034729cab559f7cd2
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: ef1570db43cc860bdc3abf6e739f2eabdfece85fc6f5fdbce36f15cdf145eb857303743a527b00422f381558eb9b9104d53264485443a51e2480577b0ef53645
|
7
|
+
data.tar.gz: d79b1d0f67a1af8c769a45bdba0b69b603826703d5d89962e9010327c0beae03176efe108f04e8df86f139e9c270b96f719c918228517c65d795adcfae25b7d1
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
Copyright (c) 2014, Duke University
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
|
5
|
+
|
6
|
+
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
|
7
|
+
|
8
|
+
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
|
9
|
+
|
10
|
+
3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
|
11
|
+
|
12
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,44 @@
|
|
1
|
+
# Ddr::FileTools
|
2
|
+
|
3
|
+
File extraction and analysis tools.
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'ddr-filetools'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install ddr-filetools
|
18
|
+
|
19
|
+
## Dependencies
|
20
|
+
|
21
|
+
TODO
|
22
|
+
|
23
|
+
## Configuration
|
24
|
+
|
25
|
+
TODO
|
26
|
+
|
27
|
+
There are rake tasks for downloading Tika and FITS to expected locations.
|
28
|
+
|
29
|
+
```sh
|
30
|
+
rake tika:download
|
31
|
+
rake fits:download
|
32
|
+
```
|
33
|
+
|
34
|
+
## Usage
|
35
|
+
|
36
|
+
TODO
|
37
|
+
|
38
|
+
## Contributing
|
39
|
+
|
40
|
+
1. Fork it ( https://github.com/duke-libraries/ddr-filetools/fork )
|
41
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
42
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
43
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
44
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/bin/.keep
ADDED
File without changes
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# coding: utf-8
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'ddr/filetools/version'

# Gem specification for ddr-filetools. The file list is taken from git, so the
# gem must be built from inside a git checkout.
Gem::Specification.new do |spec|
  spec.name          = "ddr-filetools"
  spec.version       = Ddr::FileTools::VERSION
  spec.authors       = ["David Chandek-Stark"]
  spec.email         = ["dchandekstark@gmail.com"]
  spec.summary       = "Pluggable text and metadata extraction service."
  spec.description   = "Pluggable text and metadata extraction service."
  spec.homepage      = "https://github.com/duke-libraries/ddr-filetools"
  spec.license       = "BSD-3-Clause"

  spec.files         = `git ls-files -z`.split("\x0")
  # Placeholder dotfiles such as `bin/.keep` match the `bin/` pattern but are
  # not commands; without the filter they end up listed as gem executables.
  spec.executables   = spec.files
                           .grep(%r{^bin/}) { |f| File.basename(f) }
                           .reject { |name| name.start_with?(".") }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.6"
  spec.add_development_dependency "rake"
  spec.add_development_dependency "rspec", "~> 3.0"
end
|
@@ -0,0 +1 @@
|
|
1
|
+
# NOTE(review): the gem's file list contains no lib/ddr/extraction.rb, so this
# require cannot be satisfied by this gem alone. It looks like a leftover from
# an earlier project name -- confirm whether it should load "ddr/filetools".
require "ddr/extraction"
|
@@ -0,0 +1,29 @@
|
|
1
|
+
require_relative "filetools/version"
require_relative "filetools/client"
require_relative "filetools/tool"
require_relative "filetools/tika"
require_relative "filetools/fits"
require_relative "filetools/text_command"
require_relative "filetools/metadata_command"
require_relative "filetools/ocr_command"

module Ddr
  # Namespace for the file tools and the registry that maps a tool name to
  # the command/provider pair that implements it.
  module FileTools

    class << self
      # Registry of tools, keyed by the name given to `register`.
      #
      # @return [Hash] tool name => Tool
      def tools
        @tools ||= {}
      end

      # Builds a Tool from +opts+ and stores it under +tool_name+, replacing
      # any tool previously registered with that name.
      #
      # @param tool_name [Symbol] key used to look the tool up
      # @param opts [Hash] options passed to Tool.new (`:command` and
      #   `:provider` are required there)
      # @return [Tool] the newly registered tool
      def register(tool_name, opts)
        tools[tool_name] = Tool.new(opts)
      end
    end

    # Tools registered at load time.
    register :text, command: TextCommand, provider: Tika
    register :metadata, command: MetadataCommand, provider: Fits
    register :ocr, command: OcrCommand, provider: Tika

  end
end
|
29
|
+
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module Ddr
  module FileTools
    # Base class for commands. A command wraps a provider; subclasses are
    # expected to override `call`.
    class Command

      # The provider object handed to the constructor.
      attr_reader :provider

      # @param provider [Object] the provider this command will use
      def initialize(provider)
        @provider = provider
      end

      # Placeholder that subclasses override.
      #
      # @param file_path [String] path of the file to operate on
      # @raise [NotImplementedError] always, in this base class
      def call(file_path)
        raise NotImplementedError, "Subclasses must implement `call`."
      end

    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
# Default locations of the external tools used by Ddr::FileTools.
#
# This file used to configure `Ddr::Extraction` via `require "ddr-extraction"`;
# that namespace is not defined in any file visible in this gem, so loading it
# could not succeed. The providers here expose their executable locations as
# class-level accessors, so the same default paths are applied through those.
#
# NOTE(review): the former `config.adapters.default = :tika` setting has no
# counterpart here -- tools are bound to providers by Ddr::FileTools.register.
# Confirm nothing still depends on a "default adapter".
require_relative "../filetools"

# Directory at the gem root where the rake download tasks place the tools.
bin_dir = File.expand_path("../../../../bin", __FILE__)

Ddr::FileTools::Tika.tika_path = File.join(bin_dir, "tika-app.jar")
Ddr::FileTools::Fits.fits_path = File.join(bin_dir, "fits", "fits.sh")
|
10
|
+
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative "provider"

module Ddr
  module FileTools
    # Provider that builds FITS command lines.
    class Fits < Provider

      class << self
        # Path to FITS executable (fits.sh or fits.bat)
        attr_accessor :fits_path
      end

      # Default: <gem root>/bin/fits/fits.sh
      self.fits_path = File.join(File.expand_path("../../../../bin", __FILE__), "fits", "fits.sh")

      # Runs FITS against a file.
      #
      # @param file_path [String] path of the file to examine
      # @return whatever `call` returns (presumably defined by Provider --
      #   it is not visible in this file)
      def metadata(file_path)
        call command(file_path)
      end

      private

      # Argument list for the FITS invocation: executable, `-i`, input file.
      def command(file_path)
        [self.class.fits_path, "-i", file_path]
      end

    end
  end
end
|
27
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Ddr
  module FileTools
    # Value object holding the outcome of running a tool: captured output,
    # captured error text and a status object.
    class Result

      attr_reader :output, :error, :status

      # @param out [String, nil] captured output
      # @param err [String, nil] captured error text
      # @param s [#success?] status object; only `success?` is called on it
      def initialize(out, err, s)
        @output, @error, @status = out, err, s
      end

      # Short form showing only whether the run succeeded.
      def inspect
        "#<Ddr::FileTools::Result #{success? ? 'SUCCESS' : 'ERROR'}>"
      end

      # The output as a String. `to_s` must always return a String, so a nil
      # output is rendered as "" instead of being returned as-is.
      def to_s
        output.to_s
      end

      # The raw output, unchanged.
      def read
        output
      end

      # @return [Boolean] whatever the status object reports
      def success?
        status.success?
      end

      # @return [Boolean] the negation of `success?`
      def error?
        !success?
      end

    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require_relative "provider"

module Ddr
  module FileTools
    # Provider that builds `java -jar tika-app.jar` command lines.
    class Tika < Provider

      class << self
        # Path to tika-app.jar
        attr_accessor :tika_path
      end

      # Default: <gem root>/bin/tika-app.jar
      self.tika_path = File.join(File.expand_path("../../../../bin", __FILE__), "tika-app.jar")

      # Runs Tika with `--text` against a file.
      #
      # @param file_path [String] path of the input file
      # @return whatever `call` returns (presumably defined by Provider --
      #   it is not visible in this file)
      def text(file_path)
        call command(file_path, "--text")
      end

      # Runs Tika with `--metadata --xml` against a file.
      #
      # @param file_path [String] path of the input file
      # @return whatever `call` returns (see #text)
      def metadata(file_path)
        call command(file_path, "--metadata", "--xml")
      end

      private

      # Argument list for the Tika invocation: the java launcher, the jar,
      # any Tika options, then the input file last.
      def command(file_path, *options)
        ["java", "-jar", self.class.tika_path, *options, file_path]
      end

    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Ddr
  module FileTools
    # Pairs a command class with a provider class and runs them together.
    class Tool

      # The command class and provider class given at construction.
      attr_reader :command, :provider

      # @param opts [Hash] must contain `:command` and `:provider`
      # @raise [KeyError] if either key is missing
      def initialize(opts)
        @command = opts.fetch(:command)
        @provider = opts.fetch(:provider)
      end

      # Instantiates a fresh provider and command, then forwards all
      # arguments to the command's `call`.
      #
      # @return whatever the command's `call` returns
      def call(*args)
        build_command.call(*args)
      end

      private

      # A new command instance wrapping a new provider instance.
      def build_command
        command.new(provider.new)
      end

    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
require "fileutils"
require "openssl"
require "net/http"

# Rake tasks that download the external tools (Tika and FITS) into bin/.
# Paths are resolved against the directory rake is run from.
DOWNLOAD_DIR = File.absolute_path("tmp")
BIN_DIR = File.absolute_path("bin")
TIKA_VERSION = "1.7"
FITS_VERSION = "0.8.3"

# Versions can be overridden through the environment.
tika_version = ENV["TIKA_VERSION"] || TIKA_VERSION
tika_path = File.join(BIN_DIR, "tika-app.jar")
tika_app = File.basename(tika_path)
tika_download_url = "http://archive.apache.org/dist/tika/tika-app-#{tika_version}.jar"
tika_checksum_url = "#{tika_download_url}.sha"
tika_checksum_type = :SHA1

fits_version = ENV["FITS_VERSION"] || FITS_VERSION
fits_path = File.join(BIN_DIR, "fits", "fits.sh")
fits_download_url = "http://projects.iq.harvard.edu/files/fits/files/fits-#{fits_version}.zip"

namespace :tika do
  desc "Download Tika app"
  task :download => [:download_dir] do
    FileUtils.cd(DOWNLOAD_DIR) do
      puts "Downloading Tika app ... "
      # Argument-list form: no shell is involved, so an ENV-supplied version
      # cannot inject shell syntax. `-f` makes curl fail on HTTP errors.
      unless system("curl", "-f", "-L", tika_download_url, "-o", tika_app)
        abort "Download failed -- aborting!"
      end
      checksum = Net::HTTP.get(URI(tika_checksum_url)).chomp
      puts "Verifying checksum ... "
      digest = OpenSSL::Digest.const_get(tika_checksum_type).new
      # Binary read: the jar must be hashed byte-for-byte.
      digest << File.binread(tika_app)
      if digest.to_s != checksum
        puts "Checksums do not match -- aborting!"
        FileUtils.remove_entry_secure(tika_app)
        abort
      end
      FileUtils.mkdir_p(BIN_DIR)
      FileUtils.mv(tika_app, tika_path)
    end
  end
end

namespace :fits do
  desc "Download FITS tool"
  task :download => :download_dir do
    FileUtils.cd(DOWNLOAD_DIR) do
      puts "Downloading FITS tool ... "
      unless system("curl", "-f", "-L", fits_download_url, "-o", "fits.zip")
        abort "Download failed -- aborting!"
      end
      unless system("unzip", "-a", "-o", "-q", "fits.zip")
        abort "Could not unpack fits.zip -- aborting!"
      end
      FileUtils.mkdir_p(BIN_DIR)
      # NOTE(review): if bin/fits already exists, `mv` places the new
      # directory inside it rather than replacing it -- confirm whether a
      # previous install should be removed first.
      FileUtils.mv("fits-#{fits_version}", File.dirname(fits_path))
    end
    FileUtils.chmod(0755, fits_path)
  end
end

# Ensures the scratch directory used by the download tasks exists.
task :download_dir do
  FileUtils.mkdir_p(DOWNLOAD_DIR)
end
|
56
|
+
|
Binary file
|
Binary file
|
Binary file
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,83 @@
|
|
1
|
+
require "coveralls"
Coveralls.wear!

require "ddr/filetools"

# This file was generated by the `rspec --init` command. Conventionally, all
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
# The generated `.rspec` file contains `--require spec_helper` which will cause this
# file to always be loaded, without a need to explicitly require it in any files.
#
# Given that it is always loaded, you are encouraged to keep this file as
# light-weight as possible. Requiring heavyweight dependencies from this file
# will add to the boot time of your test suite on EVERY test run, even for an
# individual file that may not need all of that loaded. Instead, make a
# separate helper file that requires this one and then use it only in the specs
# that actually need it.
#
# The `.rspec` file also contains a few flags that are not defaults but that
# users commonly want.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
  # The settings below are suggested to provide a good initial experience
  # with RSpec, but feel free to customize to your heart's content.

  # These two settings work together to allow you to limit a spec run
  # to individual examples or groups you care about by tagging them with
  # `:focus` metadata. When nothing is tagged with `:focus`, all examples
  # get run.
  config.filter_run :focus
  config.run_all_when_everything_filtered = true

  # Many RSpec users commonly either run the entire suite or an individual
  # file, and it's useful to allow more verbose output when running an
  # individual spec file.
  if config.files_to_run.one?
    # Use the documentation formatter for detailed output,
    # unless a formatter has already been configured
    # (e.g. via a command-line flag).
    config.default_formatter = 'doc'
  end

  # Print the 10 slowest examples and example groups at the
  # end of the spec run, to help surface which specs are running
  # particularly slow.
  config.profile_examples = 10

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = :random

  # Seed global randomization in this process using the `--seed` CLI option.
  # Setting this allows you to use `--seed` to deterministically reproduce
  # test failures related to randomization by passing the same `--seed` value
  # as the one that triggered the failure.
  Kernel.srand config.seed

  # rspec-expectations config goes here. You can use an alternate
  # assertion/expectation library such as wrong or the stdlib/minitest
  # assertions if you prefer.
  config.expect_with :rspec do |expectations|
    # Enable only the newer, non-monkey-patching expect syntax.
    # For more details, see:
    #   - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
    expectations.syntax = :expect
  end

  # rspec-mocks config goes here. You can use an alternate test double
  # library (such as bogus or mocha) by changing the `mock_with` option here.
  config.mock_with :rspec do |mocks|
    # Enable only the newer, non-monkey-patching expect syntax.
    # For more details, see:
    #   - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
    mocks.syntax = :expect

    # Prevents you from mocking or stubbing a method that does not exist on
    # a real object. This is generally recommended.
    mocks.verify_partial_doubles = true
  end

end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Ddr
  module FileTools
    # Runs the registered tools through Client against the fixture files.
    # NOTE(review): these examples appear to need the real Tika and FITS
    # tools installed under bin/ (see the rake download tasks) -- confirm.
    RSpec.describe Client do

      describe "extracting text" do
        let(:file_path) { File.expand_path("../../fixtures/sample.docx", __FILE__) }
        it "should extract the text content of the file" do
          expect(subject.run_tool(:text, file_path).output).to match(/This is a sample document./)
        end
      end

      describe "extracting metadata" do
        let(:file_path) { File.expand_path("../../fixtures/blue-devil.png", __FILE__) }
        it "should extract technical metadata from the file" do
          expect(subject.run_tool(:metadata, file_path).output.length).to_not eq(0)
        end
      end

    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: ddr-filetools
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.4.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Chandek-Stark
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-02-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.6'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.6'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description: Pluggable text and metadata extraction service.
|
56
|
+
email:
|
57
|
+
- dchandekstark@gmail.com
|
58
|
+
executables:
|
59
|
+
- ".keep"
|
60
|
+
extensions: []
|
61
|
+
extra_rdoc_files: []
|
62
|
+
files:
|
63
|
+
- ".gitignore"
|
64
|
+
- ".rspec"
|
65
|
+
- ".travis.yml"
|
66
|
+
- Gemfile
|
67
|
+
- LICENSE.txt
|
68
|
+
- README.md
|
69
|
+
- Rakefile
|
70
|
+
- bin/.keep
|
71
|
+
- ddr-filetools.gemspec
|
72
|
+
- lib/ddr-extraction.rb
|
73
|
+
- lib/ddr/filetools.rb
|
74
|
+
- lib/ddr/filetools/client.rb
|
75
|
+
- lib/ddr/filetools/command.rb
|
76
|
+
- lib/ddr/filetools/defaults.rb
|
77
|
+
- lib/ddr/filetools/fits.rb
|
78
|
+
- lib/ddr/filetools/metadata_command.rb
|
79
|
+
- lib/ddr/filetools/ocr_command.rb
|
80
|
+
- lib/ddr/filetools/provider.rb
|
81
|
+
- lib/ddr/filetools/result.rb
|
82
|
+
- lib/ddr/filetools/text_command.rb
|
83
|
+
- lib/ddr/filetools/tika.rb
|
84
|
+
- lib/ddr/filetools/tool.rb
|
85
|
+
- lib/ddr/filetools/version.rb
|
86
|
+
- lib/tasks/ddr_filetools.rake
|
87
|
+
- spec/fixtures/blue-devil.png
|
88
|
+
- spec/fixtures/sample.docx
|
89
|
+
- spec/fixtures/sample.pdf
|
90
|
+
- spec/spec_helper.rb
|
91
|
+
- spec/unit/client_spec.rb
|
92
|
+
homepage: https://github.com/duke-libraries/ddr-filetools
|
93
|
+
licenses:
|
94
|
+
- BSD-3-Clause
|
95
|
+
metadata: {}
|
96
|
+
post_install_message:
|
97
|
+
rdoc_options: []
|
98
|
+
require_paths:
|
99
|
+
- lib
|
100
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - ">="
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
requirements: []
|
111
|
+
rubyforge_project:
|
112
|
+
rubygems_version: 2.2.2
|
113
|
+
signing_key:
|
114
|
+
specification_version: 4
|
115
|
+
summary: Pluggable text and metadata extraction service.
|
116
|
+
test_files:
|
117
|
+
- spec/fixtures/blue-devil.png
|
118
|
+
- spec/fixtures/sample.docx
|
119
|
+
- spec/fixtures/sample.pdf
|
120
|
+
- spec/spec_helper.rb
|
121
|
+
- spec/unit/client_spec.rb
|