tika-app 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +2 -0
- data/Gemfile +4 -0
- data/LICENSE +27 -0
- data/README.md +31 -0
- data/Rakefile +7 -0
- data/VERSION +1 -0
- data/lib/tika/app.rb +89 -0
- data/lib/tika/command.rb +45 -0
- data/lib/tika/commands.rb +57 -0
- data/lib/tika/error.rb +3 -0
- data/lib/tika/resource.rb +31 -0
- data/lib/tika/result.rb +34 -0
- data/spec/fixtures/Lorem_ipsum.docx +0 -0
- data/spec/fixtures/Lorem_ipsum.pdf +0 -0
- data/spec/fixtures/Lorem_ipsum.png +0 -0
- data/spec/fixtures/Lorem_ipsum.tiff +0 -0
- data/spec/spec_helper.rb +92 -0
- data/spec/unit/app_spec.rb +74 -0
- data/spec/unit/resource_spec.rb +43 -0
- data/tika-app.gemspec +23 -0
- metadata +114 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: e7e13e3b027b7dfdb22b89fd6c07a8b021c3210e
|
4
|
+
data.tar.gz: 2e1021d068af535cef610eb244540a4c4f671052
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1f8ff1d0edac49772f8ecd1f8577458824014a6dfd359b96c0680f600addb1039ae1c90dd2d4605653698715bb45427abd5e89f5d9102e05844804727356ecc4
|
7
|
+
data.tar.gz: 74dedc5a7debfb4100a9db85dc1a095c0da329e01659b0a8526770ec95c52cddc62fa5d229d1a2e3a7849ac3e5e07f2f8d11f7093e62a3970fca4b75aa91c23b
|
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) Duke University.
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without modification,
|
5
|
+
are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
1. Redistributions of source code must retain the above copyright notice,
|
8
|
+
this list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
3. Neither the name of Duke University nor the names of its contributors may
|
15
|
+
be used to endorse or promote products derived from this software without
|
16
|
+
specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
19
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
20
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
|
22
|
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
23
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
24
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
25
|
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
27
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Tika::App
|
2
|
+
|
3
|
+
Ruby bindings for the Apache Tika command-line app (`tika-app.jar`).
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'tika-app'
|
11
|
+
```
|
12
|
+
|
13
|
+
And then execute:
|
14
|
+
|
15
|
+
$ bundle
|
16
|
+
|
17
|
+
Or install it yourself as:
|
18
|
+
|
19
|
+
$ gem install tika-app
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
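Create a `Tika::App` (the jar location comes from the `:path` option, `Tika::App.path`, or the `TIKA_APP` environment variable) and call `get_text`, `get_metadata`, `detect` or `get_language` with a file; `Tika::Resource.new(file)` exposes the same results as `text`, `metadata`, `content_type` and `language`.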
|
24
|
+
|
25
|
+
## Contributing
|
26
|
+
|
27
|
+
1. Fork it ( https://github.com/duke-libraries/tika-app/fork )
|
28
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
29
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
30
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
31
|
+
5. Create a new Pull Request
|
data/Rakefile
ADDED
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/tika/app.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
require_relative "error"
|
2
|
+
require_relative "commands"
|
3
|
+
require_relative "resource"
|
4
|
+
|
5
|
+
module Tika
  # Front end for the Tika app jar. Each public `get_*`/`detect` method runs
  # one command class from the Commands module and returns that command's
  # rendered result.
  class App
    include Commands

    # Jar location used when nothing else is configured.
    DEFAULT_PATH = File.expand_path("../../../bin/tika-app.jar", __FILE__)

    class << self
      # Class-wide jar path; consulted after the `:path` option.
      attr_accessor :path
    end

    # Jar path chosen for this instance.
    attr_reader :path
    # Result object of the most recent command (nil until one has run).
    attr_accessor :result

    # opts[:path] - optional jar path. Lookup order: opts[:path], App.path,
    #               ENV["TIKA_APP"], DEFAULT_PATH (first truthy value wins).
    def initialize(opts={})
      candidates = [opts[:path], self.class.path, ENV["TIKA_APP"], DEFAULT_PATH]
      @path = candidates.find { |candidate| candidate }
    end

    # Runs GetTextCommand on `file`; `opts` is handed to the command.
    def get_text(file, opts={})
      execute(GetTextCommand, file, opts)
    end

    # Runs GetMetadataCommand on `file`; `opts` is handed to the command.
    def get_metadata(file, opts={})
      execute(GetMetadataCommand, file, opts)
    end

    # Runs GetVersionCommand.
    def get_version
      execute(GetVersionCommand)
    end

    # Runs GetMimeTypesCommand.
    def get_mime_types
      execute(GetMimeTypesCommand)
    end
    alias list_supported_types get_mime_types

    # Runs GetParsersCommand.
    def get_parsers
      execute(GetParsersCommand)
    end
    alias list_parsers get_parsers

    # Runs GetParsersDetailsCommand.
    def get_parsers_details
      execute(GetParsersDetailsCommand)
    end
    alias list_parser_details get_parsers_details

    # Runs GetDetectorsCommand.
    def get_detectors
      execute(GetDetectorsCommand)
    end
    alias list_detectors get_detectors

    # Runs DetectCommand on `file`; `opts` is handed to the command.
    def detect(file, opts={})
      execute(DetectCommand, file, opts)
    end

    # Runs GetLanguageCommand on `file`; `opts` is handed to the command.
    def get_language(file, opts={})
      execute(GetLanguageCommand, file, opts)
    end

    # Runs GetMetadataModelsCommand.
    def get_metadata_models
      execute(GetMetadataModelsCommand)
    end
    alias list_met_models get_metadata_models

    # Frozen argv prefix that every command extends with its own options.
    def command_line
      ["java", "-jar", path].freeze
    end

    private

    # Forgets the previous result.
    def reset
      @result = nil
    end

    # Clears the old result, runs `command` with this app and `args`, stores
    # the new result, and returns its rendered form.
    # Raises Error with the result's error text when the run did not succeed.
    def execute(command, *args)
      reset
      @result = command.execute(self, *args)
      raise Error, result.error unless result.success?

      result.render
    end
  end
end
|
data/lib/tika/command.rb
ADDED
@@ -0,0 +1,45 @@
|
|
1
|
+
require "open3"
|
2
|
+
require "tempfile"
|
3
|
+
require_relative "result"
|
4
|
+
|
5
|
+
module Tika
  # One invocation of the Tika app: builds the argument vector from the app's
  # base command line, the subclass's fixed options and the per-call
  # file/options, runs it, and wraps the captured output in a result object.
  class Command

    class << self
      # options      - Array of fixed command-line flags for the subclass.
      # result_class - class used to wrap (stdout, stderr, status).
      attr_accessor :options, :result_class

      # Convenience: instantiate with (app, *args) and run immediately.
      def execute(app, *args)
        new(app, *args).execute
      end
    end

    attr_reader :app, :file, :options

    # app  - object responding to #command_line (an Array of strings).
    # args - optional file (first) and optional options Hash (last).
    def initialize(app, *args)
      @app = app
      @file = args.shift
      @options = args.pop || {}
    end

    # Result wrapper configured on the command class.
    def result_class
      self.class.result_class
    end

    # Returns a new Array: app command line + class options
    # + "-p<password>" when options[:password] is set + the file when given.
    def command_line
      # Array() tolerates command classes that never assigned `options`
      # (previously `Array + nil` raised TypeError).
      cmd = app.command_line + Array(self.class.options)
      cmd << "-p#{options[:password]}" if options[:password]
      # Open3 needs String arguments; accept Pathname/File via #to_path.
      cmd << (file.respond_to?(:to_path) ? file.to_path : file) if file
      cmd
    end

    # Runs the command line (argv form, no shell) and returns
    # result_class.new(stdout, stderr, status).
    def execute
      raw_result = Open3.capture3(*command_line)
      result_class.new(*raw_result)
    end

  end
end
|
data/lib/tika/commands.rb
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
require_relative "command"
|
2
|
+
|
3
|
+
module Tika
  # Concrete commands. Each subclass only declares the fixed flags appended
  # to the app command line and the result class that renders the output.
  module Commands

    # Flags: -t -eUTF8; output rendered as text.
    class GetTextCommand < Command
      self.result_class = TextResult
      self.options = %w[-t -eUTF8]
    end

    # Flag: -j; output rendered as parsed JSON.
    class GetMetadataCommand < Command
      self.result_class = JSONResult
      self.options = %w[-j]
    end

    # Flag: -V; output rendered as text.
    class GetVersionCommand < Command
      self.result_class = TextResult
      self.options = %w[-V]
    end

    # Flag: --list-supported-types; output rendered as text.
    class GetMimeTypesCommand < Command
      self.result_class = TextResult
      self.options = %w[--list-supported-types]
    end

    # Flag: --list-parsers; output rendered as text.
    class GetParsersCommand < Command
      self.result_class = TextResult
      self.options = %w[--list-parsers]
    end

    # Flag: --list-parser-details; output rendered as text.
    class GetParsersDetailsCommand < Command
      self.result_class = TextResult
      self.options = %w[--list-parser-details]
    end

    # Flag: --list-detectors; output rendered as text.
    class GetDetectorsCommand < Command
      self.result_class = TextResult
      self.options = %w[--list-detectors]
    end

    # Flag: -d; output rendered as text.
    class DetectCommand < Command
      self.result_class = TextResult
      self.options = %w[-d]
    end

    # Flag: --list-met-models; output rendered as text.
    class GetMetadataModelsCommand < Command
      self.result_class = TextResult
      self.options = %w[--list-met-models]
    end

    # Flag: -l; output rendered as text.
    class GetLanguageCommand < Command
      self.result_class = TextResult
      self.options = %w[-l]
    end

  end
end
|
data/lib/tika/error.rb
ADDED
data/lib/tika/resource.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative "app"
|
2
|
+
|
3
|
+
module Tika
  # A single file paired with its own App instance. Each accessor asks the
  # app once and reuses the answer on later calls.
  class Resource

    attr_reader :file, :app, :opts

    # file - passed as-is to the App methods.
    # opts - options Hash forwarded with every App call.
    def initialize(file, opts={})
      @file, @opts = file, opts
      @app = App.new
    end

    # Result of App#get_text for the file (cached).
    def text
      cached(:text) { app.get_text(file, opts) }
    end

    # Result of App#get_metadata for the file (cached).
    def metadata
      cached(:metadata) { app.get_metadata(file, opts) }
    end

    # Result of App#detect for the file (cached).
    def content_type
      cached(:content_type) { app.detect(file, opts) }
    end

    # Result of App#get_language for the file (cached).
    def language
      cached(:language) { app.get_language(file, opts) }
    end

    private

    # Returns the value stored in @<name>; when it is nil/false, stores and
    # returns the block's value instead (same semantics as `||=`).
    def cached(name)
      ivar = "@#{name}"
      instance_variable_get(ivar) || instance_variable_set(ivar, yield)
    end

  end
end
|
data/lib/tika/result.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require "json"
|
2
|
+
require "forwardable"
|
3
|
+
|
4
|
+
module Tika

  # Captured outcome of one command run: stdout, stderr and process status.
  class Result
    extend Forwardable

    attr_reader :output, :error, :status

    # success? is answered by the status object.
    def_delegator :status, :success?

    # output - captured standard output.
    # error  - captured standard error.
    # status - object responding to #success?.
    def initialize(output, error, status)
      @output, @error, @status = output, error, status
    end

    # Base rendering: the raw output, unchanged.
    def render
      output
    end
  end

  # Renders output with leading/trailing whitespace removed.
  class TextResult < Result
    def render
      # to_s keeps a nil output from raising NoMethodError.
      output.to_s.strip
    end
  end

  # Renders output as parsed JSON.
  class JSONResult < Result
    # Returns the parsed structure, or nil when output is nil or empty.
    # Raises JSON::ParserError on malformed JSON.
    def render
      return nil if output.nil? || output.empty?
      # The output is derived from arbitrary input documents, so parse it as
      # plain data: JSON.parse with create_additions disabled never
      # instantiates objects from "json_class" keys. max_nesting/allow_nan
      # keep the leniency JSON.load had.
      JSON.parse(output, max_nesting: false, allow_nan: true, create_additions: false)
    end
  end

end
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
require "tika/app"
|
2
|
+
|
3
|
+
FIXTURE_DIR = File.expand_path("../fixtures", __FILE__)
|
4
|
+
|
5
|
+
# This file was generated by the `rspec --init` command. Conventionally, all
|
6
|
+
# specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
|
7
|
+
# The generated `.rspec` file contains `--require spec_helper` which will cause
|
8
|
+
# this file to always be loaded, without a need to explicitly require it in any
|
9
|
+
# files.
|
10
|
+
#
|
11
|
+
# Given that it is always loaded, you are encouraged to keep this file as
|
12
|
+
# light-weight as possible. Requiring heavyweight dependencies from this file
|
13
|
+
# will add to the boot time of your test suite on EVERY test run, even for an
|
14
|
+
# individual file that may not need all of that loaded. Instead, consider making
|
15
|
+
# a separate helper file that requires the additional dependencies and performs
|
16
|
+
# the additional setup, and require it from the spec files that actually need
|
17
|
+
# it.
|
18
|
+
#
|
19
|
+
# The `.rspec` file also contains a few flags that are not defaults but that
|
20
|
+
# users commonly want.
|
21
|
+
#
|
22
|
+
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
|
23
|
+
RSpec.configure do |config|
|
24
|
+
# rspec-expectations config goes here. You can use an alternate
|
25
|
+
# assertion/expectation library such as wrong or the stdlib/minitest
|
26
|
+
# assertions if you prefer.
|
27
|
+
config.expect_with :rspec do |expectations|
|
28
|
+
# This option will default to `true` in RSpec 4. It makes the `description`
|
29
|
+
# and `failure_message` of custom matchers include text for helper methods
|
30
|
+
# defined using `chain`, e.g.:
|
31
|
+
# be_bigger_than(2).and_smaller_than(4).description
|
32
|
+
# # => "be bigger than 2 and smaller than 4"
|
33
|
+
# ...rather than:
|
34
|
+
# # => "be bigger than 2"
|
35
|
+
expectations.include_chain_clauses_in_custom_matcher_descriptions = true
|
36
|
+
end
|
37
|
+
|
38
|
+
# rspec-mocks config goes here. You can use an alternate test double
|
39
|
+
# library (such as bogus or mocha) by changing the `mock_with` option here.
|
40
|
+
config.mock_with :rspec do |mocks|
|
41
|
+
# Prevents you from mocking or stubbing a method that does not exist on
|
42
|
+
# a real object. This is generally recommended, and will default to
|
43
|
+
# `true` in RSpec 4.
|
44
|
+
mocks.verify_partial_doubles = true
|
45
|
+
end
|
46
|
+
|
47
|
+
# These two settings work together to allow you to limit a spec run
|
48
|
+
# to individual examples or groups you care about by tagging them with
|
49
|
+
# `:focus` metadata. When nothing is tagged with `:focus`, all examples
|
50
|
+
# get run.
|
51
|
+
config.filter_run :focus
|
52
|
+
config.run_all_when_everything_filtered = true
|
53
|
+
|
54
|
+
# Limits the available syntax to the non-monkey patched syntax that is
|
55
|
+
# recommended. For more details, see:
|
56
|
+
# - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
|
57
|
+
# - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
|
58
|
+
# - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
|
59
|
+
config.disable_monkey_patching!
|
60
|
+
|
61
|
+
# This setting enables warnings. It's recommended, but in some cases may
|
62
|
+
# be too noisy due to issues in dependencies.
|
63
|
+
config.warnings = true
|
64
|
+
|
65
|
+
# Many RSpec users commonly either run the entire suite or an individual
|
66
|
+
# file, and it's useful to allow more verbose output when running an
|
67
|
+
# individual spec file.
|
68
|
+
if config.files_to_run.one?
|
69
|
+
# Use the documentation formatter for detailed output,
|
70
|
+
# unless a formatter has already been configured
|
71
|
+
# (e.g. via a command-line flag).
|
72
|
+
config.default_formatter = 'doc'
|
73
|
+
end
|
74
|
+
|
75
|
+
# Print the 10 slowest examples and example groups at the
|
76
|
+
# end of the spec run, to help surface which specs are running
|
77
|
+
# particularly slow.
|
78
|
+
config.profile_examples = 10
|
79
|
+
|
80
|
+
# Run specs in random order to surface order dependencies. If you find an
|
81
|
+
# order dependency and want to debug it, you can fix the order by providing
|
82
|
+
# the seed, which is printed after each run.
|
83
|
+
# --seed 1234
|
84
|
+
config.order = :random
|
85
|
+
|
86
|
+
# Seed global randomization in this process using the `--seed` CLI option.
|
87
|
+
# Setting this allows you to use `--seed` to deterministically reproduce
|
88
|
+
# test failures related to randomization by passing the same `--seed` value
|
89
|
+
# as the one that triggered the failure.
|
90
|
+
Kernel.srand config.seed
|
91
|
+
|
92
|
+
end
|
data/spec/unit/app_spec.rb
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
module Tika
|
2
|
+
RSpec.describe App do
|
3
|
+
|
4
|
+
describe "#get_text" do
|
5
|
+
describe "with a document" do
|
6
|
+
let(:file) { File.join(FIXTURE_DIR, "Lorem_ipsum.docx") }
|
7
|
+
it "should return the text of the file" do
|
8
|
+
text = subject.get_text(file)
|
9
|
+
expect(text).to match(/Lorem ipsum/)
|
10
|
+
end
|
11
|
+
end
|
12
|
+
describe "with an image" do
|
13
|
+
let(:file) { File.join(FIXTURE_DIR, "Lorem_ipsum.tiff") }
|
14
|
+
it "should return the text of the file" do
|
15
|
+
text = subject.get_text(file)
|
16
|
+
expect(text).to match(/^Lorem ipsum/)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
describe "#get_metadata" do
|
22
|
+
let(:file) { File.join(FIXTURE_DIR, "Lorem_ipsum.pdf") }
|
23
|
+
it "should return the metadata of the file" do
|
24
|
+
metadata = subject.get_metadata(file, content_type: "application/pdf")
|
25
|
+
expect(metadata["Creation-Date"]).to eq("2015-02-15T01:54:41Z")
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
describe "#get_version" do
|
30
|
+
it "should return the Tika app version" do
|
31
|
+
expect(subject.get_version).to match(/^Apache Tika/)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
describe "#get_mime_types" do
|
36
|
+
it "should return the MIME Types support by the Tika app" do
|
37
|
+
expect(subject.get_mime_types).to match(/application\/pdf/)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "#get_parsers" do
|
42
|
+
it "should return the parsers available to the Tika app" do
|
43
|
+
expect(subject.get_parsers).to match(/org\.apache\.tika\.parser\.DefaultParser/)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
describe "#get_parsers_details" do
|
48
|
+
it "should return the parsers available to the Tika app and the MIME types they support" do
|
49
|
+
expect(subject.get_parsers_details).to match(/org\.apache\.tika\.parser\.DefaultParser/)
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "#get_detectors" do
|
54
|
+
it "should return the detectors available to the Tika app" do
|
55
|
+
expect(subject.get_detectors).to match(/org\.apache\.tika\.detect\.DefaultDetector/)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
describe "#detect" do
|
60
|
+
let(:file) { File.join(FIXTURE_DIR, "Lorem_ipsum.png") }
|
61
|
+
it "should return the MIME type of the resource (if successful)" do
|
62
|
+
expect(subject.detect(file)).to eq("image/png")
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "#get_language" do
|
67
|
+
let(:file) { "http://www.archives.gov/exhibits/charters/constitution_transcript.html" }
|
68
|
+
it "should return the language code" do
|
69
|
+
expect(subject.get_language(file)).to eq("en")
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
end
|
74
|
+
end
|
data/spec/unit/resource_spec.rb
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
module Tika
|
2
|
+
RSpec.describe Resource do
|
3
|
+
|
4
|
+
subject { described_class.new(file) }
|
5
|
+
|
6
|
+
describe "#text" do
|
7
|
+
describe "with a document" do
|
8
|
+
let(:file) { File.join(FIXTURE_DIR, "Lorem_ipsum.docx") }
|
9
|
+
it "should return the text of the file" do
|
10
|
+
expect(subject.text).to match(/^Lorem ipsum/)
|
11
|
+
end
|
12
|
+
end
|
13
|
+
describe "with an image" do
|
14
|
+
let(:file) { File.join(FIXTURE_DIR, "Lorem_ipsum.tiff") }
|
15
|
+
it "should return the text of the file" do
|
16
|
+
expect(subject.text).to match(/^Lorem ipsum/)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
describe "#metadata" do
|
22
|
+
let(:file) { File.join(FIXTURE_DIR, "Lorem_ipsum.pdf") }
|
23
|
+
it "should return the metadata of the file" do
|
24
|
+
expect(subject.metadata["Creation-Date"]).to eq("2015-02-15T01:54:41Z")
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "#content_type" do
|
29
|
+
let(:file) { File.join(FIXTURE_DIR, "Lorem_ipsum.png") }
|
30
|
+
it "should return the MIME type of the resource (if successful)" do
|
31
|
+
expect(subject.content_type).to eq("image/png")
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
describe "#language" do
|
36
|
+
let(:file) { "http://www.archives.gov/exhibits/charters/constitution_transcript.html" }
|
37
|
+
it "should return the language code" do
|
38
|
+
expect(subject.language).to eq("en")
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
end
|
data/tika-app.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
|
5
|
+
Gem::Specification.new do |spec|
|
6
|
+
spec.name = "tika-app"
|
7
|
+
spec.version = File.read(File.expand_path("../VERSION", __FILE__)).chomp
|
8
|
+
spec.authors = ["dchandekstark"]
|
9
|
+
spec.email = ["dchandekstark@gmail.com"]
|
10
|
+
spec.summary = "Ruby Tika app bindings"
|
11
|
+
spec.description = "Ruby Tika app bindings"
|
12
|
+
spec.homepage = "https://github.com/duke-libraries/tika-app"
|
13
|
+
spec.license = "BSD-3-Clause"
|
14
|
+
|
15
|
+
spec.files = `git ls-files -z`.split("\x0")
|
16
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
17
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
|
+
spec.require_paths = ["lib"]
|
19
|
+
|
20
|
+
spec.add_development_dependency "bundler", "~> 1.7"
|
21
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
22
|
+
spec.add_development_dependency "rspec", "~> 3.1"
|
23
|
+
end
|
metadata
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tika-app
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- dchandekstark
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2015-04-15 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.1'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.1'
|
55
|
+
description: Ruby Tika app bindings
|
56
|
+
email:
|
57
|
+
- dchandekstark@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".rspec"
|
64
|
+
- Gemfile
|
65
|
+
- LICENSE
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- VERSION
|
69
|
+
- lib/tika/app.rb
|
70
|
+
- lib/tika/command.rb
|
71
|
+
- lib/tika/commands.rb
|
72
|
+
- lib/tika/error.rb
|
73
|
+
- lib/tika/resource.rb
|
74
|
+
- lib/tika/result.rb
|
75
|
+
- spec/fixtures/Lorem_ipsum.docx
|
76
|
+
- spec/fixtures/Lorem_ipsum.pdf
|
77
|
+
- spec/fixtures/Lorem_ipsum.png
|
78
|
+
- spec/fixtures/Lorem_ipsum.tiff
|
79
|
+
- spec/spec_helper.rb
|
80
|
+
- spec/unit/app_spec.rb
|
81
|
+
- spec/unit/resource_spec.rb
|
82
|
+
- tika-app.gemspec
|
83
|
+
homepage: https://github.com/duke-libraries/tika-app
|
84
|
+
licenses:
|
85
|
+
- BSD-3-Clause
|
86
|
+
metadata: {}
|
87
|
+
post_install_message:
|
88
|
+
rdoc_options: []
|
89
|
+
require_paths:
|
90
|
+
- lib
|
91
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - ">="
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '0'
|
96
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - ">="
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '0'
|
101
|
+
requirements: []
|
102
|
+
rubyforge_project:
|
103
|
+
rubygems_version: 2.2.2
|
104
|
+
signing_key:
|
105
|
+
specification_version: 4
|
106
|
+
summary: Ruby Tika app bindings
|
107
|
+
test_files:
|
108
|
+
- spec/fixtures/Lorem_ipsum.docx
|
109
|
+
- spec/fixtures/Lorem_ipsum.pdf
|
110
|
+
- spec/fixtures/Lorem_ipsum.png
|
111
|
+
- spec/fixtures/Lorem_ipsum.tiff
|
112
|
+
- spec/spec_helper.rb
|
113
|
+
- spec/unit/app_spec.rb
|
114
|
+
- spec/unit/resource_spec.rb
|