tika-client 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6a520c2018bc2474f7006b3dc2defca7ba0dd051
4
+ data.tar.gz: 33a1f20b73f45f6f4c3f46a58136569041dac139
5
+ SHA512:
6
+ metadata.gz: a2adeecc540ee13117e32848226d8616f833a82103caa7ebb6d09185948c22dc753801499e9f3c355bc43cf7fef8d7cf6372737462a7cfcd60692a04cb42ce1b
7
+ data.tar.gz: 6461f28df9c2975bb0e707dff6fac55ec1141b5fdd157bafa57c00d298491759c2392d7a571ab814b863e463c0188686053e3267e43428cce3a093d32a915897
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /bin
6
+ /coverage/
7
+ /doc/
8
+ /pkg/
9
+ /spec/reports/
10
+ /tmp/
11
+ *.bundle
12
+ *.so
13
+ *.o
14
+ *.a
15
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tika-client.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) Duke University.
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice,
8
+ this list of conditions and the following disclaimer.
9
+
10
+ 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ 3. Neither the name of Duke University nor the names of its contributors may
15
+ be used to endorse or promote products derived from this software without
16
+ specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,31 @@
1
+ # Tika::Client
2
+
3
+ Ruby bindings for the Apache Tika Server REST API.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'tika-client'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install tika-client
20
+
21
+ ## Usage
22
+
23
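+ Create a client with `Tika::Client.new` (host and port default to `localhost` and `9998`, or to the `TIKA_HOST` / `TIKA_PORT` environment variables), then call `client.get_text(file: File.open("doc.pdf"), content_type: "application/pdf")` or `client.get_metadata` with the same options; each call returns the server's `Net::HTTP` response.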
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/duke-libraries/tika-client/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
@@ -0,0 +1,76 @@
1
+ require "bundler/gem_tasks"
2
+ require "openssl"
3
+ require "net/http"
4
+
5
+ TEMP_DIR = File.absolute_path("tmp")
6
+ DOWNLOAD_DIR = TEMP_DIR
7
+ BIN_DIR = File.absolute_path("bin")
8
+ TIKA_VERSION = "1.7"
9
+ PID_FILE = File.join(TEMP_DIR, "tika-server.pid")
10
+
11
+ tika_version = ENV["TIKA_VERSION"] || TIKA_VERSION
12
+ tika_path = File.join(BIN_DIR, "tika-server.jar")
13
+ tika_server = File.basename(tika_path)
14
+ tika_download_url = "http://archive.apache.org/dist/tika/tika-server-#{tika_version}.jar"
15
+ tika_checksum_url = "#{tika_download_url}.sha"
16
+ tika_checksum_type = :SHA1
17
+
18
+ namespace :tika do
19
+ desc "Download Tika server"
20
+ task :download => [:download_dir] do
21
+ FileUtils.cd(DOWNLOAD_DIR) do
22
+ puts "Downloading Tika ... "
23
+ system "curl -L #{tika_download_url} -o #{tika_server}"
24
+ checksum = Net::HTTP.get(URI(tika_checksum_url)).chomp
25
+ puts "Verifiying checksum ... "
26
+ digest = OpenSSL::Digest.const_get(tika_checksum_type).new
27
+ digest << File.read(tika_server)
28
+ if digest.to_s != checksum
29
+ puts "Checksums do not match -- aborting!"
30
+ FileUtils.remove_entry_secure(tika_server)
31
+ abort
32
+ end
33
+ FileUtils.mv(tika_server, tika_path)
34
+ end
35
+ end
36
+
37
+ desc "Start Tika server"
38
+ task :start do
39
+ if File.exists?(tika_path)
40
+ puts "Starting Tika server ..."
41
+ File.open(PID_FILE, "w") do |pid_file|
42
+ pid = fork { exec "java -jar #{tika_path}" }
43
+ Process.detach(pid)
44
+ pid_file.write(pid)
45
+ end
46
+ else
47
+ puts "Tika server not found - run `rake tika:download'."
48
+ end
49
+ end
50
+
51
+ desc "Stop Tika server"
52
+ task :stop do
53
+ if File.exists?(PID_FILE)
54
+ puts "Stopping Tika server ..."
55
+ pid = File.read(PID_FILE).strip
56
+ Process.kill("KILL", pid.to_i)
57
+ File.unlink(PID_FILE)
58
+ else
59
+ puts "Tika server is not running or was not started by `rake tika:start' task."
60
+ end
61
+ end
62
+
63
+ desc "Check Tika server status"
64
+ task :status do
65
+ if File.exists?(PID_FILE)
66
+ pid = File.read(PID_FILE).strip
67
+ puts "Tika server is running (PID #{pid})"
68
+ else
69
+ puts "Tika server is not running or was not started by `rake tika:start' task."
70
+ end
71
+ end
72
+ end
73
+
74
+ task :download_dir do
75
+ FileUtils.mkdir(DOWNLOAD_DIR) unless Dir.exists?(DOWNLOAD_DIR)
76
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1 @@
1
+ require "tika/client"
@@ -0,0 +1,26 @@
1
+ module Tika
2
+ class Api
3
+
4
+ PUT = Net::HTTP::Put
5
+ GET = Net::HTTP::Get
6
+
7
+ JSON = "application/json"
8
+ TEXT = "text/plain"
9
+
10
+ Endpoint = Struct.new(:request_method, :path, :response_format)
11
+
12
+ ENDPOINTS = {
13
+ get_metadata: Endpoint.new(PUT, "/meta", JSON),
14
+ get_text: Endpoint.new(PUT, "/tika", TEXT)
15
+ }
16
+
17
+ def endpoint(name)
18
+ ENDPOINTS.fetch(name)
19
+ end
20
+
21
+ def has_endpoint?(name)
22
+ ENDPOINTS.include?(name)
23
+ end
24
+
25
+ end
26
+ end
@@ -0,0 +1,48 @@
1
+ require_relative "configuration"
2
+ require_relative "api"
3
+ require_relative "request"
4
+ require "forwardable"
5
+
6
+ module Tika
7
+ class Client
8
+ extend Forwardable
9
+
10
+ class << self
11
+ def config
12
+ @config ||= Configuration.new
13
+ end
14
+
15
+ def configure
16
+ yield config
17
+ end
18
+ end
19
+
20
+ attr_accessor :host, :port, :api
21
+ def_delegators :api, :endpoint, :has_endpoint?
22
+
23
+ def initialize(opts={})
24
+ @host = opts.fetch(:host, config.host)
25
+ @port = opts.fetch(:port, config.port)
26
+ @api = Api.new
27
+ end
28
+
29
+ def config
30
+ self.class.config
31
+ end
32
+
33
+ def connection
34
+ @connection ||= Net::HTTP.new(host, port)
35
+ end
36
+
37
+ def execute(name, opts={})
38
+ request = Request.new(connection, endpoint(name))
39
+ request.execute(opts)
40
+ end
41
+
42
+ def method_missing(name, *args)
43
+ return execute(name, *args) if has_endpoint?(name)
44
+ super
45
+ end
46
+
47
+ end
48
+ end
@@ -0,0 +1,16 @@
1
+ module Tika
2
+ class Configuration
3
+
4
+ DEFAULT_HOST = "localhost"
5
+ DEFAULT_PORT = 9998
6
+
7
+ attr_accessor :host
8
+ attr_accessor :port
9
+
10
+ def initialize
11
+ @host = ENV["TIKA_HOST"] || DEFAULT_HOST
12
+ @port = ENV["TIKA_PORT"] || DEFAULT_PORT
13
+ end
14
+
15
+ end
16
+ end
@@ -0,0 +1,38 @@
1
+ require "uri"
2
+ require "net/http"
3
+ require "delegate"
4
+ # require "mime-types"
5
+
6
+ module Tika
7
+ # Executes an API method
8
+ class Request < SimpleDelegator
9
+
10
+ attr_reader :connection # , :endpoint, :http_request
11
+
12
+ # def self.execute(*args)
13
+ # request = new(*args)
14
+ # yield request if block_given?
15
+ # request.execute
16
+ # end
17
+
18
+ def initialize(connection, endpoint)
19
+ @connection = connection
20
+ @endpoint = endpoint
21
+ uri = URI::HTTP.build(host: connection.address, port: connection.port, path: endpoint.path)
22
+ super endpoint.request_method.new(uri)
23
+ self["Accept"] = endpoint.response_format
24
+ end
25
+
26
+ def execute(opts={})
27
+ connection.start do |conn|
28
+ if file = opts.delete(:file)
29
+ self.body = file.read
30
+ self.content_length = file.size
31
+ end
32
+ self.content_type = opts[:content_type] if opts[:content_type]
33
+ conn.request(__getobj__)
34
+ end
35
+ end
36
+
37
+ end
38
+ end
@@ -0,0 +1,87 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
4
+ # this file to always be loaded, without a need to explicitly require it in any
5
+ # files.
6
+ #
7
+ # Given that it is always loaded, you are encouraged to keep this file as
8
+ # light-weight as possible. Requiring heavyweight dependencies from this file
9
+ # will add to the boot time of your test suite on EVERY test run, even for an
10
+ # individual file that may not need all of that loaded. Instead, consider making
11
+ # a separate helper file that requires the additional dependencies and performs
12
+ # the additional setup, and require it from the spec files that actually need
13
+ # it.
14
+ #
15
+ # The `.rspec` file also contains a few flags that are not defaults but that
16
+ # users commonly want.
17
+ #
18
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
+ RSpec.configure do |config|
20
+ # rspec-expectations config goes here. You can use an alternate
21
+ # assertion/expectation library such as wrong or the stdlib/minitest
22
+ # assertions if you prefer.
23
+ config.expect_with :rspec do |expectations|
24
+ # This option will default to `true` in RSpec 4. It makes the `description`
25
+ # and `failure_message` of custom matchers include text for helper methods
26
+ # defined using `chain`, e.g.:
27
+ # be_bigger_than(2).and_smaller_than(4).description
28
+ # # => "be bigger than 2 and smaller than 4"
29
+ # ...rather than:
30
+ # # => "be bigger than 2"
31
+ expectations.include_chain_clauses_in_custom_matcher_descriptions = true
32
+ end
33
+
34
+ # rspec-mocks config goes here. You can use an alternate test double
35
+ # library (such as bogus or mocha) by changing the `mock_with` option here.
36
+ config.mock_with :rspec do |mocks|
37
+ # Prevents you from mocking or stubbing a method that does not exist on
38
+ # a real object. This is generally recommended, and will default to
39
+ # `true` in RSpec 4.
40
+ mocks.verify_partial_doubles = true
41
+ end
42
+
43
+ # These two settings work together to allow you to limit a spec run
44
+ # to individual examples or groups you care about by tagging them with
45
+ # `:focus` metadata. When nothing is tagged with `:focus`, all examples
46
+ # get run.
47
+ config.filter_run :focus
48
+ config.run_all_when_everything_filtered = true
49
+
50
+ # Limits the available syntax to the non-monkey patched syntax that is
51
+ # recommended. For more details, see:
52
+ # - http://myronmars.to/n/dev-blog/2012/06/rspecs-new-expectation-syntax
53
+ # - http://teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
54
+ # - http://myronmars.to/n/dev-blog/2014/05/notable-changes-in-rspec-3#new__config_option_to_disable_rspeccore_monkey_patching
55
+ config.disable_monkey_patching!
56
+
57
+ # This setting enables warnings. It's recommended, but in some cases may
58
+ # be too noisy due to issues in dependencies.
59
+ config.warnings = true
60
+
61
+ # Many RSpec users commonly either run the entire suite or an individual
62
+ # file, and it's useful to allow more verbose output when running an
63
+ # individual spec file.
64
+ if config.files_to_run.one?
65
+ # Use the documentation formatter for detailed output,
66
+ # unless a formatter has already been configured
67
+ # (e.g. via a command-line flag).
68
+ config.default_formatter = 'doc'
69
+ end
70
+
71
+ # Print the 10 slowest examples and example groups at the
72
+ # end of the spec run, to help surface which specs are running
73
+ # particularly slow.
74
+ config.profile_examples = 10
75
+
76
+ # Run specs in random order to surface order dependencies. If you find an
77
+ # order dependency and want to debug it, you can fix the order by providing
78
+ # the seed, which is printed after each run.
79
+ # --seed 1234
80
+ config.order = :random
81
+
82
+ # Seed global randomization in this process using the `--seed` CLI option.
83
+ # Setting this allows you to use `--seed` to deterministically reproduce
84
+ # test failures related to randomization by passing the same `--seed` value
85
+ # as the one that triggered the failure.
86
+ Kernel.srand config.seed
87
+ end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__) # absolute path of the lib/ directory next to this gemspec
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib) # put lib/ on the load path once
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "tika-client"
7
+ spec.version = File.read(File.expand_path("../VERSION", __FILE__)).chomp # version string is read from the VERSION file
8
+ spec.authors = ["dchandekstark"]
9
+ spec.email = ["dchandekstark@gmail.com"]
10
+ spec.summary = "Ruby bindings for Apache Tika Server REST API"
11
+ spec.description = "Ruby bindings for Apache Tika Server REST API"
12
+ spec.homepage = "https://github.com/duke-libraries/tika-client"
13
+ spec.license = "BSD-3-Clause"
14
+
15
+ spec.files = `git ls-files -z`.split("\x0") # NUL-separated output of `git ls-files`, split into a file list
16
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) } # basenames of listed files under bin/
17
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/}) # listed files under test/, spec/ or features/
18
+ spec.require_paths = ["lib"]
19
+
20
+ spec.required_ruby_version = "~> 2.0" # pessimistic constraint: >= 2.0 and < 3.0
21
+
22
+ spec.add_development_dependency "bundler", "~> 1.7"
23
+ spec.add_development_dependency "rake", "~> 10.0"
24
+ spec.add_development_dependency "rspec", "~> 3.1"
25
+ end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tika-client
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - dchandekstark
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.1'
55
+ description: Ruby bindings for Apache Tika Server REST API
56
+ email:
57
+ - dchandekstark@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - Gemfile
65
+ - LICENSE
66
+ - README.md
67
+ - Rakefile
68
+ - VERSION
69
+ - lib/tika-client.rb
70
+ - lib/tika/api.rb
71
+ - lib/tika/client.rb
72
+ - lib/tika/configuration.rb
73
+ - lib/tika/request.rb
74
+ - spec/spec_helper.rb
75
+ - tika-client.gemspec
76
+ homepage: https://github.com/duke-libraries/tika-client
77
+ licenses:
78
+ - BSD-3-Clause
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '2.0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.2.2
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: Ruby bindings for Apache Tika Server REST API
100
+ test_files:
101
+ - spec/spec_helper.rb