tika-client 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 6a520c2018bc2474f7006b3dc2defca7ba0dd051
4
+ data.tar.gz: 33a1f20b73f45f6f4c3f46a58136569041dac139
5
+ SHA512:
6
+ metadata.gz: a2adeecc540ee13117e32848226d8616f833a82103caa7ebb6d09185948c22dc753801499e9f3c355bc43cf7fef8d7cf6372737462a7cfcd60692a04cb42ce1b
7
+ data.tar.gz: 6461f28df9c2975bb0e707dff6fac55ec1141b5fdd157bafa57c00d298491759c2392d7a571ab814b863e463c0188686053e3267e43428cce3a093d32a915897
@@ -0,0 +1,15 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /bin
6
+ /coverage/
7
+ /doc/
8
+ /pkg/
9
+ /spec/reports/
10
+ /tmp/
11
+ *.bundle
12
+ *.so
13
+ *.o
14
+ *.a
15
+ mkmf.log
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --require spec_helper
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in tika-client.gemspec
4
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,27 @@
1
+ Copyright (c) Duke University.
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without modification,
5
+ are permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice,
8
+ this list of conditions and the following disclaimer.
9
+
10
+ 2. Redistributions in binary form must reproduce the above copyright notice,
11
+ this list of conditions and the following disclaimer in the documentation
12
+ and/or other materials provided with the distribution.
13
+
14
+ 3. Neither the name of Duke University nor the names of its contributors may
15
+ be used to endorse or promote products derived from this software without
16
+ specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
22
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,31 @@
1
+ # Tika::Client
2
+
3
+ Ruby bindings for the Apache Tika Server REST API.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'tika-client'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install tika-client
20
+
21
+ ## Usage
22
+
23
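+ With a Tika server running (default `localhost:9998`; override with the `TIKA_HOST` / `TIKA_PORT` environment variables or `Tika::Client.configure`), call `Tika::Client.new.get_text(file: File.open("doc.pdf"), content_type: "application/pdf")` or `get_metadata` with the same options.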
24
+
25
+ ## Contributing
26
+
27
+ 1. Fork it ( https://github.com/duke-libraries/tika-client/fork )
28
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
29
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
30
+ 4. Push to the branch (`git push origin my-new-feature`)
31
+ 5. Create a new Pull Request
@@ -0,0 +1,76 @@
1
+ require "bundler/gem_tasks"
2
+ require "openssl"
3
+ require "net/http"
4
+
5
# Working directories (resolved against the directory rake is run from).
TEMP_DIR = File.absolute_path("tmp")
DOWNLOAD_DIR = TEMP_DIR
BIN_DIR = File.absolute_path("bin")

# Tika release fetched when the TIKA_VERSION environment variable is not set.
TIKA_VERSION = "1.7"

# File in which `tika:start` records the PID of the server it launched.
PID_FILE = File.join(TEMP_DIR, "tika-server.pid")

tika_version = ENV["TIKA_VERSION"] || TIKA_VERSION
tika_path = File.join(BIN_DIR, "tika-server.jar")
tika_server = File.basename(tika_path)
# HTTPS rather than HTTP: the jar and its checksum come from the same host, so
# over plain HTTP a network attacker could replace both and the checksum
# comparison would still pass.
tika_download_url = "https://archive.apache.org/dist/tika/tika-server-#{tika_version}.jar"
tika_checksum_url = "#{tika_download_url}.sha"
tika_checksum_type = :SHA1
17
+
18
# Rake tasks for fetching a Tika server jar and managing a local server process.
namespace :tika do
  # Reports whether a process with the given PID currently exists.
  # Signal 0 performs the existence/permission check without delivering a signal.
  process_alive = lambda do |pid|
    begin
      pid > 0 && Process.kill(0, pid) && true
    rescue Errno::ESRCH
      false
    rescue Errno::EPERM
      # The process exists but belongs to another user.
      true
    end
  end

  desc "Download Tika server"
  task :download => [:download_dir] do
    FileUtils.cd(DOWNLOAD_DIR) do
      puts "Downloading Tika ... "
      # Argument-list form bypasses the shell, so the ENV-supplied version
      # embedded in the URL cannot inject shell commands. `-f` makes curl fail
      # on HTTP errors instead of saving an error page as the jar.
      unless system("curl", "-fL", tika_download_url, "-o", tika_server)
        puts "Download failed -- aborting!"
        abort
      end
      # Tolerate a trailing file name or upper-case hex in the checksum file.
      checksum = Net::HTTP.get(URI(tika_checksum_url)).split.first.to_s.downcase
      puts "Verifying checksum ... "
      digest = OpenSSL::Digest.const_get(tika_checksum_type).new
      # Binary read: the jar must be hashed byte-for-byte on every platform.
      digest << File.binread(tika_server)
      if digest.to_s != checksum
        puts "Checksums do not match -- aborting!"
        FileUtils.remove_entry_secure(tika_server)
        abort
      end
      # The destination directory may not exist yet on a fresh checkout.
      FileUtils.mkdir_p(File.dirname(tika_path))
      FileUtils.mv(tika_server, tika_path)
    end
  end

  desc "Start Tika server"
  task :start do
    if File.exist?(tika_path)
      puts "Starting Tika server ..."
      FileUtils.mkdir_p(File.dirname(PID_FILE))
      # Separate arguments keep a jar path containing spaces intact.
      pid = Process.spawn("java", "-jar", tika_path)
      Process.detach(pid)
      File.open(PID_FILE, "w") { |pid_file| pid_file.write(pid) }
    else
      puts "Tika server not found - run `rake tika:download'."
    end
  end

  desc "Stop Tika server"
  task :stop do
    if File.exist?(PID_FILE)
      puts "Stopping Tika server ..."
      pid = File.read(PID_FILE).strip.to_i
      begin
        # An empty or corrupt PID file yields 0, which would signal this
        # process's own process group - never send the signal in that case.
        Process.kill("KILL", pid) if pid > 0
      rescue Errno::ESRCH
        puts "No process with PID #{pid} - removing stale PID file."
      end
      File.unlink(PID_FILE)
    else
      puts "Tika server is not running or was not started by `rake tika:start' task."
    end
  end

  desc "Check Tika server status"
  task :status do
    if File.exist?(PID_FILE)
      pid = File.read(PID_FILE).strip
      if process_alive.call(pid.to_i)
        puts "Tika server is running (PID #{pid})"
      else
        puts "Tika server is not running (stale PID file for PID #{pid})."
      end
    else
      puts "Tika server is not running or was not started by `rake tika:start' task."
    end
  end
end
73
+
74
# Ensures DOWNLOAD_DIR exists. `mkdir_p` does nothing when the directory is
# already present and creates missing parents, so no existence check is needed
# (`Dir.exists?` was removed in Ruby 3.2).
task :download_dir do
  FileUtils.mkdir_p(DOWNLOAD_DIR)
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1 @@
1
+ require "tika/client"
@@ -0,0 +1,26 @@
1
# Net::HTTP::Put / Net::HTTP::Get are referenced while this file loads, so the
# library must be required here rather than relying on another file's require.
require "net/http"

module Tika
  # Catalog of the Tika server endpoints the client can call.
  class Api

    PUT = Net::HTTP::Put
    GET = Net::HTTP::Get

    JSON = "application/json".freeze
    TEXT = "text/plain".freeze

    # request_method:  Net::HTTP request class used for the call
    # path:            request path on the server
    # response_format: value sent in the Accept header
    Endpoint = Struct.new(:request_method, :path, :response_format)

    ENDPOINTS = {
      get_metadata: Endpoint.new(PUT, "/meta", JSON).freeze,
      get_text:     Endpoint.new(PUT, "/tika", TEXT).freeze
    }.freeze

    # Returns the Endpoint registered under +name+.
    # Raises KeyError when no such endpoint exists.
    def endpoint(name)
      ENDPOINTS.fetch(name)
    end

    # True when an endpoint is registered under +name+.
    def has_endpoint?(name)
      ENDPOINTS.key?(name)
    end

  end
end
@@ -0,0 +1,48 @@
1
+ require_relative "configuration"
2
+ require_relative "api"
3
+ require_relative "request"
4
+ require "forwardable"
5
+
6
module Tika
  # Entry point for talking to a Tika server. Each endpoint known to the Api
  # (see #has_endpoint?) can be called as a method on the client, e.g.
  # client.get_text(file: io, content_type: "application/pdf").
  class Client
    extend Forwardable

    class << self
      # Shared configuration, created on first use.
      def config
        @config ||= Configuration.new
      end

      # Yields the shared configuration for modification.
      def configure
        yield config
      end
    end

    attr_accessor :host, :port, :api
    def_delegators :api, :endpoint, :has_endpoint?

    # opts - :host and :port override the shared configuration's values.
    def initialize(opts={})
      @host = opts.fetch(:host, config.host)
      @port = opts.fetch(:port, config.port)
      @api = Api.new
    end

    def config
      self.class.config
    end

    # Net::HTTP connection for the client's host and port, created on first
    # use and reused afterwards.
    def connection
      @connection ||= Net::HTTP.new(host, port)
    end

    # Builds a Request for the endpoint registered under +name+ and executes
    # it with +opts+. Raises KeyError (from the Api lookup) for unknown names.
    def execute(name, opts={})
      request = Request.new(connection, endpoint(name))
      request.execute(opts)
    end

    # Routes calls named after an endpoint to #execute.
    def method_missing(name, *args)
      return execute(name, *args) if has_endpoint?(name)
      super
    end

    # Keeps respond_to? and Object#method consistent with method_missing.
    def respond_to_missing?(name, include_private = false)
      has_endpoint?(name) || super
    end

  end
end
@@ -0,0 +1,16 @@
1
module Tika
  # Connection settings for the Tika server. Defaults can be overridden with
  # the TIKA_HOST and TIKA_PORT environment variables.
  class Configuration

    DEFAULT_HOST = "localhost"
    DEFAULT_PORT = 9998

    attr_accessor :host
    attr_accessor :port

    def initialize
      @host = ENV["TIKA_HOST"] || DEFAULT_HOST
      @port = port_from_env || DEFAULT_PORT
    end

    private

    # Returns TIKA_PORT as an Integer so that +port+ has the same type whether
    # it comes from the environment or from DEFAULT_PORT. Returns nil when the
    # variable is unset or blank.
    # Raises ArgumentError when the value is not a base-10 integer.
    def port_from_env
      raw = ENV["TIKA_PORT"]
      return nil if raw.nil? || raw.strip.empty?
      Integer(raw.strip, 10)
    end

  end
end
@@ -0,0 +1,38 @@
1
+ require "uri"
2
+ require "net/http"
3
+ require "delegate"
4
+ # require "mime-types"
5
+
6
module Tika
  # Executes an API method. Wraps (and delegates to) the Net::HTTP request
  # object built for an endpoint.
  class Request < SimpleDelegator

    attr_reader :connection

    # connection - object responding to #address, #port and #start
    #              (a Net::HTTP instance in normal use)
    # endpoint   - object responding to #request_method, #path and
    #              #response_format
    def initialize(connection, endpoint)
      @connection = connection
      @endpoint = endpoint
      uri = URI::HTTP.build(host: connection.address, port: connection.port, path: endpoint.path)
      super(endpoint.request_method.new(uri))
      self["Accept"] = endpoint.response_format
    end

    # Sends the request over the connection and returns whatever the
    # connection's #request returns.
    #
    # opts - :file         object responding to #read; its content becomes the body
    #        :content_type value for the Content-Type header
    #
    # The caller's hash is left untouched.
    def execute(opts={})
      connection.start do |conn|
        file = opts[:file]
        if file
          payload = file.read
          self.body = payload
          # Measure what was actually read: works for IO objects without
          # #size and for files that were already partially consumed.
          self.content_length = payload.bytesize
        end
        self.content_type = opts[:content_type] if opts[:content_type]
        conn.request(__getobj__)
      end
    end

  end
end
@@ -0,0 +1,87 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # The generated `.rspec` file contains `--require spec_helper` which will cause
4
+ # this file to always be loaded, without a need to explicitly require it in any
5
+ # files.
6
+ #
7
+ # Given that it is always loaded, you are encouraged to keep this file as
8
+ # light-weight as possible. Requiring heavyweight dependencies from this file
9
+ # will add to the boot time of your test suite on EVERY test run, even for an
10
+ # individual file that may not need all of that loaded. Instead, consider making
11
+ # a separate helper file that requires the additional dependencies and performs
12
+ # the additional setup, and require it from the spec files that actually need
13
+ # it.
14
+ #
15
+ # The `.rspec` file also contains a few flags that are not defaults but that
16
+ # users commonly want.
17
+ #
18
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
19
RSpec.configure do |rspec|
  # rspec-expectations settings. Including chain clauses makes the
  # `description` and `failure_message` of custom matchers mention helper
  # methods defined with `chain`, e.g. "be bigger than 2 and smaller than 4"
  # instead of just "be bigger than 2".
  rspec.expect_with(:rspec) do |expectation_settings|
    expectation_settings.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # rspec-mocks settings. Verifying partial doubles prevents mocking or
  # stubbing a method that does not exist on a real object.
  rspec.mock_with(:rspec) do |mock_settings|
    mock_settings.verify_partial_doubles = true
  end

  # Together these limit a run to examples or groups tagged with `:focus`;
  # when nothing is tagged, every example runs.
  rspec.filter_run(:focus)
  rspec.run_all_when_everything_filtered = true

  # Restrict the available syntax to the recommended non-monkey-patched form.
  rspec.disable_monkey_patching!

  # Enable warnings (may be noisy because of issues in dependencies).
  rspec.warnings = true

  # When a single spec file is run, default to the documentation formatter
  # unless a formatter has already been configured (e.g. on the command line).
  rspec.default_formatter = 'doc' if rspec.files_to_run.one?

  # Print the 10 slowest examples and example groups at the end of the run.
  rspec.profile_examples = 10

  # Run specs in random order to surface order dependencies; the seed is
  # printed after each run and can be replayed with `--seed 1234`.
  rspec.order = :random

  # Seed global randomization from `--seed` so randomization-related failures
  # can be reproduced deterministically.
  Kernel.srand(rspec.seed)
end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
# Put the gem's own lib directory on the load path (once).
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.include?(lib_dir) || $LOAD_PATH.unshift(lib_dir)

# The version number lives in the VERSION file next to this gemspec.
version_file = File.expand_path("../VERSION", __FILE__)

Gem::Specification.new do |s|
  # Identity
  s.name        = "tika-client"
  s.version     = File.read(version_file).chomp
  s.authors     = ["dchandekstark"]
  s.email       = ["dchandekstark@gmail.com"]
  s.summary     = "Ruby bindings for Apache Tika Server REST API"
  s.description = "Ruby bindings for Apache Tika Server REST API"
  s.homepage    = "https://github.com/duke-libraries/tika-client"
  s.license     = "BSD-3-Clause"

  # Contents: everything tracked by git; executables and tests are picked
  # out of that list by path prefix.
  s.files         = `git ls-files -z`.split("\x0")
  s.executables   = s.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  s.test_files    = s.files.grep(%r{^(test|spec|features)/})
  s.require_paths = ["lib"]

  s.required_ruby_version = "~> 2.0"

  # Development-only dependencies
  s.add_development_dependency("bundler", "~> 1.7")
  s.add_development_dependency("rake", "~> 10.0")
  s.add_development_dependency("rspec", "~> 3.1")
end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tika-client
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - dchandekstark
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-02-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.7'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.7'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.1'
55
+ description: Ruby bindings for Apache Tika Server REST API
56
+ email:
57
+ - dchandekstark@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rspec"
64
+ - Gemfile
65
+ - LICENSE
66
+ - README.md
67
+ - Rakefile
68
+ - VERSION
69
+ - lib/tika-client.rb
70
+ - lib/tika/api.rb
71
+ - lib/tika/client.rb
72
+ - lib/tika/configuration.rb
73
+ - lib/tika/request.rb
74
+ - spec/spec_helper.rb
75
+ - tika-client.gemspec
76
+ homepage: https://github.com/duke-libraries/tika-client
77
+ licenses:
78
+ - BSD-3-Clause
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '2.0'
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ">="
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.2.2
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: Ruby bindings for Apache Tika Server REST API
100
+ test_files:
101
+ - spec/spec_helper.rb