tika-client 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3bb963d542e83e8d690b3e243bf05a155ec22391
4
- data.tar.gz: 7776e59b7a1210326b8006f3891dfb78f14cf9c8
3
+ metadata.gz: 2657a16d46c66487c7e90bb41b17bc18b178e972
4
+ data.tar.gz: 9386b75938dd03c8622c7c8efb2dd641b3627f52
5
5
  SHA512:
6
- metadata.gz: 09198bec1ef7e2f79d0bee0b2b7762dc438ef3e5f821f52c000f297526a82d6e145d46a8e5b6ad17c9244f1dda5a978e96c9cb0bdc0aacf90269438165f16a5a
7
- data.tar.gz: e058fd18fb370a579d21a58a1c227313d81000bc5a6e27344fb42a0c6a14c6a89644070fc435a5f6a68d1432f42db2ae77ee84848565c981fadaf98f16a08e2e
6
+ metadata.gz: 6a205ff0d6422c6098c904f1ed2fa458367de5b991ca8197b54f6fbf64ea093ba5c3a7f3cd7ee85e3d34c9b042dca3df40ee94c4cbd327e0a8da502c7e058f37
7
+ data.tar.gz: 507f8ebc7ed4aeb4d6171450f7cf8b96b434dc903cc560dbfe3f8d49969474b81826a02ce7bcbf8fbb4ef1a6dac6d7a4bfb64a8942c62914824a781fb3f5e0cb
data/Rakefile CHANGED
@@ -1,4 +1,5 @@
1
1
  require "bundler/gem_tasks"
2
+ require "rspec/core/rake_task"
2
3
  require "openssl"
3
4
  require "net/http"
4
5
 
@@ -36,7 +37,10 @@ namespace :tika do
36
37
 
37
38
  desc "Start Tika server"
38
39
  task :start do
39
- if File.exists?(tika_path)
40
+ if File.exists?(PID_FILE)
41
+ pid = File.read(PID_FILE).strip
42
+ puts "Tika server is already running (PID #{pid})"
43
+ elsif File.exists?(tika_path)
40
44
  puts "Starting Tika server ..."
41
45
  File.open(PID_FILE, "w") do |pid_file|
42
46
  pid = fork { exec "java -jar #{tika_path}" }
@@ -74,3 +78,8 @@ end
74
78
  task :download_dir do
75
79
  FileUtils.mkdir(DOWNLOAD_DIR) unless Dir.exists?(DOWNLOAD_DIR)
76
80
  end
81
+
82
+ desc "Run all specs in spec directory"
83
+ RSpec::Core::RakeTask.new(:spec)
84
+
85
+ task default: :spec
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.1
1
+ 0.2.0
data/lib/tika/client.rb CHANGED
@@ -1,3 +1,4 @@
1
+ require "net/http"
1
2
  require_relative "configuration"
2
3
  require_relative "requests"
3
4
 
@@ -19,8 +20,8 @@ module Tika
19
20
  attr_reader :host, :port
20
21
 
21
22
  def initialize(opts={})
22
- @host = opts.fetch(:host, config.host)
23
- @port = opts.fetch(:port, config.port)
23
+ @host = opts.fetch(:host, self.class.config.host)
24
+ @port = opts.fetch(:port, self.class.config.port)
24
25
  end
25
26
 
26
27
  def get_text(opts={})
@@ -31,12 +32,28 @@ module Tika
31
32
  GetMetadataRequest.execute(connection, opts)
32
33
  end
33
34
 
34
- private
35
+ def get_version
36
+ GetVersionRequest.execute(connection)
37
+ end
35
38
 
36
- def config
37
- self.class.config
39
+ def get_mime_types
40
+ GetMimeTypesRequest.execute(connection)
38
41
  end
39
42
 
43
+ def get_parsers
44
+ GetParsersRequest.execute(connection)
45
+ end
46
+
47
+ def get_parsers_details
48
+ GetParsersDetailsRequest.execute(connection)
49
+ end
50
+
51
+ def get_detectors
52
+ GetDetectorsRequest.execute(connection)
53
+ end
54
+
55
+ private
56
+
40
57
  def connection
41
58
  @connection ||= Net::HTTP.new(host, port)
42
59
  end
data/lib/tika/request.rb CHANGED
@@ -1,59 +1,79 @@
1
1
  require "uri"
2
- require "net/http"
3
2
  require "delegate"
4
3
 
5
4
  module Tika
6
5
  class Request < SimpleDelegator
7
6
 
8
7
  class << self
9
- attr_accessor :endpoint
8
+ attr_accessor :http_method, :path
9
+
10
+ def headers
11
+ {}
12
+ end
10
13
  end
11
14
 
12
- attr_reader :connection
15
+ attr_reader :connection, :options
13
16
 
14
- def self.execute(connection, opts={})
15
- request = new(connection)
16
- yield request if block_given?
17
- request.execute(opts)
17
+ def self.execute(connection, options={})
18
+ request = new(connection, options)
19
+ request.execute
18
20
  end
19
21
 
20
- def initialize(connection)
22
+ def initialize(connection, options={})
21
23
  @connection = connection
24
+ @options = options
22
25
  super build_request
23
- set_defaults
26
+ handle_options
24
27
  post_initialize
25
28
  end
26
29
 
27
- def execute(opts={})
28
- connection.start do |conn|
29
- if file = opts.delete(:file)
30
- self.body = file.read
31
- self.content_length = file.size
32
- end
33
- self.content_type = opts[:content_type] if opts[:content_type]
34
- yield self if block_given?
35
- conn.request(__getobj__)
36
- end
30
+ def execute
31
+ response = connection.start { |conn| conn.request(__getobj__) }
32
+ handle_response(response)
37
33
  end
38
34
 
39
- def endpoint
40
- self.class.endpoint
35
+ def uri
36
+ @uri ||= URI::HTTP.build(host: connection.address, port: connection.port, path: self.class.path)
41
37
  end
42
38
 
43
- def uri
44
- @uri ||= URI::HTTP.build(host: connection.address, port: connection.port, path: endpoint.path)
39
+ def handle_response(response)
40
+ response.body
45
41
  end
46
42
 
47
43
  private
48
44
 
49
- def post_initialize; end
45
+ def handle_options
46
+ add_file if file
47
+ set_content_type
48
+ add_headers
49
+ end
50
50
 
51
- def build_request
52
- endpoint.request_method.new(uri)
51
+ def set_content_type
52
+ self.content_type = options[:content_type] if options[:content_type]
53
+ end
54
+
55
+ def add_file
56
+ self.body = file.read
57
+ self.content_length = file.size
53
58
  end
54
59
 
55
- def set_defaults
56
- self["Accept"] = endpoint.response_format
60
+ def file
61
+ options[:file]
62
+ end
63
+
64
+ def headers
65
+ @headers ||= self.class.headers.merge options.fetch(:headers, {})
66
+ end
67
+
68
+ def add_headers
69
+ headers.each { |header, value| self[header] = value }
70
+ end
71
+
72
+ # Subclass hook
73
+ def post_initialize; end
74
+
75
+ def build_request
76
+ self.class.http_method.new(uri)
57
77
  end
58
78
 
59
79
  end
data/lib/tika/requests.rb CHANGED
@@ -1,19 +1,63 @@
1
+ require "json"
2
+ require "net/http"
1
3
  require_relative "request"
2
- require_relative "endpoints"
3
4
 
4
5
  module Tika
5
6
  module Requests
6
7
 
7
- include Endpoints
8
+ PUT = Net::HTTP::Put
9
+ GET = Net::HTTP::Get
8
10
 
9
- def self.request_class(endpoint)
10
- klass = Class.new(Request)
11
- klass.endpoint = endpoint
12
- klass
11
+ class TextRequest < Request
12
+ def self.headers
13
+ {"Accept" => "text/plain"}
14
+ end
13
15
  end
14
16
 
15
- GetTextRequest = request_class GetTextEndpoint
16
- GetMetadataRequest = request_class GetMetadataEndpoint
17
+ class JSONRequest < Request
18
+ def self.headers
19
+ {"Accept" => "application/json"}
20
+ end
21
+
22
+ def handle_response(response)
23
+ JSON.load(response.body)
24
+ end
25
+ end
26
+
27
+ class GetTextRequest < TextRequest
28
+ self.http_method = PUT
29
+ self.path = "/tika"
30
+ end
31
+
32
+ class GetMetadataRequest < JSONRequest
33
+ self.http_method = PUT
34
+ self.path = "/meta"
35
+ end
36
+
37
+ class GetVersionRequest < Request
38
+ self.http_method = GET
39
+ self.path = "/version"
40
+ end
41
+
42
+ class GetMimeTypesRequest < JSONRequest
43
+ self.http_method = GET
44
+ self.path = "/mime-types"
45
+ end
46
+
47
+ class GetParsersRequest < JSONRequest
48
+ self.http_method = GET
49
+ self.path = "/parsers"
50
+ end
51
+
52
+ class GetParsersDetailsRequest < JSONRequest
53
+ self.http_method = GET
54
+ self.path = "/parsers/details"
55
+ end
56
+
57
+ class GetDetectorsRequest < JSONRequest
58
+ self.http_method = GET
59
+ self.path = "/detectors"
60
+ end
17
61
 
18
62
  end
19
63
  end
data/spec/spec_helper.rb CHANGED
@@ -1,3 +1,7 @@
1
+ require "tika-client"
2
+
3
+ FIXTURE_DIR = File.expand_path("../fixtures", __FILE__)
4
+
1
5
  # This file was generated by the `rspec --init` command. Conventionally, all
2
6
  # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
7
  # The generated `.rspec` file contains `--require spec_helper` which will cause
data/spec/unit/client_spec.rb ADDED
@@ -0,0 +1,51 @@
1
+ module Tika
2
+ RSpec.describe Client do
3
+
4
+ describe "#get_text" do
5
+ let(:file) { File.new(File.join(FIXTURE_DIR, "Lorem_ipsum.docx")) }
6
+ it "should return the text of the file" do
7
+ text = subject.get_text(file: file, content_type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
8
+ expect(text).to match(/^Lorem ipsum/)
9
+ end
10
+ end
11
+
12
+ describe "#get_metadata" do
13
+ let(:file) { File.new(File.join(FIXTURE_DIR, "Lorem_ipsum.pdf")) }
14
+ it "should return the metadata of the file" do
15
+ metadata = subject.get_metadata(file: file, content_type: "application/pdf")
16
+ expect(metadata["Creation-Date"]).to eq("2015-02-15T01:54:41Z")
17
+ end
18
+ end
19
+
20
+ describe "#get_version" do
21
+ it "should return the Tika server version" do
22
+ expect(subject.get_version).to match(/^Apache Tika/)
23
+ end
24
+ end
25
+
26
+ describe "#get_mime_types" do
27
+ it "should return the MIME Types support by the Tika server" do
28
+ expect(subject.get_mime_types).to have_key("application/pdf")
29
+ end
30
+ end
31
+
32
+ describe "#get_parsers" do
33
+ it "should return the parsers available to the Tika server" do
34
+ expect(subject.get_parsers["name"]).to eq("org.apache.tika.parser.DefaultParser")
35
+ end
36
+ end
37
+
38
+ describe "#get_parsers_details" do
39
+ it "should return the parsers available to the Tika server and the MIME types they support" do
40
+ expect(subject.get_parsers_details["name"]).to eq("org.apache.tika.parser.DefaultParser")
41
+ end
42
+ end
43
+
44
+ describe "#get_detectors" do
45
+ it "should return the detectors available to the Tika server" do
46
+ expect(subject.get_detectors["name"]).to eq("org.apache.tika.detect.DefaultDetector")
47
+ end
48
+ end
49
+
50
+ end
51
+ end
data/spec/unit/request_spec.rb ADDED
@@ -0,0 +1,10 @@
1
+ module Tika
2
+ RSpec.describe Request do
3
+
4
+ subject { described_class.new(connection, options) }
5
+ let(:connection) { Net::HTTP.new("localhost", 9998) }
6
+
7
+
8
+
9
+ end
10
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: tika-client
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - dchandekstark
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-02-14 00:00:00.000000000 Z
11
+ date: 2015-02-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -69,10 +69,15 @@ files:
69
69
  - lib/tika-client.rb
70
70
  - lib/tika/client.rb
71
71
  - lib/tika/configuration.rb
72
- - lib/tika/endpoints.rb
73
72
  - lib/tika/request.rb
74
73
  - lib/tika/requests.rb
74
+ - spec/fixtures/Lorem_ipsum.docx
75
+ - spec/fixtures/Lorem_ipsum.pdf
76
+ - spec/fixtures/Lorem_ipsum.png
77
+ - spec/fixtures/Lorem_ipsum.tiff
75
78
  - spec/spec_helper.rb
79
+ - spec/unit/client_spec.rb
80
+ - spec/unit/request_spec.rb
76
81
  - tika-client.gemspec
77
82
  homepage: https://github.com/duke-libraries/tika-client
78
83
  licenses:
@@ -99,4 +104,10 @@ signing_key:
99
104
  specification_version: 4
100
105
  summary: Ruby bindings for Apache Tika Server REST API
101
106
  test_files:
107
+ - spec/fixtures/Lorem_ipsum.docx
108
+ - spec/fixtures/Lorem_ipsum.pdf
109
+ - spec/fixtures/Lorem_ipsum.png
110
+ - spec/fixtures/Lorem_ipsum.tiff
102
111
  - spec/spec_helper.rb
112
+ - spec/unit/client_spec.rb
113
+ - spec/unit/request_spec.rb
data/lib/tika/endpoints.rb DELETED
@@ -1,18 +0,0 @@
1
- require "net/http"
2
-
3
- module Tika
4
- module Endpoints
5
-
6
- PUT = Net::HTTP::Put
7
- GET = Net::HTTP::Get
8
-
9
- JSON = "application/json"
10
- TEXT = "text/plain"
11
-
12
- Endpoint = Struct.new(:request_method, :path, :response_format)
13
-
14
- GetTextEndpoint = Endpoint.new(PUT, "/tika", TEXT)
15
- GetMetadataEndpoint = Endpoint.new(PUT, "/meta", JSON)
16
-
17
- end
18
- end