henkei 1.17.2 → 1.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 78f85d586c1195625f6bbf0906a2b0f8f64319ce
4
- data.tar.gz: dc676d8e29199dd6659606c7ef5ec2965c456591
3
+ metadata.gz: 5d629bcc0d435522497e749752307e6c33844d29
4
+ data.tar.gz: 490b5e0c89b43f4ec83434e3861c85d69de7a8bd
5
5
  SHA512:
6
- metadata.gz: 714aa3686c0d584fbbed017442cc46a14b232ddb194260048864ba5b4265751456239cc2a396b50530af10fe6dcda4a5e38178d71a2a196d36a13bd5fc78192e
7
- data.tar.gz: ddceb92fbb74cecb0400c8d2eadba4ff236e27018e63fdba5d97562be8a6a07d336663832c2f76de46124b7e5ace27c4264b129fd92280b71dfd88bd9be0abb0
6
+ metadata.gz: 6f4e76efb3cf67bca58db9e6dc074511af3e8661b5c03800bbf28cce9939b01774c33aaa65ae24df5e46a5346cda97347e17add23e934cbaf0cbaf7c48e73246
7
+ data.tar.gz: 21cf99d84f4428f3db892aabc5827f02c848e70bb693080ac33789f4ea66d64de03f88856af7e89c14a4b783a038e33c5fba7ad6229e316f99be2cc2c0ab5fa1
data/henkei.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Henkei::VERSION
9
9
  spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
10
10
  spec.email = %w[erol.fornoles@gmail.com a.bromwich@gmail.com]
11
- spec.description = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit}
12
- spec.summary = spec.description
11
+ spec.description = 'Read text and metadata from files and documents using Apache Tika toolkit'
12
+ spec.summary = 'Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
13
13
  spec.homepage = 'http://github.com/abrom/henkei'
14
14
  spec.license = 'MIT'
15
15
 
@@ -18,11 +18,11 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ['lib']
20
20
 
21
- spec.add_runtime_dependency 'mime-types', '>= 1.23'
22
- spec.add_runtime_dependency 'json', '>= 1.8'
21
+ spec.add_runtime_dependency 'mime-types', '>= 1.23', '< 4'
22
+ spec.add_runtime_dependency 'json', '>= 1.8', '< 3'
23
23
 
24
24
  spec.add_development_dependency 'bundler', '~> 1.3'
25
- spec.add_development_dependency 'rake'
26
- spec.add_development_dependency 'rspec', '~> 3.5'
27
- spec.add_development_dependency 'simplecov'
25
+ spec.add_development_dependency 'rake', '~> 12.3'
26
+ spec.add_development_dependency 'rspec', '~> 3.7'
27
+ spec.add_development_dependency 'simplecov', '~> 0.15'
28
28
  end
@@ -0,0 +1,3 @@
1
+ <properties>
2
+ <service-loader initializableProblemHandler="ignore"/>
3
+ </properties>
data/lib/henkei.rb CHANGED
@@ -10,8 +10,9 @@ require 'socket'
10
10
  require 'stringio'
11
11
 
12
12
  class Henkei
13
- GEMPATH = File.dirname(File.dirname(__FILE__))
14
- JARPATH = File.join(Henkei::GEMPATH, 'jar', 'tika-app-1.17.jar')
13
+ GEM_PATH = File.dirname(File.dirname(__FILE__))
14
+ JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.17.jar')
15
+ CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
15
16
  DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port
16
17
 
17
18
  @@server_port = nil
@@ -185,21 +186,9 @@ class Henkei
185
186
  # Henkei.server(:text, 9294)
186
187
  #
187
188
  def self.server(type, custom_port=nil)
188
- switch =
189
- case type
190
- when :text
191
- '-t'
192
- when :html
193
- '-h'
194
- when :metadata
195
- '-m -j'
196
- when :mimetype
197
- '-m -j'
198
- end
199
-
200
189
  @@server_port = custom_port || DEFAULT_SERVER_PORT
201
190
 
202
- @@server_pid = Process.spawn("#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} --server --port #{@@server_port} #{switch}")
191
+ @@server_pid = Process.spawn tika_command(type, true)
203
192
  sleep(2) # Give the server 2 seconds to spin up.
204
193
  @@server_pid
205
194
  end
@@ -213,7 +202,7 @@ class Henkei
213
202
  # Henkei.server(:text)
214
203
  # reports = ["report1.docx", "report2.doc", "report3.pdf"]
215
204
  # begin
216
- # my_texts = reports.map{|report_path| Henkei.new(report_path).text }
205
+ # my_texts = reports.map{ |report_path| Henkei.new(report_path).text }
217
206
  # rescue
218
207
  # ensure
219
208
  # Henkei.kill_server!
@@ -231,27 +220,15 @@ class Henkei
231
220
 
232
221
  # Provide the path to the Java binary
233
222
  #
234
- def self.java
223
+ def self.java_path
235
224
  ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
236
225
  end
237
- private_class_method :java
226
+ private_class_method :java_path
238
227
 
239
228
  # Internal helper for calling to Tika library directly
240
229
  #
241
230
  def self.client_read(type, data)
242
- switch =
243
- case type
244
- when :text
245
- '-t'
246
- when :html
247
- '-h'
248
- when :metadata
249
- '-m -j'
250
- when :mimetype
251
- '-m -j'
252
- end
253
-
254
- IO.popen "#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} #{switch}", 'r+' do |io|
231
+ IO.popen tika_command(type), 'r+' do |io|
255
232
  io.write data
256
233
  io.close_write
257
234
  io.read
@@ -283,4 +260,25 @@ class Henkei
283
260
  resp
284
261
  end
285
262
  private_class_method :server_read
263
+
264
+ # Internal helper for building the Java command to call Tika
265
+ #
266
+ def self.tika_command(type, server = false)
267
+ command = ["#{java_path} -Djava.awt.headless=true -jar #{Henkei::JAR_PATH} --config=#{Henkei::CONFIG_PATH}"]
268
+ command << "--server --port #{@@server_port}" if server
269
+ command << switch_for_type(type)
270
+ command.join ' '
271
+ end
272
+
273
+ # Internal helper for mapping a read type to its Tika command-line switch(es)
274
+ #
275
+ def self.switch_for_type(type)
276
+ case type
277
+ when :text then '-t'
278
+ when :html then '-h'
279
+ when :metadata then '-m -j'
280
+ when :mimetype then '-m -j'
281
+ end
282
+ end
283
+ private_class_method :switch_for_type
286
284
  end
@@ -1,3 +1,3 @@
1
1
  class Henkei
2
- VERSION = '1.17.2'
2
+ VERSION = '1.17.3'
3
3
  end
data/spec/henkei_spec.rb CHANGED
@@ -104,13 +104,13 @@ describe Henkei do
104
104
 
105
105
  describe '.java' do
106
106
  specify 'with no specified JAVA_HOME' do
107
- expect( Henkei.send(:java) ).to eql 'java'
107
+ expect( Henkei.send(:java_path) ).to eql 'java'
108
108
  end
109
109
 
110
110
  specify 'with a specified JAVA_HOME' do
111
111
  ENV['JAVA_HOME'] = '/path/to/java/home'
112
112
 
113
- expect( Henkei.send(:java) ).to eql '/path/to/java/home/bin/java'
113
+ expect( Henkei.send(:java_path) ).to eql '/path/to/java/home/bin/java'
114
114
  end
115
115
  end
116
116
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.17.2
4
+ version: 1.17.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-02-08 00:00:00.000000000 Z
12
+ date: 2018-03-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mime-types
@@ -18,6 +18,9 @@ dependencies:
18
18
  - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '1.23'
21
+ - - "<"
22
+ - !ruby/object:Gem::Version
23
+ version: '4'
21
24
  type: :runtime
22
25
  prerelease: false
23
26
  version_requirements: !ruby/object:Gem::Requirement
@@ -25,6 +28,9 @@ dependencies:
25
28
  - - ">="
26
29
  - !ruby/object:Gem::Version
27
30
  version: '1.23'
31
+ - - "<"
32
+ - !ruby/object:Gem::Version
33
+ version: '4'
28
34
  - !ruby/object:Gem::Dependency
29
35
  name: json
30
36
  requirement: !ruby/object:Gem::Requirement
@@ -32,6 +38,9 @@ dependencies:
32
38
  - - ">="
33
39
  - !ruby/object:Gem::Version
34
40
  version: '1.8'
41
+ - - "<"
42
+ - !ruby/object:Gem::Version
43
+ version: '3'
35
44
  type: :runtime
36
45
  prerelease: false
37
46
  version_requirements: !ruby/object:Gem::Requirement
@@ -39,6 +48,9 @@ dependencies:
39
48
  - - ">="
40
49
  - !ruby/object:Gem::Version
41
50
  version: '1.8'
51
+ - - "<"
52
+ - !ruby/object:Gem::Version
53
+ version: '3'
42
54
  - !ruby/object:Gem::Dependency
43
55
  name: bundler
44
56
  requirement: !ruby/object:Gem::Requirement
@@ -57,46 +69,45 @@ dependencies:
57
69
  name: rake
58
70
  requirement: !ruby/object:Gem::Requirement
59
71
  requirements:
60
- - - ">="
72
+ - - "~>"
61
73
  - !ruby/object:Gem::Version
62
- version: '0'
74
+ version: '12.3'
63
75
  type: :development
64
76
  prerelease: false
65
77
  version_requirements: !ruby/object:Gem::Requirement
66
78
  requirements:
67
- - - ">="
79
+ - - "~>"
68
80
  - !ruby/object:Gem::Version
69
- version: '0'
81
+ version: '12.3'
70
82
  - !ruby/object:Gem::Dependency
71
83
  name: rspec
72
84
  requirement: !ruby/object:Gem::Requirement
73
85
  requirements:
74
86
  - - "~>"
75
87
  - !ruby/object:Gem::Version
76
- version: '3.5'
88
+ version: '3.7'
77
89
  type: :development
78
90
  prerelease: false
79
91
  version_requirements: !ruby/object:Gem::Requirement
80
92
  requirements:
81
93
  - - "~>"
82
94
  - !ruby/object:Gem::Version
83
- version: '3.5'
95
+ version: '3.7'
84
96
  - !ruby/object:Gem::Dependency
85
97
  name: simplecov
86
98
  requirement: !ruby/object:Gem::Requirement
87
99
  requirements:
88
- - - ">="
100
+ - - "~>"
89
101
  - !ruby/object:Gem::Version
90
- version: '0'
102
+ version: '0.15'
91
103
  type: :development
92
104
  prerelease: false
93
105
  version_requirements: !ruby/object:Gem::Requirement
94
106
  requirements:
95
- - - ">="
107
+ - - "~>"
96
108
  - !ruby/object:Gem::Version
97
- version: '0'
98
- description: Read text and metadata from files and documents (.doc, .docx, .pages,
99
- .odt, .rtf, .pdf) using Apache Tika toolkit
109
+ version: '0.15'
110
+ description: Read text and metadata from files and documents using Apache Tika toolkit
100
111
  email:
101
112
  - erol.fornoles@gmail.com
102
113
  - a.bromwich@gmail.com
@@ -114,6 +125,7 @@ files:
114
125
  - Rakefile
115
126
  - henkei.gemspec
116
127
  - jar/tika-app-1.17.jar
128
+ - jar/tika-config.xml
117
129
  - lib/henkei.rb
118
130
  - lib/henkei/version.rb
119
131
  - lib/henkei/yomu.rb