henkei 1.17.2 → 1.17.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 78f85d586c1195625f6bbf0906a2b0f8f64319ce
4
- data.tar.gz: dc676d8e29199dd6659606c7ef5ec2965c456591
3
+ metadata.gz: 5d629bcc0d435522497e749752307e6c33844d29
4
+ data.tar.gz: 490b5e0c89b43f4ec83434e3861c85d69de7a8bd
5
5
  SHA512:
6
- metadata.gz: 714aa3686c0d584fbbed017442cc46a14b232ddb194260048864ba5b4265751456239cc2a396b50530af10fe6dcda4a5e38178d71a2a196d36a13bd5fc78192e
7
- data.tar.gz: ddceb92fbb74cecb0400c8d2eadba4ff236e27018e63fdba5d97562be8a6a07d336663832c2f76de46124b7e5ace27c4264b129fd92280b71dfd88bd9be0abb0
6
+ metadata.gz: 6f4e76efb3cf67bca58db9e6dc074511af3e8661b5c03800bbf28cce9939b01774c33aaa65ae24df5e46a5346cda97347e17add23e934cbaf0cbaf7c48e73246
7
+ data.tar.gz: 21cf99d84f4428f3db892aabc5827f02c848e70bb693080ac33789f4ea66d64de03f88856af7e89c14a4b783a038e33c5fba7ad6229e316f99be2cc2c0ab5fa1
data/henkei.gemspec CHANGED
@@ -8,8 +8,8 @@ Gem::Specification.new do |spec|
8
8
  spec.version = Henkei::VERSION
9
9
  spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
10
10
  spec.email = %w[erol.fornoles@gmail.com a.bromwich@gmail.com]
11
- spec.description = %q{Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit}
12
- spec.summary = spec.description
11
+ spec.description = 'Read text and metadata from files and documents using Apache Tika toolkit'
12
+ spec.summary = 'Read text and metadata from files and documents (.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
13
13
  spec.homepage = 'http://github.com/abrom/henkei'
14
14
  spec.license = 'MIT'
15
15
 
@@ -18,11 +18,11 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ['lib']
20
20
 
21
- spec.add_runtime_dependency 'mime-types', '>= 1.23'
22
- spec.add_runtime_dependency 'json', '>= 1.8'
21
+ spec.add_runtime_dependency 'mime-types', '>= 1.23', '< 4'
22
+ spec.add_runtime_dependency 'json', '>= 1.8', '< 3'
23
23
 
24
24
  spec.add_development_dependency 'bundler', '~> 1.3'
25
- spec.add_development_dependency 'rake'
26
- spec.add_development_dependency 'rspec', '~> 3.5'
27
- spec.add_development_dependency 'simplecov'
25
+ spec.add_development_dependency 'rake', '~> 12.3'
26
+ spec.add_development_dependency 'rspec', '~> 3.7'
27
+ spec.add_development_dependency 'simplecov', '~> 0.15'
28
28
  end
@@ -0,0 +1,3 @@
1
+ <properties>
2
+ <service-loader initializableProblemHandler="ignore"/>
3
+ </properties>
data/lib/henkei.rb CHANGED
@@ -10,8 +10,9 @@ require 'socket'
10
10
  require 'stringio'
11
11
 
12
12
  class Henkei
13
- GEMPATH = File.dirname(File.dirname(__FILE__))
14
- JARPATH = File.join(Henkei::GEMPATH, 'jar', 'tika-app-1.17.jar')
13
+ GEM_PATH = File.dirname(File.dirname(__FILE__))
14
+ JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.17.jar')
15
+ CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
15
16
  DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port
16
17
 
17
18
  @@server_port = nil
@@ -185,21 +186,9 @@ class Henkei
185
186
  # Henkei.server(:text, 9294)
186
187
  #
187
188
  def self.server(type, custom_port=nil)
188
- switch =
189
- case type
190
- when :text
191
- '-t'
192
- when :html
193
- '-h'
194
- when :metadata
195
- '-m -j'
196
- when :mimetype
197
- '-m -j'
198
- end
199
-
200
189
  @@server_port = custom_port || DEFAULT_SERVER_PORT
201
190
 
202
- @@server_pid = Process.spawn("#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} --server --port #{@@server_port} #{switch}")
191
+ @@server_pid = Process.spawn tika_command(type, true)
203
192
  sleep(2) # Give the server 2 seconds to spin up.
204
193
  @@server_pid
205
194
  end
@@ -213,7 +202,7 @@ class Henkei
213
202
  # Henkei.server(:text)
214
203
  # reports = ["report1.docx", "report2.doc", "report3.pdf"]
215
204
  # begin
216
- # my_texts = reports.map{|report_path| Henkei.new(report_path).text }
205
+ # my_texts = reports.map{ |report_path| Henkei.new(report_path).text }
217
206
  # rescue
218
207
  # ensure
219
208
  # Henkei.kill_server!
@@ -231,27 +220,15 @@ class Henkei
231
220
 
232
221
  # Provide the path to the Java binary
233
222
  #
234
- def self.java
223
+ def self.java_path
235
224
  ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
236
225
  end
237
- private_class_method :java
226
+ private_class_method :java_path
238
227
 
239
228
  # Internal helper for calling to Tika library directly
240
229
  #
241
230
  def self.client_read(type, data)
242
- switch =
243
- case type
244
- when :text
245
- '-t'
246
- when :html
247
- '-h'
248
- when :metadata
249
- '-m -j'
250
- when :mimetype
251
- '-m -j'
252
- end
253
-
254
- IO.popen "#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} #{switch}", 'r+' do |io|
231
+ IO.popen tika_command(type), 'r+' do |io|
255
232
  io.write data
256
233
  io.close_write
257
234
  io.read
@@ -283,4 +260,25 @@ class Henkei
283
260
  resp
284
261
  end
285
262
  private_class_method :server_read
263
+
264
+ # Internal helper for building the Java command to call Tika
265
+ #
266
+ def self.tika_command(type, server = false)
267
+ command = ["#{java_path} -Djava.awt.headless=true -jar #{Henkei::JAR_PATH} --config=#{Henkei::CONFIG_PATH}"]
268
+ command << "--server --port #{@@server_port}" if server
269
+ command << switch_for_type(type)
270
+ command.join ' '
271
+ end
272
+
273
+ # Internal helper for mapping the requested read type to its Tika command line switch
274
+ #
275
+ def self.switch_for_type(type)
276
+ case type
277
+ when :text then '-t'
278
+ when :html then '-h'
279
+ when :metadata then '-m -j'
280
+ when :mimetype then '-m -j'
281
+ end
282
+ end
283
+ private_class_method :switch_for_type
286
284
  end
@@ -1,3 +1,3 @@
1
1
  class Henkei
2
- VERSION = '1.17.2'
2
+ VERSION = '1.17.3'
3
3
  end
data/spec/henkei_spec.rb CHANGED
@@ -104,13 +104,13 @@ describe Henkei do
104
104
 
105
105
  describe '.java' do
106
106
  specify 'with no specified JAVA_HOME' do
107
- expect( Henkei.send(:java) ).to eql 'java'
107
+ expect( Henkei.send(:java_path) ).to eql 'java'
108
108
  end
109
109
 
110
110
  specify 'with a specified JAVA_HOME' do
111
111
  ENV['JAVA_HOME'] = '/path/to/java/home'
112
112
 
113
- expect( Henkei.send(:java) ).to eql '/path/to/java/home/bin/java'
113
+ expect( Henkei.send(:java_path) ).to eql '/path/to/java/home/bin/java'
114
114
  end
115
115
  end
116
116
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.17.2
4
+ version: 1.17.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-02-08 00:00:00.000000000 Z
12
+ date: 2018-03-07 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mime-types
@@ -18,6 +18,9 @@ dependencies:
18
18
  - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '1.23'
21
+ - - "<"
22
+ - !ruby/object:Gem::Version
23
+ version: '4'
21
24
  type: :runtime
22
25
  prerelease: false
23
26
  version_requirements: !ruby/object:Gem::Requirement
@@ -25,6 +28,9 @@ dependencies:
25
28
  - - ">="
26
29
  - !ruby/object:Gem::Version
27
30
  version: '1.23'
31
+ - - "<"
32
+ - !ruby/object:Gem::Version
33
+ version: '4'
28
34
  - !ruby/object:Gem::Dependency
29
35
  name: json
30
36
  requirement: !ruby/object:Gem::Requirement
@@ -32,6 +38,9 @@ dependencies:
32
38
  - - ">="
33
39
  - !ruby/object:Gem::Version
34
40
  version: '1.8'
41
+ - - "<"
42
+ - !ruby/object:Gem::Version
43
+ version: '3'
35
44
  type: :runtime
36
45
  prerelease: false
37
46
  version_requirements: !ruby/object:Gem::Requirement
@@ -39,6 +48,9 @@ dependencies:
39
48
  - - ">="
40
49
  - !ruby/object:Gem::Version
41
50
  version: '1.8'
51
+ - - "<"
52
+ - !ruby/object:Gem::Version
53
+ version: '3'
42
54
  - !ruby/object:Gem::Dependency
43
55
  name: bundler
44
56
  requirement: !ruby/object:Gem::Requirement
@@ -57,46 +69,45 @@ dependencies:
57
69
  name: rake
58
70
  requirement: !ruby/object:Gem::Requirement
59
71
  requirements:
60
- - - ">="
72
+ - - "~>"
61
73
  - !ruby/object:Gem::Version
62
- version: '0'
74
+ version: '12.3'
63
75
  type: :development
64
76
  prerelease: false
65
77
  version_requirements: !ruby/object:Gem::Requirement
66
78
  requirements:
67
- - - ">="
79
+ - - "~>"
68
80
  - !ruby/object:Gem::Version
69
- version: '0'
81
+ version: '12.3'
70
82
  - !ruby/object:Gem::Dependency
71
83
  name: rspec
72
84
  requirement: !ruby/object:Gem::Requirement
73
85
  requirements:
74
86
  - - "~>"
75
87
  - !ruby/object:Gem::Version
76
- version: '3.5'
88
+ version: '3.7'
77
89
  type: :development
78
90
  prerelease: false
79
91
  version_requirements: !ruby/object:Gem::Requirement
80
92
  requirements:
81
93
  - - "~>"
82
94
  - !ruby/object:Gem::Version
83
- version: '3.5'
95
+ version: '3.7'
84
96
  - !ruby/object:Gem::Dependency
85
97
  name: simplecov
86
98
  requirement: !ruby/object:Gem::Requirement
87
99
  requirements:
88
- - - ">="
100
+ - - "~>"
89
101
  - !ruby/object:Gem::Version
90
- version: '0'
102
+ version: '0.15'
91
103
  type: :development
92
104
  prerelease: false
93
105
  version_requirements: !ruby/object:Gem::Requirement
94
106
  requirements:
95
- - - ">="
107
+ - - "~>"
96
108
  - !ruby/object:Gem::Version
97
- version: '0'
98
- description: Read text and metadata from files and documents (.doc, .docx, .pages,
99
- .odt, .rtf, .pdf) using Apache Tika toolkit
109
+ version: '0.15'
110
+ description: Read text and metadata from files and documents using Apache Tika toolkit
100
111
  email:
101
112
  - erol.fornoles@gmail.com
102
113
  - a.bromwich@gmail.com
@@ -114,6 +125,7 @@ files:
114
125
  - Rakefile
115
126
  - henkei.gemspec
116
127
  - jar/tika-app-1.17.jar
128
+ - jar/tika-config.xml
117
129
  - lib/henkei.rb
118
130
  - lib/henkei/version.rb
119
131
  - lib/henkei/yomu.rb