henkei 1.23.1 → 1.23.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +42 -0
- data/.travis.yml +3 -2
- data/henkei.gemspec +2 -1
- data/lib/henkei.rb +38 -19
- data/lib/henkei/configuration.rb +21 -0
- data/lib/henkei/version.rb +1 -1
- data/spec/henkei_spec.rb +1 -1
- metadata +12 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 97b9b37da4e96281569a16e469d23a00c41cd35bbd15e5a7c1d1025ea2862f46
|
4
|
+
data.tar.gz: 45a7c3ae645fc417ae6064a7237d87ae7e56c3c20392f83cad42f9fdafd569e8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c323d5f5c2056bedce26e05b40d70eecd4f4fff6f8ae24362dfc80952e788f33454b8a7be6a375c54001694e6a2fbe2d7dfb60392dc7324b865d056e99637d78
|
7
|
+
data.tar.gz: fe9c6ee2a7033d55d8ccc36a3d3a1cc78fd459654314096fa087c8373544deaa291952b0303db11f362a7f758028acf11692f7b25ca132d7cb4c6603a10a5a9d
|
data/.rubocop.yml
CHANGED
@@ -1,6 +1,27 @@
|
|
1
|
+
AllCops:
|
2
|
+
NewCops: enable
|
3
|
+
|
4
|
+
Layout/EmptyLinesAroundAttributeAccessor:
|
5
|
+
Enabled: true
|
6
|
+
|
1
7
|
Layout/LineLength:
|
2
8
|
Max: 120
|
3
9
|
|
10
|
+
Layout/SpaceAroundMethodCallOperator:
|
11
|
+
Enabled: true
|
12
|
+
|
13
|
+
Lint/DeprecatedOpenSSLConstant:
|
14
|
+
Enabled: true
|
15
|
+
|
16
|
+
Lint/MixedRegexpCaptureTypes:
|
17
|
+
Enabled: true
|
18
|
+
|
19
|
+
Lint/RaiseException:
|
20
|
+
Enabled: true
|
21
|
+
|
22
|
+
Lint/StructNewOverride:
|
23
|
+
Enabled: true
|
24
|
+
|
4
25
|
Metrics/BlockLength:
|
5
26
|
Exclude:
|
6
27
|
- 'spec/**/*'
|
@@ -13,3 +34,24 @@ Style/ClassVars:
|
|
13
34
|
|
14
35
|
Style/DoubleNegation:
|
15
36
|
Enabled: false
|
37
|
+
|
38
|
+
Style/ExponentialNotation:
|
39
|
+
Enabled: true
|
40
|
+
|
41
|
+
Style/HashEachMethods:
|
42
|
+
Enabled: true
|
43
|
+
|
44
|
+
Style/HashTransformKeys:
|
45
|
+
Enabled: true
|
46
|
+
|
47
|
+
Style/HashTransformValues:
|
48
|
+
Enabled: true
|
49
|
+
|
50
|
+
Style/RedundantRegexpCharacterClass:
|
51
|
+
Enabled: true
|
52
|
+
|
53
|
+
Style/RedundantRegexpEscape:
|
54
|
+
Enabled: true
|
55
|
+
|
56
|
+
Style/SlicingWithRange:
|
57
|
+
Enabled: true
|
data/.travis.yml
CHANGED
@@ -7,10 +7,11 @@ env:
|
|
7
7
|
|
8
8
|
language: ruby
|
9
9
|
rvm:
|
10
|
-
- 2.3
|
11
10
|
- 2.4
|
12
11
|
- 2.5
|
13
12
|
- 2.6
|
13
|
+
- 2.7
|
14
|
+
- 3.0
|
14
15
|
|
15
16
|
before_install:
|
16
17
|
- gem update bundler
|
@@ -25,7 +26,7 @@ before_script:
|
|
25
26
|
- ./cc-test-reporter before-build
|
26
27
|
|
27
28
|
script:
|
28
|
-
- rubocop
|
29
|
+
- bundle exec rubocop
|
29
30
|
- bundle exec rspec
|
30
31
|
|
31
32
|
after_script:
|
data/henkei.gemspec
CHANGED
@@ -15,6 +15,7 @@ Gem::Specification.new do |spec|
|
|
15
15
|
'(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
|
16
16
|
spec.homepage = 'http://github.com/abrom/henkei'
|
17
17
|
spec.license = 'MIT'
|
18
|
+
spec.required_ruby_version = ['>= 2.4.0', '< 3.1.0']
|
18
19
|
|
19
20
|
spec.files = `git ls-files`.split("\n")
|
20
21
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
@@ -22,7 +23,7 @@ Gem::Specification.new do |spec|
|
|
22
23
|
spec.require_paths = ['lib']
|
23
24
|
|
24
25
|
spec.add_runtime_dependency 'json', '>= 1.8', '< 3'
|
25
|
-
spec.add_runtime_dependency '
|
26
|
+
spec.add_runtime_dependency 'mini_mime', '>= 0.1.1', '< 1'
|
26
27
|
|
27
28
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
28
29
|
spec.add_development_dependency 'rails', '~> 5.0'
|
data/lib/henkei.rb
CHANGED
@@ -2,9 +2,18 @@
|
|
2
2
|
|
3
3
|
require 'henkei/version'
|
4
4
|
require 'henkei/yomu'
|
5
|
+
require 'henkei/configuration'
|
5
6
|
|
6
7
|
require 'net/http'
|
7
|
-
require '
|
8
|
+
require 'mini_mime'
|
9
|
+
|
10
|
+
# require 'mime/types' if available
|
11
|
+
begin
|
12
|
+
require 'mime/types'
|
13
|
+
rescue LoadError
|
14
|
+
nil
|
15
|
+
end
|
16
|
+
|
8
17
|
require 'time'
|
9
18
|
require 'json'
|
10
19
|
|
@@ -23,6 +32,18 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
23
32
|
@@server_port = nil
|
24
33
|
@@server_pid = nil
|
25
34
|
|
35
|
+
def self.mimetype(content_type)
|
36
|
+
if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
|
37
|
+
warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
|
38
|
+
' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
|
39
|
+
MIME::Types[content_type].first
|
40
|
+
else
|
41
|
+
MiniMime.lookup_by_content_type(content_type).tap do |object|
|
42
|
+
object.define_singleton_method(:extensions) { [extension] }
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
26
47
|
# Read text or metadata from a data buffer.
|
27
48
|
#
|
28
49
|
# data = File.read 'sample.pages'
|
@@ -36,7 +57,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
36
57
|
when :text then result
|
37
58
|
when :html then result
|
38
59
|
when :metadata then JSON.parse(result)
|
39
|
-
when :mimetype then
|
60
|
+
when :mimetype then Henkei.mimetype(JSON.parse(result)['Content-Type'])
|
40
61
|
end
|
41
62
|
end
|
42
63
|
|
@@ -112,9 +133,8 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
112
133
|
def mimetype
|
113
134
|
return @mimetype if defined? @mimetype
|
114
135
|
|
115
|
-
|
116
|
-
|
117
|
-
@mimetype = MIME::Types[type].first
|
136
|
+
content_type = metadata['Content-Type'].is_a?(Array) ? metadata['Content-Type'].first : metadata['Content-Type']
|
137
|
+
@mimetype = Henkei.mimetype(content_type)
|
118
138
|
end
|
119
139
|
|
120
140
|
# Returns +true+ if the Henkei document was specified using a file path.
|
@@ -186,7 +206,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
186
206
|
def self.server(type, custom_port = nil)
|
187
207
|
@@server_port = custom_port || DEFAULT_SERVER_PORT
|
188
208
|
|
189
|
-
@@server_pid = Process.spawn
|
209
|
+
@@server_pid = Process.spawn(*tika_command(type, server: true))
|
190
210
|
sleep(2) # Give the server 2 seconds to spin up.
|
191
211
|
@@server_pid
|
192
212
|
end
|
@@ -219,14 +239,14 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
219
239
|
# Provide the path to the Java binary
|
220
240
|
#
|
221
241
|
def self.java_path
|
222
|
-
ENV['JAVA_HOME'] ? ENV['JAVA_HOME']
|
242
|
+
ENV['JAVA_HOME'] ? "#{ENV['JAVA_HOME']}/bin/java" : 'java'
|
223
243
|
end
|
224
244
|
private_class_method :java_path
|
225
245
|
|
226
246
|
# Internal helper for calling to Tika library directly
|
227
247
|
#
|
228
248
|
def self.client_read(type, data)
|
229
|
-
Open3.capture2(tika_command(type), stdin_data: data, binmode: true).first
|
249
|
+
Open3.capture2(*tika_command(type), stdin_data: data, binmode: true).first
|
230
250
|
end
|
231
251
|
private_class_method :client_read
|
232
252
|
|
@@ -259,23 +279,22 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
259
279
|
|
260
280
|
# Internal helper for building the Java command to call Tika
|
261
281
|
#
|
262
|
-
def self.tika_command(type, server
|
263
|
-
command = [
|
264
|
-
command
|
265
|
-
command
|
266
|
-
command.join ' '
|
282
|
+
def self.tika_command(type, server: false)
|
283
|
+
command = [java_path, '-Djava.awt.headless=true', '-jar', Henkei::JAR_PATH, "--config=#{Henkei::CONFIG_PATH}"]
|
284
|
+
command += ['--server', '--port', @@server_port.to_s] if server
|
285
|
+
command + switch_for_type(type)
|
267
286
|
end
|
268
287
|
private_class_method :tika_command
|
269
288
|
|
270
289
|
# Internal helper for building the Java command to call Tika
|
271
290
|
#
|
272
291
|
def self.switch_for_type(type)
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
292
|
+
{
|
293
|
+
text: ['-t'],
|
294
|
+
html: ['-h'],
|
295
|
+
metadata: %w[-m -j],
|
296
|
+
mimetype: %w[-m -j]
|
297
|
+
}[type]
|
279
298
|
end
|
280
299
|
private_class_method :switch_for_type
|
281
300
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Henkei monkey patch for configuration support
|
4
|
+
class Henkei
|
5
|
+
def self.configuration
|
6
|
+
@configuration ||= Configuration.new
|
7
|
+
end
|
8
|
+
|
9
|
+
def self.configure
|
10
|
+
yield(configuration)
|
11
|
+
end
|
12
|
+
|
13
|
+
# Handle Henkei configuration
|
14
|
+
class Configuration
|
15
|
+
attr_accessor :mime_library
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@mime_library = 'mime/types'
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/henkei/version.rb
CHANGED
data/spec/henkei_spec.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: henkei
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.23.1
|
4
|
+
version: 1.23.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erol Fornoles
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-02-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: json
|
@@ -32,25 +32,25 @@ dependencies:
|
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: '3'
|
34
34
|
- !ruby/object:Gem::Dependency
|
35
|
-
name:
|
35
|
+
name: mini_mime
|
36
36
|
requirement: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 0.1.1
|
41
41
|
- - "<"
|
42
42
|
- !ruby/object:Gem::Version
|
43
|
-
version: '
|
43
|
+
version: '1'
|
44
44
|
type: :runtime
|
45
45
|
prerelease: false
|
46
46
|
version_requirements: !ruby/object:Gem::Requirement
|
47
47
|
requirements:
|
48
48
|
- - ">="
|
49
49
|
- !ruby/object:Gem::Version
|
50
|
-
version:
|
50
|
+
version: 0.1.1
|
51
51
|
- - "<"
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: '
|
53
|
+
version: '1'
|
54
54
|
- !ruby/object:Gem::Dependency
|
55
55
|
name: bundler
|
56
56
|
requirement: !ruby/object:Gem::Requirement
|
@@ -158,6 +158,7 @@ files:
|
|
158
158
|
- jar/tika-app-1.23.jar
|
159
159
|
- jar/tika-config.xml
|
160
160
|
- lib/henkei.rb
|
161
|
+
- lib/henkei/configuration.rb
|
161
162
|
- lib/henkei/version.rb
|
162
163
|
- lib/henkei/yomu.rb
|
163
164
|
- spec/helper.rb
|
@@ -179,7 +180,10 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
179
180
|
requirements:
|
180
181
|
- - ">="
|
181
182
|
- !ruby/object:Gem::Version
|
182
|
-
version:
|
183
|
+
version: 2.4.0
|
184
|
+
- - "<"
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: 3.1.0
|
183
187
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
184
188
|
requirements:
|
185
189
|
- - ">="
|