henkei 1.20.0 → 1.23.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +45 -3
- data/.travis.yml +7 -3
- data/Gemfile +2 -0
- data/Rakefile +1 -0
- data/bin/console +8 -0
- data/henkei.gemspec +7 -3
- data/jar/{tika-app-1.20.jar → tika-app-1.23.jar} +0 -0
- data/lib/henkei.rb +44 -25
- data/lib/henkei/configuration.rb +21 -0
- data/lib/henkei/version.rb +3 -1
- data/lib/henkei/yomu.rb +2 -0
- data/spec/helper.rb +2 -0
- data/spec/henkei_spec.rb +45 -1
- data/spec/samples/pipe-error.png +0 -0
- metadata +37 -16
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 97b9b37da4e96281569a16e469d23a00c41cd35bbd15e5a7c1d1025ea2862f46
|
|
4
|
+
data.tar.gz: 45a7c3ae645fc417ae6064a7237d87ae7e56c3c20392f83cad42f9fdafd569e8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c323d5f5c2056bedce26e05b40d70eecd4f4fff6f8ae24362dfc80952e788f33454b8a7be6a375c54001694e6a2fbe2d7dfb60392dc7324b865d056e99637d78
|
|
7
|
+
data.tar.gz: fe9c6ee2a7033d55d8ccc36a3d3a1cc78fd459654314096fa087c8373544deaa291952b0303db11f362a7f758028acf11692f7b25ca132d7cb4c6603a10a5a9d
|
data/.rubocop.yml
CHANGED
|
@@ -1,10 +1,31 @@
|
|
|
1
|
+
AllCops:
|
|
2
|
+
NewCops: enable
|
|
3
|
+
|
|
4
|
+
Layout/EmptyLinesAroundAttributeAccessor:
|
|
5
|
+
Enabled: true
|
|
6
|
+
|
|
7
|
+
Layout/LineLength:
|
|
8
|
+
Max: 120
|
|
9
|
+
|
|
10
|
+
Layout/SpaceAroundMethodCallOperator:
|
|
11
|
+
Enabled: true
|
|
12
|
+
|
|
13
|
+
Lint/DeprecatedOpenSSLConstant:
|
|
14
|
+
Enabled: true
|
|
15
|
+
|
|
16
|
+
Lint/MixedRegexpCaptureTypes:
|
|
17
|
+
Enabled: true
|
|
18
|
+
|
|
19
|
+
Lint/RaiseException:
|
|
20
|
+
Enabled: true
|
|
21
|
+
|
|
22
|
+
Lint/StructNewOverride:
|
|
23
|
+
Enabled: true
|
|
24
|
+
|
|
1
25
|
Metrics/BlockLength:
|
|
2
26
|
Exclude:
|
|
3
27
|
- 'spec/**/*'
|
|
4
28
|
|
|
5
|
-
Metrics/LineLength:
|
|
6
|
-
Max: 120
|
|
7
|
-
|
|
8
29
|
Metrics/MethodLength:
|
|
9
30
|
Max: 15
|
|
10
31
|
|
|
@@ -13,3 +34,24 @@ Style/ClassVars:
|
|
|
13
34
|
|
|
14
35
|
Style/DoubleNegation:
|
|
15
36
|
Enabled: false
|
|
37
|
+
|
|
38
|
+
Style/ExponentialNotation:
|
|
39
|
+
Enabled: true
|
|
40
|
+
|
|
41
|
+
Style/HashEachMethods:
|
|
42
|
+
Enabled: true
|
|
43
|
+
|
|
44
|
+
Style/HashTransformKeys:
|
|
45
|
+
Enabled: true
|
|
46
|
+
|
|
47
|
+
Style/HashTransformValues:
|
|
48
|
+
Enabled: true
|
|
49
|
+
|
|
50
|
+
Style/RedundantRegexpCharacterClass:
|
|
51
|
+
Enabled: true
|
|
52
|
+
|
|
53
|
+
Style/RedundantRegexpEscape:
|
|
54
|
+
Enabled: true
|
|
55
|
+
|
|
56
|
+
Style/SlicingWithRange:
|
|
57
|
+
Enabled: true
|
data/.travis.yml
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
1
|
env:
|
|
2
2
|
global:
|
|
3
3
|
- CC_TEST_REPORTER_ID=bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
|
|
4
|
+
jobs:
|
|
5
|
+
- INCLUDE_RAILS=false
|
|
6
|
+
- INCLUDE_RAILS=true
|
|
4
7
|
|
|
5
8
|
language: ruby
|
|
6
9
|
rvm:
|
|
7
|
-
- 2.2
|
|
8
|
-
- 2.3
|
|
9
10
|
- 2.4
|
|
10
11
|
- 2.5
|
|
12
|
+
- 2.6
|
|
13
|
+
- 2.7
|
|
14
|
+
- 3.0
|
|
11
15
|
|
|
12
16
|
before_install:
|
|
13
17
|
- gem update bundler
|
|
@@ -22,7 +26,7 @@ before_script:
|
|
|
22
26
|
- ./cc-test-reporter before-build
|
|
23
27
|
|
|
24
28
|
script:
|
|
25
|
-
- rubocop
|
|
29
|
+
- bundle exec rubocop
|
|
26
30
|
- bundle exec rspec
|
|
27
31
|
|
|
28
32
|
after_script:
|
data/Gemfile
CHANGED
data/Rakefile
CHANGED
data/bin/console
ADDED
data/henkei.gemspec
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
lib = File.expand_path('lib', __dir__)
|
|
2
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
3
5
|
|
|
@@ -13,6 +15,7 @@ Gem::Specification.new do |spec|
|
|
|
13
15
|
'(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
|
|
14
16
|
spec.homepage = 'http://github.com/abrom/henkei'
|
|
15
17
|
spec.license = 'MIT'
|
|
18
|
+
spec.required_ruby_version = ['>= 2.4.0', '< 3.1.0']
|
|
16
19
|
|
|
17
20
|
spec.files = `git ls-files`.split("\n")
|
|
18
21
|
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
|
@@ -20,11 +23,12 @@ Gem::Specification.new do |spec|
|
|
|
20
23
|
spec.require_paths = ['lib']
|
|
21
24
|
|
|
22
25
|
spec.add_runtime_dependency 'json', '>= 1.8', '< 3'
|
|
23
|
-
spec.add_runtime_dependency '
|
|
26
|
+
spec.add_runtime_dependency 'mini_mime', '>= 0.1.1', '< 1'
|
|
24
27
|
|
|
25
|
-
spec.add_development_dependency 'bundler', '~>
|
|
28
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
|
29
|
+
spec.add_development_dependency 'rails', '~> 5.0'
|
|
26
30
|
spec.add_development_dependency 'rake', '~> 12.3'
|
|
27
31
|
spec.add_development_dependency 'rspec', '~> 3.7'
|
|
28
|
-
spec.add_development_dependency 'rubocop', '~> 0.
|
|
32
|
+
spec.add_development_dependency 'rubocop', '~> 0.71'
|
|
29
33
|
spec.add_development_dependency 'simplecov', '~> 0.15'
|
|
30
34
|
end
|
|
Binary file
|
data/lib/henkei.rb
CHANGED
|
@@ -1,24 +1,49 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'henkei/version'
|
|
2
4
|
require 'henkei/yomu'
|
|
5
|
+
require 'henkei/configuration'
|
|
3
6
|
|
|
4
7
|
require 'net/http'
|
|
5
|
-
require '
|
|
8
|
+
require 'mini_mime'
|
|
9
|
+
|
|
10
|
+
# require 'mime/types' if available
|
|
11
|
+
begin
|
|
12
|
+
require 'mime/types'
|
|
13
|
+
rescue LoadError
|
|
14
|
+
nil
|
|
15
|
+
end
|
|
16
|
+
|
|
6
17
|
require 'time'
|
|
7
18
|
require 'json'
|
|
8
19
|
|
|
9
20
|
require 'socket'
|
|
10
21
|
require 'stringio'
|
|
11
22
|
|
|
23
|
+
require 'open3'
|
|
24
|
+
|
|
12
25
|
# Read text and metadata from files and documents using Apache Tika toolkit
|
|
13
26
|
class Henkei # rubocop:disable Metrics/ClassLength
|
|
14
27
|
GEM_PATH = File.dirname(File.dirname(__FILE__))
|
|
15
|
-
JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.20.jar')
|
|
28
|
+
JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.23.jar')
|
|
16
29
|
CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
|
|
17
30
|
DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port
|
|
18
31
|
|
|
19
32
|
@@server_port = nil
|
|
20
33
|
@@server_pid = nil
|
|
21
34
|
|
|
35
|
+
def self.mimetype(content_type)
|
|
36
|
+
if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
|
|
37
|
+
warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
|
|
38
|
+
' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
|
|
39
|
+
MIME::Types[content_type].first
|
|
40
|
+
else
|
|
41
|
+
MiniMime.lookup_by_content_type(content_type).tap do |object|
|
|
42
|
+
object.define_singleton_method(:extensions) { [extension] }
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
22
47
|
# Read text or metadata from a data buffer.
|
|
23
48
|
#
|
|
24
49
|
# data = File.read 'sample.pages'
|
|
@@ -32,7 +57,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
|
32
57
|
when :text then result
|
|
33
58
|
when :html then result
|
|
34
59
|
when :metadata then JSON.parse(result)
|
|
35
|
-
when :mimetype then
|
|
60
|
+
when :mimetype then Henkei.mimetype(JSON.parse(result)['Content-Type'])
|
|
36
61
|
end
|
|
37
62
|
end
|
|
38
63
|
|
|
@@ -108,9 +133,8 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
|
108
133
|
def mimetype
|
|
109
134
|
return @mimetype if defined? @mimetype
|
|
110
135
|
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
@mimetype = MIME::Types[type].first
|
|
136
|
+
content_type = metadata['Content-Type'].is_a?(Array) ? metadata['Content-Type'].first : metadata['Content-Type']
|
|
137
|
+
@mimetype = Henkei.mimetype(content_type)
|
|
114
138
|
end
|
|
115
139
|
|
|
116
140
|
# Returns +true+ if the Henkei document was specified using a file path.
|
|
@@ -182,7 +206,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
|
182
206
|
def self.server(type, custom_port = nil)
|
|
183
207
|
@@server_port = custom_port || DEFAULT_SERVER_PORT
|
|
184
208
|
|
|
185
|
-
@@server_pid = Process.spawn
|
|
209
|
+
@@server_pid = Process.spawn(*tika_command(type, server: true))
|
|
186
210
|
sleep(2) # Give the server 2 seconds to spin up.
|
|
187
211
|
@@server_pid
|
|
188
212
|
end
|
|
@@ -215,18 +239,14 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
|
215
239
|
# Provide the path to the Java binary
|
|
216
240
|
#
|
|
217
241
|
def self.java_path
|
|
218
|
-
ENV['JAVA_HOME'] ? ENV['JAVA_HOME']
|
|
242
|
+
ENV['JAVA_HOME'] ? "#{ENV['JAVA_HOME']}/bin/java" : 'java'
|
|
219
243
|
end
|
|
220
244
|
private_class_method :java_path
|
|
221
245
|
|
|
222
246
|
# Internal helper for calling to Tika library directly
|
|
223
247
|
#
|
|
224
248
|
def self.client_read(type, data)
|
|
225
|
-
|
|
226
|
-
io.write data
|
|
227
|
-
io.close_write
|
|
228
|
-
io.read
|
|
229
|
-
end
|
|
249
|
+
Open3.capture2(*tika_command(type), stdin_data: data, binmode: true).first
|
|
230
250
|
end
|
|
231
251
|
private_class_method :client_read
|
|
232
252
|
|
|
@@ -246,7 +266,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
|
246
266
|
# tell Tika that we're done sending data
|
|
247
267
|
s.shutdown(Socket::SHUT_WR)
|
|
248
268
|
|
|
249
|
-
resp = ''
|
|
269
|
+
resp = String.new ''
|
|
250
270
|
loop do
|
|
251
271
|
chunk = s.recv(65_536)
|
|
252
272
|
break if chunk.empty? || !chunk
|
|
@@ -259,23 +279,22 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
|
259
279
|
|
|
260
280
|
# Internal helper for building the Java command to call Tika
|
|
261
281
|
#
|
|
262
|
-
def self.tika_command(type, server
|
|
263
|
-
command = [
|
|
264
|
-
command
|
|
265
|
-
command
|
|
266
|
-
command.join ' '
|
|
282
|
+
def self.tika_command(type, server: false)
|
|
283
|
+
command = [java_path, '-Djava.awt.headless=true', '-jar', Henkei::JAR_PATH, "--config=#{Henkei::CONFIG_PATH}"]
|
|
284
|
+
command += ['--server', '--port', @@server_port.to_s] if server
|
|
285
|
+
command + switch_for_type(type)
|
|
267
286
|
end
|
|
268
287
|
private_class_method :tika_command
|
|
269
288
|
|
|
270
289
|
# Internal helper for building the Java command to call Tika
|
|
271
290
|
#
|
|
272
291
|
def self.switch_for_type(type)
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
292
|
+
{
|
|
293
|
+
text: ['-t'],
|
|
294
|
+
html: ['-h'],
|
|
295
|
+
metadata: %w[-m -j],
|
|
296
|
+
mimetype: %w[-m -j]
|
|
297
|
+
}[type]
|
|
279
298
|
end
|
|
280
299
|
private_class_method :switch_for_type
|
|
281
300
|
end
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Henkei monkey patch for configuration support
|
|
4
|
+
class Henkei
|
|
5
|
+
def self.configuration
|
|
6
|
+
@configuration ||= Configuration.new
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
def self.configure
|
|
10
|
+
yield(configuration)
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# Handle Henkei configuration
|
|
14
|
+
class Configuration
|
|
15
|
+
attr_accessor :mime_library
|
|
16
|
+
|
|
17
|
+
def initialize
|
|
18
|
+
@mime_library = 'mime/types'
|
|
19
|
+
end
|
|
20
|
+
end
|
|
21
|
+
end
|
data/lib/henkei/version.rb
CHANGED
data/lib/henkei/yomu.rb
CHANGED
data/spec/helper.rb
CHANGED
data/spec/henkei_spec.rb
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'helper'
|
|
2
4
|
require 'henkei'
|
|
3
5
|
|
|
6
|
+
# Some of the tests have been known to fail in weird and wonderful ways when `rails` is included
|
|
7
|
+
require 'rails' if ENV['INCLUDE_RAILS'] == 'true'
|
|
8
|
+
|
|
4
9
|
describe Henkei do
|
|
5
10
|
let(:data) { File.read 'spec/samples/sample.docx' }
|
|
6
11
|
|
|
@@ -38,6 +43,16 @@ describe Henkei do
|
|
|
38
43
|
)
|
|
39
44
|
expect(mimetype.extensions).to include 'docx'
|
|
40
45
|
end
|
|
46
|
+
|
|
47
|
+
context 'when passing in the `pipe-error.png` test file' do
|
|
48
|
+
let(:data) { File.read 'spec/samples/pipe-error.png' }
|
|
49
|
+
|
|
50
|
+
it 'returns an empty result' do
|
|
51
|
+
text = Henkei.read :text, data
|
|
52
|
+
|
|
53
|
+
expect(text).to eq ''
|
|
54
|
+
end
|
|
55
|
+
end
|
|
41
56
|
end
|
|
42
57
|
|
|
43
58
|
describe '.new' do
|
|
@@ -127,6 +142,23 @@ describe Henkei do
|
|
|
127
142
|
specify '#metadata reads metadata' do
|
|
128
143
|
expect(henkei.metadata['Content-Type']).to eq %w[application/vnd.apple.pages application/vnd.apple.pages]
|
|
129
144
|
end
|
|
145
|
+
|
|
146
|
+
context 'when passing in the `pipe-error.png` test file' do
|
|
147
|
+
let(:henkei) { Henkei.new 'spec/samples/pipe-error.png' }
|
|
148
|
+
|
|
149
|
+
it '#text returns an empty result' do
|
|
150
|
+
expect(henkei.text).to eq ''
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
it '#html returns an empty body' do
|
|
154
|
+
expect(henkei.html).to include '<body/>'
|
|
155
|
+
expect(henkei.html).to include '<meta name="tiff:ImageWidth" content="792"/>'
|
|
156
|
+
end
|
|
157
|
+
|
|
158
|
+
it '#mimetype returns `image/png`' do
|
|
159
|
+
expect(henkei.mimetype.content_type).to eq 'image/png'
|
|
160
|
+
end
|
|
161
|
+
end
|
|
130
162
|
end
|
|
131
163
|
|
|
132
164
|
context 'initialized with a given URI' do
|
|
@@ -155,6 +187,18 @@ describe Henkei do
|
|
|
155
187
|
end
|
|
156
188
|
end
|
|
157
189
|
|
|
190
|
+
context 'when source is a remote PDF' do
|
|
191
|
+
let(:henkei) { Henkei.new 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf' }
|
|
192
|
+
|
|
193
|
+
specify '#text reads text' do
|
|
194
|
+
expect(henkei.text).to include 'Dummy PDF file'
|
|
195
|
+
end
|
|
196
|
+
|
|
197
|
+
specify '#metadata reads metadata' do
|
|
198
|
+
expect(henkei.metadata['Content-Type']).to eq 'application/pdf'
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
|
|
158
202
|
context 'working as server mode' do
|
|
159
203
|
specify '#starts and kills server' do
|
|
160
204
|
begin
|
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: henkei
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.20.0
|
|
4
|
+
version: 1.23.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Erol Fornoles
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date:
|
|
12
|
+
date: 2021-02-02 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: json
|
|
@@ -32,39 +32,53 @@ dependencies:
|
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
33
|
version: '3'
|
|
34
34
|
- !ruby/object:Gem::Dependency
|
|
35
|
-
name:
|
|
35
|
+
name: mini_mime
|
|
36
36
|
requirement: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - ">="
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version:
|
|
40
|
+
version: 0.1.1
|
|
41
41
|
- - "<"
|
|
42
42
|
- !ruby/object:Gem::Version
|
|
43
|
-
version: '
|
|
43
|
+
version: '1'
|
|
44
44
|
type: :runtime
|
|
45
45
|
prerelease: false
|
|
46
46
|
version_requirements: !ruby/object:Gem::Requirement
|
|
47
47
|
requirements:
|
|
48
48
|
- - ">="
|
|
49
49
|
- !ruby/object:Gem::Version
|
|
50
|
-
version:
|
|
50
|
+
version: 0.1.1
|
|
51
51
|
- - "<"
|
|
52
52
|
- !ruby/object:Gem::Version
|
|
53
|
-
version: '
|
|
53
|
+
version: '1'
|
|
54
54
|
- !ruby/object:Gem::Dependency
|
|
55
55
|
name: bundler
|
|
56
56
|
requirement: !ruby/object:Gem::Requirement
|
|
57
57
|
requirements:
|
|
58
58
|
- - "~>"
|
|
59
59
|
- !ruby/object:Gem::Version
|
|
60
|
-
version: '
|
|
60
|
+
version: '2.0'
|
|
61
61
|
type: :development
|
|
62
62
|
prerelease: false
|
|
63
63
|
version_requirements: !ruby/object:Gem::Requirement
|
|
64
64
|
requirements:
|
|
65
65
|
- - "~>"
|
|
66
66
|
- !ruby/object:Gem::Version
|
|
67
|
-
version: '
|
|
67
|
+
version: '2.0'
|
|
68
|
+
- !ruby/object:Gem::Dependency
|
|
69
|
+
name: rails
|
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
|
71
|
+
requirements:
|
|
72
|
+
- - "~>"
|
|
73
|
+
- !ruby/object:Gem::Version
|
|
74
|
+
version: '5.0'
|
|
75
|
+
type: :development
|
|
76
|
+
prerelease: false
|
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
78
|
+
requirements:
|
|
79
|
+
- - "~>"
|
|
80
|
+
- !ruby/object:Gem::Version
|
|
81
|
+
version: '5.0'
|
|
68
82
|
- !ruby/object:Gem::Dependency
|
|
69
83
|
name: rake
|
|
70
84
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -99,14 +113,14 @@ dependencies:
|
|
|
99
113
|
requirements:
|
|
100
114
|
- - "~>"
|
|
101
115
|
- !ruby/object:Gem::Version
|
|
102
|
-
version: '0.
|
|
116
|
+
version: '0.71'
|
|
103
117
|
type: :development
|
|
104
118
|
prerelease: false
|
|
105
119
|
version_requirements: !ruby/object:Gem::Requirement
|
|
106
120
|
requirements:
|
|
107
121
|
- - "~>"
|
|
108
122
|
- !ruby/object:Gem::Version
|
|
109
|
-
version: '0.
|
|
123
|
+
version: '0.71'
|
|
110
124
|
- !ruby/object:Gem::Dependency
|
|
111
125
|
name: simplecov
|
|
112
126
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -125,7 +139,8 @@ description: Read text and metadata from files and documents using Apache Tika t
|
|
|
125
139
|
email:
|
|
126
140
|
- erol.fornoles@gmail.com
|
|
127
141
|
- a.bromwich@gmail.com
|
|
128
|
-
executables:
|
|
142
|
+
executables:
|
|
143
|
+
- console
|
|
129
144
|
extensions: []
|
|
130
145
|
extra_rdoc_files: []
|
|
131
146
|
files:
|
|
@@ -138,14 +153,17 @@ files:
|
|
|
138
153
|
- NOTICE.txt
|
|
139
154
|
- README.md
|
|
140
155
|
- Rakefile
|
|
156
|
+
- bin/console
|
|
141
157
|
- henkei.gemspec
|
|
142
|
-
- jar/tika-app-1.20.jar
|
|
158
|
+
- jar/tika-app-1.23.jar
|
|
143
159
|
- jar/tika-config.xml
|
|
144
160
|
- lib/henkei.rb
|
|
161
|
+
- lib/henkei/configuration.rb
|
|
145
162
|
- lib/henkei/version.rb
|
|
146
163
|
- lib/henkei/yomu.rb
|
|
147
164
|
- spec/helper.rb
|
|
148
165
|
- spec/henkei_spec.rb
|
|
166
|
+
- spec/samples/pipe-error.png
|
|
149
167
|
- spec/samples/sample filename with spaces.pages
|
|
150
168
|
- spec/samples/sample-metadata-values-with-colons.doc
|
|
151
169
|
- spec/samples/sample.docx
|
|
@@ -162,15 +180,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
162
180
|
requirements:
|
|
163
181
|
- - ">="
|
|
164
182
|
- !ruby/object:Gem::Version
|
|
165
|
-
version:
|
|
183
|
+
version: 2.4.0
|
|
184
|
+
- - "<"
|
|
185
|
+
- !ruby/object:Gem::Version
|
|
186
|
+
version: 3.1.0
|
|
166
187
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
167
188
|
requirements:
|
|
168
189
|
- - ">="
|
|
169
190
|
- !ruby/object:Gem::Version
|
|
170
191
|
version: '0'
|
|
171
192
|
requirements: []
|
|
172
|
-
|
|
173
|
-
rubygems_version: 2.7.6
|
|
193
|
+
rubygems_version: 3.0.6
|
|
174
194
|
signing_key:
|
|
175
195
|
specification_version: 4
|
|
176
196
|
summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
|
|
@@ -178,6 +198,7 @@ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .
|
|
|
178
198
|
test_files:
|
|
179
199
|
- spec/helper.rb
|
|
180
200
|
- spec/henkei_spec.rb
|
|
201
|
+
- spec/samples/pipe-error.png
|
|
181
202
|
- spec/samples/sample filename with spaces.pages
|
|
182
203
|
- spec/samples/sample-metadata-values-with-colons.doc
|
|
183
204
|
- spec/samples/sample.docx
|