henkei 1.28.3.1 → 1.28.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +3 -3
- data/.rubocop.yml +12 -1
- data/henkei.gemspec +2 -3
- data/lib/henkei/version.rb +1 -1
- data/lib/henkei.rb +6 -7
- data/spec/henkei_spec.rb +38 -37
- metadata +5 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d6f63ac4ab328389684f60a90d9e5bfecbf14e74d9c9b6685abce2e984257ef7
|
4
|
+
data.tar.gz: 3f952baeb1a1c089ae4bccaa51d502909e6ed71f5bb30343ade57a51fab8a688
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 955576e745930cc52480ae2ebd25d93d74b7334868605b6a65a46832702693e8e7028a2c6f8ce548395a61bb31f6d0da40bf9118c23af0689988cb09f24a9166
|
7
|
+
data.tar.gz: 476c59eb877e5e03cf316a83126ff39b5e2e86cec0424c33d04cf324bc1dd2ffa52323b436747f1fd206680da6efe8e43e2794e8ca271b0a4854187f82728df8
|
data/.github/workflows/test.yml
CHANGED
@@ -14,10 +14,10 @@ jobs:
|
|
14
14
|
runs-on: ubuntu-latest
|
15
15
|
strategy:
|
16
16
|
matrix:
|
17
|
-
ruby-version: ['2.
|
17
|
+
ruby-version: ['2.7', '3.0', '3.1', '3.2']
|
18
18
|
|
19
19
|
steps:
|
20
|
-
- uses: actions/checkout@
|
20
|
+
- uses: actions/checkout@v3
|
21
21
|
|
22
22
|
- name: Set up Ruby
|
23
23
|
uses: ruby/setup-ruby@v1
|
@@ -32,6 +32,6 @@ jobs:
|
|
32
32
|
run: bundle exec rspec
|
33
33
|
|
34
34
|
- name: Test & publish code coverage
|
35
|
-
uses: paambaati/codeclimate-action@v3.
|
35
|
+
uses: paambaati/codeclimate-action@v3.2.0
|
36
36
|
env:
|
37
37
|
CC_TEST_REPORTER_ID: bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
|
data/.rubocop.yml
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
require:
|
2
|
+
- rubocop-performance
|
3
|
+
- rubocop-rake
|
4
|
+
- rubocop-rspec
|
5
|
+
|
1
6
|
AllCops:
|
2
7
|
NewCops: enable
|
3
|
-
TargetRubyVersion: 2.
|
8
|
+
TargetRubyVersion: 2.7
|
4
9
|
|
5
10
|
Layout/EmptyLinesAroundAttributeAccessor:
|
6
11
|
Enabled: true
|
@@ -30,6 +35,12 @@ Metrics/BlockLength:
|
|
30
35
|
Metrics/MethodLength:
|
31
36
|
Max: 15
|
32
37
|
|
38
|
+
RSpec/ExampleLength:
|
39
|
+
Max: 12
|
40
|
+
|
41
|
+
RSpec/MultipleExpectations:
|
42
|
+
Max: 4
|
43
|
+
|
33
44
|
Style/ClassVars:
|
34
45
|
Enabled: false
|
35
46
|
|
data/henkei.gemspec
CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
5
5
|
|
6
6
|
require 'henkei/version'
|
7
7
|
|
8
|
-
Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
8
|
+
Gem::Specification.new do |spec|
|
9
9
|
spec.name = 'henkei'
|
10
10
|
spec.version = Henkei::VERSION
|
11
11
|
spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
|
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
15
15
|
'(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
|
16
16
|
spec.homepage = 'https://github.com/abrom/henkei'
|
17
17
|
spec.license = 'MIT'
|
18
|
-
spec.required_ruby_version = ['>= 2.
|
18
|
+
spec.required_ruby_version = ['>= 2.7.0', '< 3.3.0']
|
19
19
|
|
20
20
|
# Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
|
21
21
|
# delete this section to allow pushing this gem to any host.
|
@@ -37,7 +37,6 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
|
|
37
37
|
spec.add_development_dependency 'rspec', '~> 3.7'
|
38
38
|
spec.add_development_dependency 'rubocop', '~> 1.26'
|
39
39
|
spec.add_development_dependency 'rubocop-performance', '~> 1.13'
|
40
|
-
spec.add_development_dependency 'rubocop-rails', '~> 2.14'
|
41
40
|
spec.add_development_dependency 'rubocop-rake', '~> 0.6'
|
42
41
|
spec.add_development_dependency 'rubocop-rspec', '~> 2.9'
|
43
42
|
spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
|
data/lib/henkei/version.rb
CHANGED
data/lib/henkei.rb
CHANGED
@@ -34,8 +34,8 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
34
34
|
|
35
35
|
def self.mimetype(content_type)
|
36
36
|
if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
|
37
|
-
warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
|
38
|
-
'
|
37
|
+
warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead. ' \
|
38
|
+
'Use Henkei.configure and assign "mini_mime" to `mime_library`.'
|
39
39
|
MIME::Types[content_type].first
|
40
40
|
else
|
41
41
|
MiniMime.lookup_by_content_type(content_type).tap do |object|
|
@@ -78,7 +78,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
78
78
|
if input.is_a? String
|
79
79
|
if File.exist? input
|
80
80
|
@path = input
|
81
|
-
elsif input
|
81
|
+
elsif input&.match?(URI::DEFAULT_PARSER.make_regexp)
|
82
82
|
@uri = URI.parse input
|
83
83
|
else
|
84
84
|
raise Errno::ENOENT, "missing file or invalid URI - #{input}"
|
@@ -265,7 +265,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
265
265
|
# tell Tika that we're done sending data
|
266
266
|
s.shutdown(Socket::SHUT_WR)
|
267
267
|
|
268
|
-
resp =
|
268
|
+
resp = +''
|
269
269
|
loop do
|
270
270
|
chunk = s.recv(65_536)
|
271
271
|
break if chunk.empty? || !chunk
|
@@ -300,9 +300,8 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
300
300
|
# Internal helper to remove erroneous output
|
301
301
|
#
|
302
302
|
def self.filter_response(response)
|
303
|
-
response.
|
304
|
-
|
305
|
-
''
|
303
|
+
response.delete_prefix(
|
304
|
+
"WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance.\n"
|
306
305
|
)
|
307
306
|
end
|
308
307
|
private_class_method :filter_response
|
data/spec/henkei_spec.rb
CHANGED
@@ -15,13 +15,13 @@ describe Henkei do
|
|
15
15
|
|
16
16
|
describe '.read' do
|
17
17
|
it 'reads text' do
|
18
|
-
text =
|
18
|
+
text = described_class.read :text, data
|
19
19
|
|
20
20
|
expect(text).to include 'The quick brown fox jumped over the lazy cat.'
|
21
21
|
end
|
22
22
|
|
23
23
|
it 'reads metadata' do
|
24
|
-
metadata =
|
24
|
+
metadata = described_class.read :metadata, data
|
25
25
|
|
26
26
|
expect(metadata['Content-Type']).to(
|
27
27
|
eq 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
@@ -30,13 +30,13 @@ describe Henkei do
|
|
30
30
|
|
31
31
|
it 'reads metadata values with colons as strings' do
|
32
32
|
data = File.read 'spec/samples/sample-metadata-values-with-colons.doc'
|
33
|
-
metadata =
|
33
|
+
metadata = described_class.read :metadata, data
|
34
34
|
|
35
35
|
expect(metadata['dc:title']).to eq 'problem: test'
|
36
36
|
end
|
37
37
|
|
38
38
|
it 'reads mimetype' do
|
39
|
-
mimetype =
|
39
|
+
mimetype = described_class.read :mimetype, data
|
40
40
|
|
41
41
|
expect(mimetype.content_type).to(
|
42
42
|
eq 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
|
@@ -48,7 +48,7 @@ describe Henkei do
|
|
48
48
|
let(:data) { File.read 'spec/samples/pipe-error.png' }
|
49
49
|
|
50
50
|
it 'returns an empty result' do
|
51
|
-
text =
|
51
|
+
text = described_class.read :text, data
|
52
52
|
|
53
53
|
expect(text).to eq ''
|
54
54
|
end
|
@@ -57,11 +57,11 @@ describe Henkei do
|
|
57
57
|
|
58
58
|
describe '.new' do
|
59
59
|
it 'requires parameters' do
|
60
|
-
expect {
|
60
|
+
expect { described_class.new }.to raise_error ArgumentError
|
61
61
|
end
|
62
62
|
|
63
63
|
it 'accepts a root path' do
|
64
|
-
henkei =
|
64
|
+
henkei = described_class.new File.join(Henkei::GEM_PATH, 'spec/samples/sample.pages')
|
65
65
|
|
66
66
|
expect(henkei).to be_path
|
67
67
|
expect(henkei).not_to be_uri
|
@@ -69,7 +69,7 @@ describe Henkei do
|
|
69
69
|
end
|
70
70
|
|
71
71
|
it 'accepts a relative path' do
|
72
|
-
henkei =
|
72
|
+
henkei = described_class.new 'spec/samples/sample.pages'
|
73
73
|
|
74
74
|
expect(henkei).to be_path
|
75
75
|
expect(henkei).not_to be_uri
|
@@ -77,7 +77,7 @@ describe Henkei do
|
|
77
77
|
end
|
78
78
|
|
79
79
|
it 'accepts a path with spaces' do
|
80
|
-
henkei =
|
80
|
+
henkei = described_class.new 'spec/samples/sample filename with spaces.pages'
|
81
81
|
|
82
82
|
expect(henkei).to be_path
|
83
83
|
expect(henkei).not_to be_uri
|
@@ -85,7 +85,7 @@ describe Henkei do
|
|
85
85
|
end
|
86
86
|
|
87
87
|
it 'accepts a URI' do
|
88
|
-
henkei =
|
88
|
+
henkei = described_class.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
|
89
89
|
|
90
90
|
expect(henkei).to be_uri
|
91
91
|
expect(henkei).not_to be_path
|
@@ -94,7 +94,7 @@ describe Henkei do
|
|
94
94
|
|
95
95
|
it 'accepts a stream or object that can be read' do
|
96
96
|
File.open 'spec/samples/sample.pages', 'r' do |file|
|
97
|
-
henkei =
|
97
|
+
henkei = described_class.new file
|
98
98
|
|
99
99
|
expect(henkei).to be_stream
|
100
100
|
expect(henkei).not_to be_path
|
@@ -103,37 +103,38 @@ describe Henkei do
|
|
103
103
|
end
|
104
104
|
|
105
105
|
it 'refuses a path to a missing file' do
|
106
|
-
expect {
|
106
|
+
expect { described_class.new 'test/sample/missing.pages' }.to raise_error Errno::ENOENT
|
107
107
|
end
|
108
108
|
|
109
109
|
it 'refuses other objects' do
|
110
110
|
[nil, 1, 1.1].each do |object|
|
111
|
-
expect {
|
111
|
+
expect { described_class.new object }.to raise_error TypeError
|
112
112
|
end
|
113
113
|
end
|
114
114
|
end
|
115
115
|
|
116
116
|
describe '.creation_date' do
|
117
|
-
let(:henkei) {
|
118
|
-
|
117
|
+
let(:henkei) { described_class.new 'spec/samples/sample.pages' }
|
118
|
+
|
119
|
+
it 'returns Time' do
|
119
120
|
expect(henkei.creation_date).to be_a Time
|
120
121
|
end
|
121
122
|
end
|
122
123
|
|
123
124
|
describe '.java' do
|
124
125
|
specify 'with no specified JAVA_HOME' do
|
125
|
-
expect(
|
126
|
+
expect(described_class.send(:java_path)).to eq 'java'
|
126
127
|
end
|
127
128
|
|
128
129
|
specify 'with a specified JAVA_HOME' do
|
129
130
|
ENV['JAVA_HOME'] = '/path/to/java/home'
|
130
131
|
|
131
|
-
expect(
|
132
|
+
expect(described_class.send(:java_path)).to eq '/path/to/java/home/bin/java'
|
132
133
|
end
|
133
134
|
end
|
134
135
|
|
135
|
-
context 'initialized with a given path' do
|
136
|
-
let(:henkei) {
|
136
|
+
context 'when initialized with a given path' do
|
137
|
+
let(:henkei) { described_class.new 'spec/samples/sample.pages' }
|
137
138
|
|
138
139
|
specify '#text reads text' do
|
139
140
|
expect(henkei.text).to include 'The quick brown fox jumped over the lazy cat.'
|
@@ -144,7 +145,7 @@ describe Henkei do
|
|
144
145
|
end
|
145
146
|
|
146
147
|
context 'when passing in the `pipe-error.png` test file' do
|
147
|
-
let(:henkei) {
|
148
|
+
let(:henkei) { described_class.new 'spec/samples/pipe-error.png' }
|
148
149
|
|
149
150
|
it '#text returns an empty result' do
|
150
151
|
expect(henkei.text).to eq ''
|
@@ -161,8 +162,8 @@ describe Henkei do
|
|
161
162
|
end
|
162
163
|
end
|
163
164
|
|
164
|
-
context 'initialized with a given URI' do
|
165
|
-
let(:henkei) {
|
165
|
+
context 'when initialized with a given URI' do
|
166
|
+
let(:henkei) { described_class.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
|
166
167
|
|
167
168
|
specify '#text reads text' do
|
168
169
|
expect(henkei.text).to include 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
|
@@ -175,8 +176,8 @@ describe Henkei do
|
|
175
176
|
end
|
176
177
|
end
|
177
178
|
|
178
|
-
context 'initialized with a given stream' do
|
179
|
-
let(:henkei) {
|
179
|
+
context 'when initialized with a given stream' do
|
180
|
+
let(:henkei) { described_class.new File.open('spec/samples/sample.pages', 'rb') }
|
180
181
|
|
181
182
|
specify '#text reads text' do
|
182
183
|
expect(henkei.text).to include 'The quick brown fox jumped over the lazy cat.'
|
@@ -188,7 +189,7 @@ describe Henkei do
|
|
188
189
|
end
|
189
190
|
|
190
191
|
context 'when source is a remote PDF' do
|
191
|
-
let(:henkei) {
|
192
|
+
let(:henkei) { described_class.new 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf' }
|
192
193
|
|
193
194
|
specify '#text reads text' do
|
194
195
|
expect(henkei.text).to include 'Dummy PDF file'
|
@@ -199,35 +200,35 @@ describe Henkei do
|
|
199
200
|
end
|
200
201
|
end
|
201
202
|
|
202
|
-
context 'working as server mode' do
|
203
|
+
context 'when working as server mode' do
|
203
204
|
specify '#starts and kills server' do
|
204
|
-
|
205
|
-
expect(
|
206
|
-
expect(
|
205
|
+
described_class.server(:text)
|
206
|
+
expect(described_class.class_variable_get(:@@server_pid)).not_to be_nil
|
207
|
+
expect(described_class.class_variable_get(:@@server_port)).not_to be_nil
|
207
208
|
|
208
|
-
s = TCPSocket.new('localhost',
|
209
|
+
s = TCPSocket.new('localhost', described_class.class_variable_get(:@@server_port))
|
209
210
|
expect(s).to be_a TCPSocket
|
210
211
|
s.close
|
211
212
|
ensure
|
212
|
-
port =
|
213
|
-
|
213
|
+
port = described_class.class_variable_get(:@@server_port)
|
214
|
+
described_class.kill_server!
|
214
215
|
sleep 2
|
215
216
|
expect { TCPSocket.new('localhost', port) }.to raise_error Errno::ECONNREFUSED
|
216
217
|
end
|
217
218
|
|
218
219
|
specify '#runs samples through server mode' do
|
219
|
-
|
220
|
-
expect(
|
220
|
+
described_class.server(:text)
|
221
|
+
expect(described_class.new('spec/samples/sample.pages').text).to(
|
221
222
|
include 'The quick brown fox jumped over the lazy cat.'
|
222
223
|
)
|
223
|
-
expect(
|
224
|
+
expect(described_class.new('spec/samples/sample filename with spaces.pages').text).to(
|
224
225
|
include 'The quick brown fox jumped over the lazy cat.'
|
225
226
|
)
|
226
|
-
expect(
|
227
|
+
expect(described_class.new('spec/samples/sample.docx').text).to(
|
227
228
|
include 'The quick brown fox jumped over the lazy cat.'
|
228
229
|
)
|
229
230
|
ensure
|
230
|
-
|
231
|
+
described_class.kill_server!
|
231
232
|
end
|
232
233
|
end
|
233
234
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: henkei
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.28.3.1
|
4
|
+
version: 1.28.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erol Fornoles
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-01-22 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: json
|
@@ -135,20 +135,6 @@ dependencies:
|
|
135
135
|
- - "~>"
|
136
136
|
- !ruby/object:Gem::Version
|
137
137
|
version: '1.13'
|
138
|
-
- !ruby/object:Gem::Dependency
|
139
|
-
name: rubocop-rails
|
140
|
-
requirement: !ruby/object:Gem::Requirement
|
141
|
-
requirements:
|
142
|
-
- - "~>"
|
143
|
-
- !ruby/object:Gem::Version
|
144
|
-
version: '2.14'
|
145
|
-
type: :development
|
146
|
-
prerelease: false
|
147
|
-
version_requirements: !ruby/object:Gem::Requirement
|
148
|
-
requirements:
|
149
|
-
- - "~>"
|
150
|
-
- !ruby/object:Gem::Version
|
151
|
-
version: '2.14'
|
152
138
|
- !ruby/object:Gem::Dependency
|
153
139
|
name: rubocop-rake
|
154
140
|
requirement: !ruby/object:Gem::Requirement
|
@@ -244,17 +230,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
244
230
|
requirements:
|
245
231
|
- - ">="
|
246
232
|
- !ruby/object:Gem::Version
|
247
|
-
version: 2.
|
233
|
+
version: 2.7.0
|
248
234
|
- - "<"
|
249
235
|
- !ruby/object:Gem::Version
|
250
|
-
version: 3.
|
236
|
+
version: 3.3.0
|
251
237
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
252
238
|
requirements:
|
253
239
|
- - ">="
|
254
240
|
- !ruby/object:Gem::Version
|
255
241
|
version: '0'
|
256
242
|
requirements: []
|
257
|
-
rubygems_version: 3.
|
243
|
+
rubygems_version: 3.4.1
|
258
244
|
signing_key:
|
259
245
|
specification_version: 4
|
260
246
|
summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
|