henkei 1.28.3.1 → 1.28.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bd6ddc3967c88a20c41845c884623e60d689bf20f84e95de5050a8abcdad8037
4
- data.tar.gz: 197f4ee86dad00184c1129c58a27bd511244e238e94ca50e0eaffa77f15a2052
3
+ metadata.gz: d6f63ac4ab328389684f60a90d9e5bfecbf14e74d9c9b6685abce2e984257ef7
4
+ data.tar.gz: 3f952baeb1a1c089ae4bccaa51d502909e6ed71f5bb30343ade57a51fab8a688
5
5
  SHA512:
6
- metadata.gz: 4cf5bd57225bcdfa44884d05eb100aac25d46b0ebf08024b4f8e8923eea40f0a25635e4c89ef20972a51e9b6278e348bd47db299db704db82264ae7138d3eb3e
7
- data.tar.gz: a801fc206f243dc029b04d85d5796f6f005a3ccfe4b61a8501ebfc7a32250e397b73dcd317d8ea61ea7c9f4f4be5f806ae97d0b18349e1acf2c671e45dcf4a24
6
+ metadata.gz: 955576e745930cc52480ae2ebd25d93d74b7334868605b6a65a46832702693e8e7028a2c6f8ce548395a61bb31f6d0da40bf9118c23af0689988cb09f24a9166
7
+ data.tar.gz: 476c59eb877e5e03cf316a83126ff39b5e2e86cec0424c33d04cf324bc1dd2ffa52323b436747f1fd206680da6efe8e43e2794e8ca271b0a4854187f82728df8
@@ -14,10 +14,10 @@ jobs:
14
14
  runs-on: ubuntu-latest
15
15
  strategy:
16
16
  matrix:
17
- ruby-version: ['2.6', '2.7', '3.0', '3.1']
17
+ ruby-version: ['2.7', '3.0', '3.1', '3.2']
18
18
 
19
19
  steps:
20
- - uses: actions/checkout@v2
20
+ - uses: actions/checkout@v3
21
21
 
22
22
  - name: Set up Ruby
23
23
  uses: ruby/setup-ruby@v1
@@ -32,6 +32,6 @@ jobs:
32
32
  run: bundle exec rspec
33
33
 
34
34
  - name: Test & publish code coverage
35
- uses: paambaati/codeclimate-action@v3.0.0
35
+ uses: paambaati/codeclimate-action@v3.2.0
36
36
  env:
37
37
  CC_TEST_REPORTER_ID: bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
data/.rubocop.yml CHANGED
@@ -1,6 +1,11 @@
1
+ require:
2
+ - rubocop-performance
3
+ - rubocop-rake
4
+ - rubocop-rspec
5
+
1
6
  AllCops:
2
7
  NewCops: enable
3
- TargetRubyVersion: 2.6
8
+ TargetRubyVersion: 2.7
4
9
 
5
10
  Layout/EmptyLinesAroundAttributeAccessor:
6
11
  Enabled: true
@@ -30,6 +35,12 @@ Metrics/BlockLength:
30
35
  Metrics/MethodLength:
31
36
  Max: 15
32
37
 
38
+ RSpec/ExampleLength:
39
+ Max: 12
40
+
41
+ RSpec/MultipleExpectations:
42
+ Max: 4
43
+
33
44
  Style/ClassVars:
34
45
  Enabled: false
35
46
 
data/henkei.gemspec CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'henkei/version'
7
7
 
8
- Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
8
+ Gem::Specification.new do |spec|
9
9
  spec.name = 'henkei'
10
10
  spec.version = Henkei::VERSION
11
11
  spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
15
15
  '(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
16
16
  spec.homepage = 'https://github.com/abrom/henkei'
17
17
  spec.license = 'MIT'
18
- spec.required_ruby_version = ['>= 2.6.0', '< 3.2.0']
18
+ spec.required_ruby_version = ['>= 2.7.0', '< 3.3.0']
19
19
 
20
20
  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
21
21
  # delete this section to allow pushing this gem to any host.
@@ -37,7 +37,6 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
37
37
  spec.add_development_dependency 'rspec', '~> 3.7'
38
38
  spec.add_development_dependency 'rubocop', '~> 1.26'
39
39
  spec.add_development_dependency 'rubocop-performance', '~> 1.13'
40
- spec.add_development_dependency 'rubocop-rails', '~> 2.14'
41
40
  spec.add_development_dependency 'rubocop-rake', '~> 0.6'
42
41
  spec.add_development_dependency 'rubocop-rspec', '~> 2.9'
43
42
  spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Henkei
4
- VERSION = '1.28.3.1'
4
+ VERSION = '1.28.3.2'
5
5
  end
data/lib/henkei.rb CHANGED
@@ -34,8 +34,8 @@ class Henkei # rubocop:disable Metrics/ClassLength
34
34
 
35
35
  def self.mimetype(content_type)
36
36
  if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
37
- warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
38
- ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
37
+ warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead. ' \
38
+ 'Use Henkei.configure and assign "mini_mime" to `mime_library`.'
39
39
  MIME::Types[content_type].first
40
40
  else
41
41
  MiniMime.lookup_by_content_type(content_type).tap do |object|
@@ -78,7 +78,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
78
78
  if input.is_a? String
79
79
  if File.exist? input
80
80
  @path = input
81
- elsif input =~ URI::DEFAULT_PARSER.make_regexp
81
+ elsif input&.match?(URI::DEFAULT_PARSER.make_regexp)
82
82
  @uri = URI.parse input
83
83
  else
84
84
  raise Errno::ENOENT, "missing file or invalid URI - #{input}"
@@ -265,7 +265,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
265
265
  # tell Tika that we're done sending data
266
266
  s.shutdown(Socket::SHUT_WR)
267
267
 
268
- resp = String.new ''
268
+ resp = +''
269
269
  loop do
270
270
  chunk = s.recv(65_536)
271
271
  break if chunk.empty? || !chunk
@@ -300,9 +300,8 @@ class Henkei # rubocop:disable Metrics/ClassLength
300
300
  # Internal helper to remove erroneous output
301
301
  #
302
302
  def self.filter_response(response)
303
- response.gsub(
304
- /\AWARNING: sun\.reflect\.Reflection\.getCallerClass is not supported\. This will impact performance\.\n/,
305
- ''
303
+ response.delete_prefix(
304
+ "WARNING: sun.reflect.Reflection.getCallerClass is not supported. This will impact performance.\n"
306
305
  )
307
306
  end
308
307
  private_class_method :filter_response
data/spec/henkei_spec.rb CHANGED
@@ -15,13 +15,13 @@ describe Henkei do
15
15
 
16
16
  describe '.read' do
17
17
  it 'reads text' do
18
- text = Henkei.read :text, data
18
+ text = described_class.read :text, data
19
19
 
20
20
  expect(text).to include 'The quick brown fox jumped over the lazy cat.'
21
21
  end
22
22
 
23
23
  it 'reads metadata' do
24
- metadata = Henkei.read :metadata, data
24
+ metadata = described_class.read :metadata, data
25
25
 
26
26
  expect(metadata['Content-Type']).to(
27
27
  eq 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
@@ -30,13 +30,13 @@ describe Henkei do
30
30
 
31
31
  it 'reads metadata values with colons as strings' do
32
32
  data = File.read 'spec/samples/sample-metadata-values-with-colons.doc'
33
- metadata = Henkei.read :metadata, data
33
+ metadata = described_class.read :metadata, data
34
34
 
35
35
  expect(metadata['dc:title']).to eq 'problem: test'
36
36
  end
37
37
 
38
38
  it 'reads mimetype' do
39
- mimetype = Henkei.read :mimetype, data
39
+ mimetype = described_class.read :mimetype, data
40
40
 
41
41
  expect(mimetype.content_type).to(
42
42
  eq 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
@@ -48,7 +48,7 @@ describe Henkei do
48
48
  let(:data) { File.read 'spec/samples/pipe-error.png' }
49
49
 
50
50
  it 'returns an empty result' do
51
- text = Henkei.read :text, data
51
+ text = described_class.read :text, data
52
52
 
53
53
  expect(text).to eq ''
54
54
  end
@@ -57,11 +57,11 @@ describe Henkei do
57
57
 
58
58
  describe '.new' do
59
59
  it 'requires parameters' do
60
- expect { Henkei.new }.to raise_error ArgumentError
60
+ expect { described_class.new }.to raise_error ArgumentError
61
61
  end
62
62
 
63
63
  it 'accepts a root path' do
64
- henkei = Henkei.new 'spec/samples/sample.pages'
64
+ henkei = described_class.new File.join(Henkei::GEM_PATH, 'spec/samples/sample.pages')
65
65
 
66
66
  expect(henkei).to be_path
67
67
  expect(henkei).not_to be_uri
@@ -69,7 +69,7 @@ describe Henkei do
69
69
  end
70
70
 
71
71
  it 'accepts a relative path' do
72
- henkei = Henkei.new 'spec/samples/sample.pages'
72
+ henkei = described_class.new 'spec/samples/sample.pages'
73
73
 
74
74
  expect(henkei).to be_path
75
75
  expect(henkei).not_to be_uri
@@ -77,7 +77,7 @@ describe Henkei do
77
77
  end
78
78
 
79
79
  it 'accepts a path with spaces' do
80
- henkei = Henkei.new 'spec/samples/sample filename with spaces.pages'
80
+ henkei = described_class.new 'spec/samples/sample filename with spaces.pages'
81
81
 
82
82
  expect(henkei).to be_path
83
83
  expect(henkei).not_to be_uri
@@ -85,7 +85,7 @@ describe Henkei do
85
85
  end
86
86
 
87
87
  it 'accepts a URI' do
88
- henkei = Henkei.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
88
+ henkei = described_class.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
89
89
 
90
90
  expect(henkei).to be_uri
91
91
  expect(henkei).not_to be_path
@@ -94,7 +94,7 @@ describe Henkei do
94
94
 
95
95
  it 'accepts a stream or object that can be read' do
96
96
  File.open 'spec/samples/sample.pages', 'r' do |file|
97
- henkei = Henkei.new file
97
+ henkei = described_class.new file
98
98
 
99
99
  expect(henkei).to be_stream
100
100
  expect(henkei).not_to be_path
@@ -103,37 +103,38 @@ describe Henkei do
103
103
  end
104
104
 
105
105
  it 'refuses a path to a missing file' do
106
- expect { Henkei.new 'test/sample/missing.pages' }.to raise_error Errno::ENOENT
106
+ expect { described_class.new 'test/sample/missing.pages' }.to raise_error Errno::ENOENT
107
107
  end
108
108
 
109
109
  it 'refuses other objects' do
110
110
  [nil, 1, 1.1].each do |object|
111
- expect { Henkei.new object }.to raise_error TypeError
111
+ expect { described_class.new object }.to raise_error TypeError
112
112
  end
113
113
  end
114
114
  end
115
115
 
116
116
  describe '.creation_date' do
117
- let(:henkei) { Henkei.new 'spec/samples/sample.pages' }
118
- it 'should return Time' do
117
+ let(:henkei) { described_class.new 'spec/samples/sample.pages' }
118
+
119
+ it 'returns Time' do
119
120
  expect(henkei.creation_date).to be_a Time
120
121
  end
121
122
  end
122
123
 
123
124
  describe '.java' do
124
125
  specify 'with no specified JAVA_HOME' do
125
- expect(Henkei.send(:java_path)).to eq 'java'
126
+ expect(described_class.send(:java_path)).to eq 'java'
126
127
  end
127
128
 
128
129
  specify 'with a specified JAVA_HOME' do
129
130
  ENV['JAVA_HOME'] = '/path/to/java/home'
130
131
 
131
- expect(Henkei.send(:java_path)).to eq '/path/to/java/home/bin/java'
132
+ expect(described_class.send(:java_path)).to eq '/path/to/java/home/bin/java'
132
133
  end
133
134
  end
134
135
 
135
- context 'initialized with a given path' do
136
- let(:henkei) { Henkei.new 'spec/samples/sample.pages' }
136
+ context 'when initialized with a given path' do
137
+ let(:henkei) { described_class.new 'spec/samples/sample.pages' }
137
138
 
138
139
  specify '#text reads text' do
139
140
  expect(henkei.text).to include 'The quick brown fox jumped over the lazy cat.'
@@ -144,7 +145,7 @@ describe Henkei do
144
145
  end
145
146
 
146
147
  context 'when passing in the `pipe-error.png` test file' do
147
- let(:henkei) { Henkei.new 'spec/samples/pipe-error.png' }
148
+ let(:henkei) { described_class.new 'spec/samples/pipe-error.png' }
148
149
 
149
150
  it '#text returns an empty result' do
150
151
  expect(henkei.text).to eq ''
@@ -161,8 +162,8 @@ describe Henkei do
161
162
  end
162
163
  end
163
164
 
164
- context 'initialized with a given URI' do
165
- let(:henkei) { Henkei.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
165
+ context 'when initialized with a given URI' do
166
+ let(:henkei) { described_class.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
166
167
 
167
168
  specify '#text reads text' do
168
169
  expect(henkei.text).to include 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
@@ -175,8 +176,8 @@ describe Henkei do
175
176
  end
176
177
  end
177
178
 
178
- context 'initialized with a given stream' do
179
- let(:henkei) { Henkei.new File.open('spec/samples/sample.pages', 'rb') }
179
+ context 'when initialized with a given stream' do
180
+ let(:henkei) { described_class.new File.open('spec/samples/sample.pages', 'rb') }
180
181
 
181
182
  specify '#text reads text' do
182
183
  expect(henkei.text).to include 'The quick brown fox jumped over the lazy cat.'
@@ -188,7 +189,7 @@ describe Henkei do
188
189
  end
189
190
 
190
191
  context 'when source is a remote PDF' do
191
- let(:henkei) { Henkei.new 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf' }
192
+ let(:henkei) { described_class.new 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf' }
192
193
 
193
194
  specify '#text reads text' do
194
195
  expect(henkei.text).to include 'Dummy PDF file'
@@ -199,35 +200,35 @@ describe Henkei do
199
200
  end
200
201
  end
201
202
 
202
- context 'working as server mode' do
203
+ context 'when working as server mode' do
203
204
  specify '#starts and kills server' do
204
- Henkei.server(:text)
205
- expect(Henkei.class_variable_get(:@@server_pid)).not_to be_nil
206
- expect(Henkei.class_variable_get(:@@server_port)).not_to be_nil
205
+ described_class.server(:text)
206
+ expect(described_class.class_variable_get(:@@server_pid)).not_to be_nil
207
+ expect(described_class.class_variable_get(:@@server_port)).not_to be_nil
207
208
 
208
- s = TCPSocket.new('localhost', Henkei.class_variable_get(:@@server_port))
209
+ s = TCPSocket.new('localhost', described_class.class_variable_get(:@@server_port))
209
210
  expect(s).to be_a TCPSocket
210
211
  s.close
211
212
  ensure
212
- port = Henkei.class_variable_get(:@@server_port)
213
- Henkei.kill_server!
213
+ port = described_class.class_variable_get(:@@server_port)
214
+ described_class.kill_server!
214
215
  sleep 2
215
216
  expect { TCPSocket.new('localhost', port) }.to raise_error Errno::ECONNREFUSED
216
217
  end
217
218
 
218
219
  specify '#runs samples through server mode' do
219
- Henkei.server(:text)
220
- expect(Henkei.new('spec/samples/sample.pages').text).to(
220
+ described_class.server(:text)
221
+ expect(described_class.new('spec/samples/sample.pages').text).to(
221
222
  include 'The quick brown fox jumped over the lazy cat.'
222
223
  )
223
- expect(Henkei.new('spec/samples/sample filename with spaces.pages').text).to(
224
+ expect(described_class.new('spec/samples/sample filename with spaces.pages').text).to(
224
225
  include 'The quick brown fox jumped over the lazy cat.'
225
226
  )
226
- expect(Henkei.new('spec/samples/sample.docx').text).to(
227
+ expect(described_class.new('spec/samples/sample.docx').text).to(
227
228
  include 'The quick brown fox jumped over the lazy cat.'
228
229
  )
229
230
  ensure
230
- Henkei.kill_server!
231
+ described_class.kill_server!
231
232
  end
232
233
  end
233
234
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.28.3.1
4
+ version: 1.28.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-05-28 00:00:00.000000000 Z
12
+ date: 2023-01-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -135,20 +135,6 @@ dependencies:
135
135
  - - "~>"
136
136
  - !ruby/object:Gem::Version
137
137
  version: '1.13'
138
- - !ruby/object:Gem::Dependency
139
- name: rubocop-rails
140
- requirement: !ruby/object:Gem::Requirement
141
- requirements:
142
- - - "~>"
143
- - !ruby/object:Gem::Version
144
- version: '2.14'
145
- type: :development
146
- prerelease: false
147
- version_requirements: !ruby/object:Gem::Requirement
148
- requirements:
149
- - - "~>"
150
- - !ruby/object:Gem::Version
151
- version: '2.14'
152
138
  - !ruby/object:Gem::Dependency
153
139
  name: rubocop-rake
154
140
  requirement: !ruby/object:Gem::Requirement
@@ -244,17 +230,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
244
230
  requirements:
245
231
  - - ">="
246
232
  - !ruby/object:Gem::Version
247
- version: 2.6.0
233
+ version: 2.7.0
248
234
  - - "<"
249
235
  - !ruby/object:Gem::Version
250
- version: 3.2.0
236
+ version: 3.3.0
251
237
  required_rubygems_version: !ruby/object:Gem::Requirement
252
238
  requirements:
253
239
  - - ">="
254
240
  - !ruby/object:Gem::Version
255
241
  version: '0'
256
242
  requirements: []
257
- rubygems_version: 3.2.3
243
+ rubygems_version: 3.4.1
258
244
  signing_key:
259
245
  specification_version: 4
260
246
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,