henkei 2.4.0.1 → 2.4.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 52de171e9cba852d1551459674a12adfca8fd6166cb5e5707f3bc6a7cec9415c
4
- data.tar.gz: '08807feea85b577c37153c290331c8f9c4441c2eef6c2600d630948a27b9ba5e'
3
+ metadata.gz: a039368aaffaee95c3d48c5b56fc30b81babafc7b996986e56c788bb12c20bc0
4
+ data.tar.gz: d31e08f66e605ea99209911edebbb62fce65e75c328043a9ed3e53f1eac4d80d
5
5
  SHA512:
6
- metadata.gz: 6481f5588edeb5cf7e806cd9326636d14e936bee95064de49017614e999f295f9683b3cbe6346dbb4d0611753288d7b628435514fe13ff0b039723de55262db1
7
- data.tar.gz: 27a33e20e068708563324db99798abf56a81b25386899e41cc4b5e15097df11e7bc61f69b1705a5a0d537ac3488c608e28c6337e6bc8d4fe6af3b2aba6e19416
6
+ metadata.gz: 1daffbde6948f1e8d9c8003c8ab60a3f86051c307b04a0aa3a654bdf05efd830c3148e17a555fee4cd4f52d85ed80eb6071e4df54754e5458b7e1caa1a9b2474
7
+ data.tar.gz: a230d09cde52cbedbc0b67ed025a7666119b5c66f7611cb730b1558aba5b77f5f50cc878e17f8d11a14d9840d0b379aeedb00d141fbde924ec30f3fe42414747
@@ -14,10 +14,10 @@ jobs:
14
14
  runs-on: ubuntu-latest
15
15
  strategy:
16
16
  matrix:
17
- ruby-version: ['2.6', '2.7', '3.0', '3.1']
17
+ ruby-version: ['2.7', '3.0', '3.1', '3.2']
18
18
 
19
19
  steps:
20
- - uses: actions/checkout@v2
20
+ - uses: actions/checkout@v3
21
21
 
22
22
  - name: Set up Ruby
23
23
  uses: ruby/setup-ruby@v1
@@ -32,6 +32,6 @@ jobs:
32
32
  run: bundle exec rspec
33
33
 
34
34
  - name: Test & publish code coverage
35
- uses: paambaati/codeclimate-action@v3.0.0
35
+ uses: paambaati/codeclimate-action@v3.2.0
36
36
  env:
37
37
  CC_TEST_REPORTER_ID: bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
data/.rubocop.yml CHANGED
@@ -1,6 +1,10 @@
1
+ require:
2
+ - rubocop-rake
3
+ - rubocop-rspec
4
+
1
5
  AllCops:
2
6
  NewCops: enable
3
- TargetRubyVersion: 2.6
7
+ TargetRubyVersion: 2.7
4
8
 
5
9
  Layout/EmptyLinesAroundAttributeAccessor:
6
10
  Enabled: true
@@ -30,6 +34,12 @@ Metrics/BlockLength:
30
34
  Metrics/MethodLength:
31
35
  Max: 15
32
36
 
37
+ RSpec/ExampleLength:
38
+ Max: 10
39
+
40
+ RSpec/MultipleExpectations:
41
+ Max: 3
42
+
33
43
  Style/ClassVars:
34
44
  Enabled: false
35
45
 
data/henkei.gemspec CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'henkei/version'
7
7
 
8
- Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
8
+ Gem::Specification.new do |spec|
9
9
  spec.name = 'henkei'
10
10
  spec.version = Henkei::VERSION
11
11
  spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
@@ -15,7 +15,7 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
15
15
  '(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
16
16
  spec.homepage = 'https://github.com/abrom/henkei'
17
17
  spec.license = 'MIT'
18
- spec.required_ruby_version = ['>= 2.6.0', '< 3.2.0']
18
+ spec.required_ruby_version = ['>= 2.7.0', '< 3.3.0']
19
19
 
20
20
  # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
21
21
  # delete this section to allow pushing this gem to any host.
@@ -38,7 +38,6 @@ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
38
38
  spec.add_development_dependency 'rspec', '~> 3.7'
39
39
  spec.add_development_dependency 'rubocop', '~> 1.26'
40
40
  spec.add_development_dependency 'rubocop-performance', '~> 1.13'
41
- spec.add_development_dependency 'rubocop-rails', '~> 2.14'
42
41
  spec.add_development_dependency 'rubocop-rake', '~> 0.6'
43
42
  spec.add_development_dependency 'rubocop-rspec', '~> 2.9'
44
43
  spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Henkei
4
- VERSION = '2.4.0.1'
4
+ VERSION = '2.4.0.2'
5
5
  end
data/lib/henkei.rb CHANGED
@@ -31,8 +31,8 @@ class Henkei # rubocop:disable Metrics/ClassLength
31
31
 
32
32
  def self.mimetype(content_type)
33
33
  if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
34
- warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
35
- ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
34
+ warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead. ' \
35
+ 'Use Henkei.configure and assign "mini_mime" to `mime_library`.'
36
36
  MIME::Types[content_type].first
37
37
  else
38
38
  MiniMime.lookup_by_content_type(content_type).tap do |object|
data/spec/henkei_spec.rb CHANGED
@@ -20,13 +20,13 @@ describe Henkei do
20
20
 
21
21
  describe '.read' do
22
22
  it 'reads text' do
23
- text = Henkei.read :text, data
23
+ text = described_class.read :text, data
24
24
 
25
25
  expect(text).to include 'The quick brown fox jumped over the lazy cat.'
26
26
  end
27
27
 
28
28
  it 'reads metadata' do
29
- metadata = Henkei.read :metadata, data
29
+ metadata = described_class.read :metadata, data
30
30
 
31
31
  expect(metadata['Content-Type']).to(
32
32
  eq 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
@@ -35,13 +35,13 @@ describe Henkei do
35
35
 
36
36
  it 'reads metadata values with colons as strings' do
37
37
  data = File.read 'spec/samples/sample-metadata-values-with-colons.doc'
38
- metadata = Henkei.read :metadata, data
38
+ metadata = described_class.read :metadata, data
39
39
 
40
40
  expect(metadata['dc:title']).to eq 'problem: test'
41
41
  end
42
42
 
43
43
  it 'reads mimetype' do
44
- mimetype = Henkei.read :mimetype, data
44
+ mimetype = described_class.read :mimetype, data
45
45
 
46
46
  expect(mimetype.content_type).to(
47
47
  eq 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
@@ -53,7 +53,7 @@ describe Henkei do
53
53
  let(:data) { File.read 'spec/samples/pipe-error.png' }
54
54
 
55
55
  it 'returns an empty result' do
56
- text = Henkei.read :text, data
56
+ text = described_class.read :text, data
57
57
 
58
58
  expect(text).to eq ''
59
59
  end
@@ -61,15 +61,12 @@ describe Henkei do
61
61
  unless ci?
62
62
  context 'when `include_ocr` is enabled' do
63
63
  it 'returns parsed plain text in the image' do
64
- text = Henkei.read :text, data, include_ocr: true
64
+ text = described_class.read :text, data, include_ocr: true
65
65
 
66
66
  expect(text).to include <<~TEXT
67
67
  West Side
68
68
 
69
69
  Sea Island
70
- PP
71
-
72
- Richmond
73
70
  TEXT
74
71
  end
75
72
  end
@@ -79,11 +76,11 @@ describe Henkei do
79
76
 
80
77
  describe '.new' do
81
78
  it 'requires parameters' do
82
- expect { Henkei.new }.to raise_error ArgumentError
79
+ expect { described_class.new }.to raise_error ArgumentError
83
80
  end
84
81
 
85
82
  it 'accepts a root path' do
86
- henkei = Henkei.new 'spec/samples/sample.pages'
83
+ henkei = described_class.new File.join(Henkei::GEM_PATH, 'spec/samples/sample.pages')
87
84
 
88
85
  expect(henkei).to be_path
89
86
  expect(henkei).not_to be_uri
@@ -91,7 +88,7 @@ describe Henkei do
91
88
  end
92
89
 
93
90
  it 'accepts a relative path' do
94
- henkei = Henkei.new 'spec/samples/sample.pages'
91
+ henkei = described_class.new 'spec/samples/sample.pages'
95
92
 
96
93
  expect(henkei).to be_path
97
94
  expect(henkei).not_to be_uri
@@ -99,7 +96,7 @@ describe Henkei do
99
96
  end
100
97
 
101
98
  it 'accepts a path with spaces' do
102
- henkei = Henkei.new 'spec/samples/sample filename with spaces.pages'
99
+ henkei = described_class.new 'spec/samples/sample filename with spaces.pages'
103
100
 
104
101
  expect(henkei).to be_path
105
102
  expect(henkei).not_to be_uri
@@ -107,7 +104,7 @@ describe Henkei do
107
104
  end
108
105
 
109
106
  it 'accepts a URI' do
110
- henkei = Henkei.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
107
+ henkei = described_class.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
111
108
 
112
109
  expect(henkei).to be_uri
113
110
  expect(henkei).not_to be_path
@@ -116,7 +113,7 @@ describe Henkei do
116
113
 
117
114
  it 'accepts a stream or object that can be read' do
118
115
  File.open 'spec/samples/sample.pages', 'r' do |file|
119
- henkei = Henkei.new file
116
+ henkei = described_class.new file
120
117
 
121
118
  expect(henkei).to be_stream
122
119
  expect(henkei).not_to be_path
@@ -125,38 +122,38 @@ describe Henkei do
125
122
  end
126
123
 
127
124
  it 'refuses a path to a missing file' do
128
- expect { Henkei.new 'test/sample/missing.pages' }.to raise_error Errno::ENOENT
125
+ expect { described_class.new 'test/sample/missing.pages' }.to raise_error Errno::ENOENT
129
126
  end
130
127
 
131
128
  it 'refuses other objects' do
132
129
  [nil, 1, 1.1].each do |object|
133
- expect { Henkei.new object }.to raise_error TypeError
130
+ expect { described_class.new object }.to raise_error TypeError
134
131
  end
135
132
  end
136
133
  end
137
134
 
138
135
  describe '.creation_date' do
139
- let(:henkei) { Henkei.new 'spec/samples/sample.pages' }
136
+ let(:henkei) { described_class.new 'spec/samples/sample.pages' }
140
137
 
141
- it 'should return Time' do
138
+ it 'returns a Time' do
142
139
  expect(henkei.creation_date).to be_a Time
143
140
  end
144
141
  end
145
142
 
146
143
  describe '.java' do
147
144
  specify 'with no specified JAVA_HOME' do
148
- expect(Henkei.send(:java_path)).to eq 'java'
145
+ expect(described_class.send(:java_path)).to eq 'java'
149
146
  end
150
147
 
151
148
  specify 'with a specified JAVA_HOME' do
152
149
  ENV['JAVA_HOME'] = '/path/to/java/home'
153
150
 
154
- expect(Henkei.send(:java_path)).to eq '/path/to/java/home/bin/java'
151
+ expect(described_class.send(:java_path)).to eq '/path/to/java/home/bin/java'
155
152
  end
156
153
  end
157
154
 
158
- context 'initialized with a given path' do
159
- let(:henkei) { Henkei.new 'spec/samples/sample.pages' }
155
+ context 'when initialized with a given path' do
156
+ let(:henkei) { described_class.new 'spec/samples/sample.pages' }
160
157
 
161
158
  specify '#text reads text' do
162
159
  expect(henkei.text).to include 'The quick brown fox jumped over the lazy cat.'
@@ -167,7 +164,7 @@ describe Henkei do
167
164
  end
168
165
 
169
166
  context 'when passing in the `pipe-error.png` test file' do
170
- let(:henkei) { Henkei.new 'spec/samples/pipe-error.png' }
167
+ let(:henkei) { described_class.new 'spec/samples/pipe-error.png' }
171
168
 
172
169
  it '#text returns an empty result' do
173
170
  expect(henkei.text).to eq ''
@@ -189,9 +186,6 @@ describe Henkei do
189
186
  West Side
190
187
 
191
188
  Sea Island
192
- PP
193
-
194
- Richmond
195
189
  TEXT
196
190
  end
197
191
 
@@ -199,7 +193,7 @@ describe Henkei do
199
193
  expect(henkei.html(include_ocr: true)).to include '<meta name="tiff:ImageWidth" content="792"/>'
200
194
 
201
195
  html_body = Nokogiri::HTML(henkei.html(include_ocr: true)).at_xpath('//body')
202
- ['Anmore', 'Coquitlam', 'West Side', 'Sea Island', 'Richmond', 'Steveston'].each do |location|
196
+ ['West Side', 'Sea Island', 'Richmond', 'Steveston'].each do |location|
203
197
  expect(html_body.text).to include location
204
198
  end
205
199
  end
@@ -208,8 +202,8 @@ describe Henkei do
208
202
  end
209
203
  end
210
204
 
211
- context 'initialized with a given URI' do
212
- let(:henkei) { Henkei.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
205
+ context 'when initialized with a given URI' do
206
+ let(:henkei) { described_class.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx' }
213
207
 
214
208
  specify '#text reads text' do
215
209
  expect(henkei.text).to include 'Lorem ipsum dolor sit amet, consectetuer adipiscing elit.'
@@ -222,8 +216,8 @@ describe Henkei do
222
216
  end
223
217
  end
224
218
 
225
- context 'initialized with a given stream' do
226
- let(:henkei) { Henkei.new File.open('spec/samples/sample.pages', 'rb') }
219
+ context 'when initialized with a given stream' do
220
+ let(:henkei) { described_class.new File.open('spec/samples/sample.pages', 'rb') }
227
221
 
228
222
  specify '#text reads text' do
229
223
  expect(henkei.text).to include 'The quick brown fox jumped over the lazy cat.'
@@ -235,7 +229,7 @@ describe Henkei do
235
229
  end
236
230
 
237
231
  context 'when source is a remote PDF' do
238
- let(:henkei) { Henkei.new 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf' }
232
+ let(:henkei) { described_class.new 'https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf' }
239
233
 
240
234
  specify '#text reads text' do
241
235
  expect(henkei.text).to include 'Dummy PDF file'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.4.0.1
4
+ version: 2.4.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-05-28 00:00:00.000000000 Z
12
+ date: 2023-01-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -149,20 +149,6 @@ dependencies:
149
149
  - - "~>"
150
150
  - !ruby/object:Gem::Version
151
151
  version: '1.13'
152
- - !ruby/object:Gem::Dependency
153
- name: rubocop-rails
154
- requirement: !ruby/object:Gem::Requirement
155
- requirements:
156
- - - "~>"
157
- - !ruby/object:Gem::Version
158
- version: '2.14'
159
- type: :development
160
- prerelease: false
161
- version_requirements: !ruby/object:Gem::Requirement
162
- requirements:
163
- - - "~>"
164
- - !ruby/object:Gem::Version
165
- version: '2.14'
166
152
  - !ruby/object:Gem::Dependency
167
153
  name: rubocop-rake
168
154
  requirement: !ruby/object:Gem::Requirement
@@ -259,17 +245,17 @@ required_ruby_version: !ruby/object:Gem::Requirement
259
245
  requirements:
260
246
  - - ">="
261
247
  - !ruby/object:Gem::Version
262
- version: 2.6.0
248
+ version: 2.7.0
263
249
  - - "<"
264
250
  - !ruby/object:Gem::Version
265
- version: 3.2.0
251
+ version: 3.3.0
266
252
  required_rubygems_version: !ruby/object:Gem::Requirement
267
253
  requirements:
268
254
  - - ">="
269
255
  - !ruby/object:Gem::Version
270
256
  version: '0'
271
257
  requirements: []
272
- rubygems_version: 3.2.3
258
+ rubygems_version: 3.4.1
273
259
  signing_key:
274
260
  specification_version: 4
275
261
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,