henkei 2.3.0.1 → 2.4.0.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad6513ead7b83776d281da0be4b857428812109d3cda95f98a6504434bb0c91f
4
- data.tar.gz: 1a0c9db6ca53ea23e5b28c1a0bdd2415d1dbcb284d1f9958ab0529b2e9e96f5a
3
+ metadata.gz: 52de171e9cba852d1551459674a12adfca8fd6166cb5e5707f3bc6a7cec9415c
4
+ data.tar.gz: '08807feea85b577c37153c290331c8f9c4441c2eef6c2600d630948a27b9ba5e'
5
5
  SHA512:
6
- metadata.gz: 9f270bdc844331e88a96b57f24d949ef3517345a3c6907b374945ac3f7a72066bc1eab81993ea61ae823d91a0e922ae5a08d382ac4b3e420662f9471299ea5cb
7
- data.tar.gz: 58ef5f1d2157d6de60d35817238cdb966e92ac880763ac077510efe5ebcd497cf6a231a5deab1510d680ed9bc0acdbf33217126a68feaea2592a0508682ab133
6
+ metadata.gz: 6481f5588edeb5cf7e806cd9326636d14e936bee95064de49017614e999f295f9683b3cbe6346dbb4d0611753288d7b628435514fe13ff0b039723de55262db1
7
+ data.tar.gz: 27a33e20e068708563324db99798abf56a81b25386899e41cc4b5e15097df11e7bc61f69b1705a5a0d537ac3488c608e28c6337e6bc8d4fe6af3b2aba6e19416
data/.github/workflows/test.yml ADDED
@@ -0,0 +1,37 @@
1
+ name: Test Henkei Ruby gem
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ env:
10
+ CI: true
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ ruby-version: ['2.6', '2.7', '3.0', '3.1']
18
+
19
+ steps:
20
+ - uses: actions/checkout@v2
21
+
22
+ - name: Set up Ruby
23
+ uses: ruby/setup-ruby@v1
24
+ with:
25
+ ruby-version: ${{ matrix.ruby-version }}
26
+ bundler-cache: true
27
+
28
+ - name: Lint code - Rubocop
29
+ run: bundle exec rubocop
30
+
31
+ - name: Run tests
32
+ run: bundle exec rspec
33
+
34
+ - name: Test & publish code coverage
35
+ uses: paambaati/codeclimate-action@v3.0.0
36
+ env:
37
+ CC_TEST_REPORTER_ID: bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Travis Build Status](http://img.shields.io/travis/abrom/henkei.svg?style=flat)](https://travis-ci.org/abrom/henkei)
1
+ [![Github Build Status](https://github.com/abrom/henkei/actions/workflows/test.yml/badge.svg)](https://github.com/abrom/henkei/actions/workflows/test.yml)
2
2
  [![Maintainability](https://api.codeclimate.com/v1/badges/d06e8c917cf7d8c07234/maintainability)](https://codeclimate.com/github/abrom/henkei/maintainability)
3
3
  [![Test Coverage](https://api.codeclimate.com/v1/badges/d06e8c917cf7d8c07234/test_coverage)](https://codeclimate.com/github/abrom/henkei/test_coverage)
4
4
  [![Gem Version](http://img.shields.io/gem/v/henkei.svg?style=flat)](#)
data/henkei.gemspec CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'henkei/version'
7
7
 
8
- Gem::Specification.new do |spec|
8
+ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
9
9
  spec.name = 'henkei'
10
10
  spec.version = Henkei::VERSION
11
11
  spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
@@ -13,13 +13,19 @@ Gem::Specification.new do |spec|
13
13
  spec.description = 'Read text and metadata from files and documents using Apache Tika toolkit'
14
14
  spec.summary = 'Read text and metadata from files and documents ' \
15
15
  '(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
16
- spec.homepage = 'http://github.com/abrom/henkei'
16
+ spec.homepage = 'https://github.com/abrom/henkei'
17
17
  spec.license = 'MIT'
18
18
  spec.required_ruby_version = ['>= 2.6.0', '< 3.2.0']
19
19
 
20
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
21
+ # delete this section to allow pushing this gem to any host.
22
+ raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.' unless spec.respond_to?(:metadata)
23
+
24
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
25
+ spec.metadata['rubygems_mfa_required'] = 'true'
26
+
20
27
  spec.files = `git ls-files`.split("\n")
21
28
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
22
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
23
29
  spec.require_paths = ['lib']
24
30
 
25
31
  spec.add_runtime_dependency 'json', '>= 1.8', '< 3'
@@ -30,6 +36,10 @@ Gem::Specification.new do |spec|
30
36
  spec.add_development_dependency 'rails', '~> 5.0'
31
37
  spec.add_development_dependency 'rake', '~> 12.3'
32
38
  spec.add_development_dependency 'rspec', '~> 3.7'
33
- spec.add_development_dependency 'rubocop', '~> 0.71'
34
- spec.add_development_dependency 'simplecov', '~> 0.15'
39
+ spec.add_development_dependency 'rubocop', '~> 1.26'
40
+ spec.add_development_dependency 'rubocop-performance', '~> 1.13'
41
+ spec.add_development_dependency 'rubocop-rails', '~> 2.14'
42
+ spec.add_development_dependency 'rubocop-rake', '~> 0.6'
43
+ spec.add_development_dependency 'rubocop-rspec', '~> 2.9'
44
+ spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
35
45
  end
data/lib/henkei/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Henkei
4
- VERSION = '2.3.0.1'
4
+ VERSION = '2.4.0.1'
5
5
  end
data/lib/henkei.rb CHANGED
@@ -25,14 +25,14 @@ require 'open3'
25
25
  # Read text and metadata from files and documents using Apache Tika toolkit
26
26
  class Henkei # rubocop:disable Metrics/ClassLength
27
27
  GEM_PATH = File.dirname(File.dirname(__FILE__))
28
- JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-2.3.0.jar')
28
+ JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-2.4.0.jar')
29
29
  CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
30
30
  CONFIG_WITHOUT_OCR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config-without-ocr.xml')
31
31
 
32
32
  def self.mimetype(content_type)
33
33
  if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
34
34
  warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
35
- ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
35
+ ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
36
36
  MIME::Types[content_type].first
37
37
  else
38
38
  MiniMime.lookup_by_content_type(content_type).tap do |object|
@@ -51,8 +51,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
51
51
  result = client_read(type, data, include_ocr: include_ocr)
52
52
 
53
53
  case type
54
- when :text then result
55
- when :html then result
54
+ when :text, :html then result
56
55
  when :metadata then JSON.parse(result)
57
56
  when :mimetype then Henkei.mimetype(JSON.parse(result)['Content-Type'])
58
57
  end
data/spec/henkei_spec.rb CHANGED
@@ -7,8 +7,8 @@ require 'nokogiri'
7
7
  # Some of the tests have been known to fail in weird and wonderful ways when `rails` is included
8
8
  require 'rails' if ENV['INCLUDE_RAILS'] == 'true'
9
9
 
10
- def travis_ci?
11
- ENV['CI'] == 'true' && ENV['TRAVIS'] == 'true'
10
+ def ci?
11
+ ENV['CI'] == 'true'
12
12
  end
13
13
 
14
14
  describe Henkei do
@@ -58,17 +58,17 @@ describe Henkei do
58
58
  expect(text).to eq ''
59
59
  end
60
60
 
61
- unless travis_ci?
61
+ unless ci?
62
62
  context 'when `include_ocr` is enabled' do
63
63
  it 'returns parsed plain text in the image' do
64
64
  text = Henkei.read :text, data, include_ocr: true
65
65
 
66
66
  expect(text).to include <<~TEXT
67
67
  West Side
68
-
68
+
69
69
  Sea Island
70
70
  PP
71
-
71
+
72
72
  Richmond
73
73
  TEXT
74
74
  end
@@ -182,15 +182,15 @@ describe Henkei do
182
182
  expect(henkei.mimetype.content_type).to eq 'image/png'
183
183
  end
184
184
 
185
- unless travis_ci?
185
+ unless ci?
186
186
  context 'when `include_ocr` is enabled' do
187
187
  it '#text returns plain text of parsed text in the image' do
188
188
  expect(henkei.text(include_ocr: true)).to include <<~TEXT
189
189
  West Side
190
-
190
+
191
191
  Sea Island
192
192
  PP
193
-
193
+
194
194
  Richmond
195
195
  TEXT
196
196
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.0.1
4
+ version: 2.4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-02-24 00:00:00.000000000 Z
12
+ date: 2022-05-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -127,14 +127,70 @@ dependencies:
127
127
  requirements:
128
128
  - - "~>"
129
129
  - !ruby/object:Gem::Version
130
- version: '0.71'
130
+ version: '1.26'
131
131
  type: :development
132
132
  prerelease: false
133
133
  version_requirements: !ruby/object:Gem::Requirement
134
134
  requirements:
135
135
  - - "~>"
136
136
  - !ruby/object:Gem::Version
137
- version: '0.71'
137
+ version: '1.26'
138
+ - !ruby/object:Gem::Dependency
139
+ name: rubocop-performance
140
+ requirement: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '1.13'
145
+ type: :development
146
+ prerelease: false
147
+ version_requirements: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '1.13'
152
+ - !ruby/object:Gem::Dependency
153
+ name: rubocop-rails
154
+ requirement: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '2.14'
159
+ type: :development
160
+ prerelease: false
161
+ version_requirements: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
164
+ - !ruby/object:Gem::Version
165
+ version: '2.14'
166
+ - !ruby/object:Gem::Dependency
167
+ name: rubocop-rake
168
+ requirement: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '0.6'
173
+ type: :development
174
+ prerelease: false
175
+ version_requirements: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - "~>"
178
+ - !ruby/object:Gem::Version
179
+ version: '0.6'
180
+ - !ruby/object:Gem::Dependency
181
+ name: rubocop-rspec
182
+ requirement: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - "~>"
185
+ - !ruby/object:Gem::Version
186
+ version: '2.9'
187
+ type: :development
188
+ prerelease: false
189
+ version_requirements: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - "~>"
192
+ - !ruby/object:Gem::Version
193
+ version: '2.9'
138
194
  - !ruby/object:Gem::Dependency
139
195
  name: simplecov
140
196
  requirement: !ruby/object:Gem::Requirement
@@ -142,6 +198,9 @@ dependencies:
142
198
  - - "~>"
143
199
  - !ruby/object:Gem::Version
144
200
  version: '0.15'
201
+ - - "<"
202
+ - !ruby/object:Gem::Version
203
+ version: '0.18'
145
204
  type: :development
146
205
  prerelease: false
147
206
  version_requirements: !ruby/object:Gem::Requirement
@@ -149,6 +208,9 @@ dependencies:
149
208
  - - "~>"
150
209
  - !ruby/object:Gem::Version
151
210
  version: '0.15'
211
+ - - "<"
212
+ - !ruby/object:Gem::Version
213
+ version: '0.18'
152
214
  description: Read text and metadata from files and documents using Apache Tika toolkit
153
215
  email:
154
216
  - erol.fornoles@gmail.com
@@ -158,10 +220,10 @@ executables:
158
220
  extensions: []
159
221
  extra_rdoc_files: []
160
222
  files:
223
+ - ".github/workflows/test.yml"
161
224
  - ".gitignore"
162
225
  - ".rspec"
163
226
  - ".rubocop.yml"
164
- - ".travis.yml"
165
227
  - Gemfile
166
228
  - LICENSE
167
229
  - NOTICE.txt
@@ -169,7 +231,7 @@ files:
169
231
  - Rakefile
170
232
  - bin/console
171
233
  - henkei.gemspec
172
- - jar/tika-app-2.3.0.jar
234
+ - jar/tika-app-2.4.0.jar
173
235
  - jar/tika-config-without-ocr.xml
174
236
  - jar/tika-config.xml
175
237
  - lib/henkei.rb
@@ -183,10 +245,12 @@ files:
183
245
  - spec/samples/sample-metadata-values-with-colons.doc
184
246
  - spec/samples/sample.docx
185
247
  - spec/samples/sample.pages
186
- homepage: http://github.com/abrom/henkei
248
+ homepage: https://github.com/abrom/henkei
187
249
  licenses:
188
250
  - MIT
189
- metadata: {}
251
+ metadata:
252
+ allowed_push_host: https://rubygems.org
253
+ rubygems_mfa_required: 'true'
190
254
  post_install_message:
191
255
  rdoc_options: []
192
256
  require_paths:
@@ -205,16 +269,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
205
269
  - !ruby/object:Gem::Version
206
270
  version: '0'
207
271
  requirements: []
208
- rubygems_version: 3.0.9
272
+ rubygems_version: 3.2.3
209
273
  signing_key:
210
274
  specification_version: 4
211
275
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
212
276
  .rtf, .pdf) using Apache Tika toolkit
213
- test_files:
214
- - spec/helper.rb
215
- - spec/henkei_spec.rb
216
- - spec/samples/pipe-error.png
217
- - spec/samples/sample filename with spaces.pages
218
- - spec/samples/sample-metadata-values-with-colons.doc
219
- - spec/samples/sample.docx
220
- - spec/samples/sample.pages
277
+ test_files: []
data/.travis.yml DELETED
@@ -1,32 +0,0 @@
1
- env:
2
- global:
3
- - CC_TEST_REPORTER_ID=bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
4
- jobs:
5
- - INCLUDE_RAILS=false
6
- - INCLUDE_RAILS=true
7
-
8
- language: ruby
9
- rvm:
10
- - 2.6
11
- - 2.7
12
- - 3.0
13
- - 3.1
14
-
15
- before_install:
16
- - gem update bundler
17
-
18
- install:
19
- - bundle install --jobs=3 --retry=3
20
- - gem install rubocop
21
-
22
- before_script:
23
- - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
24
- - chmod +x ./cc-test-reporter
25
- - ./cc-test-reporter before-build
26
-
27
- script:
28
- - bundle exec rubocop
29
- - bundle exec rspec
30
-
31
- after_script:
32
- - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT