henkei 2.3.0.1 → 2.4.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ad6513ead7b83776d281da0be4b857428812109d3cda95f98a6504434bb0c91f
4
- data.tar.gz: 1a0c9db6ca53ea23e5b28c1a0bdd2415d1dbcb284d1f9958ab0529b2e9e96f5a
3
+ metadata.gz: 52de171e9cba852d1551459674a12adfca8fd6166cb5e5707f3bc6a7cec9415c
4
+ data.tar.gz: '08807feea85b577c37153c290331c8f9c4441c2eef6c2600d630948a27b9ba5e'
5
5
  SHA512:
6
- metadata.gz: 9f270bdc844331e88a96b57f24d949ef3517345a3c6907b374945ac3f7a72066bc1eab81993ea61ae823d91a0e922ae5a08d382ac4b3e420662f9471299ea5cb
7
- data.tar.gz: 58ef5f1d2157d6de60d35817238cdb966e92ac880763ac077510efe5ebcd497cf6a231a5deab1510d680ed9bc0acdbf33217126a68feaea2592a0508682ab133
6
+ metadata.gz: 6481f5588edeb5cf7e806cd9326636d14e936bee95064de49017614e999f295f9683b3cbe6346dbb4d0611753288d7b628435514fe13ff0b039723de55262db1
7
+ data.tar.gz: 27a33e20e068708563324db99798abf56a81b25386899e41cc4b5e15097df11e7bc61f69b1705a5a0d537ac3488c608e28c6337e6bc8d4fe6af3b2aba6e19416
@@ -0,0 +1,37 @@
1
+ name: Test Henkei Ruby gem
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ env:
10
+ CI: true
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ ruby-version: ['2.6', '2.7', '3.0', '3.1']
18
+
19
+ steps:
20
+ - uses: actions/checkout@v2
21
+
22
+ - name: Set up Ruby
23
+ uses: ruby/setup-ruby@v1
24
+ with:
25
+ ruby-version: ${{ matrix.ruby-version }}
26
+ bundler-cache: true
27
+
28
+ - name: Lint code - Rubocop
29
+ run: bundle exec rubocop
30
+
31
+ - name: Run tests
32
+ run: bundle exec rspec
33
+
34
+ - name: Test & publish code coverage
35
+ uses: paambaati/codeclimate-action@v3.0.0
36
+ env:
37
+ CC_TEST_REPORTER_ID: bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Travis Build Status](http://img.shields.io/travis/abrom/henkei.svg?style=flat)](https://travis-ci.org/abrom/henkei)
1
+ [![Github Build Status](https://github.com/abrom/henkei/actions/workflows/test.yml/badge.svg)](https://github.com/abrom/henkei/actions/workflows/test.yml)
2
2
  [![Maintainability](https://api.codeclimate.com/v1/badges/d06e8c917cf7d8c07234/maintainability)](https://codeclimate.com/github/abrom/henkei/maintainability)
3
3
  [![Test Coverage](https://api.codeclimate.com/v1/badges/d06e8c917cf7d8c07234/test_coverage)](https://codeclimate.com/github/abrom/henkei/test_coverage)
4
4
  [![Gem Version](http://img.shields.io/gem/v/henkei.svg?style=flat)](#)
data/henkei.gemspec CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'henkei/version'
7
7
 
8
- Gem::Specification.new do |spec|
8
+ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
9
9
  spec.name = 'henkei'
10
10
  spec.version = Henkei::VERSION
11
11
  spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
@@ -13,13 +13,19 @@ Gem::Specification.new do |spec|
13
13
  spec.description = 'Read text and metadata from files and documents using Apache Tika toolkit'
14
14
  spec.summary = 'Read text and metadata from files and documents ' \
15
15
  '(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
16
- spec.homepage = 'http://github.com/abrom/henkei'
16
+ spec.homepage = 'https://github.com/abrom/henkei'
17
17
  spec.license = 'MIT'
18
18
  spec.required_ruby_version = ['>= 2.6.0', '< 3.2.0']
19
19
 
20
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
21
+ # delete this section to allow pushing this gem to any host.
22
+ raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.' unless spec.respond_to?(:metadata)
23
+
24
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
25
+ spec.metadata['rubygems_mfa_required'] = 'true'
26
+
20
27
  spec.files = `git ls-files`.split("\n")
21
28
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
22
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
23
29
  spec.require_paths = ['lib']
24
30
 
25
31
  spec.add_runtime_dependency 'json', '>= 1.8', '< 3'
@@ -30,6 +36,10 @@ Gem::Specification.new do |spec|
30
36
  spec.add_development_dependency 'rails', '~> 5.0'
31
37
  spec.add_development_dependency 'rake', '~> 12.3'
32
38
  spec.add_development_dependency 'rspec', '~> 3.7'
33
- spec.add_development_dependency 'rubocop', '~> 0.71'
34
- spec.add_development_dependency 'simplecov', '~> 0.15'
39
+ spec.add_development_dependency 'rubocop', '~> 1.26'
40
+ spec.add_development_dependency 'rubocop-performance', '~> 1.13'
41
+ spec.add_development_dependency 'rubocop-rails', '~> 2.14'
42
+ spec.add_development_dependency 'rubocop-rake', '~> 0.6'
43
+ spec.add_development_dependency 'rubocop-rspec', '~> 2.9'
44
+ spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
35
45
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Henkei
4
- VERSION = '2.3.0.1'
4
+ VERSION = '2.4.0.1'
5
5
  end
data/lib/henkei.rb CHANGED
@@ -25,14 +25,14 @@ require 'open3'
25
25
  # Read text and metadata from files and documents using Apache Tika toolkit
26
26
  class Henkei # rubocop:disable Metrics/ClassLength
27
27
  GEM_PATH = File.dirname(File.dirname(__FILE__))
28
- JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-2.3.0.jar')
28
+ JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-2.4.0.jar')
29
29
  CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
30
30
  CONFIG_WITHOUT_OCR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config-without-ocr.xml')
31
31
 
32
32
  def self.mimetype(content_type)
33
33
  if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
34
34
  warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
35
- ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
35
+ ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
36
36
  MIME::Types[content_type].first
37
37
  else
38
38
  MiniMime.lookup_by_content_type(content_type).tap do |object|
@@ -51,8 +51,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
51
51
  result = client_read(type, data, include_ocr: include_ocr)
52
52
 
53
53
  case type
54
- when :text then result
55
- when :html then result
54
+ when :text, :html then result
56
55
  when :metadata then JSON.parse(result)
57
56
  when :mimetype then Henkei.mimetype(JSON.parse(result)['Content-Type'])
58
57
  end
data/spec/henkei_spec.rb CHANGED
@@ -7,8 +7,8 @@ require 'nokogiri'
7
7
  # Some of the tests have been known to fail in weird and wonderful ways when `rails` is included
8
8
  require 'rails' if ENV['INCLUDE_RAILS'] == 'true'
9
9
 
10
- def travis_ci?
11
- ENV['CI'] == 'true' && ENV['TRAVIS'] == 'true'
10
+ def ci?
11
+ ENV['CI'] == 'true'
12
12
  end
13
13
 
14
14
  describe Henkei do
@@ -58,17 +58,17 @@ describe Henkei do
58
58
  expect(text).to eq ''
59
59
  end
60
60
 
61
- unless travis_ci?
61
+ unless ci?
62
62
  context 'when `include_ocr` is enabled' do
63
63
  it 'returns parsed plain text in the image' do
64
64
  text = Henkei.read :text, data, include_ocr: true
65
65
 
66
66
  expect(text).to include <<~TEXT
67
67
  West Side
68
-
68
+
69
69
  Sea Island
70
70
  PP
71
-
71
+
72
72
  Richmond
73
73
  TEXT
74
74
  end
@@ -182,15 +182,15 @@ describe Henkei do
182
182
  expect(henkei.mimetype.content_type).to eq 'image/png'
183
183
  end
184
184
 
185
- unless travis_ci?
185
+ unless ci?
186
186
  context 'when `include_ocr` is enabled' do
187
187
  it '#text returns plain text of parsed text in the image' do
188
188
  expect(henkei.text(include_ocr: true)).to include <<~TEXT
189
189
  West Side
190
-
190
+
191
191
  Sea Island
192
192
  PP
193
-
193
+
194
194
  Richmond
195
195
  TEXT
196
196
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.0.1
4
+ version: 2.4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-02-24 00:00:00.000000000 Z
12
+ date: 2022-05-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -127,14 +127,70 @@ dependencies:
127
127
  requirements:
128
128
  - - "~>"
129
129
  - !ruby/object:Gem::Version
130
- version: '0.71'
130
+ version: '1.26'
131
131
  type: :development
132
132
  prerelease: false
133
133
  version_requirements: !ruby/object:Gem::Requirement
134
134
  requirements:
135
135
  - - "~>"
136
136
  - !ruby/object:Gem::Version
137
- version: '0.71'
137
+ version: '1.26'
138
+ - !ruby/object:Gem::Dependency
139
+ name: rubocop-performance
140
+ requirement: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '1.13'
145
+ type: :development
146
+ prerelease: false
147
+ version_requirements: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '1.13'
152
+ - !ruby/object:Gem::Dependency
153
+ name: rubocop-rails
154
+ requirement: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '2.14'
159
+ type: :development
160
+ prerelease: false
161
+ version_requirements: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
164
+ - !ruby/object:Gem::Version
165
+ version: '2.14'
166
+ - !ruby/object:Gem::Dependency
167
+ name: rubocop-rake
168
+ requirement: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '0.6'
173
+ type: :development
174
+ prerelease: false
175
+ version_requirements: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - "~>"
178
+ - !ruby/object:Gem::Version
179
+ version: '0.6'
180
+ - !ruby/object:Gem::Dependency
181
+ name: rubocop-rspec
182
+ requirement: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - "~>"
185
+ - !ruby/object:Gem::Version
186
+ version: '2.9'
187
+ type: :development
188
+ prerelease: false
189
+ version_requirements: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - "~>"
192
+ - !ruby/object:Gem::Version
193
+ version: '2.9'
138
194
  - !ruby/object:Gem::Dependency
139
195
  name: simplecov
140
196
  requirement: !ruby/object:Gem::Requirement
@@ -142,6 +198,9 @@ dependencies:
142
198
  - - "~>"
143
199
  - !ruby/object:Gem::Version
144
200
  version: '0.15'
201
+ - - "<"
202
+ - !ruby/object:Gem::Version
203
+ version: '0.18'
145
204
  type: :development
146
205
  prerelease: false
147
206
  version_requirements: !ruby/object:Gem::Requirement
@@ -149,6 +208,9 @@ dependencies:
149
208
  - - "~>"
150
209
  - !ruby/object:Gem::Version
151
210
  version: '0.15'
211
+ - - "<"
212
+ - !ruby/object:Gem::Version
213
+ version: '0.18'
152
214
  description: Read text and metadata from files and documents using Apache Tika toolkit
153
215
  email:
154
216
  - erol.fornoles@gmail.com
@@ -158,10 +220,10 @@ executables:
158
220
  extensions: []
159
221
  extra_rdoc_files: []
160
222
  files:
223
+ - ".github/workflows/test.yml"
161
224
  - ".gitignore"
162
225
  - ".rspec"
163
226
  - ".rubocop.yml"
164
- - ".travis.yml"
165
227
  - Gemfile
166
228
  - LICENSE
167
229
  - NOTICE.txt
@@ -169,7 +231,7 @@ files:
169
231
  - Rakefile
170
232
  - bin/console
171
233
  - henkei.gemspec
172
- - jar/tika-app-2.3.0.jar
234
+ - jar/tika-app-2.4.0.jar
173
235
  - jar/tika-config-without-ocr.xml
174
236
  - jar/tika-config.xml
175
237
  - lib/henkei.rb
@@ -183,10 +245,12 @@ files:
183
245
  - spec/samples/sample-metadata-values-with-colons.doc
184
246
  - spec/samples/sample.docx
185
247
  - spec/samples/sample.pages
186
- homepage: http://github.com/abrom/henkei
248
+ homepage: https://github.com/abrom/henkei
187
249
  licenses:
188
250
  - MIT
189
- metadata: {}
251
+ metadata:
252
+ allowed_push_host: https://rubygems.org
253
+ rubygems_mfa_required: 'true'
190
254
  post_install_message:
191
255
  rdoc_options: []
192
256
  require_paths:
@@ -205,16 +269,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
205
269
  - !ruby/object:Gem::Version
206
270
  version: '0'
207
271
  requirements: []
208
- rubygems_version: 3.0.9
272
+ rubygems_version: 3.2.3
209
273
  signing_key:
210
274
  specification_version: 4
211
275
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
212
276
  .rtf, .pdf) using Apache Tika toolkit
213
- test_files:
214
- - spec/helper.rb
215
- - spec/henkei_spec.rb
216
- - spec/samples/pipe-error.png
217
- - spec/samples/sample filename with spaces.pages
218
- - spec/samples/sample-metadata-values-with-colons.doc
219
- - spec/samples/sample.docx
220
- - spec/samples/sample.pages
277
+ test_files: []
data/.travis.yml DELETED
@@ -1,32 +0,0 @@
1
- env:
2
- global:
3
- - CC_TEST_REPORTER_ID=bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
4
- jobs:
5
- - INCLUDE_RAILS=false
6
- - INCLUDE_RAILS=true
7
-
8
- language: ruby
9
- rvm:
10
- - 2.6
11
- - 2.7
12
- - 3.0
13
- - 3.1
14
-
15
- before_install:
16
- - gem update bundler
17
-
18
- install:
19
- - bundle install --jobs=3 --retry=3
20
- - gem install rubocop
21
-
22
- before_script:
23
- - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
24
- - chmod +x ./cc-test-reporter
25
- - ./cc-test-reporter before-build
26
-
27
- script:
28
- - bundle exec rubocop
29
- - bundle exec rspec
30
-
31
- after_script:
32
- - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT