henkei 1.27.1 → 1.28.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7f17479851871389aaeaa73798025c62d10cabc40a6ac703c2b25385258ae179
4
- data.tar.gz: 3e1ff327a0ff55ca55b8ce2d44ffce9bd493dcf2894129ecfc143b965d765e84
3
+ metadata.gz: 9e77d196ca581d7e2d12b1045710115fd9ab6ef903b4a8142d473591956f526f
4
+ data.tar.gz: fb66905068f383d12a104c128b0ae7458964ab89e3076f497c6b5d0855d2c532
5
5
  SHA512:
6
- metadata.gz: ab3a4254edd84f35c990013f684b1f3fbfb98f19c8d6e74aa3f27ffd4e3280c8e84b1679508b751af5178f3cd827cc5fb717d5d8819819fcc1cf77057272bae5
7
- data.tar.gz: 8c6d9fcc938653550877fdce23de4a15d0427e41de023a113fd27c3e0bf213765df445b9d14e45496302148a1dc8a7120591505ed68e55b50f4927bdcafb940b
6
+ metadata.gz: ab2f691265c8b721608da07c47cf185838e969a60d631e0bd2a8a50b201caaa71743c0b30d2252f1fd08e0d7f0ef3a2e6cbfedeeeba74c6c869bf4a0ac0292dd
7
+ data.tar.gz: ac5bb11ebe786907c207ff1fe8115486deba3f361f42b179104cf119a714c6d778746f412a670df96e9ef7a6cf897fc15f1ddea1690d33a8646a42dcb0d375a2
data/.github/workflows/test.yml ADDED
@@ -0,0 +1,37 @@
1
+ name: Test Henkei Ruby gem
2
+
3
+ on:
4
+ push:
5
+ branches: [1.x]
6
+ pull_request:
7
+ branches: [1.x]
8
+
9
+ env:
10
+ CI: true
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ ruby-version: ['2.6', '2.7', '3.0', '3.1']
18
+
19
+ steps:
20
+ - uses: actions/checkout@v2
21
+
22
+ - name: Set up Ruby
23
+ uses: ruby/setup-ruby@v1
24
+ with:
25
+ ruby-version: ${{ matrix.ruby-version }}
26
+ bundler-cache: true
27
+
28
+ - name: Lint code - Rubocop
29
+ run: bundle exec rubocop
30
+
31
+ - name: Run tests
32
+ run: bundle exec rspec
33
+
34
+ - name: Test & publish code coverage
35
+ uses: paambaati/codeclimate-action@v3.0.0
36
+ env:
37
+ CC_TEST_REPORTER_ID: bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
data/.rubocop.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  AllCops:
2
2
  NewCops: enable
3
+ TargetRubyVersion: 2.6
3
4
 
4
5
  Layout/EmptyLinesAroundAttributeAccessor:
5
6
  Enabled: true
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Travis Build Status](http://img.shields.io/travis/abrom/henkei.svg?style=flat)](https://travis-ci.org/abrom/henkei)
1
+ [![Github Build Status](https://github.com/abrom/henkei/actions/workflows/test.yml/badge.svg)](https://github.com/abrom/henkei/actions/workflows/test.yml)
2
2
  [![Maintainability](https://api.codeclimate.com/v1/badges/d06e8c917cf7d8c07234/maintainability)](https://codeclimate.com/github/abrom/henkei/maintainability)
3
3
  [![Test Coverage](https://api.codeclimate.com/v1/badges/d06e8c917cf7d8c07234/test_coverage)](https://codeclimate.com/github/abrom/henkei/test_coverage)
4
4
  [![Gem Version](http://img.shields.io/gem/v/henkei.svg?style=flat)](#)
data/henkei.gemspec CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'henkei/version'
7
7
 
8
- Gem::Specification.new do |spec|
8
+ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
9
9
  spec.name = 'henkei'
10
10
  spec.version = Henkei::VERSION
11
11
  spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
@@ -13,13 +13,19 @@ Gem::Specification.new do |spec|
13
13
  spec.description = 'Read text and metadata from files and documents using Apache Tika toolkit'
14
14
  spec.summary = 'Read text and metadata from files and documents ' \
15
15
  '(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
16
- spec.homepage = 'http://github.com/abrom/henkei'
16
+ spec.homepage = 'https://github.com/abrom/henkei'
17
17
  spec.license = 'MIT'
18
- spec.required_ruby_version = ['>= 2.4.0', '< 3.1.0']
18
+ spec.required_ruby_version = ['>= 2.6.0', '< 3.2.0']
19
+
20
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
21
+ # delete this section to allow pushing this gem to any host.
22
+ raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.' unless spec.respond_to?(:metadata)
23
+
24
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
25
+ spec.metadata['rubygems_mfa_required'] = 'true'
19
26
 
20
27
  spec.files = `git ls-files`.split("\n")
21
28
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
22
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
23
29
  spec.require_paths = ['lib']
24
30
 
25
31
  spec.add_runtime_dependency 'json', '>= 1.8', '< 3'
@@ -29,6 +35,10 @@ Gem::Specification.new do |spec|
29
35
  spec.add_development_dependency 'rails', '~> 5.0'
30
36
  spec.add_development_dependency 'rake', '~> 12.3'
31
37
  spec.add_development_dependency 'rspec', '~> 3.7'
32
- spec.add_development_dependency 'rubocop', '~> 0.71'
33
- spec.add_development_dependency 'simplecov', '~> 0.15'
38
+ spec.add_development_dependency 'rubocop', '~> 1.26'
39
+ spec.add_development_dependency 'rubocop-performance', '~> 1.13'
40
+ spec.add_development_dependency 'rubocop-rails', '~> 2.14'
41
+ spec.add_development_dependency 'rubocop-rake', '~> 0.6'
42
+ spec.add_development_dependency 'rubocop-rspec', '~> 2.9'
43
+ spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
34
44
  end
data/jar/tika-config.xml CHANGED
@@ -1,3 +1,4 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
1
2
  <properties>
2
3
  <service-loader initializableProblemHandler="ignore"/>
3
4
  </properties>
data/lib/henkei/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Henkei
4
- VERSION = '1.27.1'
4
+ VERSION = '1.28.1'
5
5
  end
data/lib/henkei.rb CHANGED
@@ -25,7 +25,7 @@ require 'open3'
25
25
  # Read text and metadata from files and documents using Apache Tika toolkit
26
26
  class Henkei # rubocop:disable Metrics/ClassLength
27
27
  GEM_PATH = File.dirname(File.dirname(__FILE__))
28
- JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.27.jar')
28
+ JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.28.jar')
29
29
  CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
30
30
  DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port
31
31
 
@@ -35,7 +35,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
35
35
  def self.mimetype(content_type)
36
36
  if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
37
37
  warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
38
- ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
38
+ ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
39
39
  MIME::Types[content_type].first
40
40
  else
41
41
  MiniMime.lookup_by_content_type(content_type).tap do |object|
@@ -54,8 +54,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
54
54
  result = @@server_pid ? server_read(data) : client_read(type, data)
55
55
 
56
56
  case type
57
- when :text then result
58
- when :html then result
57
+ when :text, :html then result
59
58
  when :metadata then JSON.parse(result)
60
59
  when :mimetype then Henkei.mimetype(JSON.parse(result)['Content-Type'])
61
60
  end
@@ -246,7 +245,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
246
245
  # Internal helper for calling to Tika library directly
247
246
  #
248
247
  def self.client_read(type, data)
249
- Open3.capture2(*tika_command(type), stdin_data: data, binmode: true).first
248
+ filter_response Open3.capture2(*tika_command(type), stdin_data: data, binmode: true).first
250
249
  end
251
250
  private_class_method :client_read
252
251
 
@@ -273,7 +272,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
273
272
 
274
273
  resp << chunk
275
274
  end
276
- resp
275
+ filter_response resp
277
276
  end
278
277
  private_class_method :server_read
279
278
 
@@ -297,4 +296,14 @@ class Henkei # rubocop:disable Metrics/ClassLength
297
296
  }[type]
298
297
  end
299
298
  private_class_method :switch_for_type
299
+
300
+ # Internal helper to remove erroneous output
301
+ #
302
+ def self.filter_response(response)
303
+ response.gsub(
304
+ /\AWARNING: sun\.reflect\.Reflection\.getCallerClass is not supported\. This will impact performance\.\n/,
305
+ ''
306
+ )
307
+ end
308
+ private_class_method :filter_response
300
309
  end
data/spec/henkei_spec.rb CHANGED
@@ -201,37 +201,33 @@ describe Henkei do
201
201
 
202
202
  context 'working as server mode' do
203
203
  specify '#starts and kills server' do
204
- begin
205
- Henkei.server(:text)
206
- expect(Henkei.class_variable_get(:@@server_pid)).not_to be_nil
207
- expect(Henkei.class_variable_get(:@@server_port)).not_to be_nil
208
-
209
- s = TCPSocket.new('localhost', Henkei.class_variable_get(:@@server_port))
210
- expect(s).to be_a TCPSocket
211
- s.close
212
- ensure
213
- port = Henkei.class_variable_get(:@@server_port)
214
- Henkei.kill_server!
215
- sleep 2
216
- expect { TCPSocket.new('localhost', port) }.to raise_error Errno::ECONNREFUSED
217
- end
204
+ Henkei.server(:text)
205
+ expect(Henkei.class_variable_get(:@@server_pid)).not_to be_nil
206
+ expect(Henkei.class_variable_get(:@@server_port)).not_to be_nil
207
+
208
+ s = TCPSocket.new('localhost', Henkei.class_variable_get(:@@server_port))
209
+ expect(s).to be_a TCPSocket
210
+ s.close
211
+ ensure
212
+ port = Henkei.class_variable_get(:@@server_port)
213
+ Henkei.kill_server!
214
+ sleep 2
215
+ expect { TCPSocket.new('localhost', port) }.to raise_error Errno::ECONNREFUSED
218
216
  end
219
217
 
220
218
  specify '#runs samples through server mode' do
221
- begin
222
- Henkei.server(:text)
223
- expect(Henkei.new('spec/samples/sample.pages').text).to(
224
- include 'The quick brown fox jumped over the lazy cat.'
225
- )
226
- expect(Henkei.new('spec/samples/sample filename with spaces.pages').text).to(
227
- include 'The quick brown fox jumped over the lazy cat.'
228
- )
229
- expect(Henkei.new('spec/samples/sample.docx').text).to(
230
- include 'The quick brown fox jumped over the lazy cat.'
231
- )
232
- ensure
233
- Henkei.kill_server!
234
- end
219
+ Henkei.server(:text)
220
+ expect(Henkei.new('spec/samples/sample.pages').text).to(
221
+ include 'The quick brown fox jumped over the lazy cat.'
222
+ )
223
+ expect(Henkei.new('spec/samples/sample filename with spaces.pages').text).to(
224
+ include 'The quick brown fox jumped over the lazy cat.'
225
+ )
226
+ expect(Henkei.new('spec/samples/sample.docx').text).to(
227
+ include 'The quick brown fox jumped over the lazy cat.'
228
+ )
229
+ ensure
230
+ Henkei.kill_server!
235
231
  end
236
232
  end
237
233
  end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.27.1
4
+ version: 1.28.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
8
8
  - Andrew Bromwich
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-07-19 00:00:00.000000000 Z
12
+ date: 2022-05-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -113,14 +113,70 @@ dependencies:
113
113
  requirements:
114
114
  - - "~>"
115
115
  - !ruby/object:Gem::Version
116
- version: '0.71'
116
+ version: '1.26'
117
117
  type: :development
118
118
  prerelease: false
119
119
  version_requirements: !ruby/object:Gem::Requirement
120
120
  requirements:
121
121
  - - "~>"
122
122
  - !ruby/object:Gem::Version
123
- version: '0.71'
123
+ version: '1.26'
124
+ - !ruby/object:Gem::Dependency
125
+ name: rubocop-performance
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '1.13'
131
+ type: :development
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '1.13'
138
+ - !ruby/object:Gem::Dependency
139
+ name: rubocop-rails
140
+ requirement: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '2.14'
145
+ type: :development
146
+ prerelease: false
147
+ version_requirements: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '2.14'
152
+ - !ruby/object:Gem::Dependency
153
+ name: rubocop-rake
154
+ requirement: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '0.6'
159
+ type: :development
160
+ prerelease: false
161
+ version_requirements: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
164
+ - !ruby/object:Gem::Version
165
+ version: '0.6'
166
+ - !ruby/object:Gem::Dependency
167
+ name: rubocop-rspec
168
+ requirement: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '2.9'
173
+ type: :development
174
+ prerelease: false
175
+ version_requirements: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - "~>"
178
+ - !ruby/object:Gem::Version
179
+ version: '2.9'
124
180
  - !ruby/object:Gem::Dependency
125
181
  name: simplecov
126
182
  requirement: !ruby/object:Gem::Requirement
@@ -128,6 +184,9 @@ dependencies:
128
184
  - - "~>"
129
185
  - !ruby/object:Gem::Version
130
186
  version: '0.15'
187
+ - - "<"
188
+ - !ruby/object:Gem::Version
189
+ version: '0.18'
131
190
  type: :development
132
191
  prerelease: false
133
192
  version_requirements: !ruby/object:Gem::Requirement
@@ -135,6 +194,9 @@ dependencies:
135
194
  - - "~>"
136
195
  - !ruby/object:Gem::Version
137
196
  version: '0.15'
197
+ - - "<"
198
+ - !ruby/object:Gem::Version
199
+ version: '0.18'
138
200
  description: Read text and metadata from files and documents using Apache Tika toolkit
139
201
  email:
140
202
  - erol.fornoles@gmail.com
@@ -144,10 +206,10 @@ executables:
144
206
  extensions: []
145
207
  extra_rdoc_files: []
146
208
  files:
209
+ - ".github/workflows/test.yml"
147
210
  - ".gitignore"
148
211
  - ".rspec"
149
212
  - ".rubocop.yml"
150
- - ".travis.yml"
151
213
  - Gemfile
152
214
  - LICENSE
153
215
  - NOTICE.txt
@@ -155,7 +217,7 @@ files:
155
217
  - Rakefile
156
218
  - bin/console
157
219
  - henkei.gemspec
158
- - jar/tika-app-1.27.jar
220
+ - jar/tika-app-1.28.jar
159
221
  - jar/tika-config.xml
160
222
  - lib/henkei.rb
161
223
  - lib/henkei/configuration.rb
@@ -168,11 +230,13 @@ files:
168
230
  - spec/samples/sample-metadata-values-with-colons.doc
169
231
  - spec/samples/sample.docx
170
232
  - spec/samples/sample.pages
171
- homepage: http://github.com/abrom/henkei
233
+ homepage: https://github.com/abrom/henkei
172
234
  licenses:
173
235
  - MIT
174
- metadata: {}
175
- post_install_message:
236
+ metadata:
237
+ allowed_push_host: https://rubygems.org
238
+ rubygems_mfa_required: 'true'
239
+ post_install_message:
176
240
  rdoc_options: []
177
241
  require_paths:
178
242
  - lib
@@ -180,26 +244,19 @@ required_ruby_version: !ruby/object:Gem::Requirement
180
244
  requirements:
181
245
  - - ">="
182
246
  - !ruby/object:Gem::Version
183
- version: 2.4.0
247
+ version: 2.6.0
184
248
  - - "<"
185
249
  - !ruby/object:Gem::Version
186
- version: 3.1.0
250
+ version: 3.2.0
187
251
  required_rubygems_version: !ruby/object:Gem::Requirement
188
252
  requirements:
189
253
  - - ">="
190
254
  - !ruby/object:Gem::Version
191
255
  version: '0'
192
256
  requirements: []
193
- rubygems_version: 3.0.6
194
- signing_key:
257
+ rubygems_version: 3.2.3
258
+ signing_key:
195
259
  specification_version: 4
196
260
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
197
261
  .rtf, .pdf) using Apache Tika toolkit
198
- test_files:
199
- - spec/helper.rb
200
- - spec/henkei_spec.rb
201
- - spec/samples/pipe-error.png
202
- - spec/samples/sample filename with spaces.pages
203
- - spec/samples/sample-metadata-values-with-colons.doc
204
- - spec/samples/sample.docx
205
- - spec/samples/sample.pages
262
+ test_files: []
data/.travis.yml DELETED
@@ -1,32 +0,0 @@
1
- env:
2
- global:
3
- - CC_TEST_REPORTER_ID=bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
4
- jobs:
5
- - INCLUDE_RAILS=false
6
- - INCLUDE_RAILS=true
7
-
8
- language: ruby
9
- rvm:
10
- - 2.5
11
- - 2.6
12
- - 2.7
13
- - 3.0
14
-
15
- before_install:
16
- - gem update bundler
17
-
18
- install:
19
- - bundle install --jobs=3 --retry=3
20
- - gem install rubocop
21
-
22
- before_script:
23
- - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
24
- - chmod +x ./cc-test-reporter
25
- - ./cc-test-reporter before-build
26
-
27
- script:
28
- - bundle exec rubocop
29
- - bundle exec rspec
30
-
31
- after_script:
32
- - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT