henkei 1.27.1 → 1.28.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7f17479851871389aaeaa73798025c62d10cabc40a6ac703c2b25385258ae179
4
- data.tar.gz: 3e1ff327a0ff55ca55b8ce2d44ffce9bd493dcf2894129ecfc143b965d765e84
3
+ metadata.gz: 9e77d196ca581d7e2d12b1045710115fd9ab6ef903b4a8142d473591956f526f
4
+ data.tar.gz: fb66905068f383d12a104c128b0ae7458964ab89e3076f497c6b5d0855d2c532
5
5
  SHA512:
6
- metadata.gz: ab3a4254edd84f35c990013f684b1f3fbfb98f19c8d6e74aa3f27ffd4e3280c8e84b1679508b751af5178f3cd827cc5fb717d5d8819819fcc1cf77057272bae5
7
- data.tar.gz: 8c6d9fcc938653550877fdce23de4a15d0427e41de023a113fd27c3e0bf213765df445b9d14e45496302148a1dc8a7120591505ed68e55b50f4927bdcafb940b
6
+ metadata.gz: ab2f691265c8b721608da07c47cf185838e969a60d631e0bd2a8a50b201caaa71743c0b30d2252f1fd08e0d7f0ef3a2e6cbfedeeeba74c6c869bf4a0ac0292dd
7
+ data.tar.gz: ac5bb11ebe786907c207ff1fe8115486deba3f361f42b179104cf119a714c6d778746f412a670df96e9ef7a6cf897fc15f1ddea1690d33a8646a42dcb0d375a2
data/.github/workflows/test.yml ADDED
@@ -0,0 +1,37 @@
1
+ name: Test Henkei Ruby gem
2
+
3
+ on:
4
+ push:
5
+ branches: [1.x]
6
+ pull_request:
7
+ branches: [1.x]
8
+
9
+ env:
10
+ CI: true
11
+
12
+ jobs:
13
+ test:
14
+ runs-on: ubuntu-latest
15
+ strategy:
16
+ matrix:
17
+ ruby-version: ['2.6', '2.7', '3.0', '3.1']
18
+
19
+ steps:
20
+ - uses: actions/checkout@v2
21
+
22
+ - name: Set up Ruby
23
+ uses: ruby/setup-ruby@v1
24
+ with:
25
+ ruby-version: ${{ matrix.ruby-version }}
26
+ bundler-cache: true
27
+
28
+ - name: Lint code - Rubocop
29
+ run: bundle exec rubocop
30
+
31
+ - name: Run tests
32
+ run: bundle exec rspec
33
+
34
+ - name: Test & publish code coverage
35
+ uses: paambaati/codeclimate-action@v3.0.0
36
+ env:
37
+ CC_TEST_REPORTER_ID: bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
data/.rubocop.yml CHANGED
@@ -1,5 +1,6 @@
1
1
  AllCops:
2
2
  NewCops: enable
3
+ TargetRubyVersion: 2.6
3
4
 
4
5
  Layout/EmptyLinesAroundAttributeAccessor:
5
6
  Enabled: true
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- [![Travis Build Status](http://img.shields.io/travis/abrom/henkei.svg?style=flat)](https://travis-ci.org/abrom/henkei)
1
+ [![Github Build Status](https://github.com/abrom/henkei/actions/workflows/test.yml/badge.svg)](https://github.com/abrom/henkei/actions/workflows/test.yml)
2
2
  [![Maintainability](https://api.codeclimate.com/v1/badges/d06e8c917cf7d8c07234/maintainability)](https://codeclimate.com/github/abrom/henkei/maintainability)
3
3
  [![Test Coverage](https://api.codeclimate.com/v1/badges/d06e8c917cf7d8c07234/test_coverage)](https://codeclimate.com/github/abrom/henkei/test_coverage)
4
4
  [![Gem Version](http://img.shields.io/gem/v/henkei.svg?style=flat)](#)
data/henkei.gemspec CHANGED
@@ -5,7 +5,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
6
  require 'henkei/version'
7
7
 
8
- Gem::Specification.new do |spec|
8
+ Gem::Specification.new do |spec| # rubocop:disable Metrics/BlockLength
9
9
  spec.name = 'henkei'
10
10
  spec.version = Henkei::VERSION
11
11
  spec.authors = ['Erol Fornoles', 'Andrew Bromwich']
@@ -13,13 +13,19 @@ Gem::Specification.new do |spec|
13
13
  spec.description = 'Read text and metadata from files and documents using Apache Tika toolkit'
14
14
  spec.summary = 'Read text and metadata from files and documents ' \
15
15
  '(.doc, .docx, .pages, .odt, .rtf, .pdf) using Apache Tika toolkit'
16
- spec.homepage = 'http://github.com/abrom/henkei'
16
+ spec.homepage = 'https://github.com/abrom/henkei'
17
17
  spec.license = 'MIT'
18
- spec.required_ruby_version = ['>= 2.4.0', '< 3.1.0']
18
+ spec.required_ruby_version = ['>= 2.6.0', '< 3.2.0']
19
+
20
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
21
+ # delete this section to allow pushing this gem to any host.
22
+ raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.' unless spec.respond_to?(:metadata)
23
+
24
+ spec.metadata['allowed_push_host'] = 'https://rubygems.org'
25
+ spec.metadata['rubygems_mfa_required'] = 'true'
19
26
 
20
27
  spec.files = `git ls-files`.split("\n")
21
28
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
22
- spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
23
29
  spec.require_paths = ['lib']
24
30
 
25
31
  spec.add_runtime_dependency 'json', '>= 1.8', '< 3'
@@ -29,6 +35,10 @@ Gem::Specification.new do |spec|
29
35
  spec.add_development_dependency 'rails', '~> 5.0'
30
36
  spec.add_development_dependency 'rake', '~> 12.3'
31
37
  spec.add_development_dependency 'rspec', '~> 3.7'
32
- spec.add_development_dependency 'rubocop', '~> 0.71'
33
- spec.add_development_dependency 'simplecov', '~> 0.15'
38
+ spec.add_development_dependency 'rubocop', '~> 1.26'
39
+ spec.add_development_dependency 'rubocop-performance', '~> 1.13'
40
+ spec.add_development_dependency 'rubocop-rails', '~> 2.14'
41
+ spec.add_development_dependency 'rubocop-rake', '~> 0.6'
42
+ spec.add_development_dependency 'rubocop-rspec', '~> 2.9'
43
+ spec.add_development_dependency 'simplecov', '~> 0.15', '< 0.18'
34
44
  end
data/jar/tika-config.xml CHANGED
@@ -1,3 +1,4 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
1
2
  <properties>
2
3
  <service-loader initializableProblemHandler="ignore"/>
3
4
  </properties>
data/lib/henkei/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Henkei
4
- VERSION = '1.27.1'
4
+ VERSION = '1.28.1'
5
5
  end
data/lib/henkei.rb CHANGED
@@ -25,7 +25,7 @@ require 'open3'
25
25
  # Read text and metadata from files and documents using Apache Tika toolkit
26
26
  class Henkei # rubocop:disable Metrics/ClassLength
27
27
  GEM_PATH = File.dirname(File.dirname(__FILE__))
28
- JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.27.jar')
28
+ JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.28.jar')
29
29
  CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
30
30
  DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port
31
31
 
@@ -35,7 +35,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
35
35
  def self.mimetype(content_type)
36
36
  if Henkei.configuration.mime_library == 'mime/types' && defined?(MIME::Types)
37
37
  warn '[DEPRECATION] `mime/types` is deprecated. Please use `mini_mime` instead.'\
38
- ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
38
+ ' Use Henkei.configure and assign "mini_mime" to `mime_library`.'
39
39
  MIME::Types[content_type].first
40
40
  else
41
41
  MiniMime.lookup_by_content_type(content_type).tap do |object|
@@ -54,8 +54,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
54
54
  result = @@server_pid ? server_read(data) : client_read(type, data)
55
55
 
56
56
  case type
57
- when :text then result
58
- when :html then result
57
+ when :text, :html then result
59
58
  when :metadata then JSON.parse(result)
60
59
  when :mimetype then Henkei.mimetype(JSON.parse(result)['Content-Type'])
61
60
  end
@@ -246,7 +245,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
246
245
  # Internal helper for calling to Tika library directly
247
246
  #
248
247
  def self.client_read(type, data)
249
- Open3.capture2(*tika_command(type), stdin_data: data, binmode: true).first
248
+ filter_response Open3.capture2(*tika_command(type), stdin_data: data, binmode: true).first
250
249
  end
251
250
  private_class_method :client_read
252
251
 
@@ -273,7 +272,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
273
272
 
274
273
  resp << chunk
275
274
  end
276
- resp
275
+ filter_response resp
277
276
  end
278
277
  private_class_method :server_read
279
278
 
@@ -297,4 +296,14 @@ class Henkei # rubocop:disable Metrics/ClassLength
297
296
  }[type]
298
297
  end
299
298
  private_class_method :switch_for_type
299
+
300
+ # Internal helper to remove erroneous output
301
+ #
302
+ def self.filter_response(response)
303
+ response.gsub(
304
+ /\AWARNING: sun\.reflect\.Reflection\.getCallerClass is not supported\. This will impact performance\.\n/,
305
+ ''
306
+ )
307
+ end
308
+ private_class_method :filter_response
300
309
  end
data/spec/henkei_spec.rb CHANGED
@@ -201,37 +201,33 @@ describe Henkei do
201
201
 
202
202
  context 'working as server mode' do
203
203
  specify '#starts and kills server' do
204
- begin
205
- Henkei.server(:text)
206
- expect(Henkei.class_variable_get(:@@server_pid)).not_to be_nil
207
- expect(Henkei.class_variable_get(:@@server_port)).not_to be_nil
208
-
209
- s = TCPSocket.new('localhost', Henkei.class_variable_get(:@@server_port))
210
- expect(s).to be_a TCPSocket
211
- s.close
212
- ensure
213
- port = Henkei.class_variable_get(:@@server_port)
214
- Henkei.kill_server!
215
- sleep 2
216
- expect { TCPSocket.new('localhost', port) }.to raise_error Errno::ECONNREFUSED
217
- end
204
+ Henkei.server(:text)
205
+ expect(Henkei.class_variable_get(:@@server_pid)).not_to be_nil
206
+ expect(Henkei.class_variable_get(:@@server_port)).not_to be_nil
207
+
208
+ s = TCPSocket.new('localhost', Henkei.class_variable_get(:@@server_port))
209
+ expect(s).to be_a TCPSocket
210
+ s.close
211
+ ensure
212
+ port = Henkei.class_variable_get(:@@server_port)
213
+ Henkei.kill_server!
214
+ sleep 2
215
+ expect { TCPSocket.new('localhost', port) }.to raise_error Errno::ECONNREFUSED
218
216
  end
219
217
 
220
218
  specify '#runs samples through server mode' do
221
- begin
222
- Henkei.server(:text)
223
- expect(Henkei.new('spec/samples/sample.pages').text).to(
224
- include 'The quick brown fox jumped over the lazy cat.'
225
- )
226
- expect(Henkei.new('spec/samples/sample filename with spaces.pages').text).to(
227
- include 'The quick brown fox jumped over the lazy cat.'
228
- )
229
- expect(Henkei.new('spec/samples/sample.docx').text).to(
230
- include 'The quick brown fox jumped over the lazy cat.'
231
- )
232
- ensure
233
- Henkei.kill_server!
234
- end
219
+ Henkei.server(:text)
220
+ expect(Henkei.new('spec/samples/sample.pages').text).to(
221
+ include 'The quick brown fox jumped over the lazy cat.'
222
+ )
223
+ expect(Henkei.new('spec/samples/sample filename with spaces.pages').text).to(
224
+ include 'The quick brown fox jumped over the lazy cat.'
225
+ )
226
+ expect(Henkei.new('spec/samples/sample.docx').text).to(
227
+ include 'The quick brown fox jumped over the lazy cat.'
228
+ )
229
+ ensure
230
+ Henkei.kill_server!
235
231
  end
236
232
  end
237
233
  end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.27.1
4
+ version: 1.28.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
8
8
  - Andrew Bromwich
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-07-19 00:00:00.000000000 Z
12
+ date: 2022-05-28 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -113,14 +113,70 @@ dependencies:
113
113
  requirements:
114
114
  - - "~>"
115
115
  - !ruby/object:Gem::Version
116
- version: '0.71'
116
+ version: '1.26'
117
117
  type: :development
118
118
  prerelease: false
119
119
  version_requirements: !ruby/object:Gem::Requirement
120
120
  requirements:
121
121
  - - "~>"
122
122
  - !ruby/object:Gem::Version
123
- version: '0.71'
123
+ version: '1.26'
124
+ - !ruby/object:Gem::Dependency
125
+ name: rubocop-performance
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '1.13'
131
+ type: :development
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '1.13'
138
+ - !ruby/object:Gem::Dependency
139
+ name: rubocop-rails
140
+ requirement: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '2.14'
145
+ type: :development
146
+ prerelease: false
147
+ version_requirements: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '2.14'
152
+ - !ruby/object:Gem::Dependency
153
+ name: rubocop-rake
154
+ requirement: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '0.6'
159
+ type: :development
160
+ prerelease: false
161
+ version_requirements: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
164
+ - !ruby/object:Gem::Version
165
+ version: '0.6'
166
+ - !ruby/object:Gem::Dependency
167
+ name: rubocop-rspec
168
+ requirement: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '2.9'
173
+ type: :development
174
+ prerelease: false
175
+ version_requirements: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - "~>"
178
+ - !ruby/object:Gem::Version
179
+ version: '2.9'
124
180
  - !ruby/object:Gem::Dependency
125
181
  name: simplecov
126
182
  requirement: !ruby/object:Gem::Requirement
@@ -128,6 +184,9 @@ dependencies:
128
184
  - - "~>"
129
185
  - !ruby/object:Gem::Version
130
186
  version: '0.15'
187
+ - - "<"
188
+ - !ruby/object:Gem::Version
189
+ version: '0.18'
131
190
  type: :development
132
191
  prerelease: false
133
192
  version_requirements: !ruby/object:Gem::Requirement
@@ -135,6 +194,9 @@ dependencies:
135
194
  - - "~>"
136
195
  - !ruby/object:Gem::Version
137
196
  version: '0.15'
197
+ - - "<"
198
+ - !ruby/object:Gem::Version
199
+ version: '0.18'
138
200
  description: Read text and metadata from files and documents using Apache Tika toolkit
139
201
  email:
140
202
  - erol.fornoles@gmail.com
@@ -144,10 +206,10 @@ executables:
144
206
  extensions: []
145
207
  extra_rdoc_files: []
146
208
  files:
209
+ - ".github/workflows/test.yml"
147
210
  - ".gitignore"
148
211
  - ".rspec"
149
212
  - ".rubocop.yml"
150
- - ".travis.yml"
151
213
  - Gemfile
152
214
  - LICENSE
153
215
  - NOTICE.txt
@@ -155,7 +217,7 @@ files:
155
217
  - Rakefile
156
218
  - bin/console
157
219
  - henkei.gemspec
158
- - jar/tika-app-1.27.jar
220
+ - jar/tika-app-1.28.jar
159
221
  - jar/tika-config.xml
160
222
  - lib/henkei.rb
161
223
  - lib/henkei/configuration.rb
@@ -168,11 +230,13 @@ files:
168
230
  - spec/samples/sample-metadata-values-with-colons.doc
169
231
  - spec/samples/sample.docx
170
232
  - spec/samples/sample.pages
171
- homepage: http://github.com/abrom/henkei
233
+ homepage: https://github.com/abrom/henkei
172
234
  licenses:
173
235
  - MIT
174
- metadata: {}
175
- post_install_message:
236
+ metadata:
237
+ allowed_push_host: https://rubygems.org
238
+ rubygems_mfa_required: 'true'
239
+ post_install_message:
176
240
  rdoc_options: []
177
241
  require_paths:
178
242
  - lib
@@ -180,26 +244,19 @@ required_ruby_version: !ruby/object:Gem::Requirement
180
244
  requirements:
181
245
  - - ">="
182
246
  - !ruby/object:Gem::Version
183
- version: 2.4.0
247
+ version: 2.6.0
184
248
  - - "<"
185
249
  - !ruby/object:Gem::Version
186
- version: 3.1.0
250
+ version: 3.2.0
187
251
  required_rubygems_version: !ruby/object:Gem::Requirement
188
252
  requirements:
189
253
  - - ">="
190
254
  - !ruby/object:Gem::Version
191
255
  version: '0'
192
256
  requirements: []
193
- rubygems_version: 3.0.6
194
- signing_key:
257
+ rubygems_version: 3.2.3
258
+ signing_key:
195
259
  specification_version: 4
196
260
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
197
261
  .rtf, .pdf) using Apache Tika toolkit
198
- test_files:
199
- - spec/helper.rb
200
- - spec/henkei_spec.rb
201
- - spec/samples/pipe-error.png
202
- - spec/samples/sample filename with spaces.pages
203
- - spec/samples/sample-metadata-values-with-colons.doc
204
- - spec/samples/sample.docx
205
- - spec/samples/sample.pages
262
+ test_files: []
data/.travis.yml DELETED
@@ -1,32 +0,0 @@
1
- env:
2
- global:
3
- - CC_TEST_REPORTER_ID=bb96c1ff9dc66724c38fb4eb54486dd72dc88a7fd6e727c034b9cf8d747d069e
4
- jobs:
5
- - INCLUDE_RAILS=false
6
- - INCLUDE_RAILS=true
7
-
8
- language: ruby
9
- rvm:
10
- - 2.5
11
- - 2.6
12
- - 2.7
13
- - 3.0
14
-
15
- before_install:
16
- - gem update bundler
17
-
18
- install:
19
- - bundle install --jobs=3 --retry=3
20
- - gem install rubocop
21
-
22
- before_script:
23
- - curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
24
- - chmod +x ./cc-test-reporter
25
- - ./cc-test-reporter before-build
26
-
27
- script:
28
- - bundle exec rubocop
29
- - bundle exec rspec
30
-
31
- after_script:
32
- - ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT