henkei 1.22.0 → 1.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a6f2128fb621a465d981a99a0c5198208ef9bc245f7d955799892d41e3f1118d
4
- data.tar.gz: 6e6bc03ed962ade5d3e93aff358820ba26eb0b758cf65a3fb6e61de4634704ca
3
+ metadata.gz: f9ebc4be4691020c72328134a33a9ffe6b4fb79a939ddc9ce833c56551abb86d
4
+ data.tar.gz: 17402ba43e9840b59090a82f1cd39e79e594ca3d36c763b958f9243174990f8e
5
5
  SHA512:
6
- metadata.gz: 894dfe43478258980edcdabd464ae6903793c7eb6c0b03661e1548af2b99701e9f320b5dca0e237925c9ef3a1f7b0bd9bfbeb80b6d8ce47c766ca601b23c380b
7
- data.tar.gz: 813774141e6acc2d1343aea110cd40d84c12482c0d7d769e54e11914eeba28e62cf1c15390407594e92b0d3b4ca6366cc279651d3239b3e0cd5c67cc11ae3caa
6
+ metadata.gz: 74dcf4d6f2ce5f99b77b3c1fdd34a271220c58e8aae167b40cde35eef2166570d3c4de7b94f91d98158fe3cc384ec7a7688cf98812e378607e31f8d24e06420f
7
+ data.tar.gz: aa210ee582d56592932684216eb93cd3f91ea7ba95e3b1d4bc672ed09bdc9605008e7f6f21472ca291c574cb1050b18eb6bdb47aa5caa03341ae393cbb0b9939
@@ -1,10 +1,10 @@
1
+ Layout/LineLength:
2
+ Max: 120
3
+
1
4
  Metrics/BlockLength:
2
5
  Exclude:
3
6
  - 'spec/**/*'
4
7
 
5
- Metrics/LineLength:
6
- Max: 120
7
-
8
8
  Metrics/MethodLength:
9
9
  Max: 15
10
10
 
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'henkei'
6
+
7
+ require 'irb'
8
+ IRB.start
@@ -11,10 +11,12 @@ require 'json'
11
11
  require 'socket'
12
12
  require 'stringio'
13
13
 
14
+ require 'open3'
15
+
14
16
  # Read text and metadata from files and documents using Apache Tika toolkit
15
17
  class Henkei # rubocop:disable Metrics/ClassLength
16
18
  GEM_PATH = File.dirname(File.dirname(__FILE__))
17
- JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.22.jar')
19
+ JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.23.jar')
18
20
  CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
19
21
  DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port
20
22
 
@@ -224,11 +226,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
224
226
  # Internal helper for calling to Tika library directly
225
227
  #
226
228
  def self.client_read(type, data)
227
- IO.popen tika_command(type), 'r+' do |io|
228
- io.write data
229
- io.close_write
230
- io.read
231
- end
229
+ Open3.capture2(tika_command(type), stdin_data: data).first
232
230
  end
233
231
  private_class_method :client_read
234
232
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Henkei
4
- VERSION = '1.22.0'
4
+ VERSION = '1.23.0'
5
5
  end
@@ -40,6 +40,16 @@ describe Henkei do
40
40
  )
41
41
  expect(mimetype.extensions).to include 'docx'
42
42
  end
43
+
44
+ context 'when passing in the `pipe-error.png` test file' do
45
+ let(:data) { File.read 'spec/samples/pipe-error.png' }
46
+
47
+ it 'returns an empty result' do
48
+ text = Henkei.read :text, data
49
+
50
+ expect(text).to eq ''
51
+ end
52
+ end
43
53
  end
44
54
 
45
55
  describe '.new' do
@@ -129,6 +139,23 @@ describe Henkei do
129
139
  specify '#metadata reads metadata' do
130
140
  expect(henkei.metadata['Content-Type']).to eq %w[application/vnd.apple.pages application/vnd.apple.pages]
131
141
  end
142
+
143
+ context 'when passing in the `pipe-error.png` test file' do
144
+ let(:henkei) { Henkei.new 'spec/samples/pipe-error.png' }
145
+
146
+ it '#text returns an empty result' do
147
+ expect(henkei.text).to eq ''
148
+ end
149
+
150
+ it '#html returns an empty body' do
151
+ expect(henkei.html).to include '<body/>'
152
+ expect(henkei.html).to include '<meta name="tiff:ImageWidth" content="792"/>'
153
+ end
154
+
155
+ it '#mimetype returns an empty result' do
156
+ expect(henkei.mimetype.content_type).to eq 'image/png'
157
+ end
158
+ end
132
159
  end
133
160
 
134
161
  context 'initialized with a given URI' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.22.0
4
+ version: 1.23.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-10-30 00:00:00.000000000 Z
12
+ date: 2019-12-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -125,7 +125,8 @@ description: Read text and metadata from files and documents using Apache Tika t
125
125
  email:
126
126
  - erol.fornoles@gmail.com
127
127
  - a.bromwich@gmail.com
128
- executables: []
128
+ executables:
129
+ - console
129
130
  extensions: []
130
131
  extra_rdoc_files: []
131
132
  files:
@@ -138,14 +139,16 @@ files:
138
139
  - NOTICE.txt
139
140
  - README.md
140
141
  - Rakefile
142
+ - bin/console
141
143
  - henkei.gemspec
142
- - jar/tika-app-1.22.jar
144
+ - jar/tika-app-1.23.jar
143
145
  - jar/tika-config.xml
144
146
  - lib/henkei.rb
145
147
  - lib/henkei/version.rb
146
148
  - lib/henkei/yomu.rb
147
149
  - spec/helper.rb
148
150
  - spec/henkei_spec.rb
151
+ - spec/samples/pipe-error.png
149
152
  - spec/samples/sample filename with spaces.pages
150
153
  - spec/samples/sample-metadata-values-with-colons.doc
151
154
  - spec/samples/sample.docx
@@ -169,8 +172,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
169
172
  - !ruby/object:Gem::Version
170
173
  version: '0'
171
174
  requirements: []
172
- rubyforge_project:
173
- rubygems_version: 2.7.6.2
175
+ rubygems_version: 3.0.6
174
176
  signing_key:
175
177
  specification_version: 4
176
178
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
@@ -178,6 +180,7 @@ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .
178
180
  test_files:
179
181
  - spec/helper.rb
180
182
  - spec/henkei_spec.rb
183
+ - spec/samples/pipe-error.png
181
184
  - spec/samples/sample filename with spaces.pages
182
185
  - spec/samples/sample-metadata-values-with-colons.doc
183
186
  - spec/samples/sample.docx