henkei 1.22.0 → 1.23.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a6f2128fb621a465d981a99a0c5198208ef9bc245f7d955799892d41e3f1118d
4
- data.tar.gz: 6e6bc03ed962ade5d3e93aff358820ba26eb0b758cf65a3fb6e61de4634704ca
3
+ metadata.gz: f9ebc4be4691020c72328134a33a9ffe6b4fb79a939ddc9ce833c56551abb86d
4
+ data.tar.gz: 17402ba43e9840b59090a82f1cd39e79e594ca3d36c763b958f9243174990f8e
5
5
  SHA512:
6
- metadata.gz: 894dfe43478258980edcdabd464ae6903793c7eb6c0b03661e1548af2b99701e9f320b5dca0e237925c9ef3a1f7b0bd9bfbeb80b6d8ce47c766ca601b23c380b
7
- data.tar.gz: 813774141e6acc2d1343aea110cd40d84c12482c0d7d769e54e11914eeba28e62cf1c15390407594e92b0d3b4ca6366cc279651d3239b3e0cd5c67cc11ae3caa
6
+ metadata.gz: 74dcf4d6f2ce5f99b77b3c1fdd34a271220c58e8aae167b40cde35eef2166570d3c4de7b94f91d98158fe3cc384ec7a7688cf98812e378607e31f8d24e06420f
7
+ data.tar.gz: aa210ee582d56592932684216eb93cd3f91ea7ba95e3b1d4bc672ed09bdc9605008e7f6f21472ca291c574cb1050b18eb6bdb47aa5caa03341ae393cbb0b9939
@@ -1,10 +1,10 @@
1
+ Layout/LineLength:
2
+ Max: 120
3
+
1
4
  Metrics/BlockLength:
2
5
  Exclude:
3
6
  - 'spec/**/*'
4
7
 
5
- Metrics/LineLength:
6
- Max: 120
7
-
8
8
  Metrics/MethodLength:
9
9
  Max: 15
10
10
 
@@ -0,0 +1,8 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'henkei'
6
+
7
+ require 'irb'
8
+ IRB.start
@@ -11,10 +11,12 @@ require 'json'
11
11
  require 'socket'
12
12
  require 'stringio'
13
13
 
14
+ require 'open3'
15
+
14
16
  # Read text and metadata from files and documents using Apache Tika toolkit
15
17
  class Henkei # rubocop:disable Metrics/ClassLength
16
18
  GEM_PATH = File.dirname(File.dirname(__FILE__))
17
- JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.22.jar')
19
+ JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.23.jar')
18
20
  CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
19
21
  DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port
20
22
 
@@ -224,11 +226,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
224
226
  # Internal helper for calling to Tika library directly
225
227
  #
226
228
  def self.client_read(type, data)
227
- IO.popen tika_command(type), 'r+' do |io|
228
- io.write data
229
- io.close_write
230
- io.read
231
- end
229
+ Open3.capture2(tika_command(type), stdin_data: data).first
232
230
  end
233
231
  private_class_method :client_read
234
232
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class Henkei
4
- VERSION = '1.22.0'
4
+ VERSION = '1.23.0'
5
5
  end
@@ -40,6 +40,16 @@ describe Henkei do
40
40
  )
41
41
  expect(mimetype.extensions).to include 'docx'
42
42
  end
43
+
44
+ context 'when passing in the `pipe-error.png` test file' do
45
+ let(:data) { File.read 'spec/samples/pipe-error.png' }
46
+
47
+ it 'returns an empty result' do
48
+ text = Henkei.read :text, data
49
+
50
+ expect(text).to eq ''
51
+ end
52
+ end
43
53
  end
44
54
 
45
55
  describe '.new' do
@@ -129,6 +139,23 @@ describe Henkei do
129
139
  specify '#metadata reads metadata' do
130
140
  expect(henkei.metadata['Content-Type']).to eq %w[application/vnd.apple.pages application/vnd.apple.pages]
131
141
  end
142
+
143
+ context 'when passing in the `pipe-error.png` test file' do
144
+ let(:henkei) { Henkei.new 'spec/samples/pipe-error.png' }
145
+
146
+ it '#text returns an empty result' do
147
+ expect(henkei.text).to eq ''
148
+ end
149
+
150
+ it '#html returns an empty body' do
151
+ expect(henkei.html).to include '<body/>'
152
+ expect(henkei.html).to include '<meta name="tiff:ImageWidth" content="792"/>'
153
+ end
154
+
155
+ it '#mimetype returns an empty result' do
156
+ expect(henkei.mimetype.content_type).to eq 'image/png'
157
+ end
158
+ end
132
159
  end
133
160
 
134
161
  context 'initialized with a given URI' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.22.0
4
+ version: 1.23.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-10-30 00:00:00.000000000 Z
12
+ date: 2019-12-26 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: json
@@ -125,7 +125,8 @@ description: Read text and metadata from files and documents using Apache Tika t
125
125
  email:
126
126
  - erol.fornoles@gmail.com
127
127
  - a.bromwich@gmail.com
128
- executables: []
128
+ executables:
129
+ - console
129
130
  extensions: []
130
131
  extra_rdoc_files: []
131
132
  files:
@@ -138,14 +139,16 @@ files:
138
139
  - NOTICE.txt
139
140
  - README.md
140
141
  - Rakefile
142
+ - bin/console
141
143
  - henkei.gemspec
142
- - jar/tika-app-1.22.jar
144
+ - jar/tika-app-1.23.jar
143
145
  - jar/tika-config.xml
144
146
  - lib/henkei.rb
145
147
  - lib/henkei/version.rb
146
148
  - lib/henkei/yomu.rb
147
149
  - spec/helper.rb
148
150
  - spec/henkei_spec.rb
151
+ - spec/samples/pipe-error.png
149
152
  - spec/samples/sample filename with spaces.pages
150
153
  - spec/samples/sample-metadata-values-with-colons.doc
151
154
  - spec/samples/sample.docx
@@ -169,8 +172,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
169
172
  - !ruby/object:Gem::Version
170
173
  version: '0'
171
174
  requirements: []
172
- rubyforge_project:
173
- rubygems_version: 2.7.6.2
175
+ rubygems_version: 3.0.6
174
176
  signing_key:
175
177
  specification_version: 4
176
178
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
@@ -178,6 +180,7 @@ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .
178
180
  test_files:
179
181
  - spec/helper.rb
180
182
  - spec/henkei_spec.rb
183
+ - spec/samples/pipe-error.png
181
184
  - spec/samples/sample filename with spaces.pages
182
185
  - spec/samples/sample-metadata-values-with-colons.doc
183
186
  - spec/samples/sample.docx