henkei 1.22.0 → 1.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -3
- data/bin/console +8 -0
- data/jar/{tika-app-1.22.jar → tika-app-1.23.jar} +0 -0
- data/lib/henkei.rb +4 -6
- data/lib/henkei/version.rb +1 -1
- data/spec/henkei_spec.rb +27 -0
- data/spec/samples/pipe-error.png +0 -0
- metadata +9 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9ebc4be4691020c72328134a33a9ffe6b4fb79a939ddc9ce833c56551abb86d
|
4
|
+
data.tar.gz: 17402ba43e9840b59090a82f1cd39e79e594ca3d36c763b958f9243174990f8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74dcf4d6f2ce5f99b77b3c1fdd34a271220c58e8aae167b40cde35eef2166570d3c4de7b94f91d98158fe3cc384ec7a7688cf98812e378607e31f8d24e06420f
|
7
|
+
data.tar.gz: aa210ee582d56592932684216eb93cd3f91ea7ba95e3b1d4bc672ed09bdc9605008e7f6f21472ca291c574cb1050b18eb6bdb47aa5caa03341ae393cbb0b9939
|
data/.rubocop.yml
CHANGED
data/bin/console
ADDED
Binary file
|
data/lib/henkei.rb
CHANGED
@@ -11,10 +11,12 @@ require 'json'
|
|
11
11
|
require 'socket'
|
12
12
|
require 'stringio'
|
13
13
|
|
14
|
+
require 'open3'
|
15
|
+
|
14
16
|
# Read text and metadata from files and documents using Apache Tika toolkit
|
15
17
|
class Henkei # rubocop:disable Metrics/ClassLength
|
16
18
|
GEM_PATH = File.dirname(File.dirname(__FILE__))
|
17
|
-
JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.22.jar')
|
19
|
+
JAR_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-app-1.23.jar')
|
18
20
|
CONFIG_PATH = File.join(Henkei::GEM_PATH, 'jar', 'tika-config.xml')
|
19
21
|
DEFAULT_SERVER_PORT = 9293 # an arbitrary, but perfectly cromulent, port
|
20
22
|
|
@@ -224,11 +226,7 @@ class Henkei # rubocop:disable Metrics/ClassLength
|
|
224
226
|
# Internal helper for calling to Tika library directly
|
225
227
|
#
|
226
228
|
def self.client_read(type, data)
|
227
|
-
|
228
|
-
io.write data
|
229
|
-
io.close_write
|
230
|
-
io.read
|
231
|
-
end
|
229
|
+
Open3.capture2(tika_command(type), stdin_data: data).first
|
232
230
|
end
|
233
231
|
private_class_method :client_read
|
234
232
|
|
data/lib/henkei/version.rb
CHANGED
data/spec/henkei_spec.rb
CHANGED
@@ -40,6 +40,16 @@ describe Henkei do
|
|
40
40
|
)
|
41
41
|
expect(mimetype.extensions).to include 'docx'
|
42
42
|
end
|
43
|
+
|
44
|
+
context 'when passing in the `pipe-error.png` test file' do
|
45
|
+
let(:data) { File.read 'spec/samples/pipe-error.png' }
|
46
|
+
|
47
|
+
it 'returns an empty result' do
|
48
|
+
text = Henkei.read :text, data
|
49
|
+
|
50
|
+
expect(text).to eq ''
|
51
|
+
end
|
52
|
+
end
|
43
53
|
end
|
44
54
|
|
45
55
|
describe '.new' do
|
@@ -129,6 +139,23 @@ describe Henkei do
|
|
129
139
|
specify '#metadata reads metadata' do
|
130
140
|
expect(henkei.metadata['Content-Type']).to eq %w[application/vnd.apple.pages application/vnd.apple.pages]
|
131
141
|
end
|
142
|
+
|
143
|
+
context 'when passing in the `pipe-error.png` test file' do
|
144
|
+
let(:henkei) { Henkei.new 'spec/samples/pipe-error.png' }
|
145
|
+
|
146
|
+
it '#text returns an empty result' do
|
147
|
+
expect(henkei.text).to eq ''
|
148
|
+
end
|
149
|
+
|
150
|
+
it '#html returns an empty body' do
|
151
|
+
expect(henkei.html).to include '<body/>'
|
152
|
+
expect(henkei.html).to include '<meta name="tiff:ImageWidth" content="792"/>'
|
153
|
+
end
|
154
|
+
|
155
|
+
it '#mimetype returns an empty result' do
|
156
|
+
expect(henkei.mimetype.content_type).to eq 'image/png'
|
157
|
+
end
|
158
|
+
end
|
132
159
|
end
|
133
160
|
|
134
161
|
context 'initialized with a given URI' do
|
Binary file
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: henkei
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.22.0
|
4
|
+
version: 1.23.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erol Fornoles
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-
|
12
|
+
date: 2019-12-26 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: json
|
@@ -125,7 +125,8 @@ description: Read text and metadata from files and documents using Apache Tika t
|
|
125
125
|
email:
|
126
126
|
- erol.fornoles@gmail.com
|
127
127
|
- a.bromwich@gmail.com
|
128
|
-
executables:
|
128
|
+
executables:
|
129
|
+
- console
|
129
130
|
extensions: []
|
130
131
|
extra_rdoc_files: []
|
131
132
|
files:
|
@@ -138,14 +139,16 @@ files:
|
|
138
139
|
- NOTICE.txt
|
139
140
|
- README.md
|
140
141
|
- Rakefile
|
142
|
+
- bin/console
|
141
143
|
- henkei.gemspec
|
142
|
-
- jar/tika-app-1.22.jar
|
144
|
+
- jar/tika-app-1.23.jar
|
143
145
|
- jar/tika-config.xml
|
144
146
|
- lib/henkei.rb
|
145
147
|
- lib/henkei/version.rb
|
146
148
|
- lib/henkei/yomu.rb
|
147
149
|
- spec/helper.rb
|
148
150
|
- spec/henkei_spec.rb
|
151
|
+
- spec/samples/pipe-error.png
|
149
152
|
- spec/samples/sample filename with spaces.pages
|
150
153
|
- spec/samples/sample-metadata-values-with-colons.doc
|
151
154
|
- spec/samples/sample.docx
|
@@ -169,8 +172,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
169
172
|
- !ruby/object:Gem::Version
|
170
173
|
version: '0'
|
171
174
|
requirements: []
|
172
|
-
|
173
|
-
rubygems_version: 2.7.6.2
|
175
|
+
rubygems_version: 3.0.6
|
174
176
|
signing_key:
|
175
177
|
specification_version: 4
|
176
178
|
summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,
|
@@ -178,6 +180,7 @@ summary: Read text and metadata from files and documents (.doc, .docx, .pages, .
|
|
178
180
|
test_files:
|
179
181
|
- spec/helper.rb
|
180
182
|
- spec/henkei_spec.rb
|
183
|
+
- spec/samples/pipe-error.png
|
181
184
|
- spec/samples/sample filename with spaces.pages
|
182
185
|
- spec/samples/sample-metadata-values-with-colons.doc
|
183
186
|
- spec/samples/sample.docx
|