henkei 1.14.1 → 1.14.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -3
- data/henkei.gemspec +1 -1
- data/lib/henkei.rb +24 -21
- data/lib/henkei/version.rb +1 -1
- data/lib/henkei/yomu.rb +1 -0
- data/spec/helper.rb +0 -1
- data/spec/henkei_spec.rb +2 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e761f2e45ae10632d62ec0852c8392f04ca07bfb
|
4
|
+
data.tar.gz: 53b61bbc7101e3a64cc641d7f3c63372dd86cf04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7bd2cad3649411bacc1c4e998df75a41ea01975aa1a249848ecaa36e8def72b9c0f9cc33d2e30a40ad1db960bfadc3157b690ef95a0cf7347a4c8e52b23d9470
|
7
|
+
data.tar.gz: f9bb8ea2a7f292935f63501a43dd90f39bda399f8e34dada7742ea200dc0d7f92efc1881cc9c3a3715502d3ed230e874cc7a73f5c944f9150550c6963e003c82
|
data/README.md
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
[](https://travis-ci.org/abrom/henkei)
|
2
|
+
[](https://codeclimate.com/github/abrom/henkei)
|
3
|
+
[](#)
|
4
4
|
|
5
5
|
# Henkei 変形
|
6
6
|
|
7
7
|
[Henkei](http://github.com/abrom/henkei) is a library for extracting text and metadata from files and documents using the [Apache Tika](http://tika.apache.org/) content analysis toolkit.
|
8
8
|
|
9
|
+
The library was forked from [Yomu](http://github.com/Erol/yomu) as it is no longer maintained.
|
10
|
+
|
9
11
|
Here are some of the formats supported:
|
10
12
|
|
11
13
|
- Microsoft Office OLE 2 and Office Open XML Formats (.doc, .docx, .xls, .xlsx,
|
data/henkei.gemspec
CHANGED
data/lib/henkei.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'henkei/version'
|
2
|
+
require 'henkei/yomu'
|
2
3
|
|
3
4
|
require 'net/http'
|
4
5
|
require 'mime/types'
|
@@ -37,16 +38,17 @@ class Henkei
|
|
37
38
|
end
|
38
39
|
|
39
40
|
def self._client_read(type, data)
|
40
|
-
switch =
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
41
|
+
switch =
|
42
|
+
case type
|
43
|
+
when :text
|
44
|
+
'-t'
|
45
|
+
when :html
|
46
|
+
'-h'
|
47
|
+
when :metadata
|
48
|
+
'-m -j'
|
49
|
+
when :mimetype
|
50
|
+
'-m -j'
|
51
|
+
end
|
50
52
|
|
51
53
|
IO.popen "#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} #{switch}", 'r+' do |io|
|
52
54
|
io.write data
|
@@ -150,7 +152,7 @@ class Henkei
|
|
150
152
|
def mimetype
|
151
153
|
return @mimetype if defined? @mimetype
|
152
154
|
|
153
|
-
type = metadata[
|
155
|
+
type = metadata['Content-Type'].is_a?(Array) ? metadata['Content-Type'].first : metadata['Content-Type']
|
154
156
|
|
155
157
|
@mimetype = MIME::Types[type].first
|
156
158
|
end
|
@@ -221,16 +223,17 @@ class Henkei
|
|
221
223
|
# Henkei.server(:text, 9294)
|
222
224
|
#
|
223
225
|
def self.server(type, custom_port=nil)
|
224
|
-
switch =
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
226
|
+
switch =
|
227
|
+
case type
|
228
|
+
when :text
|
229
|
+
'-t'
|
230
|
+
when :html
|
231
|
+
'-h'
|
232
|
+
when :metadata
|
233
|
+
'-m -j'
|
234
|
+
when :mimetype
|
235
|
+
'-m -j'
|
236
|
+
end
|
234
237
|
|
235
238
|
@@server_port = custom_port || DEFAULT_SERVER_PORT
|
236
239
|
|
data/lib/henkei/version.rb
CHANGED
data/lib/henkei/yomu.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Yomu = Henkei
|
data/spec/helper.rb
CHANGED
data/spec/henkei_spec.rb
CHANGED
@@ -122,7 +122,7 @@ describe Henkei do
|
|
122
122
|
end
|
123
123
|
|
124
124
|
specify '#metadata reads metadata' do
|
125
|
-
expect( henkei.metadata['Content-Type'] ).to eql [
|
125
|
+
expect( henkei.metadata['Content-Type'] ).to eql ['application/vnd.apple.pages', 'application/vnd.apple.pages']
|
126
126
|
end
|
127
127
|
end
|
128
128
|
|
@@ -146,7 +146,7 @@ describe Henkei do
|
|
146
146
|
end
|
147
147
|
|
148
148
|
specify '#metadata reads metadata' do
|
149
|
-
expect( henkei.metadata['Content-Type'] ).to eql [
|
149
|
+
expect( henkei.metadata['Content-Type'] ).to eql ['application/vnd.apple.pages', 'application/vnd.apple.pages']
|
150
150
|
end
|
151
151
|
end
|
152
152
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: henkei
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.
|
4
|
+
version: 1.14.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erol Fornoles
|
@@ -73,14 +73,14 @@ dependencies:
|
|
73
73
|
requirements:
|
74
74
|
- - "~>"
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
76
|
+
version: '3.5'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
81
|
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
83
|
+
version: '3.5'
|
84
84
|
description: Read text and metadata from files and documents (.doc, .docx, .pages,
|
85
85
|
.odt, .rtf, .pdf)
|
86
86
|
email:
|
@@ -102,6 +102,7 @@ files:
|
|
102
102
|
- jar/tika-app-1.14.jar
|
103
103
|
- lib/henkei.rb
|
104
104
|
- lib/henkei/version.rb
|
105
|
+
- lib/henkei/yomu.rb
|
105
106
|
- spec/helper.rb
|
106
107
|
- spec/henkei_spec.rb
|
107
108
|
- spec/samples/sample filename with spaces.pages
|