henkei 1.14.1 → 1.14.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +5 -3
- data/henkei.gemspec +1 -1
- data/lib/henkei.rb +24 -21
- data/lib/henkei/version.rb +1 -1
- data/lib/henkei/yomu.rb +1 -0
- data/spec/helper.rb +0 -1
- data/spec/henkei_spec.rb +2 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e761f2e45ae10632d62ec0852c8392f04ca07bfb
|
4
|
+
data.tar.gz: 53b61bbc7101e3a64cc641d7f3c63372dd86cf04
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7bd2cad3649411bacc1c4e998df75a41ea01975aa1a249848ecaa36e8def72b9c0f9cc33d2e30a40ad1db960bfadc3157b690ef95a0cf7347a4c8e52b23d9470
|
7
|
+
data.tar.gz: f9bb8ea2a7f292935f63501a43dd90f39bda399f8e34dada7742ea200dc0d7f92efc1881cc9c3a3715502d3ed230e874cc7a73f5c944f9150550c6963e003c82
|
data/README.md
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
-
[![Travis Build Status](http://img.shields.io/travis/
|
2
|
-
[![Code Climate Score](http://img.shields.io/codeclimate/github/
|
3
|
-
[![Gem Version](http://img.shields.io/gem/v/
|
1
|
+
[![Travis Build Status](http://img.shields.io/travis/abrom/henkei.svg?style=flat)](https://travis-ci.org/abrom/henkei)
|
2
|
+
[![Code Climate Score](http://img.shields.io/codeclimate/github/abrom/henkei.svg?style=flat)](https://codeclimate.com/github/abrom/henkei)
|
3
|
+
[![Gem Version](http://img.shields.io/gem/v/henkei.svg?style=flat)](#)
|
4
4
|
|
5
5
|
# Henkei 変形
|
6
6
|
|
7
7
|
[Henkei](http://github.com/abrom/henkei) is a library for extracting text and metadata from files and documents using the [Apache Tika](http://tika.apache.org/) content analysis toolkit.
|
8
8
|
|
9
|
+
The library was forked from [Yomu](http://github.com/Erol/yomu) as it is no longer maintained.
|
10
|
+
|
9
11
|
Here are some of the formats supported:
|
10
12
|
|
11
13
|
- Microsoft Office OLE 2 and Office Open XML Formats (.doc, .docx, .xls, .xlsx,
|
data/henkei.gemspec
CHANGED
data/lib/henkei.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'henkei/version'
|
2
|
+
require 'henkei/yomu'
|
2
3
|
|
3
4
|
require 'net/http'
|
4
5
|
require 'mime/types'
|
@@ -37,16 +38,17 @@ class Henkei
|
|
37
38
|
end
|
38
39
|
|
39
40
|
def self._client_read(type, data)
|
40
|
-
switch =
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
41
|
+
switch =
|
42
|
+
case type
|
43
|
+
when :text
|
44
|
+
'-t'
|
45
|
+
when :html
|
46
|
+
'-h'
|
47
|
+
when :metadata
|
48
|
+
'-m -j'
|
49
|
+
when :mimetype
|
50
|
+
'-m -j'
|
51
|
+
end
|
50
52
|
|
51
53
|
IO.popen "#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} #{switch}", 'r+' do |io|
|
52
54
|
io.write data
|
@@ -150,7 +152,7 @@ class Henkei
|
|
150
152
|
def mimetype
|
151
153
|
return @mimetype if defined? @mimetype
|
152
154
|
|
153
|
-
type = metadata[
|
155
|
+
type = metadata['Content-Type'].is_a?(Array) ? metadata['Content-Type'].first : metadata['Content-Type']
|
154
156
|
|
155
157
|
@mimetype = MIME::Types[type].first
|
156
158
|
end
|
@@ -221,16 +223,17 @@ class Henkei
|
|
221
223
|
# Henkei.server(:text, 9294)
|
222
224
|
#
|
223
225
|
def self.server(type, custom_port=nil)
|
224
|
-
switch =
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
226
|
+
switch =
|
227
|
+
case type
|
228
|
+
when :text
|
229
|
+
'-t'
|
230
|
+
when :html
|
231
|
+
'-h'
|
232
|
+
when :metadata
|
233
|
+
'-m -j'
|
234
|
+
when :mimetype
|
235
|
+
'-m -j'
|
236
|
+
end
|
234
237
|
|
235
238
|
@@server_port = custom_port || DEFAULT_SERVER_PORT
|
236
239
|
|
data/lib/henkei/version.rb
CHANGED
data/lib/henkei/yomu.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
Yomu = Henkei
|
data/spec/helper.rb
CHANGED
data/spec/henkei_spec.rb
CHANGED
@@ -122,7 +122,7 @@ describe Henkei do
|
|
122
122
|
end
|
123
123
|
|
124
124
|
specify '#metadata reads metadata' do
|
125
|
-
expect( henkei.metadata['Content-Type'] ).to eql [
|
125
|
+
expect( henkei.metadata['Content-Type'] ).to eql ['application/vnd.apple.pages', 'application/vnd.apple.pages']
|
126
126
|
end
|
127
127
|
end
|
128
128
|
|
@@ -146,7 +146,7 @@ describe Henkei do
|
|
146
146
|
end
|
147
147
|
|
148
148
|
specify '#metadata reads metadata' do
|
149
|
-
expect( henkei.metadata['Content-Type'] ).to eql [
|
149
|
+
expect( henkei.metadata['Content-Type'] ).to eql ['application/vnd.apple.pages', 'application/vnd.apple.pages']
|
150
150
|
end
|
151
151
|
end
|
152
152
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: henkei
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.14.
|
4
|
+
version: 1.14.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erol Fornoles
|
@@ -73,14 +73,14 @@ dependencies:
|
|
73
73
|
requirements:
|
74
74
|
- - "~>"
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version: '
|
76
|
+
version: '3.5'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
81
|
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version: '
|
83
|
+
version: '3.5'
|
84
84
|
description: Read text and metadata from files and documents (.doc, .docx, .pages,
|
85
85
|
.odt, .rtf, .pdf)
|
86
86
|
email:
|
@@ -102,6 +102,7 @@ files:
|
|
102
102
|
- jar/tika-app-1.14.jar
|
103
103
|
- lib/henkei.rb
|
104
104
|
- lib/henkei/version.rb
|
105
|
+
- lib/henkei/yomu.rb
|
105
106
|
- spec/helper.rb
|
106
107
|
- spec/henkei_spec.rb
|
107
108
|
- spec/samples/sample filename with spaces.pages
|