henkei 1.14.3 → 1.14.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/henkei.rb +72 -58
  3. data/lib/henkei/version.rb +1 -1
  4. metadata +18 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e982c720251c865584f5336e41f5693d7897655
4
- data.tar.gz: a7dc85cebe95091daa572cfc1d27006c21b35512
3
+ metadata.gz: e3ab53035e222c201453eaf44efc1186a9c9ae2f
4
+ data.tar.gz: 9e527cde254131285d90ccd69f7c1ef57e8eaa53
5
5
  SHA512:
6
- metadata.gz: c9ca2e89a63ac1790f3b9c5ab82c72e609566110313538851842ad58fcb1056fa08811e8dfc6e63d102e8702860fc22df4c6b6d4473b4b4d24029ef79f81badf
7
- data.tar.gz: c4a1a3f2a8580480ae5911f4485e646e02355e51ab7e5adde1518d2f0c9bbd6c78cf7af5c744005c9d733a704e2ed925e0eaff621986fcc019de3cf9b766ce5a
6
+ metadata.gz: 757063d9530cf4d88988bb2494e4f3f198465ea27c5603c2aed9bd6cfbbeb008a89d766f79fcb522f769ce49c078f7d4fe34b1256b4cbe241ae5dbe4543f5010
7
+ data.tar.gz: 7dd520e273bd2808871af156f53b5b8a39476fb8f969fee52e6424c5782c50122b4a0f3644199fdc9213973331ffe4a265fbd36c6a3b0d97a4990f8c8e9da98f
data/lib/henkei.rb CHANGED
@@ -22,9 +22,9 @@ class Henkei
22
22
  # data = File.read 'sample.pages'
23
23
  # text = Henkei.read :text, data
24
24
  # metadata = Henkei.read :metadata, data
25
-
25
+ #
26
26
  def self.read(type, data)
27
- result = @@server_pid ? self._server_read(type, data) : self._client_read(type, data)
27
+ result = @@server_pid ? server_read(type, data) : client_read(type, data)
28
28
 
29
29
  case type
30
30
  when :text
@@ -38,49 +38,6 @@ class Henkei
38
38
  end
39
39
  end
40
40
 
41
- def self._client_read(type, data)
42
- switch =
43
- case type
44
- when :text
45
- '-t'
46
- when :html
47
- '-h'
48
- when :metadata
49
- '-m -j'
50
- when :mimetype
51
- '-m -j'
52
- end
53
-
54
- IO.popen "#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} #{switch}", 'r+' do |io|
55
- io.write data
56
- io.close_write
57
- io.read
58
- end
59
- end
60
-
61
-
62
- def self._server_read(_, data)
63
- s = TCPSocket.new('localhost', @@server_port)
64
- file = StringIO.new(data, 'r')
65
-
66
- while 1
67
- chunk = file.read(65536)
68
- break unless chunk
69
- s.write(chunk)
70
- end
71
-
72
- # tell Tika that we're done sending data
73
- s.shutdown(Socket::SHUT_WR)
74
-
75
- resp = ''
76
- while 1
77
- chunk = s.recv(65536)
78
- break if chunk.empty? || !chunk
79
- resp << chunk
80
- end
81
- resp
82
- end
83
-
84
41
  # Create a new instance of Henkei with a given document.
85
42
  #
86
43
  # Using a file path:
@@ -94,7 +51,7 @@ class Henkei
94
51
  # From a stream or an object which responds to +read+
95
52
  #
96
53
  # Henkei.new File.open('sample.pages')
97
-
54
+ #
98
55
  def initialize(input)
99
56
  if input.is_a? String
100
57
  if File.exists? input
@@ -115,7 +72,7 @@ class Henkei
115
72
  #
116
73
  # henkei = Henkei.new 'sample.pages'
117
74
  # henkei.text
118
-
75
+ #
119
76
  def text
120
77
  return @text if defined? @text
121
78
 
@@ -126,7 +83,7 @@ class Henkei
126
83
  #
127
84
  # henkei = Henkei.new 'sample.pages'
128
85
  # henkei.html
129
-
86
+ #
130
87
  def html
131
88
  return @html if defined? @html
132
89
 
@@ -137,7 +94,7 @@ class Henkei
137
94
  #
138
95
  # henkei = Henkei.new 'sample.pages'
139
96
  # henkei.metadata['Content-Type']
140
-
97
+ #
141
98
  def metadata
142
99
  return @metadata if defined? @metadata
143
100
 
@@ -149,7 +106,7 @@ class Henkei
149
106
  # henkei = Henkei.new 'sample.docx'
150
107
  # henkei.mimetype.content_type #=> 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
151
108
  # henkei.mimetype.extensions #=> ['docx']
152
-
109
+ #
153
110
  def mimetype
154
111
  return @mimetype if defined? @mimetype
155
112
 
@@ -162,8 +119,7 @@ class Henkei
162
119
  #
163
120
  # henkei = Henkei.new 'sample.pages'
164
121
  # henkei.path? #=> true
165
-
166
-
122
+ #
167
123
  def creation_date
168
124
  return @creation_date if defined? @creation_date
169
125
 
@@ -174,17 +130,22 @@ class Henkei
174
130
  end
175
131
  end
176
132
 
133
+ # Returns +true+ if the Henkei document was specified using a file path.
134
+ #
135
+ # henkei = Henkei.new '/my/document/path/sample.docx'
136
+ # henkei.path? #=> true
137
+ #
177
138
  def path?
178
- defined? @path
139
+ !!@path
179
140
  end
180
141
 
181
142
  # Returns +true+ if the Henkei document was specified using a URI.
182
143
  #
183
144
  # henkei = Henkei.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
184
145
  # henkei.uri? #=> true
185
-
146
+ #
186
147
  def uri?
187
- defined? @uri
148
+ !!@uri
188
149
  end
189
150
 
190
151
  # Returns +true+ if the Henkei document was specified from a stream or an object which responds to +read+.
@@ -192,16 +153,16 @@ class Henkei
192
153
  # file = File.open('sample.pages')
193
154
  # henkei = Henkei.new file
194
155
  # henkei.stream? #=> true
195
-
156
+ #
196
157
  def stream?
197
- defined? @stream
158
+ !!@stream
198
159
  end
199
160
 
200
161
  # Returns the raw/unparsed content of the Henkei document.
201
162
  #
202
163
  # henkei = Henkei.new 'sample.pages'
203
164
  # henkei.data
204
-
165
+ #
205
166
  def data
206
167
  return @data if defined? @data
207
168
 
@@ -257,6 +218,7 @@ class Henkei
257
218
  # ensure
258
219
  # Henkei.kill_server!
259
220
  # end
221
+ #
260
222
  def self.kill_server!
261
223
  if @@server_pid
262
224
  Process.kill('INT', @@server_pid)
@@ -265,8 +227,60 @@ class Henkei
265
227
  end
266
228
  end
267
229
 
230
+ ### Private class methods
231
+
232
+ # Provide the path to the Java binary
233
+ #
268
234
  def self.java
269
235
  ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
270
236
  end
271
237
  private_class_method :java
238
+
239
+ # Internal helper for calling to Tika library directly
240
+ #
241
+ def self.client_read(type, data)
242
+ switch =
243
+ case type
244
+ when :text
245
+ '-t'
246
+ when :html
247
+ '-h'
248
+ when :metadata
249
+ '-m -j'
250
+ when :mimetype
251
+ '-m -j'
252
+ end
253
+
254
+ IO.popen "#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} #{switch}", 'r+' do |io|
255
+ io.write data
256
+ io.close_write
257
+ io.read
258
+ end
259
+ end
260
+ private_class_method :client_read
261
+
262
+ # Internal helper for calling to running Tika server
263
+ #
264
+ def self.server_read(_, data)
265
+ s = TCPSocket.new('localhost', @@server_port)
266
+ file = StringIO.new(data, 'r')
267
+
268
+ while 1
269
+ chunk = file.read(65536)
270
+ break unless chunk
271
+ s.write(chunk)
272
+ end
273
+
274
+ # tell Tika that we're done sending data
275
+ s.shutdown(Socket::SHUT_WR)
276
+
277
+ resp = ''
278
+ while 1
279
+ chunk = s.recv(65536)
280
+ break if chunk.empty? || !chunk
281
+ resp << chunk
282
+ end
283
+ resp
284
+ end
285
+ private_class_method :server_read
272
286
  end
data/lib/henkei/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Henkei
2
- VERSION = '1.14.3'
2
+ VERSION = '1.14.4'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.14.3
4
+ version: 1.14.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,76 +9,76 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-02-26 00:00:00.000000000 Z
12
+ date: 2017-05-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mime-types
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - '>='
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '1.23'
21
21
  type: :runtime
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '>='
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '1.23'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: json
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - '>='
32
+ - - ">="
33
33
  - !ruby/object:Gem::Version
34
34
  version: '1.8'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - '>='
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '1.8'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: bundler
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - ~>
46
+ - - "~>"
47
47
  - !ruby/object:Gem::Version
48
48
  version: '1.3'
49
49
  type: :development
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - ~>
53
+ - - "~>"
54
54
  - !ruby/object:Gem::Version
55
55
  version: '1.3'
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: rake
58
58
  requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
- - - '>='
60
+ - - ">="
61
61
  - !ruby/object:Gem::Version
62
62
  version: '0'
63
63
  type: :development
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
- - - '>='
67
+ - - ">="
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: rspec
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
- - - ~>
74
+ - - "~>"
75
75
  - !ruby/object:Gem::Version
76
76
  version: '3.5'
77
77
  type: :development
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
- - - ~>
81
+ - - "~>"
82
82
  - !ruby/object:Gem::Version
83
83
  version: '3.5'
84
84
  description: Read text and metadata from files and documents (.doc, .docx, .pages,
@@ -90,9 +90,9 @@ executables: []
90
90
  extensions: []
91
91
  extra_rdoc_files: []
92
92
  files:
93
- - .gitignore
94
- - .rspec
95
- - .travis.yml
93
+ - ".gitignore"
94
+ - ".rspec"
95
+ - ".travis.yml"
96
96
  - Gemfile
97
97
  - LICENSE
98
98
  - NOTICE.txt
@@ -119,17 +119,17 @@ require_paths:
119
119
  - lib
120
120
  required_ruby_version: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - '>='
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  requirements:
127
- - - '>='
127
+ - - ">="
128
128
  - !ruby/object:Gem::Version
129
129
  version: '0'
130
130
  requirements: []
131
131
  rubyforge_project:
132
- rubygems_version: 2.4.6
132
+ rubygems_version: 2.4.8
133
133
  signing_key:
134
134
  specification_version: 4
135
135
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,