henkei 1.14.3 → 1.14.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/henkei.rb +72 -58
  3. data/lib/henkei/version.rb +1 -1
  4. metadata +18 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 7e982c720251c865584f5336e41f5693d7897655
4
- data.tar.gz: a7dc85cebe95091daa572cfc1d27006c21b35512
3
+ metadata.gz: e3ab53035e222c201453eaf44efc1186a9c9ae2f
4
+ data.tar.gz: 9e527cde254131285d90ccd69f7c1ef57e8eaa53
5
5
  SHA512:
6
- metadata.gz: c9ca2e89a63ac1790f3b9c5ab82c72e609566110313538851842ad58fcb1056fa08811e8dfc6e63d102e8702860fc22df4c6b6d4473b4b4d24029ef79f81badf
7
- data.tar.gz: c4a1a3f2a8580480ae5911f4485e646e02355e51ab7e5adde1518d2f0c9bbd6c78cf7af5c744005c9d733a704e2ed925e0eaff621986fcc019de3cf9b766ce5a
6
+ metadata.gz: 757063d9530cf4d88988bb2494e4f3f198465ea27c5603c2aed9bd6cfbbeb008a89d766f79fcb522f769ce49c078f7d4fe34b1256b4cbe241ae5dbe4543f5010
7
+ data.tar.gz: 7dd520e273bd2808871af156f53b5b8a39476fb8f969fee52e6424c5782c50122b4a0f3644199fdc9213973331ffe4a265fbd36c6a3b0d97a4990f8c8e9da98f
data/lib/henkei.rb CHANGED
@@ -22,9 +22,9 @@ class Henkei
22
22
  # data = File.read 'sample.pages'
23
23
  # text = Henkei.read :text, data
24
24
  # metadata = Henkei.read :metadata, data
25
-
25
+ #
26
26
  def self.read(type, data)
27
- result = @@server_pid ? self._server_read(type, data) : self._client_read(type, data)
27
+ result = @@server_pid ? server_read(type, data) : client_read(type, data)
28
28
 
29
29
  case type
30
30
  when :text
@@ -38,49 +38,6 @@ class Henkei
38
38
  end
39
39
  end
40
40
 
41
- def self._client_read(type, data)
42
- switch =
43
- case type
44
- when :text
45
- '-t'
46
- when :html
47
- '-h'
48
- when :metadata
49
- '-m -j'
50
- when :mimetype
51
- '-m -j'
52
- end
53
-
54
- IO.popen "#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} #{switch}", 'r+' do |io|
55
- io.write data
56
- io.close_write
57
- io.read
58
- end
59
- end
60
-
61
-
62
- def self._server_read(_, data)
63
- s = TCPSocket.new('localhost', @@server_port)
64
- file = StringIO.new(data, 'r')
65
-
66
- while 1
67
- chunk = file.read(65536)
68
- break unless chunk
69
- s.write(chunk)
70
- end
71
-
72
- # tell Tika that we're done sending data
73
- s.shutdown(Socket::SHUT_WR)
74
-
75
- resp = ''
76
- while 1
77
- chunk = s.recv(65536)
78
- break if chunk.empty? || !chunk
79
- resp << chunk
80
- end
81
- resp
82
- end
83
-
84
41
  # Create a new instance of Henkei with a given document.
85
42
  #
86
43
  # Using a file path:
@@ -94,7 +51,7 @@ class Henkei
94
51
  # From a stream or an object which responds to +read+
95
52
  #
96
53
  # Henkei.new File.open('sample.pages')
97
-
54
+ #
98
55
  def initialize(input)
99
56
  if input.is_a? String
100
57
  if File.exists? input
@@ -115,7 +72,7 @@ class Henkei
115
72
  #
116
73
  # henkei = Henkei.new 'sample.pages'
117
74
  # henkei.text
118
-
75
+ #
119
76
  def text
120
77
  return @text if defined? @text
121
78
 
@@ -126,7 +83,7 @@ class Henkei
126
83
  #
127
84
  # henkei = Henkei.new 'sample.pages'
128
85
  # henkei.html
129
-
86
+ #
130
87
  def html
131
88
  return @html if defined? @html
132
89
 
@@ -137,7 +94,7 @@ class Henkei
137
94
  #
138
95
  # henkei = Henkei.new 'sample.pages'
139
96
  # henkei.metadata['Content-Type']
140
-
97
+ #
141
98
  def metadata
142
99
  return @metadata if defined? @metadata
143
100
 
@@ -149,7 +106,7 @@ class Henkei
149
106
  # henkei = Henkei.new 'sample.docx'
150
107
  # henkei.mimetype.content_type #=> 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
151
108
  # henkei.mimetype.extensions #=> ['docx']
152
-
109
+ #
153
110
  def mimetype
154
111
  return @mimetype if defined? @mimetype
155
112
 
@@ -162,8 +119,7 @@ class Henkei
162
119
  #
163
120
  # henkei = Henkei.new 'sample.pages'
164
121
  # henkei.path? #=> true
165
-
166
-
122
+ #
167
123
  def creation_date
168
124
  return @creation_date if defined? @creation_date
169
125
 
@@ -174,17 +130,22 @@ class Henkei
174
130
  end
175
131
  end
176
132
 
133
+ # Returns +true+ if the Henkei document was specified using a file path.
134
+ #
135
+ # henkei = Henkei.new '/my/document/path/sample.docx'
136
+ # henkei.path? #=> true
137
+ #
177
138
  def path?
178
- defined? @path
139
+ !!@path
179
140
  end
180
141
 
181
142
  # Returns +true+ if the Henkei document was specified using a URI.
182
143
  #
183
144
  # henkei = Henkei.new 'http://svn.apache.org/repos/asf/poi/trunk/test-data/document/sample.docx'
184
145
  # henkei.uri? #=> true
185
-
146
+ #
186
147
  def uri?
187
- defined? @uri
148
+ !!@uri
188
149
  end
189
150
 
190
151
  # Returns +true+ if the Henkei document was specified from a stream or an object which responds to +read+.
@@ -192,16 +153,16 @@ class Henkei
192
153
  # file = File.open('sample.pages')
193
154
  # henkei = Henkei.new file
194
155
  # henkei.stream? #=> true
195
-
156
+ #
196
157
  def stream?
197
- defined? @stream
158
+ !!@stream
198
159
  end
199
160
 
200
161
  # Returns the raw/unparsed content of the Henkei document.
201
162
  #
202
163
  # henkei = Henkei.new 'sample.pages'
203
164
  # henkei.data
204
-
165
+ #
205
166
  def data
206
167
  return @data if defined? @data
207
168
 
@@ -257,6 +218,7 @@ class Henkei
257
218
  # ensure
258
219
  # Henkei.kill_server!
259
220
  # end
221
+ #
260
222
  def self.kill_server!
261
223
  if @@server_pid
262
224
  Process.kill('INT', @@server_pid)
@@ -265,8 +227,60 @@ class Henkei
265
227
  end
266
228
  end
267
229
 
230
+ ### Private class methods
231
+
232
+ # Provide the path to the Java binary
233
+ #
268
234
  def self.java
269
235
  ENV['JAVA_HOME'] ? ENV['JAVA_HOME'] + '/bin/java' : 'java'
270
236
  end
271
237
  private_class_method :java
238
+
239
+ # Internal helper for calling to Tika library directly
240
+ #
241
+ def self.client_read(type, data)
242
+ switch =
243
+ case type
244
+ when :text
245
+ '-t'
246
+ when :html
247
+ '-h'
248
+ when :metadata
249
+ '-m -j'
250
+ when :mimetype
251
+ '-m -j'
252
+ end
253
+
254
+ IO.popen "#{java} -Djava.awt.headless=true -jar #{Henkei::JARPATH} #{switch}", 'r+' do |io|
255
+ io.write data
256
+ io.close_write
257
+ io.read
258
+ end
259
+ end
260
+ private_class_method :client_read
261
+
262
+ # Internal helper for calling to running Tika server
263
+ #
264
+ def self.server_read(_, data)
265
+ s = TCPSocket.new('localhost', @@server_port)
266
+ file = StringIO.new(data, 'r')
267
+
268
+ while 1
269
+ chunk = file.read(65536)
270
+ break unless chunk
271
+ s.write(chunk)
272
+ end
273
+
274
+ # tell Tika that we're done sending data
275
+ s.shutdown(Socket::SHUT_WR)
276
+
277
+ resp = ''
278
+ while 1
279
+ chunk = s.recv(65536)
280
+ break if chunk.empty? || !chunk
281
+ resp << chunk
282
+ end
283
+ resp
284
+ end
285
+ private_class_method :server_read
272
286
  end
data/lib/henkei/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class Henkei
2
- VERSION = '1.14.3'
2
+ VERSION = '1.14.4'
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: henkei
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.14.3
4
+ version: 1.14.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erol Fornoles
@@ -9,76 +9,76 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2017-02-26 00:00:00.000000000 Z
12
+ date: 2017-05-10 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: mime-types
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - '>='
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '1.23'
21
21
  type: :runtime
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '>='
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '1.23'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: json
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - '>='
32
+ - - ">="
33
33
  - !ruby/object:Gem::Version
34
34
  version: '1.8'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - '>='
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '1.8'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: bundler
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - ~>
46
+ - - "~>"
47
47
  - !ruby/object:Gem::Version
48
48
  version: '1.3'
49
49
  type: :development
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - ~>
53
+ - - "~>"
54
54
  - !ruby/object:Gem::Version
55
55
  version: '1.3'
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: rake
58
58
  requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
- - - '>='
60
+ - - ">="
61
61
  - !ruby/object:Gem::Version
62
62
  version: '0'
63
63
  type: :development
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
- - - '>='
67
+ - - ">="
68
68
  - !ruby/object:Gem::Version
69
69
  version: '0'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: rspec
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
- - - ~>
74
+ - - "~>"
75
75
  - !ruby/object:Gem::Version
76
76
  version: '3.5'
77
77
  type: :development
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
- - - ~>
81
+ - - "~>"
82
82
  - !ruby/object:Gem::Version
83
83
  version: '3.5'
84
84
  description: Read text and metadata from files and documents (.doc, .docx, .pages,
@@ -90,9 +90,9 @@ executables: []
90
90
  extensions: []
91
91
  extra_rdoc_files: []
92
92
  files:
93
- - .gitignore
94
- - .rspec
95
- - .travis.yml
93
+ - ".gitignore"
94
+ - ".rspec"
95
+ - ".travis.yml"
96
96
  - Gemfile
97
97
  - LICENSE
98
98
  - NOTICE.txt
@@ -119,17 +119,17 @@ require_paths:
119
119
  - lib
120
120
  required_ruby_version: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - '>='
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
125
  required_rubygems_version: !ruby/object:Gem::Requirement
126
126
  requirements:
127
- - - '>='
127
+ - - ">="
128
128
  - !ruby/object:Gem::Version
129
129
  version: '0'
130
130
  requirements: []
131
131
  rubyforge_project:
132
- rubygems_version: 2.4.6
132
+ rubygems_version: 2.4.8
133
133
  signing_key:
134
134
  specification_version: 4
135
135
  summary: Read text and metadata from files and documents (.doc, .docx, .pages, .odt,