msworddoc-extractor 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/worddoc-extract +48 -0
- data/lib/msworddoc/extractor.rb +376 -0
- data/lib/msworddoc-extractor.rb +1 -0
- data/test/fareast.doc +0 -0
- data/test/lorem.doc +0 -0
- data/test/test_fareast.rb +32 -0
- data/test/test_io.rb +34 -0
- data/test/test_msworddoc.rb +34 -0
- metadata +81 -0
data/bin/worddoc-extract
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'msworddoc-extractor'
|
5
|
+
|
6
|
+
# Command-line entry point: prints the requested parts of each Word document.
#
# argv - command-line arguments: part switches followed by file paths.
#
# For every file, each selected part is written to standard output with
# +puts+, in the order the switches were given. When no part switch is given,
# only the main document text is printed. Returns the list of file arguments.
def app(*argv)
  actions = []

  # Each row: switch(es), help text, then the extractor method to call.
  options = [
    [ '-d', '--document',   'Main contents (default)', :document ],
    [ '-w', '--whole',      'Whole text contents',     :whole_contents ],
    [ '-i', '--header',     'Header parts',            :header ],
    [ '-f', '--footnote',   'Footnotes',               :footnote ],
    [ '-e', '--endnote',    'Endnotes',                :endnote ],
    [ '-a', '--annotation', 'Annotations',             :annotation ],
    [ '-t', '--textbox',    'Text boxes',              :textbox ],
    [ '--header_textbox',   'Header text boxes',       :header_textbox ],
    [ '-m', '--macro',      'Macro part',              :macro ],
  ]

  optparse = OptionParser.new do |opt|
    opt.banner = 'Usage: worddoc-extract [options] <files> ...'

    options.each do |*switches, action|
      opt.on(*switches) { actions << action }
    end

    opt.separator ''
    opt.on('-h', '--help', 'Show this help') { puts opt; exit }
  end

  # Parse before applying the default: the switch callbacks only fill
  # +actions+ while parsing, so checking earlier would always add :document.
  files = optparse.parse(argv)
  actions << :document if actions.empty?

  files.each do |file|
    # Block form so the document is closed even if an extraction fails.
    MSWordDoc::Extractor.load(file) do |doc|
      actions.each { |action| puts doc.public_send(action) }
    end
  end
end
|
46
|
+
|
47
|
+
app(*ARGV)
|
48
|
+
|
@@ -0,0 +1,376 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'ole/storage'
|
3
|
+
|
4
|
+
module MSWordDoc
|
5
|
+
module Extractor
  VERSION = '0.1.0'

  # Opens a Word document and wraps it in an Essence.
  #
  # file - whatever Ole::Storage.open accepts; the bundled tests pass a path
  #        string, an open File and a StringIO.
  #
  # With a block, yields the loaded document, closes it when the block
  # finishes (even on error) and returns nil. Without a block, returns the
  # document; the caller is then responsible for calling #close on it.
  #
  # Errors raised while reading the document structure propagate to the
  # caller; the storage opened here is closed first so it does not leak.
  def self.load(file)
    ole = Ole::Storage.open(file)
    doc = Essence.new

    loaded = false
    begin
      doc.load_storage(ole)
      loaded = true
    ensure
      # On failure nobody else holds the storage, so release it here.
      ole.close unless loaded
    end

    return doc unless block_given?

    begin
      yield doc
    ensure
      doc.close
    end

    return
  end
end
|
28
|
+
|
29
|
+
# Reads text out of the 'WordDocument' stream of an opened OLE storage,
# using the FIB header fields and the piece table parsed at load time.
class Essence
  # Names of the streams opened inside the OLE storage.
  PPS_NAME_WORDDOC = 'WordDocument'
  PPS_NAME_TABLE_TMPL = '%dTable'

  MAGIC_MSWORD = 0xa5ec
  NFIB_MSWORD6 = 101

  # Byte offsets of the fields read from the start of the WordDocument stream.
  OFFSET_FIB_IDENT = 0x0000
  OFFSET_FIB_FIB = 0x0002

  OFFSET_FIB_FLAGS = 0x000a
  OFFSET_FIB_FCCLX = 0x01a2
  OFFSET_FIB_LCBCLX = 0x01a6

  OFFSET_FIB_FCMIN = 0x0018
  OFFSET_FIB_FCMAC = 0x001c
  OFFSET_FIB_CBMAC = 0x0040

  # Bits tested in the 16-bit flags word at OFFSET_FIB_FLAGS.
  MASK_FIBFLAG_COMPLEX = 0x0004
  MASK_FIBFLAG_ENCRYPTED = 0x0100
  MASK_FIBFLAG_WHICHTBLSTM = 0x0200

  # Sizes in bytes of one CP entry and one PCD entry in the piece table.
  LENGTH_CP = 4
  LENGTH_PCD = 8

  # Offsets of the per-part character counts. The public accessors below
  # treat the parts as laid out back to back in this order.
  OFFSET_FIB_CCP_MAP = {
    :Text    => 0x004c,
    :Ftn     => 0x0050,
    :Hdd     => 0x0054,
    :Mcr     => 0x0058,
    :Atn     => 0x005c,
    :Edn     => 0x0060,
    :Txbx    => 0x0064,
    :HdrTxbx => 0x0068,
  }

  # Replacements applied to control characters by #format_into_plain.
  # Control characters that have no entry here are removed.
  CHARMAP = {
    "\x0a" => "\n", # ASIS: Line Feed
    "\x09" => "\t", # ASIS: Tab

    "\x0d" => "\n", # Paragraph ends; \n + U+2029?

    "\x0b" => "\n", # Hard line breaks

    "\x2d" => "\x2d", # ASIS: Breaking hyphens; U+2010?
    "\x1f" => "\u{00ad}", # Non-required hyphens (into Soft hyphen)
    "\x1e" => "\u{2011}", # Non-breaking hyphens

    "\xa0" => "\xa0", # ASIS: Non-breaking-spaces

    "\x0c" => "\x0c", # ASIS: Page breaks or Section marks

    "\x0e" => "\x0e", # ASIS: Column breaks

    "\x13" => "", # Field begin mark
    "\x15" => "", # Field end mark
    "\x14" => "", # Field separator

    "\x07" => "\t", # Cell mark or Row mark
  }

  def initialize
    @flag = {}
    @ccp = {}
    @ole = nil
  end

  # Closes the underlying storage. Calling it again is a no-op.
  def close
    return if @ole.nil?

    @ole.close()
    @ole = nil
  end

  # Attaches an opened OLE storage and parses the header and piece table.
  #
  # ole - object answering +file.open(name) { |io| ... }+ and +close+.
  #
  # Raises RuntimeError when the stream is not a supported, unencrypted
  # Word document or when its piece table cannot be located.
  def load_storage(ole)
    @ole = ole

    @ole.file.open(PPS_NAME_WORDDOC) do |f|
      parse_fib(f)
    end

    name_of_table = PPS_NAME_TABLE_TMPL % (@flag[:fWhichTblStm] ? 1 : 0)
    @ole.file.open name_of_table do |f|
      parse_piece_table(f)
    end
  end

  # Each accessor below returns one part of the text as a String.
  # Pass +:raw, true+ (or +:raw => true+) to skip #format_into_plain.

  # Everything from the first character to the end of the last piece.
  def whole_contents(*args)
    return retrieve_and_filter(0, -1, *args)
  end

  def document(*args)
    return retrieve_and_filter(0, @ccp[:Text], *args)
  end

  def footnote(*args)
    return retrieve_and_filter(@ccp[:Text], @ccp[:Ftn], *args)
  end

  def header(*args)
    skips = [ :Text, :Ftn ]
    return retrieve_token_and_filter(skips, :Hdd, *args)
  end

  def macro(*args)
    skips = [ :Text, :Ftn, :Hdd ]
    return retrieve_token_and_filter(skips, :Mcr, *args)
  end

  def annotation(*args)
    skips = [ :Text, :Ftn, :Hdd, :Mcr ]
    return retrieve_token_and_filter(skips, :Atn, *args)
  end

  def endnote(*args)
    skips = [ :Text, :Ftn, :Hdd, :Mcr, :Atn ]
    return retrieve_token_and_filter(skips, :Edn, *args)
  end

  def textbox(*args)
    skips = [ :Text, :Ftn, :Hdd, :Mcr, :Atn, :Edn ]
    return retrieve_token_and_filter(skips, :Txbx, *args)
  end

  def header_textbox(*args)
    skips = [ :Text, :Ftn, :Hdd, :Mcr, :Atn, :Edn, :Txbx ]
    return retrieve_token_and_filter(skips, :HdrTxbx, *args)
  end

  private

  # Validates the header of the WordDocument stream +f+ and records the
  # flags, stream offsets and per-part character counts used later.
  def parse_fib(f)
    if get_ushort(f, OFFSET_FIB_IDENT) != MAGIC_MSWORD
      raise 'Not a Word document'
    end

    nFib = get_ushort(f, OFFSET_FIB_FIB)
    if nFib < NFIB_MSWORD6
      raise 'Unsupported version'
    end

    flags = get_ushort(f, OFFSET_FIB_FLAGS)

    @flag[:fComplex] = ((flags & MASK_FIBFLAG_COMPLEX) != 0)

    @flag[:fEncrypted] = ((flags & MASK_FIBFLAG_ENCRYPTED) != 0)
    if @flag[:fEncrypted]
      raise 'Encrypted MSWord document file is not supported'
    end

    @flag[:fWhichTblStm] = ((flags & MASK_FIBFLAG_WHICHTBLSTM) != 0)

    @fcMin = get_ulong(f, OFFSET_FIB_FCMIN)
    @fcMac = get_ulong(f, OFFSET_FIB_FCMAC)
    @cbMac = get_ulong(f, OFFSET_FIB_CBMAC)

    @fcClx = get_ulong(f, OFFSET_FIB_FCCLX)
    @lcbClx = get_ulong(f, OFFSET_FIB_LCBCLX)

    parse_fib_ccps(f)
  end

  # Reads every character count listed in OFFSET_FIB_CCP_MAP into @ccp.
  def parse_fib_ccps(f)
    OFFSET_FIB_CCP_MAP.each do |key, offset|
      @ccp[key] = get_ulong(f, offset)
    end
  end

  # Builds @pcds, a list of pieces ordered by character position. Each piece
  # is a Hash with :fc (raw file-offset field), :cp (first character
  # position) and :ccp (number of characters).
  def parse_piece_table(f)
    if @lcbClx <= 0
      # create pseudo piece table: a single piece covering every part
      ccpAll = OFFSET_FIB_CCP_MAP.keys.inject(0) { |sum, key| sum + @ccp[key] }

      @pcds = [
        {
          :fc  => @fcMin,
          :cp  => 0,
          :ccp => ccpAll,
        }
      ]

      return
    end

    f.pos = @fcClx
    # A read at or past the end of the stream yields nil; treat it as empty
    # so the 'PCDs not found' check below reports it.
    clx = f.read(@lcbClx).to_s

    while clx.length > 0
      clxt = clx.slice!(0, 1).unpack('C')[0]
      break if clxt == 2 # plcfpcd

      if clxt == 1 # grpprl => SKIP
        skip = clx.slice!(0, 2).unpack('v')[0]

        clx.slice!(0, skip)
      else
        raise 'Unknown CLX block'
      end
    end
    raise 'PCDs not found' unless clx.length > 0

    length = clx.slice!(0, 4).unpack('V')[0]

    # The block holds n+1 character positions followed by n PCD entries.
    n = (length - LENGTH_CP) / (LENGTH_CP + LENGTH_PCD)

    cps = []
    (n+1).times do
      cps << clx.slice!(0, LENGTH_CP).unpack('V')[0]
    end

    @pcds = []
    1.upto(n) do |i|
      pcd_data = clx.slice!(0, LENGTH_PCD)

      fc = pcd_data.slice(2, 4).unpack('V')[0]

      @pcds << {
        :fc  => fc,
        :cp  => cps[i - 1],
        :ccp => cps[i] - cps[i - 1]
      }
    end
  end

  # Reads +length+ characters starting at character position +offset+ from
  # the WordDocument stream +f+ and returns them converted by the
  # convert_from_* helpers. A negative +length+ reads to the end of the
  # last piece.
  def retrieve_substring(f, offset, length = -1)
    # Last piece that starts at or before +offset+.
    first_after = @pcds.index { |pcd| pcd[:cp] > offset }
    i = (first_after || @pcds.length) - 1
    raise 'could not find suitable heading piece' unless i >= 0

    # Characters to skip inside the first piece only; +offset+ is a document
    # position, so the piece's own starting position must be subtracted.
    skip = offset - @pcds[i][:cp]

    output = ""
    while length != 0 && i < @pcds.length
      pcd = @pcds[i]

      available = pcd[:ccp] - skip
      len = (length < 0 || available < length) ? available : length

      if len > 0
        if pcd[:fc] & 0x40000000 != 0
          # cp1252: one byte per character, stored offset is doubled
          f.pos = ((pcd[:fc] ^ 0x40000000) >> 1) + skip
          output << convert_from_cp1252(f.read(len).to_s)
        else
          # UTF-16LE: two bytes per character
          f.pos = pcd[:fc] + skip * 2
          output << convert_from_utf16le(f.read(len * 2).to_s)
        end

        length -= len if length > 0
      end

      skip = 0
      i += 1
    end

    return output
  end

  # Reads a little-endian unsigned 16-bit value at byte +pos+ of +f+.
  def get_ushort(f, pos)
    f.pos = pos
    return f.read(2).unpack('v')[0]
  end

  # Reads a little-endian unsigned 32-bit value at byte +pos+ of +f+.
  def get_ulong(f, pos)
    f.pos = pos
    return f.read(4).unpack('V')[0]
  end

  # Extracts the part +target+, which starts after all +skip_tokens+ parts.
  def retrieve_token_and_filter(skip_tokens, target, *args)
    skip = skip_tokens.inject(0) {|sum, key| sum + @ccp[key] }
    return retrieve_and_filter(skip, @ccp[target], *args)
  end

  # Reads a character range from the storage and, unless the :raw option is
  # set, converts its control characters with #format_into_plain.
  def retrieve_and_filter(offset, length, *args)
    opts = Hash[*args]

    string = ""
    @ole.file.open PPS_NAME_WORDDOC do |f|
      string = retrieve_substring(f, offset, length)
    end

    if ! opts[:raw]
      return format_into_plain(string)
    end

    return string
  end

  # Converts Word control characters to plain text: a run of cell marks
  # ending a table row becomes the marks minus two plus a newline, then
  # every remaining control character is replaced through CHARMAP.
  def format_into_plain(text)
    # Block form on purpose: in a replacement *string* '\n' would be emitted
    # as a literal backslash followed by "n", not as a newline.
    rows_ended = text.gsub(/([\x07]*)[\x07]{2}/) { "#{$1}\n" }

    return rows_ended.gsub(/([\x00-\x1f])/) { CHARMAP[$1] || "" }
  end

  if defined?(Encoding)
    # for Ruby 1.9+

    def convert_from_cp1252(str)
      @enc_utf8 ||= Encoding.find('UTF-8')
      @enc_cp1252 ||= Encoding.find('Windows-1252')
      return str.encode(@enc_utf8, @enc_cp1252)
    end

    def convert_from_utf16le(str)
      @enc_utf8 ||= Encoding.find('UTF-8')
      @enc_utf16 ||= Encoding.find('UTF-16LE')
      return str.encode(@enc_utf8, @enc_utf16)
    end
  else
    # for Ruby 1.8
    require 'nkf'

    def convert_from_cp1252(str)
      return NKF.nkf(dest_encoding() + ' -W', str)
    end

    def convert_from_utf16le(str)
      return NKF.nkf(dest_encoding() + ' -W16L0', str)
    end

    # NKF output switch chosen from $KCODE.
    def dest_encoding
      case $KCODE
      when /^E/i then '-e'
      when /^S/i then '-s'
      when /^U/i then '-w'
      else '-w'
      end
    end
  end

end
|
376
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require File.expand_path('../msworddoc/extractor', __FILE__)
|
data/test/fareast.doc
ADDED
Binary file
|
data/test/lorem.doc
ADDED
Binary file
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'rubygems'
|
3
|
+
require 'test/unit'
|
4
|
+
begin
|
5
|
+
require 'redgreen'
|
6
|
+
rescue LoadError
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'msworddoc-extractor'
|
10
|
+
|
11
|
+
# Extraction checks against the Japanese-text fixture.
class TestFareast < Test::Unit::TestCase
  # A fresh document is loaded for every test and closed afterwards.
  def setup
    @doc = MSWordDoc::Extractor.load('test/fareast.doc')
  end

  def teardown
    @doc.close
  end

  def test_document
    body = @doc.document
    assert_match(%r{ 色は匂へど \s+ 散りぬるを }xm, body, "document")
  end

  def test_header
    head = @doc.header
    assert_match(%r{ いろは歌 }xm, head, "header")
  end

  def test_footnote
    notes = @doc.footnote
    assert_match(%r{ いろはにほへとちりぬるを }xm, notes, "footnote")
  end
end
|
data/test/test_io.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
begin
|
4
|
+
require 'redgreen'
|
5
|
+
rescue LoadError
|
6
|
+
end
|
7
|
+
|
8
|
+
require 'msworddoc-extractor'
|
9
|
+
require 'stringio'
|
10
|
+
|
11
|
+
# Checks that a document can be loaded from IO objects, not only from paths.
class TestIO < Test::Unit::TestCase
  # Loading from an open File.
  def test_fileio
    # 'rb': the fixture is binary; text mode may alter bytes on some platforms.
    File.open('test/lorem.doc', 'rb') do |file|
      MSWordDoc::Extractor.load(file) do |doc|
        assert_match %r{ \A Lorem \s+ ipsum \s+ }xm, doc.document, "document"
      end
    end
  end

  # Loading from an in-memory copy of the file.
  def test_stringio
    data = File.open('test/lorem.doc', 'rb') { |file| file.read() }

    io = StringIO.new(data, 'r')

    begin
      MSWordDoc::Extractor.load(io) do |doc|
        assert_match %r{ \A Lorem \s+ ipsum \s+ }xm, doc.document, "document"
      end
    ensure
      # Release the buffer even when the assertion fails.
      io.close() unless io.closed?
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
begin
|
4
|
+
require 'redgreen'
|
5
|
+
rescue LoadError
|
6
|
+
end
|
7
|
+
|
8
|
+
require 'msworddoc-extractor'
|
9
|
+
|
10
|
+
# Extraction checks against the Latin-text fixture.
class TestMsworddoc < Test::Unit::TestCase
  # A fresh document is loaded for every test and closed afterwards.
  def setup
    @doc = MSWordDoc::Extractor.load('test/lorem.doc')
  end

  def teardown
    @doc.close
  end

  # The main text is extracted again for each assertion, as three separate reads.
  def test_document
    assert_match(%r{ \A Lorem \s+ ipsum \s+ }xm, @doc.document, "document")
    assert_match(%r{ \s+ Duis \s+ aute \s+ }xm, @doc.document, "document")
    assert_match(%r{ \s+ id \s+ est \s+ laborum[.] }xm, @doc.document, "document")
  end

  def test_header
    head = @doc.header
    assert_match(%r{ \A Lorem \s+ ipsum \s+ ... }xm, head, "header")
  end

  # The footnote text is likewise extracted once per assertion.
  def test_footnote
    assert_match(%r{ The \s+ quick \s+ brown \s+ fox \s+ }xm, @doc.footnote, "footnote")
    assert_match(%r{ \s+ jumps \s+ over \s+ the \s+ lazy \s+ dog[.] }xm, @doc.footnote, "footnote")
  end
end
|
metadata
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: msworddoc-extractor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- ITO Nobuaki
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-05-11 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: ruby-ole
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Extract text contents from Microsoft Word Document.
|
31
|
+
email:
|
32
|
+
- daydream.trippers@gmail.com
|
33
|
+
executables:
|
34
|
+
- worddoc-extract
|
35
|
+
extensions: []
|
36
|
+
extra_rdoc_files: []
|
37
|
+
files:
|
38
|
+
- bin/worddoc-extract
|
39
|
+
- lib/msworddoc-extractor.rb
|
40
|
+
- lib/msworddoc/extractor.rb
|
41
|
+
- test/test_msworddoc.rb
|
42
|
+
- test/test_fareast.rb
|
43
|
+
- test/test_io.rb
|
44
|
+
- test/lorem.doc
|
45
|
+
- test/fareast.doc
|
46
|
+
homepage: ''
|
47
|
+
licenses: []
|
48
|
+
post_install_message:
|
49
|
+
rdoc_options: []
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
segments:
|
59
|
+
- 0
|
60
|
+
hash: 1216945514845922976
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
segments:
|
68
|
+
- 0
|
69
|
+
hash: 1216945514845922976
|
70
|
+
requirements: []
|
71
|
+
rubyforge_project:
|
72
|
+
rubygems_version: 1.8.23
|
73
|
+
signing_key:
|
74
|
+
specification_version: 3
|
75
|
+
summary: Extract text contents from Microsoft Word Document
|
76
|
+
test_files:
|
77
|
+
- test/test_msworddoc.rb
|
78
|
+
- test/test_fareast.rb
|
79
|
+
- test/test_io.rb
|
80
|
+
- test/lorem.doc
|
81
|
+
- test/fareast.doc
|