simple_cfb 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +201 -0
- data/README.md +146 -0
- data/Rakefile +17 -0
- data/lib/simple_cfb/simple_cfb.rb +1256 -0
- data/lib/simple_cfb/version.rb +15 -0
- data/lib/simple_cfb.rb +4 -0
- metadata +145 -0
@@ -0,0 +1,1256 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'ostruct'
|
4
|
+
require 'date'
|
5
|
+
require 'stringio'
|
6
|
+
require 'active_support/core_ext/object/blank.rb'
|
7
|
+
require 'active_support/core_ext/object/try.rb'
|
8
|
+
|
9
|
+
# Ported from https://github.com/SheetJS/js-cfb.
|
10
|
+
#
|
11
|
+
# File data is added with #add then, when finished, the entire blob of CFB
|
12
|
+
# data is generated in one go with #write. Progressive creation is impossible
|
13
|
+
# as the CFB file requires information on file sizes and directory entries at
|
14
|
+
# the start of output, so all of that must be known beforehand.
|
15
|
+
#
|
16
|
+
# Files can be parsed into a new object with #parse!, then #file_index and
|
17
|
+
# #full_paths examined to extract the parsed CFB container components.
|
18
|
+
#
|
19
|
+
# https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/
|
20
|
+
#
|
21
|
+
# This Ruby port tries to be equivalent to the JavaScript original, but in so
|
22
|
+
# doing there are likely additional bugs and I've omitted anything that wasn't
|
23
|
+
# needed for encrypted OOXML writing and reading.
|
24
|
+
#
|
25
|
+
class SimpleCfb
|
26
|
+
|
27
|
+
# CFB miscellaneous
|
28
|
+
#
|
29
|
+
# Sizes used when laying out the mini stream: #write counts streams shorter
# than 0x1000 (4096) bytes in 64-byte units and everything else in 512-byte
# units.
#
MSSZ = 64 # Mini Sector Size = 1<<6
MSCSZ = 4096 # Mini Stream Cutoff Size

# Convenience accessor to binary-encoded NUL byte.
#
NUL = String.new("\x00", encoding: 'ASCII-8BIT')

# 2.1 Compound File Sector Numbers and Types
#
# Special sector numbers. They are kept as negative Integers because the
# reader and writer handle them through the signed 32-bit helpers.
#
FREESECT = -1
ENDOFCHAIN = -2
FATSECT = -3
DIFSECT = -4
MAXREGSECT = -6

# Compound File Header
#
# Binary (ASCII-8BIT) strings. HEADER_MINOR_VERSION holds the same two bytes
# that #write emits for its 0x003E header field.
#
HEADER_SIGNATURE = String.new("\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1", encoding: 'ASCII-8BIT')
HEADER_CLSID = String.new("\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", encoding: 'ASCII-8BIT')
HEADER_MINOR_VERSION = String.new("\x3e\x00", encoding: 'ASCII-8BIT')
MAXREGSID = -6
NOSTREAM = -1
STREAM = 2

# 2.6.1 Compound File Directory Entry
#
# NOTE(review): the array index appears to correspond to the numeric +type+
# of a directory entry (this file uses 1 for storages, 2 for streams and 5
# for the root) - confirm against the directory reader.
#
ENTRY_TYPES = ['unknown', 'storage', 'stream', 'lockbytes', 'property', 'root']

# Initial seed filename
#
# Name of the placeholder stream that #reinit adds next to the root entry
# when an empty container is first set up.
#
SEED_FILENAME = "\u0001Sh33tJ5"
|
60
|
+
|
61
|
+
# Used internally for parser.
|
62
|
+
#
|
63
|
+
class SectorList < Array
  # An Array of parsed sector chains which also remembers the FAT sector
  # addresses and the sector size that were used to build it.
  #
  # +fat_addrs+:: Array of FAT sector locations
  # +ssz+::       Sector size in bytes
  #
  attr_accessor :fat_addrs, :ssz
end
|
67
|
+
|
68
|
+
# =========================================================================
|
69
|
+
# PUBLIC CLASS INTERFACE
|
70
|
+
# =========================================================================
|
71
|
+
|
72
|
+
# Returns +true+ if the executing computer is little-endian natively,
|
73
|
+
# else +false+.
|
74
|
+
#
|
75
|
+
def self.host_is_little_endian?
  # Pack a small number in native byte order; on a little-endian host the
  # least significant byte is stored first.
  [1].pack('S').getbyte(0) == 1
end
|
78
|
+
|
79
|
+
# Treat an input ASCII-8BIT encoded string as 4 bytes and from this parse and
|
80
|
+
# return an unsigned 32-bit little-endian integer.
|
81
|
+
#
|
82
|
+
# +input+:: ASCII-8BIT encoded string including 4 byte sequence
|
83
|
+
# +index+:: Index into +input+ to start reading bytes (default 0)
|
84
|
+
#
|
85
|
+
def self.get_uint32le(input, index = 0)
  # Read four bytes starting at byte offset +index+ and decode them as an
  # unsigned 32-bit little-endian integer.
  #
  # The 'V' directive is little-endian on every host, so no manual byte
  # reversal is needed. Returns +nil+ if fewer than four bytes are available
  # at +index+; raises NoMethodError if +index+ lies beyond the input.
  input.byteslice(index, 4).unpack1('V')
end
|
91
|
+
|
92
|
+
# Treat an input ASCII-8BIT encoded string as 4 bytes and from this parse and
|
93
|
+
# return a signed 32-bit little-endian integer.
|
94
|
+
#
|
95
|
+
# +input+:: ASCII-8BIT encoded string including 4 byte sequence
|
96
|
+
# +index+:: Index into +input+ to start reading bytes (default 0)
|
97
|
+
#
|
98
|
+
def self.get_int32le(input, index = 0)
  # Read four bytes starting at byte offset +index+ and decode them as a
  # signed 32-bit little-endian integer.
  #
  # The 'l<' directive is little-endian on every host, so no manual byte
  # reversal is needed. Returns +nil+ if fewer than four bytes are available
  # at +index+; raises NoMethodError if +index+ lies beyond the input.
  input.byteslice(index, 4).unpack1('l<')
end
|
104
|
+
|
105
|
+
# Parse a ctime/mtime 8-byte sequence (two 32-bit little endian words) into a
|
106
|
+
# returned Ruby Time object, or +nil+ if the values are all zero.
|
107
|
+
#
|
108
|
+
# +data+:: ASCII-8BIT encoded string, 8 bytes long.
|
109
|
+
#
|
110
|
+
def self.get_time(data)
  # Decode an 8-byte timestamp into a UTC Time, or +nil+ when all eight
  # bytes are zero.
  #
  # The value is read as two unsigned 32-bit little-endian words: the low
  # word at offset 0 and the high word at offset 4. Both are divided by 1e7
  # to give seconds, and 11644473600 seconds are subtracted before the value
  # is handed to Time.at.
  # NOTE(review): presumably this is a Windows FILETIME (100ns ticks since
  # 1601) being rebased onto the Unix epoch - confirm against the spec.
  low_word  = get_uint32le(data, 0)
  high_word = get_uint32le(data, 4)

  return nil if low_word.zero? && high_word.zero?

  high_seconds = (high_word / 1e7) * 2.pow(32)
  low_seconds  = low_word / 1e7

  Time.at(high_seconds + low_seconds - 11644473600).utc
end
|
121
|
+
|
122
|
+
# =========================================================================
|
123
|
+
# PUBLIC INSTANCE INTERFACE
|
124
|
+
# =========================================================================
|
125
|
+
|
126
|
+
attr_accessor :full_paths, :file_index
|
127
|
+
|
128
|
+
def initialize
  # Set up the path and index arrays (root entry plus seed file) through the
  # shared #reinit routine.
  reinit
end
|
131
|
+
|
132
|
+
# Add a file entry. Supports root filenames only. File must not be
|
133
|
+
# added already.
|
134
|
+
#
|
135
|
+
# +name+:: Filename, e.g. "Foo", in your preferred string encoding
|
136
|
+
# +content+:: Mandatory ASCII-8BIT encoded string containing file data
|
137
|
+
#
|
138
|
+
def add(name, content)
  # Register a new stream under the root entry and return its index record.
  #
  # +name+::    Filename; if it does not already begin with the root path,
  #             the root path is prepended (doubled slashes are collapsed).
  # +content+:: String of file data; its byte size is recorded.
  #
  # The directory structure is rebuilt (forced) after the entry is appended.
  reinit

  root = full_paths[0]

  fpath =
    if name.slice(0, root.size) == root
      name
    else
      prefix = root.end_with?('/') ? root : root + '/'
      (prefix + name).gsub('//', '/')
    end

  file = OpenStruct.new(
    name:    filename(name),
    type:    2,
    content: content,
    size:    content.bytesize
  )

  file_index << file
  full_paths << fpath

  rebuild(force_gc: true)

  file
end
|
159
|
+
|
160
|
+
# Compile and return the CFB file data.
|
161
|
+
#
|
162
|
+
def write
  # Serialises the current entries into one ASCII-8BIT string and returns
  # it. The output is assembled strictly in this order: 512-byte header
  # (with the first 109 FAT sector locations), any further DIFAT entries,
  # the FAT, the mini FAT, the directory entries, the data of large streams
  # (>= 0x1000 bytes) and finally the data of small streams.
  #
  # Side effects: calls #rebuild, and assigns +size+/+start+ on the entries
  # held in #file_index.

  # Commented out for now, because we prefer parity with the JS code for
  # test verification purposes. The overhead seems minimal.
  #
  # # Get rid of the seed file if it's still present and we seem to have
  # # more file entries than the root directory and seed entry.
  # #
  # seed_leaf = "/#{SEED_FILENAME}"
  # seed_index = self.full_paths.find_index do | path |
  # path.end_with?(seed_leaf)
  # end
  #
  # unless seed_index.nil? || self.file_index.size < 3
  # self.file_index.delete_at(seed_index)
  # self.full_paths.delete_at(seed_index)
  # end
  #
  # self.rebuild(force_gc: true)
  self.rebuild(force_gc: false)

  # Tally how much space stream contents need: streams below 0x1000 bytes
  # are counted in 64-byte units (mini_size), all others in 512-byte units
  # (fat_size). Entries without content, or with empty content, take none.
  mini_size = 0
  fat_size = 0

  0.upto(self.file_index.size - 1) do | i |
    flen = self.file_index[i]&.content&.bytesize
    next if flen.nil? || flen.zero?

    if flen < 0x1000
      mini_size += (flen + 0x3F) >> 6
    else
      fat_size += (flen + 0x01FF) >> 9
    end
  end

  # Sector counts, each rounded up: four directory entries per sector, eight
  # 64-byte units per sector, and 128 four-byte table entries per sector.
  dir_cnt = (self.full_paths.size + 3) >> 2
  mini_cnt = (mini_size + 7) >> 3
  mfat_cnt = (mini_size + 0x7F) >> 7
  fat_base = mini_cnt + fat_size + dir_cnt + mfat_cnt
  fat_cnt = (fat_base + 0x7F) >> 7
  difat_cnt = fat_cnt <= 109 ? 0 : ((fat_cnt - 109).to_f / 0x7F).ceil()

  # The FAT must also describe the FAT and DIFAT sectors themselves, so keep
  # growing the FAT count until it covers everything.
  while (((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt)
    fat_cnt += 1
    difat_cnt = fat_cnt <= 109 ? 0 : ((fat_cnt - 109).to_f / 0x7F).ceil()
  end

  # el[0] = 1 (the header), el[1] = DIFAT sectors, el[2] = FAT sectors,
  # el[3] = mini FAT sectors, el[4] = directory sectors, el[5] = sectors of
  # large stream data, el[6] = 64-byte units of small stream data,
  # el[7] = total size in 512-byte units (filled in below).
  el = [1, difat_cnt, fat_cnt, mfat_cnt, dir_cnt, fat_size, mini_size, 0]

  # Entry 0 (the root) is given the combined size of all small-stream data;
  # its start is adjusted by -1 when the directory is written further down.
  self.file_index[0].size = mini_size << 6
  self.file_index[0].start = el[0] + el[1] + el[2] + el[3] + el[4] + el[5]

  el[7] = el[0] + el[1] + el[2] + el[3] + el[4] + el[5] + ((el[6] + 7) >> 3)

  o = String.new(encoding: 'ASCII-8BIT')

  # Header. The field order mirrors what #parse! and its helpers read back:
  # signature, 16 NUL bytes (CLSID), minor version, major version, byte
  # order marker, sector shift, mini sector shift, 6 reserved bytes.
  o << HEADER_SIGNATURE
  o << NUL * 2 * 8
  o << write_shift(2, 0x003E)
  o << write_shift(2, 0x0003)
  o << write_shift(2, 0xFFFE)
  o << write_shift(2, 0x0009)
  o << write_shift(2, 0x0006)
  o << NUL * 2 * 3

  # Number of directory sectors (0), number of FAT sectors, first directory
  # sector location, transaction signature (0), mini stream cutoff size,
  # first mini FAT sector location, number of mini FAT sectors, first DIFAT
  # sector location, number of DIFAT sectors.
  o << write_shift( 4, 0)
  o << write_shift( 4, el[2])
  o << write_shift( 4, el[0] + el[1] + el[2] + el[3] - 1)
  o << write_shift( 4, 0)
  o << write_shift( 4, 1<<12)
  o << write_shift( 4, (el[3].blank? || el[3].zero?) ? ENDOFCHAIN : el[0] + el[1] + el[2] - 1)
  o << write_shift( 4, el[3])
  o << write_shift(-4, (el[1].blank? || el[1].zero?) ? ENDOFCHAIN : el[0] - 1)
  o << write_shift( 4, el[1])

  i = 0
  t = 0

  # First 109 FAT sector locations live in the header; unused slots are -1.
  while i < 109
    o << write_shift(-4, i < el[2] ? el[1] + i : -1)
    i += 1
  end

  # Remaining FAT sector locations go into DIFAT sectors: 127 locations
  # each, followed by one word that links to the next DIFAT sector or ends
  # the chain.
  unless el[1].blank? || el[1].zero?
    t = 0
    while t < el[1]
      while i < 236 + t * 127
        o << write_shift(-4, i < el[2] ? el[1] + i : -1)
        i += 1
      end

      o << write_shift(-4, t == el[1] - 1 ? ENDOFCHAIN : t + 1)
      t += 1
    end
  end

  # Emits table entries for a chain of +w+ consecutive sectors: each entry
  # points at the next one and the last is ENDOFCHAIN. It reads and advances
  # the surrounding +i+ (next entry to write) and +t+ (end of the chain),
  # so the order of the calls below matters.
  chainit = Proc.new do | w |
    t += w

    while i < t - 1
      o << write_shift(-4, i + 1)
      i += 1
    end

    unless w.blank? || w.zero?
      i += 1
      o << write_shift(-4, ENDOFCHAIN)
    end
  end

  # FAT contents: markers for the DIFAT sectors, then for the FAT sectors...
  i = 0
  t = el[1]

  while i < t
    o << write_shift(-4, DIFSECT)
    i += 1
  end

  t += el[2]

  while i < t
    o << write_shift(-4, FATSECT)
    i += 1
  end

  # ...then one chain each for the mini FAT and the directory...
  chainit.call(el[3])
  chainit.call(el[4])

  j = 0
  flen = 0
  file = self.file_index[0]

  # ...then one chain per large stream, recording where each one starts...
  while j < self.file_index.size
    file = self.file_index[j]
    j += 1

    next if file.content.nil?

    flen = file.content.bytesize
    next if flen < 0x1000

    file.start = t
    chainit.call((flen + 0x01FF) >> 9)
  end

  # ...and finally one chain for the sectors holding all small-stream data.
  chainit.call((el[6] + 7) >> 3)

  # Pad the FAT up to a 512-byte boundary.
  while o.size & 0x1FF != 0
    o << write_shift(-4, ENDOFCHAIN)
  end

  # Mini FAT: the counters restart at zero and each small stream gets a
  # chain counted in 64-byte units.
  t = i = j = 0

  while j < self.file_index.size do
    file = self.file_index[j]
    j += 1

    next if file.content.nil?

    flen = file.content.bytesize
    next if flen == 0 || flen >= 0x1000

    file.start = t
    chainit.call((flen + 0x3F) >> 6)
  end

  # Pad the mini FAT up to a 512-byte boundary.
  while o.size & 0x1FF != 0
    o << write_shift(-4, ENDOFCHAIN)
  end

  # Directory: el[4] sectors with four 128-byte entries each.
  i = 0

  while i < (el[4] << 2) do
    nm = self.full_paths[i]

    # Slots beyond the known paths are written as empty entries: 68 zero
    # bytes (covering name, name length, type and colour), three -1 words
    # (covering L, R and C) and 48 more zero bytes.
    if nm.blank?
      0.upto(16) { o << write_shift(4, 0) } # Remember, #upto is inclusive -> *17* words
      0.upto(2 ) { o << write_shift(4, -1) }
      0.upto(11) { o << write_shift(4, 0) }

      i += 1
      next # NOTE EARLY LOOP RESTART
    end

    file = self.file_index[i]

    # Entry 0: ENDOFCHAIN when there is no small-stream data, otherwise the
    # start calculated earlier minus one.
    if i.zero?
      file.start = file.size.blank? || file.size.zero? ? ENDOFCHAIN : file.start - 1;
    end

    # Name: at most 32 characters, written as 64 bytes of UTF-16LE, followed
    # by a length field computed as 2 * (characters + 1).
    u_nm = file.name
    u_nm = u_nm[0...32] if u_nm.size > 32

    flen = 2 * (u_nm.size + 1)

    o << write_shift(64, u_nm, 'utf16le')
    o << write_shift(2, flen)
    o << write_shift(1, file.type)
    o << write_shift(1, file.color)
    o << write_shift(-4, file.L)
    o << write_shift(-4, file.R)
    o << write_shift(-4, file.C)

    # CLSID: 16 zero bytes when the entry has none.
    if file.clsid.blank?
      j = 0
      while j < 4
        o << write_shift(4, 0)
        j += 1
      end
    else
      o << file.clsid
    end

    # State word, four zero words (NOTE(review): presumably the creation and
    # modification timestamps, left unset - confirm), start, size and one
    # trailing zero word.
    o << write_shift(4, file.state.blank? || file.state.zero? ? 0 : file.state)
    o << write_shift(4, 0)
    o << write_shift(4, 0)
    o << write_shift(4, 0)
    o << write_shift(4, 0)
    o << write_shift(4, file.start)
    o << write_shift(4, file.size)
    o << write_shift(4, 0)

    i += 1
  end

  # Large stream data: each stream is placed at byte offset
  # (start + 1) << 9 and padded with NULs to a 512-byte boundary.
  i = 1

  while i < self.file_index.size do
    file = self.file_index[i]

    if file.size.present? && file.size >= 0x1000
      aligned_size = (file.start + 1) << 9
      while (o.size < aligned_size) do; o << 0x00; end

      o << file.content
      while (o.size % 512 != 0) do; o << 0x00; end
    end

    i += 1
  end

  # Small stream data: appended back to back, each padded with NULs to a
  # 64-byte boundary.
  i = 1

  while i < self.file_index.size do
    file = self.file_index[i]

    if file.size.present? && file.size > 0 && file.size < 0x1000
      o << file.content
      while (o.size % 64 != 0) do; o << 0x00; end
    end

    i += 1
  end

  # Pad the output to the total size calculated up front.
  while (o.size < el[7] << 9) do; o << 0x00; end

  return o
end # "def write"
|
420
|
+
|
421
|
+
# Parses an input file into this object, allowing you to extract individual
|
422
|
+
# files thereafter via #read.
|
423
|
+
#
|
424
|
+
# +file+:: Source I/O stream. It is always closed before this method
#          returns, whether parsing succeeds or an exception is raised.
|
426
|
+
#
|
427
|
+
def parse!(file)
  # Parse a CFB container from +file+ and replace this object's
  # #full_paths and #file_index with what was found.
  #
  # +file+:: Source I/O stream; must respond to #size, #read, #seek, #pos,
  #          #eof?, #rewind and #close. It is closed on exit, even when an
  #          exception is raised.
  #
  # Raises RuntimeError when the input is shorter than 512 bytes, looks
  # like a ZIP archive, has a major version other than 3 or 4, or fails one
  # of the header field checks.
  raise "CFB corrupt - file size < 512 bytes" if file.size < 512

  fat_addrs = [] # Locations of FAT sectors

  # [MS-CFB] 2.2 Compound File Header
  # Check major version; it determines the sector size.
  #
  major, minor = check_get_mver(file)

  ssz =
    case major
    when 3 then 512
    when 4 then 4096
    else
      raise 'Zip contents are not supported' if major == 0 && minor == 0

      # Report the version that was actually read from the file.
      raise "Major version: Only 3 or 4 is supported; #{major} encountered"
    end

  check_shifts(file, major)

  # Number of Directory Sectors
  #
  dir_cnt = read_shift(file, 4, 'i')
  raise "Directory sectors: Expected 0, saw #{dir_cnt}" if major == 3 && dir_cnt != 0

  # Number of FAT Sectors (skipped)
  #
  file.seek(file.pos + 4)

  # First Directory Sector Location
  #
  dir_start = read_shift(file, 4, 'i')

  # Transaction Signature (skipped)
  #
  file.seek(file.pos + 4)

  # Mini Stream Cutoff Size
  #
  check_field(file, "\x00\x10\x00\x00", 'Mini stream cutoff size')

  # First Mini FAT Sector Location, Number of Mini FAT Sectors, First DIFAT
  # Sector Location, Number of DIFAT Sectors
  #
  minifat_start = read_shift(file, 4, 'i')
  nmfs          = read_shift(file, 4, 'i')
  difat_start   = read_shift(file, 4, 'i')
  difat_sec_cnt = read_shift(file, 4, 'i')

  # Grab FAT Sector Locations: up to 109 in the header, stopping at the
  # first negative (unused) slot.
  #
  109.times do
    addr = read_shift(file, 4, 'i')
    break if addr < 0

    fat_addrs << addr
  end

  # Break the file up into sectors, skipping the file header of 'ssz' size.
  #
  sectors = []
  file.seek(ssz)
  sectors << file.read(ssz) until file.eof?

  # Collect any further FAT sector locations from the DIFAT chain.
  #
  sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs)

  # Chains
  #
  sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz)
  sector_list[dir_start].name = '!Directory'
  sector_list[minifat_start].name = '!MiniFAT' if nmfs > 0 && minifat_start != ENDOFCHAIN
  sector_list[fat_addrs[0]].name = '!FAT'
  sector_list.fat_addrs = fat_addrs
  sector_list.ssz = ssz

  # [MS-CFB] 2.6.1 Compound File Directory Entry
  #
  files = {}
  paths = []

  self.full_paths = []
  self.file_index = []

  read_directory(dir_start, sector_list, sectors, paths, nmfs, files, minifat_start)
  build_full_paths(paths)
ensure
  file.close unless file.nil?
end # "def parse!"
|
550
|
+
|
551
|
+
# =========================================================================
|
552
|
+
# PRIVATE INSTANCE METHODS
|
553
|
+
# =========================================================================
|
554
|
+
#
|
555
|
+
private
|
556
|
+
|
557
|
+
# Initialise or reinitialise the internal file data. After being called
|
558
|
+
# for the first time, calling here is really only useful to make sure
|
559
|
+
# that internal file path and index arrays look consistent.
|
560
|
+
#
|
561
|
+
def reinit
  # Make sure the path and index arrays exist and are the same length; if
  # they are empty, seed them with the root entry and the placeholder seed
  # file.
  #
  # Raises RuntimeError ('Inconsistent CFB structure') when the two arrays
  # differ in length.
  self.full_paths ||= []
  self.file_index ||= []

  raise 'Inconsistent CFB structure' unless full_paths.size == file_index.size
  return unless full_paths.empty?

  root_name = 'Root Entry'

  full_paths << root_name + '/'
  file_index << OpenStruct.new(name: root_name, type: 5)

  # Add starting seed file; its content is the four bytes 55, 50, 54, 50.
  #
  seed_content = [55, 50, 54, 50].pack('C*')

  full_paths << full_paths[0] + SEED_FILENAME
  file_index << OpenStruct.new(
    name:    SEED_FILENAME,
    type:    2,
    content: seed_content,
    R:       69,
    L:       69,
    C:       69
  )
end
|
584
|
+
|
585
|
+
# Strange function that's very much not the same as "File.dirname".
|
586
|
+
#
|
587
|
+
def dirname(p)
  # Return the containing directory of +p+, always with a trailing slash
  # when one can be found.
  #
  # * "a/b"  -> "a/"   (everything up to and including the last slash)
  # * "a/b/" -> "a/"   (a directory's parent)
  # * "a/"   -> "a/"   (a top-level directory is its own parent)
  # * "a"    -> "a"    (no slash at all: returned unchanged)
  unless p.end_with?('/')
    last_slash = p.rindex('/')
    return last_slash ? p[0..last_slash] : p
  end

  without_slash = p.chomp('/')
  without_slash.include?('/') ? dirname(without_slash) : p
end
|
597
|
+
|
598
|
+
# Strange function that's very much not the same as "File.basename".
|
599
|
+
#
|
600
|
+
def filename(p)
  # Return the last path component of +p+, ignoring any trailing slashes.
  #
  # * "a/b"   -> "b"
  # * "a/b/"  -> "b"
  # * "a"     -> "a"
  stripped = p
  stripped = stripped.chomp('/') while stripped.end_with?('/')

  last_slash = stripped.rindex('/')
  last_slash ? stripped[(last_slash + 1)..] : stripped
end
|
609
|
+
|
610
|
+
# Compare file-path-name with some FAT concepts thrown in (L vs R); related
|
611
|
+
# to CFB section 2.6.4 (red-black trees).
|
612
|
+
#
|
613
|
+
def namecmp(l, r)
  # Three-way comparison of two slash-separated paths, suitable for use as
  # a sort block.
  #
  # Paths are compared component by component. Within a component, a
  # shorter name sorts before a longer one; names of equal length are
  # ordered by plain string comparison. If all shared components are equal,
  # the path with fewer components sorts first.
  #
  # Returns a negative Integer, zero or a positive Integer.
  left_parts  = l.split('/')
  right_parts = r.split('/')
  shared      = [left_parts.size, right_parts.size].min

  shared.times do |i|
    left  = left_parts[i]
    right = right_parts[i]

    by_length = left.size - right.size
    return by_length unless by_length.zero?

    # Compare component with component (not with a character of the raw
    # right-hand path).
    return left < right ? -1 : 1 if left != right
  end

  left_parts.size - right_parts.size
end
|
630
|
+
|
631
|
+
# CFB internal knowledge would be required to understand this; seems to be
|
632
|
+
# recalculating data structures that then theoretically would make life
|
633
|
+
# easier during the file output stage.
|
634
|
+
#
|
635
|
+
def rebuild(force_gc: false)
  # Recompute the directory structure held in #full_paths / #file_index.
  #
  # +force_gc+:: When +true+ the structure is always rebuilt. Otherwise the
  #              entries are inspected first and the rebuild only happens
  #              if something looks stale (see below).
  #
  # A rebuild drops entries of type 0, adds storage entries for any missing
  # parent directories, sorts everything with #namecmp and then resets
  # name, colour, L/R/C links, size, start, CLSID and type on every entry.
  reinit

  seen_entry = false
  gc = force_gc

  unless gc == true
    # Walk backwards: trailing type-0 entries are simply popped off, but a
    # type-0 entry in front of a real one, an unknown type, unset links or
    # identical non-negative L/R links all require a full rebuild.
    (full_paths.size - 1).downto(0) do |i|
      file = file_index[i]

      case file.type
      when 0
        if seen_entry
          gc = true
        else
          file_index.pop
          full_paths.pop
        end
      when 1, 2, 5
        seen_entry = true
        gc ||= [file.R, file.L, file.C].any?(&:nil?)
        gc ||= file.R > -1 && file.L > -1 && file.R == file.L
      else
        gc = true
      end
    end
  end

  return unless gc == true

  now = Date.parse('1987-01-19')

  # Track which names exist
  #
  known_paths = {}
  data = []

  full_paths.each_with_index do |path, i|
    known_paths[path] = true
    next if file_index[i].type == 0

    data.push([path, file_index[i]])
  end

  # Make sure every entry's parent directory exists, creating storage
  # entries from the top down. Only the entries present at this point are
  # visited; the inner loop adds all missing ancestors of each one.
  data.size.times do |i|
    path = data[i][0]
    dad  = dirname(path)

    until known_paths[dad]
      # Climb to the highest ancestor that is still missing. Stop when the
      # parent is known, blank, or when #dirname can go no higher (which
      # would otherwise loop forever).
      loop do
        parent = dirname(dad)
        break if parent.blank? || parent == dad || known_paths[parent]

        dad = parent
      end

      data.push([
        dad,
        OpenStruct.new(
          name:    filename(dad).gsub('/', ''),
          type:    1,
          clsid:   HEADER_CLSID,
          ct:      now,
          mt:      now,
          content: nil
        )
      ])

      # Add name to set, then start again from the immediate parent.
      #
      known_paths[dad] = true
      dad = dirname(path)
    end
  end

  data.sort! { |x, y| namecmp(x[0], y[0]) }

  self.full_paths = data.map { |pair| pair[0] }
  self.file_index = data.map { |pair| pair[1] }

  file_index.each_with_index do |elt, i|
    nm = full_paths[i]

    elt.name  = filename(nm).gsub('/', '')
    elt.color = 1
    elt.L     = -1
    elt.R     = -1
    elt.C     = -1
    elt.size  = elt.content.nil? ? 0 : elt.content.bytesize
    elt.start = 0
    elt.clsid = elt.clsid || HEADER_CLSID

    if i == 0
      # Root entry: its child is the next entry, if there is one.
      elt.C    = data.size > 1 ? 1 : -1
      elt.size = 0
      elt.type = 5
    elsif nm.end_with?('/')
      # Storage: child is the first later entry inside it; right link is
      # the first later entry sharing its parent.
      later   = (i + 1)...data.size
      child   = later.find { |j| dirname(full_paths[j]) == nm }
      sibling = later.find { |j| dirname(full_paths[j]) == dirname(nm) }

      elt.C    = child || -1
      elt.R    = sibling || -1
      elt.type = 1
    else
      # Stream: right link is the next entry when it shares the parent.
      elt.R    = i + 1 if dirname(full_paths[i + 1] || '') == dirname(nm)
      elt.type = 2
    end
  end
end
|
763
|
+
|
764
|
+
# Returns a chunk of data representing a converted write of the input in
|
765
|
+
# the +value+ parameter.
|
766
|
+
#
|
767
|
+
# The JS code from which this was ported has a very, VERY strange method
|
768
|
+
# signature...
|
769
|
+
#
|
770
|
+
# +size+:: Either a number of bytes to write or a format specifier (see
|
771
|
+
# below).
|
772
|
+
#
|
773
|
+
# +value+:: A value to write; its type is interpreted through both the
|
774
|
+
# +size+ and +format+ parameters.
|
775
|
+
#
|
776
|
+
# +format+:: Either 'hex' or 'utf16le' in which case the value is treated
|
777
|
+
# as a hex string (e.g. "deadbeef", high nibble first) or
|
778
|
+
# character data in arbitrary Ruby string encoding; written to
|
779
|
+
# the output as parsed bytes from the hex data, or little
|
780
|
+
# endian UTF-16 byte pairs, respectively. If the input value
|
781
|
+
# is longer than +size+ *IN BYTES* then it is truncated, else
|
782
|
+
# if need be, padded with zeros - again *IN BYTES*, so the
|
783
|
+
# maximum length in characters of a "utf16le" string is half
|
784
|
+
# the amount in +size+.
|
785
|
+
#
|
786
|
+
# If +format+ is something else or omitted, "size" becomes an
|
787
|
+
# indication of format (!). The value is treated as an 8-bit
|
788
|
+
# byte (+size+ is 1) and masked as such, 16-bit unsigned
|
789
|
+
# little-endian value (2), or uint32 (4) - or a signed int32
|
790
|
+
# (+size+ is -4 - yes, that's minus 4) - written out as four
|
791
|
+
# bytes, little-endian.
|
792
|
+
#
|
793
|
+
def write_shift(size, value, format = nil)
  # Convert +value+ into a chunk of bytes and return it.
  #
  # +format+ 'hex':     +value+ is a hex string; the decoded bytes are
  #                     NUL-padded or truncated to +size+ bytes.
  # +format+ 'utf16le': +value+ is text; it is NUL-padded or truncated to
  #                     +size+ / 2 characters and encoded as UTF-16LE.
  # otherwise:          +size+ selects the integer format - 1 (unsigned
  #                     byte), 2 (unsigned 16-bit LE), 4 (unsigned 32-bit
  #                     LE) or -4 (signed 32-bit LE). Any other +size+
  #                     yields +nil+.
  case format
  when 'hex'
    [value].pack('H*').ljust(size, NUL)[0...size]

  when 'utf16le'
    max_chars = size / 2
    padded    = value.ljust(max_chars, NUL)[0...max_chars]

    padded.encode('UTF-16LE').force_encoding('ASCII-8BIT')

  else
    case size
    when 1  then [value].pack('C')  # Unsigned 8-bit, bitwise truncated
    when 2  then [value].pack('v')  # Unsigned 16-bit little-endian, bitwise truncated
    when 4  then [value].pack('V')  # Unsigned 32-bit little-endian, bitwise truncated
    when -4 then [value].pack('l<') # Signed 32-bit, little-endian on any host
    end
  end
end
|
826
|
+
|
827
|
+
# A method that's a companion to #write_shift and equally strange!
|
828
|
+
#
|
829
|
+
# Read from file for 'size' bytes if size is 1, 2 or 4, parsing the bytes
|
830
|
+
# as an 8-bit unsigned, 16-bit unsigned or 32-bit integer where the value
|
831
|
+
# of 't' indicates if the 32-bit integer is signed ('t' is string 'i') or
|
832
|
+
# unsigned ('t' is anything else); or if size is 16, just return a string
|
833
|
+
# of 16 bytes read as-is.
|
834
|
+
#
|
835
|
+
# This implementation is slightly cleaner and more appropriate than the
|
836
|
+
# one in the original source, by omitting unused conversions.
|
837
|
+
#
|
838
|
+
# +file+:: Source I/O stream. Data is read from the current file pointer,
|
839
|
+
# which will therefore have advanced when the method returns.
|
840
|
+
#
|
841
|
+
# +size+:: 1, 2, 4 to read 1, 2 or 4 bytes returned as a parsed 8, 16 or
|
842
|
+
# 32-bit little-endian integer respectively, or pass 16 to read
|
843
|
+
# 16 bytes of raw data returned as an ASCII-8BIT encoded string.
|
844
|
+
#
|
845
|
+
# +type+:: If +size+ is 4, pass 'i' to read as a signed 32-bit integer,
|
846
|
+
# else (omitted, or not 'i') value is read as unsigned.
|
847
|
+
#
|
848
|
+
def read_shift(file, size, t = nil)
  # Read +size+ bytes from +file+ at its current position and decode them.
  #
  # +size+ 1:  unsigned 8-bit integer
  # +size+ 2:  unsigned 16-bit little-endian integer
  # +size+ 4:  32-bit little-endian integer, signed when +t+ is 'i',
  #            unsigned otherwise
  # +size+ 16: the 16 raw bytes, returned as a String
  #
  # Any other +size+ reads nothing and returns +nil+.
  case size
  when 1
    file.read(1).getbyte(0)
  when 2
    file.read(2).unpack1('v')
  when 4
    raw = file.read(4)
    t == 'i' ? self.class.get_int32le(raw) : self.class.get_uint32le(raw)
  when 16
    file.read(16)
  end
end
|
869
|
+
|
870
|
+
# Read from the file, expecting to see a particular value; if not, throw
|
871
|
+
# an exception.
|
872
|
+
#
|
873
|
+
# +file+:: Source I/O stream. Data is read from the current file
|
874
|
+
# pointer, which will therefore have advanced when the
|
875
|
+
# method returns.
|
876
|
+
#
|
877
|
+
# +expected+:: The expected value, as a String that'll be forced to
|
878
|
+
# ASCII-8BIT encoding, if not that way already.
|
879
|
+
#
|
880
|
+
# +field_name+:: The field name to include in the raised exception, just
|
881
|
+
# for human diagnostic purposes.
|
882
|
+
#
|
883
|
+
def check_field(file, expected, field_name)
  # Read as many bytes as +expected+ contains and compare them, byte for
  # byte, with +expected+. Returns +nil+ on a match; raises RuntimeError
  # naming +field_name+ otherwise.
  wanted = expected.b
  actual = file.read(wanted.bytesize)

  return if actual == wanted

  raise "#{field_name}: Expected #{wanted.inspect}, but got #{actual.inspect}"
end
|
891
|
+
|
892
|
+
# Return a tuple array of major, minor file version, with 0, 0 for ZIP
|
893
|
+
# files, else read from the CFB file, checking header in passing. File
|
894
|
+
# pointer is assumed to be at zero on entry.
|
895
|
+
#
|
896
|
+
# +file+:: Source I/O stream. Data is read from the current file pointer,
|
897
|
+
# which will therefore have advanced when the method returns.
|
898
|
+
#
|
899
|
+
def check_get_mver(file)
  # Return [major, minor] version numbers from the file header, or [0, 0]
  # when the file starts with the ZIP magic bytes "PK".
  #
  # For non-ZIP input the stream is rewound, the header signature is
  # verified (raising RuntimeError on mismatch), the 16-byte CLSID is
  # skipped and the two 16-bit version fields are read - minor first.
  #
  # The first two bytes are compared as a String: IO#read returns a String,
  # which never equals an Integer such as 0x50.
  return [0, 0] if file.read(2) == 'PK'

  file.rewind
  check_field(file, HEADER_SIGNATURE, 'Header signature')

  file.seek(file.pos + 16) # Skip all-NUL CLSID, 16 bytes

  minor = read_shift(file, 2)
  major = read_shift(file, 2)

  [major, minor]
end
|
913
|
+
|
914
|
+
# Check sector shifts in the file header.
|
915
|
+
#
|
916
|
+
# +file+:: Source I/O stream. Data is read from the current file pointer,
|
917
|
+
# which will therefore have advanced when the method returns.
|
918
|
+
#
|
919
|
+
# +major+:: Major version number - must be 3 or 4.
|
920
|
+
#
|
921
|
+
def check_shifts(file, major)
  # Validate the sector shift, mini sector shift and reserved header fields.
  #
  # +file+::  Source I/O stream, positioned at the byte order marker.
  # +major+:: Major version number - must be 3 or 4.
  #
  # Raises RuntimeError when the sector shift is neither 9 nor 12, when it
  # does not belong to +major+ (9 goes with 3, 12 with 4), or when either
  # of the following fixed fields has an unexpected value.

  # Skip byte order marker (always indicates little-endian)
  #
  file.seek(file.pos + 2)

  shift = read_shift(file, 2)

  raise "Sector shift: Unsupported value #{shift}" unless [0x09, 0x0c].include?(shift)

  expected = { 3 => 0x09, 4 => 0x0c }[major]

  if shift != expected
    # Name the value this major version requires, not the one just read.
    wanted = expected.nil? ? "no valid value for major version #{major}" : expected
    raise "Sector shift: Expected #{wanted}, saw #{shift}"
  end

  # Mini Sector Shift
  #
  check_field(file, "\x06\x00", 'Mini sector shift')

  # Reserved
  #
  check_field(file, "\x00\x00\x00\x00\x00\x00", 'Reserved')
end
|
946
|
+
|
947
|
+
# Chase down the rest of the DIFAT chain to build a comprehensive list.
|
948
|
+
# DIFAT chains by storing the next sector number as the last 32 bits.
|
949
|
+
#
|
950
|
+
# +idx+:: Sector index; usually, start DIFAT sector initially
|
951
|
+
# +cnt+:: DIFAT sector count expected
|
952
|
+
# +sectors+:: Array of sectors
|
953
|
+
# +ssz+:: Size of a sector
|
954
|
+
# +fat_addrs+:: Array MODIFIED IN PLACE with sector addresses added
|
955
|
+
#
|
956
|
+
def sleuth_fat(idx, cnt, sectors, ssz, fat_addrs)
  # Walk a chain of DIFAT sectors, appending the 32-bit entries found in each
  # one to +fat_addrs+. The final 32 bits of each sector give the index of
  # the next sector in the chain.
  #
  # +idx+::       Index in +sectors+ of the DIFAT sector to start from.
  # +cnt+::       Number of further DIFAT sectors expected in the chain.
  # +sectors+::   Array of sector data strings.
  # +ssz+::       Sector size in bytes.
  # +fat_addrs+:: Array MODIFIED IN PLACE with the entries read.
  #
  # Raises a RuntimeError if the chain ends (ENDOFCHAIN) while +cnt+ is still
  # non-zero. Always returns nil.
  #
  entries_per_sector = (ssz >> 2) - 1 # Last 32-bit slot is the "next" link

  loop do
    if idx == ENDOFCHAIN
      raise 'DIFAT chain shorter than expected' if cnt != 0
      return
    end

    # Any other negative value (FREESECT and other reserved markers) names no
    # sector. Ruby would otherwise index from the END of the array for a
    # negative index and read an unrelated sector.
    return if idx < 0

    sector = sectors[idx]
    return if sector.nil?

    entries_per_sector.times do |slot|
      entry = self.class.get_int32le(sector, slot * 4)
      break if entry == ENDOFCHAIN

      fat_addrs << entry
    end

    return unless cnt >= 1

    idx = self.class.get_int32le(sector, ssz - 4)
    cnt -= 1
  end
end
|
987
|
+
|
988
|
+
# Follow the linked list of sectors for a given starting point.
|
989
|
+
#
|
990
|
+
# Parameters need to be guessed from caller use cases.
|
991
|
+
#
|
992
|
+
def get_sector_list(sectors, start, fat_addrs, ssz, chkd = nil)
  # Follow one FAT chain from +start+ and collect the sectors along it.
  #
  # +sectors+::   Array of sector data strings.
  # +start+::     Index of the first sector in the chain.
  # +fat_addrs+:: Array of indices (into +sectors+) of the FAT sectors.
  # +ssz+::       Sector size in bytes.
  # +chkd+::      Optional collection MODIFIED IN PLACE; every visited sector
  #               index is set to +true+ in it. Omitted or nil means a
  #               throwaway array is used. It is optional because at least
  #               one caller in this class passes only four arguments.
  #
  # Returns an OpenStruct with +nodes+ (visited sector indices, in order) and
  # +data+ (the visited sectors' data joined together).
  #
  # The walk stops at the first negative chain value, when the FAT sector
  # that should hold the next link is unavailable, or when the chain loops
  # back onto a sector already visited in this walk.
  #
  chkd ||= []
  nodes = []
  chunks = []
  visited = {}
  offset_mask = ssz - 1
  current = start

  while current >= 0 && !visited[current]
    visited[current] = true
    chkd[current] = true
    nodes << current
    chunks << sectors[current]

    # Each FAT entry is 4 bytes; locate the FAT sector and the offset within
    # it that hold the link for the current sector.
    fat_sector_index = fat_addrs[(current * 4) / ssz]
    offset = (current * 4) & offset_mask

    raise "FAT boundary crossed: #{current} 4 #{ssz}" if ssz < 4 + offset

    # A missing address would raise TypeError when used as an array index,
    # and a negative one would wrap around to the end of the array.
    break if fat_sector_index.nil? || fat_sector_index < 0

    fat_sector = sectors[fat_sector_index]
    break if fat_sector.nil?

    current = self.class.get_int32le(fat_sector, offset)
  end

  OpenStruct.new(nodes: nodes, data: chunks.join)
end
|
1016
|
+
|
1017
|
+
# Chase down the sector linked lists.
|
1018
|
+
#
|
1019
|
+
# Parameters need to be guessed from caller use cases.
|
1020
|
+
#
|
1021
|
+
def make_sector_list(sectors, dir_start, fat_addrs, ssz)
  # Build the chain for every sector that is not already part of a chain
  # built earlier in this call, starting from +dir_start+ and wrapping round.
  #
  # +sectors+::   Array of sector data strings.
  # +dir_start+:: Sector index at which to begin; chains are keyed in the
  #               result by their first sector index.
  # +fat_addrs+:: Array of indices (into +sectors+) of the FAT sectors.
  # +ssz+::       Sector size in bytes.
  #
  # Returns a new SectorList whose entry at each chain's starting index is an
  # OpenStruct with +nodes+ (sector indices in chain order) and +data+ (the
  # chain's sector data joined together).
  #
  sector_count = sectors.length
  sector_list = SectorList.new
  chkd = []
  offset_mask = ssz - 1

  sector_count.times do |step|
    first = step + dir_start
    first -= sector_count if first >= sector_count

    next if chkd[first]

    nodes = []
    chunks = []
    seen = {}
    current = first

    while current >= 0
      seen[current] = true
      chkd[current] = true

      nodes << current
      chunks << sectors[current]

      # Each FAT entry is 4 bytes; find the FAT sector and offset that hold
      # the link for the current sector.
      addr = fat_addrs[(current * 4) / ssz]
      offset = (current * 4) & offset_mask

      raise "FAT boundary crossed: #{current} 4 #{ssz}" if ssz < 4 + offset

      # A missing FAT address would raise TypeError when used as an index,
      # and a negative one would wrap to the end of the array.
      break if addr.nil? || addr < 0 || sectors[addr].nil?

      current = self.class.get_int32le(sectors[addr], offset)
      break if seen[current] # Chain loops back on itself
    end

    sector_list[first] = OpenStruct.new(nodes: nodes, data: chunks.join)
  end

  sector_list
end
|
1067
|
+
|
1068
|
+
# [MS-CFB] 2.6.1 Compound File Directory Entry.
|
1069
|
+
#
|
1070
|
+
# Parameters need to be guessed from caller use cases.
|
1071
|
+
#
|
1072
|
+
def read_directory(dir_start, sector_list, sectors, paths, nmfs, files, mini)
  # Parse the 128-byte directory entries found in the chain data at
  # +sector_list[dir_start]+, recording each as an OpenStruct.
  #
  # +dir_start+::   Index in +sector_list+ of the chain holding the entries.
  # +sector_list+:: Chain collection indexed by starting sector. Its entries
  #                 respond to #data and #name=, and the collection itself to
  #                 #fat_addrs and #ssz. MODIFIED IN PLACE: chains are named,
  #                 and missing FAT chains are added.
  # +sectors+::     Array of sector data, passed on to #get_sector_list.
  # +paths+::       Array MODIFIED IN PLACE; each decoded name is appended.
  # +nmfs+::        Only compared with zero here - presumably the number of
  #                 mini FAT sectors (TODO confirm against caller).
  # +files+::       Hash MODIFIED IN PLACE; entry objects keyed by name.
  # +mini+::        Index in +sector_list+ of the chain passed to
  #                 #get_mfat_entry as the mini FAT.
  #
  # Every entry is also appended to "self.file_index". Returns nil.
  #
  minifat_store = 0
  pl = paths.any? ? 2 : 0
  sector = sector_list[dir_start].data
  nul_terminator = String.new("\x00\x00", encoding: 'UTF-16LE')

  (0...sector.size).step(128) do |offset|
    blob = StringIO.new(sector.slice(offset, 128))

    # The name's byte length is stored at offset 64; the UTF-16LE name
    # itself sits at the start of the entry.
    blob.seek(64)
    namelen = read_shift(blob, 2)

    blob.seek(0)
    name = blob.read(namelen - pl).force_encoding('UTF-16LE')
    name.chomp!(nul_terminator)
    name.encode!('UTF-8')

    paths << name

    # Remaining fields are read sequentially from offset 66 onwards.
    blob.seek(66)
    o = OpenStruct.new(
      name: name,
      type: read_shift(blob, 1),
      color: read_shift(blob, 1),
      L: read_shift(blob, 4, 'i'),
      R: read_shift(blob, 4, 'i'),
      C: read_shift(blob, 4, 'i'),
      clsid: read_shift(blob, 16),
      state: read_shift(blob, 4, 'i'),
      start: 0,
      size: 0
    )

    o.ct = self.class.get_time(blob.read(8))
    o.mt = self.class.get_time(blob.read(8))
    o.start = read_shift(blob, 4, 'i')
    o.size = read_shift(blob, 4, 'i')

    # Negative start AND size: treat the slot as an empty, unknown entry.
    if o.size < 0 && o.start < 0
      o.size = o.type = 0
      o.start = ENDOFCHAIN
      o.name = ''
    end

    if o.type == 5 # Root
      minifat_store = o.start

      if nmfs > 0 && minifat_store != ENDOFCHAIN
        sector_list[minifat_store].name = '!StreamData'
      end
    elsif o.size >= MSCSZ
      o.storage = 'fat'

      if sector_list[o.start].nil?
        # #get_sector_list is declared with five parameters, so the trailing
        # "checked" collection must be supplied explicitly. Leaving it off
        # raises ArgumentError; nil makes it use a scratch array.
        sector_list[o.start] = get_sector_list(
          sectors,
          o.start,
          sector_list.fat_addrs,
          sector_list.ssz,
          nil
        )
      end

      sector_list[o.start].name = o.name
      o.content = sector_list[o.start].data.slice(0, o.size)
    else
      o.storage = 'minifat'

      if o.size < 0
        o.size = 0
      elsif minifat_store != ENDOFCHAIN && o.start != ENDOFCHAIN && !sector_list[minifat_store].nil?
        o.content = get_mfat_entry(o, sector_list[minifat_store].data, sector_list[mini]&.data)
      end
    end

    # Keyed by the decoded name, even if "o.name" was blanked above.
    files[name] = o
    self.file_index << o
  end

  nil
end
|
1148
|
+
|
1149
|
+
# [MS-CFB] 2.6.4 Red-Black Tree.
|
1150
|
+
#
|
1151
|
+
# +paths+:: Array of incomplete paths (often just leafnames) where indices
|
1152
|
+
# in the array correspond to "self.file_index" entries; contents
|
1153
|
+
# in "self.full_paths" will be overwritten if present.
|
1154
|
+
#
|
1155
|
+
def build_full_paths(paths)
  # Work out each directory entry's parent from the L/R/C links in
  # "self.file_index", then write parent-qualified paths, joined with '/',
  # into "self.full_paths".
  #
  # +paths+:: Array of leaf names, index-aligned with "self.file_index".
  #           It is only read; neither the array nor its strings change.
  #
  # Index 0 and every entry whose type is not STREAM get a trailing '/'.
  #
  el = ar = ce = 0
  pl = paths.length
  dad = []
  q = []

  pl.times do |i|
    dad[i] = q[i] = i

    # Copy the string: '/' is appended in place further down, and without a
    # copy that would alter the caller's strings (or fail on frozen ones).
    self.full_paths[i] = paths[i].dup
  end

  # Breadth-first walk; "q" grows while it is being processed.
  j = 0

  while j < q.length
    i = q[j]
    entry = self.file_index[i]
    el = entry.L
    ar = entry.R
    ce = entry.C

    if dad[i] == i
      dad[i] = dad[el] if el != NOSTREAM && dad[el] != el
      dad[i] = dad[ar] if ar != NOSTREAM && dad[ar] != ar
    end

    dad[ce] = i if ce != NOSTREAM

    if el != NOSTREAM && i != dad[i]
      dad[el] = dad[i]
      q << el if q.rindex(el) < j
    end

    if ar != NOSTREAM && i != dad[i]
      dad[ar] = dad[i]
      q << ar if q.rindex(ar) < j
    end

    j += 1
  end

  # NOTE(review): this pass reuses the "el" / "ar" values left over from the
  # last entry processed above instead of re-reading them per entry. Kept
  # exactly as found - presumably inherited from the JavaScript original;
  # confirm before changing.
  1.upto(pl - 1) do |i|
    next unless dad[i] == i

    if ar != NOSTREAM && dad[ar] != ar
      dad[i] = dad[ar]
    elsif el != NOSTREAM && dad[el] != el
      dad[i] = dad[el]
    end
  end

  1.upto(pl - 1) do |i|
    next if self.file_index[i].type == 0 # (unknown)

    ancestor = i

    if ancestor != dad[ancestor]
      loop do
        ancestor = dad[ancestor]
        self.full_paths[i] = self.full_paths[ancestor] + '/' + self.full_paths[i]

        break unless ancestor != 0 && NOSTREAM != dad[ancestor] && ancestor != dad[ancestor]
      end
    end

    dad[i] = -1
  end

  self.full_paths[0] << '/'

  1.upto(pl - 1) do |i|
    self.full_paths[i] << '/' if self.file_index[i].type != STREAM
  end
end
|
1231
|
+
|
1232
|
+
# Read entry contents. Undocumented in JS code; looks like:
|
1233
|
+
#
|
1234
|
+
# +entry+:: The internal file structure being compiled; updated on exit
|
1235
|
+
# +payload+:: MiniFAT sector data (file contents within)
|
1236
|
+
# +mini+:: MiniFAT indices (of file contents in sector data)
|
1237
|
+
#
|
1238
|
+
# Returns the extracted data as an ASCII-8BIT encoded string.
|
1239
|
+
#
|
1240
|
+
def get_mfat_entry(entry, payload, mini)
  # Collect an entry's content by following its chain through +mini+ and
  # copying one MSSZ-byte block from +payload+ per link.
  #
  # +entry+::   Object responding to #start (first block index) and #size
  #             (content length in bytes). It is not modified here.
  # +payload+:: String holding the blocks, MSSZ bytes each.
  # +mini+::    String of 32-bit chain links indexed by block number; when
  #             blank, nothing is read.
  #
  # Returns a new ASCII-8BIT string, at most "entry.size" bytes long. It is
  # empty when there is nothing to read.
  #
  data = String.new(encoding: 'ASCII-8BIT')
  return data unless mini.present?

  remaining = entry.size
  idx = entry.start

  while remaining > 0 && idx >= 0
    # String#slice returns nil when the offset lies past the end of the
    # payload. Skip such blocks rather than failing on "data << nil".
    chunk = payload.slice(idx * MSSZ, MSSZ)
    data << chunk unless chunk.nil?

    remaining -= MSSZ
    idx = self.class.get_int32le(mini, idx * 4)
  end

  # Blocks are whole multiples of MSSZ, so trim to the declared size. "data"
  # can only be non-empty if the loop ran, which requires a positive size.
  data.empty? ? data : data.slice(0, entry.size)
end
|
1255
|
+
|
1256
|
+
end # "class SimpleCfb"
|