simple_cfb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +201 -0
- data/README.md +146 -0
- data/Rakefile +17 -0
- data/lib/simple_cfb/simple_cfb.rb +1256 -0
- data/lib/simple_cfb/version.rb +15 -0
- data/lib/simple_cfb.rb +4 -0
- metadata +145 -0
|
@@ -0,0 +1,1256 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'ostruct'
|
|
4
|
+
require 'date'
|
|
5
|
+
require 'stringio'
|
|
6
|
+
require 'active_support/core_ext/object/blank.rb'
|
|
7
|
+
require 'active_support/core_ext/object/try.rb'
|
|
8
|
+
|
|
9
|
+
# Ported from https://github.com/SheetJS/js-cfb.
|
|
10
|
+
#
|
|
11
|
+
# File data is added with #add then, when finished, the entire blob of CFB
|
|
12
|
+
# data is generated in one go with #write. Progressive creation is impossible
|
|
13
|
+
# as the CFB file requires information on file sizes and directory entries at
|
|
14
|
+
# the start of output, so all of that must be known beforehand.
|
|
15
|
+
#
|
|
16
|
+
# Files can be parsed into a new object with #parse!, then #file_index and
|
|
17
|
+
# #full_paths examined to extract the parsed CFB container components.
|
|
18
|
+
#
|
|
19
|
+
# https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-cfb/
|
|
20
|
+
#
|
|
21
|
+
# This Ruby port tries to be equivalent to the JavaScript original, but in so
|
|
22
|
+
# doing there are likely additional bugs and I've omitted anything that wasn't
|
|
23
|
+
# needed for encrypted OOXML writing and reading.
|
|
24
|
+
#
|
|
25
|
+
class SimpleCfb
|
|
26
|
+
|
|
27
|
+
# CFB miscellaneous
|
|
28
|
+
#
|
|
29
|
+
MSSZ  = 64   # Mini Sector Size = 1<<6
MSCSZ = 4096 # Mini Stream Cutoff Size

# A single NUL byte as a binary (ASCII-8BIT) string, used for padding.
#
NUL = "\x00".b

# 2.1 Compound File Sector Numbers and Types
#
FREESECT   = -1
ENDOFCHAIN = -2
FATSECT    = -3
DIFSECT    = -4
MAXREGSECT = -6

# Compound File Header
#
HEADER_SIGNATURE     = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1].pack('C*')
HEADER_CLSID         = NUL * 16
HEADER_MINOR_VERSION = [0x003E].pack('v')
MAXREGSID            = -6
NOSTREAM             = -1
STREAM               = 2

# 2.6.1 Compound File Directory Entry
#
ENTRY_TYPES = ['unknown', 'storage', 'stream', 'lockbytes', 'property', 'root']

# Name of the seed file that a freshly initialised container starts with.
#
SEED_FILENAME = "\u0001Sh33tJ5"
|
|
60
|
+
|
|
61
|
+
# Used internally for parser.
|
|
62
|
+
#
|
|
63
|
+
class SectorList < Array
  # An Array that additionally carries the FAT sector addresses and the
  # sector size alongside its elements.
  attr_accessor :fat_addrs, :ssz
end
|
|
67
|
+
|
|
68
|
+
# =========================================================================
|
|
69
|
+
# PUBLIC CLASS INTERFACE
|
|
70
|
+
# =========================================================================
|
|
71
|
+
|
|
72
|
+
# Returns +true+ if the executing computer is little-endian natively,
|
|
73
|
+
# else +false+.
|
|
74
|
+
#
|
|
75
|
+
def self.host_is_little_endian?
  # Pack 42 as a native-endian 32-bit integer; on a little-endian host the
  # least significant byte (42) comes first.
  [42].pack('l').getbyte(0) == 42
end
|
|
78
|
+
|
|
79
|
+
# Treat an input ASCII-8BIT encoded string as 4 bytes and from this parse and
|
|
80
|
+
# return an unsigned 32-bit little-endian integer.
|
|
81
|
+
#
|
|
82
|
+
# +input+:: ASCII-8BIT encoded string including 4 byte sequence
|
|
83
|
+
# +index+:: Index into +input+ to start reading bytes (default 0)
|
|
84
|
+
#
|
|
85
|
+
def self.get_uint32le(input, index = 0)
  # 'V' is always "unsigned 32-bit little-endian", whatever the host byte
  # order, so no endianness probe or manual byte reversal is needed.
  # Returns nil if fewer than 4 bytes are available at +index+.
  input.slice(index, 4).unpack1('V')
end
|
|
91
|
+
|
|
92
|
+
# Treat an input ASCII-8BIT encoded string as 4 bytes and from this parse and
|
|
93
|
+
# return a signed 32-bit little-endian integer.
|
|
94
|
+
#
|
|
95
|
+
# +input+:: ASCII-8BIT encoded string including 4 byte sequence
|
|
96
|
+
# +index+:: Index into +input+ to start reading bytes (default 0)
|
|
97
|
+
#
|
|
98
|
+
def self.get_int32le(input, index = 0)
  # 'l<' is always "signed 32-bit little-endian", whatever the host byte
  # order, so no endianness probe or manual byte reversal is needed.
  # Returns nil if fewer than 4 bytes are available at +index+.
  input.slice(index, 4).unpack1('l<')
end
|
|
104
|
+
|
|
105
|
+
# Parse a ctime/mtime 8-byte sequence (two 32-bit little-endian words) into a
|
|
106
|
+
# returned Ruby Time object, or +nil+ if the values are all zero.
|
|
107
|
+
#
|
|
108
|
+
# +data+:: ASCII-8BIT encoded string, 8 bytes long.
|
|
109
|
+
#
|
|
110
|
+
def self.get_time(data)
  # The 8 bytes are read as two unsigned 32-bit little-endian words: the low
  # word at offset 0 and the high word at offset 4.
  low_word, high_word = [0, 4].map { |offset| self.get_uint32le(data, offset) }

  # An all-zero timestamp means "not set".
  return nil if high_word.zero? && low_word.zero?

  # Scale each word by 1e7 to get seconds, then shift the high word up by
  # 2**32 before combining.
  high_seconds = (high_word / 1e7) * 2.pow(32)
  low_seconds  = low_word / 1e7

  # 11644473600 is subtracted to rebase the value onto the epoch Time.at uses.
  Time.at(high_seconds + low_seconds - 11644473600).utc
end
|
|
121
|
+
|
|
122
|
+
# =========================================================================
|
|
123
|
+
# PUBLIC INSTANCE INTERFACE
|
|
124
|
+
# =========================================================================
|
|
125
|
+
|
|
126
|
+
attr_accessor :full_paths, :file_index
|
|
127
|
+
|
|
128
|
+
def initialize
  # Seed the path and index arrays with the root entry and the seed file.
  reinit
end
|
|
131
|
+
|
|
132
|
+
# Add a file entry. Supports root-level filenames only. File must not be
|
|
133
|
+
# added already.
|
|
134
|
+
#
|
|
135
|
+
# +name+:: Filename, e.g. "Foo", in your preferred string encoding
|
|
136
|
+
# +content+:: Mandatory ASCII-8BIT encoded string containing file data
|
|
137
|
+
#
|
|
138
|
+
def add(name, content)
  reinit

  root_path = self.full_paths[0]

  # A name that already begins with the root path is used as given; anything
  # else is placed directly under the root, collapsing any doubled slash.
  fpath =
    if name.slice(0, root_path.size) == root_path
      name
    else
      prefix = root_path.end_with?('/') ? root_path : root_path + '/'
      (prefix + name).gsub('//', '/')
    end

  file = OpenStruct.new(
    name:    filename(name),
    type:    2,
    content: content,
    size:    content.bytesize
  )

  self.file_index << file
  self.full_paths << fpath

  # Re-sort the entries and recompute the sibling/child links.
  rebuild(force_gc: true)

  file
end
|
|
159
|
+
|
|
160
|
+
# Compile and return the CFB file data.
|
|
161
|
+
#
|
|
162
|
+
def write

  # Seed-file removal stays disabled, because we prefer parity with the JS
  # code for test verification purposes. The overhead seems minimal. The
  # disabled logic was:
  #
  #   seed_leaf  = "/#{SEED_FILENAME}"
  #   seed_index = self.full_paths.find_index do | path |
  #     path.end_with?(seed_leaf)
  #   end
  #
  #   unless seed_index.nil? || self.file_index.size < 3
  #     self.file_index.delete_at(seed_index)
  #     self.full_paths.delete_at(seed_index)
  #   end
  #
  #   self.rebuild(force_gc: true)
  #
  self.rebuild(force_gc: false)

  # ---- Sector accounting --------------------------------------------------
  #
  # Content under 0x1000 bytes is counted in 64-byte mini sectors; anything
  # larger is counted in 512-byte regular sectors.
  #
  mini_size = 0
  fat_size  = 0

  self.file_index.each do | entry |
    bytes = entry&.content&.bytesize
    next if bytes.nil? || bytes.zero?

    if bytes < 0x1000
      mini_size += (bytes + 0x3F) >> 6
    else
      fat_size += (bytes + 0x01FF) >> 9
    end
  end

  # DIFAT sectors needed for a FAT sector count; the first 109 FAT sector
  # locations are written in the header itself.
  difat_for = ->(fats) { fats <= 109 ? 0 : ((fats - 109).to_f / 0x7F).ceil() }

  dir_cnt   = (self.full_paths.size + 3) >> 2 # 4 directory entries per sector
  mini_cnt  = (mini_size + 7) >> 3            # 8 mini sectors per sector
  mfat_cnt  = (mini_size + 0x7F) >> 7         # 128 mini FAT entries per sector
  fat_base  = mini_cnt + fat_size + dir_cnt + mfat_cnt
  fat_cnt   = (fat_base + 0x7F) >> 7
  difat_cnt = difat_for.call(fat_cnt)

  # The FAT also has to describe the FAT and DIFAT sectors themselves, so
  # grow it until it covers everything.
  while ((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt
    fat_cnt  += 1
    difat_cnt = difat_for.call(fat_cnt)
  end

  # Layout, counted in 512-byte units including the header:
  # header, DIFAT, FAT, mini FAT, directory, regular streams, mini stream.
  mini_stream_start = 1 + difat_cnt + fat_cnt + mfat_cnt + dir_cnt + fat_size
  total_sectors     = mini_stream_start + mini_cnt

  root       = self.file_index[0]
  root.size  = mini_size << 6
  root.start = mini_stream_start

  # ---- Header -------------------------------------------------------------
  #
  o = String.new(encoding: 'ASCII-8BIT')

  o << HEADER_SIGNATURE
  o << NUL * 2 * 8 # CLSID

  # Minor version, major version, byte order, sector shift, mini sector shift
  [0x003E, 0x0003, 0xFFFE, 0x0009, 0x0006].each do | word |
    o << write_shift(2, word)
  end

  o << NUL * 2 * 3 # Reserved

  o << write_shift( 4, 0)                                # Directory sector count
  o << write_shift( 4, fat_cnt)                          # FAT sector count
  o << write_shift( 4, difat_cnt + fat_cnt + mfat_cnt)   # First directory sector
  o << write_shift( 4, 0)                                # Transaction signature
  o << write_shift( 4, 1 << 12)                          # Mini stream cutoff size
  o << write_shift( 4, mfat_cnt.zero? ? ENDOFCHAIN : difat_cnt + fat_cnt) # First mini FAT sector
  o << write_shift( 4, mfat_cnt)                         # Mini FAT sector count
  o << write_shift(-4, difat_cnt.zero? ? ENDOFCHAIN : 0) # First DIFAT sector
  o << write_shift( 4, difat_cnt)                        # DIFAT sector count

  # ---- DIFAT --------------------------------------------------------------
  #
  # Location of the n'th FAT sector, or -1 (free) beyond the last one.
  fat_location = ->(n) { n < fat_cnt ? difat_cnt + n : -1 }

  # 109 entries live in the header...
  109.times { | n | o << write_shift(-4, fat_location.call(n)) }

  # ...and the rest in DIFAT sectors of 127 entries plus a "next" pointer.
  slot = 109

  difat_cnt.times do | sector |
    while slot < 236 + sector * 127
      o << write_shift(-4, fat_location.call(slot))
      slot += 1
    end

    o << write_shift(-4, sector == difat_cnt - 1 ? ENDOFCHAIN : sector + 1)
  end

  # ---- FAT ----------------------------------------------------------------
  #
  # 'cursor' is the index of the next FAT entry to write; 'chain_end' is one
  # past the last sector of the chain being emitted. The lambda writes one
  # chain of 'count' consecutive sectors ending in ENDOFCHAIN.
  #
  cursor    = 0
  chain_end = difat_cnt

  chainit = lambda do | count |
    chain_end += count

    while cursor < chain_end - 1
      o << write_shift(-4, cursor + 1)
      cursor += 1
    end

    unless count.zero?
      cursor += 1
      o << write_shift(-4, ENDOFCHAIN)
    end
  end

  while cursor < chain_end
    o << write_shift(-4, DIFSECT)
    cursor += 1
  end

  chain_end += fat_cnt

  while cursor < chain_end
    o << write_shift(-4, FATSECT)
    cursor += 1
  end

  chainit.call(mfat_cnt)
  chainit.call(dir_cnt)

  # Regular (non-mini) streams
  self.file_index.each do | entry |
    next if entry.content.nil?

    bytes = entry.content.bytesize
    next if bytes < 0x1000

    entry.start = chain_end
    chainit.call((bytes + 0x01FF) >> 9)
  end

  # The mini stream container itself
  chainit.call((mini_size + 7) >> 3)

  o << write_shift(-4, ENDOFCHAIN) while o.size & 0x1FF != 0

  # ---- Mini FAT -----------------------------------------------------------
  #
  cursor = chain_end = 0

  self.file_index.each do | entry |
    next if entry.content.nil?

    bytes = entry.content.bytesize
    next if bytes == 0 || bytes >= 0x1000

    entry.start = chain_end
    chainit.call((bytes + 0x3F) >> 6)
  end

  o << write_shift(-4, ENDOFCHAIN) while o.size & 0x1FF != 0

  # ---- Directory ----------------------------------------------------------
  #
  (dir_cnt << 2).times do | idx |
    path = self.full_paths[idx]

    # Unused slot in the last directory sector: 128 bytes with the three
    # sibling/child IDs set to -1.
    if path.blank?
      17.times { o << write_shift(4,  0) }
      3.times  { o << write_shift(4, -1) }
      12.times { o << write_shift(4,  0) }
      next
    end

    entry = self.file_index[idx]

    if idx.zero?
      entry.start = entry.size.blank? || entry.size.zero? ? ENDOFCHAIN : entry.start - 1
    end

    label    = entry.name
    label    = label[0...32] if label.size > 32
    name_len = 2 * (label.size + 1)

    o << write_shift(64, label, 'utf16le')
    o << write_shift( 2, name_len)
    o << write_shift( 1, entry.type)
    o << write_shift( 1, entry.color)
    o << write_shift(-4, entry.L)
    o << write_shift(-4, entry.R)
    o << write_shift(-4, entry.C)

    if entry.clsid.blank?
      4.times { o << write_shift(4, 0) }
    else
      o << entry.clsid
    end

    o << write_shift(4, entry.state.blank? || entry.state.zero? ? 0 : entry.state)

    4.times { o << write_shift(4, 0) } # Creation and modification times

    o << write_shift(4, entry.start)
    o << write_shift(4, entry.size)
    o << write_shift(4, 0)
  end

  # ---- Stream contents ----------------------------------------------------
  #
  pad_to          = ->(target) { o << NUL * (target - o.size) if o.size < target }
  pad_to_multiple = lambda do | unit |
    remainder = o.size % unit
    o << NUL * (unit - remainder) unless remainder.zero?
  end

  streams = self.file_index.drop(1)

  streams.each do | entry |
    next unless entry.size.present? && entry.size >= 0x1000

    pad_to.call((entry.start + 1) << 9)
    o << entry.content
    pad_to_multiple.call(512)
  end

  streams.each do | entry |
    next unless entry.size.present? && entry.size > 0 && entry.size < 0x1000

    o << entry.content
    pad_to_multiple.call(64)
  end

  pad_to.call(total_sectors << 9)

  return o
end # "def write"
|
|
420
|
+
|
|
421
|
+
# Parses an input file into this object, allowing you to extract individual
|
|
422
|
+
# files thereafter via #read.
|
|
423
|
+
#
|
|
424
|
+
# +file+:: Source I/O stream. Data is read from the current file pointer,
|
|
425
|
+
# which will therefore have advanced when the method returns.
|
|
426
|
+
#
|
|
427
|
+
def parse!(file)
  raise "CFB corrupt - file size < 512 bytes" if file.size < 512

  # [MS-CFB] 2.2 Compound File Header
  # Check major version; it determines the sector size.
  #
  major, minor = self.check_get_mver(file)

  ssz =
    case major
    when 3 then 512
    when 4 then 4096
    else
      raise 'Zip contents are not supported' if major == 0 && minor == 0

      # Report the version that was actually read from the file.
      raise "Major version: Only 3 or 4 is supported; #{major} encountered"
    end

  self.check_shifts(file, major)

  # Number of Directory Sectors
  #
  dir_cnt = self.read_shift(file, 4, 'i')
  raise "Directory sectors: Expected 0, saw #{dir_cnt}" if major == 3 && dir_cnt != 0

  # Number of FAT Sectors (skipped)
  #
  file.seek(file.pos + 4)

  # First Directory Sector Location
  #
  dir_start = self.read_shift(file, 4, 'i')

  # Transaction Signature (skipped)
  #
  file.seek(file.pos + 4)

  # Mini Stream Cutoff Size
  #
  self.check_field(file, "\x00\x10\x00\x00", 'Mini stream cutoff size')

  # First Mini FAT Sector Location, then Number of Mini FAT Sectors
  #
  minifat_start = self.read_shift(file, 4, 'i')
  nmfs          = self.read_shift(file, 4, 'i')

  # First DIFAT sector location, then Number of DIFAT Sectors
  #
  difat_start   = self.read_shift(file, 4, 'i')
  difat_sec_cnt = self.read_shift(file, 4, 'i')

  # Grab FAT Sector Locations: up to 109 entries, stopping at the first
  # negative value.
  #
  fat_addrs = []

  109.times do
    q = self.read_shift(file, 4, 'i')
    break if q < 0

    fat_addrs << q
  end

  # Break the file up into sectors, skipping the file header of 'ssz' size.
  #
  sectors = []
  file.seek(ssz)
  sectors << file.read(ssz) until file.eof?

  self.sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs)

  # Chains
  #
  sector_list = self.make_sector_list(sectors, dir_start, fat_addrs, ssz)
  sector_list[dir_start].name = '!Directory'

  if nmfs > 0 && minifat_start != ENDOFCHAIN
    sector_list[minifat_start].name = '!MiniFAT'
  end

  sector_list[fat_addrs[0]].name = '!FAT'
  sector_list.fat_addrs          = fat_addrs
  sector_list.ssz                = ssz

  # [MS-CFB] 2.6.1 Compound File Directory Entry
  #
  files = {}
  paths = []

  self.full_paths = []
  self.file_index = []
  self.read_directory(
    dir_start,
    sector_list,
    sectors,
    paths,
    nmfs,
    files,
    minifat_start
  )

  self.build_full_paths(paths)
ensure
  # The stream is always closed, whether parsing succeeded or raised.
  file.close() unless file.nil?
end # "def parse!"
|
|
550
|
+
|
|
551
|
+
# =========================================================================
|
|
552
|
+
# PRIVATE INSTANCE METHODS
|
|
553
|
+
# =========================================================================
|
|
554
|
+
#
|
|
555
|
+
private
|
|
556
|
+
|
|
557
|
+
# Initialise or reinitialise the internal file data. After being called
|
|
558
|
+
# for the first time, calling here is really only useful to make sure
|
|
559
|
+
# that internal file path and index arrays look consistent.
|
|
560
|
+
#
|
|
561
|
+
def reinit
  self.full_paths ||= []
  self.file_index ||= []

  # The two arrays are parallel; a length mismatch means they have been
  # corrupted.
  raise 'Inconsistent CFB structure' unless self.full_paths.size == self.file_index.size

  # Nothing more to do when the container already has entries.
  return unless self.full_paths.empty?

  root_name = 'Root Entry'

  self.full_paths << root_name + '/'
  self.file_index << OpenStruct.new(name: root_name, type: 5)

  # Add starting seed file, whose content is the four bytes "7262".
  #
  seed_content = [55, 50, 54, 50].pack('C*')

  self.full_paths << self.full_paths[0] + SEED_FILENAME
  self.file_index << OpenStruct.new(
    name:    SEED_FILENAME,
    type:    2,
    content: seed_content,
    R:       69,
    L:       69,
    C:       69
  )
end
|
|
584
|
+
|
|
585
|
+
# Strange function that's very much not the same as "File.dirname".
|
|
586
|
+
#
|
|
587
|
+
def dirname(p)
  # Directory-style path (trailing slash): a top-level directory is its own
  # parent; otherwise drop the slash and take the parent of what remains.
  if p.end_with?('/')
    trimmed = p.chomp('/')
    return p unless trimmed.include?('/')

    return dirname(trimmed) # NOTE EARLY EXIT AND RECURSION
  end

  # File-style path: keep everything up to and including the last slash, or
  # the whole string if there is no slash at all.
  last_slash = p.rindex('/')
  last_slash.nil? ? p : p[0, last_slash + 1]
end
|
|
597
|
+
|
|
598
|
+
# Strange function that's very much not the same as "File.basename".
|
|
599
|
+
#
|
|
600
|
+
def filename(p)
  # Directory-style path: strip one trailing slash and try again.
  return filename(p.chomp('/')) if p.end_with?('/') # NOTE EARLY EXIT AND RECURSION

  # Everything after the last slash, or the whole string if there is none.
  last_slash = p.rindex('/')
  return p if last_slash.nil?

  p[(last_slash + 1)..]
end
|
|
609
|
+
|
|
610
|
+
# Compare file-path-name with some FAT concepts thrown in (L vs R); related
|
|
611
|
+
# to CFB section 2.6.4 (red-black trees).
|
|
612
|
+
#
|
|
613
|
+
def namecmp(l, r)
  left_parts  = l.split('/')
  right_parts = r.split('/')
  shared      = [left_parts.size, right_parts.size].min

  shared.times do | i |
    left  = left_parts[i]
    right = right_parts[i]

    # Shorter path components sort first...
    by_length = left.size - right.size
    return by_length if by_length != 0

    # ...then components of equal length are ordered by content. Compare
    # against the split component of +r+, not a character of the raw string.
    return left < right ? -1 : 1 if left != right
  end

  # All shared components are equal; the path with fewer components is first.
  left_parts.size - right_parts.size
end
|
|
630
|
+
|
|
631
|
+
# CFB internal knowledge would be required to understand this; seems to be
|
|
632
|
+
# recalculating data structures that then theoretically would make life
|
|
633
|
+
# easier during the file output stage.
|
|
634
|
+
#
|
|
635
|
+
def rebuild(force_gc: false)
  self.reinit()

  seen_live_entry = false
  gc              = force_gc

  # Unless a rebuild is forced, scan from the end to decide whether one is
  # needed, dropping trailing type-0 entries along the way.
  unless gc == true
    (self.full_paths.size - 1).downto(0) do | i |
      file = self.file_index[i]

      case file.type
        when 0
          if seen_live_entry == true
            gc = true
          else
            self.file_index.pop()
            self.full_paths.pop()
          end

        when 1, 2, 5
          seen_live_entry = true

          # A missing L/R/C link makes the multiplication raise; that means
          # the links were never computed, so a rebuild is needed.
          gc ||= (file.R * file.L * file.C rescue nil).nil?
          gc ||= file.R.try(:>, -1) && file.L.try(:>, -1) && file.R == file.L

        else
          gc = true
      end
    end
  end

  return unless gc == true

  now = Date.parse('1987-01-19')

  # Track which names exist, and collect [path, entry] pairs for every entry
  # whose type is not 0.
  #
  track_full_paths = {}
  data             = []

  self.full_paths.each_with_index do | path, i |
    track_full_paths[path] = true
    next if self.file_index[i].type == 0

    data.push([path, self.file_index[i]])
  end

  # Make sure every ancestor directory of every entry has an entry of its
  # own. The bound is fixed up front; entries appended here need no visit
  # because the inner loops already add every missing ancestor.
  #
  0.upto(data.size - 1) do | i |
    loop do
      dad = self.dirname(data[i][0])
      break if track_full_paths[dad]

      # Climb to the topmost ancestor that is still missing. Stop if the
      # path is its own parent, which would otherwise never terminate.
      loop do
        parent = self.dirname(dad)
        break if parent.blank? || parent == dad || track_full_paths[parent]

        dad = parent
      end

      data.push([
        dad,
        OpenStruct.new({
          name:    self.filename(dad).gsub('/', ''),
          type:    1,
          clsid:   HEADER_CLSID,
          ct:      now,
          mt:      now,
          content: nil
        })
      ])

      # Add name to set
      #
      track_full_paths[dad] = true
    end
  end

  data.sort! { | x, y | self.namecmp(x[0], y[0]) }

  self.full_paths = data.map { | pair | pair[0] }
  self.file_index = data.map { | pair | pair[1] }

  # Recompute name, size and the L/R/C links of every entry.
  #
  0.upto(data.size - 1) do | i |
    nm  = self.full_paths[i]
    elt = self.file_index[i]

    elt.name  = self.filename(nm).gsub('/', '')
    elt.color = 1
    elt.L     = -1
    elt.R     = -1
    elt.C     = -1
    elt.size  = elt.content.nil? ? 0 : elt.content.bytesize
    elt.start = 0
    elt.clsid = elt.clsid || HEADER_CLSID

    if i == 0
      # Root entry: its child is the first entry after it, if there is one.
      elt.C    = data.size > 1 ? 1 : -1
      elt.size = 0
      elt.type = 5

    elsif nm.end_with?('/')
      # Storage: C is the first later entry directly inside it; R is the
      # first later entry that shares its parent.
      later = (i + 1)...data.size

      child   = later.find { | j | self.dirname(self.full_paths[j]) == nm }
      sibling = later.find { | j | self.dirname(self.full_paths[j]) == self.dirname(nm) }

      elt.C    = child   || -1
      elt.R    = sibling || -1
      elt.type = 1

    else
      # Stream: R is the next entry when it shares the same parent.
      elt.R    = i + 1 if self.dirname(self.full_paths[i + 1] || '') == self.dirname(nm)
      elt.type = 2

    end
  end
end
|
|
763
|
+
|
|
764
|
+
# Returns a chunk of data representing a converted write of the input in
|
|
765
|
+
# the +value+ parameter.
|
|
766
|
+
#
|
|
767
|
+
# The JS code from which this was ported has a very, VERY strange method
|
|
768
|
+
# signature...
|
|
769
|
+
#
|
|
770
|
+
# +size+:: Either a number of bytes to write or a format specifier (see
|
|
771
|
+
# below).
|
|
772
|
+
#
|
|
773
|
+
# +value+:: A value to write; its type is interpreted through both the
|
|
774
|
+
# +size+ and +format+ parameters.
|
|
775
|
+
#
|
|
776
|
+
# +format+:: Either 'hex' or 'utf16le' in which case the value is treated
|
|
777
|
+
# as a hex string (e.g. "deadbeef", high nibble first) or
|
|
778
|
+
# character data in arbitrary Ruby string encoding; written to
|
|
779
|
+
# the output as parsed bytes from the hex data, or little
|
|
780
|
+
# endian UTF-16 byte pairs, respectively. If the input value
|
|
781
|
+
# is longer than +size+ *IN BYTES* then it is truncated, else
|
|
782
|
+
# if need be, padded with zeros - again *IN BYTES*, so the
|
|
783
|
+
# maximum length in characters of a "utf16le" string is half
|
|
784
|
+
# the amount in +size+.
|
|
785
|
+
#
|
|
786
|
+
# If +format+ is something else or omitted, "size" becomes an
|
|
787
|
+
# indication of format (!). The value is treated as an 8-bit
|
|
788
|
+
# byte (+size+ is 1) and masked as such, 16-bit unsigned
|
|
789
|
+
# little-endian value (2), or uint32 (4) - or a signed int32
|
|
790
|
+
# (+size+ is -4 - yes, that's minus 4) - written out as four
|
|
791
|
+
# bytes, little-endian.
|
|
792
|
+
#
|
|
793
|
+
def write_shift(size, value, format = nil)
  case format
    when 'hex'
      # Hex digits -> bytes, NUL-padded then cut to exactly +size+ bytes.
      [value].pack('H*').ljust(size, NUL)[0...size]

    when 'utf16le'
      # Truncate and pad in BYTES, after encoding, so that characters outside
      # the BMP (4 bytes each in UTF-16) cannot overflow the field. The
      # limit is rounded down to a whole number of 16-bit code units.
      limit   = (size / 2) * 2
      encoded = value.encode('UTF-16LE').force_encoding('ASCII-8BIT').byteslice(0, limit)

      # If the cut landed in the middle of a surrogate pair, drop the
      # dangling high surrogate rather than emit malformed UTF-16.
      last_unit = encoded.byteslice(-2, 2)&.unpack1('v')

      if last_unit && (0xD800..0xDBFF).cover?(last_unit)
        encoded = encoded.byteslice(0, encoded.bytesize - 2)
      end

      encoded.ljust(limit, NUL)

    else
      # No recognised format: +size+ selects the integer encoding. Any other
      # size yields nil.
      case size
        when  1 then [value].pack('C')  # Unsigned 8-bit, bitwise truncated
        when  2 then [value].pack('v')  # Unsigned 16-bit little-endian, bitwise truncated
        when  4 then [value].pack('V')  # Unsigned 32-bit little-endian, bitwise truncated
        when -4 then [value].pack('l<') # Signed 32-bit little-endian
      end
  end
end
|
|
826
|
+
|
|
827
|
+
# A method that's a companion to #write_shift and equally strange!
|
|
828
|
+
#
|
|
829
|
+
# Read from file for 'size' bytes if size is 1, 2 or 4, parsing the bytes
|
|
830
|
+
# as an 8-bit unsigned, 16-bit unsigned or 32-bit integer where the value
|
|
831
|
+
# of 't' indicates if the 32-bit integer is signed ('t' is string 'i') or
|
|
832
|
+
# unsigned ('t' is anything else); or if size is 16, just return a string
|
|
833
|
+
# of 16 bytes read as-is.
|
|
834
|
+
#
|
|
835
|
+
# This implementation is slightly cleaner and more appropriate than the
|
|
836
|
+
# one in the original source, by omitting unused conversions.
|
|
837
|
+
#
|
|
838
|
+
# +file+:: Source I/O stream. Data is read from the current file pointer,
|
|
839
|
+
# which will therefore have advanced when the method returns.
|
|
840
|
+
#
|
|
841
|
+
# +size+:: 1, 2, 4 to read 1, 2 or 4 bytes returned as a parsed 8, 16 or
|
|
842
|
+
# 32-bit little-endian integer respectively, or pass 16 to read
|
|
843
|
+
# 16 bytes of raw data returned as an ASCII-8BIT encoded string.
|
|
844
|
+
#
|
|
845
|
+
# +type+:: If +size+ is 4, pass 'i' to read as a signed 32-bit integer,
|
|
846
|
+
# else (omitted, or not 'i') value is read as unsigned.
|
|
847
|
+
#
|
|
848
|
+
def read_shift(file, size, t = nil)
  case size
    when 1 # Unsigned 8-bit
      file.read(1).getbyte(0)

    when 2 # Unsigned 16-bit little-endian
      file.read(2).unpack1('v')

    when 4 # 32-bit little-endian; signed only when t is 'i'
      raw = file.read(4)
      t == 'i' ? self.class.get_int32le(raw) : self.class.get_uint32le(raw)

    when 16 # Raw 16-byte string, returned as read
      file.read(16)
  end
end
|
|
869
|
+
|
|
870
|
+
# Read from the file, expecting to see a particular value; if not, throw
|
|
871
|
+
# an exception.
|
|
872
|
+
#
|
|
873
|
+
# +file+:: Source I/O stream. Data is read from the current file
|
|
874
|
+
# pointer, which will therefore have advanced when the
|
|
875
|
+
# method returns.
|
|
876
|
+
#
|
|
877
|
+
# +expected+:: The expected value, as a String that'll be forced to
|
|
878
|
+
# ASCII-8BIT encoding, if not that way already.
|
|
879
|
+
#
|
|
880
|
+
# +field_name+:: The field name to include in the raised exception, just
|
|
881
|
+
# for human diagnostic purposes.
|
|
882
|
+
#
|
|
883
|
+
def check_field(file, expected, field_name)
  # Compare as raw bytes; work on a copy so the caller's string keeps its
  # encoding.
  wanted = expected.dup.force_encoding('ASCII-8BIT')
  actual = file.read(wanted.bytesize)

  return if actual == wanted

  raise "#{field_name}: Expected #{wanted.inspect}, but got #{actual.inspect}"
end
|
|
891
|
+
|
|
892
|
+
# Return a tuple array of major, minor file version, with 0, 0 for ZIP
|
|
893
|
+
# files, else read from the CFB file, checking header in passing. File
|
|
894
|
+
# pointer is assumed to be at zero on entry.
|
|
895
|
+
#
|
|
896
|
+
# +file+:: Source I/O stream. Data is read from the current file pointer,
|
|
897
|
+
# which will therefore have advanced when the method returns.
|
|
898
|
+
#
|
|
899
|
+
# Identify the container type and, for CFB data, read its version numbers.
#
# +file+:: Source I/O stream positioned at offset zero. It is read from and
#          rewound, so the position has moved when the method returns.
#
# Returns <tt>[0, 0]</tt> when the stream starts with the two-byte ZIP
# signature "PK"; otherwise returns <tt>[major, minor]</tt> as read from the
# CFB header.
#
# Raises (via #check_field) if the stream is neither ZIP nor starts with
# HEADER_SIGNATURE.
#
def check_get_mver(file)
  # IO#read returns a String, so the signature has to be compared against
  # "PK" (0x50 0x4b) rather than against Integer byte values, which would
  # never match.
  return [0, 0] if file.read(2) == 'PK'

  file.rewind
  check_field(file, HEADER_SIGNATURE, 'Header signature')

  file.seek(file.pos + 16) # Skip all-NUL CLSID, 16 bytes

  # The minor version is stored first, followed by the major version.
  minor = read_shift(file, 2)
  major = read_shift(file, 2)

  [major, minor]
end
|
|
913
|
+
|
|
914
|
+
# Check sector shifts in the file header.
|
|
915
|
+
#
|
|
916
|
+
# +file+:: Source I/O stream. Data is read from the current file pointer,
|
|
917
|
+
# which will therefore have advanced when the method returns.
|
|
918
|
+
#
|
|
919
|
+
# +major+:: Major version number - must be 3 or 4.
|
|
920
|
+
#
|
|
921
|
+
# Validate the byte-order / sector-shift portion of the file header.
#
# +file+::  Source I/O stream. 12 bytes are consumed on success: 2 skipped,
#           a 2-byte sector shift, a 2-byte mini sector shift and 6
#           reserved bytes.
#
# +major+:: Major version number read earlier from the header.
#
# Raises RuntimeError if the sector shift is neither 9 nor 12, or if it
# does not belong to +major+ (9 pairs with major version 3, 12 with major
# version 4). The mini sector shift and reserved bytes are verified with
# #check_field, which raises on mismatch.
#
def check_shifts(file, major)
  # Skip byte order marker (always indicates little-endian)
  #
  file.seek(file.pos + 2)

  shift = read_shift(file, 2)

  required_major =
    case shift
    when 0x09 then 3
    when 0x0c then 4
    else raise "Sector shift: Unsupported value #{shift}"
    end

  # Report both sides of the disagreement; the shift value on its own says
  # nothing about why it was rejected.
  if major != required_major
    raise "Sector shift: #{shift} is only valid for major version #{required_major}, saw major version #{major}"
  end

  # Mini Sector Shift
  #
  check_field(file, "\x06\x00", 'Mini sector shift')

  # Reserved
  #
  check_field(file, "\x00\x00\x00\x00\x00\x00", 'Reserved')
end
|
|
946
|
+
|
|
947
|
+
# Chase down the rest of the DIFAT chain to build a comprehensive list.
|
|
948
|
+
# DIFAT chains by storing the next sector number as the last 32 bits.
|
|
949
|
+
#
|
|
950
|
+
# +idx+:: Sector index; usually, start DIFAT sector initially
|
|
951
|
+
# +cnt+:: DIFAT sector count expected
|
|
952
|
+
# +sectors+:: Array of sectors
|
|
953
|
+
# +ssz+:: Size of a sector
|
|
954
|
+
# +fat_addrs+:: Array MODIFIED IN PLACE with sector addresses added
|
|
955
|
+
#
|
|
956
|
+
# Collect FAT sector addresses from one DIFAT sector, then follow the link
# held in that sector's final 32-bit slot to the next DIFAT sector.
#
# +idx+::       Index into +sectors+ of the DIFAT sector to read, or one of
#               the ENDOFCHAIN / FREESECT markers.
# +cnt+::       Number of DIFAT sectors still expected, counting this one.
# +sectors+::   Array of sector data Strings.
# +ssz+::       Size of a sector in bytes.
# +fat_addrs+:: Array MODIFIED IN PLACE; each address read is appended.
#
# Raises RuntimeError if ENDOFCHAIN is reached while +cnt+ is non-zero.
#
def sleuth_fat(idx, cnt, sectors, ssz, fat_addrs)
  if idx == ENDOFCHAIN
    raise 'DIFAT chain shorter than expected' if cnt != 0

    return
  end

  return if idx == FREESECT

  # A negative index is never a real sector number. Ruby would resolve it
  # from the END of the array and hand back an unrelated sector, so treat
  # it as a missing sector instead.
  sector = idx.negative? ? nil : sectors[idx]
  return if sector.nil?

  # Every 32-bit slot except the last holds a FAT sector address.
  slots = (ssz >> 2) - 1
  slot = 0

  while slot < slots
    addr = self.class.get_int32le(sector, slot * 4)
    break if addr == ENDOFCHAIN

    fat_addrs << addr
    slot += 1
  end

  return unless cnt >= 1

  # The last slot links to the next DIFAT sector.
  sleuth_fat(
    self.class.get_int32le(sector, ssz - 4),
    cnt - 1,
    sectors,
    ssz,
    fat_addrs
  )
end
|
|
987
|
+
|
|
988
|
+
# Follow the linked list of sectors for a given starting point.
|
|
989
|
+
#
|
|
990
|
+
# Parameters need to be guessed from caller use cases.
|
|
991
|
+
#
|
|
992
|
+
# Walk one FAT chain and gather its sector numbers and data.
#
# +sectors+::   Array of sector data Strings.
# +start+::     Sector number at which the chain begins; a negative value
#               yields an empty result.
# +fat_addrs+:: Array of sector numbers of the FAT sectors, in FAT order.
# +ssz+::       Size of a sector in bytes (used as a bit mask, so presumably
#               always a power of two - TODO confirm).
# +chkd+::      Optional Array MODIFIED IN PLACE; each visited sector
#               number is flagged +true+. A scratch array is used if
#               omitted or +nil+.
#
# Returns an OpenStruct with +nodes+ (visited sector numbers in order) and
# +data+ (the visited sectors' contents concatenated).
#
# Raises RuntimeError if a FAT entry would straddle a sector boundary.
#
def get_sector_list(sectors, start, fat_addrs, ssz, chkd = nil)
  chkd ||= []
  nodes = []
  chunks = []
  in_chain = {} # Sectors seen during THIS walk, to stop on a cyclic chain
  modulus = ssz - 1
  j = start

  while j >= 0 && !in_chain[j]
    in_chain[j] = true
    chkd[j] = true
    nodes << j
    chunks << sectors[j]

    # Each FAT entry is 4 bytes: find the FAT sector holding entry j and
    # the byte offset of that entry within it.
    addr = fat_addrs[(j * 4) / ssz]
    jj = (j * 4) & modulus

    raise "FAT boundary crossed: #{j} 4 #{ssz}" if ssz < 4 + jj

    # Stop when no FAT sector covers this entry. Array#[] raises TypeError
    # for nil and counts from the end for negative numbers, so both are
    # checked before indexing.
    break if addr.nil? || addr.negative? || sectors[addr].nil?

    j = self.class.get_int32le(sectors[addr], jj)
  end

  OpenStruct.new(nodes: nodes, data: chunks.join)
end
|
|
1016
|
+
|
|
1017
|
+
# Chase down the sector linked lists.
|
|
1018
|
+
#
|
|
1019
|
+
# Parameters need to be guessed from caller use cases.
|
|
1020
|
+
#
|
|
1021
|
+
# Build the chain for every sector that has not already been reached
# through another chain, visiting sectors in order starting at +dir_start+
# and wrapping around.
#
# +sectors+::   Array of sector data Strings.
# +dir_start+:: Sector number to begin scanning from.
# +fat_addrs+:: Array of sector numbers of the FAT sectors, in FAT order.
# +ssz+::       Size of a sector in bytes (used as a bit mask, so presumably
#               always a power of two - TODO confirm).
#
# Returns a SectorList in which the entry at each chain's first sector
# number is an OpenStruct with +nodes+ (sector numbers in chain order) and
# +data+ (those sectors' contents concatenated).
#
# Raises RuntimeError if a FAT entry would straddle a sector boundary.
#
def make_sector_list(sectors, dir_start, fat_addrs, ssz)
  sector_count = sectors.length
  sector_list = SectorList.new
  modulus = ssz - 1

  # Hashes rather than Arrays: sector numbers come from the file, and an
  # Array would be resized up to whatever number is read.
  visited = {}

  sector_count.times do |offset|
    first = offset + dir_start
    first -= sector_count if first >= sector_count

    next if visited[first]

    nodes = []
    chunks = []
    seen = {} # Sectors in THIS chain, to stop on a cycle
    j = first

    while j >= 0
      seen[j] = true
      visited[j] = true

      nodes << j
      chunks << sectors[j]

      # Each FAT entry is 4 bytes: find the FAT sector holding entry j and
      # the byte offset of that entry within it.
      addr = fat_addrs[(j * 4) / ssz]
      jj = (j * 4) & modulus

      raise "FAT boundary crossed: #{j} 4 #{ssz}" if ssz < 4 + jj

      # Stop when no FAT sector covers this entry. Array#[] raises
      # TypeError for nil and counts from the end for negative numbers, so
      # both are checked before indexing.
      break if addr.nil? || addr.negative? || sectors[addr].nil?

      j = self.class.get_int32le(sectors[addr], jj)
      break if seen[j]
    end

    sector_list[first] = OpenStruct.new(nodes: nodes, data: chunks.join)
  end

  sector_list
end
|
|
1067
|
+
|
|
1068
|
+
# [MS-CFB] 2.6.1 Compound File Directory Entry.
|
|
1069
|
+
#
|
|
1070
|
+
# Parameters need to be guessed from caller use cases.
|
|
1071
|
+
#
|
|
1072
|
+
# Parse every 128-byte directory entry in the directory chain, recording
# each one in +paths+, +files+ and #file_index, and attaching stream
# content where it can be located.
#
# +dir_start+::   Key into +sector_list+ of the directory chain.
# +sector_list+:: Chain collection (see #make_sector_list); must also
#                 respond to +fat_addrs+ and +ssz+. Entries may be added or
#                 given a +name+ here.
# +sectors+::     Array of sector data Strings.
# +paths+::       Array MODIFIED IN PLACE; each entry name is appended.
#                 When it is non-empty on entry, two bytes fewer than the
#                 stored name length are read for every name.
# +nmfs+::        Presumably the MiniFAT sector count from the header -
#                 TODO confirm; only tested for being positive here.
# +files+::       Hash MODIFIED IN PLACE, keyed by entry name.
# +mini+::        Key into +sector_list+ of the MiniFAT chain.
#
def read_directory(dir_start, sector_list, sectors, paths, nmfs, files, mini)
  minifat_store = 0
  pl = paths.any? ? 2 : 0
  sector = sector_list[dir_start].data
  nul_terminator = String.new("\x00\x00", encoding: 'UTF-16LE')
  i = 0

  while i < sector.size
    blob = StringIO.new(sector.slice(i, 128))

    # The name's byte length lives at offset 64; the name itself occupies
    # the start of the entry.
    blob.seek(64)
    namelen = self.read_shift(blob, 2)

    # Clamp at zero: IO#read raises ArgumentError for a negative length,
    # which would otherwise happen for short names when pl is 2.
    blob.seek(0)
    name = blob.read([namelen - pl, 0].max).force_encoding('UTF-16LE')
    name.chomp!(nul_terminator)
    name.encode!('UTF-8')

    paths << name

    # Fields are read sequentially from offset 66, so the order of the
    # read_shift calls below must match the on-disk layout.
    blob.seek(66)
    o = OpenStruct.new({
      name:  name,
      type:  self.read_shift(blob, 1),
      color: self.read_shift(blob, 1),
      L:     self.read_shift(blob, 4, 'i'),
      R:     self.read_shift(blob, 4, 'i'),
      C:     self.read_shift(blob, 4, 'i'),
      clsid: self.read_shift(blob, 16),
      state: self.read_shift(blob, 4, 'i'),
      start: 0,
      size:  0
    })

    o.ct    = self.class.get_time(blob.read(8))
    o.mt    = self.class.get_time(blob.read(8))
    o.start = self.read_shift(blob, 4, 'i')
    o.size  = self.read_shift(blob, 4, 'i')

    # Negative start AND size: reset the entry to an empty, unknown one.
    if o.size < 0 && o.start < 0
      o.size = o.type = 0
      o.start = ENDOFCHAIN
      o.name = ''
    end

    if o.type == 5 # Root
      minifat_store = o.start

      if nmfs > 0 && minifat_store != ENDOFCHAIN
        sector_list[minifat_store].name = '!StreamData'
      end
    elsif o.size >= MSCSZ
      o.storage = 'fat'

      if sector_list[o.start].nil?
        # get_sector_list declares five parameters; pass nil for the
        # visited-sector array so that it uses its own scratch array.
        sector_list[o.start] = self.get_sector_list(sectors, o.start, sector_list.fat_addrs, sector_list.ssz, nil)
      end

      sector_list[o.start].name = o.name
      o.content = sector_list[o.start].data.slice(0, o.size)
    else
      o.storage = 'minifat'

      if o.size < 0
        o.size = 0
      elsif minifat_store != ENDOFCHAIN && o.start != ENDOFCHAIN && !sector_list[minifat_store].nil?
        o.content = self.get_mfat_entry(o, sector_list[minifat_store].data, sector_list[mini]&.data)
      end
    end

    files[name] = o
    self.file_index << o

    i += 128
  end
end
|
|
1148
|
+
|
|
1149
|
+
# [MS-CFB] 2.6.4 Red-Black Tree.
|
|
1150
|
+
#
|
|
1151
|
+
# +paths+:: Array of incomplete paths (often just leafnames) where indices
|
|
1152
|
+
# in the array correspond to "self.file_index" entries; contents
|
|
1153
|
+
# in "self.full_paths" will be overwritten if present.
|
|
1154
|
+
#
|
|
1155
|
+
# Derive each entry's parent from the sibling (L/R) and child (C) links in
# #file_index, then write slash-separated paths into #full_paths.
#
# +paths+:: Array of entry names whose indices match #file_index. Neither
#           the array nor its strings are modified: #full_paths receives
#           copies.
#
# On return, #full_paths[0] and every entry whose type is not STREAM end in
# '/'. Entries of type 0 keep their bare name (plus the trailing '/').
#
def build_full_paths(paths)
  pl = paths.length
  dad = [] # dad[i] is the parent index of entry i; dad[i] == i means unknown
  q = []   # Work queue of entry indices; siblings get re-queued below
  el = ar = ce = 0

  paths.each_with_index do |path, idx|
    dad[idx] = q[idx] = idx

    # Copy the name. Storing the caller's String and appending '/' to it
    # later would also change every other reference to that object.
    full_paths[idx] = path.dup
  end

  j = 0

  while j < q.length
    i = q[j]
    el = file_index[i].L
    ar = file_index[i].R
    ce = file_index[i].C

    # Parent not known yet: borrow it from a sibling that has one.
    if dad[i] == i
      dad[i] = dad[el] if el != NOSTREAM && dad[el] != el
      dad[i] = dad[ar] if ar != NOSTREAM && dad[ar] != ar
    end

    dad[ce] = i if ce != NOSTREAM

    # Siblings share this entry's parent; revisit any sibling that was
    # already processed so that the parent propagates further.
    if el != NOSTREAM && i != dad[i]
      dad[el] = dad[i]
      q << el if q.rindex(el) < j
    end

    if ar != NOSTREAM && i != dad[i]
      dad[ar] = dad[i]
      q << ar if q.rindex(ar) < j
    end

    j += 1
  end

  # NOTE(review): el/ar still hold the links of the LAST queued entry here,
  # not those of entry i. Kept as-is to preserve existing behaviour.
  1.upto(pl - 1) do |i|
    next unless dad[i] == i

    if ar != NOSTREAM && dad[ar] != ar
      dad[i] = dad[ar]
    elsif el != NOSTREAM && dad[el] != el
      dad[i] = dad[el]
    end
  end

  1.upto(pl - 1) do |i|
    next if file_index[i].type == 0 # (unknown)

    j = i

    if j != dad[j]
      # Climb towards the root, prefixing each ancestor's path so far.
      loop do
        j = dad[j]
        full_paths[i] = full_paths[j] + '/' + full_paths[i]

        break unless j != 0 && NOSTREAM != dad[j] && j != dad[j]
      end
    end

    dad[i] = -1
  end

  full_paths[0] << '/'

  1.upto(pl - 1) do |i|
    full_paths[i] << '/' if file_index[i].type != STREAM
  end
end
|
|
1231
|
+
|
|
1232
|
+
# Read entry contents. Undocumented in JS code; looks like:
|
|
1233
|
+
#
|
|
1234
|
+
# +entry+:: The internal file structure being compiled; updated on exit
|
|
1235
|
+
# +payload+:: MiniFAT sector data (file contents within)
|
|
1236
|
+
# +mini+:: MiniFAT indices (of file contents in sector data)
|
|
1237
|
+
#
|
|
1238
|
+
# Returns the extracted data as an ASCII-8BIT encoded string.
|
|
1239
|
+
#
|
|
1240
|
+
# Extract one entry's content by following its chain of MSSZ-byte chunks.
#
# +entry+::   Object responding to +start+ (first chunk index) and +size+
#             (content length in bytes); it is only read here.
# +payload+:: String holding the chunk data; chunk n starts at byte
#             offset n * MSSZ.
# +mini+::    String of 32-bit entries giving, for chunk n, the index of
#             the chunk that follows it; may be +nil+.
#
# Returns the extracted data as an ASCII-8BIT encoded string, cut to
# +entry.size+ bytes. The string is empty if +mini+ is nil or empty, or if
# nothing could be read.
#
def get_mfat_entry(entry, payload, mini)
  remaining = entry.size
  idx = entry.start
  content = String.new(encoding: 'ASCII-8BIT')

  return content if mini.nil? || mini.empty?

  while remaining > 0 && idx >= 0
    chunk = payload.slice(idx * MSSZ, MSSZ)

    # The chain points beyond the end of the payload: keep what was read
    # rather than failing on an append of nil.
    break if chunk.nil?

    content << chunk
    remaining -= MSSZ
    idx = self.class.get_int32le(mini, idx * 4)
  end

  # Checked before slicing because String#slice returns nil when given a
  # negative length.
  return content if content.empty?

  content.slice(0, entry.size)
end
|
|
1255
|
+
|
|
1256
|
+
end # "class SimpleCfb"
|