keeguon-ruby-ole 1.2.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ require 'ole/storage/base'
2
+ require 'ole/storage/file_system'
3
+ require 'ole/storage/meta_data'
@@ -0,0 +1,945 @@
1
+ # encoding: ASCII-8BIT
2
+
3
+ require 'tempfile'
4
+
5
+ require 'ole/base'
6
+ require 'ole/types'
7
+ require 'ole/ranges_io'
8
+
9
+ module Ole # :nodoc:
10
+ #
11
+ # This class is the primary way the user interacts with an OLE storage file.
12
+ #
13
+ # = TODO
14
+ #
15
+ # * the custom header cruft for Header and Dirent needs some love.
16
+ # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
17
+ # and, in a manner of speaking, but arguably different, Storage itself.
18
+ # they have differing api's which would be nice to rethink.
19
+ # AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
20
+ #
21
+ class Storage
22
+ # thrown for any bogus OLE file errors.
23
+ class FormatError < StandardError # :nodoc:
24
+ end
25
+
26
+ # options used at creation time
27
+ attr_reader :params
28
+ # The top of the ole tree structure
29
+ attr_reader :root
30
+ # The tree structure in its original flattened form. only valid after #load, or #flush.
31
+ attr_reader :dirents
32
+ # The underlying io object to/from which the ole object is serialized, whether we
33
+ # should close it, and whether it is writeable
34
+ attr_reader :io, :close_parent, :writeable
35
+ # Low level internals, you probably shouldn't need to mess with these
36
+ attr_reader :header, :bbat, :sbat, :sb_file
37
+
38
+ # +arg+ should be either a filename, or an +IO+ object, and needs to be seekable.
39
+ # +mode+ is optional, and should be a regular mode string.
40
# Creates a storage object on top of +arg+.
#
# +arg+ should be either a filename, or an +IO+ object, and needs to be seekable.
# A filename is opened here and closed again by #close; an io object is left
# open by #close.
# +mode+ is optional, and should be a regular mode string. It can only be
# combined with a filename, where it defaults to 'rb'. A Hash given in the
# +mode+ position is taken to be +params+.
# +params+ is merged over the default <tt>:update_timestamps => true</tt>.
#
# Raises ArgumentError if a mode string is given together with an io object.
def initialize arg, mode=nil, params={}
  params, mode = mode, nil if Hash === mode
  @params = {:update_timestamps => true}.merge(params)

  # get the io object
  if String === arg
    mode ||= 'rb'
    # File.open, not Kernel#open: the latter spawns a subprocess when the
    # name starts with a pipe character.
    @close_parent, @io = true, File.open(arg, mode)
  else
    raise ArgumentError, 'unable to specify mode string with io object' if mode
    @close_parent, @io = false, arg
  end

  begin
    # force encoding, to avoid picking up source encoding with StringIO or files in text mode
    @io.set_encoding Encoding::ASCII_8BIT if @io.respond_to?(:set_encoding)
    # do we have this file opened for writing? use mode when provided,
    # otherwise try no-op methods which will raise if read-only
    @writeable = begin
      if mode
        IOMode.new(mode).writeable?
      else
        # works on mri 1.8 & jruby
        @io.flush
        begin
          # works on mri 1.9 & rubinius, throws EBADF on windows
          @io.write_nonblock('') if @io.respond_to?(:write_nonblock)
        rescue Errno::EBADF
          # for windows
          @io.syswrite('')
        end
        true
      end
    rescue IOError
      false
    end
    # silence undefined warning in clear
    @sb_file = nil
    # if the io object has data, we should load it, otherwise start afresh
    # this should be based on the mode string rather.
    @io.size > 0 ? load : clear
  rescue
    # don't leak the file handle we opened ourselves when setup fails
    @io.close if @close_parent
    raise
  end
end
81
+
82
+ # somewhat similar to File.open, the open class method allows a block form where
83
+ # the Ole::Storage object is automatically closed on completion of the block.
84
# Like File.open: builds a new object from +arg+, +mode+ and +params+.
# Without a block the object is returned. With a block the object is
# yielded, closed once the block finishes (even if it raises), and the
# block's value is returned.
def self.open arg, mode=nil, params={}
  storage = new arg, mode, params
  return storage unless block_given?
  begin
    yield storage
  ensure
    storage.close
  end
end
93
+
94
+ # load document from file.
95
+ #
96
+ # TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
97
+ #
98
+ # 1. reterminate any chain not ending in EOC.
99
+ # compare file size with actually allocated blocks per file.
100
+ # 2. pass through all chain heads looking for collisions, and making sure nothing points to them
101
+ # (ie they are really heads). in both sbat and mbat
102
+ # 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
103
+ # in the bat for them.
104
+ # 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
105
+ # (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
106
+ # will be automatically thrown away at close time.
107
# load document from file.
#
# Reads the header, rebuilds the big block allocation table (following the
# meta bat chain when the header says there is one), reads the directory
# entries and links them into a tree under @root, then sets up the small
# block file and small block allocation table.
#
# Raises FormatError when the directory tree refers to an entry that does not
# exist or reaches the same entry twice (which includes cycles).
#
# TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
#
# 1. reterminate any chain not ending in EOC.
#    compare file size with actually allocated blocks per file.
# 2. pass through all chain heads looking for collisions, and making sure nothing points to them
#    (ie they are really heads). in both sbat and mbat
# 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
#    in the bat for them.
# 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
#    (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
#    will be automatically thrown away at close time.
def load
  # we always read 512 for the header block. if the block size ends up being different,
  # what happens to the 109 fat entries. are there more/less entries?
  @io.rewind
  header_block = @io.read 512
  @header = Header.new header_block

  # create an empty bbat.
  @bbat = AllocationTable::Big.new self
  bbat_chain = header_block[Header::SIZE..-1].unpack 'V*'
  mbat_block = @header.mbat_start
  @header.num_mbat.times do
    blocks = @bbat.read([mbat_block]).unpack 'V*'
    # the final entry of each meta bat block points at the next one
    mbat_block = blocks.pop
    bbat_chain += blocks
  end
  # am i using num_bat in the right way?
  @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])

  # get block chain for directories, read it, then split it into chunks and load the
  # directory entries. semantics changed - used to cut at first dir where dir.type == 0
  @dirents = @bbat.read(@header.dirent_start).to_enum(:each_chunk, Dirent::SIZE).
    map { |str| Dirent.new self, str }

  # now reorder from flat into a tree
  # links are stored in some kind of balanced binary tree
  # check that everything is visited at least, and at most once
  # similarly with the blocks of the file.
  # was thinking of moving this to Dirent.to_tree instead.
  class << @dirents
    def to_tree idx=0
      return [] if idx == Dirent::EOT
      d = self[idx]
      raise FormatError, "directory index #{idx} out of range" unless d
      # mark the entry before descending, so that a link back to an entry
      # already on the path is reported instead of recursing forever
      raise FormatError, "directory #{d.inspect} used twice" if d.idx
      d.idx = idx
      to_tree(d.child).each { |child| d << child }
      to_tree(d.prev) + [d] + to_tree(d.next)
    end
  end

  @root = @dirents.to_tree.first
  @dirents.reject! { |d| d.type_id == 0 }
  # silence this warning by default, its not really important (issue #5).
  # fairly common one appears to be "R" (from office OS X?) which smells
  # like some kind of UTF16 snafu, but scottwillson also has had some kanji...
  #Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
  unused = @dirents.reject(&:idx).length
  Log.warn "#{unused} unused directories" if unused > 0

  # FIXME i don't currently use @header.num_sbat which i should
  # hmm. nor do i write it. it means what exactly again?
  # which mode to use here?
  @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
  @sbat = AllocationTable::Small.new self
  @sbat.load @bbat.read(@header.sbat_start)
end
163
+
164
# Closes the small block file, writes the meta data out with #flush when the
# storage is writeable, and closes the underlying io if this object opened it.
# The io is closed even when flushing raises, so the handle is not leaked.
# Returns nil.
def close
  @sb_file.close
  flush if @writeable
  nil
ensure
  @io.close if @close_parent
end
169
+
170
+ # the flush method is the main "save" method. all file contents are always
171
+ # written directly to the file by the RangesIO objects, all this method does
172
+ # is write out all the file meta data - dirents, allocation tables, file header
173
+ # etc.
174
+ #
175
+ # maybe add an option to zero the padding, and any remaining avail blocks in the
176
+ # allocation table.
177
+ #
178
+ # TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
179
+ # of bbat to AllocationTable::Big.
180
# the flush method is the main "save" method. file contents are written
# directly to the file by the RangesIO objects; this method only writes out
# the meta data: the dirents, the small and big allocation tables, the meta
# bat blocks (if needed) and finally the header block.
#
# TODO: long and overly complex. simplify and test better.
def flush
  big_block = @bbat.block_size

  # bring the root dirent up to date, and flatten the dirent tree
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten

  # serialize the dirents using the bbat, zero padded to a whole block
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |dir_io|
    dir_io.write @dirents.map { |dirent| dirent.to_s }.join
    padded_size = (dir_io.size / big_block.to_f).ceil * big_block
    dir_io.write 0.chr * (padded_size - dir_io.size)
    @header.dirent_start = dir_io.first_block
  end

  # serialize the sbat
  # perhaps the blocks used by the sbat should be marked with BAT?
  RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |sbat_io|
    sbat_io.write @sbat.to_s
    @header.sbat_start = sbat_io.first_block
    @header.num_sbat = @bbat.chain(@header.sbat_start).length
  end

  # release every block currently marked as holding bat or meta bat data;
  # they are claimed again below.
  # this is perhaps not good, as we reclaim all bat blocks here, which
  # may include the sbat we just wrote. FIXME
  @bbat.map! do |entry|
    if entry == AllocationTable::BAT || entry == AllocationTable::META_BAT
      AllocationTable::AVAIL
    else
      entry
    end
  end

  # writing out the bat itself needs blocks, which get recorded in the bat,
  # so iterate until both the meta bat block count and the space claimed for
  # the bat stop changing.
  mbat_count = 0
  bat_io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
  # truncate now, so that we can simplify size calcs - the mbat blocks will be
  # appended in a contiguous chunk at the end. the underlying io is cut to match.
  @bbat.truncate!
  @io.truncate big_block * (@bbat.length + 1)
  loop do
    # total bbat size in bytes, counting the pending mbat blocks, rounded up to whole blocks
    bat_bytes = ((@bbat.length + mbat_count) * 4 / big_block.to_f).ceil * big_block
    # bat blocks beyond the 109 held in the header need meta bat blocks
    needed_mbat = ([bat_bytes / big_block - 109, 0].max * 4 / (big_block.to_f - 4)).ceil
    if needed_mbat != mbat_count
      # need more space for the mbat.
      mbat_count = needed_mbat
    elsif bat_io.size != bat_bytes
      # need more space for the bat
      # this may grow the bbat, depending on existing available blocks
      bat_io.truncate bat_bytes
    else
      break
    end
  end

  # now extract the info we want:
  bat_ranges = bat_io.ranges
  bbat_chain = @bbat.chain bat_io.first_block
  bat_io.close
  bbat_chain.each { |blk| @bbat[blk] = AllocationTable::BAT }
  # tack on the mbat stuff
  @header.num_bat = bbat_chain.length
  mbat_blocks = Array.new(mbat_count) do
    blk = @bbat.free_block
    @bbat[blk] = AllocationTable::META_BAT
    blk
  end
  @header.mbat_start = mbat_blocks.first || AllocationTable::EOC

  # now finally write the bbat, using a not resizable io.
  # the mode here will be 'r', which allows write atm.
  RangesIO.open(@io, :ranges => bat_ranges) { |f| f.write @bbat.to_s }

  # this is the mbat. pad it out.
  bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
  @header.num_mbat = mbat_count
  unless mbat_count == 0
    per_block = big_block / 4
    # everything past the 109 header slots goes into the mbat blocks, each
    # of which keeps its final slot for the pointer to the next mbat block.
    slices = bbat_chain[109..-1].to_enum(:each_slice, per_block - 1).to_a
    mbat_data = slices.zip(mbat_blocks[1..-1] + [nil]).map do |entries, next_block|
      next_block ? entries + [next_block] : entries
    end
    # pad out the last one.
    mbat_data.last.push(*([AllocationTable::AVAIL] * (per_block - mbat_data.last.length)))
    RangesIO.open @io, :ranges => @bbat.ranges(mbat_blocks) do |f|
      f.write mbat_data.flatten.pack('V*')
    end
  end

  # now seek back and write the header out
  @io.seek 0
  @io.write @header.to_s + bbat_chain[0, 109].pack('V*')
  @io.flush
end
294
+
295
# Resets this object to the equivalent of having loaded an empty ole
# document: default header, empty allocation tables, a lone root dirent and
# an empty small block file. The underlying io is truncated to zero length.
# A warning is logged when the io is not writeable.
def clear
  unless @writeable
    Log.warn 'creating new ole storage object on non-writable io'
  end
  @header = Header.new
  @bbat = AllocationTable::Big.new self
  @root = Dirent.new self, :type => :root, :name => 'Root Entry'
  @root.idx = 0
  @dirents = [@root]
  # drop any previous small block file before replacing it
  @sb_file.close if @sb_file
  @sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
  @sbat = AllocationTable::Small.new self
  # throw everything else the hell away
  @io.truncate 0
end
309
+
310
+ # could be useful with mis-behaving ole documents. or to just clean them up.
311
# Rewrites the document via a temporary copy, see #repack_using_io.
# could be useful with mis-behaving ole documents. or to just clean them up.
#
# +temp+ picks the backing of the temporary copy: <tt>:file</tt> for a
# Tempfile in binary mode, <tt>:mem</tt> for a StringIO. Anything else
# raises ArgumentError.
def repack temp=:file
  if temp == :file
    Tempfile.open 'ole-repack' do |io|
      io.binmode
      repack_using_io io
    end
  elsif temp == :mem
    StringIO.open('') { |io| repack_using_io io }
  else
    raise ArgumentError, "unknown temp backing #{temp.inspect}"
  end
end
322
+
323
# Copies the current io contents into +temp_io+, resets this object with
# #clear, then opens +temp_io+ as a storage (with the same params) and copies
# its tree, starting at the root, back into this object.
def repack_using_io temp_io
  @io.rewind
  IO.copy @io, temp_io
  clear
  Storage.open temp_io, nil, @params do |source|
    #source.root.type = :dir
    Dirent.copy source.root, root
  end
end
332
+
333
# Returns the allocation table used for data of +size+ bytes: the big block
# table when +size+ is at or above the header threshold, otherwise the small
# block table.
def bat_for_size size
  # note >=, not > previously.
  if size >= @header.threshold
    @bbat
  else
    @sbat
  end
end
337
+
338
# Short description made of the class name plus the inspected io and root.
def inspect
  io_desc = @io.inspect
  root_desc = @root.inspect
  "#<#{self.class} io=#{io_desc} root=#{root_desc}>"
end
341
+
342
+ #
343
+ # A class which wraps the ole header
344
+ #
345
+ # Header.new can be both used to load from a string, or to create from
346
+ # defaults. Serialization is accomplished with the #to_s method.
347
+ #
348
class Header < Struct.new(
    :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
    :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
    :sbat_start, :num_sbat, :mbat_start, :num_mbat
  )
  # pack/unpack template for the fields above, and the packed length in bytes
  PACK = 'a8 a16 v2 a2 v2 a6 V3 a4 V5'
  SIZE = 0x4c
  # i have seen it pointed out that the first 4 bytes of hex,
  # 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
  MAGIC = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" # expected value of Header#magic
  # what you get if creating new header from scratch.
  # AllocationTable::EOC isn't available yet. meh.
  EOC = 0xfffffffe
  DEFAULT = [
    MAGIC, 0.chr * 16, 59, 3, "\xfe\xff", 9, 6,
    0.chr * 6, 0, 1, EOC, 0.chr * 4,
    4096, EOC, 0, EOC, 0
  ]

  # +values+ is either a packed header string, which is unpacked with PACK,
  # or an array of field values (DEFAULT when omitted). The result is
  # checked with #validate!.
  def initialize values=DEFAULT
    fields = String === values ? values.unpack(PACK) : values
    super(*fields)
    validate!
  end

  # Serializes the fields back into their packed string form.
  def to_s
    to_a.pack PACK
  end

  # Raises FormatError when the signature or the basic structural fields are
  # wrong, logs a warning for merely unusual values, and returns true otherwise.
  def validate!
    raise FormatError, "OLE2 signature is invalid" unless magic == MAGIC

    broken =
      num_bat == 0 || # is that valid for a completely empty file?
      # not sure about this one. basically to do max possible bat given size of mbat
      (num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2)) ||
      # shouldn't need to use the mbat as there is enough space in the header block
      (num_bat < 109 && num_mbat != 0) ||
      # given the size of the header is 76, if b_shift <= 6, blocks address the header.
      s_shift > b_shift || b_shift <= 6 || b_shift >= 31 ||
      # we only handle little endian
      byte_order != "\xfe\xff"
    raise FormatError, "not valid OLE2 structured storage file" if broken

    # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
    # 3 for this value.
    # transacting_signature != "\x00" * 4 or
    odd =
      threshold != 4096 ||
      (num_mbat == 0 && ![AllocationTable::EOC, AllocationTable::AVAIL].include?(mbat_start)) ||
      reserved != "\x00" * 6
    Log.warn "may not be a valid OLE2 structured storage file" if odd

    true
  end
end
401
+
402
+ #
403
+ # +AllocationTable+'s hold the chains corresponding to files. Given
404
+ # an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning
405
+ # the blocks that make up that file.
406
+ #
407
+ # There are 2 allocation tables, the bbat, and sbat, for big and small
408
+ # blocks respectively. The block chain should be loaded using either
409
+ # <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt>
410
+ # as appropriate.
411
+ #
412
+ # Whether or not big or small blocks are used for a file depends on
413
+ # whether its size is over the <tt>Header#threshold</tt> level.
414
+ #
415
+ # An <tt>Ole::Storage</tt> document is serialized as a series of directory objects,
416
+ # which are stored in blocks throughout the file. The blocks are either
417
+ # big or small, and are accessed using the <tt>AllocationTable</tt>.
418
+ #
419
+ # The bbat allocation table's data is stored in the spare room in the header
420
+ # block, and in extra blocks throughout the file as referenced by the meta
421
+ # bat. That chain is linear, as there is no higher level table.
422
+ #
423
+ # AllocationTable.new is used to create an empty table. It can parse a string
424
+ # with the #load method. Serialization is accomplished with the #to_s method.
425
+ #
426
+ class AllocationTable < Array
427
+ # a free block (I don't currently leave any blocks free), although I do pad out
428
+ # the allocation table with AVAIL to the block size.
429
+ AVAIL = 0xffffffff
430
+ EOC = 0xfffffffe # end of a chain
431
+ # these blocks are used for storing the allocation table chains
432
+ BAT = 0xfffffffd
433
+ META_BAT = 0xfffffffc
434
+
435
+ attr_reader :ole, :io, :block_size
436
# Builds an empty table belonging to +ole+. @sparse starts out true; it is
# the flag #free_block consults before searching for an AVAIL entry.
def initialize ole
  super()
  @sparse = true
  @ole = ole
end
441
+
442
# Replaces the table contents with the 32 bit little endian integers
# unpacked from the string +data+.
def load data
  entries = data.unpack('V*')
  replace entries
end
445
+
446
# Returns a copy of the table as a plain array with trailing AVAIL entries
# stripped. A table holding nothing but AVAIL entries strips down to an
# empty array. The receiver is not modified.
#
# come to think of it, this has the potential to break bogus ole. if you
# terminate using AVAIL instead of EOC, like I did before. but that is very
# broken. however, if a chain ends with AVAIL, it should probably be fixed
# to EOC at load time.
def truncate
  last_used = rindex { |b| b != AVAIL }
  last_used ? first(last_used + 1) : []
end
455
+
456
# In-place form of #truncate: replaces the table contents with the result.
def truncate!
  stripped = truncate
  replace stripped
end
459
+
460
# Serializes the truncated table as 32 bit little endian integers, padded
# with AVAIL entries up to a whole number of big blocks.
def to_s
  entries = truncate
  # pad it out some
  per_block = @ole.bbat.block_size / 4
  leftover = entries.length % per_block
  # do you really use AVAIL? they probably extend past end of file, and may shortly
  # be used for the bat. not really good.
  entries += [AVAIL] * (per_block - leftover) unless leftover == 0
  entries.pack 'V*'
end
469
+
470
+ # rewrote this to be non-recursive as it broke on a large attachment
471
+ # chain with a stack error
472
+ def chain idx
473
+ a = []
474
+ until idx >= META_BAT
475
+ raise FormatError, "broken allocationtable chain" if idx < 0 || idx > length
476
+ a << idx
477
+ idx = self[idx]
478
+ end
479
+ Log.warn "invalid chain terminator #{idx}" unless idx == EOC
480
+ a
481
+ end
482
+
483
+ # Turn a chain (an array given by +chain+) of blocks (optionally
484
+ # truncated to +size+) into an array of arrays describing the stretches of
485
+ # bytes in the file that it belongs to.
486
+ #
487
+ # The blocks are Big or Small blocks depending on the table type.
488
# Turns +chain+, an array of block indices, into an array of
# <tt>[offset, length]</tt> pairs, one per block. When +size+ is given, only
# as many blocks as are needed to hold +size+ bytes are used, and the final
# pair is adjusted so that the lengths add up to +size+.
#
# The blocks are Big or Small blocks depending on the table type.
def blocks_to_ranges chain, size=nil
  bs = block_size
  # truncate the chain if required
  used = size ? chain[0, (size.to_f / bs).ceil] : chain
  # one range of the block size per block
  spans = used.map { |blk| [bs * blk, bs] }
  # truncate final range if required
  if size && !spans.empty?
    spans.last[1] -= spans.length * bs - size
  end
  spans
end
497
+
498
# Like #blocks_to_ranges, but +chain+ may also be a head block index, in
# which case it is first expanded with #chain.
def ranges chain, size=nil
  blocks = Array === chain ? chain : self.chain(chain)
  blocks_to_ranges blocks, size
end
502
+
503
+ # quick shortcut. chain can be either a head (in which case the table is used to
504
+ # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
505
# quick shortcut. +chain+ can be either a head (in which case the table is
# used to turn it into a chain), or a chain. it is converted to ranges, and a
# RangesIO over this table's io is opened on them, passing +block+ along.
def open chain, size=nil, &block
  spans = ranges(chain, size)
  RangesIO.open @io, :ranges => spans, &block
end
508
+
509
# Opens +chain+ (optionally limited to +size+ bytes) with #open and returns
# the result of calling read on the io.
def read chain, size=nil
  open(chain, size) { |io| io.read }
end
512
+
513
+ # catch any method that may add an AVAIL somewhere in the middle, thus invalidating
514
+ # the @sparse speedup for free_block. annoying using eval, but define_method won't
515
+ # work for this.
516
+ # FIXME
517
# catch any method that may add an AVAIL somewhere in the middle, thus
# invalidating the @sparse speedup for free_block: flag the table as
# possibly containing AVAIL entries again, then defer to the Array version.
def map!(*args, &block)
  @sparse = true
  super
end

# same treatment as map!.
def collect!(*args, &block)
  @sparse = true
  super
end
525
+
526
# Element assignment. Storing AVAIL sets @sparse again, so that #free_block
# goes back to searching the table for it.
def []= idx, val
  if val == AVAIL
    @sparse = true
  end
  super
end
530
+
531
# Returns the index of a block marked AVAIL. While @sparse is set the table
# is searched for an existing AVAIL entry; when none is found @sparse is
# cleared. Otherwise a new AVAIL entry is appended and its index returned.
def free_block
  if @sparse
    slot = index(AVAIL)
    return slot if slot
    @sparse = false
  end
  push AVAIL
  length - 1
end
539
+
540
+ # must return first_block. modifies +blocks+ in place
541
# Grows or shrinks the chain +blocks+ (an array of block indices, modified in
# place) so that it can hold +size+ bytes, updating the table entries to
# match, and returns +blocks+.
def resize_chain blocks, size
  wanted = (size / block_size.to_f).ceil
  current = blocks.length
  if wanted > current
    # need some more blocks.
    tail = blocks.last
    (wanted - current).times do
      fresh = free_block
      # connect the chain. handle corner case of blocks being [] initially
      self[tail] = fresh if tail
      blocks << fresh
      tail = fresh
      # terminate straight away, so the next free_block cannot hand it out again
      self[tail] = EOC
    end
  elsif wanted < current
    # de-allocate some of our old blocks. TODO maybe zero them out in the file???
    blocks[wanted...current].each { |blk| self[blk] = AVAIL }
    self[blocks[wanted - 1]] = EOC if wanted > 0
    blocks.slice! wanted..-1
  end
  blocks
end
564
+
565
class Big < AllocationTable
  # The block size is 2 to the power of the header's b_shift, and the
  # backing io is the storage's own io.
  def initialize(*args)
    super
    @block_size = 1 << @ole.header.b_shift
    @io = @ole.io
  end

  # Big blocks are kind of -1 based, in order to not clash with the header:
  # block i starts at offset block_size * (i + 1).
  def blocks_to_ranges chain, size=nil
    #super chain.map { |b| b + 1 }, size
    # duplicated from AllocationTable#blocks_to_ranges to avoid chain.map
    # which was decent part of benchmark profile
    bs = block_size
    used = size ? chain[0, (size.to_f / bs).ceil] : chain
    spans = used.map { |blk| [bs * (blk + 1), bs] }
    if size && !spans.empty?
      spans.last[1] -= spans.length * bs - size
    end
    spans
  end
end
583
+
584
class Small < AllocationTable
  # The block size is 2 to the power of the header's s_shift, and the
  # backing io is the storage's small block file.
  def initialize(*args)
    super
    @io = @ole.sb_file
    @block_size = 1 << @ole.header.s_shift
  end
end
591
+ end
592
+
593
+ # like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
594
+ # AllocationTable, and can be resized. used for read/write to 2 streams:
595
+ # 1. serialized dirent data
596
+ # 2. sbat table data
597
+ # 3. all dirents but through RangesIOMigrateable below
598
+ #
599
+ # Note that all internal access to first_block is through accessors, as it is sometimes
600
+ # useful to redirect it.
601
class RangesIOResizeable < RangesIO
  attr_reader :bat
  attr_accessor :first_block

  # +bat+ is the allocation table backing the ranges. +mode+ may be left out,
  # in which case it is 'r'. +params+ must include <tt>:first_block</tt>, the
  # head of the chain, and may include <tt>:size</tt>.
  #
  # Raises ArgumentError when <tt>:first_block</tt> is missing.
  def initialize bat, mode='r', params={}
    mode, params = 'r', mode if Hash === mode
    head = params[:first_block]
    size = params[:size]
    raise ArgumentError, 'must specify first_block' unless head
    @bat = bat
    self.first_block = head
    # we now cache the blocks chain, for faster resizing.
    @blocks = @bat.chain head
    super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
  end

  # Resizes the chain in the allocation table to hold +size+ bytes, clamps
  # the position, refreshes the ranges and first_block, and grows the
  # underlying io if the ranges now reach past its end.
  def truncate size
    # note that old_blocks is != @ranges.length necessarily. i'm planning to write a
    # merge_ranges function that merges sequential ranges into one as an optimization.
    @bat.resize_chain @blocks, size
    @pos = [@pos, size].min
    self.ranges = @bat.ranges(@blocks, size)
    self.first_block = @blocks.first || AllocationTable::EOC

    # don't know if this is required, but we explicitly request our @io to grow if necessary
    # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
    # can be made.
    # maybe its ok to just seek out there later??
    furthest = @ranges.map { |pos, len| pos + len }.max || 0
    @io.truncate furthest if furthest > @io.size
  end
end
631
+
632
+ # like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
633
+ # between bats based on size, and updating the dirent.
634
class RangesIOMigrateable < RangesIOResizeable
  attr_reader :dirent

  # Opens the data of +dirent+, using whichever allocation table its storage
  # selects for the dirent's current size.
  def initialize dirent, mode='r'
    @dirent = dirent
    super @dirent.ole.bat_for_size(@dirent.size), mode,
      :first_block => @dirent.first_block, :size => @dirent.size
  end

  # Resizes to +size+ bytes. When the new size belongs to the other
  # allocation table, the data is carried across, and the dirent's size is
  # updated either way.
  def truncate size
    target_bat = @dirent.ole.bat_for_size size
    if target_bat.class == @bat.class
      super
    else
      # bat migration needed! we need to backup some data. the amount of data
      # should be <= @ole.header.threshold, so we can just hold it all in one buffer.
      restore_pos = [@pos, size].min
      self.pos = 0
      saved = read [@size, size].min
      # this does a normal truncate to 0, removing our presence from the old bat, and
      # rewrite the dirent's first_block
      super 0
      @bat = target_bat
      # just change the underlying io from right under everyone :)
      @io = target_bat.io
      # important to do this now, before the write. as the below write will always
      # migrate us back to sbat! this will now allocate us +size+ in the new bat.
      super
      self.pos = 0
      write saved
      self.pos = restore_pos
    end
    # now just update the file
    @dirent.size = size
  end

  # first_block is kept on the dirent rather than on this object.
  def first_block
    @dirent.first_block
  end

  def first_block= val
    @dirent.first_block = val
  end
end
679
+
680
+ #
681
+ # A class which wraps an ole directory entry. Can be either a directory
682
+ # (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>)
683
+ #
684
+ # Most interaction with <tt>Ole::Storage</tt> is through this class.
685
+ # The 2 most important functions are <tt>Dirent#children</tt>, and
686
+ # <tt>Dirent#data</tt>.
687
+ #
688
+ # was considering separate classes for dirs and files. some methods/attrs only
689
+ # applicable to one or the other.
690
+ #
691
+ # As with the other classes, #to_s performs the serialization.
692
+ #
693
+ class Dirent < Struct.new(
694
+ :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child,
695
+ :clsid, :flags, # dirs only
696
+ :create_time_str, :modify_time_str, # files only
697
+ :first_block, :size, :reserved
698
+ )
699
+ include RecursivelyEnumerable
700
+
701
+ PACK = 'a64 v C C V3 a16 V a8 a8 V2 a4'
702
+ SIZE = 128
703
+ TYPE_MAP = {
704
+ # this is temporary
705
+ 0 => :empty,
706
+ 1 => :dir,
707
+ 2 => :file,
708
+ 5 => :root
709
+ }
710
+ # something to do with the fact that the tree is supposed to be red-black
711
+ COLOUR_MAP = {
712
+ 0 => :red,
713
+ 1 => :black
714
+ }
715
+ # used in the next / prev / child stuff to show that the tree ends here.
716
+ # also used for first_block for directory.
717
+ EOT = 0xffffffff
718
+ DEFAULT = [
719
+ 0.chr * 2, 2, 0, # will get overwritten
720
+ 1, EOT, EOT, EOT,
721
+ 0.chr * 16, 0, nil, nil,
722
+ AllocationTable::EOC, 0, 0.chr * 4
723
+ ]
724
+
725
+ # This returns all the children of this +Dirent+. It is filled in
726
+ # when the tree structure is recreated.
727
+ attr_reader :children
728
+ attr_reader :name
729
+ attr_reader :ole, :type, :create_time, :modify_time
730
+ attr_reader :parent
731
+
732
+ # i think its just used by the tree building
733
+ attr_accessor :idx
734
+
735
+ # these are for internal use and are used for faster lookup.
736
+ attr_reader :name_lookup
737
+ attr_writer :parent
738
+ protected :name_lookup, :parent=
739
+
740
# Creates a directory entry belonging to the storage +ole+.
#
# +values+ is either a packed entry string, which is unpacked with PACK, or
# an array of field values (DEFAULT when omitted). A Hash given in the
# +values+ position is taken to be +params+.
# +params+ may carry <tt>:name</tt> and <tt>:type</tt>, which override what
# the field values say.
#
# Raises ArgumentError for a <tt>:type</tt> that is not in TYPE_MAP, and
# FormatError for a type_id that is not in TYPE_MAP.
def initialize ole, values=DEFAULT, params={}
  @ole = ole
  values, params = DEFAULT, values if Hash === values
  values = values.unpack(PACK) if String === values
  super(*values)

  # extra parsing from the actual struct values
  @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len])
  @type = if params[:type]
    unless TYPE_MAP.values.include?(params[:type])
      raise ArgumentError, "unknown type #{params[:type].inspect}"
    end
    params[:type]
  else
    TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}"
  end

  # further extra type specific stuff
  if file?
    default_time = @ole.params[:update_timestamps] ? Types::FileTime.now : nil
    @create_time ||= default_time
    @modify_time ||= default_time
    # stored time stamps take precedence over the defaults. each one is
    # decoded from its own field.
    @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str
    @modify_time = Types::Variant.load(Types::VT_FILETIME, modify_time_str) if modify_time_str
    @children = nil
    @name_lookup = nil
  else
    @create_time = nil
    @modify_time = nil
    self.size = 0 unless @type == :root
    @children = []
    @name_lookup = {}
  end

  @parent = nil

  # to silence warnings. used for tree building at load time
  # only.
  @idx = nil
end
780
+
781
# Renames this entry. When it has a parent, the parent's name lookup table
# is re-keyed from the old name to the new one.
def name= name
  unless @parent.nil?
    lookup = @parent.instance_variable_get :@name_lookup
    lookup.delete @name
    lookup[name] = self
  end
  @name = name
end
789
+
790
# Opens the data of this file entry as a RangesIOMigrateable in +mode+.
# The modify time is set to now when the io's mode is writeable. Without a
# block the io is returned; with a block it is yielded, closed afterwards,
# and the block's value is returned.
#
# Raises Errno::EISDIR when this entry is not a file.
def open mode='r'
  raise Errno::EISDIR unless file?
  stream = RangesIOMigrateable.new self, mode
  @modify_time = Types::FileTime.now if stream.mode.writeable?
  return stream unless block_given?
  begin
    yield stream
  ensure
    stream.close
  end
end
801
+
802
# Opens this entry and returns the result of reading +limit+ bytes from it
# (+limit+ is passed through as is, nil by default).
def read limit=nil
  open do |stream|
    stream.read limit
  end
end
805
+
806
# true when this entry's type is :file.
def file?
  type == :file
end
809
+
810
# true for anything that is not a file, so that root counts as a dir.
def dir?
  not file?
end
814
+
815
+ # maybe need some options regarding case sensitivity.
816
# Looks up the child called +name+ in the name lookup table.
# maybe need some options regarding case sensitivity.
def / name
  @name_lookup[name]
end
819
+
820
# With a String, looks up the child of that name (same as #/). Anything else
# is handed to the Struct implementation.
def [] idx
  #warn 'String form of Dirent#[] is deprecated'
  return self / idx if String === idx
  super
end
828
+
829
+ # move to ruby-msg. and remove from here
830
# The create time, or the modify time when there is no create time.
# move to ruby-msg. and remove from here
def time
  #warn 'Dirent#time is deprecated'
  created = create_time
  created || modify_time
end
834
+
835
# Passes each child to +block+ when this entry is a dir; does nothing and
# returns nil otherwise.
def each_child(&block)
  return unless dir?
  @children.each(&block)
end
838
+
839
+ # flattens the tree starting from here into +dirents+. note it modifies its argument.
840
# flattens the tree starting from here into +dirents+, which is modified and
# returned. Each entry records its position in +dirents+ as its idx. Files
# get their prev, next and child links reset to EOT; other entries flatten
# their children and link to them through Dirent.flatten_helper.
def flatten dirents=[]
  @idx = dirents.length
  dirents.push self
  unless file?
    children.each { |kid| kid.flatten dirents }
    self.child = Dirent.flatten_helper children
    return dirents
  end
  self.child = EOT
  self.next = EOT
  self.prev = EOT
  dirents
end
851
+
852
+ # i think making the tree structure optimized is actually more complex than this, and
853
+ # requires some intelligent ordering of the children based on names, but as long as
854
+ # it is valid its ok.
855
+ # actually, i think its ok. gsf for example only outputs a singly-linked-list, where
856
+ # prev is always EOT.
857
# Links +children+ into a binary tree through their prev/next fields by
# repeatedly picking the middle element, and returns the idx of the element
# at the top (EOT for an empty array).
#
# gsf for example only outputs a singly-linked-list, where prev is always EOT.
def self.flatten_helper children
  return EOT if children.empty?
  mid = children.length / 2
  pivot = children[mid]
  pivot.prev = flatten_helper children[0...mid]
  pivot.next = flatten_helper children[(mid + 1)..-1]
  pivot.idx
end
864
+
865
# Serializes this entry into its packed form, updating the name, type and
# time stamp fields from the parsed attributes first.
#
# The encoded name is cut to 62 bytes, followed by 2 zero bytes, and padded
# to 64. Entries that are not files always get zeroed time stamps.
def to_s
  encoded = Types::Variant.dump(Types::VT_LPWSTR, name)
  encoded = encoded[0, 62] if encoded.length > 62
  encoded += 0.chr * 2
  self.name_len = encoded.length
  self.name_utf16 = encoded + 0.chr * (64 - encoded.length)
  # type_id can perhaps be set in the initializer, as its read only now.
  self.type_id = TYPE_MAP.to_a.find { |_id, type_name| @type == type_name }.first
  # for the case of files, it is assumed that that was handled already
  # note not dir?, so as not to override root's first_block
  self.first_block = Dirent::EOT if type == :dir
  if file?
    # this is messed up. it changes the time stamps regardless of whether the file
    # was actually touched. instead, any open call with a writeable mode, should update
    # the modify time. create time would be set in new.
    if @ole.params[:update_timestamps]
      self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time
      self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time
    else
      # keep whatever was stored, but an entry built from DEFAULT has nil
      # here, which cannot be packed - zero those instead.
      self.create_time_str ||= 0.chr * 8
      self.modify_time_str ||= 0.chr * 8
    end
  else
    self.create_time_str = 0.chr * 8
    self.modify_time_str = 0.chr * 8
  end
  to_a.pack PACK
end
890
+
891
# Short description: the name, and for files also the size, the modify time
# (when set) and the first few bytes of data.
def inspect
  parts = ["#<Dirent:#{name.inspect}"]
  # perhaps i should remove the data snippet. its not that useful anymore.
  # there is also some dir specific stuff. like clsid, flags, that i should
  # probably include
  if file?
    head = read 9
    # abbreviate when at least 9 bytes could be read
    snippet = head.length == 9 ? head[0, 5] + '...' : head
    parts << " size=#{size}"
    parts << " modify_time=#{modify_time.to_s.inspect}" if modify_time
    parts << " data=#{snippet.inspect}"
  end
  parts.join + '>'
end
905
+
906
# Adds +child+ to this entry: sets its parent, registers it under its name
# in the lookup table, and appends it to the children array (which is
# returned).
def << child
  child.parent = self
  @name_lookup.store child.name, child
  @children.push child
end
911
+
912
+ # remove the Dirent +child+ from the children array, truncating the data
913
+ # by default.
914
# remove the Dirent +child+ from the children array and the name lookup
# table, and clear its parent. When +truncate+ is true (the default) and the
# child is a file, its data is truncated to zero length, freeing its blocks;
# pass false to keep the data.
#
# Raises ArgumentError when +child+ is not one of the children.
def delete child, truncate=true
  # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
  unless @children.delete(child)
    raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}"
  end
  @name_lookup.delete(child.name)
  child.parent = nil
  # free our blocks, unless the caller asked for the data to be kept
  child.open { |io| io.truncate 0 } if truncate and child.file?
end
924
+
925
# copies the contents of +src+ to +dst+. Both must be of the same kind:
# either both files or both non-files. The name is copied, then for a dir
# each child is recreated under +dst+ and copied recursively, and for a file
# the data is copied.
# maybe this will recurse too much for big documents??
#
# Raises ArgumentError when one is a file and the other is not.
def self.copy src, dst
  # check both directions: a dir copied onto a file has nowhere to put
  # its children.
  raise ArgumentError, 'differing types' if src.file? != dst.file?
  dst.name = src.name
  if src.dir?
    src.children.each do |src_child|
      dst_child = Dirent.new dst.ole, :type => src_child.type
      dst << dst_child
      Dirent.copy src_child, dst_child
    end
  else
    src.open do |src_io|
      dst.open { |dst_io| IO.copy src_io, dst_io }
    end
  end
end
942
+ end
943
+ end
944
+ end
945
+