ruby-ole 1.2.6 → 1.2.7

This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -1,949 +1,3 @@
1
- require 'tempfile'
2
-
3
- require 'ole/base'
4
- require 'ole/types'
5
- require 'ole/ranges_io'
6
-
7
- module Ole # :nodoc:
8
- #
9
- # = Introduction
10
- #
11
- # <tt>Ole::Storage</tt> is a class intended to abstract away details of the
12
- # access to OLE2 structured storage files, such as those produced by
13
- # Microsoft Office, eg *.doc, *.msg etc.
14
- #
15
- # = Usage
16
- #
17
- # Usage should be fairly straight forward:
18
- #
19
- # # get the parent ole storage object
20
- # ole = Ole::Storage.open 'myfile.msg', 'r+'
21
- # # => #<Ole::Storage io=#<File:myfile.msg> root=#<Dirent:"Root Entry">>
22
- # # read some data
23
- # ole.root[1].read 4
24
- # # => "\001\000\376\377"
25
- # # get the top level root object and output a tree structure for
26
- # # debugging
27
- # puts ole.root.to_tree
28
- # # =>
29
- # - #<Dirent:"Root Entry" size=3840 time="2006-11-03T00:52:53Z">
30
- # |- #<Dirent:"__nameid_version1.0" size=0 time="2006-11-03T00:52:53Z">
31
- # | |- #<Dirent:"__substg1.0_00020102" size=16 data="CCAGAAAAAADAAA...">
32
- # ...
33
- # |- #<Dirent:"__substg1.0_8002001E" size=4 data="MTEuMA==">
34
- # |- #<Dirent:"__properties_version1.0" size=800 data="AAAAAAAAAAABAA...">
35
- # \- #<Dirent:"__recip_version1.0_#00000000" size=0 time="2006-11-03T00:52:53Z">
36
- # |- #<Dirent:"__substg1.0_0FF60102" size=4 data="AAAAAA==">
37
- # ...
38
- # # write some data, and finish up (note that open is 'r+', so this overwrites
39
- # # but doesn't truncate)
40
- # ole.root["\001CompObj"].open { |f| f.write "blah blah" }
41
- # ole.close
42
- #
43
- # = Thanks
44
- #
45
- # * The code contained in this project was initially based on chicago's libole
46
- # (source available at http://prdownloads.sf.net/chicago/ole.tgz).
47
- #
48
- # * It was later augmented with some corrections by inspecting pole, and (purely
49
- # for header definitions) gsf.
50
- #
51
- # * The property set parsing code came from the apache java project POIFS.
52
- #
53
- # * The excellent idea for using a pseudo file system style interface by providing
54
- # #file and #dir methods which mimic File and Dir, was borrowed (along with almost
55
- # unchanged tests!) from Thomas Sondergaard's rubyzip.
56
- #
57
- # = TODO
58
- #
59
- # * the custom header cruft for Header and Dirent needs some love.
60
- # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
61
- # and, in a manner of speaking, but arguably different, Storage itself.
62
- # they have differing api's which would be nice to rethink.
63
- # AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
64
- #
65
- class Storage
66
- # thrown for any bogus OLE file errors.
67
- class FormatError < StandardError # :nodoc:
68
- end
69
-
70
- VERSION = '1.2.6'
71
-
72
- # options used at creation time
73
- attr_reader :params
74
- # The top of the ole tree structure
75
- attr_reader :root
76
- # The tree structure in its original flattened form. only valid after #load, or #flush.
77
- attr_reader :dirents
78
- # The underlying io object to/from which the ole object is serialized, whether we
79
- # should close it, and whether it is writeable
80
- attr_reader :io, :close_parent, :writeable
81
- # Low level internals, you probably shouldn't need to mess with these
82
- attr_reader :header, :bbat, :sbat, :sb_file
83
-
84
- # maybe include an option hash, and allow :close_parent => true, to be more general.
85
- # +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
86
- def initialize arg, mode=nil, params={}
87
- params, mode = mode, nil if Hash === mode
88
- params = {:update_timestamps => true}.merge(params)
89
- @params = params
90
-
91
- # get the io object
92
- @close_parent, @io = if String === arg
93
- mode ||= 'rb'
94
- [true, open(arg, mode)]
95
- else
96
- raise ArgumentError, 'unable to specify mode string with io object' if mode
97
- [false, arg]
98
- end
99
- # do we have this file opened for writing? don't know of a better way to tell
100
- # (unless we parse the mode string in the open case)
101
- # hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more
102
- # reason to use mode string parsing when available, and fall back to something like
103
- # io.writeable? otherwise.
104
- @writeable = begin
105
- if mode
106
- IO::Mode.new(mode).writeable?
107
- else
108
- @io.flush
109
- true
110
- end
111
- rescue IOError
112
- false
113
- end
114
- # silence undefined warning in clear
115
- @sb_file = nil
116
- # if the io object has data, we should load it, otherwise start afresh
117
- # this should be based on the mode string rather.
118
- @io.size > 0 ? load : clear
119
- end
120
-
121
- def self.open arg, mode=nil, params={}
122
- ole = new arg, mode, params
123
- if block_given?
124
- begin yield ole
125
- ensure; ole.close
126
- end
127
- else ole
128
- end
129
- end
130
-
131
- # load document from file.
132
- #
133
- # TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
134
- #
135
- # 1. reterminate any chain not ending in EOC.
136
- # compare file size with actually allocated blocks per file.
137
- # 2. pass through all chain heads looking for collisions, and making sure nothing points to them
138
- # (ie they are really heads). in both sbat and mbat
139
- # 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
140
- # in the bat for them.
141
- # 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
142
- # (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
143
- # will be automatically thrown away at close time.
144
- def load
145
- # we always read 512 for the header block. if the block size ends up being different,
146
- # what happens to the 109 fat entries. are there more/less entries?
147
- @io.rewind
148
- header_block = @io.read 512
149
- @header = Header.new header_block
150
-
151
- # create an empty bbat.
152
- @bbat = AllocationTable::Big.new self
153
- mbat_blocks = (0...@header.num_mbat).map { |i| i + @header.mbat_start }
154
- bbat_chain = (header_block[Header::SIZE..-1] + @bbat.read(mbat_blocks)).unpack 'V*'
155
- # am i using num_bat in the right way?
156
- @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])
157
-
158
- # get block chain for directories, read it, then split it into chunks and load the
159
- # directory entries. semantics changed - used to cut at first dir where dir.type == 0
160
- @dirents = @bbat.read(@header.dirent_start).scan(/.{#{Dirent::SIZE}}/mo).
161
- map { |str| Dirent.new self, str }.reject { |d| d.type_id == 0 }
162
-
163
- # now reorder from flat into a tree
164
- # links are stored in some kind of balanced binary tree
165
- # check that everything is visited at least, and at most once
166
- # similarly with the blocks of the file.
167
- # was thinking of moving this to Dirent.to_tree instead.
168
- class << @dirents
169
- def to_tree idx=0
170
- return [] if idx == Dirent::EOT
171
- d = self[idx]
172
- d.children = to_tree d.child
173
- raise FormatError, "directory #{d.inspect} used twice" if d.idx
174
- d.idx = idx
175
- to_tree(d.prev) + [d] + to_tree(d.next)
176
- end
177
- end
178
-
179
- @root = @dirents.to_tree.first
180
- Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
181
- unused = @dirents.reject(&:idx).length
182
- Log.warn "#{unused} unused directories" if unused > 0
183
-
184
- # FIXME i don't currently use @header.num_sbat which i should
185
- # hmm. nor do i write it. it means what exactly again?
186
- # which mode to use here?
187
- @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
188
- @sbat = AllocationTable::Small.new self
189
- @sbat.load @bbat.read(@header.sbat_start)
190
- end
191
-
192
- def close
193
- @sb_file.close
194
- flush if @writeable
195
- @io.close if @close_parent
196
- end
197
-
198
- # the flush method is the main "save" method. all file contents are always
199
- # written directly to the file by the RangesIO objects, all this method does
200
- # is write out all the file meta data - dirents, allocation tables, file header
201
- # etc.
202
- #
203
- # maybe add an option to zero the padding, and any remaining avail blocks in the
204
- # allocation table.
205
- #
206
- # TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
207
- # of bbat to AllocationTable::Big.
208
- def flush
209
- # update root dirent, and flatten dirent tree
210
- @root.name = 'Root Entry'
211
- @root.first_block = @sb_file.first_block
212
- @root.size = @sb_file.size
213
- @dirents = @root.flatten
214
-
215
- # serialize the dirents using the bbat
216
- RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
217
- @dirents.each { |dirent| io.write dirent.to_s }
218
- padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
219
- io.write 0.chr * padding
220
- @header.dirent_start = io.first_block
221
- end
222
-
223
- # serialize the sbat
224
- # perhaps the blocks used by the sbat should be marked with BAT?
225
- RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
226
- io.write @sbat.to_s
227
- @header.sbat_start = io.first_block
228
- @header.num_sbat = @bbat.chain(@header.sbat_start).length
229
- end
230
-
231
- # create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
232
- # truncate. then when its time to write, convert that chain and some chunk of blocks at
233
- # the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
234
- # done.
235
- # this is perhaps not good, as we reclaim all bat blocks here, which
236
- # may include the sbat we just wrote. FIXME
237
- @bbat.map! do |b|
238
- b == AllocationTable::BAT || b == AllocationTable::META_BAT ?
239
- AllocationTable::AVAIL : b
240
- end
241
-
242
- # currently we use a loop. this could be better, but basically,
243
- # the act of writing out the bat, itself requires blocks which get
244
- # recorded in the bat.
245
- #
246
- # i'm sure that there'd be some simpler closed form solution to this. solve
247
- # recursive func:
248
- #
249
- # num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
250
- # bbat_len = initial_bbat_len + num_mbat_blocks
251
- # mbat_len = ceil(bbat_len * 4 / block_size)
252
- #
253
- # the actual bbat allocation table is itself stored throughout the file, and that chain
254
- # is stored in the initial blocks, and the mbat blocks.
255
- num_mbat_blocks = 0
256
- io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
257
- # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
258
- # contiguous chunk at the end.
259
- # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
260
- # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
261
- # be fixed easily, add an io truncate
262
- @bbat.truncate!
263
- before = @io.size
264
- @io.truncate @bbat.block_size * (@bbat.length + 1)
265
- while true
266
- # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
267
- # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
268
- # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
269
- # mbat must remain contiguous.
270
- bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
271
- # now storing the excess mbat blocks also increases the size of the bbat:
272
- new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / @bbat.block_size.to_f).ceil
273
- if new_num_mbat_blocks != num_mbat_blocks
274
- # need more space for the mbat.
275
- num_mbat_blocks = new_num_mbat_blocks
276
- elsif io.size != bbat_data_len
277
- # need more space for the bat
278
- # this may grow the bbat, depending on existing available blocks
279
- io.truncate bbat_data_len
280
- else
281
- break
282
- end
283
- end
284
-
285
- # now extract the info we want:
286
- ranges = io.ranges
287
- bbat_chain = @bbat.chain io.first_block
288
- # the extra mbat data is a set of contiguous blocks at the end
289
- io.close
290
- bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
291
- # tack on the mbat stuff
292
- @header.mbat_start = @bbat.length # need to record this here before tacking on the mbat
293
- @header.num_bat = bbat_chain.length
294
- num_mbat_blocks.times { @bbat << AllocationTable::META_BAT }
295
-
296
- # now finally write the bbat, using a not resizable io.
297
- # the mode here will be 'r', which allows write atm.
298
- RangesIO.open(@io, :ranges => ranges) { |f| f.write @bbat.to_s }
299
-
300
- # this is the mbat. pad it out.
301
- bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
302
- @header.num_mbat = num_mbat_blocks
303
- if num_mbat_blocks == 0
304
- @header.mbat_start = AllocationTable::EOC
305
- else
306
- # write out the mbat blocks now. first of all, where are they going to be?
307
- mbat_data = bbat_chain[109..-1]
308
- q = @bbat.block_size / 4
309
- mbat_data += [AllocationTable::AVAIL] *((mbat_data.length / q.to_f).ceil * q - mbat_data.length)
310
- ranges = @bbat.ranges((0...num_mbat_blocks).map { |i| @header.mbat_start + i })
311
- RangesIO.open(@io, :ranges => ranges) { |f| f.write mbat_data.pack('V*') }
312
- end
313
-
314
- # now seek back and write the header out
315
- @io.seek 0
316
- @io.write @header.to_s + bbat_chain[0, 109].pack('V*')
317
- @io.flush
318
- end
319
-
320
- def clear
321
- # initialize to equivalent of loading an empty ole document.
322
- Log.warn 'creating new ole storage object on non-writable io' unless @writeable
323
- @header = Header.new
324
- @bbat = AllocationTable::Big.new self
325
- @root = Dirent.new self, :type => :root, :name => 'Root Entry'
326
- @dirents = [@root]
327
- @root.idx = 0
328
- @sb_file.close if @sb_file
329
- @sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
330
- @sbat = AllocationTable::Small.new self
331
- # throw everything else the hell away
332
- @io.truncate 0
333
- end
334
-
335
- # could be useful with mis-behaving ole documents. or to just clean them up.
336
- def repack temp=:file
337
- case temp
338
- when :file
339
- Tempfile.open 'ole-repack' do |io|
340
- io.binmode
341
- repack_using_io io
342
- end
343
- when :mem; StringIO.open(&method(:repack_using_io))
344
- else raise ArgumentError, "unknown temp backing #{temp.inspect}"
345
- end
346
- end
347
-
348
- def repack_using_io temp_io
349
- @io.rewind
350
- IO.copy @io, temp_io
351
- clear
352
- Storage.open temp_io, nil, @params do |temp_ole|
353
- #temp_ole.root.type = :dir
354
- Dirent.copy temp_ole.root, root
355
- end
356
- end
357
-
358
- def bat_for_size size
359
- # note >=, not > previously.
360
- size >= @header.threshold ? @bbat : @sbat
361
- end
362
-
363
- def inspect
364
- "#<#{self.class} io=#{@io.inspect} root=#{@root.inspect}>"
365
- end
366
-
367
- #
368
- # A class which wraps the ole header
369
- #
370
- # Header.new can be both used to load from a string, or to create from
371
- # defaults. Serialization is accomplished with the #to_s method.
372
- #
373
- class Header < Struct.new(
374
- :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
375
- :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
376
- :sbat_start, :num_sbat, :mbat_start, :num_mbat
377
- )
378
- PACK = 'a8 a16 v2 a2 v2 a6 V3 a4 V5'
379
- SIZE = 0x4c
380
- # i have seen it pointed out that the first 4 bytes of hex,
381
- # 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
382
- MAGIC = "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" # expected value of Header#magic
383
- # what you get if creating new header from scratch.
384
- # AllocationTable::EOC isn't available yet. meh.
385
- EOC = 0xfffffffe
386
- DEFAULT = [
387
- MAGIC, 0.chr * 16, 59, 3, "\xfe\xff", 9, 6,
388
- 0.chr * 6, 0, 1, EOC, 0.chr * 4,
389
- 4096, EOC, 0, EOC, 0
390
- ]
391
-
392
- def initialize values=DEFAULT
393
- values = values.unpack(PACK) if String === values
394
- super(*values)
395
- validate!
396
- end
397
-
398
- def to_s
399
- to_a.pack PACK
400
- end
401
-
402
- def validate!
403
- raise FormatError, "OLE2 signature is invalid" unless magic == MAGIC
404
- if num_bat == 0 or # is that valid for a completely empty file?
405
- # not sure about this one. basically to do max possible bat given size of mbat
406
- num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or
407
- # shouldn't need to use the mbat as there is enough space in the header block
408
- num_bat < 109 && num_mbat != 0 or
409
- # given the size of the header is 76, if b_shift <= 6, blocks address the header.
410
- s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or
411
- # we only handle little endian
412
- byte_order != "\xfe\xff"
413
- raise FormatError, "not valid OLE2 structured storage file"
414
- end
415
- # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
416
- # 3 for this value.
417
- # transacting_signature != "\x00" * 4 or
418
- if threshold != 4096 or
419
- num_mbat == 0 && mbat_start != AllocationTable::EOC or
420
- reserved != "\x00" * 6
421
- Log.warn "may not be a valid OLE2 structured storage file"
422
- end
423
- true
424
- end
425
- end
426
-
427
- #
428
- # +AllocationTable+'s hold the chains corresponding to files. Given
429
- # an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning
430
- # the blocks that make up that file.
431
- #
432
- # There are 2 allocation tables, the bbat, and sbat, for big and small
433
- # blocks respectively. The block chain should be loaded using either
434
- # <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt>
435
- # as appropriate.
436
- #
437
- # Whether or not big or small blocks are used for a file depends on
438
- # whether its size is over the <tt>Header#threshold</tt> level.
439
- #
440
- # An <tt>Ole::Storage</tt> document is serialized as a series of directory objects,
441
- # which are stored in blocks throughout the file. The blocks are either
442
- # big or small, and are accessed using the <tt>AllocationTable</tt>.
443
- #
444
- # The bbat allocation table's data is stored in the spare room in the header
445
- # block, and in extra blocks throughout the file as referenced by the meta
446
- # bat. That chain is linear, as there is no higher level table.
447
- #
448
- # AllocationTable.new is used to create an empty table. It can parse a string
449
- # with the #load method. Serialization is accomplished with the #to_s method.
450
- #
451
- class AllocationTable < Array
452
- # a free block (I don't currently leave any blocks free), although I do pad out
453
- # the allocation table with AVAIL to the block size.
454
- AVAIL = 0xffffffff
455
- EOC = 0xfffffffe # end of a chain
456
- # these blocks are used for storing the allocation table chains
457
- BAT = 0xfffffffd
458
- META_BAT = 0xfffffffc
459
-
460
- attr_reader :ole, :io, :block_size
461
- def initialize ole
462
- @ole = ole
463
- @sparse = true
464
- super()
465
- end
466
-
467
- def load data
468
- replace data.unpack('V*')
469
- end
470
-
471
- def truncate
472
- # this strips trailing AVAILs. come to think of it, this has the potential to break
473
- # bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
474
- # very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
475
- # at load time.
476
- temp = reverse
477
- not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1]
478
- temp.reverse
479
- end
480
-
481
- def truncate!
482
- replace truncate
483
- end
484
-
485
- def to_s
486
- table = truncate
487
- # pad it out some
488
- num = @ole.bbat.block_size / 4
489
- # do you really use AVAIL? they probably extend past end of file, and may shortly
490
- # be used for the bat. not really good.
491
- table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0
492
- table.pack 'V*'
493
- end
494
-
495
- # rewrote this to be non-recursive as it broke on a large attachment
496
- # chain with a stack error
497
- def chain idx
498
- a = []
499
- until idx >= META_BAT
500
- raise FormatError, "broken allocationtable chain" if idx < 0 || idx > length
501
- a << idx
502
- idx = self[idx]
503
- end
504
- Log.warn "invalid chain terminator #{idx}" unless idx == EOC
505
- a
506
- end
507
-
508
- # Turn a chain (an array given by +chain+) of blocks (optionally
509
- # truncated to +size+) into an array of arrays describing the stretches of
510
- # bytes in the file that it belongs to.
511
- #
512
- # The blocks are Big or Small blocks depending on the table type.
513
- def blocks_to_ranges chain, size=nil
514
- # truncate the chain if required
515
- chain = chain[0...(size.to_f / block_size).ceil] if size
516
- # convert chain to ranges of the block size
517
- ranges = chain.map { |i| [block_size * i, block_size] }
518
- # truncate final range if required
519
- ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
520
- ranges
521
- end
522
-
523
- def ranges chain, size=nil
524
- chain = self.chain(chain) unless Array === chain
525
- blocks_to_ranges chain, size
526
- end
527
-
528
- # quick shortcut. chain can be either a head (in which case the table is used to
529
- # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
530
- def open chain, size=nil, &block
531
- RangesIO.open @io, :ranges => ranges(chain, size), &block
532
- end
533
-
534
- def read chain, size=nil
535
- open chain, size, &:read
536
- end
537
-
538
- # catch any method that may add an AVAIL somewhere in the middle, thus invalidating
539
- # the @sparse speedup for free_block. annoying using eval, but define_method won't
540
- # work for this.
541
- # FIXME
542
- [:map!, :collect!].each do |name|
543
- eval <<-END
544
- def #{name}(*args, &block)
545
- @sparse = true
546
- super
547
- end
548
- END
549
- end
550
-
551
- def []= idx, val
552
- @sparse = true if val == AVAIL
553
- super
554
- end
555
-
556
- def free_block
557
- if @sparse
558
- i = index(AVAIL) and return i
559
- end
560
- @sparse = false
561
- push AVAIL
562
- length - 1
563
- end
564
-
565
- # must return first_block
566
- def resize_chain blocks, size
567
- new_num_blocks = (size / block_size.to_f).ceil
568
- old_num_blocks = blocks.length
569
- if new_num_blocks < old_num_blocks
570
- # de-allocate some of our old blocks. TODO maybe zero them out in the file???
571
- (new_num_blocks...old_num_blocks).each { |i| self[blocks[i]] = AVAIL }
572
- self[blocks[new_num_blocks-1]] = EOC if new_num_blocks > 0
573
- blocks.slice! new_num_blocks..-1
574
- elsif new_num_blocks > old_num_blocks
575
- # need some more blocks.
576
- last_block = blocks.last
577
- (new_num_blocks - old_num_blocks).times do
578
- block = free_block
579
- # connect the chain. handle corner case of blocks being [] initially
580
- self[last_block] = block if last_block
581
- blocks << block
582
- last_block = block
583
- self[last_block] = EOC
584
- end
585
- end
586
- # update ranges, and return that also now
587
- blocks
588
- end
589
-
590
- class Big < AllocationTable
591
- def initialize(*args)
592
- super
593
- @block_size = 1 << @ole.header.b_shift
594
- @io = @ole.io
595
- end
596
-
597
- # Big blocks are kind of -1 based, in order to not clash with the header.
598
- def blocks_to_ranges blocks, size
599
- super blocks.map { |b| b + 1 }, size
600
- end
601
- end
602
-
603
- class Small < AllocationTable
604
- def initialize(*args)
605
- super
606
- @block_size = 1 << @ole.header.s_shift
607
- @io = @ole.sb_file
608
- end
609
- end
610
- end
611
-
612
- # like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
613
- # AllocationTable, and can be resized. used for read/write to 2 streams:
614
- # 1. serialized dirent data
615
- # 2. sbat table data
616
- # 3. all dirents but through RangesIOMigrateable below
617
- #
618
- # Note that all internal access to first_block is through accessors, as it is sometimes
619
- # useful to redirect it.
620
- class RangesIOResizeable < RangesIO
621
- attr_reader :bat
622
- attr_accessor :first_block
623
- def initialize bat, mode='r', params={}
624
- mode, params = 'r', mode if Hash === mode
625
- first_block, size = params.values_at :first_block, :size
626
- raise ArgumentError, 'must specify first_block' unless first_block
627
- @bat = bat
628
- self.first_block = first_block
629
- # we now cache the blocks chain, for faster resizing.
630
- @blocks = @bat.chain first_block
631
- super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
632
- end
633
-
634
- def truncate size
635
- # note that old_blocks is != @ranges.length necessarily. i'm planning to write a
636
- # merge_ranges function that merges sequential ranges into one as an optimization.
637
- @bat.resize_chain @blocks, size
638
- @ranges = @bat.ranges @blocks, size
639
- @pos = @size if @pos > size
640
- self.first_block = @blocks.empty? ? AllocationTable::EOC : @blocks.first
641
-
642
- # don't know if this is required, but we explicitly request our @io to grow if necessary
643
- # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
644
- # can be made.
645
- # maybe its ok to just seek out there later??
646
- max = @ranges.map { |pos, len| pos + len }.max || 0
647
- @io.truncate max if max > @io.size
648
-
649
- @size = size
650
- end
651
- end
652
-
653
- # like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
654
- # between bats based on size, and updating the dirent.
655
- class RangesIOMigrateable < RangesIOResizeable
656
- attr_reader :dirent
657
- def initialize dirent, mode='r'
658
- @dirent = dirent
659
- super @dirent.ole.bat_for_size(@dirent.size), mode,
660
- :first_block => @dirent.first_block, :size => @dirent.size
661
- end
662
-
663
- def truncate size
664
- bat = @dirent.ole.bat_for_size size
665
- if bat.class != @bat.class
666
- # bat migration needed! we need to backup some data. the amount of data
667
- # should be <= @ole.header.threshold, so we can just hold it all in one buffer.
668
- # backup this
669
- pos = @pos
670
- @pos = 0
671
- keep = read [@size, size].min
672
- # this does a normal truncate to 0, removing our presence from the old bat, and
673
- # rewrite the dirent's first_block
674
- super 0
675
- @bat = bat
676
- # just change the underlying io from right under everyone :)
677
- @io = bat.io
678
- # important to do this now, before the write. as the below write will always
679
- # migrate us back to sbat! this will now allocate us +size+ in the new bat.
680
- super
681
- @pos = 0
682
- write keep
683
- @pos = pos
684
- else
685
- super
686
- end
687
- # now just update the file
688
- @dirent.size = size
689
- end
690
-
691
- # forward this to the dirent
692
- def first_block
693
- @dirent.first_block
694
- end
695
-
696
- def first_block= val
697
- @dirent.first_block = val
698
- end
699
- end
700
-
701
- #
702
- # A class which wraps an ole directory entry. Can be either a directory
703
- # (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>)
704
- #
705
- # Most interaction with <tt>Ole::Storage</tt> is through this class.
706
- # The 2 most important functions are <tt>Dirent#children</tt>, and
707
- # <tt>Dirent#data</tt>.
708
- #
709
- # was considering separate classes for dirs and files. some methods/attrs only
710
- # applicable to one or the other.
711
- #
712
- # As with the other classes, #to_s performs the serialization.
713
- #
714
- class Dirent < Struct.new(
715
- :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child,
716
- :clsid, :flags, # dirs only
717
- :create_time_str, :modify_time_str, # files only
718
- :first_block, :size, :reserved
719
- )
720
- include RecursivelyEnumerable
721
-
722
- PACK = 'a64 v C C V3 a16 V a8 a8 V2 a4'
723
- SIZE = 128
724
- TYPE_MAP = {
725
- # this is temporary
726
- 0 => :empty,
727
- 1 => :dir,
728
- 2 => :file,
729
- 5 => :root
730
- }
731
- # something to do with the fact that the tree is supposed to be red-black
732
- COLOUR_MAP = {
733
- 0 => :red,
734
- 1 => :black
735
- }
736
- # used in the next / prev / child stuff to show that the tree ends here.
737
- # also used for first_block for directory.
738
- EOT = 0xffffffff
739
- DEFAULT = [
740
- 0.chr * 2, 2, 0, # will get overwritten
741
- 1, EOT, EOT, EOT,
742
- 0.chr * 16, 0, nil, nil,
743
- AllocationTable::EOC, 0, 0.chr * 4
744
- ]
745
-
746
- # i think its just used by the tree building
747
- attr_accessor :idx
748
- # This returns all the children of this +Dirent+. It is filled in
749
- # when the tree structure is recreated.
750
- attr_accessor :children
751
- attr_accessor :name
752
- attr_reader :ole, :type, :create_time, :modify_time
753
- def initialize ole, values=DEFAULT, params={}
754
- @ole = ole
755
- values, params = DEFAULT, values if Hash === values
756
- values = values.unpack(PACK) if String === values
757
- super(*values)
758
-
759
- # extra parsing from the actual struct values
760
- @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len])
761
- @type = if params[:type]
762
- unless TYPE_MAP.values.include?(params[:type])
763
- raise ArgumentError, "unknown type #{params[:type].inspect}"
764
- end
765
- params[:type]
766
- else
767
- TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}"
768
- end
769
-
770
- # further extra type specific stuff
771
- if file?
772
- default_time = @ole.params[:update_timestamps] ? Time.now : nil
773
- @create_time ||= default_time
774
- @modify_time ||= default_time
775
- @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str
776
- @modify_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if modify_time_str
777
- @children = nil
778
- else
779
- @create_time = nil
780
- @modify_time = nil
781
- self.size = 0 unless @type == :root
782
- @children = []
783
- end
784
-
785
- # to silence warnings. used for tree building at load time
786
- # only.
787
- @idx = nil
788
- end
789
-
790
- def open mode='r'
791
- raise Errno::EISDIR unless file?
792
- io = RangesIOMigrateable.new self, mode
793
- # TODO work on the mode string stuff a bit more.
794
- # maybe let the io object know about the mode, so it can refuse
795
- # to work for read/write appropriately. maybe redefine all unusable
796
- # methods using singleton class to throw errors.
797
- # for now, i just want to implement truncation on use of 'w'. later,
798
- # i need to do 'a' etc.
799
- case mode
800
- when 'r', 'r+'
801
- # as i don't enforce reading/writing, nothing changes here. kind of
802
- # need to enforce tt if i want modify times to work better.
803
- @modify_time = Time.now if mode == 'r+'
804
- when 'w'
805
- @modify_time = Time.now
806
- #io.truncate 0
807
- else
808
- raise NotImplementedError, "unsupported mode - #{mode.inspect}"
809
- end
810
- if block_given?
811
- begin yield io
812
- ensure; io.close
813
- end
814
- else io
815
- end
816
- end
817
-
818
- def read limit=nil
819
- open { |io| io.read limit }
820
- end
821
-
822
- def file?
823
- type == :file
824
- end
825
-
826
- def dir?
827
- # to count root as a dir.
828
- !file?
829
- end
830
-
831
- # maybe need some options regarding case sensitivity.
832
- def / name
833
- children.find { |child| name === child.name }
834
- end
835
-
836
- def [] idx
837
- if String === idx
838
- #warn 'String form of Dirent#[] is deprecated'
839
- self / idx
840
- else
841
- super
842
- end
843
- end
844
-
845
- # move to ruby-msg. and remove from here
846
- def time
847
- #warn 'Dirent#time is deprecated'
848
- create_time || modify_time
849
- end
850
-
851
- def each_child(&block)
852
- @children.each(&block)
853
- end
854
-
855
- # flattens the tree starting from here into +dirents+. note it modifies its argument.
856
- def flatten dirents=[]
857
- @idx = dirents.length
858
- dirents << self
859
- if file?
860
- self.prev = self.next = self.child = EOT
861
- else
862
- children.each { |child| child.flatten dirents }
863
- self.child = Dirent.flatten_helper children
864
- end
865
- dirents
866
- end
867
-
868
- # i think making the tree structure optimized is actually more complex than this, and
869
- # requires some intelligent ordering of the children based on names, but as long as
870
- # it is valid its ok.
871
- # actually, i think its ok. gsf for example only outputs a singly-linked-list, where
872
- # prev is always EOT.
873
- def self.flatten_helper children
874
- return EOT if children.empty?
875
- i = children.length / 2
876
- this = children[i]
877
- this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] }
878
- this.idx
879
- end
880
-
881
- def to_s
882
- tmp = Types::Variant.dump(Types::VT_LPWSTR, name)
883
- tmp = tmp[0, 62] if tmp.length > 62
884
- tmp += 0.chr * 2
885
- self.name_len = tmp.length
886
- self.name_utf16 = tmp + 0.chr * (64 - tmp.length)
887
- # type_id can perhaps be set in the initializer, as its read only now.
888
- self.type_id = TYPE_MAP.to_a.find { |id, name| @type == name }.first
889
- # for the case of files, it is assumed that that was handled already
890
- # note not dir?, so as not to override root's first_block
891
- self.first_block = Dirent::EOT if type == :dir
892
- if file?
893
- # this is messed up. it changes the time stamps regardless of whether the file
894
- # was actually touched. instead, any open call with a writeable mode, should update
895
- # the modify time. create time would be set in new.
896
- if @ole.params[:update_timestamps]
897
- self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time
898
- self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time
899
- end
900
- else
901
- self.create_time_str = 0.chr * 8
902
- self.modify_time_str = 0.chr * 8
903
- end
904
- to_a.pack PACK
905
- end
906
-
907
- def inspect
908
- str = "#<Dirent:#{name.inspect}"
909
- # perhaps i should remove the data snippet. its not that useful anymore.
910
- if file?
911
- tmp = read 9
912
- data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp
913
- str << " size=#{size}" +
914
- "#{modify_time ? ' modify_time=' + modify_time.to_s.inspect : nil}" +
915
- " data=#{data.inspect}"
916
- else
917
- # there is some dir specific stuff. like clsid, flags.
918
- end
919
- str + '>'
920
- end
921
-
922
- def delete child
923
- # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
924
- raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
925
- # free our blocks
926
- child.open { |io| io.truncate 0 }
927
- end
928
-
929
- def self.copy src, dst
930
- # copies the contents of src to dst. must be the same type. this will throw an
931
- # error on copying to root. maybe this will recurse too much for big documents??
932
- raise ArgumentError, 'differing types' if src.file? and !dst.file?
933
- dst.name = src.name
934
- if src.dir?
935
- src.children.each do |src_child|
936
- dst_child = Dirent.new dst.ole, :type => src_child.type
937
- dst.children << dst_child
938
- Dirent.copy src_child, dst_child
939
- end
940
- else
941
- src.open do |src_io|
942
- dst.open { |dst_io| IO.copy src_io, dst_io }
943
- end
944
- end
945
- end
946
- end
947
- end
948
- end
949
-
1
+ require 'ole/storage/base'
2
+ require 'ole/storage/file_system'
3
+ require 'ole/storage/meta_data'