ruby-msg 1.2.17.3 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,184 +0,0 @@
1
-
2
- # move to support?
3
class IO # :nodoc:
	# Stream the remaining contents of +src+ into +dst+ in 4096-byte chunks,
	# so that large inputs never need to be buffered in memory all at once.
	def self.copy src, dst
		dst.write src.read(4096) until src.eof?
	end
end
11
-
12
- #
13
- # = Introduction
14
- #
15
- # +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
16
- # slices of the input file by providing a list of ranges. Intended as an initial measure to curb
17
- # inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
18
- # no method to stream it.
19
- #
20
- # This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
21
- # and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
22
- # getting 16 bytes doesn't read the whole thing).
23
- #
24
- # In the simplest case it can be used with a single range to provide a limited io to a section of
25
- # a file.
26
- #
27
- # = Limitations
28
- #
29
- # * No buffering. by design at the moment. Intended for large reads
30
- #
31
- # = TODO
32
- #
33
- # On further reflection, this class is something of a joining/optimization of
34
- # two separate IO classes. a SubfileIO, for providing access to a range within
35
- # a File as a separate IO object, and a ConcatIO, allowing the presentation of
36
- # a bunch of io objects as a single unified whole.
37
- #
38
- # I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
39
- # convert a whole mime message into an IO stream, that can be read from.
40
- # It will just be the concatenation of a series of IO objects, corresponding to
41
- # headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
42
- # original message proper, or RangesIO as provided by the Attachment#data, that
43
- # will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
44
- # fly. Thus the attachment, in its plain or encoded form, and the message as a
45
- # whole never exists as a single string in memory, as it does now. This is a
46
- # fair bit of work to achieve, but generally useful I believe.
47
- #
48
- # This class isn't ole specific, maybe move it to my general ruby stream project.
49
- #
50
#
# Presents a list of byte ranges within an underlying IO object as a single
# contiguous, seekable, stream-like object.
#
class RangesIO
	attr_reader :io, :ranges, :size, :pos

	# +io+ is the parent io object that we are wrapping.
	#
	# +ranges+ are byte offsets, given either as Range objects
	# (<tt>[1..2, 4..5, 6..8]</tt>) or as <tt>[offset, length]</tt> pairs
	# (<tt>[[1, 1], [4, 1], [6, 2]]</tt> for the above). They provide
	# sequential slices of the file that will be read, and may overlap.
	#
	# Pass <tt>:close_parent => true</tt> in +opts+ to have #close also close +io+.
	def initialize io, ranges, opts={}
		@opts = {:close_parent => false}.merge opts
		@io = io
		# normalise everything to [offset, length] pairs. negative ranges are
		# not checked for.
		@ranges = ranges.map do |r|
			Range === r ? [r.begin, r.end - r.begin] : r
		end
		# the logical stream length is the sum of the individual range lengths
		@size = @ranges.inject(0) { |sum, (_, len)| sum + len }
		# current logical position within the stream
		@pos = 0
	end

	# Reposition the stream. Only IO::SEEK_SET is currently supported.
	def pos= pos, whence=IO::SEEK_SET
		# FIXME support other whence values
		unless whence == IO::SEEK_SET
			raise NotImplementedError, "#{whence.inspect} not supported"
		end
		@pos = pos
	end

	alias seek :pos=
	alias tell :pos

	# Close the parent io, but only if we were asked to own it.
	def close
		@io.close if @opts[:close_parent]
	end

	# Map a logical position onto <tt>[range, offset_within_range]</tt>.
	# Raises if +pos+ lies beyond the end of all ranges.
	def range_and_offset pos
		offset = nil
		found = ranges.inject(0) do |consumed, range|
			boundary = consumed + range[1]
			if pos <= boundary
				offset = pos - consumed
				break range
			end
			boundary
		end
		# should be impossible for any valid pos, (0...size) === pos
		raise "unable to find range for pos #{pos.inspect}" unless offset
		[found, offset]
	end

	def eof?
		@pos == @size
	end

	# read bytes from file, to a maximum of +limit+, or all available if
	# unspecified.
	def read limit=nil
		data = ''
		limit ||= size
		# special case eof
		return data if eof?
		range, offset = range_and_offset @pos
		idx = ranges.index range
		# walk the tail of the range list, starting part-way into the current
		# range. conceptually nice, but wasteful for a large range array -
		# not optimised yet.
		tail = [[range[0] + offset, range[1] - offset]] + ranges[idx+1..-1]
		tail.each do |start, len|
			@io.seek start
			if limit < len
				# FIXME this += isn't correct if there is a read error
				# or something.
				@pos += limit
				break data << @io.read(limit)
			end
			# NOTE: if the ranges extend past the size of the underlying file,
			# @io.read can return nil here.
			data << @io.read(len)
			@pos += len
			limit -= len
		end
		data
	end

	# you may override this call to update @ranges and @size, if applicable.
	# then write support can grow below.
	def truncate size
		raise NotImplementedError, 'truncate not supported'
	end
	# why not? :)
	alias size= :truncate

	# Write +data+ at the current position, spanning ranges as needed and
	# growing via #truncate when the subclass supports it. Returns the number
	# of bytes written.
	def write data
		# short cut. needed because truncate 0 may return no ranges, instead
		# of empty range, thus range_and_offset fails.
		return 0 if data.empty?
		written = 0
		# if we don't have room, we can use the truncate hook to make more space.
		if data.length > @size - @pos
			begin
				truncate @pos + data.length
			rescue NotImplementedError
				# FIXME maybe warn instead, then just truncate the data?
				raise "unable to satisfy write of #{data.length} bytes"
			end
		end
		range, offset = range_and_offset @pos
		idx = ranges.index range
		tail = [[range[0] + offset, range[1] - offset]] + ranges[idx+1..-1]
		tail.each do |start, len|
			@io.seek start
			if written + len > data.length
				# final, partial chunk
				chunk = data[written..-1]
				@io.write chunk
				@pos += chunk.length
				written = data.length
				break
			end
			@io.write data[written, len]
			@pos += len
			written += len
		end
		written
	end

	# Yield successive chunks of at most +blocksize+ bytes until eof.
	# this will be generalised to a module later.
	def each_read blocksize=4096
		yield read(blocksize) until eof?
	end

	def inspect
		# the rescue is for empty files
		start, len = *(range_and_offset(@pos)[0] rescue [nil, nil])
		range_str = start ? "#{start}..#{start+len}" : 'nil'
		"#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos "\
		"current_range=#{range_str}>"
	end
end
184
-
@@ -1,927 +0,0 @@
1
- #! /usr/bin/ruby -w
2
-
3
- $: << File.dirname(__FILE__) + '/..'
4
-
5
- require 'support'
6
-
7
- require 'stringio'
8
- require 'tempfile'
9
-
10
- require 'ole/base'
11
- require 'ole/types'
12
- # not strictly ole related
13
- require 'ole/io_helpers'
14
-
15
- module Ole # :nodoc:
16
- #
17
- # = Introduction
18
- #
19
- # <tt>Ole::Storage</tt> is a class intended to abstract away details of the
20
- # access to OLE2 structured storage files, such as those produced by
21
- # Microsoft Office, eg *.doc, *.msg etc.
22
- #
23
- # Initially based on chicago's libole, source available at
24
- # http://prdownloads.sf.net/chicago/ole.tgz
25
- # Later augmented with some corrections by inspecting pole, and (purely
26
- # for header definitions) gsf.
27
- #
28
- # = Usage
29
- #
30
- # Usage should be fairly straight forward:
31
- #
32
- # # get the parent ole storage object
33
- # ole = Ole::Storage.open 'myfile.msg', 'r+'
34
- # # => #<Ole::Storage io=#<File:myfile.msg> root=#<Dirent:"Root Entry">>
35
- # # read some data
36
- # ole.root[1].read 4
37
- # # => "\001\000\376\377"
38
- # # get the top level root object and output a tree structure for
39
- # # debugging
40
- # puts ole.root.to_tree
41
- # # =>
42
- # - #<Dirent:"Root Entry" size=3840 time="2006-11-03T00:52:53Z">
43
- # |- #<Dirent:"__nameid_version1.0" size=0 time="2006-11-03T00:52:53Z">
44
- # | |- #<Dirent:"__substg1.0_00020102" size=16 data="CCAGAAAAAADAAA...">
45
- # ...
46
- # |- #<Dirent:"__substg1.0_8002001E" size=4 data="MTEuMA==">
47
- # |- #<Dirent:"__properties_version1.0" size=800 data="AAAAAAAAAAABAA...">
48
- # \- #<Dirent:"__recip_version1.0_#00000000" size=0 time="2006-11-03T00:52:53Z">
49
- # |- #<Dirent:"__substg1.0_0FF60102" size=4 data="AAAAAA==">
50
- # ...
51
- # # write some data, and finish up (note that open is 'r+', so this overwrites
52
- # # but doesn't truncate)
53
- # ole.root["\001CompObj"].open { |f| f.write "blah blah" }
54
- # ole.close
55
- #
56
- # = TODO
57
- #
58
- # 1. tests. lock down how things work at the moment - mostly good.
59
- # create from scratch works now, as does copying in a subtree of another doc, so
60
- # ole embedded attachment serialization works now. i can save embedded xls in an msg
61
- # into a separate file, and open it. this was a goal. now i would want to implement
62
- # to_mime conversion for embedded attachments, that serializes them to ole, but handles
63
- # some separately like various meta file types as plain .wmf attachments perhaps. this
64
- # will give pretty good .eml's from emails with embedded attachments.
65
- # the other todo is .rtf output, with full support for embedded ole objects...
66
- # 2. lots of tidying up
67
- # - main FIXME's in this regard are:
68
- # * the custom header cruft for Header and Dirent needs some love.
69
- # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
70
- # and, in a manner of speaking, but arguably different, Storage itself.
71
- # they have differing api's which would be nice to clean.
72
- # AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
73
- # * ole types need work, can't serialize datetime at the moment.
74
- # 3. need to fix META_BAT support in #flush.
75
- #
76
- class Storage
77
- VERSION = '1.1.3'
78
-
79
- # The top of the ole tree structure
80
- attr_reader :root
81
- # The tree structure in its original flattened form. only valid after #load, or #flush.
82
- attr_reader :dirents
83
- # The underlying io object to/from which the ole object is serialized, whether we
84
- # should close it, and whether it is writeable
85
- attr_reader :io, :close_parent, :writeable
86
- # Low level internals, you probably shouldn't need to mess with these
87
- attr_reader :header, :bbat, :sbat, :sb_file
88
-
89
- # maybe include an option hash, and allow :close_parent => true, to be more general.
90
- # +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
91
# Create a Storage backed by +arg+, which is either a filename (opened with
# +mode+, defaulting to 'rb') or an already-open, seekable IO object (in
# which case +mode+ must not be given). An empty io starts a fresh document;
# a non-empty one is parsed via #load.
def initialize arg, mode=nil
	# get the io object
	@close_parent, @io = if String === arg
		[true, open(arg, mode || 'rb')]
	else
		raise 'unable to specify mode string with io object' if mode
		[false, arg]
	end
	# do we have this file opened for writing? don't know of a better way to tell
	# (a read-only io is detected here by #flush raising IOError)
	@writeable = begin
		@io.flush
		true
	rescue IOError
		false
	end
	# silence undefined warning in clear
	@sb_file = nil
	# if the io object has data, we should load it, otherwise start afresh
	@io.size > 0 ? load : clear
end
111
-
112
# Block-form constructor: when given a block, yields the new Storage and
# guarantees #close is called afterwards; otherwise returns the instance
# as normal.
def self.new arg, mode=nil
	ole = super
	if block_given?
		begin yield ole
		ensure; ole.close
		end
	else ole
	end
end
121
-
122
class << self
	# encouraged - reads naturally with the block form of self.new
	alias open :new
	# deprecated - clashes conceptually with the instance method #load
	alias load :new
end
128
-
129
- # load document from file.
130
# load document from file. Parses the header, the big-block allocation
# table, the flat dirent list (reassembled into a tree on @root), and the
# small-block file/table.
def load
	# we always read 512 for the header block. if the block size ends up being different,
	# what happens to the 109 fat entries. are there more/less entries?
	@io.rewind
	header_block = @io.read 512
	@header = Header.load header_block

	# create an empty bbat
	@bbat = AllocationTable::Big.new self
	# extra mbat blocks
	mbat_blocks = (0...@header.num_mbat).map { |i| i + @header.mbat_start }
	# the bbat chain head entries live in the spare room of the header block,
	# continued in the mbat blocks
	bbat_chain = (header_block[Header::SIZE..-1] + @bbat.read(mbat_blocks)).unpack 'L*'
	# am i using num_bat in the right way?
	@bbat.load @bbat.read(bbat_chain[0, @header.num_bat])

	# get block chain for directories, read it, then split it into chunks and load the
	# directory entries. semantics changed - used to cut at first dir where dir.type == 0
	@dirents = @bbat.read(@header.dirent_start).scan(/.{#{Dirent::SIZE}}/mo).
		map { |str| Dirent.load self, str }.reject { |d| d.type_id == 0 }

	# now reorder from flat into a tree
	# links are stored in some kind of balanced binary tree
	# check that everything is visited at least, and at most once
	# similarly with the blocks of the file.
	# was thinking of moving this to Dirent.to_tree instead.
	class << @dirents
		def to_tree idx=0
			return [] if idx == Dirent::EOT
			d = self[idx]
			d.children = to_tree d.child
			raise "directory #{d.inspect} used twice" if d.idx
			d.idx = idx
			to_tree(d.prev) + [d] + to_tree(d.next)
		end
	end

	@root = @dirents.to_tree.first
	Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
	# dirents never reached by the tree walk are orphans - warn, don't raise
	unused = @dirents.reject(&:idx).length
	Log.warn "* #{unused} unused directories" if unused > 0

	# FIXME i don't currently use @header.num_sbat which i should
	# hmm. nor do i write it. it means what exactly again?
	@sb_file = RangesIOResizeable.new @bbat, @root.first_block, @root.size
	@sbat = AllocationTable::Small.new self
	@sbat.load @bbat.read(@header.sbat_start)
end
177
-
178
# Finish with the ole object: flush pending changes when opened writeable,
# release the small-block backing stream, and close the parent io if we
# own it.
def close
	if @writeable
		flush
	end
	@sb_file.close
	if @close_parent
		@io.close
	end
end
183
-
184
- # should have a #open_dirent i think. and use it in load and flush. neater.
185
- # also was thinking about Dirent#open_padding. then i can more easily clean up the padding
186
- # to be 0.chr
187
- =begin
188
- thoughts on fixes:
189
- 1. reterminate any chain not ending in EOC.
190
- 2. pass through all chain heads looking for collisions, and making sure nothing points to them
191
- (ie they are really heads).
192
- 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
193
- in the bat for them.
194
- this stuff will ensure reliability of input better. otherwise, its actually worth doing a repack
195
- directly after read, to ensure the above is properly accounted for, before subsequent writes possibly
196
- destroy things.
197
- =end
198
# Serialize the in-memory structure back to @io: dirent stream, sbat, bbat
# and finally the header. Called automatically from #close when writeable.
def flush
	# recreate dirs from our tree, split into dirs and big and small files
	@root.type = :root
	@root.name = 'Root Entry'
	@root.first_block = @sb_file.first_block
	@root.size = @sb_file.size
	@dirents = @root.flatten

	# maybe i should move the block form up to RangesIO, and get it for free at all levels.
	# Dirent#open gets block form for free then
	io = RangesIOResizeable.new @bbat, @header.dirent_start
	io.truncate 0
	@dirents.each { |dirent| io.write dirent.save }
	# pad the dirent stream out to a whole number of big blocks
	padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
	io.write 0.chr * padding
	@header.dirent_start = io.first_block
	io.close

	# similarly for the sbat data.
	io = RangesIOResizeable.new @bbat, @header.sbat_start
	io.truncate 0
	io.write @sbat.save
	@header.sbat_start = io.first_block
	@header.num_sbat = @bbat.chain(@header.sbat_start).length
	io.close

	# what follows will be slightly more complex for the bat fiddling.

	# create RangesIOResizeable hooked up to the bbat. use that to claim bbat blocks using
	# truncate. then when its time to write, convert that chain and some chunk of blocks at
	# the end, into META_BAT blocks. write out the chain, and those meta bat blocks, and its
	# done.
	# first, release all previous BAT/META_BAT marker blocks back to the pool
	@bbat.table.map! do |b|
		b == AllocationTable::BAT || b == AllocationTable::META_BAT ?
			AllocationTable::AVAIL : b
	end
	io = RangesIOResizeable.new @bbat, AllocationTable::EOC

	# use crappy loop for now:
	# grow until the claimed chain is big enough to hold the table that
	# describes it (the table grows as blocks are claimed, hence the loop)
	while true
		bbat_data = @bbat.save
		#mbat_data = bbat_data.length / @bbat.block_size * 4
		mbat_chain = @bbat.chain io.first_block
		raise NotImplementedError, "don't handle writing out extra META_BAT blocks yet" if mbat_chain.length > 109
		# so we can ignore meta blocks in this calculation:
		break if io.size >= bbat_data.length # it shouldn't be bigger right?
		# this may grow the bbat, depending on existing available blocks
		io.truncate bbat_data.length
	end

	# now extract the info we want:
	ranges = io.ranges
	mbat_chain = @bbat.chain io.first_block
	io.close
	mbat_chain.each { |b| @bbat.table[b] = AllocationTable::BAT }
	@header.num_bat = mbat_chain.length
	#p @bbat.truncated_table
	#p ranges
	#p mbat_chain
	# not resizeable! the blocks are already claimed; write the final table
	# into exactly those ranges
	io = RangesIO.new @io, ranges
	io.write @bbat.save
	io.close
	# pad the header's mbat slots out to the full 109 entries
	mbat_chain += [AllocationTable::AVAIL] * (109 - mbat_chain.length)
	@header.mbat_start = AllocationTable::EOC
	@header.num_mbat = 0

=begin
	# Old save code. remove shortly

	bbat_data = new_bbat.save
	# must exist as linear chain stored in header.
	@header.num_bat = (bbat_data.length / new_bbat.block_size.to_f).ceil
	base = io.pos / new_bbat.block_size - 1
	io.write bbat_data
	# now that spanned a number of blocks:
	mbat = (0...@header.num_bat).map { |i| i + base }
	mbat += [AllocationTable::AVAIL] * (109 - mbat.length) if mbat.length < 109
	header_mbat = mbat[0...109]
	other_mbat_data = mbat[109..-1].pack 'L*'
	@header.mbat_start = base + @header.num_bat
	@header.num_mbat = (other_mbat_data.length / new_bbat.block_size.to_f).ceil
	io.write other_mbat_data
=end

	# restore the root's type, which was switched to :root for serialization
	@root.type = :dir

	# now seek back and write the header out
	@io.seek 0
	@io.write @header.save + mbat_chain.pack('L*')
	@io.flush
end
290
-
291
# Reset this object to the equivalent of a freshly created, empty ole
# document, discarding the current contents of @io.
def clear
	# initialize to equivalent of loading an empty ole document.
	Log.warn 'creating new ole storage object on non-writable io' unless @writeable
	@header = Header.new
	@bbat = AllocationTable::Big.new self
	@root = Dirent.new self, :dir
	@root.name = 'Root Entry'
	@dirents = [@root]
	@root.idx = 0
	@root.children = []
	# size shouldn't display for non-files
	@root.size = 0
	# release any previous small-block backing stream before replacing it
	@sb_file.close if @sb_file
	@sb_file = RangesIOResizeable.new @bbat, AllocationTable::EOC
	@sbat = AllocationTable::Small.new self
	# throw everything else the hell away
	@io.truncate 0
end
309
-
310
- # could be useful with mis-behaving ole documents. or to just clean them up.
311
# could be useful with mis-behaving ole documents. or to just clean them up.
# Rewrites the document through a temporary backing, chosen by +temp+
# (:file for a Tempfile, :mem for an in-memory StringIO).
def repack temp=:file
	case temp
	# NOTE(review): 'w+' here is Tempfile.open's *basename* argument, not a
	# mode string - temp files will be named w+... Confirm this is intended.
	when :file; Tempfile.open 'w+', &method(:repack_using_io)
	when :mem; StringIO.open(&method(:repack_using_io))
	else raise "unknown temp backing #{temp.inspect}"
	end
end
318
-
319
# Copy the current document into +temp_io+, wipe this object via #clear,
# then deep-copy the directory tree back from the temporary document.
def repack_using_io temp_io
	@io.rewind
	IO.copy @io, temp_io
	clear
	Storage.open temp_io do |temp_ole|
		# the copied root must present as a plain :dir for Dirent.copy;
		# flush restores the :root type on our side
		temp_ole.root.type = :dir
		Dirent.copy temp_ole.root, root
	end
end
328
-
329
# Pick the allocation table appropriate for a stream of +size+ bytes: the
# big-block table at or above the header threshold, otherwise the
# small-block table. (note >=, not > previously.)
def bat_for_size size
	if size >= @header.threshold
		@bbat
	else
		@sbat
	end
end
333
-
334
- def inspect
335
- "#<#{self.class} io=#{@io.inspect} root=#{@root.inspect}>"
336
- end
337
-
338
- # A class which wraps the ole header
339
# A class which wraps the ole header - the first 76 bytes of the file,
# followed by the 109 header bat entries.
class Header < Struct.new(
		:magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
		:reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
		:sbat_start, :num_sbat, :mbat_start, :num_mbat
	)
	PACK = 'a8 a16 S2 a2 S2 a6 L3 a4 L5'
	SIZE = 0x4c
	# i have seen it pointed out that the first 4 bytes of hex,
	# 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
	# NOTE: built with Array#pack rather than a string literal so that the
	# constant is a binary (ASCII-8BIT) string. on ruby >= 1.9, a UTF-8
	# literal containing invalid bytes like "\xd0" never compares equal to
	# the binary strings produced by String#unpack, which made every loaded
	# header fail validation.
	MAGIC = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1].pack('C*') # expected value of Header#magic
	# the only byte order we handle - little endian (see validate!)
	BYTE_ORDER_LITTLE_ENDIAN = [0xfe, 0xff].pack('C*')
	# what you get if creating new header from scratch.
	# AllocationTable::EOC isn't available yet. meh.
	EOC = 0xfffffffe
	DEFAULT = [
		MAGIC, 0.chr * 16, 59, 3, BYTE_ORDER_LITTLE_ENDIAN, 9, 6,
		0.chr * 6, 0, 1, EOC, 0.chr * 4,
		4096, EOC, 0, EOC, 0
	]

	# 2 basic initializations, from scratch, or from a data string.
	# from scratch will be geared towards creating a new ole object
	def initialize *values
		super(*(values.empty? ? DEFAULT : values))
		validate!
	end

	# Parse a header from the packed string +str+ (the file's first bytes).
	def self.load str
		Header.new(*str.unpack(PACK))
	end

	# Serialize back to the packed on-disk representation.
	def save
		to_a.pack PACK
	end

	# Sanity check the header fields: raises on clearly broken values, logs a
	# warning for merely suspicious ones. Returns true.
	def validate!
		raise "OLE2 signature is invalid" unless magic == MAGIC
		if num_bat == 0 or # is that valid for a completely empty file?
			# not sure about this one. basically to do max possible bat given size of mbat
			num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2) or
			# shouldn't need to use the mbat as there is enough space in the header block
			num_bat < 109 && num_mbat != 0 or
			# given the size of the header is 76, if b_shift <= 6, blocks address the header.
			s_shift > b_shift or b_shift <= 6 or b_shift >= 31 or
			# we only handle little endian
			byte_order != BYTE_ORDER_LITTLE_ENDIAN
			raise "not valid OLE2 structured storage file"
		end
		# relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
		# 3 for this value.
		# transacting_signature != "\x00" * 4 or
		if threshold != 4096 or
			num_mbat == 0 && mbat_start != AllocationTable::EOC or
			reserved != "\x00" * 6
			Log.warn "may not be a valid OLE2 structured storage file"
		end
		true
	end
end
397
-
398
- #
399
- # +AllocationTable+'s hold the chains corresponding to files. Given
400
- # an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning
401
- # the blocks that make up that file.
402
- #
403
- # There are 2 allocation tables, the bbat, and sbat, for big and small
404
- # blocks respectively. The block chain should be loaded using either
405
- # <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt>
406
- # as appropriate.
407
- #
408
- # Whether or not big or small blocks are used for a file depends on
409
- # whether its size is over the <tt>Header#threshold</tt> level.
410
- #
411
- # An <tt>Ole::Storage</tt> document is serialized as a series of directory objects,
412
- # which are stored in blocks throughout the file. The blocks are either
413
- # big or small, and are accessed using the <tt>AllocationTable</tt>.
414
- #
415
- # The bbat allocation table's data is stored in the spare room in the header
416
- # block, and in extra blocks throughout the file as referenced by the meta
417
- # bat. That chain is linear, as there is no higher level table.
418
- #
419
class AllocationTable
	# a free block (I don't currently leave any blocks free), although I do pad out
	# the allocation table with AVAIL to the block size.
	AVAIL = 0xffffffff
	EOC = 0xfffffffe # end of a chain
	# these blocks correspond to the bat, and aren't part of a file, nor available.
	# (I don't currently output these)
	BAT = 0xfffffffd
	META_BAT = 0xfffffffc

	attr_reader :ole, :io, :table, :block_size

	# +ole+ is the parent Ole::Storage object. the table starts out empty;
	# subclasses (Big/Small) set @io and @block_size.
	def initialize ole
		@ole = ole
		@table = []
	end

	# Replace the table with the 32-bit little-endian entries packed in +data+.
	def load data
		@table = data.unpack('L*')
	end

	# The table with trailing AVAIL entries stripped.
	def truncated_table
		# this strips trailing AVAILs. come to think of it, this has the potential to break
		# bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
		# very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
		# at load time.
		temp = @table.reverse
		not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1]
		temp.reverse
	end

	# Serialize the table, padded with AVAIL out to a whole number of blocks.
	def save
		table = truncated_table #@table
		# pad it out some
		num = @ole.bbat.block_size / 4
		# do you really use AVAIL? they probably extend past end of file, and may shortly
		# be used for the bat. not really good.
		table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0
		table.pack 'L*'
	end

	# Follow the linked chain in the table from block index +start+, returning
	# the array of block indexes making up the chain.
	# rewriting this to be non-recursive. it broke on a large attachment
	# building up the chain, causing a stack error. need tail-call elimination...
	def chain start
		a = []
		idx = start
		# any of the marker values (META_BAT and above) terminates the chain
		until idx >= META_BAT
			raise "broken allocationtable chain" if idx < 0 || idx > @table.length
			a << idx
			idx = @table[idx]
		end
		Log.warn "invalid chain terminator #{idx}" unless idx == EOC
		a
	end

	# Convert +chain+ (either a head block index or an already-expanded chain
	# array) into byte ranges, optionally truncated to +size+.
	def ranges chain, size=nil
		chain = self.chain(chain) unless Array === chain
		blocks_to_ranges chain, size
	end

	# Turn a chain (an array given by +chain+) of big blocks, optionally
	# truncated to +size+, into an array of arrays describing the stretches of
	# bytes in the file that it belongs to.
	#
	# Big blocks are of size Ole::Storage::Header#b_size, and are stored
	# directly in the parent file.
	# truncate the chain if required
	# convert chain to ranges of the block size
	# truncate final range if required
	def blocks_to_ranges chain, size=nil
		chain = chain[0...(size.to_f / block_size).ceil] if size
		ranges = chain.map { |i| [block_size * i, block_size] }
		ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
		ranges
	end

	# quick shortcut. chain can be either a head (in which case the table is used to
	# turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
	# its not resizeable or migrateable. it probably could be resizeable though, using
	# self as the bat. but what would the first_block be?
	def open chain, size=nil
		io = RangesIO.new @io, ranges(chain, size)
		if block_given?
			begin yield io
			ensure; io.close
			end
		else io
		end
	end

	# Read the whole of a chain (or chain head) in one hit.
	def read chain, size=nil
		open chain, size, &:read
	end

	# ----------------------

	# Index of the first AVAIL block, appending a new entry when the table is full.
	def get_free_block
		@table.each_index { |i| return i if @table[i] == AVAIL }
		@table.push AVAIL
		@table.length - 1
	end

	# Grow or shrink the chain starting at +first_block+ to cover +size+ bytes.
	# must return first_block
	def resize_chain first_block, size
		new_num_blocks = (size / block_size.to_f).ceil
		blocks = chain first_block
		old_num_blocks = blocks.length
		if new_num_blocks < old_num_blocks
			# de-allocate some of our old blocks. TODO maybe zero them out in the file???
			(new_num_blocks...old_num_blocks).each { |i| @table[blocks[i]] = AVAIL }
			# if we have a chain, terminate it and return head, otherwise return EOC
			if new_num_blocks > 0
				@table[blocks[new_num_blocks-1]] = EOC
				first_block
			else EOC
			end
		elsif new_num_blocks > old_num_blocks
			# need some more blocks.
			last_block = blocks.last
			(new_num_blocks - old_num_blocks).times do
				block = get_free_block
				# connect the chain. handle corner case of blocks being [] initially
				if last_block
					@table[last_block] = block
				else
					first_block = block
				end
				last_block = block
				# this is just to inhibit the problem where it gets picked as being a free block
				# again next time around.
				@table[last_block] = EOC
			end
			first_block
		else first_block
		end
	end

	# Allocation table for big blocks, stored directly in the parent file.
	class Big < AllocationTable
		def initialize(*args)
			super
			@block_size = 1 << @ole.header.b_shift
			@io = @ole.io
		end

		# Big blocks are kind of -1 based, in order to not clash with the header.
		def blocks_to_ranges blocks, size
			super blocks.map { |b| b + 1 }, size
		end
	end

	# Allocation table for small blocks, backed by the ole's small-block file.
	class Small < AllocationTable
		def initialize(*args)
			super
			@block_size = 1 << @ole.header.s_shift
			@io = @ole.sb_file
		end
	end
end
577
-
578
- # like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
579
- # AllocationTable, and can be resized. used for read/write to 2 streams:
580
- # 1. serialized dirent data
581
- # 2. sbat table data
582
- # 3. all dirents but through RangesIOMigrateable below
583
- #
584
- # Note that all internal access to first_block is through accessors, as it is sometimes
585
- # useful to redirect it.
586
# like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
# AllocationTable, and can be resized. used for read/write to 2 streams:
# 1. serialized dirent data
# 2. sbat table data
# 3. all dirents but through RangesIOMigrateable below
#
# Note that all internal access to first_block is through accessors, as it is
# sometimes useful to redirect it.
class RangesIOResizeable < RangesIO
	attr_reader :bat
	attr_accessor :first_block

	# +bat+ is the AllocationTable backing this stream; +first_block+ is the
	# head of its chain (AllocationTable::EOC for an empty stream).
	def initialize bat, first_block, size=nil
		@bat = bat
		self.first_block = first_block
		super @bat.io, @bat.ranges(first_block, size)
	end

	# Resize the underlying chain to cover +size+ bytes, updating @ranges and
	# @size, and growing the parent io if the new ranges extend past its end.
	def truncate size
		# note that old_blocks is != @ranges.length necessarily. i'm planning to write a
		# merge_ranges function that merges sequential ranges into one as an optimization.
		self.first_block = @bat.resize_chain first_block, size
		@ranges = @bat.ranges first_block, size
		# FIX: clamp the position to the *new* size when shrinking. this
		# previously read <tt>@pos = @size</tt>, assigning the old (larger)
		# size and leaving @pos beyond the end of the shrunk stream.
		@pos = size if @pos > size

		# don't know if this is required, but we explicitly request our @io to grow if necessary
		# we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
		# can be made.
		# maybe its ok to just seek out there later??
		max = @ranges.map { |pos, len| pos + len }.max || 0
		@io.truncate max if max > @io.size

		@size = size
	end
end
612
-
613
- # like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
614
- # between bats based on size, and updating the dirent, instead of the ole copy back
615
- # on close.
616
# like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
# between bats based on size, and updating the dirent, instead of the ole copy back
# on close.
class RangesIOMigrateable < RangesIOResizeable
	attr_reader :dirent

	# Wrap the data stream of +dirent+, choosing the big or small bat
	# according to its current size.
	def initialize dirent
		@dirent = dirent
		super @dirent.ole.bat_for_size(@dirent.size), @dirent.first_block, @dirent.size
	end

	# Resize to +size+ bytes, migrating the data between the small and big
	# allocation tables when +size+ crosses the threshold.
	def truncate size
		bat = @dirent.ole.bat_for_size size
		if bat != @bat
			# bat migration needed! we need to backup some data. the amount of data
			# should be <= @ole.header.threshold, so we can just hold it all in one buffer.
			# backup this
			pos = @pos
			@pos = 0
			keep = read [@size, size].min
			# this does a normal truncate to 0, removing our presence from the old bat, and
			# rewrite the dirent's first_block
			super 0
			@bat = bat
			# just change the underlying io from right under everyone :)
			@io = bat.io
			# important to do this now, before the write. as the below write will always
			# migrate us back to sbat! this will now allocate us +size+ in the new bat.
			super
			@pos = 0
			write keep
			@pos = pos
		else
			super
		end
		# now just update the file
		@dirent.size = size
	end

	# forward this to the dirent
	def first_block
		@dirent.first_block
	end

	def first_block= val
		@dirent.first_block = val
	end
end
660
-
661
- #
662
- # A class which wraps an ole directory entry. Can be either a directory
663
- # (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>)
664
- #
665
- # Most interaction with <tt>Ole::Storage</tt> is through this class.
666
- # The 2 most important functions are <tt>Dirent#children</tt>, and
667
- # <tt>Dirent#data</tt>.
668
- #
669
- # was considering separate classes for dirs and files. some methods/attrs only
670
- # applicable to one or the other.
671
- #
672
- # Note that Dirent is still using a home grown Struct variant, with explicit
673
- # MEMBERS etc. any reason for that still?
674
- #
675
class Dirent
	# the ordered fields of an on-disk 128 byte dirent record. accessors for
	# each are generated below from this list.
	MEMBERS = [
		:name_utf16, :name_len, :type_id, :colour, :prev, :next, :child,
		:clsid, :flags, # dirs only
		:create_time_str, :modify_time_str, # files only
		:first_block, :size, :reserved
	]
	# pack format matching MEMBERS, and the fixed on-disk record size.
	PACK = 'a64 S C C L3 a16 L a8 a8 L2 a4'
	SIZE = 128
	TYPE_MAP = {
		# this is temporary
		0 => :empty,
		1 => :dir,
		2 => :file,
		5 => :root
	}
	COLOUR_MAP = {
		0 => :red,
		1 => :black
	}
	# used in the next / prev / child stuff to show that the tree ends here.
	# also used for first_block for directory.
	EOT = 0xffffffff

	include Enumerable

	# Dirent's should be created in 1 of 2 ways, either Dirent.new ole, [:dir/:file/:root],
	# or Dirent.load '... dirent data ...'
	# its a bit clunky, but thats how it is at the moment. you can assign to type, but
	# shouldn't.

	attr_accessor :idx
	# This returns all the children of this +Dirent+. It is filled in
	# when the tree structure is recreated.
	attr_accessor :children
	attr_reader :ole, :type, :create_time, :modify_time, :name
	def initialize ole, type
		@ole = ole
		# this isn't really good enough. need default values put in there.
		@values = [
			0.chr * 2, 2, 0, # will get overwritten
			1, EOT, EOT, EOT,
			0.chr * 16, 0, nil, nil,
			AllocationTable::EOC, 0, 0.chr * 4]
		# maybe check types here.
		@type = type
		@create_time = @modify_time = nil
		@children = []
		if file?
			@create_time = Time.now
			@modify_time = Time.now
		end
	end

	def self.load ole, str
		# load should function without the need for the initializer.
		dirent = Dirent.allocate
		dirent.load ole, str
		dirent
	end

	# parse the 128 byte record +str+, filling in @name, @type and, for
	# files, the create/modify times.
	def load ole, str
		@ole = ole
		@values = str.unpack PACK
		@name = Types::FROM_UTF16.iconv name_utf16[0...name_len].sub(/\x00\x00$/, '')
		@type = TYPE_MAP[type_id] or raise "unknown type #{type_id.inspect}"
		if file?
			@create_time = Types.load_time create_time_str
			@modify_time = Types.load_time modify_time_str
		end
	end

	# only defined for files really. and the above children stuff is only for children.
	# maybe i should have some sort of File and Dir class, that subclass Dirents? a dirent
	# is just a data holder.
	# this can be used for write support if the underlying io object was opened for writing.
	# maybe take a mode string argument, and do truncation, append etc stuff.
	def open
		return nil unless file?
		io = RangesIOMigrateable.new self
		if block_given?
			begin yield io
			ensure; io.close
			end
		else io
		end
	end

	def read limit=nil
		open { |io| io.read limit }
	end

	def dir?
		# to count root as a dir.
		type != :file
	end

	def file?
		type == :file
	end

	def time
		# time is nil for streams, otherwise try to parse either of the time pairs (not
		# sure of their meaning - created / modified?)
		#@time ||= file? ? nil : (Dirent.parse_time(secs1, days1) || Dirent.parse_time(secs2, days2))
		create_time || modify_time
	end

	def each(&block)
		@children.each(&block)
	end

	# index by Integer position, or by name (anything responding to +===+,
	# so strings and regexps both work).
	def [] idx
		return children[idx] if Integer === idx
		# path style look up.
		# maybe take another arg to allow creation? or leave that to the filesystem
		# add on.
		# not sure if '/' is a valid char in an Dirent#name, so no splitting etc at
		# this level.
		# also what about warning about multiple hits for the same name?
		children.find { |child| idx === child.name }
	end

	# solution for the above '/' thing for now.
	def / path
		self[path]
	end

	# render this dirent and its descendants as an ascii tree.
	def to_tree
		if children and !children.empty?
			str = "- #{inspect}\n"
			children.each_with_index do |child, i|
				last = i == children.length - 1
				child.to_tree.split(/\n/).each_with_index do |line, j|
					str << "  #{last ? (j == 0 ? "\\" : ' ') : '|'}#{line}\n"
				end
			end
			str
		else "- #{inspect}\n"
		end
	end

	# generate reader/writer pairs onto @values for each on-disk field.
	MEMBERS.each_with_index do |sym, i|
		define_method(sym) { @values[i] }
		define_method(sym.to_s + '=') { |val| @values[i] = val }
	end

	def to_a
		@values
	end

	# flattens the tree starting from here into +dirents+. note it modifies its argument.
	def flatten dirents=[]
		@idx = dirents.length
		dirents << self
		children.each { |child| child.flatten dirents }
		self.child = Dirent.flatten_helper children
		dirents
	end

	# i think making the tree structure optimized is actually more complex than this, and
	# requires some intelligent ordering of the children based on names, but as long as
	# it is valid its ok.
	# actually, i think its ok. gsf for example only outputs a singly-linked-list, where
	# prev is always EOT.
	def self.flatten_helper children
		return EOT if children.empty?
		i = children.length / 2
		this = children[i]
		this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] }
		this.idx
	end

	attr_accessor :name, :type
	# serialize back to the 128 byte on-disk record, refreshing the packed
	# name/type fields from the ruby-level attributes first. raises for an
	# unknown +type+.
	def save
		tmp = Types::TO_UTF16.iconv(name)
		tmp = tmp[0, 62] if tmp.length > 62
		tmp += 0.chr * 2
		self.name_len = tmp.length
		self.name_utf16 = tmp + 0.chr * (64 - tmp.length)
		# map the symbolic type back to its numeric id. an explicit nil check
		# replaces the previous blind begin/rescue, raising the same message.
		entry = TYPE_MAP.to_a.find { |id, name| @type == name }
		raise "unknown type #{type.inspect}" unless entry
		self.type_id = entry.first
		# for the case of files, it is assumed that that was handled already
		# note not dir?, so as not to override root's first_block
		self.first_block = Dirent::EOT if type == :dir
		# FIXME: serializing real file times is not implemented yet.
		#self.create_time_str = ?? #Types.load_time create_time_str
		#self.modify_time_str = ?? #Types.load_time modify_time_str
		# default unset time strings to zeros. the old guard here was +if 0+,
		# which is truthy in ruby, so this zero-fill never ran and a freshly
		# constructed dirent packed nil time strings, blowing up in pack below.
		# +||=+ keeps time strings loaded from disk intact.
		self.create_time_str ||= 0.chr * 8
		self.modify_time_str ||= 0.chr * 8
		@values.pack PACK
	end

	def inspect
		str = "#<Dirent:#{name.inspect}"
		# perhaps i should remove the data snippet. its not that useful anymore.
		if file?
			tmp = read 9
			data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp
			str << " size=#{size}" +
				"#{time ? ' time=' + time.to_s.inspect : nil}" +
				" data=#{data.inspect}"
		else
			# there is some dir specific stuff. like clsid, flags.
		end
		str + '>'
	end

	# --------
	# and for creation of a dirent. don't like the name. is it a file or a directory?
	# assign to type later? io will be empty.
	def new_child type
		child = Dirent.new ole, type
		children << child
		yield child if block_given?
		child
	end

	def delete child
		# remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
		raise "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
		# free our blocks
		child.open { |io| io.truncate 0 }
	end

	def self.copy src, dst
		# copies the contents of src to dst. must be the same type. this will throw an
		# error on copying to root. maybe this will recurse too much for big documents??
		raise 'differing types' if src.type == :file and dst.type != :file
		dst.name = src.name
		if src.dir?
			src.children.each do |src_child|
				dst.new_child(src_child.type) { |dst_child| Dirent.copy src_child, dst_child }
			end
		else
			src.open do |src_io|
				dst.open { |dst_io| IO.copy src_io, dst_io }
			end
		end
	end
end
921
- end
922
- end
923
-
924
# when run directly, print the directory tree of the given ole file.
if $0 == __FILE__
	tree = Ole::Storage.open(ARGV[0]) { |ole| ole.root.to_tree }
	puts tree
end
927
-