ruby-msg 1.2.17.3 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,184 +0,0 @@
1
-
2
- # move to support?
3
class IO # :nodoc:
  # Drain +src+ into +dst+: while +src+ does not report eof, read up to 4096
  # bytes from it and hand them to <tt>dst.write</tt>. Returns +nil+.
  def self.copy src, dst
    loop do
      break if src.eof?
      dst.write src.read(4096)
    end
  end
end
11
-
12
- #
13
- # = Introduction
14
- #
15
- # +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
16
- # slices of the input file by providing a list of ranges. Intended as an initial measure to curb
17
- # inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
18
- # no method to stream it.
19
- #
20
- # This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
21
- # and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
22
- # getting 16 bytes doesn't read the whole thing).
23
- #
24
- # In the simplest case it can be used with a single range to provide a limited io to a section of
25
- # a file.
26
- #
27
- # = Limitations
28
- #
29
- # * No buffering. by design at the moment. Intended for large reads
30
- #
31
- # = TODO
32
- #
33
- # On further reflection, this class is something of a joining/optimization of
34
- # two separate IO classes. a SubfileIO, for providing access to a range within
35
- # a File as a separate IO object, and a ConcatIO, allowing the presentation of
36
- # a bunch of io objects as a single unified whole.
37
- #
38
- # I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
39
- # convert a whole mime message into an IO stream, that can be read from.
40
- # It will just be the concatenation of a series of IO objects, corresponding to
41
- # headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
42
- # original message proper, or RangesIO as provided by the Attachment#data, that
43
- # will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
44
- # fly. Thus the attachment, in its plain or encoded form, and the message as a
45
- # whole never exists as a single string in memory, as it does now. This is a
46
- # fair bit of work to achieve, but generally useful I believe.
47
- #
48
- # This class isn't ole specific, maybe move it to my general ruby stream project.
49
- #
50
class RangesIO
  attr_reader :io, :ranges, :size, :pos

  # +io+ is the parent io object that we are wrapping.
  #
  # +ranges+ are byte offsets, either
  # 1. an array of ranges [1..2, 4..5, 6..8] or
  # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
  #    (think the way String indexing works)
  # A Range is converted to <tt>[begin, end - begin]</tt>, so its end is used as
  # an exclusive bound.
  # The +ranges+ provide sequential slices of the file that will be read. they can overlap.
  #
  # +opts+ accepts <tt>:close_parent</tt> (default +false+); when true, #close
  # also closes +io+.
  def initialize io, ranges, opts={}
    @opts = {:close_parent => false}.merge opts
    @io = io
    # normalise everything to [offset, length] pairs
    @ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
    # total size is the sum of all slice lengths
    @size = @ranges.inject(0) { |total, (pos, len)| total + len }
    # initial position in the virtual file
    @pos = 0
  end

  # Set the position. Only <tt>IO::SEEK_SET</tt> is supported for +whence+;
  # anything else raises NotImplementedError.
  def pos= pos, whence=IO::SEEK_SET
    # FIXME support other whence values
    raise NotImplementedError, "#{whence.inspect} not supported" unless whence == IO::SEEK_SET
    @pos = pos
  end

  alias seek :pos=
  alias tell :pos

  # Closes the parent io only if <tt>:close_parent</tt> was requested.
  def close
    @io.close if @opts[:close_parent]
  end

  # Returns <tt>[range, offset]</tt>: the <tt>[start, length]</tt> pair from
  # #ranges that contains +pos+, and the offset of +pos+ within it. A +pos+
  # that sits exactly on the end of a range is reported against that range
  # (with offset == length). Raises RuntimeError if no range covers +pos+.
  def range_and_offset pos
    i, off = range_index_and_offset pos
    [ranges[i], off]
  end

  # True once the position has reached (or been moved past) the end.
  def eof?
    @pos >= @size
  end

  # read bytes from file, to a maximum of +limit+, or all available if unspecified.
  # Returns an empty string at eof.
  def read limit=nil
    data = String.new
    limit ||= size
    # special case eof
    return data if eof?
    remaining_ranges(@pos).each do |pos, len|
      @io.seek pos
      if limit < len
        # FIXME this += isn't correct if there is a read error
        # or something.
        @pos += limit
        data << @io.read(limit)
        break
      end
      # this can also stuff up. if the ranges are beyond the size of the file, we can get
      # nil here.
      data << @io.read(len)
      @pos += len
      limit -= len
    end
    data
  end

  # you may override this call to update @ranges and @size, if applicable. then write
  # support can grow below
  def truncate size
    raise NotImplementedError, 'truncate not supported'
  end
  # why not? :)
  alias size= :truncate

  # Write +data+ at the current position, spreading it over the ranges, and
  # return the number of bytes written. If there is not enough room the
  # #truncate hook is asked for more; when that is unsupported a RuntimeError
  # is raised.
  def write data
    # short cut. needed because truncate 0 may return no ranges, instead of empty range,
    # thus the range lookup fails.
    return 0 if data.empty?
    # all the arithmetic below is in bytes, so work on a binary copy of the
    # string where the ruby version distinguishes characters from bytes.
    data = data.dup.force_encoding(Encoding::BINARY) if data.respond_to? :force_encoding
    data_pos = 0
    # if we don't have room, we can use the truncate hook to make more space.
    if data.length > @size - @pos
      begin
        truncate @pos + data.length
      rescue NotImplementedError
        # FIXME maybe warn instead, then just truncate the data?
        raise "unable to satisfy write of #{data.length} bytes"
      end
    end
    remaining_ranges(@pos).each do |pos, len|
      @io.seek pos
      if data_pos + len > data.length
        # the tail of the data ends inside this range
        chunk = data[data_pos..-1]
        @io.write chunk
        @pos += chunk.length
        data_pos = data.length
        break
      end
      @io.write data[data_pos, len]
      @pos += len
      data_pos += len
    end
    data_pos
  end

  # this will be generalised to a module later
  def each_read blocksize=4096
    yield read(blocksize) until eof?
  end

  def inspect
    # the rescue is for empty files
    pos, len = begin
      range_and_offset(@pos)[0]
    rescue StandardError
      [nil, nil]
    end
    range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
    "#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos "\
      "current_range=#{range_str}>"
  end

  private

  # Returns <tt>[index, offset]</tt> for the first range whose cumulative end
  # is >= +pos+. Working with the index (rather than looking the range up again
  # by value) keeps identical ranges in the list distinct.
  def range_index_and_offset pos
    total = 0
    ranges.each_with_index do |r, i|
      to = total + r[1]
      return [i, pos - total] if pos <= to
      total = to
    end
    # should be impossible for any valid pos, (0...size) === pos
    raise "unable to find range for pos #{pos.inspect}"
  end

  # The list of [offset, length] slices still ahead of +pos+: the unread part
  # of the range containing +pos+, followed by all later ranges.
  def remaining_ranges pos
    i, off = range_index_and_offset pos
    start, len = ranges[i]
    [[start + off, len - off]] + ranges[i+1..-1]
  end
end
184
-
@@ -1,927 +0,0 @@
1
- #! /usr/bin/ruby -w
2
-
3
- $: << File.dirname(__FILE__) + '/..'
4
-
5
- require 'support'
6
-
7
- require 'stringio'
8
- require 'tempfile'
9
-
10
- require 'ole/base'
11
- require 'ole/types'
12
- # not strictly ole related
13
- require 'ole/io_helpers'
14
-
15
- module Ole # :nodoc:
16
- #
17
- # = Introduction
18
- #
19
- # <tt>Ole::Storage</tt> is a class intended to abstract away details of the
20
- # access to OLE2 structured storage files, such as those produced by
21
- # Microsoft Office, eg *.doc, *.msg etc.
22
- #
23
- # Initially based on chicago's libole, source available at
24
- # http://prdownloads.sf.net/chicago/ole.tgz
25
- # Later augmented with some corrections by inspecting pole, and (purely
26
- # for header definitions) gsf.
27
- #
28
- # = Usage
29
- #
30
- # Usage should be fairly straight forward:
31
- #
32
- # # get the parent ole storage object
33
- # ole = Ole::Storage.open 'myfile.msg', 'r+'
34
- # # => #<Ole::Storage io=#<File:myfile.msg> root=#<Dirent:"Root Entry">>
35
- # # read some data
36
- # ole.root[1].read 4
37
- # # => "\001\000\376\377"
38
- # # get the top level root object and output a tree structure for
39
- # # debugging
40
- # puts ole.root.to_tree
41
- # # =>
42
- # - #<Dirent:"Root Entry" size=3840 time="2006-11-03T00:52:53Z">
43
- # |- #<Dirent:"__nameid_version1.0" size=0 time="2006-11-03T00:52:53Z">
44
- # | |- #<Dirent:"__substg1.0_00020102" size=16 data="CCAGAAAAAADAAA...">
45
- # ...
46
- # |- #<Dirent:"__substg1.0_8002001E" size=4 data="MTEuMA==">
47
- # |- #<Dirent:"__properties_version1.0" size=800 data="AAAAAAAAAAABAA...">
48
- # \- #<Dirent:"__recip_version1.0_#00000000" size=0 time="2006-11-03T00:52:53Z">
49
- # |- #<Dirent:"__substg1.0_0FF60102" size=4 data="AAAAAA==">
50
- # ...
51
- # # write some data, and finish up (note that open is 'r+', so this overwrites
52
- # # but doesn't truncate)
53
- # ole.root["\001CompObj"].open { |f| f.write "blah blah" }
54
- # ole.close
55
- #
56
- # = TODO
57
- #
58
- # 1. tests. lock down how things work at the moment - mostly good.
59
- # create from scratch works now, as does copying in a subtree of another doc, so
60
- # ole embedded attachment serialization works now. i can save embedded xls in an msg
61
- # into a separate file, and open it. this was a goal. now i would want to implement
62
- # to_mime conversion for embedded attachments, that serializes them to ole, but handles
63
- # some separately like various meta file types as plain .wmf attachments perhaps. this
64
- # will give pretty good .eml's from emails with embedded attachments.
65
- # the other todo is .rtf output, with full support for embedded ole objects...
66
- # 2. lots of tidying up
67
- # - main FIXME's in this regard are:
68
- # * the custom header cruft for Header and Dirent needs some love.
69
- # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
70
- # and, in a manner of speaking, but arguably different, Storage itself.
71
- # they have differing api's which would be nice to clean.
72
- # AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
73
- # * ole types need work, can't serialize datetime at the moment.
74
- # 3. need to fix META_BAT support in #flush.
75
- #
76
- class Storage
77
- VERSION = '1.1.3'
78
-
79
- # The top of the ole tree structure
80
- attr_reader :root
81
- # The tree structure in its original flattened form. only valid after #load, or #flush.
82
- attr_reader :dirents
83
- # The underlying io object to/from which the ole object is serialized, whether we
84
- # should close it, and whether it is writeable
85
- attr_reader :io, :close_parent, :writeable
86
- # Low level internals, you probably shouldn't need to mess with these
87
- attr_reader :header, :bbat, :sbat, :sb_file
88
-
89
- # maybe include an option hash, and allow :close_parent => true, to be more general.
90
- # +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
91
# maybe include an option hash, and allow :close_parent => true, to be more general.
# +arg+ should be either a filename, or an +IO+ object, and needs to be seekable.
# +mode+ is only valid together with a filename (default 'rb'); passing it with
# an io object raises.
#
# If the io already holds data it is parsed with #load, otherwise the storage
# is initialised empty with #clear.
def initialize arg, mode=nil
  # get the io object. File.open is used rather than Kernel#open so that a
  # filename starting with "|" is treated as a path and never run as a command.
  @close_parent, @io = if String === arg
    [true, File.open(arg, mode || 'rb')]
  else
    raise 'unable to specify mode string with io object' if mode
    [false, arg]
  end
  begin
    # do we have this file opened for writing? don't know of a better way to tell
    @writeable = begin
      @io.flush
      true
    rescue IOError
      false
    end
    # silence undefined warning in clear
    @sb_file = nil
    # if the io object has data, we should load it, otherwise start afresh
    @io.size > 0 ? load : clear
  rescue Exception
    # don't leak a file handle we opened ourselves if loading failed
    @io.close if @close_parent
    raise
  end
end
111
-
112
# Builds the storage object. Without a block the new object is returned; with a
# block the object is yielded, closed afterwards (even if the block raises), and
# the block's value is returned.
def self.new arg, mode=nil
  ole = super
  return ole unless block_given?
  begin
    yield ole
  ensure
    ole.close
  end
end
121
-
122
class << self
  # encouraged spelling
  alias_method :open, :new
  # deprecated spelling
  alias_method :load, :new
end
128
-
129
- # load document from file.
130
# load document from file.
#
# Reads the header, the big block allocation table, the directory entries
# (rebuilding them into a tree rooted at #root), and finally the small block
# file and small block allocation table.
def load
  # we always read 512 for the header block. if the block size ends up being different,
  # what happens to the 109 fat entries. are there more/less entries?
  @io.rewind
  header_block = @io.read 512
  @header = Header.load header_block

  # create an empty bbat
  @bbat = AllocationTable::Big.new self
  # extra mbat blocks
  mbat_blocks = (0...@header.num_mbat).map { |i| i + @header.mbat_start }
  bbat_chain = (header_block[Header::SIZE..-1] + @bbat.read(mbat_blocks)).unpack 'L*'
  # am i using num_bat in the right way?
  @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])

  # get block chain for directories, read it, then split it into chunks and load the
  # directory entries. empty entries (type_id == 0) are kept for now: child/prev/next
  # are indices into this list, so dropping entries before the tree is built would
  # shift every later index.
  @dirents = @bbat.read(@header.dirent_start).scan(/.{#{Dirent::SIZE}}/mo).
    map { |str| Dirent.load self, str }

  # now reorder from flat into a tree
  # links are stored in some kind of balanced binary tree
  # check that everything is visited at least, and at most once
  # similarly with the blocks of the file.
  # was thinking of moving this to Dirent.to_tree instead.
  class << @dirents
    def to_tree idx=0
      return [] if idx == Dirent::EOT
      d = self[idx]
      raise "directory index #{idx.inspect} out of range" unless d
      # mark the entry before descending, so that a link cycle is reported
      # here instead of recursing until the stack overflows
      raise "directory #{d.inspect} used twice" if d.idx
      d.idx = idx
      d.children = to_tree d.child
      to_tree(d.prev) + [d] + to_tree(d.next)
    end
  end

  @root = @dirents.to_tree.first
  # the tree is linked up, so the empty slots can go now. reject! keeps the
  # same array object (and with it the to_tree singleton method).
  @dirents.reject! { |d| d.type_id == 0 }
  Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
  unused = @dirents.reject(&:idx).length
  Log.warn "* #{unused} unused directories" if unused > 0

  # FIXME i don't currently use @header.num_sbat which i should
  # hmm. nor do i write it. it means what exactly again?
  @sb_file = RangesIOResizeable.new @bbat, @root.first_block, @root.size
  @sbat = AllocationTable::Small.new self
  @sbat.load @bbat.read(@header.sbat_start)
end
177
-
178
# Flushes pending changes (when the io is writeable), closes the small block
# file, and closes the underlying io if this object opened it. The underlying
# io is closed even when flushing raises, so the handle is never leaked.
def close
  begin
    flush if @writeable
    @sb_file.close
  ensure
    @io.close if @close_parent
  end
end
183
-
184
- # should have a #open_dirent i think. and use it in load and flush. neater.
185
- # also was thinking about Dirent#open_padding. then i can more easily clean up the padding
186
- # to be 0.chr
187
- =begin
188
- thoughts on fixes:
189
- 1. reterminate any chain not ending in EOC.
190
- 2. pass through all chain heads looking for collisions, and making sure nothing points to them
191
- (ie they are really heads).
192
- 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
193
- in the bat for them.
194
- this stuff will ensure reliability of input better. otherwise, its actually worth doing a repack
195
- directly after read, to ensure the above is probably accounted for, before subsequent writes possibly
196
- destroy things.
197
- =end
198
# Serialise the in-memory state back to the io: directory entries, the small
# block allocation table, the big block allocation table and, last of all, the
# header block. Writing out extra META_BAT blocks is not supported and raises
# NotImplementedError when more than 109 bat blocks would be needed.
def flush
  block_size = @bbat.block_size

  # recreate the flat dirent list from the tree; the root entry describes
  # the small block file.
  @root.type = :root
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten

  # rewrite the directory stream from scratch and zero-pad it to a whole
  # number of big blocks.
  dir_io = RangesIOResizeable.new(@bbat, @header.dirent_start)
  dir_io.truncate(0)
  @dirents.each { |dirent| dir_io.write(dirent.save) }
  dir_io.write(0.chr * ((-dir_io.size) % block_size))
  @header.dirent_start = dir_io.first_block
  dir_io.close

  # same again for the sbat data.
  sbat_io = RangesIOResizeable.new(@bbat, @header.sbat_start)
  sbat_io.truncate(0)
  sbat_io.write(@sbat.save)
  @header.sbat_start = sbat_io.first_block
  @header.num_sbat = @bbat.chain(@header.sbat_start).length
  sbat_io.close

  # the bbat stores itself in blocks that it also has to account for. release
  # the blocks the old table occupied, then claim blocks through a resizeable
  # io hooked up to the bbat until the claimed space holds the serialised
  # table (claiming blocks may itself grow the table, hence the loop).
  reserved = [AllocationTable::BAT, AllocationTable::META_BAT]
  @bbat.table.map! { |b| reserved.include?(b) ? AllocationTable::AVAIL : b }
  bat_io = RangesIOResizeable.new(@bbat, AllocationTable::EOC)
  loop do
    serialized = @bbat.save
    claimed = @bbat.chain(bat_io.first_block)
    raise NotImplementedError, "don't handle writing out extra META_BAT blocks yet" if claimed.length > 109
    # so we can ignore meta blocks in this calculation:
    break if bat_io.size >= serialized.length # it shouldn't be bigger right?
    # this may grow the bbat, depending on existing available blocks
    bat_io.truncate(serialized.length)
  end

  # now extract the info we want: where the table goes, and which blocks it uses
  bat_ranges = bat_io.ranges
  mbat_chain = @bbat.chain(bat_io.first_block)
  bat_io.close
  mbat_chain.each { |b| @bbat.table[b] = AllocationTable::BAT }
  @header.num_bat = mbat_chain.length
  # plain RangesIO over the parent io - not resizeable!
  raw_io = RangesIO.new(@io, bat_ranges)
  raw_io.write(@bbat.save)
  raw_io.close
  # the header block lists up to 109 bat blocks; fill the rest with AVAIL
  mbat_chain += [AllocationTable::AVAIL] * (109 - mbat_chain.length)
  @header.mbat_start = AllocationTable::EOC
  @header.num_mbat = 0

  @root.type = :dir

  # now seek back and write the header out
  @io.seek(0)
  @io.write(@header.save + mbat_chain.pack('L*'))
  @io.flush
end
290
-
291
# Reset this object to the equivalent of a freshly loaded, empty ole document
# and truncate the underlying io to zero length.
def clear
  unless @writeable
    Log.warn 'creating new ole storage object on non-writable io'
  end

  @header = Header.new
  @bbat = AllocationTable::Big.new(self)

  # a lone root directory with no children
  @root = Dirent.new(self, :dir)
  @root.name = 'Root Entry'
  @dirents = [@root]
  @root.idx = 0
  @root.children = []
  # size shouldn't display for non-files
  @root.size = 0

  # replace any previous small block file with an empty one
  if @sb_file
    @sb_file.close
  end
  @sb_file = RangesIOResizeable.new(@bbat, AllocationTable::EOC)
  @sbat = AllocationTable::Small.new(self)

  # throw everything else the hell away
  @io.truncate(0)
end
309
-
310
- # could be useful with mis-behaving ole documents. or to just clean them up.
311
# could be useful with mis-behaving ole documents. or to just clean them up.
#
# Rewrites the document via a temporary copy. +temp+ selects the backing for
# that copy: <tt>:file</tt> (a Tempfile, the default) or <tt>:mem</tt> (a
# StringIO). Any other value raises.
def repack temp=:file
  case temp
  when :file
    # the first argument of Tempfile.open is a basename, not a mode string.
    # the copy holds binary data, so switch the file to binary mode.
    Tempfile.open('ole-repack') do |file|
      file.binmode
      repack_using_io file
    end
  when :mem
    StringIO.open { |mem| repack_using_io mem }
  else
    raise "unknown temp backing #{temp.inspect}"
  end
end
318
-
319
# Copy the whole current io into +temp_io+, reset this storage with #clear,
# then open the copy as a second storage and copy its root tree back into ours.
def repack_using_io temp_io
  @io.rewind
  IO.copy(@io, temp_io)
  clear
  Storage.open(temp_io) do |snapshot|
    snapshot.root.type = :dir
    Dirent.copy(snapshot.root, root)
  end
end
328
-
329
# Pick the allocation table for a stream of +size+ bytes: the big block table
# when +size+ is at or above the header's threshold, the small one otherwise.
def bat_for_size size
  # note >=, not > previously.
  if size >= @header.threshold
    @bbat
  else
    @sbat
  end
end
333
-
334
# Short description showing the class, the underlying io and the root entry.
def inspect
  format '#<%s io=%s root=%s>', self.class, @io.inspect, @root.inspect
end
337
-
338
- # A class which wraps the ole header
339
class Header < Struct.new(
    :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
    :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
    :sbat_start, :num_sbat, :mbat_start, :num_mbat
  )
  # pack/unpack template for the members above, in order
  PACK = 'a8 a16 S2 a2 S2 a6 L3 a4 L5'
  # packed size in bytes
  SIZE = 0x4c
  # i have seen it pointed out that the first 4 bytes of hex,
  # 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
  #
  # expected value of Header#magic. built with pack so that it is a plain byte
  # string whatever the source file encoding is.
  MAGIC = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1].pack('C*')
  # the only byte order marker we accept (little endian)
  BYTE_ORDER = [0xfe, 0xff].pack('C*')
  # what you get if creating new header from scratch.
  # AllocationTable::EOC isn't available yet. meh.
  EOC = 0xfffffffe
  DEFAULT = [
    MAGIC, 0.chr * 16, 59, 3, BYTE_ORDER, 9, 6,
    0.chr * 6, 0, 1, EOC, 0.chr * 4,
    4096, EOC, 0, EOC, 0
  ]

  # 2 basic initializations, from scratch, or from a data string.
  # from scratch will be geared towards creating a new ole object.
  # With no +values+ the DEFAULT set is used. The result is validated, so an
  # unacceptable header raises.
  def initialize *values
    super(*(values.empty? ? DEFAULT : values))
    validate!
  end

  # Build a header from its packed form (see PACK).
  def self.load str
    Header.new(*str.unpack(PACK))
  end

  # Packed form of the header, SIZE bytes long.
  def save
    to_a.pack PACK
  end

  # Raises RuntimeError when the signature or the structural fields are not
  # acceptable, logs a warning for merely suspicious values, and returns true
  # otherwise.
  def validate!
    raise "OLE2 signature is invalid" unless same_bytes? magic, MAGIC
    if num_bat == 0 || # is that valid for a completely empty file?
       # not sure about this one. basically to do max possible bat given size of mbat
       (num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2)) ||
       # shouldn't need to use the mbat as there is enough space in the header block
       (num_bat < 109 && num_mbat != 0) ||
       # given the size of the header is 76, if b_shift <= 6, blocks address the header.
       s_shift > b_shift || b_shift <= 6 || b_shift >= 31 ||
       # we only handle little endian
       !same_bytes?(byte_order, BYTE_ORDER)
      raise "not valid OLE2 structured storage file"
    end
    # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
    # 3 for this value.
    # transacting_signature != "\x00" * 4 or
    if threshold != 4096 ||
       (num_mbat == 0 && mbat_start != EOC) ||
       reserved != 0.chr * 6
      Log.warn "may not be a valid OLE2 structured storage file"
    end
    true
  end

  private

  # Byte-wise string comparison. Plain == treats two strings holding the same
  # non-ascii bytes as different when their encodings differ, which is the
  # case for data coming out of unpack versus a string literal.
  def same_bytes? a, b
    a.to_s.unpack('C*') == b.to_s.unpack('C*')
  end
end
397
-
398
- #
399
- # +AllocationTable+'s hold the chains corresponding to files. Given
400
- # an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning
401
- # the blocks that make up that file.
402
- #
403
- # There are 2 allocation tables, the bbat, and sbat, for big and small
404
- # blocks respectively. The block chain should be loaded using either
405
- # <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt>
406
- # as appropriate.
407
- #
408
- # Whether or not big or small blocks are used for a file depends on
409
- # whether its size is over the <tt>Header#threshold</tt> level.
410
- #
411
- # An <tt>Ole::Storage</tt> document is serialized as a series of directory objects,
412
- # which are stored in blocks throughout the file. The blocks are either
413
- # big or small, and are accessed using the <tt>AllocationTable</tt>.
414
- #
415
- # The bbat allocation table's data is stored in the spare room in the header
416
- # block, and in extra blocks throughout the file as referenced by the meta
417
- # bat. That chain is linear, as there is no higher level table.
418
- #
419
class AllocationTable
  # a free block (I don't currently leave any blocks free), although I do pad out
  # the allocation table with AVAIL to the block size.
  AVAIL = 0xffffffff
  EOC = 0xfffffffe # end of a chain
  # these blocks correspond to the bat, and aren't part of a file, nor available.
  # (I don't currently output these)
  BAT = 0xfffffffd
  META_BAT = 0xfffffffc

  attr_reader :ole, :io, :table, :block_size

  # +ole+ is the owning storage object. The table starts out empty.
  def initialize ole
    @ole = ole
    @table = []
  end

  # Replace the table with the 32 bit integers packed in +data+.
  def load data
    @table = data.unpack('L*')
  end

  # A copy of the table with trailing AVAIL entries removed. A table that
  # contains nothing but AVAIL entries is returned unstripped.
  def truncated_table
    # this strips trailing AVAILs. come to think of it, this has the potential to break
    # bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
    # very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
    # at load time.
    last_used = @table.rindex { |b| b != AVAIL }
    last_used ? @table[0..last_used] : @table.dup
  end

  # Packed form of the truncated table, padded with AVAIL up to a whole number
  # of big blocks.
  def save
    table = truncated_table
    # pad it out some
    per_block = @ole.bbat.block_size / 4
    # do you really use AVAIL? they probably extend past end of file, and may shortly
    # be used for the bat. not really good.
    leftover = table.length % per_block
    table += [AVAIL] * (per_block - leftover) if leftover != 0
    table.pack 'L*'
  end

  # Follow the chain that begins at block +start+ and return the list of block
  # indices in it. Iterative on purpose: a recursive version overflowed the
  # stack on a large attachment.
  #
  # Raises RuntimeError if the chain leaves the table or loops back on itself;
  # logs a warning if it ends in something other than EOC.
  def chain start
    a = []
    idx = start
    until idx >= META_BAT
      raise "broken allocationtable chain" if idx < 0 || idx >= @table.length
      # a chain of distinct blocks can't be longer than the table, so being
      # asked for more means we are going round in circles
      raise "circular allocationtable chain" if a.length >= @table.length
      a << idx
      idx = @table[idx]
    end
    Log.warn "invalid chain terminator #{idx}" unless idx == EOC
    a
  end

  # Byte ranges for +chain+, which is either an array of blocks or the head
  # block of a chain to look up. See #blocks_to_ranges.
  def ranges chain, size=nil
    chain = self.chain(chain) unless Array === chain
    blocks_to_ranges chain, size
  end

  # Turn a chain (an array given by +chain+) of blocks, optionally
  # truncated to +size+, into an array of [offset, length] arrays describing
  # the stretches of bytes in the io that it covers:
  # 1. truncate the chain if required
  # 2. convert chain to ranges of the block size
  # 3. truncate final range if required
  def blocks_to_ranges chain, size=nil
    chain = chain[0...(size.to_f / block_size).ceil] if size
    ranges = chain.map { |i| [block_size * i, block_size] }
    ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
    ranges
  end

  # quick shortcut. chain can be either a head (in which case the table is used to
  # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
  # its not resizeable or migrateable. it probably could be resizeable though, using
  # self as the bat. but what would the first_block be?
  #
  # With a block the io is yielded and closed afterwards; otherwise it is returned.
  def open chain, size=nil
    io = RangesIO.new @io, ranges(chain, size)
    return io unless block_given?
    begin
      yield io
    ensure
      io.close
    end
  end

  # Read the whole of +chain+ (optionally limited to +size+ bytes).
  def read chain, size=nil
    open(chain, size) { |io| io.read }
  end

  # ----------------------

  # Index of the first AVAIL entry; the table is extended by one AVAIL entry
  # if there is none.
  def get_free_block
    @table.each_index { |i| return i if @table[i] == AVAIL }
    @table.push AVAIL
    @table.length - 1
  end

  # Grow or shrink the chain starting at +first_block+ so that it can hold
  # +size+ bytes. Returns the (possibly new) first block, or EOC when the
  # chain was shrunk to nothing.
  def resize_chain first_block, size
    new_num_blocks = (size / block_size.to_f).ceil
    blocks = chain first_block
    old_num_blocks = blocks.length
    if new_num_blocks < old_num_blocks
      # de-allocate some of our old blocks. TODO maybe zero them out in the file???
      (new_num_blocks...old_num_blocks).each { |i| @table[blocks[i]] = AVAIL }
      # if we have a chain, terminate it and return head, otherwise return EOC
      return EOC unless new_num_blocks > 0
      @table[blocks[new_num_blocks-1]] = EOC
    elsif new_num_blocks > old_num_blocks
      # need some more blocks.
      last_block = blocks.last
      (new_num_blocks - old_num_blocks).times do
        block = get_free_block
        # connect the chain. handle corner case of blocks being [] initially
        if last_block
          @table[last_block] = block
        else
          first_block = block
        end
        last_block = block
        # this is just to inhibit the problem where it gets picked as being a free block
        # again next time around.
        @table[last_block] = EOC
      end
    end
    first_block
  end

  class Big < AllocationTable
    def initialize(*args)
      super
      @block_size = 1 << @ole.header.b_shift
      @io = @ole.io
    end

    # Big blocks are kind of -1 based, in order to not clash with the header.
    def blocks_to_ranges blocks, size=nil
      super blocks.map { |b| b + 1 }, size
    end
  end

  class Small < AllocationTable
    def initialize(*args)
      super
      @block_size = 1 << @ole.header.s_shift
      @io = @ole.sb_file
    end
  end
end
577
-
578
- # like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
579
- # AllocationTable, and can be resized. used for read/write to 2 streams:
580
- # 1. serialized dirent data
581
- # 2. sbat table data
582
- # 3. all dirents but through RangesIOMigrateable below
583
- #
584
- # Note that all internal access to first_block is through accessors, as it is sometimes
585
- # useful to redirect it.
586
class RangesIOResizeable < RangesIO
  attr_reader :bat
  attr_accessor :first_block

  # +bat+ is the allocation table backing this stream, +first_block+ the head
  # of its chain, and +size+ the optional length in bytes.
  def initialize bat, first_block, size=nil
    @bat = bat
    self.first_block = first_block
    super @bat.io, @bat.ranges(first_block, size)
  end

  # Resize the stream to +size+ bytes by resizing its chain in the allocation
  # table, then refresh the ranges, clamp the position and grow the parent io
  # if the ranges now reach past its end.
  def truncate size
    # note that old_blocks is != @ranges.length necessarily. i'm planning to write a
    # merge_ranges function that merges sequential ranges into one as an optimization.
    self.first_block = @bat.resize_chain first_block, size
    @ranges = @bat.ranges first_block, size
    # pull the position back to the new end (not the old @size, which may
    # lie beyond it)
    @pos = size if @pos > size

    # don't know if this is required, but we explicitly request our @io to grow if necessary
    # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
    # can be made.
    # maybe its ok to just seek out there later??
    max = @ranges.map { |pos, len| pos + len }.max || 0
    @io.truncate max if max > @io.size

    @size = size
  end
end
612
-
613
- # like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
614
- # between bats based on size, and updating the dirent, instead of the ole copy back
615
- # on close.
616
class RangesIOMigrateable < RangesIOResizeable
  attr_reader :dirent

  # Wrap the data of +dirent+, using whichever allocation table its storage
  # selects for the dirent's current size.
  def initialize dirent
    @dirent = dirent
    super @dirent.ole.bat_for_size(@dirent.size), @dirent.first_block, @dirent.size
  end

  # Resize to +size+ bytes. If the new size belongs in the other allocation
  # table the data is migrated there. The dirent's size is updated either way.
  def truncate size
    bat = @dirent.ole.bat_for_size size
    if bat != @bat
      # bat migration needed! we need to backup some data. the amount of data
      # should be <= @ole.header.threshold, so we can just hold it all in one buffer.
      # backup this
      pos = @pos
      @pos = 0
      keep = read [@size, size].min
      # this does a normal truncate to 0, removing our presence from the old bat, and
      # rewrite the dirent's first_block
      super 0
      @bat = bat
      # just change the underlying io from right under everyone :)
      @io = bat.io
      # important to do this now, before the write. as the below write will always
      # migrate us back to sbat! this will now allocate us +size+ in the new bat.
      super
      @pos = 0
      write keep
      # restore the caller's position, but never beyond the new end
      @pos = pos > size ? size : pos
    else
      super
    end
    # now just update the file
    @dirent.size = size
  end

  # forward this to the dirent
  def first_block
    @dirent.first_block
  end

  def first_block= val
    @dirent.first_block = val
  end
end
660
-
661
#
# A class which wraps an ole directory entry. Can be either a directory
# (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>)
#
# Most interaction with <tt>Ole::Storage</tt> is through this class.
# The most important methods are <tt>Dirent#children</tt> for navigating the
# tree, and <tt>Dirent#open</tt> / <tt>Dirent#read</tt> for file contents.
#
# was considering separate classes for dirs and files. some methods/attrs only
# applicable to one or the other.
#
# Note that Dirent is still using a home grown Struct variant, with explicit
# MEMBERS etc. any reason for that still?
#
class Dirent
  # Names of the packed fields, in the same order as the PACK directives. An
  # accessor pair is generated for each one further down.
  MEMBERS = [
    :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child,
    :clsid, :flags, # dirs only
    :create_time_str, :modify_time_str, # files only
    :first_block, :size, :reserved
  ]
  # pack/unpack template for one record, and the record length in bytes.
  PACK = 'a64 S C C L3 a16 L a8 a8 L2 a4'
  SIZE = 128
  TYPE_MAP = {
    # this is temporary
    0 => :empty,
    1 => :dir,
    2 => :file,
    5 => :root
  }
  COLOUR_MAP = {
    0 => :red,
    1 => :black
  }
  # used in the next / prev / child stuff to show that the tree ends here.
  # also used for first_block for directory.
  EOT = 0xffffffff

  include Enumerable

  # Dirent's should be created in 1 of 2 ways, either Dirent.new ole, [:dir/:file/:root],
  # or Dirent.load ole, '... dirent data ...'
  # its a bit clunky, but thats how it is at the moment. you can assign to type, but
  # shouldn't.

  # Position of this dirent in the flattened list; assigned by #flatten.
  attr_accessor :idx
  # This returns all the children of this +Dirent+. It is filled in
  # when the tree structure is recreated.
  attr_accessor :children
  attr_reader :ole, :create_time, :modify_time
  attr_accessor :name, :type

  # Builds a fresh, empty dirent of the given +type+ belonging to +ole+.
  # File dirents get the current time as both create and modify time.
  def initialize ole, type
    @ole = ole
    # this isn't really good enough. need default values put in there.
    @values = [
      0.chr * 2, 2, 0, # will get overwritten
      1, EOT, EOT, EOT,
      0.chr * 16, 0, nil, nil,
      AllocationTable::EOC, 0, 0.chr * 4]
    # maybe check types here.
    @type = type
    @create_time = @modify_time = nil
    @children = []
    if file?
      @create_time = Time.now
      @modify_time = Time.now
    end
  end

  # Creates a dirent from the packed record +str+ without running #initialize.
  def self.load ole, str
    # load should function without the need for the initializer.
    dirent = Dirent.allocate
    dirent.load ole, str
    dirent
  end

  # Populates this dirent from the packed record +str+.
  # Raises if the record's type id is not in TYPE_MAP.
  def load ole, str
    @ole = ole
    @values = str.unpack PACK
    # strip the UTF-16 terminator. anchored with \z rather than $, because $
    # also matches in front of an embedded "\n" byte and would then remove two
    # bytes from the middle of the name.
    @name = Types::FROM_UTF16.iconv name_utf16[0...name_len].sub(/\x00\x00\z/, '')
    @type = TYPE_MAP[type_id] or raise "unknown type #{type_id.inspect}"
    if file?
      @create_time = Types.load_time create_time_str
      @modify_time = Types.load_time modify_time_str
    end
  end

  # only defined for files really. and the above children stuff is only for children.
  # maybe i should have some sort of File and Dir class, that subclass Dirents? a dirent
  # is just a data holder.
  # this can be used for write support if the underlying io object was opened for writing.
  # maybe take a mode string argument, and do truncation, append etc stuff.
  #
  # Returns nil for anything that is not a file. With a block, yields the io,
  # closes it afterwards and returns the block's value; without one, returns
  # the io itself.
  def open
    return nil unless file?
    io = RangesIOMigrateable.new self
    if block_given?
      begin yield io
      ensure; io.close
      end
    else io
    end
  end

  # Reads up to +limit+ bytes of the file through a temporary io (see #open).
  def read limit=nil
    open { |io| io.read limit }
  end

  def dir?
    # to count root as a dir.
    type != :file
  end

  def file?
    type == :file
  end

  # The create time, or failing that the modify time. Both are only ever
  # populated for dirents where file? holds (see #initialize and #load).
  def time
    # try either of the stored time pairs (not sure of their meaning -
    # created / modified?)
    #@time ||= file? ? nil : (Dirent.parse_time(secs1, days1) || Dirent.parse_time(secs2, days2))
    create_time || modify_time
  end

  # Iterates over the direct children; this is what Enumerable builds on.
  def each(&block)
    @children.each(&block)
  end

  # With an Integer, returns the child at that position. Otherwise returns the
  # first child whose name matches +idx+ via ===, or nil.
  def [] idx
    return children[idx] if Integer === idx
    # path style look up.
    # maybe take another arg to allow creation? or leave that to the filesystem
    # add on.
    # not sure if '/' is a valid char in an Dirent#name, so no splitting etc at
    # this level.
    # also what about warning about multiple hits for the same name?
    children.find { |child| idx === child.name }
  end

  # solution for the above '/' thing for now.
  def / path
    self[path]
  end

  # Renders this dirent and everything below it as an indented text tree.
  def to_tree
    if children and !children.empty?
      str = "- #{inspect}\n"
      children.each_with_index do |child, i|
        last = i == children.length - 1
        child.to_tree.split(/\n/).each_with_index do |line, j|
          str << " #{last ? (j == 0 ? "\\" : ' ') : '|'}#{line}\n"
        end
      end
      str
    else "- #{inspect}\n"
    end
  end

  # a reader and a writer per packed field, backed by slots of @values.
  MEMBERS.each_with_index do |sym, i|
    define_method(sym) { @values[i] }
    define_method(sym.to_s + '=') { |val| @values[i] = val }
  end

  # The raw field values, in MEMBERS order (not a copy).
  def to_a
    @values
  end

  # flattens the tree starting from here into +dirents+. note it modifies its argument.
  # Assigns idx on every dirent visited and rewrites the child / prev / next links.
  def flatten dirents=[]
    @idx = dirents.length
    dirents << self
    children.each { |child| child.flatten dirents }
    self.child = Dirent.flatten_helper children
    dirents
  end

  # i think making the tree structure optimized is actually more complex than this, and
  # requires some intelligent ordering of the children based on names, but as long as
  # it is valid its ok.
  # actually, i think its ok. gsf for example only outputs a singly-linked-list, where
  # prev is always EOT.
  #
  # Links +children+ into a tree through prev / next by repeatedly taking the
  # middle element, and returns the idx of the top element (EOT when empty).
  def self.flatten_helper children
    return EOT if children.empty?
    i = children.length / 2
    this = children[i]
    this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] }
    this.idx
  end

  # Writes name and type back into the packed fields and returns the record
  # as a string. Raises if the type is not one of the TYPE_MAP values.
  def save
    tmp = Types::TO_UTF16.iconv(name)
    # at most 62 bytes of name, so that the 2 byte terminator still fits in 64.
    tmp = tmp[0, 62] if tmp.length > 62
    tmp += 0.chr * 2
    self.name_len = tmp.length
    self.name_utf16 = tmp + 0.chr * (64 - tmp.length)
    entry = TYPE_MAP.find { |id, sym| sym == @type } or raise "unknown type #{type.inspect}"
    self.type_id = entry.first
    # for the case of files, it is assumed that that was handled already
    # note not dir?, so as not to override root's first_block
    self.first_block = Dirent::EOT if type == :dir
    # the create_time / modify_time objects are not serialised (yet). time
    # strings that came in through #load are written back untouched, and
    # missing ones become 8 zero bytes. this used to be an `if 0 ... else`,
    # but 0 is truthy in ruby, so the zeroing branch could never run.
    self.create_time_str ||= 0.chr * 8
    self.modify_time_str ||= 0.chr * 8
    @values.pack PACK
  end

  # Short description: the name, plus size, time and a data snippet for files.
  def inspect
    str = "#<Dirent:#{name.inspect}"
    # perhaps i should remove the data snippet. its not that useful anymore.
    if file?
      # to_s in case the read hands back nil rather than an empty string
      # (the io's read is defined elsewhere).
      tmp = read(9).to_s
      data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp
      str << " size=#{size}" +
        "#{time ? ' time=' + time.to_s.inspect : nil}" +
        " data=#{data.inspect}"
    else
      # there is some dir specific stuff. like clsid, flags.
    end
    str + '>'
  end

  # --------
  # and for creation of a dirent. don't like the name. is it a file or a directory?
  # assign to type later? io will be empty.
  #
  # Appends a new dirent of +type+ to the children, yields it if a block is
  # given, and returns it.
  def new_child type
    child = Dirent.new ole, type
    children << child
    yield child if block_given?
    child
  end

  # Removes +child+ from the children and truncates its data to 0.
  # Raises if +child+ is not one of the children.
  def delete child
    # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
    raise "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
    # free our blocks
    child.open { |io| io.truncate 0 }
  end

  # copies the contents of src to dst. must be the same kind of entry, ie both
  # files or both non-files, otherwise 'differing types' is raised. maybe this
  # will recurse too much for big documents??
  def self.copy src, dst
    # checked in both directions: a dir copied onto a file would otherwise
    # quietly hang children off a file entry.
    raise 'differing types' if src.file? != dst.file?
    dst.name = src.name
    if src.dir?
      src.children.each do |src_child|
        dst.new_child(src_child.type) { |dst_child| Dirent.copy src_child, dst_child }
      end
    else
      src.open do |src_io|
        dst.open { |dst_io| IO.copy src_io, dst_io }
      end
    end
  end
end
921
- end
922
- end
923
-
924
# When run directly as a script, print the directory tree of the ole file
# named by the first command line argument.
if __FILE__ == $0
  tree = Ole::Storage.open(ARGV[0]) { |ole| ole.root.to_tree }
  puts tree
end
927
-