ruby-msg 1.2.17.3 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/FIXES +22 -0
- data/Rakefile +13 -16
- data/bin/msgtool +1 -1
- data/lib/msg.rb +26 -9
- data/lib/msg/properties.rb +28 -11
- metadata +19 -18
- data/bin/oletool +0 -35
- data/lib/ole/base.rb +0 -5
- data/lib/ole/file_system.rb +0 -181
- data/lib/ole/io_helpers.rb +0 -184
- data/lib/ole/storage.rb +0 -927
- data/lib/ole/types.rb +0 -36
- data/lib/support.rb +0 -51
- data/test/test_storage.rb +0 -139
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
data/lib/ole/io_helpers.rb
DELETED
@@ -1,184 +0,0 @@
|
|
1
|
-
|
2
|
-
# move to support?
|
3
|
-
class IO # :nodoc:
  # Copy everything that remains in +src+ to +dst+, reading +blocksize+ bytes
  # at a time.
  #
  # +src+ must respond to +eof?+ and +read+, and +dst+ to +write+. The loop is
  # driven by +eof?+ rather than by the return value of +read+, because some
  # readers return an empty string instead of +nil+ once they are exhausted.
  #
  # Returns the number of bytes copied.
  def self.copy src, dst, blocksize=4096
    copied = 0
    until src.eof?
      buf = src.read(blocksize)
      # a nil read means the source ran dry between the eof? check and the read
      break unless buf
      dst.write buf
      copied += buf.bytesize
    end
    copied
  end
end
|
11
|
-
|
12
|
-
#
|
13
|
-
# = Introduction
|
14
|
-
#
|
15
|
-
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
|
16
|
-
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
|
17
|
-
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
|
18
|
-
# no method to stream it.
|
19
|
-
#
|
20
|
-
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
|
21
|
-
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
|
22
|
-
# getting 16 bytes doesn't read the whole thing).
|
23
|
-
#
|
24
|
-
# In the simplest case it can be used with a single range to provide a limited io to a section of
|
25
|
-
# a file.
|
26
|
-
#
|
27
|
-
# = Limitations
|
28
|
-
#
|
29
|
-
# * No buffering. by design at the moment. Intended for large reads
|
30
|
-
#
|
31
|
-
# = TODO
|
32
|
-
#
|
33
|
-
# On further reflection, this class is something of a joining/optimization of
|
34
|
-
# two separate IO classes. a SubfileIO, for providing access to a range within
|
35
|
-
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
|
36
|
-
# a bunch of io objects as a single unified whole.
|
37
|
-
#
|
38
|
-
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
|
39
|
-
# convert a whole mime message into an IO stream, that can be read from.
|
40
|
-
# It will just be the concatenation of a series of IO objects, corresponding to
|
41
|
-
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
|
42
|
-
# original message proper, or RangesIO as provided by the Attachment#data, that
|
43
|
-
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
|
44
|
-
# fly. Thus the attachment, in its plain or encoded form, and the message as a
|
45
|
-
# whole never exists as a single string in memory, as it does now. This is a
|
46
|
-
# fair bit of work to achieve, but generally useful I believe.
|
47
|
-
#
|
48
|
-
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
49
|
-
#
|
50
|
-
class RangesIO
  attr_reader :io, :ranges, :size, :pos

  # +io+ is the parent io object that we are wrapping.
  #
  # +ranges+ are byte offsets, either
  # 1. an array of ranges [1..2, 4..5, 6..8] or
  # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
  #    (think the way String indexing works)
  # A Range is converted to [begin, end - begin], so its end is always treated
  # as exclusive. The +ranges+ provide sequential slices of the file that will
  # be read. They can overlap.
  #
  # +opts+ accepts <tt>:close_parent</tt> (default false), which makes #close
  # also close +io+.
  def initialize io, ranges, opts={}
    @opts = {:close_parent => false}.merge opts
    @io = io
    # convert ranges to [offset, length] arrays. check for negative ranges?
    @ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
    # total number of bytes addressable through this object
    @size = @ranges.inject(0) { |total, (_, len)| total + len }
    # initial position in the (virtual) file
    @pos = 0
  end

  # Set the position within the concatenated ranges. Only IO::SEEK_SET is
  # supported for +whence+; anything else raises NotImplementedError.
  def pos= pos, whence=IO::SEEK_SET
    # FIXME support other whence values
    raise NotImplementedError, "#{whence.inspect} not supported" unless whence == IO::SEEK_SET
    # just a simple pos calculation. invalidate buffers if we had them
    @pos = pos
  end

  alias seek :pos=
  alias tell :pos

  # Closes the parent io, but only if we were created with :close_parent => true.
  def close
    @io.close if @opts[:close_parent]
  end

  # Returns <tt>[range, offset]</tt> for +pos+: the <tt>[start, length]</tt>
  # range containing that position and the offset of +pos+ within it. A
  # position sitting exactly on the end of a range is reported as belonging to
  # that range (with offset == length). Raises if no range covers +pos+.
  def range_and_offset pos
    i, off = locate pos
    [ranges[i], off]
  end

  # Like #range_and_offset, but returns the *index* of the range rather than
  # the range itself. Looking a range up again by value (Array#index) finds
  # the first equal entry, which is the wrong one when the same range appears
  # more than once in the list.
  def locate pos
    total = 0
    ranges.each_with_index do |(_, len), i|
      stop = total + len
      return [i, pos - total] if pos <= stop
      total = stop
    end
    # should be impossible for any valid pos, (0...size) === pos
    raise "unable to find range for pos #{pos.inspect}"
  end
  private :locate

  # True once the position has reached (or been moved past) the end.
  def eof?
    @pos >= @size
  end

  # read bytes from file, to a maximum of +limit+, or all available if unspecified.
  # Returns an empty string at eof. If the parent io delivers fewer bytes than a
  # range promises (ie the ranges extend beyond the end of the parent), reading
  # stops there and the position only advances by what was actually read.
  def read limit=nil
    data = ''
    limit ||= size
    # special case eof
    return data if eof?
    i, off = locate @pos
    start, first_len = ranges[i]
    # this may be conceptually nice (create sub-range starting where we are), but
    # for a large range array its pretty wasteful.
    ([[start + off, first_len - off]] + ranges[i+1..-1]).each do |pos, len|
      want = limit < len ? limit : len
      @io.seek pos
      chunk = @io.read(want)
      got = chunk ? chunk.length : 0
      data << chunk if chunk
      @pos += got
      limit -= got
      # stop on a short read, or once the request has been satisfied
      break if got < want or limit <= 0
    end
    data
  end

  # you may override this call to update @ranges and @size, if applicable. then write
  # support can grow below
  def truncate size
    raise NotImplementedError, 'truncate not supported'
  end
  # why not? :)
  alias size= :truncate

  # Write +data+ at the current position, spreading it over as many ranges as
  # needed, and return the number of bytes written. If there is not enough
  # room, #truncate is asked to grow the object first; when that is not
  # implemented a RuntimeError is raised.
  def write data
    # short cut. needed because truncate 0 may return no ranges, instead of empty range,
    # thus the range lookup fails.
    return 0 if data.empty?
    # if we don't have room, we can use the truncate hook to make more space.
    if data.length > @size - @pos
      begin
        truncate @pos + data.length
      rescue NotImplementedError
        # FIXME maybe warn instead, then just truncate the data?
        raise "unable to satisfy write of #{data.length} bytes"
      end
    end
    i, off = locate @pos
    start, first_len = ranges[i]
    data_pos = 0
    ([[start + off, first_len - off]] + ranges[i+1..-1]).each do |pos, len|
      @io.seek pos
      if data_pos + len > data.length
        # the rest of the data fits inside this range
        chunk = data[data_pos..-1]
        @io.write chunk
        @pos += chunk.length
        data_pos = data.length
        break
      end
      @io.write data[data_pos, len]
      @pos += len
      data_pos += len
    end
    data_pos
  end

  # this will be generalised to a module later
  def each_read blocksize=4096
    yield read(blocksize) until eof?
  end

  # Shows the parent io, size, position and the range the position falls in.
  def inspect
    # the rescue is for empty files, where no range can be found
    start, len = begin
      range_and_offset(@pos)[0]
    rescue
      [nil, nil]
    end
    range_str = start ? "#{start}..#{start+len}" : 'nil'
    "#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos current_range=#{range_str}>"
  end
end
|
184
|
-
|
data/lib/ole/storage.rb
DELETED
@@ -1,927 +0,0 @@
|
|
1
|
-
#! /usr/bin/ruby -w
|
2
|
-
|
3
|
-
$: << File.dirname(__FILE__) + '/..'
|
4
|
-
|
5
|
-
require 'support'
|
6
|
-
|
7
|
-
require 'stringio'
|
8
|
-
require 'tempfile'
|
9
|
-
|
10
|
-
require 'ole/base'
|
11
|
-
require 'ole/types'
|
12
|
-
# not strictly ole related
|
13
|
-
require 'ole/io_helpers'
|
14
|
-
|
15
|
-
module Ole # :nodoc:
|
16
|
-
#
|
17
|
-
# = Introduction
|
18
|
-
#
|
19
|
-
# <tt>Ole::Storage</tt> is a class intended to abstract away details of the
|
20
|
-
# access to OLE2 structured storage files, such as those produced by
|
21
|
-
# Microsoft Office, eg *.doc, *.msg etc.
|
22
|
-
#
|
23
|
-
# Initially based on chicago's libole, source available at
|
24
|
-
# http://prdownloads.sf.net/chicago/ole.tgz
|
25
|
-
# Later augmented with some corrections by inspecting pole, and (purely
|
26
|
-
# for header definitions) gsf.
|
27
|
-
#
|
28
|
-
# = Usage
|
29
|
-
#
|
30
|
-
# Usage should be fairly straight forward:
|
31
|
-
#
|
32
|
-
# # get the parent ole storage object
|
33
|
-
# ole = Ole::Storage.open 'myfile.msg', 'r+'
|
34
|
-
# # => #<Ole::Storage io=#<File:myfile.msg> root=#<Dirent:"Root Entry">>
|
35
|
-
# # read some data
|
36
|
-
# ole.root[1].read 4
|
37
|
-
# # => "\001\000\376\377"
|
38
|
-
# # get the top level root object and output a tree structure for
|
39
|
-
# # debugging
|
40
|
-
# puts ole.root.to_tree
|
41
|
-
# # =>
|
42
|
-
# - #<Dirent:"Root Entry" size=3840 time="2006-11-03T00:52:53Z">
|
43
|
-
# |- #<Dirent:"__nameid_version1.0" size=0 time="2006-11-03T00:52:53Z">
|
44
|
-
# | |- #<Dirent:"__substg1.0_00020102" size=16 data="CCAGAAAAAADAAA...">
|
45
|
-
# ...
|
46
|
-
# |- #<Dirent:"__substg1.0_8002001E" size=4 data="MTEuMA==">
|
47
|
-
# |- #<Dirent:"__properties_version1.0" size=800 data="AAAAAAAAAAABAA...">
|
48
|
-
# \- #<Dirent:"__recip_version1.0_#00000000" size=0 time="2006-11-03T00:52:53Z">
|
49
|
-
# |- #<Dirent:"__substg1.0_0FF60102" size=4 data="AAAAAA==">
|
50
|
-
# ...
|
51
|
-
# # write some data, and finish up (note that open is 'r+', so this overwrites
|
52
|
-
# # but doesn't truncate)
|
53
|
-
# ole.root["\001CompObj"].open { |f| f.write "blah blah" }
|
54
|
-
# ole.close
|
55
|
-
#
|
56
|
-
# = TODO
|
57
|
-
#
|
58
|
-
# 1. tests. lock down how things work at the moment - mostly good.
|
59
|
-
# create from scratch works now, as does copying in a subtree of another doc, so
|
60
|
-
# ole embedded attachment serialization works now. i can save embedded xls in an msg
|
61
|
-
# into a separate file, and open it. this was a goal. now i would want to implement
|
62
|
-
# to_mime conversion for embedded attachments, that serializes them to ole, but handles
|
63
|
-
# some separately like various meta file types as plain .wmf attachments perhaps. this
|
64
|
-
# will give pretty good .eml's from emails with embedded attachments.
|
65
|
-
# the other todo is .rtf output, with full support for embedded ole objects...
|
66
|
-
# 2. lots of tidying up
|
67
|
-
# - main FIXME's in this regard are:
|
68
|
-
# * the custom header cruft for Header and Dirent needs some love.
|
69
|
-
# * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
|
70
|
-
# and, in a manner of speaking, but arguably different, Storage itself.
|
71
|
-
# they have differing api's which would be nice to clean.
|
72
|
-
# AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
|
73
|
-
# * ole types need work, can't serialize datetime at the moment.
|
74
|
-
# 3. need to fix META_BAT support in #flush.
|
75
|
-
#
|
76
|
-
class Storage
|
77
|
-
VERSION = '1.1.3'
|
78
|
-
|
79
|
-
# The top of the ole tree structure
|
80
|
-
attr_reader :root
|
81
|
-
# The tree structure in its original flattened form. only valid after #load, or #flush.
|
82
|
-
attr_reader :dirents
|
83
|
-
# The underlying io object to/from which the ole object is serialized, whether we
|
84
|
-
# should close it, and whether it is writeable
|
85
|
-
attr_reader :io, :close_parent, :writeable
|
86
|
-
# Low level internals, you probably shouldn't need to mess with these
|
87
|
-
attr_reader :header, :bbat, :sbat, :sb_file
|
88
|
-
|
89
|
-
# maybe include an option hash, and allow :close_parent => true, to be more general.
|
90
|
-
# +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
|
91
|
-
# +arg+ should be either a filename, or an +IO+ object, and needs to be seekable.
# +mode+ is the mode string used to open a filename (default 'rb'); it may
# not be given together with an io object.
#
# A filename is opened with File.open rather than Kernel#open, so that a
# string beginning with "|" is treated as a path and never run as a command.
# If loading fails, a file that we opened ourselves is closed again before the
# error propagates.
def initialize arg, mode=nil
  # get the io object, remembering whether it is ours to close
  @close_parent, @io = if String === arg
    [true, File.open(arg, mode || 'rb')]
  else
    raise 'unable to specify mode string with io object' if mode
    [false, arg]
  end
  begin
    # do we have this file opened for writing? don't know of a better way to tell
    @writeable = begin
      @io.flush
      true
    rescue IOError
      false
    end
    # silence undefined warning in clear
    @sb_file = nil
    # if the io object has data, we should load it, otherwise start afresh
    @io.size > 0 ? load : clear
  rescue StandardError
    # don't leak the handle we opened when the document can't be loaded
    @io.close if @close_parent
    raise
  end
end
|
111
|
-
|
112
|
-
# Create a storage object for +arg+ (see #initialize). Without a block the
# new object is returned. With a block, the object is yielded, closed once
# the block finishes (even if it raises), and the block's value is returned.
def self.new arg, mode=nil
  ole = super
  return ole unless block_given?
  begin
    yield ole
  ensure
    ole.close
  end
end
|
121
|
-
|
122
|
-
class << self
  # encouraged spelling of +new+
  alias_method :open, :new
  # deprecated spelling of +new+
  alias_method :load, :new
end
|
128
|
-
|
129
|
-
# load document from file.
|
130
|
-
# load document from file.
#
# Reads the header, the big block allocation table, the directory entries
# (which are arranged into a tree under #root), and finally sets up the small
# block file and its allocation table.
def load
  # we always read 512 for the header block. if the block size ends up being different,
  # what happens to the 109 fat entries. are there more/less entries?
  @io.rewind
  header_block = @io.read 512
  @header = Header.load header_block

  # create an empty bbat
  @bbat = AllocationTable::Big.new self
  # extra mbat blocks
  mbat_blocks = (0...@header.num_mbat).map { |i| i + @header.mbat_start }
  # the list of bat blocks starts in the spare room after the header proper
  bbat_chain = (header_block[Header::SIZE..-1] + @bbat.read(mbat_blocks)).unpack 'L*'
  # am i using num_bat in the right way?
  @bbat.load @bbat.read(bbat_chain[0, @header.num_bat])

  # get block chain for directories, read it, then split it into chunks and load the
  # directory entries. empty entries (type_id == 0) are kept for now: to_tree below
  # looks entries up by their position in this flat list, so dropping any of them
  # beforehand would shift every later entry away from its index.
  @dirents = @bbat.read(@header.dirent_start).scan(/.{#{Dirent::SIZE}}/mo).
    map { |str| Dirent.load self, str }

  # now reorder from flat into a tree
  # links are stored in some kind of balanced binary tree
  # check that everything is visited at least, and at most once
  # similarly with the blocks of the file.
  # was thinking of moving this to Dirent.to_tree instead.
  class << @dirents
    def to_tree idx=0
      return [] if idx == Dirent::EOT
      d = self[idx]
      d.children = to_tree d.child
      raise "directory #{d.inspect} used twice" if d.idx
      d.idx = idx
      to_tree(d.prev) + [d] + to_tree(d.next)
    end
  end

  @root = @dirents.to_tree.first
  # only now that the tree is linked up is it safe to drop the empty entries
  @dirents.reject! { |d| d.type_id == 0 }
  Log.warn "root name was #{@root.name.inspect}" unless @root.name == 'Root Entry'
  unused = @dirents.reject(&:idx).length
  Log.warn "* #{unused} unused directories" if unused > 0

  # FIXME i don't currently use @header.num_sbat which i should
  # hmm. nor do i write it. it means what exactly again?
  @sb_file = RangesIOResizeable.new @bbat, @root.first_block, @root.size
  @sbat = AllocationTable::Small.new self
  @sbat.load @bbat.read(@header.sbat_start)
end
|
177
|
-
|
178
|
-
# Write out any changes (when the io is writeable), close the small block
# file, and close the underlying io if we opened it ourselves. The underlying
# io is closed even when flushing or closing the small block file raises, so
# that a failed save doesn't leak the handle.
def close
  flush if @writeable
  @sb_file.close
  nil
ensure
  @io.close if @close_parent
end
|
183
|
-
|
184
|
-
# should have a #open_dirent i think. and use it in load and flush. neater.
|
185
|
-
# also was thinking about Dirent#open_padding. then i can more easily clean up the padding
|
186
|
-
# to be 0.chr
|
187
|
-
=begin
|
188
|
-
thoughts on fixes:
|
189
|
-
1. reterminate any chain not ending in EOC.
|
190
|
-
2. pass through all chain heads looking for collisions, and making sure nothing points to them
|
191
|
-
(ie they are really heads).
|
192
|
-
3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
|
193
|
-
in the bat for them.
|
194
|
-
this stuff will ensure reliability of input better. otherwise, its actually worth doing a repack
|
195
|
-
directly after read, to ensure the above is properly accounted for, before subsequent writes possibly
|
196
|
-
destroy things.
|
197
|
-
=end
|
198
|
-
# Serialize the current tree back to the underlying io: the directory
# entries, the small block allocation table, the big block allocation table,
# and finally the header block. Writing more bat blocks than fit in the header
# (109) is not supported and raises NotImplementedError.
def flush
  # recreate dirs from our tree. the root entry carries the small block file.
  @root.type = :root
  @root.name = 'Root Entry'
  @root.first_block = @sb_file.first_block
  @root.size = @sb_file.size
  @dirents = @root.flatten

  # rewrite the dirent stream from scratch, padded with zero bytes up to a
  # whole number of big blocks.
  dirent_io = RangesIOResizeable.new @bbat, @header.dirent_start
  dirent_io.truncate 0
  @dirents.each { |dirent| dirent_io.write dirent.save }
  padding = (dirent_io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - dirent_io.size
  dirent_io.write 0.chr * padding
  @header.dirent_start = dirent_io.first_block
  dirent_io.close

  # similarly for the sbat data.
  sbat_io = RangesIOResizeable.new @bbat, @header.sbat_start
  sbat_io.truncate 0
  sbat_io.write @sbat.save
  @header.sbat_start = sbat_io.first_block
  @header.num_sbat = @bbat.chain(@header.sbat_start).length
  sbat_io.close

  # what follows is slightly more complex, for the bat fiddling. first release
  # the blocks that used to hold the bat itself.
  @bbat.table.map! do |b|
    if b == AllocationTable::BAT || b == AllocationTable::META_BAT
      AllocationTable::AVAIL
    else
      b
    end
  end

  # claim blocks for the bat through a resizeable io hooked up to the bbat.
  # claiming blocks may itself grow the table, so repeat until the claimed
  # space is big enough for the serialized table.
  bat_io = RangesIOResizeable.new @bbat, AllocationTable::EOC
  loop do
    bbat_data = @bbat.save
    mbat_chain = @bbat.chain bat_io.first_block
    raise NotImplementedError, "don't handle writing out extra META_BAT blocks yet" if mbat_chain.length > 109
    # so we can ignore meta blocks in this calculation:
    break if bat_io.size >= bbat_data.length # it shouldn't be bigger right?
    # this may grow the bbat, depending on existing available blocks
    bat_io.truncate bbat_data.length
  end

  # now extract the info we want, and mark the claimed blocks as bat blocks
  ranges = bat_io.ranges
  mbat_chain = @bbat.chain bat_io.first_block
  bat_io.close
  mbat_chain.each { |b| @bbat.table[b] = AllocationTable::BAT }
  @header.num_bat = mbat_chain.length

  # write the table into the claimed ranges. not resizeable!
  raw_io = RangesIO.new @io, ranges
  raw_io.write @bbat.save
  raw_io.close

  # the list of bat blocks lives in the header block, padded out to 109 entries
  mbat_chain += [AllocationTable::AVAIL] * (109 - mbat_chain.length)
  @header.mbat_start = AllocationTable::EOC
  @header.num_mbat = 0

  @root.type = :dir

  # now seek back and write the header out
  @io.seek 0
  @io.write @header.save + mbat_chain.pack('L*')
  @io.flush
end
|
290
|
-
|
291
|
-
# Reset to the equivalent of having loaded an empty ole document: a default
# header, empty allocation tables, a lone root entry, and a truncated io.
def clear
  unless @writeable
    Log.warn 'creating new ole storage object on non-writable io'
  end

  @header = Header.new
  @bbat = AllocationTable::Big.new self

  # a fresh root entry, with no children
  @root = Dirent.new self, :dir
  @root.name = 'Root Entry'
  @dirents = [@root]
  @root.idx = 0
  @root.children = []
  # size shouldn't display for non-files
  @root.size = 0

  # replace any previous small block file with an empty one
  @sb_file and @sb_file.close
  @sb_file = RangesIOResizeable.new @bbat, AllocationTable::EOC
  @sbat = AllocationTable::Small.new self

  # throw everything else the hell away
  @io.truncate 0
end
|
309
|
-
|
310
|
-
# could be useful with mis-behaving ole documents. or to just clean them up.
|
311
|
-
# Rewrite the document from scratch by way of a temporary copy. could be
# useful with mis-behaving ole documents. or to just clean them up.
#
# +temp+ selects where the temporary copy lives: <tt>:file</tt> (a Tempfile,
# the default) or <tt>:mem</tt> (a StringIO). Anything else raises.
def repack temp=:file
  case temp
  when :file
    # the argument to Tempfile.open is a basename, not a mode string. the
    # tempfile is switched to binary mode, as ole documents are binary data.
    Tempfile.open 'ole-repack' do |io|
      io.binmode
      repack_using_io io
    end
  when :mem
    StringIO.open { |io| repack_using_io io }
  else
    raise "unknown temp backing #{temp.inspect}"
  end
end
|
318
|
-
|
319
|
-
# Copy the whole document into +temp_io+, reset ourselves to an empty
# document, then copy the tree back in from the temporary copy.
def repack_using_io temp_io
  @io.rewind
  IO.copy @io, temp_io
  clear
  Storage.open(temp_io) do |backup|
    backup.root.type = :dir
    Dirent.copy backup.root, root
  end
end
|
328
|
-
|
329
|
-
# Pick the allocation table for a file of +size+ bytes: the big block table
# when +size+ is at or above the header's threshold (note >=, not >), the
# small block table otherwise.
def bat_for_size size
  if size >= @header.threshold
    @bbat
  else
    @sbat
  end
end
|
333
|
-
|
334
|
-
# Short description showing the underlying io and the root entry.
def inspect
  format '#<%s io=%s root=%s>', self.class, @io.inspect, @root.inspect
end
|
337
|
-
|
338
|
-
# A class which wraps the ole header
|
339
|
-
class Header < Struct.new(
    :magic, :clsid, :minor_ver, :major_ver, :byte_order, :b_shift, :s_shift,
    :reserved, :csectdir, :num_bat, :dirent_start, :transacting_signature, :threshold,
    :sbat_start, :num_sbat, :mbat_start, :num_mbat
  )
  # pack/unpack template for the fields above, in order
  PACK = 'a8 a16 S2 a2 S2 a6 L3 a4 L5'
  # number of bytes described by PACK
  SIZE = 0x4c
  # i have seen it pointed out that the first 4 bytes of hex,
  # 0xd0cf11e0, is supposed to spell out docfile. hmmm :)
  #
  # expected value of Header#magic. built with pack so that it is a plain byte
  # string: the strings produced by unpack are binary, and on rubies with
  # string encodings a source-encoded literal holding these bytes would never
  # compare equal to them.
  MAGIC = [0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1].pack('C*').freeze
  # expected value of Header#byte_order (we only handle little endian). a byte
  # string for the same reason as MAGIC.
  BYTE_ORDER = [0xfe, 0xff].pack('C*').freeze
  # what you get if creating new header from scratch.
  # AllocationTable::EOC isn't available yet. meh.
  EOC = 0xfffffffe
  DEFAULT = [
    MAGIC, 0.chr * 16, 59, 3, BYTE_ORDER, 9, 6,
    0.chr * 6, 0, 1, EOC, 0.chr * 4,
    4096, EOC, 0, EOC, 0
  ].freeze

  # 2 basic initializations, from scratch, or from a data string.
  # from scratch will be geared towards creating a new ole object.
  # With no +values+ the DEFAULT field values are used. The result is
  # validated, so invalid values raise.
  def initialize *values
    super(*(values.empty? ? DEFAULT : values))
    validate!
  end

  # Build a header from the raw bytes +str+, as found at the start of a file.
  def self.load str
    Header.new(*str.unpack(PACK))
  end

  # Serialize the header fields to a string of SIZE bytes.
  def save
    to_a.pack PACK
  end

  # Check the field values. Raises for a header that can't be handled, logs a
  # warning for one that merely looks suspicious, and returns true otherwise.
  def validate!
    raise "OLE2 signature is invalid" unless magic == MAGIC
    if num_bat == 0 || # is that valid for a completely empty file?
       # not sure about this one. basically to do max possible bat given size of mbat
       (num_bat > 109 && num_bat > 109 + num_mbat * (1 << b_shift - 2)) ||
       # shouldn't need to use the mbat as there is enough space in the header block
       (num_bat < 109 && num_mbat != 0) ||
       # given the size of the header is 76, if b_shift <= 6, blocks address the header.
       s_shift > b_shift || b_shift <= 6 || b_shift >= 31 ||
       # we only handle little endian
       byte_order != BYTE_ORDER
      raise "not valid OLE2 structured storage file"
    end
    # relaxed this, due to test-msg/qwerty_[1-3]*.msg they all had
    # 3 for this value.
    # transacting_signature != "\x00" * 4 or
    if threshold != 4096 ||
       (num_mbat == 0 && mbat_start != EOC) ||
       reserved != "\x00" * 6
      Log.warn "may not be a valid OLE2 structured storage file"
    end
    true
  end
end
|
397
|
-
|
398
|
-
#
|
399
|
-
# +AllocationTable+'s hold the chains corresponding to files. Given
|
400
|
-
# an initial index, <tt>AllocationTable#chain</tt> follows the chain, returning
|
401
|
-
# the blocks that make up that file.
|
402
|
-
#
|
403
|
-
# There are 2 allocation tables, the bbat, and sbat, for big and small
|
404
|
-
# blocks respectively. The block chain should be loaded using either
|
405
|
-
# <tt>Storage#read_big_blocks</tt> or <tt>Storage#read_small_blocks</tt>
|
406
|
-
# as appropriate.
|
407
|
-
#
|
408
|
-
# Whether or not big or small blocks are used for a file depends on
|
409
|
-
# whether its size is over the <tt>Header#threshold</tt> level.
|
410
|
-
#
|
411
|
-
# An <tt>Ole::Storage</tt> document is serialized as a series of directory objects,
|
412
|
-
# which are stored in blocks throughout the file. The blocks are either
|
413
|
-
# big or small, and are accessed using the <tt>AllocationTable</tt>.
|
414
|
-
#
|
415
|
-
# The bbat allocation table's data is stored in the spare room in the header
|
416
|
-
# block, and in extra blocks throughout the file as referenced by the meta
|
417
|
-
# bat. That chain is linear, as there is no higher level table.
|
418
|
-
#
|
419
|
-
class AllocationTable
|
420
|
-
# a free block (I don't currently leave any blocks free), although I do pad out
|
421
|
-
# the allocation table with AVAIL to the block size.
|
422
|
-
AVAIL = 0xffffffff
|
423
|
-
EOC = 0xfffffffe # end of a chain
|
424
|
-
# these blocks correspond to the bat, and aren't part of a file, nor available.
|
425
|
-
# (I don't currently output these)
|
426
|
-
BAT = 0xfffffffd
|
427
|
-
META_BAT = 0xfffffffc
|
428
|
-
|
429
|
-
attr_reader :ole, :io, :table, :block_size
|
430
|
-
def initialize ole
|
431
|
-
@ole = ole
|
432
|
-
@table = []
|
433
|
-
end
|
434
|
-
|
435
|
-
def load data
|
436
|
-
@table = data.unpack('L*')
|
437
|
-
end
|
438
|
-
|
439
|
-
def truncated_table
|
440
|
-
# this strips trailing AVAILs. come to think of it, this has the potential to break
|
441
|
-
# bogus ole. if you terminate using AVAIL instead of EOC, like I did before. but that is
|
442
|
-
# very broken. however, if a chain ends with AVAIL, it should probably be fixed to EOC
|
443
|
-
# at load time.
|
444
|
-
temp = @table.reverse
|
445
|
-
not_avail = temp.find { |b| b != AVAIL } and temp = temp[temp.index(not_avail)..-1]
|
446
|
-
temp.reverse
|
447
|
-
end
|
448
|
-
|
449
|
-
def save
|
450
|
-
table = truncated_table #@table
|
451
|
-
# pad it out some
|
452
|
-
num = @ole.bbat.block_size / 4
|
453
|
-
# do you really use AVAIL? they probably extend past end of file, and may shortly
|
454
|
-
# be used for the bat. not really good.
|
455
|
-
table += [AVAIL] * (num - (table.length % num)) if (table.length % num) != 0
|
456
|
-
table.pack 'L*'
|
457
|
-
end
|
458
|
-
|
459
|
-
# rewriting this to be non-recursive. it broke on a large attachment
|
460
|
-
# building up the chain, causing a stack error. need tail-call elimination...
|
461
|
-
def chain start
|
462
|
-
a = []
|
463
|
-
idx = start
|
464
|
-
until idx >= META_BAT
|
465
|
-
raise "broken allocationtable chain" if idx < 0 || idx > @table.length
|
466
|
-
a << idx
|
467
|
-
idx = @table[idx]
|
468
|
-
end
|
469
|
-
Log.warn "invalid chain terminator #{idx}" unless idx == EOC
|
470
|
-
a
|
471
|
-
end
|
472
|
-
|
473
|
-
def ranges chain, size=nil
|
474
|
-
chain = self.chain(chain) unless Array === chain
|
475
|
-
blocks_to_ranges chain, size
|
476
|
-
end
|
477
|
-
|
478
|
-
# Turn a chain (an array given by +chain+) of big blocks, optionally
|
479
|
-
# truncated to +size+, into an array of arrays describing the stretches of
|
480
|
-
# bytes in the file that it belongs to.
|
481
|
-
#
|
482
|
-
# Big blocks are of size Ole::Storage::Header#b_size, and are stored
|
483
|
-
# directly in the parent file.
|
484
|
-
# truncate the chain if required
|
485
|
-
# convert chain to ranges of the block size
|
486
|
-
# truncate final range if required
|
487
|
-
|
488
|
-
def blocks_to_ranges chain, size=nil
|
489
|
-
chain = chain[0...(size.to_f / block_size).ceil] if size
|
490
|
-
ranges = chain.map { |i| [block_size * i, block_size] }
|
491
|
-
ranges.last[1] -= (ranges.length * block_size - size) if ranges.last and size
|
492
|
-
ranges
|
493
|
-
end
|
494
|
-
|
495
|
-
# quick shortcut. chain can be either a head (in which case the table is used to
|
496
|
-
# turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
|
497
|
-
# its not resizeable or migrateable. it probably could be resizeable though, using
|
498
|
-
# self as the bat. but what would the first_block be?
|
499
|
-
def open chain, size=nil
|
500
|
-
io = RangesIO.new @io, ranges(chain, size)
|
501
|
-
if block_given?
|
502
|
-
begin yield io
|
503
|
-
ensure; io.close
|
504
|
-
end
|
505
|
-
else io
|
506
|
-
end
|
507
|
-
end
|
508
|
-
|
509
|
-
def read chain, size=nil
|
510
|
-
open chain, size, &:read
|
511
|
-
end
|
512
|
-
|
513
|
-
# ----------------------
|
514
|
-
|
515
|
-
def get_free_block
|
516
|
-
@table.each_index { |i| return i if @table[i] == AVAIL }
|
517
|
-
@table.push AVAIL
|
518
|
-
@table.length - 1
|
519
|
-
end
|
520
|
-
|
521
|
-
# must return first_block
|
522
|
-
def resize_chain first_block, size
|
523
|
-
new_num_blocks = (size / block_size.to_f).ceil
|
524
|
-
blocks = chain first_block
|
525
|
-
old_num_blocks = blocks.length
|
526
|
-
if new_num_blocks < old_num_blocks
|
527
|
-
# de-allocate some of our old blocks. TODO maybe zero them out in the file???
|
528
|
-
(new_num_blocks...old_num_blocks).each { |i| @table[blocks[i]] = AVAIL }
|
529
|
-
# if we have a chain, terminate it and return head, otherwise return EOC
|
530
|
-
if new_num_blocks > 0
|
531
|
-
@table[blocks[new_num_blocks-1]] = EOC
|
532
|
-
first_block
|
533
|
-
else EOC
|
534
|
-
end
|
535
|
-
elsif new_num_blocks > old_num_blocks
|
536
|
-
# need some more blocks.
|
537
|
-
last_block = blocks.last
|
538
|
-
(new_num_blocks - old_num_blocks).times do
|
539
|
-
block = get_free_block
|
540
|
-
# connect the chain. handle corner case of blocks being [] initially
|
541
|
-
if last_block
|
542
|
-
@table[last_block] = block
|
543
|
-
else
|
544
|
-
first_block = block
|
545
|
-
end
|
546
|
-
last_block = block
|
547
|
-
# this is just to inhibit the problem where it gets picked as being a free block
|
548
|
-
# again next time around.
|
549
|
-
@table[last_block] = EOC
|
550
|
-
end
|
551
|
-
first_block
|
552
|
-
else first_block
|
553
|
-
end
|
554
|
-
end
|
555
|
-
|
556
|
-
class Big < AllocationTable
  # Runs the parent initializer with the same arguments, then takes the block
  # size from the header's b_shift and uses the ole's own io as backing io.
  def initialize(*params)
    super(*params)
    header = @ole.header
    @block_size = 1 << header.b_shift
    @io = @ole.io
  end

  # Big blocks are kind of -1 based, in order to not clash with the header:
  # every block index is bumped by one before the parent computes the ranges.
  def blocks_to_ranges(blocks, size)
    shifted = blocks.map { |index| index + 1 }
    super(shifted, size)
  end
end
|
568
|
-
|
569
|
-
class Small < AllocationTable
  # Runs the parent initializer with the same arguments, then takes the block
  # size from the header's s_shift and uses the ole's sb_file as backing io.
  def initialize(*params)
    super(*params)
    header = @ole.header
    @block_size = 1 << header.s_shift
    @io = @ole.sb_file
  end
end
|
576
|
-
end
|
577
|
-
|
578
|
-
# like normal RangesIO, but Ole::Storage specific. the ranges are backed by an
# AllocationTable, and can be resized. used for read/write to these streams:
# 1. serialized dirent data
# 2. sbat table data
# 3. all dirents but through RangesIOMigrateable below
#
# Note that all internal access to first_block is through accessors, as it is sometimes
# useful to redirect it.
class RangesIOResizeable < RangesIO
  attr_reader :bat
  attr_accessor :first_block

  # +bat+ is the allocation table backing this io, +first_block+ the head of
  # the chain to expose, and +size+ is handed to <tt>bat.ranges</tt> as is.
  # The parent is initialized with the bat's io and the resulting ranges.
  def initialize bat, first_block, size=nil
    @bat = bat
    self.first_block = first_block
    super @bat.io, @bat.ranges(first_block, size)
  end

  # Resizes the underlying chain to hold +size+ bytes, recomputes the ranges
  # and records +size+ as the new length. Returns +size+.
  def truncate size
    # note that old_blocks is != @ranges.length necessarily. i'm planning to write a
    # merge_ranges function that merges sequential ranges into one as an optimization.
    self.first_block = @bat.resize_chain first_block, size
    @ranges = @bat.ranges first_block, size
    # clamp the position to the NEW length. @size still holds the old length
    # at this point, so it must not be used here.
    @pos = size if @pos > size

    # don't know if this is required, but we explicitly request our @io to grow if necessary
    # we never shrink it though. maybe this belongs in allocationtable, where smarter decisions
    # can be made.
    # maybe its ok to just seek out there later??
    max = @ranges.map { |pos, len| pos + len }.max || 0
    @io.truncate max if max > @io.size

    @size = size
  end
end
|
612
|
-
|
613
|
-
# like RangesIOResizeable, but Ole::Storage::Dirent specific. provides for migration
# between bats based on size, and updating the dirent, instead of the ole copy back
# on close.
class RangesIOMigrateable < RangesIOResizeable
  attr_reader :dirent

  # Wraps the data of +dirent+: the bat is chosen by the dirent's ole from the
  # dirent's current size, and the chain starts at the dirent's first_block.
  def initialize dirent
    @dirent = dirent
    super @dirent.ole.bat_for_size(@dirent.size), @dirent.first_block, @dirent.size
  end

  # Resizes the stream to +size+ bytes. If the ole picks a different bat for
  # the new size, the data is moved over to that bat first. The dirent's size
  # is updated in either case.
  def truncate size
    bat = @dirent.ole.bat_for_size size
    if bat != @bat
      # bat migration needed! we need to backup some data. the amount of data
      # should be <= @ole.header.threshold, so we can just hold it all in one buffer.
      # backup this
      pos = @pos
      @pos = 0
      keep = read [@size, size].min
      # this does a normal truncate to 0, removing our presence from the old bat, and
      # rewrite the dirent's first_block
      super 0
      @bat = bat
      # just change the underlying io from right under everyone :)
      @io = bat.io
      # important to do this now, before the write. as the below write will always
      # migrate us back to sbat! this will now allocate us +size+ in the new bat.
      super
      @pos = 0
      write keep
      # restore the caller's position, but never past the new end of the
      # stream, so that both branches leave the position within bounds.
      @pos = [pos, size].min
    else
      super
    end
    # now just update the file
    @dirent.size = size
  end

  # forward this to the dirent
  def first_block
    @dirent.first_block
  end

  # forward this to the dirent
  def first_block= val
    @dirent.first_block = val
  end
end
|
660
|
-
|
661
|
-
#
# A class which wraps an ole directory entry. Can be either a directory
# (<tt>Dirent#dir?</tt>) or a file (<tt>Dirent#file?</tt>)
#
# Most interaction with <tt>Ole::Storage</tt> is through this class.
# The 2 most important functions are <tt>Dirent#children</tt>, and
# <tt>Dirent#data</tt>.
#
# was considering separate classes for dirs and files. some methods/attrs only
# applicable to one or the other.
#
# Note that Dirent is still using a home grown Struct variant, with explicit
# MEMBERS etc. any reason for that still?
#
class Dirent
  # Field names, in the order in which they appear in @values and in PACK.
  MEMBERS = [
    :name_utf16, :name_len, :type_id, :colour, :prev, :next, :child,
    :clsid, :flags, # dirs only
    :create_time_str, :modify_time_str, # files only
    :first_block, :size, :reserved
  ]
  # pack/unpack template for one serialized entry; the fields add up to SIZE bytes.
  PACK = 'a64 S C C L3 a16 L a8 a8 L2 a4'
  SIZE = 128
  TYPE_MAP = {
    # this is temporary
    0 => :empty,
    1 => :dir,
    2 => :file,
    5 => :root
  }
  COLOUR_MAP = {
    0 => :red,
    1 => :black
  }
  # used in the next / prev / child stuff to show that the tree ends here.
  # also used for first_block for directory.
  EOT = 0xffffffff

  include Enumerable

  # Dirent's should be created in 1 of 2 ways, either Dirent.new ole, [:dir/:file/:root],
  # or Dirent.load '... dirent data ...'
  # its a bit clunky, but thats how it is at the moment. you can assign to type, but
  # shouldn't.

  attr_accessor :idx
  # This returns all the children of this +Dirent+. It is filled in
  # when the tree structure is recreated.
  attr_accessor :children
  attr_reader :ole, :create_time, :modify_time
  attr_accessor :name, :type

  # Creates a fresh entry of the given +type+ belonging to +ole+. The name is
  # left unset; files get the current time as create and modify time.
  def initialize ole, type
    @ole = ole
    # this isn't really good enough. need default values put in there.
    @values = [
      0.chr * 2, 2, 0, # will get overwritten
      1, EOT, EOT, EOT,
      0.chr * 16, 0, nil, nil,
      AllocationTable::EOC, 0, 0.chr * 4]
    # maybe check types here.
    @type = type
    @create_time = @modify_time = nil
    @children = []
    if file?
      @create_time = Time.now
      @modify_time = Time.now
    end
  end

  # Builds a Dirent from the serialized entry +str+ without running the
  # initializer.
  def self.load ole, str
    # load should function without the need for the initializer.
    dirent = Dirent.allocate
    dirent.load ole, str
    dirent
  end

  # Fills this entry from the serialized data +str+. Raises if the type id is
  # not in TYPE_MAP. Note that @children is not touched here.
  def load ole, str
    @ole = ole
    @values = str.unpack PACK
    @name = Types::FROM_UTF16.iconv name_utf16[0...name_len].sub(/\x00\x00$/, '')
    @type = TYPE_MAP[type_id] or raise "unknown type #{type_id.inspect}"
    if file?
      @create_time = Types.load_time create_time_str
      @modify_time = Types.load_time modify_time_str
    end
  end

  # only defined for files really. and the above children stuff is only for children.
  # maybe i should have some sort of File and Dir class, that subclass Dirents? a dirent
  # is just a data holder.
  # this can be used for write support if the underlying io object was opened for writing.
  # maybe take a mode string argument, and do truncation, append etc stuff.
  #
  # Returns nil for anything that is not a file. With a block, the io is
  # yielded and closed afterwards, and the block's value is returned;
  # without a block the io itself is returned.
  def open
    return nil unless file?
    io = RangesIOMigrateable.new self
    if block_given?
      begin yield io
      ensure; io.close
      end
    else io
    end
  end

  # Reads through a temporary io from #open; +limit+ is passed on to its read.
  def read limit=nil
    open { |io| io.read limit }
  end

  def dir?
    # to count root as a dir.
    type != :file
  end

  def file?
    type == :file
  end

  def time
    # time is nil for streams, otherwise try to parse either of the time pairs (not
    # sure of their meaning - created / modified?)
    #@time ||= file? ? nil : (Dirent.parse_time(secs1, days1) || Dirent.parse_time(secs2, days2))
    create_time || modify_time
  end

  # Iterates over the direct children.
  def each(&block)
    @children.each(&block)
  end

  # An Integer +idx+ indexes into the children; anything else is matched
  # against each child's name with === and the first hit is returned.
  def [] idx
    return children[idx] if Integer === idx
    # path style look up.
    # maybe take another arg to allow creation? or leave that to the filesystem
    # add on.
    # not sure if '/' is a valid char in an Dirent#name, so no splitting etc at
    # this level.
    # also what about warning about multiple hits for the same name?
    children.find { |child| idx === child.name }
  end

  # solution for the above '/' thing for now.
  def / path
    self[path]
  end

  # Renders this entry and everything below it as an indented text tree, one
  # line per entry.
  def to_tree
    if children and !children.empty?
      str = "- #{inspect}\n"
      children.each_with_index do |child, i|
        last = i == children.length - 1
        child.to_tree.split(/\n/).each_with_index do |line, j|
          str << " #{last ? (j == 0 ? "\\" : ' ') : '|'}#{line}\n"
        end
      end
      str
    else "- #{inspect}\n"
    end
  end

  # One reader and one writer per member, each backed by its slot in @values.
  MEMBERS.each_with_index do |sym, i|
    define_method(sym) { @values[i] }
    define_method(sym.to_s + '=') { |val| @values[i] = val }
  end

  # Returns the raw member values (not the children).
  def to_a
    @values
  end

  # flattens the tree starting from here into +dirents+. note it modifies its argument.
  def flatten dirents=[]
    @idx = dirents.length
    dirents << self
    children.each { |child| child.flatten dirents }
    self.child = Dirent.flatten_helper children
    dirents
  end

  # i think making the tree structure optimized is actually more complex than this, and
  # requires some intelligent ordering of the children based on names, but as long as
  # it is valid its ok.
  # actually, i think its ok. gsf for example only outputs a singly-linked-list, where
  # prev is always EOT.
  #
  # Links +children+ into a binary tree through their prev / next members and
  # returns the idx of the entry chosen as its root (EOT when there are none).
  def self.flatten_helper children
    return EOT if children.empty?
    i = children.length / 2
    this = children[i]
    this.prev, this.next = [(0...i), (i+1..-1)].map { |r| flatten_helper children[r] }
    this.idx
  end

  # Serializes this entry into its packed form. Raises if the type is not one
  # of the TYPE_MAP values.
  def save
    tmp = Types::TO_UTF16.iconv(name)
    tmp = tmp[0, 62] if tmp.length > 62
    tmp += 0.chr * 2
    self.name_len = tmp.length
    self.name_utf16 = tmp + 0.chr * (64 - tmp.length)
    pair = TYPE_MAP.to_a.find { |_id, sym| @type == sym }
    raise "unknown type #{type.inspect}" unless pair
    self.type_id = pair.first
    # for the case of files, it is assumed that that was handled already
    # note not dir?, so as not to override root's first_block
    self.first_block = Dirent::EOT if type == :dir
    # the time strings of a loaded entry are written back untouched. an entry
    # made with Dirent.new has nil in both slots, which cannot be packed, so
    # those get zero filled. (this used to sit behind an `if 0` test, which is
    # always true in ruby, so the zero fill never actually happened.)
    self.create_time_str ||= 0.chr * 8
    self.modify_time_str ||= 0.chr * 8
    @values.pack PACK
  end

  def inspect
    str = "#<Dirent:#{name.inspect}"
    # perhaps i should remove the data snippet. its not that useful anymore.
    if file?
      # to_s guards against a nil result from read.
      # NOTE(review): whether the io returns nil or '' at eof is not visible here.
      tmp = read(9).to_s
      data = tmp.length == 9 ? tmp[0, 5] + '...' : tmp
      str << " size=#{size}" +
        "#{time ? ' time=' + time.to_s.inspect : nil}" +
        " data=#{data.inspect}"
    else
      # there is some dir specific stuff. like clsid, flags.
    end
    str + '>'
  end

  # --------
  # and for creation of a dirent. don't like the name. is it a file or a directory?
  # assign to type later? io will be empty.
  #
  # Appends a new Dirent of the given +type+ to the children, yields it if a
  # block is given, and returns it.
  def new_child type
    child = Dirent.new ole, type
    children << child
    yield child if block_given?
    child
  end

  # Removes +child+ from the children, raising if it is not one of them.
  def delete child
    # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
    raise "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
    # free our blocks
    child.open { |io| io.truncate 0 }
  end

  def self.copy src, dst
    # copies the contents of src to dst. both must be files, or both must be
    # non-files (a dir may be copied onto root). maybe this will recurse too
    # much for big documents??
    raise 'differing types' if src.file? != dst.file?
    dst.name = src.name
    if src.dir?
      src.children.each do |src_child|
        dst.new_child(src_child.type) { |dst_child| Dirent.copy src_child, dst_child }
      end
    else
      src.open do |src_io|
        dst.open { |dst_io| IO.copy src_io, dst_io }
      end
    end
  end
end
|
921
|
-
end
|
922
|
-
end
|
923
|
-
|
924
|
-
# When this file is run directly, print the tree of the ole file named by the
# first command line argument.
if __FILE__ == $0
  tree = Ole::Storage.open(ARGV[0]) { |ole| ole.root.to_tree }
  puts tree
end
|
927
|
-
|