ruby-ole 1.2.2 → 1.2.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,22 @@
1
+ == 1.2.3 / 2007-12-28
2
+
3
+ - MBAT write support re-implemented. Can now write files over ~8mb again.
4
+ - Minor fixes (truncation in #flush, file modification timestamps)
5
+ - More test coverage
6
+ - Initial (read-only) property set support.
7
+ - Complete filesystem api, to pass most of the rubyzip tests.
8
+ - Add a ChangeLog :).
9
+
10
+ == 1.2.2 / 2007-11-05
11
+
12
+ - Lots of test updates, 90% coverage.
13
+ - Fix +to_tree+ method to be more efficient, and stream output.
14
+ - Optimizations from benchmarks and profiling, mostly for writes. Fixed
15
+ AllocationTable#resize_chain, RangesIOResizable#truncate and
16
+ AllocationTable#free_block.
17
+ - Add in filesystem test file from rubyzip, and start working on a
18
+ filesystem api.
19
+
20
+ == 1.2.1 / 2007-08-20
21
+
22
+ - Separate out from ruby-msg as new project.
data/Rakefile CHANGED
@@ -47,16 +47,18 @@ spec = Gem::Specification.new do |s|
47
47
  s.version = PKG_VERSION
48
48
  s.summary = %q{Ruby OLE library.}
49
49
  s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
50
- s.authors = ["Charles Lowe"]
50
+ s.authors = ['Charles Lowe']
51
51
  s.email = %q{aquasync@gmail.com}
52
52
  s.homepage = %q{http://code.google.com/p/ruby-ole}
53
- #s.rubyforge_project = %q{ruby-ole}
53
+ s.rubyforge_project = %q{ruby-ole}
54
54
 
55
55
  s.executables = ['oletool']
56
- s.files = ['Rakefile']
57
- s.files += Dir.glob("lib/**/*.rb")
58
- s.files += Dir.glob("test/test_*.rb") + Dir.glob("test/*.doc")
59
- s.files += Dir.glob("bin/*")
56
+ s.files = ['Rakefile', 'ChangeLog']
57
+ s.files += FileList['lib/**/*.rb']
58
+ s.files += FileList['test/test_*.rb', 'test/*.doc']
59
+ s.files += FileList['test/oleWithDirs.ole', 'test/test_SummaryInformation']
60
+ s.files += FileList['bin/*']
61
+ s.test_files = FileList['test/test_*.rb']
60
62
 
61
63
  s.has_rdoc = true
62
64
  s.rdoc_options += [
@@ -64,13 +66,11 @@ spec = Gem::Specification.new do |s|
64
66
  '--title', "#{PKG_NAME} documentation",
65
67
  '--tab-width', '2'
66
68
  ]
67
-
68
- s.autorequire = 'ole/storage'
69
69
  end
70
70
 
71
71
  Rake::GemPackageTask.new(spec) do |t|
72
72
  t.gem_spec = spec
73
- t.need_tar = true
73
+ t.need_tar = false
74
74
  t.need_zip = false
75
75
  t.package_dir = 'build'
76
76
  end
@@ -29,8 +29,8 @@
29
29
  #
30
30
  # TODO
31
31
  #
32
- # - check for all new_child calls. eg Dir.mkdir, and File.open, and also
33
- # File.rename, to add in filename length checks (max 32 / 31 or something).
32
+ # - check Dir.mkdir, and File.open, and File.rename, to add in filename
33
+ # length checks (max 32 / 31 or something).
34
34
  # do the automatic truncation, and add in any necessary warnings.
35
35
  #
36
36
  # - File.split('a/') == File.split('a') == ['.', 'a']
@@ -130,6 +130,8 @@ module Ole # :nodoc:
130
130
  path = "#{pwd}/#{path}" unless path =~ /^\//
131
131
  # at this point its already absolute. we use File.expand_path
132
132
  # just for the .. and . handling
133
+ # Hmmm, FIXME: won't work on windows i think. on windows it will prepend
134
+ # the current drive i believe. may just need to strip the first 2 chars.
133
135
  File.expand_path path
134
136
  end
135
137
 
@@ -160,8 +162,7 @@ module Ole # :nodoc:
160
162
  end
161
163
 
162
164
  def open path, mode='r', &block
163
- # FIXME - mode strings are more complex than this.
164
- if mode == 'w'
165
+ if IO::Mode.new(mode).create?
165
166
  begin
166
167
  dirent = dirent_from_path path
167
168
  rescue Errno::ENOENT
@@ -169,8 +170,7 @@ module Ole # :nodoc:
169
170
  # a get_parent_dirent function.
170
171
  parent_path, basename = File.split expand_path(path)
171
172
  parent = @ole.dir.send :dirent_from_path, parent_path, path
172
- dirent = parent.new_child :file
173
- dirent.name = basename
173
+ parent.children << dirent = Dirent.new(@ole, :type => :file, :name => basename)
174
174
  end
175
175
  else
176
176
  dirent = dirent_from_path path
@@ -351,7 +351,7 @@ module Ole # :nodoc:
351
351
  # now, we first should ensure that it doesn't already exist
352
352
  # either as a file or a directory.
353
353
  raise Errno::EEXIST, path if parent/basename
354
- parent.new_child(:dir) { |child| child.name = basename }
354
+ parent.children << Dirent.new(@ole, :type => :dir, :name => basename)
355
355
  0
356
356
  end
357
357
 
@@ -1,37 +1,42 @@
1
+ require 'ole/types'
1
2
 
2
3
  module Ole
3
4
  module Types
4
- # should have a list of the VT_* variant types, and have all the serialization related code
5
- # here... implement dump & load functions like marshalling
6
- class Guid
7
- SIZE = 16
8
-
9
- def self.load str
10
- Types.load_guid str
11
- end
12
- end
13
-
14
- # see http://poi.apache.org/hpsf/internals.html
5
+ #
6
+ # The PropertySet class currently supports readonly access to the properties
7
+ # serialized in "property set" streams, such as the file "\005SummaryInformation",
8
+ # in OLE files.
9
+ #
10
+ # I think this has its roots in MFC property set serialization.
11
+ #
12
+ # See http://poi.apache.org/hpsf/internals.html for details
13
+ #
15
14
  class PropertySet
16
15
  HEADER_SIZE = 28
17
- HEADER_UNPACK = "vvVa#{Guid::SIZE}V"
16
+ HEADER_UNPACK = "vvVa#{Clsid::SIZE}V"
18
17
  OS_MAP = {
19
18
  0 => :win16,
20
19
  1 => :mac,
21
20
  2 => :win32
22
21
  }
23
22
 
23
+ # define a smattering of the property set guids.
24
+ FMTID_SummaryInformation = Clsid.parse '{f29f85e0-4ff9-1068-ab91-08002b27b3d9}'
25
+ FMTID_DocSummaryInformation = Clsid.parse '{d5cdd502-2e9c-101b-9397-08002b2cf9ae}'
26
+ FMTID_UserDefinedProperties = Clsid.parse '{d5cdd505-2e9c-101b-9397-08002b2cf9ae}'
27
+
24
28
  class Section < Struct.new(:guid, :offset)
29
+ include Variant::Constants
25
30
  include Enumerable
26
31
 
27
- SIZE = Guid::SIZE + 4
28
- UNPACK_STR = "a#{Guid::SIZE}v"
32
+ SIZE = Clsid::SIZE + 4
33
+ UNPACK_STR = "a#{Clsid::SIZE}v"
29
34
 
30
35
  attr_reader :length
31
36
  def initialize str, property_set
32
37
  @property_set = property_set
33
38
  super(*str.unpack(UNPACK_STR))
34
- self.guid = Guid.load guid
39
+ self.guid = Clsid.load guid
35
40
  load_header
36
41
  end
37
42
 
@@ -49,14 +54,20 @@ module Ole
49
54
  io.read(length * 8).scan(/.{8}/m).each do |str|
50
55
  id, property_offset = str.unpack 'V2'
51
56
  io.seek offset + property_offset
52
- type = io.read(4).unpack('V')[0]
53
- yield id, type, io.read(10)
57
+ type, value = io.read(8).unpack('V2')
58
+ # is the method of serialization here custom?
59
+ case type
60
+ when VT_LPSTR, VT_LPWSTR
61
+ value = Variant.load type, io.read(value)
62
+ # ....
63
+ end
64
+ yield id, type, value
54
65
  end
55
66
  self
56
67
  end
57
68
 
58
69
  def properties
59
- to_a
70
+ to_enum.to_a
60
71
  end
61
72
  end
62
73
 
@@ -66,13 +77,13 @@ module Ole
66
77
  load_header io.read(HEADER_SIZE)
67
78
  load_section_list io.read(@num_sections * Section::SIZE)
68
79
  # expect no gap between last section and start of data.
69
- Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
80
+ #Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
70
81
  end
71
82
 
72
83
  def load_header str
73
84
  @signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_UNPACK
74
85
  # should i check that unknown == 0? it usually is. so is the guid actually
75
- @guid = Guid.load @guid
86
+ @guid = Clsid.load @guid
76
87
  @os = OS_MAP[@os_id] || Log.warn("unknown operating system id #{@os_id}")
77
88
  end
78
89
 
@@ -1,36 +1,5 @@
1
- class IOModeString
2
- def initialize mode='r'
3
- @mode = mode
4
- if @mode['b']
5
- @binary = true
6
- @mode = @mode.sub 'b', ''
7
- else
8
- @binary = false
9
- end
10
- if @mode[/\+$/]
11
- @plus = true
12
- @mode = @mode.sub(/\+$/, '')
13
- else
14
- @plus = false
15
- end
16
- end
17
-
18
- def explicit_binary?
19
- @binary
20
- end
21
-
22
- def binary?
23
- RUBY_PLATFORM !~ /win/ or @binary
24
- end
25
-
26
- def to_s
27
- @mode
28
- end
29
-
30
- def inspect
31
- "#<#{self.class}:#{to_s.inspect}>"
32
- end
33
- end
1
+ # need IO::Mode
2
+ require 'ole/support'
34
3
 
35
4
  #
36
5
  # = Introduction
@@ -71,26 +40,40 @@ end
71
40
  # This class isn't ole specific, maybe move it to my general ruby stream project.
72
41
  #
73
42
  class RangesIO
74
- attr_reader :io, :ranges, :size, :pos
43
+ attr_reader :io, :mode, :ranges, :size, :pos
75
44
  # +io+:: the parent io object that we are wrapping.
76
- #
77
- # +ranges+:: byte offsets, either:
78
- # 1. an array of ranges [1..2, 4..5, 6..8] or
79
- # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
80
- # (think the way String indexing works)
45
+ # +mode+:: the mode to use
46
+ # +params+:: hash of params.
47
+ # * :ranges - byte offsets, either:
48
+ # 1. an array of ranges [1..2, 4..5, 6..8] or
49
+ # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
50
+ # (think the way String indexing works)
51
+ # * :close_parent - boolean to close parent when this object is closed
81
52
  #
82
53
  # NOTE: the +ranges+ can overlap.
83
- def initialize io, ranges, opts={}
84
- @opts = {:close_parent => false}.merge opts
54
+ def initialize io, mode='r', params={}
55
+ mode, params = 'r', mode if Hash === mode
56
+ ranges = params[:ranges]
57
+ @params = {:close_parent => false}.merge params
58
+ @mode = IO::Mode.new mode
85
59
  @io = io
86
60
  # convert ranges to arrays. check for negative ranges?
61
+ ranges ||= [0, io.size]
87
62
  @ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
88
63
  # calculate size
89
64
  @size = @ranges.inject(0) { |total, (pos, len)| total + len }
90
65
  # initial position in the file
91
66
  @pos = 0
67
+
68
+ # handle some mode flags
69
+ truncate 0 if @mode.truncate?
70
+ seek size if @mode.append?
92
71
  end
93
72
 
73
+ #IOError: closed stream
74
+ # get this for reading, writing, everything...
75
+ #IOError: not opened for writing
76
+
94
77
  # add block form. TODO add test for this
95
78
  def self.open(*args, &block)
96
79
  ranges_io = new(*args)
@@ -120,7 +103,7 @@ class RangesIO
120
103
  alias tell :pos
121
104
 
122
105
  def close
123
- @io.close if @opts[:close_parent]
106
+ @io.close if @params[:close_parent]
124
107
  end
125
108
 
126
109
  # returns the [+offset+, +size+] pair in order to read/write at +pos+
@@ -169,8 +152,7 @@ class RangesIO
169
152
  data
170
153
  end
171
154
 
172
- # you may override this call to update @ranges and @size, if applicable. then write
173
- # support can grow below
155
+ # you may override this call to update @ranges and @size, if applicable.
174
156
  def truncate size
175
157
  raise NotImplementedError, 'truncate not supported'
176
158
  end
@@ -232,3 +214,15 @@ class RangesIO
232
214
  end
233
215
  end
234
216
 
217
+ # this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
218
+ # only really needed for the allocation table writes etc. maybe just use explicit modes
219
+ # for those
220
+ # better yet write a test that breaks before I fix it.
221
+ class RangesIONonResizeable < RangesIO
222
+ def initialize io, mode='r', params={}
223
+ mode, params = 'r', mode if Hash === mode
224
+ flags = IO::Mode.new(mode).flags & ~IO::TRUNC
225
+ super io, flags, params
226
+ end
227
+ end
228
+
@@ -1,7 +1,3 @@
1
- #! /usr/bin/ruby -w
2
-
3
- $: << File.dirname(__FILE__) + '/..'
4
-
5
1
  require 'tempfile'
6
2
 
7
3
  require 'ole/base'
@@ -16,11 +12,6 @@ module Ole # :nodoc:
16
12
  # access to OLE2 structured storage files, such as those produced by
17
13
  # Microsoft Office, eg *.doc, *.msg etc.
18
14
  #
19
- # Initially based on chicago's libole, source available at
20
- # http://prdownloads.sf.net/chicago/ole.tgz
21
- # Later augmented with some corrections by inspecting pole, and (purely
22
- # for header definitions) gsf.
23
- #
24
15
  # = Usage
25
16
  #
26
17
  # Usage should be fairly straight forward:
@@ -49,24 +40,37 @@ module Ole # :nodoc:
49
40
  # ole.root["\001CompObj"].open { |f| f.write "blah blah" }
50
41
  # ole.close
51
42
  #
43
+ # = Thanks
44
+ #
45
+ # * The code contained in this project was initially based on chicago's libole
46
+ # (source available at http://prdownloads.sf.net/chicago/ole.tgz).
47
+ #
48
+ # * It was later augmented with some corrections by inspecting pole, and (purely
49
+ # for header definitions) gsf.
50
+ #
51
+ # * The property set parsing code came from the apache java project POIFS.
52
+ #
53
+ # * The excellent idea for using a pseudo file system style interface by providing
54
+ # #file and #dir methods which mimic File and Dir, was borrowed (along with almost
55
+ # unchanged tests!) from Thomas Sondergaard's rubyzip.
56
+ #
52
57
  # = TODO
53
58
  #
54
59
  # * the custom header cruft for Header and Dirent needs some love.
55
60
  # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
56
61
  # and, in a manner of speaking, but arguably different, Storage itself.
57
- # they have differing api's which would be nice to clean.
62
+ # they have differing api's which would be nice to rethink.
58
63
  # AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
59
- # * need to fix META_BAT support in #flush.
60
64
  #
61
65
  class Storage
62
66
  # thrown for any bogus OLE file errors.
63
67
  class FormatError < StandardError # :nodoc:
64
68
  end
65
69
 
66
- VERSION = '1.2.2'
70
+ VERSION = '1.2.3'
67
71
 
68
72
  # options used at creation time
69
- attr_reader :opts
73
+ attr_reader :params
70
74
  # The top of the ole tree structure
71
75
  attr_reader :root
72
76
  # The tree structure in its original flattened form. only valid after #load, or #flush.
@@ -79,23 +83,31 @@ module Ole # :nodoc:
79
83
 
80
84
  # maybe include an option hash, and allow :close_parent => true, to be more general.
81
85
  # +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
82
- def initialize arg, mode=nil, opts={}
83
- opts, mode = mode, nil if Hash === mode
84
- opts = {:update_timestamps => true}.merge(opts)
85
- @opts = opts
86
+ def initialize arg, mode=nil, params={}
87
+ params, mode = mode, nil if Hash === mode
88
+ params = {:update_timestamps => true}.merge(params)
89
+ @params = params
86
90
 
87
91
  # get the io object
88
92
  @close_parent, @io = if String === arg
89
- [true, open(arg, mode || 'rb')]
93
+ mode ||= 'rb'
94
+ [true, open(arg, mode)]
90
95
  else
91
96
  raise ArgumentError, 'unable to specify mode string with io object' if mode
92
97
  [false, arg]
93
98
  end
94
99
  # do we have this file opened for writing? don't know of a better way to tell
95
100
  # (unless we parse the mode string in the open case)
101
+ # hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more
102
+ # reason to use mode string parsing when available, and fall back to something like
103
+ # io.writeable? otherwise.
96
104
  @writeable = begin
97
- @io.flush
98
- true
105
+ if mode
106
+ IO::Mode.new(mode).writeable?
107
+ else
108
+ @io.flush
109
+ true
110
+ end
99
111
  rescue IOError
100
112
  false
101
113
  end
@@ -106,8 +118,8 @@ module Ole # :nodoc:
106
118
  @io.size > 0 ? load : clear
107
119
  end
108
120
 
109
- def self.open arg, mode=nil, opts={}
110
- ole = new arg, mode, opts
121
+ def self.open arg, mode=nil, params={}
122
+ ole = new arg, mode, params
111
123
  if block_given?
112
124
  begin yield ole
113
125
  ensure; ole.close
@@ -117,6 +129,18 @@ module Ole # :nodoc:
117
129
  end
118
130
 
119
131
  # load document from file.
132
+ #
133
+ # TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
134
+ #
135
+ # 1. reterminate any chain not ending in EOC.
136
+ # compare file size with actually allocated blocks per file.
137
+ # 2. pass through all chain heads looking for collisions, and making sure nothing points to them
138
+ # (ie they are really heads). in both sbat and mbat
139
+ # 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
140
+ # in the bat for them.
141
+ # 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
142
+ # (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
143
+ # will be automatically thrown away at close time.
120
144
  def load
121
145
  # we always read 512 for the header block. if the block size ends up being different,
122
146
  # what happens to the 109 fat entries. are there more/less entries?
@@ -159,7 +183,8 @@ module Ole # :nodoc:
159
183
 
160
184
  # FIXME i don't currently use @header.num_sbat which i should
161
185
  # hmm. nor do i write it. it means what exactly again?
162
- @sb_file = RangesIOResizeable.new @bbat, @root.first_block, @root.size
186
+ # which mode to use here?
187
+ @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
163
188
  @sbat = AllocationTable::Small.new self
164
189
  @sbat.load @bbat.read(@header.sbat_start)
165
190
  end
@@ -170,25 +195,16 @@ module Ole # :nodoc:
170
195
  @io.close if @close_parent
171
196
  end
172
197
 
173
- # should have a #open_dirent i think. and use it in load and flush. neater.
174
- # also was thinking about Dirent#open_padding. then i can more easily clean up the padding
175
- # to be 0.chr
176
- =begin
177
- thoughts on fixes:
178
- 1. reterminate any chain not ending in EOC.
179
- 2. pass through all chain heads looking for collisions, and making sure nothing points to them
180
- (ie they are really heads).
181
- 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
182
- in the bat for them.
183
- this stuff will ensure reliability of input better. otherwise, its actually worth doing a repack
184
- directly after read, to ensure the above is probably acounted for, before subsequent writes possibly
185
- destroy things.
186
- =end
187
-
188
198
  # the flush method is the main "save" method. all file contents are always
189
199
  # written directly to the file by the RangesIO objects, all this method does
190
200
  # is write out all the file meta data - dirents, allocation tables, file header
191
201
  # etc.
202
+ #
203
+ # maybe add an option to zero the padding, and any remaining avail blocks in the
204
+ # allocation table.
205
+ #
206
+ # TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
207
+ # of bbat to AllocationTable::Big.
192
208
  def flush
193
209
  # update root dirent, and flatten dirent tree
194
210
  @root.name = 'Root Entry'
@@ -197,8 +213,7 @@ destroy things.
197
213
  @dirents = @root.flatten
198
214
 
199
215
  # serialize the dirents using the bbat
200
- RangesIOResizeable.open @bbat, @header.dirent_start do |io|
201
- io.truncate 0
216
+ RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
202
217
  @dirents.each { |dirent| io.write dirent.to_s }
203
218
  padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
204
219
  io.write 0.chr * padding
@@ -207,8 +222,7 @@ destroy things.
207
222
 
208
223
  # serialize the sbat
209
224
  # perhaps the blocks used by the sbat should be marked with BAT?
210
- RangesIOResizeable.open @bbat, @header.sbat_start do |io|
211
- io.truncate 0
225
+ RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
212
226
  io.write @sbat.to_s
213
227
  @header.sbat_start = io.first_block
214
228
  @header.num_sbat = @bbat.chain(@header.sbat_start).length
@@ -224,50 +238,82 @@ destroy things.
224
238
  b == AllocationTable::BAT || b == AllocationTable::META_BAT ?
225
239
  AllocationTable::AVAIL : b
226
240
  end
227
- io = RangesIOResizeable.new @bbat, AllocationTable::EOC
228
241
 
229
242
  # currently we use a loop. this could be better, but basically,
230
243
  # the act of writing out the bat, itself requires blocks which get
231
244
  # recorded in the bat.
245
+ #
246
+ # i'm sure that there'd be some simpler closed form solution to this. solve
247
+ # recursive func:
248
+ #
249
+ # num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
250
+ # bbat_len = initial_bbat_len + num_mbat_blocks
251
+ # mbat_len = ceil(bbat_len * 4 / block_size)
252
+ #
253
+ # the actual bbat allocation table is itself stored throughout the file, and that chain
254
+ # is stored in the initial blocks, and the mbat blocks.
255
+ num_mbat_blocks = 0
256
+ io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
257
+ # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
258
+ # contiguous chunk at the end.
259
+ # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
260
+ # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
261
+ # be fixed easily, add an io truncate
262
+ @bbat.truncate!
263
+ before = @io.size
264
+ @io.truncate @bbat.block_size * (@bbat.length + 1)
232
265
  while true
233
- bbat_data = @bbat.to_s
234
- mbat_chain = @bbat.chain io.first_block
235
- raise NotImplementedError, "don't handle writing out extra META_BAT blocks yet" if mbat_chain.length > 109
236
- # so we can ignore meta blocks in this calculation:
237
- break if io.size >= bbat_data.length # it shouldn't be bigger right?
238
- # this may grow the bbat, depending on existing available blocks
239
- io.truncate bbat_data.length
266
+ # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
267
+ # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
268
+ # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
269
+ # mbat must remain contiguous.
270
+ bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
271
+ # now storing the excess mbat blocks also increases the size of the bbat:
272
+ new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / @bbat.block_size.to_f).ceil
273
+ if new_num_mbat_blocks != num_mbat_blocks
274
+ # need more space for the mbat.
275
+ num_mbat_blocks = new_num_mbat_blocks
276
+ elsif io.size != bbat_data_len
277
+ # need more space for the bat
278
+ # this may grow the bbat, depending on existing available blocks
279
+ io.truncate bbat_data_len
280
+ else
281
+ break
282
+ end
240
283
  end
241
284
 
242
285
  # now extract the info we want:
243
286
  ranges = io.ranges
244
- mbat_chain = @bbat.chain io.first_block
287
+ bbat_chain = @bbat.chain io.first_block
288
+ # the extra mbat data is a set of contiguous blocks at the end
245
289
  io.close
246
- mbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
247
- @header.num_bat = mbat_chain.length
290
+ bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
291
+ # tack on the mbat stuff
292
+ @header.mbat_start = @bbat.length # need to record this here before tacking on the mbat
293
+ @header.num_bat = bbat_chain.length
294
+ num_mbat_blocks.times { @bbat << AllocationTable::META_BAT }
248
295
 
249
296
  # now finally write the bbat, using a not resizable io.
250
- RangesIO.open(@io, ranges) { |io| io.write @bbat.to_s }
251
-
252
- # this is the mbat
253
- mbat_chain += [AllocationTable::AVAIL] * (109 - mbat_chain.length)
254
- @header.mbat_start = AllocationTable::EOC
255
- @header.num_mbat = 0
256
-
257
- =begin
258
- # now that spanned a number of blocks:
259
- mbat = (0...@header.num_bat).map { |i| i + base }
260
- mbat += [AllocationTable::AVAIL] * (109 - mbat.length) if mbat.length < 109
261
- header_mbat = mbat[0...109]
262
- other_mbat_data = mbat[109..-1].pack 'L*'
263
- @header.mbat_start = base + @header.num_bat
264
- @header.num_mbat = (other_mbat_data.length / new_bbat.block_size.to_f).ceil
265
- io.write other_mbat_data
266
- =end
297
+ # the mode here will be 'r', which allows write atm.
298
+ RangesIO.open(@io, :ranges => ranges) { |io| io.write @bbat.to_s }
299
+
300
+ # this is the mbat. pad it out.
301
+ bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
302
+ @header.num_mbat = num_mbat_blocks
303
+ if num_mbat_blocks == 0
304
+ @header.mbat_start = AllocationTable::EOC
305
+ else
306
+ # write out the mbat blocks now. first of all, where are they going to be?
307
+ mbat_data = bbat_chain[109..-1]
308
+ q = @bbat.block_size / 4
309
+ mbat_data += [AllocationTable::AVAIL] *((mbat_data.length / q.to_f).ceil * q - mbat_data.length)
310
+ ranges = @bbat.ranges((0...num_mbat_blocks).map { |i| @header.mbat_start + i })
311
+ RangesIO.open(@io, :ranges => ranges) { |io| io.write mbat_data.pack('L*') }
312
+ end
267
313
 
268
314
  # now seek back and write the header out
269
315
  @io.seek 0
270
- @io.write @header.to_s + mbat_chain.pack('L*')
316
+ @io.write @header.to_s + bbat_chain[0, 109].pack('L*')
271
317
  @io.flush
272
318
  end
273
319
 
@@ -280,16 +326,17 @@ destroy things.
280
326
  @dirents = [@root]
281
327
  @root.idx = 0
282
328
  @sb_file.close if @sb_file
283
- @sb_file = RangesIOResizeable.new @bbat, AllocationTable::EOC
329
+ @sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
284
330
  @sbat = AllocationTable::Small.new self
285
331
  # throw everything else the hell away
286
332
  @io.truncate 0
287
333
  end
288
334
 
289
335
  # could be useful with mis-behaving ole documents. or to just clean them up.
336
+ # FIXME: heard Tempfile is not binary on windows. check
290
337
  def repack temp=:file
291
338
  case temp
292
- when :file; Tempfile.open 'w+', &method(:repack_using_io)
339
+ when :file; Tempfile.open 'ole-repack', &method(:repack_using_io)
293
340
  when :mem; StringIO.open(&method(:repack_using_io))
294
341
  else raise ArgumentError, "unknown temp backing #{temp.inspect}"
295
342
  end
@@ -299,7 +346,7 @@ destroy things.
299
346
  @io.rewind
300
347
  IO.copy @io, temp_io
301
348
  clear
302
- Storage.open temp_io, nil, @opts do |temp_ole|
349
+ Storage.open temp_io, nil, @params do |temp_ole|
303
350
  #temp_ole.root.type = :dir
304
351
  Dirent.copy temp_ole.root, root
305
352
  end
@@ -428,6 +475,10 @@ destroy things.
428
475
  temp.reverse
429
476
  end
430
477
 
478
+ def truncate!
479
+ replace truncate
480
+ end
481
+
431
482
  def to_s
432
483
  table = truncate
433
484
  # pad it out some
@@ -474,7 +525,7 @@ destroy things.
474
525
  # quick shortcut. chain can be either a head (in which case the table is used to
475
526
  # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
476
527
  def open chain, size=nil, &block
477
- RangesIO.open @io, ranges(chain, size), &block
528
+ RangesIO.open @io, :ranges => ranges(chain, size), &block
478
529
  end
479
530
 
480
531
  def read chain, size=nil
@@ -566,12 +617,15 @@ destroy things.
566
617
  class RangesIOResizeable < RangesIO
567
618
  attr_reader :bat
568
619
  attr_accessor :first_block
569
- def initialize bat, first_block, size=nil
620
+ def initialize bat, mode='r', params={}
621
+ mode, params = 'r', mode if Hash === mode
622
+ first_block, size = params.values_at :first_block, :size
623
+ raise ArgumentError, 'must specify first_block' unless first_block
570
624
  @bat = bat
571
625
  self.first_block = first_block
572
- # we know cache the blocks chain, for faster resizing.
626
+ # we now cache the blocks chain, for faster resizing.
573
627
  @blocks = @bat.chain first_block
574
- super @bat.io, @bat.ranges(@blocks, size)
628
+ super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
575
629
  end
576
630
 
577
631
  def truncate size
@@ -597,9 +651,10 @@ destroy things.
597
651
  # between bats based on size, and updating the dirent.
598
652
  class RangesIOMigrateable < RangesIOResizeable
599
653
  attr_reader :dirent
600
- def initialize dirent
654
+ def initialize dirent, mode='r'
601
655
  @dirent = dirent
602
- super @dirent.ole.bat_for_size(@dirent.size), @dirent.first_block, @dirent.size
656
+ super @dirent.ole.bat_for_size(@dirent.size), mode,
657
+ :first_block => @dirent.first_block, :size => @dirent.size
603
658
  end
604
659
 
605
660
  def truncate size
@@ -692,30 +747,30 @@ destroy things.
692
747
  attr_accessor :children
693
748
  attr_accessor :name
694
749
  attr_reader :ole, :type, :create_time, :modify_time
695
- def initialize ole, values=DEFAULT, opts={}
750
+ def initialize ole, values=DEFAULT, params={}
696
751
  @ole = ole
697
- values, opts = DEFAULT, values if Hash === values
752
+ values, params = DEFAULT, values if Hash === values
698
753
  values = values.unpack(PACK) if String === values
699
754
  super(*values)
700
755
 
701
756
  # extra parsing from the actual struct values
702
- @name = opts[:name] || Types::FROM_UTF16.iconv(name_utf16[0...name_len].sub(/\x00\x00$/, ''))
703
- @type = if opts[:type]
704
- unless TYPE_MAP.values.include?(opts[:type])
705
- raise ArgumentError, "unknown type #{opts[:type].inspect}"
757
+ @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len].sub(/\x00\x00$/, ''))
758
+ @type = if params[:type]
759
+ unless TYPE_MAP.values.include?(params[:type])
760
+ raise ArgumentError, "unknown type #{params[:type].inspect}"
706
761
  end
707
- opts[:type]
762
+ params[:type]
708
763
  else
709
764
  TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}"
710
765
  end
711
766
 
712
767
  # further extra type specific stuff
713
768
  if file?
714
- default_time = @ole.opts[:update_timestamps] ? Time.now : nil
769
+ default_time = @ole.params[:update_timestamps] ? Time.now : nil
715
770
  @create_time ||= default_time
716
771
  @modify_time ||= default_time
717
- @create_time = Types.load_time(create_time_str) if create_time_str
718
- @modify_time = Types.load_time(create_time_str) if modify_time_str
772
+ @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str
773
+ @modify_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if modify_time_str
719
774
  @children = nil
720
775
  else
721
776
  @create_time = nil
@@ -727,7 +782,7 @@ destroy things.
727
782
 
728
783
  def open mode='r'
729
784
  raise Errno::EISDIR unless file?
730
- io = RangesIOMigrateable.new self
785
+ io = RangesIOMigrateable.new self, mode
731
786
  # TODO work on the mode string stuff a bit more.
732
787
  # maybe let the io object know about the mode, so it can refuse
733
788
  # to work for read/write appropriately. maybe redefine all unusable
@@ -736,9 +791,12 @@ destroy things.
736
791
  # i need to do 'a' etc.
737
792
  case mode
738
793
  when 'r', 'r+'
739
- # as i don't enforce reading/writing, nothing changes here
794
+ # as i don't enforce reading/writing, nothing changes here. kind of
795
+ # need to enforce it if i want modify times to work better.
796
+ @modify_time = Time.now if mode == 'r+'
740
797
  when 'w'
741
- io.truncate 0
798
+ @modify_time = Time.now
799
+ #io.truncate 0
742
800
  else
743
801
  raise NotImplementedError, "unsupported mode - #{mode.inspect}"
744
802
  end
@@ -763,13 +821,14 @@ destroy things.
763
821
  !file?
764
822
  end
765
823
 
824
+ # maybe need some options regarding case sensitivity.
766
825
  def / name
767
826
  children.find { |child| name === child.name }
768
827
  end
769
828
 
770
829
  def [] idx
771
830
  if String === idx
772
- warn 'String form of Dirent#[] is deprecated'
831
+ #warn 'String form of Dirent#[] is deprecated'
773
832
  self / idx
774
833
  else
775
834
  super
@@ -778,7 +837,7 @@ destroy things.
778
837
 
779
838
  # move to ruby-msg. and remove from here
780
839
  def time
781
- warn 'Dirent#time is deprecated'
840
+ #warn 'Dirent#time is deprecated'
782
841
  create_time || modify_time
783
842
  end
784
843
 
@@ -813,7 +872,7 @@ destroy things.
813
872
  end
814
873
 
815
874
  def to_s
816
- tmp = Types::TO_UTF16.iconv(name)
875
+ tmp = Types::Variant.dump(Types::VT_LPWSTR, name)
817
876
  tmp = tmp[0, 62] if tmp.length > 62
818
877
  tmp += 0.chr * 2
819
878
  self.name_len = tmp.length
@@ -824,9 +883,12 @@ destroy things.
824
883
  # note not dir?, so as not to override root's first_block
825
884
  self.first_block = Dirent::EOT if type == :dir
826
885
  if file?
827
- if @ole.opts[:update_timestamps]
828
- self.create_time_str = Types.save_time @create_time
829
- self.modify_time_str = Types.save_time Time.now
886
+ # this is messed up. it changes the time stamps regardless of whether the file
887
+ # was actually touched. instead, any open call with a writeable mode, should update
888
+ # the modify time. create time would be set in new.
889
+ if @ole.params[:update_timestamps]
890
+ self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time
891
+ self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time
830
892
  end
831
893
  else
832
894
  self.create_time_str = 0.chr * 8
@@ -850,16 +912,6 @@ destroy things.
850
912
  str + '>'
851
913
  end
852
914
 
853
- # --------
854
- # and for creation of a dirent. don't like the name. is it a file or a directory?
855
- # assign to type later? io will be empty.
856
- def new_child type
857
- child = Dirent.new ole, :type => type
858
- children << child
859
- yield child if block_given?
860
- child
861
- end
862
-
863
915
  def delete child
864
916
  # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
865
917
  raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
@@ -874,7 +926,9 @@ destroy things.
874
926
  dst.name = src.name
875
927
  if src.dir?
876
928
  src.children.each do |src_child|
877
- dst.new_child(src_child.type) { |dst_child| Dirent.copy src_child, dst_child }
929
+ dst_child = Dirent.new dst.ole, :type => src_child.type
930
+ dst.children << dst_child
931
+ Dirent.copy src_child, dst_child
878
932
  end
879
933
  else
880
934
  src.open do |src_io|