ruby-ole 1.2.2 → 1.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,22 @@
1
+ == 1.2.3 / 2007-12-28
2
+
3
+ - MBAT write support re-implemented. Can now write files over ~8mb again.
4
+ - Minor fixes (truncation in #flush, file modification timestamps)
5
+ - More test coverage
6
+ - Initial (read-only) property set support.
7
+ - Complete filesystem api, to pass most of the rubyzip tests.
8
+ - Add a ChangeLog :).
9
+
10
+ == 1.2.2 / 2007-11-05
11
+
12
+ - Lots of test updates, 90% coverage.
13
+ - Fix +to_tree+ method to be more efficient, and stream output.
14
+ - Optimizations from benchmarks and profiling, mostly for writes. Fixed
15
+ AllocationTable#resize_chain, RangesIOResizable#truncate and
16
+ AllocationTable#free_block.
17
+ - Add in filesystem test file from rubyzip, and start working on a
18
+ filesystem api.
19
+
20
+ == 1.2.1 / 2007-08-20
21
+
22
+ - Separate out from ruby-msg as new project.
data/Rakefile CHANGED
@@ -47,16 +47,18 @@ spec = Gem::Specification.new do |s|
47
47
  s.version = PKG_VERSION
48
48
  s.summary = %q{Ruby OLE library.}
49
49
  s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
50
- s.authors = ["Charles Lowe"]
50
+ s.authors = ['Charles Lowe']
51
51
  s.email = %q{aquasync@gmail.com}
52
52
  s.homepage = %q{http://code.google.com/p/ruby-ole}
53
- #s.rubyforge_project = %q{ruby-ole}
53
+ s.rubyforge_project = %q{ruby-ole}
54
54
 
55
55
  s.executables = ['oletool']
56
- s.files = ['Rakefile']
57
- s.files += Dir.glob("lib/**/*.rb")
58
- s.files += Dir.glob("test/test_*.rb") + Dir.glob("test/*.doc")
59
- s.files += Dir.glob("bin/*")
56
+ s.files = ['Rakefile', 'ChangeLog']
57
+ s.files += FileList['lib/**/*.rb']
58
+ s.files += FileList['test/test_*.rb', 'test/*.doc']
59
+ s.files += FileList['test/oleWithDirs.ole', 'test/test_SummaryInformation']
60
+ s.files += FileList['bin/*']
61
+ s.test_files = FileList['test/test_*.rb']
60
62
 
61
63
  s.has_rdoc = true
62
64
  s.rdoc_options += [
@@ -64,13 +66,11 @@ spec = Gem::Specification.new do |s|
64
66
  '--title', "#{PKG_NAME} documentation",
65
67
  '--tab-width', '2'
66
68
  ]
67
-
68
- s.autorequire = 'ole/storage'
69
69
  end
70
70
 
71
71
  Rake::GemPackageTask.new(spec) do |t|
72
72
  t.gem_spec = spec
73
- t.need_tar = true
73
+ t.need_tar = false
74
74
  t.need_zip = false
75
75
  t.package_dir = 'build'
76
76
  end
@@ -29,8 +29,8 @@
29
29
  #
30
30
  # TODO
31
31
  #
32
- # - check for all new_child calls. eg Dir.mkdir, and File.open, and also
33
- # File.rename, to add in filename length checks (max 32 / 31 or something).
32
+ # - check Dir.mkdir, and File.open, and File.rename, to add in filename
33
+ # length checks (max 32 / 31 or something).
34
34
  # do the automatic truncation, and add in any necessary warnings.
35
35
  #
36
36
  # - File.split('a/') == File.split('a') == ['.', 'a']
@@ -130,6 +130,8 @@ module Ole # :nodoc:
130
130
  path = "#{pwd}/#{path}" unless path =~ /^\//
131
131
  # at this point its already absolute. we use File.expand_path
132
132
  # just for the .. and . handling
133
+ # Hmmm, FIXME: won't work on windows i think. on windows it will prepend
134
+ # the current drive i believe. may just need to strip the first 2 chars.
133
135
  File.expand_path path
134
136
  end
135
137
 
@@ -160,8 +162,7 @@ module Ole # :nodoc:
160
162
  end
161
163
 
162
164
  def open path, mode='r', &block
163
- # FIXME - mode strings are more complex than this.
164
- if mode == 'w'
165
+ if IO::Mode.new(mode).create?
165
166
  begin
166
167
  dirent = dirent_from_path path
167
168
  rescue Errno::ENOENT
@@ -169,8 +170,7 @@ module Ole # :nodoc:
169
170
  # a get_parent_dirent function.
170
171
  parent_path, basename = File.split expand_path(path)
171
172
  parent = @ole.dir.send :dirent_from_path, parent_path, path
172
- dirent = parent.new_child :file
173
- dirent.name = basename
173
+ parent.children << dirent = Dirent.new(@ole, :type => :file, :name => basename)
174
174
  end
175
175
  else
176
176
  dirent = dirent_from_path path
@@ -351,7 +351,7 @@ module Ole # :nodoc:
351
351
  # now, we first should ensure that it doesn't already exist
352
352
  # either as a file or a directory.
353
353
  raise Errno::EEXIST, path if parent/basename
354
- parent.new_child(:dir) { |child| child.name = basename }
354
+ parent.children << Dirent.new(@ole, :type => :dir, :name => basename)
355
355
  0
356
356
  end
357
357
 
@@ -1,37 +1,42 @@
1
+ require 'ole/types'
1
2
 
2
3
  module Ole
3
4
  module Types
4
- # should have a list of the VT_* variant types, and have all the serialization related code
5
- # here... implement dump & load functions like marshalling
6
- class Guid
7
- SIZE = 16
8
-
9
- def self.load str
10
- Types.load_guid str
11
- end
12
- end
13
-
14
- # see http://poi.apache.org/hpsf/internals.html
5
+ #
6
+ # The PropertySet class currently supports readonly access to the properties
7
+ # serialized in "property set" streams, such as the file "\005SummaryInformation",
8
+ # in OLE files.
9
+ #
10
+ # I think this has its roots in MFC property set serialization.
11
+ #
12
+ # See http://poi.apache.org/hpsf/internals.html for details
13
+ #
15
14
  class PropertySet
16
15
  HEADER_SIZE = 28
17
- HEADER_UNPACK = "vvVa#{Guid::SIZE}V"
16
+ HEADER_UNPACK = "vvVa#{Clsid::SIZE}V"
18
17
  OS_MAP = {
19
18
  0 => :win16,
20
19
  1 => :mac,
21
20
  2 => :win32
22
21
  }
23
22
 
23
+ # define a smattering of the property set guids.
24
+ FMTID_SummaryInformation = Clsid.parse '{f29f85e0-4ff9-1068-ab91-08002b27b3d9}'
25
+ FMTID_DocSummaryInformation = Clsid.parse '{d5cdd502-2e9c-101b-9397-08002b2cf9ae}'
26
+ FMTID_UserDefinedProperties = Clsid.parse '{d5cdd505-2e9c-101b-9397-08002b2cf9ae}'
27
+
24
28
  class Section < Struct.new(:guid, :offset)
29
+ include Variant::Constants
25
30
  include Enumerable
26
31
 
27
- SIZE = Guid::SIZE + 4
28
- UNPACK_STR = "a#{Guid::SIZE}v"
32
+ SIZE = Clsid::SIZE + 4
33
+ UNPACK_STR = "a#{Clsid::SIZE}v"
29
34
 
30
35
  attr_reader :length
31
36
  def initialize str, property_set
32
37
  @property_set = property_set
33
38
  super(*str.unpack(UNPACK_STR))
34
- self.guid = Guid.load guid
39
+ self.guid = Clsid.load guid
35
40
  load_header
36
41
  end
37
42
 
@@ -49,14 +54,20 @@ module Ole
49
54
  io.read(length * 8).scan(/.{8}/m).each do |str|
50
55
  id, property_offset = str.unpack 'V2'
51
56
  io.seek offset + property_offset
52
- type = io.read(4).unpack('V')[0]
53
- yield id, type, io.read(10)
57
+ type, value = io.read(8).unpack('V2')
58
+ # is the method of serialization here custom?
59
+ case type
60
+ when VT_LPSTR, VT_LPWSTR
61
+ value = Variant.load type, io.read(value)
62
+ # ....
63
+ end
64
+ yield id, type, value
54
65
  end
55
66
  self
56
67
  end
57
68
 
58
69
  def properties
59
- to_a
70
+ to_enum.to_a
60
71
  end
61
72
  end
62
73
 
@@ -66,13 +77,13 @@ module Ole
66
77
  load_header io.read(HEADER_SIZE)
67
78
  load_section_list io.read(@num_sections * Section::SIZE)
68
79
  # expect no gap between last section and start of data.
69
- Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
80
+ #Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
70
81
  end
71
82
 
72
83
  def load_header str
73
84
  @signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_UNPACK
74
85
  # should i check that unknown == 0? it usually is. so is the guid actually
75
- @guid = Guid.load @guid
86
+ @guid = Clsid.load @guid
76
87
  @os = OS_MAP[@os_id] || Log.warn("unknown operating system id #{@os_id}")
77
88
  end
78
89
 
@@ -1,36 +1,5 @@
1
- class IOModeString
2
- def initialize mode='r'
3
- @mode = mode
4
- if @mode['b']
5
- @binary = true
6
- @mode = @mode.sub 'b', ''
7
- else
8
- @binary = false
9
- end
10
- if @mode[/\+$/]
11
- @plus = true
12
- @mode = @mode.sub(/\+$/, '')
13
- else
14
- @plus = false
15
- end
16
- end
17
-
18
- def explicit_binary?
19
- @binary
20
- end
21
-
22
- def binary?
23
- RUBY_PLATFORM !~ /win/ or @binary
24
- end
25
-
26
- def to_s
27
- @mode
28
- end
29
-
30
- def inspect
31
- "#<#{self.class}:#{to_s.inspect}>"
32
- end
33
- end
1
+ # need IO::Mode
2
+ require 'ole/support'
34
3
 
35
4
  #
36
5
  # = Introduction
@@ -71,26 +40,40 @@ end
71
40
  # This class isn't ole specific, maybe move it to my general ruby stream project.
72
41
  #
73
42
  class RangesIO
74
- attr_reader :io, :ranges, :size, :pos
43
+ attr_reader :io, :mode, :ranges, :size, :pos
75
44
  # +io+:: the parent io object that we are wrapping.
76
- #
77
- # +ranges+:: byte offsets, either:
78
- # 1. an array of ranges [1..2, 4..5, 6..8] or
79
- # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
80
- # (think the way String indexing works)
45
+ # +mode+:: the mode to use
46
+ # +params+:: hash of params.
47
+ # * :ranges - byte offsets, either:
48
+ # 1. an array of ranges [1..2, 4..5, 6..8] or
49
+ # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
50
+ # (think the way String indexing works)
51
+ # * :close_parent - boolean to close parent when this object is closed
81
52
  #
82
53
  # NOTE: the +ranges+ can overlap.
83
- def initialize io, ranges, opts={}
84
- @opts = {:close_parent => false}.merge opts
54
+ def initialize io, mode='r', params={}
55
+ mode, params = 'r', mode if Hash === mode
56
+ ranges = params[:ranges]
57
+ @params = {:close_parent => false}.merge params
58
+ @mode = IO::Mode.new mode
85
59
  @io = io
86
60
  # convert ranges to arrays. check for negative ranges?
61
+ ranges ||= [0, io.size]
87
62
  @ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
88
63
  # calculate size
89
64
  @size = @ranges.inject(0) { |total, (pos, len)| total + len }
90
65
  # initial position in the file
91
66
  @pos = 0
67
+
68
+ # handle some mode flags
69
+ truncate 0 if @mode.truncate?
70
+ seek size if @mode.append?
92
71
  end
93
72
 
73
+ #IOError: closed stream
74
+ # get this for reading, writing, everything...
75
+ #IOError: not opened for writing
76
+
94
77
  # add block form. TODO add test for this
95
78
  def self.open(*args, &block)
96
79
  ranges_io = new(*args)
@@ -120,7 +103,7 @@ class RangesIO
120
103
  alias tell :pos
121
104
 
122
105
  def close
123
- @io.close if @opts[:close_parent]
106
+ @io.close if @params[:close_parent]
124
107
  end
125
108
 
126
109
  # returns the [+offset+, +size+], pair inorder to read/write at +pos+
@@ -169,8 +152,7 @@ class RangesIO
169
152
  data
170
153
  end
171
154
 
172
- # you may override this call to update @ranges and @size, if applicable. then write
173
- # support can grow below
155
+ # you may override this call to update @ranges and @size, if applicable.
174
156
  def truncate size
175
157
  raise NotImplementedError, 'truncate not supported'
176
158
  end
@@ -232,3 +214,15 @@ class RangesIO
232
214
  end
233
215
  end
234
216
 
217
+ # this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
218
+ # only really needed for the allocation table writes etc. maybe just use explicit modes
219
+ # for those
220
+ # better yet write a test that breaks before I fix it.
221
+ class RangesIONonResizeable < RangesIO
222
+ def initialize io, mode='r', params={}
223
+ mode, params = 'r', mode if Hash === mode
224
+ flags = IO::Mode.new(mode).flags & ~IO::TRUNC
225
+ super io, flags, params
226
+ end
227
+ end
228
+
@@ -1,7 +1,3 @@
1
- #! /usr/bin/ruby -w
2
-
3
- $: << File.dirname(__FILE__) + '/..'
4
-
5
1
  require 'tempfile'
6
2
 
7
3
  require 'ole/base'
@@ -16,11 +12,6 @@ module Ole # :nodoc:
16
12
  # access to OLE2 structured storage files, such as those produced by
17
13
  # Microsoft Office, eg *.doc, *.msg etc.
18
14
  #
19
- # Initially based on chicago's libole, source available at
20
- # http://prdownloads.sf.net/chicago/ole.tgz
21
- # Later augmented with some corrections by inspecting pole, and (purely
22
- # for header definitions) gsf.
23
- #
24
15
  # = Usage
25
16
  #
26
17
  # Usage should be fairly straight forward:
@@ -49,24 +40,37 @@ module Ole # :nodoc:
49
40
  # ole.root["\001CompObj"].open { |f| f.write "blah blah" }
50
41
  # ole.close
51
42
  #
43
+ # = Thanks
44
+ #
45
+ # * The code contained in this project was initially based on chicago's libole
46
+ # (source available at http://prdownloads.sf.net/chicago/ole.tgz).
47
+ #
48
+ # * It was later augmented with some corrections by inspecting pole, and (purely
49
+ # for header definitions) gsf.
50
+ #
51
+ # * The property set parsing code came from the apache java project POIFS.
52
+ #
53
+ # * The excellent idea for using a pseudo file system style interface by providing
54
+ # #file and #dir methods which mimic File and Dir, was borrowed (along with almost
55
+ # unchanged tests!) from Thomas Sondergaard's rubyzip.
56
+ #
52
57
  # = TODO
53
58
  #
54
59
  # * the custom header cruft for Header and Dirent needs some love.
55
60
  # * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
56
61
  # and, in a manner of speaking, but arguably different, Storage itself.
57
- # they have differing api's which would be nice to clean.
62
+ # they have differing api's which would be nice to rethink.
58
63
  # AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
59
- # * need to fix META_BAT support in #flush.
60
64
  #
61
65
  class Storage
62
66
  # thrown for any bogus OLE file errors.
63
67
  class FormatError < StandardError # :nodoc:
64
68
  end
65
69
 
66
- VERSION = '1.2.2'
70
+ VERSION = '1.2.3'
67
71
 
68
72
  # options used at creation time
69
- attr_reader :opts
73
+ attr_reader :params
70
74
  # The top of the ole tree structure
71
75
  attr_reader :root
72
76
  # The tree structure in its original flattened form. only valid after #load, or #flush.
@@ -79,23 +83,31 @@ module Ole # :nodoc:
79
83
 
80
84
  # maybe include an option hash, and allow :close_parent => true, to be more general.
81
85
  # +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
82
- def initialize arg, mode=nil, opts={}
83
- opts, mode = mode, nil if Hash === mode
84
- opts = {:update_timestamps => true}.merge(opts)
85
- @opts = opts
86
+ def initialize arg, mode=nil, params={}
87
+ params, mode = mode, nil if Hash === mode
88
+ params = {:update_timestamps => true}.merge(params)
89
+ @params = params
86
90
 
87
91
  # get the io object
88
92
  @close_parent, @io = if String === arg
89
- [true, open(arg, mode || 'rb')]
93
+ mode ||= 'rb'
94
+ [true, open(arg, mode)]
90
95
  else
91
96
  raise ArgumentError, 'unable to specify mode string with io object' if mode
92
97
  [false, arg]
93
98
  end
94
99
  # do we have this file opened for writing? don't know of a better way to tell
95
100
  # (unless we parse the mode string in the open case)
101
+ # hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more
102
+ # reason to use mode string parsing when available, and fall back to something like
103
+ # io.writeable? otherwise.
96
104
  @writeable = begin
97
- @io.flush
98
- true
105
+ if mode
106
+ IO::Mode.new(mode).writeable?
107
+ else
108
+ @io.flush
109
+ true
110
+ end
99
111
  rescue IOError
100
112
  false
101
113
  end
@@ -106,8 +118,8 @@ module Ole # :nodoc:
106
118
  @io.size > 0 ? load : clear
107
119
  end
108
120
 
109
- def self.open arg, mode=nil, opts={}
110
- ole = new arg, mode, opts
121
+ def self.open arg, mode=nil, params={}
122
+ ole = new arg, mode, params
111
123
  if block_given?
112
124
  begin yield ole
113
125
  ensure; ole.close
@@ -117,6 +129,18 @@ module Ole # :nodoc:
117
129
  end
118
130
 
119
131
  # load document from file.
132
+ #
133
+ # TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
134
+ #
135
+ # 1. reterminate any chain not ending in EOC.
136
+ # compare file size with actually allocated blocks per file.
137
+ # 2. pass through all chain heads looking for collisions, and making sure nothing points to them
138
+ # (ie they are really heads). in both sbat and mbat
139
+ # 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
140
+ # in the bat for them.
141
+ # 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
142
+ # (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
143
+ # will be automatically thrown away at close time.
120
144
  def load
121
145
  # we always read 512 for the header block. if the block size ends up being different,
122
146
  # what happens to the 109 fat entries. are there more/less entries?
@@ -159,7 +183,8 @@ module Ole # :nodoc:
159
183
 
160
184
  # FIXME i don't currently use @header.num_sbat which i should
161
185
  # hmm. nor do i write it. it means what exactly again?
162
- @sb_file = RangesIOResizeable.new @bbat, @root.first_block, @root.size
186
+ # which mode to use here?
187
+ @sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
163
188
  @sbat = AllocationTable::Small.new self
164
189
  @sbat.load @bbat.read(@header.sbat_start)
165
190
  end
@@ -170,25 +195,16 @@ module Ole # :nodoc:
170
195
  @io.close if @close_parent
171
196
  end
172
197
 
173
- # should have a #open_dirent i think. and use it in load and flush. neater.
174
- # also was thinking about Dirent#open_padding. then i can more easily clean up the padding
175
- # to be 0.chr
176
- =begin
177
- thoughts on fixes:
178
- 1. reterminate any chain not ending in EOC.
179
- 2. pass through all chain heads looking for collisions, and making sure nothing points to them
180
- (ie they are really heads).
181
- 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
182
- in the bat for them.
183
- this stuff will ensure reliability of input better. otherwise, its actually worth doing a repack
184
- directly after read, to ensure the above is probably acounted for, before subsequent writes possibly
185
- destroy things.
186
- =end
187
-
188
198
  # the flush method is the main "save" method. all file contents are always
189
199
  # written directly to the file by the RangesIO objects, all this method does
190
200
  # is write out all the file meta data - dirents, allocation tables, file header
191
201
  # etc.
202
+ #
203
+ # maybe add an option to zero the padding, and any remaining avail blocks in the
204
+ # allocation table.
205
+ #
206
+ # TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
207
+ # of bbat to AllocationTable::Big.
192
208
  def flush
193
209
  # update root dirent, and flatten dirent tree
194
210
  @root.name = 'Root Entry'
@@ -197,8 +213,7 @@ destroy things.
197
213
  @dirents = @root.flatten
198
214
 
199
215
  # serialize the dirents using the bbat
200
- RangesIOResizeable.open @bbat, @header.dirent_start do |io|
201
- io.truncate 0
216
+ RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
202
217
  @dirents.each { |dirent| io.write dirent.to_s }
203
218
  padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
204
219
  io.write 0.chr * padding
@@ -207,8 +222,7 @@ destroy things.
207
222
 
208
223
  # serialize the sbat
209
224
  # perhaps the blocks used by the sbat should be marked with BAT?
210
- RangesIOResizeable.open @bbat, @header.sbat_start do |io|
211
- io.truncate 0
225
+ RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
212
226
  io.write @sbat.to_s
213
227
  @header.sbat_start = io.first_block
214
228
  @header.num_sbat = @bbat.chain(@header.sbat_start).length
@@ -224,50 +238,82 @@ destroy things.
224
238
  b == AllocationTable::BAT || b == AllocationTable::META_BAT ?
225
239
  AllocationTable::AVAIL : b
226
240
  end
227
- io = RangesIOResizeable.new @bbat, AllocationTable::EOC
228
241
 
229
242
  # currently we use a loop. this could be better, but basically,
230
243
  # the act of writing out the bat, itself requires blocks which get
231
244
  # recorded in the bat.
245
+ #
246
+ # i'm sure that there'd be some simpler closed form solution to this. solve
247
+ # recursive func:
248
+ #
249
+ # num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
250
+ # bbat_len = initial_bbat_len + num_mbat_blocks
251
+ # mbat_len = ceil(bbat_len * 4 / block_size)
252
+ #
253
+ # the actual bbat allocation table is itself stored throughout the file, and that chain
254
+ # is stored in the initial blocks, and the mbat blocks.
255
+ num_mbat_blocks = 0
256
+ io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
257
+ # truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
258
+ # contiguous chunk at the end.
259
+ # hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
260
+ # delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
261
+ # be fixed easily, add an io truncate
262
+ @bbat.truncate!
263
+ before = @io.size
264
+ @io.truncate @bbat.block_size * (@bbat.length + 1)
232
265
  while true
233
- bbat_data = @bbat.to_s
234
- mbat_chain = @bbat.chain io.first_block
235
- raise NotImplementedError, "don't handle writing out extra META_BAT blocks yet" if mbat_chain.length > 109
236
- # so we can ignore meta blocks in this calculation:
237
- break if io.size >= bbat_data.length # it shouldn't be bigger right?
238
- # this may grow the bbat, depending on existing available blocks
239
- io.truncate bbat_data.length
266
+ # get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
267
+ # the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
268
+ # progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
269
+ # mbat must remain contiguous.
270
+ bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
271
+ # now storing the excess mbat blocks also increases the size of the bbat:
272
+ new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / @bbat.block_size.to_f).ceil
273
+ if new_num_mbat_blocks != num_mbat_blocks
274
+ # need more space for the mbat.
275
+ num_mbat_blocks = new_num_mbat_blocks
276
+ elsif io.size != bbat_data_len
277
+ # need more space for the bat
278
+ # this may grow the bbat, depending on existing available blocks
279
+ io.truncate bbat_data_len
280
+ else
281
+ break
282
+ end
240
283
  end
241
284
 
242
285
  # now extract the info we want:
243
286
  ranges = io.ranges
244
- mbat_chain = @bbat.chain io.first_block
287
+ bbat_chain = @bbat.chain io.first_block
288
+ # the extra mbat data is a set of contiguous blocks at the end
245
289
  io.close
246
- mbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
247
- @header.num_bat = mbat_chain.length
290
+ bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
291
+ # tack on the mbat stuff
292
+ @header.mbat_start = @bbat.length # need to record this here before tacking on the mbat
293
+ @header.num_bat = bbat_chain.length
294
+ num_mbat_blocks.times { @bbat << AllocationTable::META_BAT }
248
295
 
249
296
  # now finally write the bbat, using a not resizable io.
250
- RangesIO.open(@io, ranges) { |io| io.write @bbat.to_s }
251
-
252
- # this is the mbat
253
- mbat_chain += [AllocationTable::AVAIL] * (109 - mbat_chain.length)
254
- @header.mbat_start = AllocationTable::EOC
255
- @header.num_mbat = 0
256
-
257
- =begin
258
- # now that spanned a number of blocks:
259
- mbat = (0...@header.num_bat).map { |i| i + base }
260
- mbat += [AllocationTable::AVAIL] * (109 - mbat.length) if mbat.length < 109
261
- header_mbat = mbat[0...109]
262
- other_mbat_data = mbat[109..-1].pack 'L*'
263
- @header.mbat_start = base + @header.num_bat
264
- @header.num_mbat = (other_mbat_data.length / new_bbat.block_size.to_f).ceil
265
- io.write other_mbat_data
266
- =end
297
+ # the mode here will be 'r', which allows write atm.
298
+ RangesIO.open(@io, :ranges => ranges) { |io| io.write @bbat.to_s }
299
+
300
+ # this is the mbat. pad it out.
301
+ bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
302
+ @header.num_mbat = num_mbat_blocks
303
+ if num_mbat_blocks == 0
304
+ @header.mbat_start = AllocationTable::EOC
305
+ else
306
+ # write out the mbat blocks now. first of all, where are they going to be?
307
+ mbat_data = bbat_chain[109..-1]
308
+ q = @bbat.block_size / 4
309
+ mbat_data += [AllocationTable::AVAIL] *((mbat_data.length / q.to_f).ceil * q - mbat_data.length)
310
+ ranges = @bbat.ranges((0...num_mbat_blocks).map { |i| @header.mbat_start + i })
311
+ RangesIO.open(@io, :ranges => ranges) { |io| io.write mbat_data.pack('L*') }
312
+ end
267
313
 
268
314
  # now seek back and write the header out
269
315
  @io.seek 0
270
- @io.write @header.to_s + mbat_chain.pack('L*')
316
+ @io.write @header.to_s + bbat_chain[0, 109].pack('L*')
271
317
  @io.flush
272
318
  end
273
319
 
@@ -280,16 +326,17 @@ destroy things.
280
326
  @dirents = [@root]
281
327
  @root.idx = 0
282
328
  @sb_file.close if @sb_file
283
- @sb_file = RangesIOResizeable.new @bbat, AllocationTable::EOC
329
+ @sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
284
330
  @sbat = AllocationTable::Small.new self
285
331
  # throw everything else the hell away
286
332
  @io.truncate 0
287
333
  end
288
334
 
289
335
  # could be useful with mis-behaving ole documents. or to just clean them up.
336
+ # FIXME: heard Tempfile is not binary on windows. check
290
337
  def repack temp=:file
291
338
  case temp
292
- when :file; Tempfile.open 'w+', &method(:repack_using_io)
339
+ when :file; Tempfile.open 'ole-repack', &method(:repack_using_io)
293
340
  when :mem; StringIO.open(&method(:repack_using_io))
294
341
  else raise ArgumentError, "unknown temp backing #{temp.inspect}"
295
342
  end
@@ -299,7 +346,7 @@ destroy things.
299
346
  @io.rewind
300
347
  IO.copy @io, temp_io
301
348
  clear
302
- Storage.open temp_io, nil, @opts do |temp_ole|
349
+ Storage.open temp_io, nil, @params do |temp_ole|
303
350
  #temp_ole.root.type = :dir
304
351
  Dirent.copy temp_ole.root, root
305
352
  end
@@ -428,6 +475,10 @@ destroy things.
428
475
  temp.reverse
429
476
  end
430
477
 
478
+ def truncate!
479
+ replace truncate
480
+ end
481
+
431
482
  def to_s
432
483
  table = truncate
433
484
  # pad it out some
@@ -474,7 +525,7 @@ destroy things.
474
525
  # quick shortcut. chain can be either a head (in which case the table is used to
475
526
  # turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
476
527
  def open chain, size=nil, &block
477
- RangesIO.open @io, ranges(chain, size), &block
528
+ RangesIO.open @io, :ranges => ranges(chain, size), &block
478
529
  end
479
530
 
480
531
  def read chain, size=nil
@@ -566,12 +617,15 @@ destroy things.
566
617
  class RangesIOResizeable < RangesIO
567
618
  attr_reader :bat
568
619
  attr_accessor :first_block
569
- def initialize bat, first_block, size=nil
620
+ def initialize bat, mode='r', params={}
621
+ mode, params = 'r', mode if Hash === mode
622
+ first_block, size = params.values_at :first_block, :size
623
+ raise ArgumentError, 'must specify first_block' unless first_block
570
624
  @bat = bat
571
625
  self.first_block = first_block
572
- # we know cache the blocks chain, for faster resizing.
626
+ # we now cache the blocks chain, for faster resizing.
573
627
  @blocks = @bat.chain first_block
574
- super @bat.io, @bat.ranges(@blocks, size)
628
+ super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
575
629
  end
576
630
 
577
631
  def truncate size
@@ -597,9 +651,10 @@ destroy things.
597
651
  # between bats based on size, and updating the dirent.
598
652
  class RangesIOMigrateable < RangesIOResizeable
599
653
  attr_reader :dirent
600
- def initialize dirent
654
+ def initialize dirent, mode='r'
601
655
  @dirent = dirent
602
- super @dirent.ole.bat_for_size(@dirent.size), @dirent.first_block, @dirent.size
656
+ super @dirent.ole.bat_for_size(@dirent.size), mode,
657
+ :first_block => @dirent.first_block, :size => @dirent.size
603
658
  end
604
659
 
605
660
  def truncate size
@@ -692,30 +747,30 @@ destroy things.
692
747
  attr_accessor :children
693
748
  attr_accessor :name
694
749
  attr_reader :ole, :type, :create_time, :modify_time
695
- def initialize ole, values=DEFAULT, opts={}
750
+ def initialize ole, values=DEFAULT, params={}
696
751
  @ole = ole
697
- values, opts = DEFAULT, values if Hash === values
752
+ values, params = DEFAULT, values if Hash === values
698
753
  values = values.unpack(PACK) if String === values
699
754
  super(*values)
700
755
 
701
756
  # extra parsing from the actual struct values
702
- @name = opts[:name] || Types::FROM_UTF16.iconv(name_utf16[0...name_len].sub(/\x00\x00$/, ''))
703
- @type = if opts[:type]
704
- unless TYPE_MAP.values.include?(opts[:type])
705
- raise ArgumentError, "unknown type #{opts[:type].inspect}"
757
+ @name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len].sub(/\x00\x00$/, ''))
758
+ @type = if params[:type]
759
+ unless TYPE_MAP.values.include?(params[:type])
760
+ raise ArgumentError, "unknown type #{params[:type].inspect}"
706
761
  end
707
- opts[:type]
762
+ params[:type]
708
763
  else
709
764
  TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}"
710
765
  end
711
766
 
712
767
  # further extra type specific stuff
713
768
  if file?
714
- default_time = @ole.opts[:update_timestamps] ? Time.now : nil
769
+ default_time = @ole.params[:update_timestamps] ? Time.now : nil
715
770
  @create_time ||= default_time
716
771
  @modify_time ||= default_time
717
- @create_time = Types.load_time(create_time_str) if create_time_str
718
- @modify_time = Types.load_time(create_time_str) if modify_time_str
772
+ @create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str
773
+ @modify_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if modify_time_str
719
774
  @children = nil
720
775
  else
721
776
  @create_time = nil
@@ -727,7 +782,7 @@ destroy things.
727
782
 
728
783
  def open mode='r'
729
784
  raise Errno::EISDIR unless file?
730
- io = RangesIOMigrateable.new self
785
+ io = RangesIOMigrateable.new self, mode
731
786
  # TODO work on the mode string stuff a bit more.
732
787
  # maybe let the io object know about the mode, so it can refuse
733
788
  # to work for read/write appropriately. maybe redefine all unusable
@@ -736,9 +791,12 @@ destroy things.
736
791
  # i need to do 'a' etc.
737
792
  case mode
738
793
  when 'r', 'r+'
739
- # as i don't enforce reading/writing, nothing changes here
794
+ # as i don't enforce reading/writing, nothing changes here. kind of
795
+ # need to enforce that if i want modify times to work better.
796
+ @modify_time = Time.now if mode == 'r+'
740
797
  when 'w'
741
- io.truncate 0
798
+ @modify_time = Time.now
799
+ #io.truncate 0
742
800
  else
743
801
  raise NotImplementedError, "unsupported mode - #{mode.inspect}"
744
802
  end
@@ -763,13 +821,14 @@ destroy things.
763
821
  !file?
764
822
  end
765
823
 
824
+ # maybe need some options regarding case sensitivity.
766
825
  def / name
767
826
  children.find { |child| name === child.name }
768
827
  end
769
828
 
770
829
  def [] idx
771
830
  if String === idx
772
- warn 'String form of Dirent#[] is deprecated'
831
+ #warn 'String form of Dirent#[] is deprecated'
773
832
  self / idx
774
833
  else
775
834
  super
@@ -778,7 +837,7 @@ destroy things.
778
837
 
779
838
  # move to ruby-msg. and remove from here
780
839
  def time
781
- warn 'Dirent#time is deprecated'
840
+ #warn 'Dirent#time is deprecated'
782
841
  create_time || modify_time
783
842
  end
784
843
 
@@ -813,7 +872,7 @@ destroy things.
813
872
  end
814
873
 
815
874
  def to_s
816
- tmp = Types::TO_UTF16.iconv(name)
875
+ tmp = Types::Variant.dump(Types::VT_LPWSTR, name)
817
876
  tmp = tmp[0, 62] if tmp.length > 62
818
877
  tmp += 0.chr * 2
819
878
  self.name_len = tmp.length
@@ -824,9 +883,12 @@ destroy things.
824
883
  # note not dir?, so as not to override root's first_block
825
884
  self.first_block = Dirent::EOT if type == :dir
826
885
  if file?
827
- if @ole.opts[:update_timestamps]
828
- self.create_time_str = Types.save_time @create_time
829
- self.modify_time_str = Types.save_time Time.now
886
+ # this is messed up. it changes the time stamps regardless of whether the file
887
+ # was actually touched. instead, any open call with a writeable mode, should update
888
+ # the modify time. create time would be set in new.
889
+ if @ole.params[:update_timestamps]
890
+ self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time
891
+ self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time
830
892
  end
831
893
  else
832
894
  self.create_time_str = 0.chr * 8
@@ -850,16 +912,6 @@ destroy things.
850
912
  str + '>'
851
913
  end
852
914
 
853
- # --------
854
- # and for creation of a dirent. don't like the name. is it a file or a directory?
855
- # assign to type later? io will be empty.
856
- def new_child type
857
- child = Dirent.new ole, :type => type
858
- children << child
859
- yield child if block_given?
860
- child
861
- end
862
-
863
915
  def delete child
864
916
  # remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
865
917
  raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
@@ -874,7 +926,9 @@ destroy things.
874
926
  dst.name = src.name
875
927
  if src.dir?
876
928
  src.children.each do |src_child|
877
- dst.new_child(src_child.type) { |dst_child| Dirent.copy src_child, dst_child }
929
+ dst_child = Dirent.new dst.ole, :type => src_child.type
930
+ dst.children << dst_child
931
+ Dirent.copy src_child, dst_child
878
932
  end
879
933
  else
880
934
  src.open do |src_io|