ruby-ole 1.2.2 → 1.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +22 -0
- data/Rakefile +9 -9
- data/lib/ole/file_system.rb +7 -7
- data/lib/ole/property_set.rb +31 -20
- data/lib/ole/ranges_io.rb +38 -44
- data/lib/ole/storage.rb +164 -110
- data/lib/ole/support.rb +80 -1
- data/lib/ole/types.rb +141 -40
- data/test/oleWithDirs.ole +0 -0
- data/test/test_SummaryInformation +0 -0
- data/test/test_mbat.rb +39 -0
- data/test/test_property_set.rb +30 -0
- data/test/test_ranges_io.rb +4 -4
- data/test/test_storage.rb +5 -1
- data/test/test_support.rb +39 -1
- data/test/test_types.rb +54 -0
- metadata +18 -6
data/ChangeLog
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
== 1.2.3 / 2007-12-28
|
2
|
+
|
3
|
+
- MBAT write support re-implemented. Can now write files over ~8mb again.
|
4
|
+
- Minor fixes (truncation in #flush, file modification timestamps)
|
5
|
+
- More test coverage
|
6
|
+
- Initial (read-only) property set support.
|
7
|
+
- Complete filesystem api, to pass most of the rubyzip tests.
|
8
|
+
- Add a ChangeLog :).
|
9
|
+
|
10
|
+
== 1.2.2 / 2007-11-05
|
11
|
+
|
12
|
+
- Lots of test updates, 90% coverage.
|
13
|
+
- Fix +to_tree+ method to be more efficient, and stream output.
|
14
|
+
- Optimizations from benchmarks and profiling, mostly for writes. Fixed
|
15
|
+
AllocationTable#resize_chain, RangesIOResizable#truncate and
|
16
|
+
AllocationTable#free_block.
|
17
|
+
- Add in filesystem test file from rubyzip, and start working on a
|
18
|
+
filesystem api.
|
19
|
+
|
20
|
+
== 1.2.1 / 2007-08-20
|
21
|
+
|
22
|
+
- Separate out from ruby-msg as new project.
|
data/Rakefile
CHANGED
@@ -47,16 +47,18 @@ spec = Gem::Specification.new do |s|
|
|
47
47
|
s.version = PKG_VERSION
|
48
48
|
s.summary = %q{Ruby OLE library.}
|
49
49
|
s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
|
50
|
-
s.authors = [
|
50
|
+
s.authors = ['Charles Lowe']
|
51
51
|
s.email = %q{aquasync@gmail.com}
|
52
52
|
s.homepage = %q{http://code.google.com/p/ruby-ole}
|
53
|
-
|
53
|
+
s.rubyforge_project = %q{ruby-ole}
|
54
54
|
|
55
55
|
s.executables = ['oletool']
|
56
|
-
s.files = ['Rakefile']
|
57
|
-
s.files +=
|
58
|
-
s.files +=
|
59
|
-
s.files +=
|
56
|
+
s.files = ['Rakefile', 'ChangeLog']
|
57
|
+
s.files += FileList['lib/**/*.rb']
|
58
|
+
s.files += FileList['test/test_*.rb', 'test/*.doc']
|
59
|
+
s.files += FileList['test/oleWithDirs.ole', 'test/test_SummaryInformation']
|
60
|
+
s.files += FileList['bin/*']
|
61
|
+
s.test_files = FileList['test/test_*.rb']
|
60
62
|
|
61
63
|
s.has_rdoc = true
|
62
64
|
s.rdoc_options += [
|
@@ -64,13 +66,11 @@ spec = Gem::Specification.new do |s|
|
|
64
66
|
'--title', "#{PKG_NAME} documentation",
|
65
67
|
'--tab-width', '2'
|
66
68
|
]
|
67
|
-
|
68
|
-
s.autorequire = 'ole/storage'
|
69
69
|
end
|
70
70
|
|
71
71
|
Rake::GemPackageTask.new(spec) do |t|
|
72
72
|
t.gem_spec = spec
|
73
|
-
t.need_tar =
|
73
|
+
t.need_tar = false
|
74
74
|
t.need_zip = false
|
75
75
|
t.package_dir = 'build'
|
76
76
|
end
|
data/lib/ole/file_system.rb
CHANGED
@@ -29,8 +29,8 @@
|
|
29
29
|
#
|
30
30
|
# TODO
|
31
31
|
#
|
32
|
-
# - check
|
33
|
-
#
|
32
|
+
# - check Dir.mkdir, and File.open, and File.rename, to add in filename
|
33
|
+
# length checks (max 32 / 31 or something).
|
34
34
|
# do the automatic truncation, and add in any necessary warnings.
|
35
35
|
#
|
36
36
|
# - File.split('a/') == File.split('a') == ['.', 'a']
|
@@ -130,6 +130,8 @@ module Ole # :nodoc:
|
|
130
130
|
path = "#{pwd}/#{path}" unless path =~ /^\//
|
131
131
|
# at this point it's already absolute. we use File.expand_path
|
132
132
|
# just for the .. and . handling
|
133
|
+
# Hmmm, FIXME: won't work on windows i think. on windows it will prepend
|
134
|
+
# the current drive i believe. may just need to strip the first 2 chars.
|
133
135
|
File.expand_path path
|
134
136
|
end
|
135
137
|
|
@@ -160,8 +162,7 @@ module Ole # :nodoc:
|
|
160
162
|
end
|
161
163
|
|
162
164
|
def open path, mode='r', &block
|
163
|
-
|
164
|
-
if mode == 'w'
|
165
|
+
if IO::Mode.new(mode).create?
|
165
166
|
begin
|
166
167
|
dirent = dirent_from_path path
|
167
168
|
rescue Errno::ENOENT
|
@@ -169,8 +170,7 @@ module Ole # :nodoc:
|
|
169
170
|
# a get_parent_dirent function.
|
170
171
|
parent_path, basename = File.split expand_path(path)
|
171
172
|
parent = @ole.dir.send :dirent_from_path, parent_path, path
|
172
|
-
dirent =
|
173
|
-
dirent.name = basename
|
173
|
+
parent.children << dirent = Dirent.new(@ole, :type => :file, :name => basename)
|
174
174
|
end
|
175
175
|
else
|
176
176
|
dirent = dirent_from_path path
|
@@ -351,7 +351,7 @@ module Ole # :nodoc:
|
|
351
351
|
# now, we first should ensure that it doesn't already exist
|
352
352
|
# either as a file or a directory.
|
353
353
|
raise Errno::EEXIST, path if parent/basename
|
354
|
-
parent.
|
354
|
+
parent.children << Dirent.new(@ole, :type => :dir, :name => basename)
|
355
355
|
0
|
356
356
|
end
|
357
357
|
|
data/lib/ole/property_set.rb
CHANGED
@@ -1,37 +1,42 @@
|
|
1
|
+
require 'ole/types'
|
1
2
|
|
2
3
|
module Ole
|
3
4
|
module Types
|
4
|
-
#
|
5
|
-
#
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
# see http://poi.apache.org/hpsf/internals.html
|
5
|
+
#
|
6
|
+
# The PropertySet class currently supports readonly access to the properties
|
7
|
+
# serialized in "property set" streams, such as the file "\005SummaryInformation",
|
8
|
+
# in OLE files.
|
9
|
+
#
|
10
|
+
# I think this has its roots in MFC property set serialization.
|
11
|
+
#
|
12
|
+
# See http://poi.apache.org/hpsf/internals.html for details
|
13
|
+
#
|
15
14
|
class PropertySet
|
16
15
|
HEADER_SIZE = 28
|
17
|
-
HEADER_UNPACK = "vvVa#{
|
16
|
+
HEADER_UNPACK = "vvVa#{Clsid::SIZE}V"
|
18
17
|
OS_MAP = {
|
19
18
|
0 => :win16,
|
20
19
|
1 => :mac,
|
21
20
|
2 => :win32
|
22
21
|
}
|
23
22
|
|
23
|
+
# define a smattering of the property set guids.
|
24
|
+
FMTID_SummaryInformation = Clsid.parse '{f29f85e0-4ff9-1068-ab91-08002b27b3d9}'
|
25
|
+
FMTID_DocSummaryInformation = Clsid.parse '{d5cdd502-2e9c-101b-9397-08002b2cf9ae}'
|
26
|
+
FMTID_UserDefinedProperties = Clsid.parse '{d5cdd505-2e9c-101b-9397-08002b2cf9ae}'
|
27
|
+
|
24
28
|
class Section < Struct.new(:guid, :offset)
|
29
|
+
include Variant::Constants
|
25
30
|
include Enumerable
|
26
31
|
|
27
|
-
SIZE =
|
28
|
-
UNPACK_STR = "a#{
|
32
|
+
SIZE = Clsid::SIZE + 4
|
33
|
+
UNPACK_STR = "a#{Clsid::SIZE}v"
|
29
34
|
|
30
35
|
attr_reader :length
|
31
36
|
def initialize str, property_set
|
32
37
|
@property_set = property_set
|
33
38
|
super(*str.unpack(UNPACK_STR))
|
34
|
-
self.guid =
|
39
|
+
self.guid = Clsid.load guid
|
35
40
|
load_header
|
36
41
|
end
|
37
42
|
|
@@ -49,14 +54,20 @@ module Ole
|
|
49
54
|
io.read(length * 8).scan(/.{8}/m).each do |str|
|
50
55
|
id, property_offset = str.unpack 'V2'
|
51
56
|
io.seek offset + property_offset
|
52
|
-
type = io.read(
|
53
|
-
|
57
|
+
type, value = io.read(8).unpack('V2')
|
58
|
+
# is the method of serialization here custom?
|
59
|
+
case type
|
60
|
+
when VT_LPSTR, VT_LPWSTR
|
61
|
+
value = Variant.load type, io.read(value)
|
62
|
+
# ....
|
63
|
+
end
|
64
|
+
yield id, type, value
|
54
65
|
end
|
55
66
|
self
|
56
67
|
end
|
57
68
|
|
58
69
|
def properties
|
59
|
-
to_a
|
70
|
+
to_enum.to_a
|
60
71
|
end
|
61
72
|
end
|
62
73
|
|
@@ -66,13 +77,13 @@ module Ole
|
|
66
77
|
load_header io.read(HEADER_SIZE)
|
67
78
|
load_section_list io.read(@num_sections * Section::SIZE)
|
68
79
|
# expect no gap between last section and start of data.
|
69
|
-
Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
|
80
|
+
#Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
|
70
81
|
end
|
71
82
|
|
72
83
|
def load_header str
|
73
84
|
@signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_UNPACK
|
74
85
|
# should i check that unknown == 0? it usually is. so is the guid actually
|
75
|
-
@guid =
|
86
|
+
@guid = Clsid.load @guid
|
76
87
|
@os = OS_MAP[@os_id] || Log.warn("unknown operating system id #{@os_id}")
|
77
88
|
end
|
78
89
|
|
data/lib/ole/ranges_io.rb
CHANGED
@@ -1,36 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
@mode = mode
|
4
|
-
if @mode['b']
|
5
|
-
@binary = true
|
6
|
-
@mode = @mode.sub 'b', ''
|
7
|
-
else
|
8
|
-
@binary = false
|
9
|
-
end
|
10
|
-
if @mode[/\+$/]
|
11
|
-
@plus = true
|
12
|
-
@mode = @mode.sub(/\+$/, '')
|
13
|
-
else
|
14
|
-
@plus = false
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def explicit_binary?
|
19
|
-
@binary
|
20
|
-
end
|
21
|
-
|
22
|
-
def binary?
|
23
|
-
RUBY_PLATFORM !~ /win/ or @binary
|
24
|
-
end
|
25
|
-
|
26
|
-
def to_s
|
27
|
-
@mode
|
28
|
-
end
|
29
|
-
|
30
|
-
def inspect
|
31
|
-
"#<#{self.class}:#{to_s.inspect}>"
|
32
|
-
end
|
33
|
-
end
|
1
|
+
# need IO::Mode
|
2
|
+
require 'ole/support'
|
34
3
|
|
35
4
|
#
|
36
5
|
# = Introduction
|
@@ -71,26 +40,40 @@ end
|
|
71
40
|
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
72
41
|
#
|
73
42
|
class RangesIO
|
74
|
-
attr_reader :io, :ranges, :size, :pos
|
43
|
+
attr_reader :io, :mode, :ranges, :size, :pos
|
75
44
|
# +io+:: the parent io object that we are wrapping.
|
76
|
-
#
|
77
|
-
# +
|
78
|
-
#
|
79
|
-
#
|
80
|
-
#
|
45
|
+
# +mode+:: the mode to use
|
46
|
+
# +params+:: hash of params.
|
47
|
+
# * :ranges - byte offsets, either:
|
48
|
+
# 1. an array of ranges [1..2, 4..5, 6..8] or
|
49
|
+
# 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
|
50
|
+
# (think the way String indexing works)
|
51
|
+
# * :close_parent - boolean to close parent when this object is closed
|
81
52
|
#
|
82
53
|
# NOTE: the +ranges+ can overlap.
|
83
|
-
def initialize io,
|
84
|
-
|
54
|
+
def initialize io, mode='r', params={}
|
55
|
+
mode, params = 'r', mode if Hash === mode
|
56
|
+
ranges = params[:ranges]
|
57
|
+
@params = {:close_parent => false}.merge params
|
58
|
+
@mode = IO::Mode.new mode
|
85
59
|
@io = io
|
86
60
|
# convert ranges to arrays. check for negative ranges?
|
61
|
+
ranges ||= [0, io.size]
|
87
62
|
@ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
|
88
63
|
# calculate size
|
89
64
|
@size = @ranges.inject(0) { |total, (pos, len)| total + len }
|
90
65
|
# initial position in the file
|
91
66
|
@pos = 0
|
67
|
+
|
68
|
+
# handle some mode flags
|
69
|
+
truncate 0 if @mode.truncate?
|
70
|
+
seek size if @mode.append?
|
92
71
|
end
|
93
72
|
|
73
|
+
#IOError: closed stream
|
74
|
+
# get this for reading, writing, everything...
|
75
|
+
#IOError: not opened for writing
|
76
|
+
|
94
77
|
# add block form. TODO add test for this
|
95
78
|
def self.open(*args, &block)
|
96
79
|
ranges_io = new(*args)
|
@@ -120,7 +103,7 @@ class RangesIO
|
|
120
103
|
alias tell :pos
|
121
104
|
|
122
105
|
def close
|
123
|
-
@io.close if @
|
106
|
+
@io.close if @params[:close_parent]
|
124
107
|
end
|
125
108
|
|
126
109
|
# returns the [+offset+, +size+] pair in order to read/write at +pos+
|
@@ -169,8 +152,7 @@ class RangesIO
|
|
169
152
|
data
|
170
153
|
end
|
171
154
|
|
172
|
-
# you may override this call to update @ranges and @size, if applicable.
|
173
|
-
# support can grow below
|
155
|
+
# you may override this call to update @ranges and @size, if applicable.
|
174
156
|
def truncate size
|
175
157
|
raise NotImplementedError, 'truncate not supported'
|
176
158
|
end
|
@@ -232,3 +214,15 @@ class RangesIO
|
|
232
214
|
end
|
233
215
|
end
|
234
216
|
|
217
|
+
# this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
|
218
|
+
# only really needed for the allocation table writes etc. maybe just use explicit modes
|
219
|
+
# for those
|
220
|
+
# better yet write a test that breaks before I fix it.
|
221
|
+
class RangesIONonResizeable < RangesIO
|
222
|
+
def initialize io, mode='r', params={}
|
223
|
+
mode, params = 'r', mode if Hash === mode
|
224
|
+
flags = IO::Mode.new(mode).flags & ~IO::TRUNC
|
225
|
+
super io, flags, params
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
data/lib/ole/storage.rb
CHANGED
@@ -1,7 +1,3 @@
|
|
1
|
-
#! /usr/bin/ruby -w
|
2
|
-
|
3
|
-
$: << File.dirname(__FILE__) + '/..'
|
4
|
-
|
5
1
|
require 'tempfile'
|
6
2
|
|
7
3
|
require 'ole/base'
|
@@ -16,11 +12,6 @@ module Ole # :nodoc:
|
|
16
12
|
# access to OLE2 structured storage files, such as those produced by
|
17
13
|
# Microsoft Office, eg *.doc, *.msg etc.
|
18
14
|
#
|
19
|
-
# Initially based on chicago's libole, source available at
|
20
|
-
# http://prdownloads.sf.net/chicago/ole.tgz
|
21
|
-
# Later augmented with some corrections by inspecting pole, and (purely
|
22
|
-
# for header definitions) gsf.
|
23
|
-
#
|
24
15
|
# = Usage
|
25
16
|
#
|
26
17
|
# Usage should be fairly straight forward:
|
@@ -49,24 +40,37 @@ module Ole # :nodoc:
|
|
49
40
|
# ole.root["\001CompObj"].open { |f| f.write "blah blah" }
|
50
41
|
# ole.close
|
51
42
|
#
|
43
|
+
# = Thanks
|
44
|
+
#
|
45
|
+
# * The code contained in this project was initially based on chicago's libole
|
46
|
+
# (source available at http://prdownloads.sf.net/chicago/ole.tgz).
|
47
|
+
#
|
48
|
+
# * It was later augmented with some corrections by inspecting pole, and (purely
|
49
|
+
# for header definitions) gsf.
|
50
|
+
#
|
51
|
+
# * The property set parsing code came from the apache java project POIFS.
|
52
|
+
#
|
53
|
+
# * The excellent idea for using a pseudo file system style interface by providing
|
54
|
+
# #file and #dir methods which mimic File and Dir, was borrowed (along with almost
|
55
|
+
# unchanged tests!) from Thomas Sondergaard's rubyzip.
|
56
|
+
#
|
52
57
|
# = TODO
|
53
58
|
#
|
54
59
|
# * the custom header cruft for Header and Dirent needs some love.
|
55
60
|
# * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
|
56
61
|
# and, in a manner of speaking, but arguably different, Storage itself.
|
57
|
-
# they have differing api's which would be nice to
|
62
|
+
# they have differing api's which would be nice to rethink.
|
58
63
|
# AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
|
59
|
-
# * need to fix META_BAT support in #flush.
|
60
64
|
#
|
61
65
|
class Storage
|
62
66
|
# thrown for any bogus OLE file errors.
|
63
67
|
class FormatError < StandardError # :nodoc:
|
64
68
|
end
|
65
69
|
|
66
|
-
VERSION = '1.2.
|
70
|
+
VERSION = '1.2.3'
|
67
71
|
|
68
72
|
# options used at creation time
|
69
|
-
attr_reader :
|
73
|
+
attr_reader :params
|
70
74
|
# The top of the ole tree structure
|
71
75
|
attr_reader :root
|
72
76
|
# The tree structure in its original flattened form. only valid after #load, or #flush.
|
@@ -79,23 +83,31 @@ module Ole # :nodoc:
|
|
79
83
|
|
80
84
|
# maybe include an option hash, and allow :close_parent => true, to be more general.
|
81
85
|
# +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
|
82
|
-
def initialize arg, mode=nil,
|
83
|
-
|
84
|
-
|
85
|
-
@
|
86
|
+
def initialize arg, mode=nil, params={}
|
87
|
+
params, mode = mode, nil if Hash === mode
|
88
|
+
params = {:update_timestamps => true}.merge(params)
|
89
|
+
@params = params
|
86
90
|
|
87
91
|
# get the io object
|
88
92
|
@close_parent, @io = if String === arg
|
89
|
-
|
93
|
+
mode ||= 'rb'
|
94
|
+
[true, open(arg, mode)]
|
90
95
|
else
|
91
96
|
raise ArgumentError, 'unable to specify mode string with io object' if mode
|
92
97
|
[false, arg]
|
93
98
|
end
|
94
99
|
# do we have this file opened for writing? don't know of a better way to tell
|
95
100
|
# (unless we parse the mode string in the open case)
|
101
|
+
# hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more
|
102
|
+
# reason to use mode string parsing when available, and fall back to something like
|
103
|
+
# io.writeable? otherwise.
|
96
104
|
@writeable = begin
|
97
|
-
|
98
|
-
|
105
|
+
if mode
|
106
|
+
IO::Mode.new(mode).writeable?
|
107
|
+
else
|
108
|
+
@io.flush
|
109
|
+
true
|
110
|
+
end
|
99
111
|
rescue IOError
|
100
112
|
false
|
101
113
|
end
|
@@ -106,8 +118,8 @@ module Ole # :nodoc:
|
|
106
118
|
@io.size > 0 ? load : clear
|
107
119
|
end
|
108
120
|
|
109
|
-
def self.open arg, mode=nil,
|
110
|
-
ole = new arg, mode,
|
121
|
+
def self.open arg, mode=nil, params={}
|
122
|
+
ole = new arg, mode, params
|
111
123
|
if block_given?
|
112
124
|
begin yield ole
|
113
125
|
ensure; ole.close
|
@@ -117,6 +129,18 @@ module Ole # :nodoc:
|
|
117
129
|
end
|
118
130
|
|
119
131
|
# load document from file.
|
132
|
+
#
|
133
|
+
# TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
|
134
|
+
#
|
135
|
+
# 1. reterminate any chain not ending in EOC.
|
136
|
+
# compare file size with actually allocated blocks per file.
|
137
|
+
# 2. pass through all chain heads looking for collisions, and making sure nothing points to them
|
138
|
+
# (ie they are really heads). in both sbat and mbat
|
139
|
+
# 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
|
140
|
+
# in the bat for them.
|
141
|
+
# 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
|
142
|
+
# (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
|
143
|
+
# will be automatically thrown away at close time.
|
120
144
|
def load
|
121
145
|
# we always read 512 for the header block. if the block size ends up being different,
|
122
146
|
# what happens to the 109 fat entries. are there more/less entries?
|
@@ -159,7 +183,8 @@ module Ole # :nodoc:
|
|
159
183
|
|
160
184
|
# FIXME i don't currently use @header.num_sbat which i should
|
161
185
|
# hmm. nor do i write it. it means what exactly again?
|
162
|
-
|
186
|
+
# which mode to use here?
|
187
|
+
@sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
|
163
188
|
@sbat = AllocationTable::Small.new self
|
164
189
|
@sbat.load @bbat.read(@header.sbat_start)
|
165
190
|
end
|
@@ -170,25 +195,16 @@ module Ole # :nodoc:
|
|
170
195
|
@io.close if @close_parent
|
171
196
|
end
|
172
197
|
|
173
|
-
# should have a #open_dirent i think. and use it in load and flush. neater.
|
174
|
-
# also was thinking about Dirent#open_padding. then i can more easily clean up the padding
|
175
|
-
# to be 0.chr
|
176
|
-
=begin
|
177
|
-
thoughts on fixes:
|
178
|
-
1. reterminate any chain not ending in EOC.
|
179
|
-
2. pass through all chain heads looking for collisions, and making sure nothing points to them
|
180
|
-
(ie they are really heads).
|
181
|
-
3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
|
182
|
-
in the bat for them.
|
183
|
-
this stuff will ensure reliability of input better. otherwise, its actually worth doing a repack
|
184
|
-
directly after read, to ensure the above is probably acounted for, before subsequent writes possibly
|
185
|
-
destroy things.
|
186
|
-
=end
|
187
|
-
|
188
198
|
# the flush method is the main "save" method. all file contents are always
|
189
199
|
# written directly to the file by the RangesIO objects, all this method does
|
190
200
|
# is write out all the file meta data - dirents, allocation tables, file header
|
191
201
|
# etc.
|
202
|
+
#
|
203
|
+
# maybe add an option to zero the padding, and any remaining avail blocks in the
|
204
|
+
# allocation table.
|
205
|
+
#
|
206
|
+
# TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
|
207
|
+
# of bbat to AllocationTable::Big.
|
192
208
|
def flush
|
193
209
|
# update root dirent, and flatten dirent tree
|
194
210
|
@root.name = 'Root Entry'
|
@@ -197,8 +213,7 @@ destroy things.
|
|
197
213
|
@dirents = @root.flatten
|
198
214
|
|
199
215
|
# serialize the dirents using the bbat
|
200
|
-
RangesIOResizeable.open @bbat, @header.dirent_start do |io|
|
201
|
-
io.truncate 0
|
216
|
+
RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
|
202
217
|
@dirents.each { |dirent| io.write dirent.to_s }
|
203
218
|
padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
|
204
219
|
io.write 0.chr * padding
|
@@ -207,8 +222,7 @@ destroy things.
|
|
207
222
|
|
208
223
|
# serialize the sbat
|
209
224
|
# perhaps the blocks used by the sbat should be marked with BAT?
|
210
|
-
RangesIOResizeable.open @bbat, @header.sbat_start do |io|
|
211
|
-
io.truncate 0
|
225
|
+
RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
|
212
226
|
io.write @sbat.to_s
|
213
227
|
@header.sbat_start = io.first_block
|
214
228
|
@header.num_sbat = @bbat.chain(@header.sbat_start).length
|
@@ -224,50 +238,82 @@ destroy things.
|
|
224
238
|
b == AllocationTable::BAT || b == AllocationTable::META_BAT ?
|
225
239
|
AllocationTable::AVAIL : b
|
226
240
|
end
|
227
|
-
io = RangesIOResizeable.new @bbat, AllocationTable::EOC
|
228
241
|
|
229
242
|
# currently we use a loop. this could be better, but basically,
|
230
243
|
# the act of writing out the bat, itself requires blocks which get
|
231
244
|
# recorded in the bat.
|
245
|
+
#
|
246
|
+
# i'm sure that there'd be some simpler closed form solution to this. solve
|
247
|
+
# recursive func:
|
248
|
+
#
|
249
|
+
# num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
|
250
|
+
# bbat_len = initial_bbat_len + num_mbat_blocks
|
251
|
+
# mbat_len = ceil(bbat_len * 4 / block_size)
|
252
|
+
#
|
253
|
+
# the actual bbat allocation table is itself stored throughout the file, and that chain
|
254
|
+
# is stored in the initial blocks, and the mbat blocks.
|
255
|
+
num_mbat_blocks = 0
|
256
|
+
io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
|
257
|
+
# truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
|
258
|
+
# contiguous chunk at the end.
|
259
|
+
# hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
|
260
|
+
# delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
|
261
|
+
# be fixed easily, add an io truncate
|
262
|
+
@bbat.truncate!
|
263
|
+
before = @io.size
|
264
|
+
@io.truncate @bbat.block_size * (@bbat.length + 1)
|
232
265
|
while true
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
#
|
237
|
-
|
238
|
-
#
|
239
|
-
|
266
|
+
# get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
|
267
|
+
# the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
|
268
|
+
# progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
|
269
|
+
# mbat must remain contiguous.
|
270
|
+
bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
|
271
|
+
# now storing the excess mbat blocks also increases the size of the bbat:
|
272
|
+
new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / @bbat.block_size.to_f).ceil
|
273
|
+
if new_num_mbat_blocks != num_mbat_blocks
|
274
|
+
# need more space for the mbat.
|
275
|
+
num_mbat_blocks = new_num_mbat_blocks
|
276
|
+
elsif io.size != bbat_data_len
|
277
|
+
# need more space for the bat
|
278
|
+
# this may grow the bbat, depending on existing available blocks
|
279
|
+
io.truncate bbat_data_len
|
280
|
+
else
|
281
|
+
break
|
282
|
+
end
|
240
283
|
end
|
241
284
|
|
242
285
|
# now extract the info we want:
|
243
286
|
ranges = io.ranges
|
244
|
-
|
287
|
+
bbat_chain = @bbat.chain io.first_block
|
288
|
+
# the extra mbat data is a set of contiguous blocks at the end
|
245
289
|
io.close
|
246
|
-
|
247
|
-
|
290
|
+
bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
|
291
|
+
# tack on the mbat stuff
|
292
|
+
@header.mbat_start = @bbat.length # need to record this here before tacking on the mbat
|
293
|
+
@header.num_bat = bbat_chain.length
|
294
|
+
num_mbat_blocks.times { @bbat << AllocationTable::META_BAT }
|
248
295
|
|
249
296
|
# now finally write the bbat, using a not resizable io.
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
@header.num_mbat =
|
256
|
-
|
257
|
-
=
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
=end
|
297
|
+
# the mode here will be 'r', which allows write atm.
|
298
|
+
RangesIO.open(@io, :ranges => ranges) { |io| io.write @bbat.to_s }
|
299
|
+
|
300
|
+
# this is the mbat. pad it out.
|
301
|
+
bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
|
302
|
+
@header.num_mbat = num_mbat_blocks
|
303
|
+
if num_mbat_blocks == 0
|
304
|
+
@header.mbat_start = AllocationTable::EOC
|
305
|
+
else
|
306
|
+
# write out the mbat blocks now. first of all, where are they going to be?
|
307
|
+
mbat_data = bbat_chain[109..-1]
|
308
|
+
q = @bbat.block_size / 4
|
309
|
+
mbat_data += [AllocationTable::AVAIL] *((mbat_data.length / q.to_f).ceil * q - mbat_data.length)
|
310
|
+
ranges = @bbat.ranges((0...num_mbat_blocks).map { |i| @header.mbat_start + i })
|
311
|
+
RangesIO.open(@io, :ranges => ranges) { |io| io.write mbat_data.pack('L*') }
|
312
|
+
end
|
267
313
|
|
268
314
|
# now seek back and write the header out
|
269
315
|
@io.seek 0
|
270
|
-
@io.write @header.to_s +
|
316
|
+
@io.write @header.to_s + bbat_chain[0, 109].pack('L*')
|
271
317
|
@io.flush
|
272
318
|
end
|
273
319
|
|
@@ -280,16 +326,17 @@ destroy things.
|
|
280
326
|
@dirents = [@root]
|
281
327
|
@root.idx = 0
|
282
328
|
@sb_file.close if @sb_file
|
283
|
-
@sb_file = RangesIOResizeable.new @bbat, AllocationTable::EOC
|
329
|
+
@sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
|
284
330
|
@sbat = AllocationTable::Small.new self
|
285
331
|
# throw everything else the hell away
|
286
332
|
@io.truncate 0
|
287
333
|
end
|
288
334
|
|
289
335
|
# could be useful with mis-behaving ole documents. or to just clean them up.
|
336
|
+
# FIXME: heard Tempfile is not binary on windows. check
|
290
337
|
def repack temp=:file
|
291
338
|
case temp
|
292
|
-
when :file; Tempfile.open '
|
339
|
+
when :file; Tempfile.open 'ole-repack', &method(:repack_using_io)
|
293
340
|
when :mem; StringIO.open(&method(:repack_using_io))
|
294
341
|
else raise ArgumentError, "unknown temp backing #{temp.inspect}"
|
295
342
|
end
|
@@ -299,7 +346,7 @@ destroy things.
|
|
299
346
|
@io.rewind
|
300
347
|
IO.copy @io, temp_io
|
301
348
|
clear
|
302
|
-
Storage.open temp_io, nil, @
|
349
|
+
Storage.open temp_io, nil, @params do |temp_ole|
|
303
350
|
#temp_ole.root.type = :dir
|
304
351
|
Dirent.copy temp_ole.root, root
|
305
352
|
end
|
@@ -428,6 +475,10 @@ destroy things.
|
|
428
475
|
temp.reverse
|
429
476
|
end
|
430
477
|
|
478
|
+
def truncate!
|
479
|
+
replace truncate
|
480
|
+
end
|
481
|
+
|
431
482
|
def to_s
|
432
483
|
table = truncate
|
433
484
|
# pad it out some
|
@@ -474,7 +525,7 @@ destroy things.
|
|
474
525
|
# quick shortcut. chain can be either a head (in which case the table is used to
|
475
526
|
# turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
|
476
527
|
def open chain, size=nil, &block
|
477
|
-
RangesIO.open @io, ranges(chain, size), &block
|
528
|
+
RangesIO.open @io, :ranges => ranges(chain, size), &block
|
478
529
|
end
|
479
530
|
|
480
531
|
def read chain, size=nil
|
@@ -566,12 +617,15 @@ destroy things.
|
|
566
617
|
class RangesIOResizeable < RangesIO
|
567
618
|
attr_reader :bat
|
568
619
|
attr_accessor :first_block
|
569
|
-
def initialize bat,
|
620
|
+
def initialize bat, mode='r', params={}
|
621
|
+
mode, params = 'r', mode if Hash === mode
|
622
|
+
first_block, size = params.values_at :first_block, :size
|
623
|
+
raise ArgumentError, 'must specify first_block' unless first_block
|
570
624
|
@bat = bat
|
571
625
|
self.first_block = first_block
|
572
|
-
# we
|
626
|
+
# we now cache the blocks chain, for faster resizing.
|
573
627
|
@blocks = @bat.chain first_block
|
574
|
-
super @bat.io, @bat.ranges(@blocks, size)
|
628
|
+
super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
|
575
629
|
end
|
576
630
|
|
577
631
|
def truncate size
|
@@ -597,9 +651,10 @@ destroy things.
|
|
597
651
|
# between bats based on size, and updating the dirent.
|
598
652
|
class RangesIOMigrateable < RangesIOResizeable
|
599
653
|
attr_reader :dirent
|
600
|
-
def initialize dirent
|
654
|
+
def initialize dirent, mode='r'
|
601
655
|
@dirent = dirent
|
602
|
-
super @dirent.ole.bat_for_size(@dirent.size),
|
656
|
+
super @dirent.ole.bat_for_size(@dirent.size), mode,
|
657
|
+
:first_block => @dirent.first_block, :size => @dirent.size
|
603
658
|
end
|
604
659
|
|
605
660
|
def truncate size
|
@@ -692,30 +747,30 @@ destroy things.
|
|
692
747
|
attr_accessor :children
|
693
748
|
attr_accessor :name
|
694
749
|
attr_reader :ole, :type, :create_time, :modify_time
|
695
|
-
def initialize ole, values=DEFAULT,
|
750
|
+
def initialize ole, values=DEFAULT, params={}
|
696
751
|
@ole = ole
|
697
|
-
values,
|
752
|
+
values, params = DEFAULT, values if Hash === values
|
698
753
|
values = values.unpack(PACK) if String === values
|
699
754
|
super(*values)
|
700
755
|
|
701
756
|
# extra parsing from the actual struct values
|
702
|
-
@name =
|
703
|
-
@type = if
|
704
|
-
unless TYPE_MAP.values.include?(
|
705
|
-
raise ArgumentError, "unknown type #{
|
757
|
+
@name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len].sub(/\x00\x00$/, ''))
|
758
|
+
@type = if params[:type]
|
759
|
+
unless TYPE_MAP.values.include?(params[:type])
|
760
|
+
raise ArgumentError, "unknown type #{params[:type].inspect}"
|
706
761
|
end
|
707
|
-
|
762
|
+
params[:type]
|
708
763
|
else
|
709
764
|
TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}"
|
710
765
|
end
|
711
766
|
|
712
767
|
# further extra type specific stuff
|
713
768
|
if file?
|
714
|
-
default_time = @ole.
|
769
|
+
default_time = @ole.params[:update_timestamps] ? Time.now : nil
|
715
770
|
@create_time ||= default_time
|
716
771
|
@modify_time ||= default_time
|
717
|
-
@create_time = Types.
|
718
|
-
@modify_time = Types.
|
772
|
+
@create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str
|
773
|
+
@modify_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if modify_time_str
|
719
774
|
@children = nil
|
720
775
|
else
|
721
776
|
@create_time = nil
|
@@ -727,7 +782,7 @@ destroy things.
|
|
727
782
|
|
728
783
|
def open mode='r'
|
729
784
|
raise Errno::EISDIR unless file?
|
730
|
-
io = RangesIOMigrateable.new self
|
785
|
+
io = RangesIOMigrateable.new self, mode
|
731
786
|
# TODO work on the mode string stuff a bit more.
|
732
787
|
# maybe let the io object know about the mode, so it can refuse
|
733
788
|
# to work for read/write appropriately. maybe redefine all unusable
|
@@ -736,9 +791,12 @@ destroy things.
|
|
736
791
|
# i need to do 'a' etc.
|
737
792
|
case mode
|
738
793
|
when 'r', 'r+'
|
739
|
-
# as i don't enforce reading/writing, nothing changes here
|
794
|
+
# as i don't enforce reading/writing, nothing changes here. kind of
|
795
|
+
# need to enforce it if i want modify times to work better.
|
796
|
+
@modify_time = Time.now if mode == 'r+'
|
740
797
|
when 'w'
|
741
|
-
|
798
|
+
@modify_time = Time.now
|
799
|
+
#io.truncate 0
|
742
800
|
else
|
743
801
|
raise NotImplementedError, "unsupported mode - #{mode.inspect}"
|
744
802
|
end
|
@@ -763,13 +821,14 @@ destroy things.
|
|
763
821
|
!file?
|
764
822
|
end
|
765
823
|
|
824
|
+
# maybe need some options regarding case sensitivity.
|
766
825
|
def / name
|
767
826
|
children.find { |child| name === child.name }
|
768
827
|
end
|
769
828
|
|
770
829
|
def [] idx
|
771
830
|
if String === idx
|
772
|
-
warn 'String form of Dirent#[] is deprecated'
|
831
|
+
#warn 'String form of Dirent#[] is deprecated'
|
773
832
|
self / idx
|
774
833
|
else
|
775
834
|
super
|
@@ -778,7 +837,7 @@ destroy things.
|
|
778
837
|
|
779
838
|
# move to ruby-msg. and remove from here
|
780
839
|
def time
|
781
|
-
warn 'Dirent#time is deprecated'
|
840
|
+
#warn 'Dirent#time is deprecated'
|
782
841
|
create_time || modify_time
|
783
842
|
end
|
784
843
|
|
@@ -813,7 +872,7 @@ destroy things.
|
|
813
872
|
end
|
814
873
|
|
815
874
|
def to_s
|
816
|
-
tmp = Types::
|
875
|
+
tmp = Types::Variant.dump(Types::VT_LPWSTR, name)
|
817
876
|
tmp = tmp[0, 62] if tmp.length > 62
|
818
877
|
tmp += 0.chr * 2
|
819
878
|
self.name_len = tmp.length
|
@@ -824,9 +883,12 @@ destroy things.
|
|
824
883
|
# note not dir?, so as not to override root's first_block
|
825
884
|
self.first_block = Dirent::EOT if type == :dir
|
826
885
|
if file?
|
827
|
-
|
828
|
-
|
829
|
-
|
886
|
+
# this is messed up. it changes the time stamps regardless of whether the file
|
887
|
+
# was actually touched. instead, any open call with a writeable mode, should update
|
888
|
+
# the modify time. create time would be set in new.
|
889
|
+
if @ole.params[:update_timestamps]
|
890
|
+
self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time
|
891
|
+
self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time
|
830
892
|
end
|
831
893
|
else
|
832
894
|
self.create_time_str = 0.chr * 8
|
@@ -850,16 +912,6 @@ destroy things.
|
|
850
912
|
str + '>'
|
851
913
|
end
|
852
914
|
|
853
|
-
# --------
|
854
|
-
# and for creation of a dirent. don't like the name. is it a file or a directory?
|
855
|
-
# assign to type later? io will be empty.
|
856
|
-
def new_child type
|
857
|
-
child = Dirent.new ole, :type => type
|
858
|
-
children << child
|
859
|
-
yield child if block_given?
|
860
|
-
child
|
861
|
-
end
|
862
|
-
|
863
915
|
def delete child
|
864
916
|
# remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
|
865
917
|
raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
|
@@ -874,7 +926,9 @@ destroy things.
|
|
874
926
|
dst.name = src.name
|
875
927
|
if src.dir?
|
876
928
|
src.children.each do |src_child|
|
877
|
-
|
929
|
+
dst_child = Dirent.new dst.ole, :type => src_child.type
|
930
|
+
dst.children << dst_child
|
931
|
+
Dirent.copy src_child, dst_child
|
878
932
|
end
|
879
933
|
else
|
880
934
|
src.open do |src_io|
|