ruby-ole 1.2.2 → 1.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +22 -0
- data/Rakefile +9 -9
- data/lib/ole/file_system.rb +7 -7
- data/lib/ole/property_set.rb +31 -20
- data/lib/ole/ranges_io.rb +38 -44
- data/lib/ole/storage.rb +164 -110
- data/lib/ole/support.rb +80 -1
- data/lib/ole/types.rb +141 -40
- data/test/oleWithDirs.ole +0 -0
- data/test/test_SummaryInformation +0 -0
- data/test/test_mbat.rb +39 -0
- data/test/test_property_set.rb +30 -0
- data/test/test_ranges_io.rb +4 -4
- data/test/test_storage.rb +5 -1
- data/test/test_support.rb +39 -1
- data/test/test_types.rb +54 -0
- metadata +18 -6
data/ChangeLog
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
== 1.2.3 / 2007-12-28
|
2
|
+
|
3
|
+
- MBAT write support re-implemented. Can now write files over ~8mb again.
|
4
|
+
- Minor fixes (truncation in #flush, file modification timestamps)
|
5
|
+
- More test coverage
|
6
|
+
- Initial (read-only) property set support.
|
7
|
+
- Complete filesystem api, to pass most of the rubyzip tests.
|
8
|
+
- Add a ChangeLog :).
|
9
|
+
|
10
|
+
== 1.2.2 / 2007-11-05
|
11
|
+
|
12
|
+
- Lots of test updates, 90% coverage.
|
13
|
+
- Fix +to_tree+ method to be more efficient, and stream output.
|
14
|
+
- Optimizations from benchmarks and profiling, mostly for writes. Fixed
|
15
|
+
AllocationTable#resize_chain, RangesIOResizable#truncate and
|
16
|
+
AllocationTable#free_block.
|
17
|
+
- Add in filesystem test file from rubyzip, and start working on a
|
18
|
+
filesystem api.
|
19
|
+
|
20
|
+
== 1.2.1 / 2007-08-20
|
21
|
+
|
22
|
+
- Separate out from ruby-msg as new project.
|
data/Rakefile
CHANGED
@@ -47,16 +47,18 @@ spec = Gem::Specification.new do |s|
|
|
47
47
|
s.version = PKG_VERSION
|
48
48
|
s.summary = %q{Ruby OLE library.}
|
49
49
|
s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
|
50
|
-
s.authors = [
|
50
|
+
s.authors = ['Charles Lowe']
|
51
51
|
s.email = %q{aquasync@gmail.com}
|
52
52
|
s.homepage = %q{http://code.google.com/p/ruby-ole}
|
53
|
-
|
53
|
+
s.rubyforge_project = %q{ruby-ole}
|
54
54
|
|
55
55
|
s.executables = ['oletool']
|
56
|
-
s.files = ['Rakefile']
|
57
|
-
s.files +=
|
58
|
-
s.files +=
|
59
|
-
s.files +=
|
56
|
+
s.files = ['Rakefile', 'ChangeLog']
|
57
|
+
s.files += FileList['lib/**/*.rb']
|
58
|
+
s.files += FileList['test/test_*.rb', 'test/*.doc']
|
59
|
+
s.files += FileList['test/oleWithDirs.ole', 'test/test_SummaryInformation']
|
60
|
+
s.files += FileList['bin/*']
|
61
|
+
s.test_files = FileList['test/test_*.rb']
|
60
62
|
|
61
63
|
s.has_rdoc = true
|
62
64
|
s.rdoc_options += [
|
@@ -64,13 +66,11 @@ spec = Gem::Specification.new do |s|
|
|
64
66
|
'--title', "#{PKG_NAME} documentation",
|
65
67
|
'--tab-width', '2'
|
66
68
|
]
|
67
|
-
|
68
|
-
s.autorequire = 'ole/storage'
|
69
69
|
end
|
70
70
|
|
71
71
|
Rake::GemPackageTask.new(spec) do |t|
|
72
72
|
t.gem_spec = spec
|
73
|
-
t.need_tar =
|
73
|
+
t.need_tar = false
|
74
74
|
t.need_zip = false
|
75
75
|
t.package_dir = 'build'
|
76
76
|
end
|
data/lib/ole/file_system.rb
CHANGED
@@ -29,8 +29,8 @@
|
|
29
29
|
#
|
30
30
|
# TODO
|
31
31
|
#
|
32
|
-
# - check
|
33
|
-
#
|
32
|
+
# - check Dir.mkdir, and File.open, and File.rename, to add in filename
|
33
|
+
# length checks (max 32 / 31 or something).
|
34
34
|
# do the automatic truncation, and add in any necessary warnings.
|
35
35
|
#
|
36
36
|
# - File.split('a/') == File.split('a') == ['.', 'a']
|
@@ -130,6 +130,8 @@ module Ole # :nodoc:
|
|
130
130
|
path = "#{pwd}/#{path}" unless path =~ /^\//
|
131
131
|
# at this point it's already absolute. we use File.expand_path
|
132
132
|
# just for the .. and . handling
|
133
|
+
# Hmmm, FIXME: won't work on windows i think. on windows it will prepend
|
134
|
+
# the current drive i believe. may just need to strip the first 2 chars.
|
133
135
|
File.expand_path path
|
134
136
|
end
|
135
137
|
|
@@ -160,8 +162,7 @@ module Ole # :nodoc:
|
|
160
162
|
end
|
161
163
|
|
162
164
|
def open path, mode='r', &block
|
163
|
-
|
164
|
-
if mode == 'w'
|
165
|
+
if IO::Mode.new(mode).create?
|
165
166
|
begin
|
166
167
|
dirent = dirent_from_path path
|
167
168
|
rescue Errno::ENOENT
|
@@ -169,8 +170,7 @@ module Ole # :nodoc:
|
|
169
170
|
# a get_parent_dirent function.
|
170
171
|
parent_path, basename = File.split expand_path(path)
|
171
172
|
parent = @ole.dir.send :dirent_from_path, parent_path, path
|
172
|
-
dirent =
|
173
|
-
dirent.name = basename
|
173
|
+
parent.children << dirent = Dirent.new(@ole, :type => :file, :name => basename)
|
174
174
|
end
|
175
175
|
else
|
176
176
|
dirent = dirent_from_path path
|
@@ -351,7 +351,7 @@ module Ole # :nodoc:
|
|
351
351
|
# now, we first should ensure that it doesn't already exist
|
352
352
|
# either as a file or a directory.
|
353
353
|
raise Errno::EEXIST, path if parent/basename
|
354
|
-
parent.
|
354
|
+
parent.children << Dirent.new(@ole, :type => :dir, :name => basename)
|
355
355
|
0
|
356
356
|
end
|
357
357
|
|
data/lib/ole/property_set.rb
CHANGED
@@ -1,37 +1,42 @@
|
|
1
|
+
require 'ole/types'
|
1
2
|
|
2
3
|
module Ole
|
3
4
|
module Types
|
4
|
-
#
|
5
|
-
#
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
# see http://poi.apache.org/hpsf/internals.html
|
5
|
+
#
|
6
|
+
# The PropertySet class currently supports readonly access to the properties
|
7
|
+
# serialized in "property set" streams, such as the file "\005SummaryInformation",
|
8
|
+
# in OLE files.
|
9
|
+
#
|
10
|
+
# I think this has its roots in MFC property set serialization.
|
11
|
+
#
|
12
|
+
# See http://poi.apache.org/hpsf/internals.html for details
|
13
|
+
#
|
15
14
|
class PropertySet
|
16
15
|
HEADER_SIZE = 28
|
17
|
-
HEADER_UNPACK = "vvVa#{
|
16
|
+
HEADER_UNPACK = "vvVa#{Clsid::SIZE}V"
|
18
17
|
OS_MAP = {
|
19
18
|
0 => :win16,
|
20
19
|
1 => :mac,
|
21
20
|
2 => :win32
|
22
21
|
}
|
23
22
|
|
23
|
+
# define a smattering of the property set guids.
|
24
|
+
FMTID_SummaryInformation = Clsid.parse '{f29f85e0-4ff9-1068-ab91-08002b27b3d9}'
|
25
|
+
FMTID_DocSummaryInformation = Clsid.parse '{d5cdd502-2e9c-101b-9397-08002b2cf9ae}'
|
26
|
+
FMTID_UserDefinedProperties = Clsid.parse '{d5cdd505-2e9c-101b-9397-08002b2cf9ae}'
|
27
|
+
|
24
28
|
class Section < Struct.new(:guid, :offset)
|
29
|
+
include Variant::Constants
|
25
30
|
include Enumerable
|
26
31
|
|
27
|
-
SIZE =
|
28
|
-
UNPACK_STR = "a#{
|
32
|
+
SIZE = Clsid::SIZE + 4
|
33
|
+
UNPACK_STR = "a#{Clsid::SIZE}v"
|
29
34
|
|
30
35
|
attr_reader :length
|
31
36
|
def initialize str, property_set
|
32
37
|
@property_set = property_set
|
33
38
|
super(*str.unpack(UNPACK_STR))
|
34
|
-
self.guid =
|
39
|
+
self.guid = Clsid.load guid
|
35
40
|
load_header
|
36
41
|
end
|
37
42
|
|
@@ -49,14 +54,20 @@ module Ole
|
|
49
54
|
io.read(length * 8).scan(/.{8}/m).each do |str|
|
50
55
|
id, property_offset = str.unpack 'V2'
|
51
56
|
io.seek offset + property_offset
|
52
|
-
type = io.read(
|
53
|
-
|
57
|
+
type, value = io.read(8).unpack('V2')
|
58
|
+
# is the method of serialization here custom?
|
59
|
+
case type
|
60
|
+
when VT_LPSTR, VT_LPWSTR
|
61
|
+
value = Variant.load type, io.read(value)
|
62
|
+
# ....
|
63
|
+
end
|
64
|
+
yield id, type, value
|
54
65
|
end
|
55
66
|
self
|
56
67
|
end
|
57
68
|
|
58
69
|
def properties
|
59
|
-
to_a
|
70
|
+
to_enum.to_a
|
60
71
|
end
|
61
72
|
end
|
62
73
|
|
@@ -66,13 +77,13 @@ module Ole
|
|
66
77
|
load_header io.read(HEADER_SIZE)
|
67
78
|
load_section_list io.read(@num_sections * Section::SIZE)
|
68
79
|
# expect no gap between last section and start of data.
|
69
|
-
Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
|
80
|
+
#Log.warn "gap between section list and property data" unless io.pos == @sections.map(&:offset).min
|
70
81
|
end
|
71
82
|
|
72
83
|
def load_header str
|
73
84
|
@signature, @unknown, @os_id, @guid, @num_sections = str.unpack HEADER_UNPACK
|
74
85
|
# should i check that unknown == 0? it usually is. so is the guid actually
|
75
|
-
@guid =
|
86
|
+
@guid = Clsid.load @guid
|
76
87
|
@os = OS_MAP[@os_id] || Log.warn("unknown operating system id #{@os_id}")
|
77
88
|
end
|
78
89
|
|
data/lib/ole/ranges_io.rb
CHANGED
@@ -1,36 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
@mode = mode
|
4
|
-
if @mode['b']
|
5
|
-
@binary = true
|
6
|
-
@mode = @mode.sub 'b', ''
|
7
|
-
else
|
8
|
-
@binary = false
|
9
|
-
end
|
10
|
-
if @mode[/\+$/]
|
11
|
-
@plus = true
|
12
|
-
@mode = @mode.sub(/\+$/, '')
|
13
|
-
else
|
14
|
-
@plus = false
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def explicit_binary?
|
19
|
-
@binary
|
20
|
-
end
|
21
|
-
|
22
|
-
def binary?
|
23
|
-
RUBY_PLATFORM !~ /win/ or @binary
|
24
|
-
end
|
25
|
-
|
26
|
-
def to_s
|
27
|
-
@mode
|
28
|
-
end
|
29
|
-
|
30
|
-
def inspect
|
31
|
-
"#<#{self.class}:#{to_s.inspect}>"
|
32
|
-
end
|
33
|
-
end
|
1
|
+
# need IO::Mode
|
2
|
+
require 'ole/support'
|
34
3
|
|
35
4
|
#
|
36
5
|
# = Introduction
|
@@ -71,26 +40,40 @@ end
|
|
71
40
|
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
72
41
|
#
|
73
42
|
class RangesIO
|
74
|
-
attr_reader :io, :ranges, :size, :pos
|
43
|
+
attr_reader :io, :mode, :ranges, :size, :pos
|
75
44
|
# +io+:: the parent io object that we are wrapping.
|
76
|
-
#
|
77
|
-
# +
|
78
|
-
#
|
79
|
-
#
|
80
|
-
#
|
45
|
+
# +mode+:: the mode to use
|
46
|
+
# +params+:: hash of params.
|
47
|
+
# * :ranges - byte offsets, either:
|
48
|
+
# 1. an array of ranges [1..2, 4..5, 6..8] or
|
49
|
+
# 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
|
50
|
+
# (think the way String indexing works)
|
51
|
+
# * :close_parent - boolean to close parent when this object is closed
|
81
52
|
#
|
82
53
|
# NOTE: the +ranges+ can overlap.
|
83
|
-
def initialize io,
|
84
|
-
|
54
|
+
def initialize io, mode='r', params={}
|
55
|
+
mode, params = 'r', mode if Hash === mode
|
56
|
+
ranges = params[:ranges]
|
57
|
+
@params = {:close_parent => false}.merge params
|
58
|
+
@mode = IO::Mode.new mode
|
85
59
|
@io = io
|
86
60
|
# convert ranges to arrays. check for negative ranges?
|
61
|
+
# when no :ranges param is given, default to a single [offset, length] pair that
# covers the whole parent io. it must be an array of pairs (same shape as explicitly
# passed ranges); a flat [0, io.size] would be treated as two bogus "ranges".
ranges ||= [[0, io.size]]
|
87
62
|
@ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
|
88
63
|
# calculate size
|
89
64
|
@size = @ranges.inject(0) { |total, (pos, len)| total + len }
|
90
65
|
# initial position in the file
|
91
66
|
@pos = 0
|
67
|
+
|
68
|
+
# handle some mode flags
|
69
|
+
truncate 0 if @mode.truncate?
|
70
|
+
seek size if @mode.append?
|
92
71
|
end
|
93
72
|
|
73
|
+
#IOError: closed stream
|
74
|
+
# get this for reading, writing, everything...
|
75
|
+
#IOError: not opened for writing
|
76
|
+
|
94
77
|
# add block form. TODO add test for this
|
95
78
|
def self.open(*args, &block)
|
96
79
|
ranges_io = new(*args)
|
@@ -120,7 +103,7 @@ class RangesIO
|
|
120
103
|
alias tell :pos
|
121
104
|
|
122
105
|
def close
|
123
|
-
@io.close if @
|
106
|
+
@io.close if @params[:close_parent]
|
124
107
|
end
|
125
108
|
|
126
109
|
# returns the [+offset+, +size+] pair in order to read/write at +pos+
|
@@ -169,8 +152,7 @@ class RangesIO
|
|
169
152
|
data
|
170
153
|
end
|
171
154
|
|
172
|
-
# you may override this call to update @ranges and @size, if applicable.
|
173
|
-
# support can grow below
|
155
|
+
# you may override this call to update @ranges and @size, if applicable.
|
174
156
|
def truncate size
|
175
157
|
raise NotImplementedError, 'truncate not supported'
|
176
158
|
end
|
@@ -232,3 +214,15 @@ class RangesIO
|
|
232
214
|
end
|
233
215
|
end
|
234
216
|
|
217
|
+
# this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
|
218
|
+
# only really needed for the allocation table writes etc. maybe just use explicit modes
|
219
|
+
# for those
|
220
|
+
# better yet write a test that breaks before I fix it.
|
221
|
+
class RangesIONonResizeable < RangesIO
|
222
|
+
def initialize io, mode='r', params={}
|
223
|
+
mode, params = 'r', mode if Hash === mode
|
224
|
+
flags = IO::Mode.new(mode).flags & ~IO::TRUNC
|
225
|
+
super io, flags, params
|
226
|
+
end
|
227
|
+
end
|
228
|
+
|
data/lib/ole/storage.rb
CHANGED
@@ -1,7 +1,3 @@
|
|
1
|
-
#! /usr/bin/ruby -w
|
2
|
-
|
3
|
-
$: << File.dirname(__FILE__) + '/..'
|
4
|
-
|
5
1
|
require 'tempfile'
|
6
2
|
|
7
3
|
require 'ole/base'
|
@@ -16,11 +12,6 @@ module Ole # :nodoc:
|
|
16
12
|
# access to OLE2 structured storage files, such as those produced by
|
17
13
|
# Microsoft Office, eg *.doc, *.msg etc.
|
18
14
|
#
|
19
|
-
# Initially based on chicago's libole, source available at
|
20
|
-
# http://prdownloads.sf.net/chicago/ole.tgz
|
21
|
-
# Later augmented with some corrections by inspecting pole, and (purely
|
22
|
-
# for header definitions) gsf.
|
23
|
-
#
|
24
15
|
# = Usage
|
25
16
|
#
|
26
17
|
# Usage should be fairly straight forward:
|
@@ -49,24 +40,37 @@ module Ole # :nodoc:
|
|
49
40
|
# ole.root["\001CompObj"].open { |f| f.write "blah blah" }
|
50
41
|
# ole.close
|
51
42
|
#
|
43
|
+
# = Thanks
|
44
|
+
#
|
45
|
+
# * The code contained in this project was initially based on chicago's libole
|
46
|
+
# (source available at http://prdownloads.sf.net/chicago/ole.tgz).
|
47
|
+
#
|
48
|
+
# * It was later augmented with some corrections by inspecting pole, and (purely
|
49
|
+
# for header definitions) gsf.
|
50
|
+
#
|
51
|
+
# * The property set parsing code came from the apache java project POIFS.
|
52
|
+
#
|
53
|
+
# * The excellent idea for using a pseudo file system style interface by providing
|
54
|
+
# #file and #dir methods which mimic File and Dir, was borrowed (along with almost
|
55
|
+
# unchanged tests!) from Thomas Sondergaard's rubyzip.
|
56
|
+
#
|
52
57
|
# = TODO
|
53
58
|
#
|
54
59
|
# * the custom header cruft for Header and Dirent needs some love.
|
55
60
|
# * i have a number of classes doing load/save combos: Header, AllocationTable, Dirent,
|
56
61
|
# and, in a manner of speaking, but arguably different, Storage itself.
|
57
|
-
# they have differing api's which would be nice to
|
62
|
+
# they have differing api's which would be nice to rethink.
|
58
63
|
# AllocationTable::Big must be created aot now, as it is used for all subsequent reads.
|
59
|
-
# * need to fix META_BAT support in #flush.
|
60
64
|
#
|
61
65
|
class Storage
|
62
66
|
# thrown for any bogus OLE file errors.
|
63
67
|
class FormatError < StandardError # :nodoc:
|
64
68
|
end
|
65
69
|
|
66
|
-
VERSION = '1.2.
|
70
|
+
VERSION = '1.2.3'
|
67
71
|
|
68
72
|
# options used at creation time
|
69
|
-
attr_reader :
|
73
|
+
attr_reader :params
|
70
74
|
# The top of the ole tree structure
|
71
75
|
attr_reader :root
|
72
76
|
# The tree structure in its original flattened form. only valid after #load, or #flush.
|
@@ -79,23 +83,31 @@ module Ole # :nodoc:
|
|
79
83
|
|
80
84
|
# maybe include an option hash, and allow :close_parent => true, to be more general.
|
81
85
|
# +arg+ should be either a file, or an +IO+ object, and needs to be seekable.
|
82
|
-
def initialize arg, mode=nil,
|
83
|
-
|
84
|
-
|
85
|
-
@
|
86
|
+
def initialize arg, mode=nil, params={}
|
87
|
+
params, mode = mode, nil if Hash === mode
|
88
|
+
params = {:update_timestamps => true}.merge(params)
|
89
|
+
@params = params
|
86
90
|
|
87
91
|
# get the io object
|
88
92
|
@close_parent, @io = if String === arg
|
89
|
-
|
93
|
+
mode ||= 'rb'
|
94
|
+
[true, open(arg, mode)]
|
90
95
|
else
|
91
96
|
raise ArgumentError, 'unable to specify mode string with io object' if mode
|
92
97
|
[false, arg]
|
93
98
|
end
|
94
99
|
# do we have this file opened for writing? don't know of a better way to tell
|
95
100
|
# (unless we parse the mode string in the open case)
|
101
|
+
# hmmm, note that in ruby 1.9 this doesn't work anymore. which is all the more
|
102
|
+
# reason to use mode string parsing when available, and fall back to something like
|
103
|
+
# io.writeable? otherwise.
|
96
104
|
@writeable = begin
|
97
|
-
|
98
|
-
|
105
|
+
if mode
|
106
|
+
IO::Mode.new(mode).writeable?
|
107
|
+
else
|
108
|
+
@io.flush
|
109
|
+
true
|
110
|
+
end
|
99
111
|
rescue IOError
|
100
112
|
false
|
101
113
|
end
|
@@ -106,8 +118,8 @@ module Ole # :nodoc:
|
|
106
118
|
@io.size > 0 ? load : clear
|
107
119
|
end
|
108
120
|
|
109
|
-
def self.open arg, mode=nil,
|
110
|
-
ole = new arg, mode,
|
121
|
+
def self.open arg, mode=nil, params={}
|
122
|
+
ole = new arg, mode, params
|
111
123
|
if block_given?
|
112
124
|
begin yield ole
|
113
125
|
ensure; ole.close
|
@@ -117,6 +129,18 @@ module Ole # :nodoc:
|
|
117
129
|
end
|
118
130
|
|
119
131
|
# load document from file.
|
132
|
+
#
|
133
|
+
# TODO: implement various allocationtable checks, maybe as a AllocationTable#fsck function :)
|
134
|
+
#
|
135
|
+
# 1. reterminate any chain not ending in EOC.
|
136
|
+
# compare file size with actually allocated blocks per file.
|
137
|
+
# 2. pass through all chain heads looking for collisions, and making sure nothing points to them
|
138
|
+
# (ie they are really heads). in both sbat and mbat
|
139
|
+
# 3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
|
140
|
+
# in the bat for them.
|
141
|
+
# 4. maybe a check of excess data. if there is data outside the bbat.truncate.length + 1 * block_size,
|
142
|
+
# (eg what is used for truncate in #flush), then maybe add some sort of message about that. it
|
143
|
+
# will be automatically thrown away at close time.
|
120
144
|
def load
|
121
145
|
# we always read 512 for the header block. if the block size ends up being different,
|
122
146
|
# what happens to the 109 fat entries. are there more/less entries?
|
@@ -159,7 +183,8 @@ module Ole # :nodoc:
|
|
159
183
|
|
160
184
|
# FIXME i don't currently use @header.num_sbat which i should
|
161
185
|
# hmm. nor do i write it. it means what exactly again?
|
162
|
-
|
186
|
+
# which mode to use here?
|
187
|
+
@sb_file = RangesIOResizeable.new @bbat, :first_block => @root.first_block, :size => @root.size
|
163
188
|
@sbat = AllocationTable::Small.new self
|
164
189
|
@sbat.load @bbat.read(@header.sbat_start)
|
165
190
|
end
|
@@ -170,25 +195,16 @@ module Ole # :nodoc:
|
|
170
195
|
@io.close if @close_parent
|
171
196
|
end
|
172
197
|
|
173
|
-
# should have a #open_dirent i think. and use it in load and flush. neater.
|
174
|
-
# also was thinking about Dirent#open_padding. then i can more easily clean up the padding
|
175
|
-
# to be 0.chr
|
176
|
-
=begin
|
177
|
-
thoughts on fixes:
|
178
|
-
1. reterminate any chain not ending in EOC.
|
179
|
-
2. pass through all chain heads looking for collisions, and making sure nothing points to them
|
180
|
-
(ie they are really heads).
|
181
|
-
3. we know the locations of the bbat data, and mbat data. ensure that there are placeholder blocks
|
182
|
-
in the bat for them.
|
183
|
-
this stuff will ensure reliability of input better. otherwise, its actually worth doing a repack
|
184
|
-
directly after read, to ensure the above is probably acounted for, before subsequent writes possibly
|
185
|
-
destroy things.
|
186
|
-
=end
|
187
|
-
|
188
198
|
# the flush method is the main "save" method. all file contents are always
|
189
199
|
# written directly to the file by the RangesIO objects, all this method does
|
190
200
|
# is write out all the file meta data - dirents, allocation tables, file header
|
191
201
|
# etc.
|
202
|
+
#
|
203
|
+
# maybe add an option to zero the padding, and any remaining avail blocks in the
|
204
|
+
# allocation table.
|
205
|
+
#
|
206
|
+
# TODO: long and overly complex. simplify and test better. eg, perhaps move serialization
|
207
|
+
# of bbat to AllocationTable::Big.
|
192
208
|
def flush
|
193
209
|
# update root dirent, and flatten dirent tree
|
194
210
|
@root.name = 'Root Entry'
|
@@ -197,8 +213,7 @@ destroy things.
|
|
197
213
|
@dirents = @root.flatten
|
198
214
|
|
199
215
|
# serialize the dirents using the bbat
|
200
|
-
RangesIOResizeable.open @bbat, @header.dirent_start do |io|
|
201
|
-
io.truncate 0
|
216
|
+
RangesIOResizeable.open @bbat, 'w', :first_block => @header.dirent_start do |io|
|
202
217
|
@dirents.each { |dirent| io.write dirent.to_s }
|
203
218
|
padding = (io.size / @bbat.block_size.to_f).ceil * @bbat.block_size - io.size
|
204
219
|
io.write 0.chr * padding
|
@@ -207,8 +222,7 @@ destroy things.
|
|
207
222
|
|
208
223
|
# serialize the sbat
|
209
224
|
# perhaps the blocks used by the sbat should be marked with BAT?
|
210
|
-
RangesIOResizeable.open @bbat, @header.sbat_start do |io|
|
211
|
-
io.truncate 0
|
225
|
+
RangesIOResizeable.open @bbat, 'w', :first_block => @header.sbat_start do |io|
|
212
226
|
io.write @sbat.to_s
|
213
227
|
@header.sbat_start = io.first_block
|
214
228
|
@header.num_sbat = @bbat.chain(@header.sbat_start).length
|
@@ -224,50 +238,82 @@ destroy things.
|
|
224
238
|
b == AllocationTable::BAT || b == AllocationTable::META_BAT ?
|
225
239
|
AllocationTable::AVAIL : b
|
226
240
|
end
|
227
|
-
io = RangesIOResizeable.new @bbat, AllocationTable::EOC
|
228
241
|
|
229
242
|
# currently we use a loop. this could be better, but basically,
|
230
243
|
# the act of writing out the bat, itself requires blocks which get
|
231
244
|
# recorded in the bat.
|
245
|
+
#
|
246
|
+
# i'm sure that there'd be some simpler closed form solution to this. solve
|
247
|
+
# recursive func:
|
248
|
+
#
|
249
|
+
# num_mbat_blocks = ceil(max((mbat_len - 109) * 4 / block_size, 0))
|
250
|
+
# bbat_len = initial_bbat_len + num_mbat_blocks
|
251
|
+
# mbat_len = ceil(bbat_len * 4 / block_size)
|
252
|
+
#
|
253
|
+
# the actual bbat allocation table is itself stored throughout the file, and that chain
|
254
|
+
# is stored in the initial blocks, and the mbat blocks.
|
255
|
+
num_mbat_blocks = 0
|
256
|
+
io = RangesIOResizeable.new @bbat, 'w', :first_block => AllocationTable::EOC
|
257
|
+
# truncate now, so that we can simplify size calcs - the mbat blocks will be appended in a
|
258
|
+
# contiguous chunk at the end.
|
259
|
+
# hmmm, i think this truncate should be matched with a truncate of the underlying io. if you
|
260
|
+
# delete a lot of stuff, and free up trailing blocks, the file size never shrinks. this can
|
261
|
+
# be fixed easily, add an io truncate
|
262
|
+
@bbat.truncate!
|
263
|
+
before = @io.size
|
264
|
+
@io.truncate @bbat.block_size * (@bbat.length + 1)
|
232
265
|
while true
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
#
|
237
|
-
|
238
|
-
#
|
239
|
-
|
266
|
+
# get total bbat size. equivalent to @bbat.to_s.length, but for the factoring in of
|
267
|
+
# the mbat blocks. we can't just add the mbat blocks directly to the bbat, as as this iteration
|
268
|
+
# progresses, more blocks may be needed for the bat itself (if there are no more gaps), and the
|
269
|
+
# mbat must remain contiguous.
|
270
|
+
bbat_data_len = ((@bbat.length + num_mbat_blocks) * 4 / @bbat.block_size.to_f).ceil * @bbat.block_size
|
271
|
+
# now storing the excess mbat blocks also increases the size of the bbat:
|
272
|
+
new_num_mbat_blocks = ([bbat_data_len / @bbat.block_size - 109, 0].max * 4 / @bbat.block_size.to_f).ceil
|
273
|
+
if new_num_mbat_blocks != num_mbat_blocks
|
274
|
+
# need more space for the mbat.
|
275
|
+
num_mbat_blocks = new_num_mbat_blocks
|
276
|
+
elsif io.size != bbat_data_len
|
277
|
+
# need more space for the bat
|
278
|
+
# this may grow the bbat, depending on existing available blocks
|
279
|
+
io.truncate bbat_data_len
|
280
|
+
else
|
281
|
+
break
|
282
|
+
end
|
240
283
|
end
|
241
284
|
|
242
285
|
# now extract the info we want:
|
243
286
|
ranges = io.ranges
|
244
|
-
|
287
|
+
bbat_chain = @bbat.chain io.first_block
|
288
|
+
# the extra mbat data is a set of contiguous blocks at the end
|
245
289
|
io.close
|
246
|
-
|
247
|
-
|
290
|
+
bbat_chain.each { |b| @bbat[b] = AllocationTable::BAT }
|
291
|
+
# tack on the mbat stuff
|
292
|
+
@header.mbat_start = @bbat.length # need to record this here before tacking on the mbat
|
293
|
+
@header.num_bat = bbat_chain.length
|
294
|
+
num_mbat_blocks.times { @bbat << AllocationTable::META_BAT }
|
248
295
|
|
249
296
|
# now finally write the bbat, using a not resizable io.
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
@header.num_mbat =
|
256
|
-
|
257
|
-
=
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
=end
|
297
|
+
# the mode here will be 'r', which allows write atm.
|
298
|
+
RangesIO.open(@io, :ranges => ranges) { |io| io.write @bbat.to_s }
|
299
|
+
|
300
|
+
# this is the mbat. pad it out.
|
301
|
+
bbat_chain += [AllocationTable::AVAIL] * [109 - bbat_chain.length, 0].max
|
302
|
+
@header.num_mbat = num_mbat_blocks
|
303
|
+
if num_mbat_blocks == 0
|
304
|
+
@header.mbat_start = AllocationTable::EOC
|
305
|
+
else
|
306
|
+
# write out the mbat blocks now. first of all, where are they going to be?
|
307
|
+
mbat_data = bbat_chain[109..-1]
|
308
|
+
q = @bbat.block_size / 4
|
309
|
+
mbat_data += [AllocationTable::AVAIL] *((mbat_data.length / q.to_f).ceil * q - mbat_data.length)
|
310
|
+
ranges = @bbat.ranges((0...num_mbat_blocks).map { |i| @header.mbat_start + i })
|
311
|
+
RangesIO.open(@io, :ranges => ranges) { |io| io.write mbat_data.pack('L*') }
|
312
|
+
end
|
267
313
|
|
268
314
|
# now seek back and write the header out
|
269
315
|
@io.seek 0
|
270
|
-
@io.write @header.to_s +
|
316
|
+
@io.write @header.to_s + bbat_chain[0, 109].pack('L*')
|
271
317
|
@io.flush
|
272
318
|
end
|
273
319
|
|
@@ -280,16 +326,17 @@ destroy things.
|
|
280
326
|
@dirents = [@root]
|
281
327
|
@root.idx = 0
|
282
328
|
@sb_file.close if @sb_file
|
283
|
-
@sb_file = RangesIOResizeable.new @bbat, AllocationTable::EOC
|
329
|
+
@sb_file = RangesIOResizeable.new @bbat, :first_block => AllocationTable::EOC
|
284
330
|
@sbat = AllocationTable::Small.new self
|
285
331
|
# throw everything else the hell away
|
286
332
|
@io.truncate 0
|
287
333
|
end
|
288
334
|
|
289
335
|
# could be useful with mis-behaving ole documents. or to just clean them up.
|
336
|
+
# FIXME: heard Tempfile is not binary on windows. check
|
290
337
|
def repack temp=:file
|
291
338
|
case temp
|
292
|
-
when :file; Tempfile.open '
|
339
|
+
when :file; Tempfile.open 'ole-repack', &method(:repack_using_io)
|
293
340
|
when :mem; StringIO.open(&method(:repack_using_io))
|
294
341
|
else raise ArgumentError, "unknown temp backing #{temp.inspect}"
|
295
342
|
end
|
@@ -299,7 +346,7 @@ destroy things.
|
|
299
346
|
@io.rewind
|
300
347
|
IO.copy @io, temp_io
|
301
348
|
clear
|
302
|
-
Storage.open temp_io, nil, @
|
349
|
+
Storage.open temp_io, nil, @params do |temp_ole|
|
303
350
|
#temp_ole.root.type = :dir
|
304
351
|
Dirent.copy temp_ole.root, root
|
305
352
|
end
|
@@ -428,6 +475,10 @@ destroy things.
|
|
428
475
|
temp.reverse
|
429
476
|
end
|
430
477
|
|
478
|
+
def truncate!
|
479
|
+
replace truncate
|
480
|
+
end
|
481
|
+
|
431
482
|
def to_s
|
432
483
|
table = truncate
|
433
484
|
# pad it out some
|
@@ -474,7 +525,7 @@ destroy things.
|
|
474
525
|
# quick shortcut. chain can be either a head (in which case the table is used to
|
475
526
|
# turn it into a chain), or a chain. it is converted to ranges, then to rangesio.
|
476
527
|
def open chain, size=nil, &block
|
477
|
-
RangesIO.open @io, ranges(chain, size), &block
|
528
|
+
RangesIO.open @io, :ranges => ranges(chain, size), &block
|
478
529
|
end
|
479
530
|
|
480
531
|
def read chain, size=nil
|
@@ -566,12 +617,15 @@ destroy things.
|
|
566
617
|
class RangesIOResizeable < RangesIO
|
567
618
|
attr_reader :bat
|
568
619
|
attr_accessor :first_block
|
569
|
-
def initialize bat,
|
620
|
+
def initialize bat, mode='r', params={}
|
621
|
+
mode, params = 'r', mode if Hash === mode
|
622
|
+
first_block, size = params.values_at :first_block, :size
|
623
|
+
raise ArgumentError, 'must specify first_block' unless first_block
|
570
624
|
@bat = bat
|
571
625
|
self.first_block = first_block
|
572
|
-
# we
|
626
|
+
# we now cache the blocks chain, for faster resizing.
|
573
627
|
@blocks = @bat.chain first_block
|
574
|
-
super @bat.io, @bat.ranges(@blocks, size)
|
628
|
+
super @bat.io, mode, :ranges => @bat.ranges(@blocks, size)
|
575
629
|
end
|
576
630
|
|
577
631
|
def truncate size
|
@@ -597,9 +651,10 @@ destroy things.
|
|
597
651
|
# between bats based on size, and updating the dirent.
|
598
652
|
class RangesIOMigrateable < RangesIOResizeable
|
599
653
|
attr_reader :dirent
|
600
|
-
def initialize dirent
|
654
|
+
def initialize dirent, mode='r'
|
601
655
|
@dirent = dirent
|
602
|
-
super @dirent.ole.bat_for_size(@dirent.size),
|
656
|
+
super @dirent.ole.bat_for_size(@dirent.size), mode,
|
657
|
+
:first_block => @dirent.first_block, :size => @dirent.size
|
603
658
|
end
|
604
659
|
|
605
660
|
def truncate size
|
@@ -692,30 +747,30 @@ destroy things.
|
|
692
747
|
attr_accessor :children
|
693
748
|
attr_accessor :name
|
694
749
|
attr_reader :ole, :type, :create_time, :modify_time
|
695
|
-
def initialize ole, values=DEFAULT,
|
750
|
+
def initialize ole, values=DEFAULT, params={}
|
696
751
|
@ole = ole
|
697
|
-
values,
|
752
|
+
values, params = DEFAULT, values if Hash === values
|
698
753
|
values = values.unpack(PACK) if String === values
|
699
754
|
super(*values)
|
700
755
|
|
701
756
|
# extra parsing from the actual struct values
|
702
|
-
@name =
|
703
|
-
@type = if
|
704
|
-
unless TYPE_MAP.values.include?(
|
705
|
-
raise ArgumentError, "unknown type #{
|
757
|
+
@name = params[:name] || Types::Variant.load(Types::VT_LPWSTR, name_utf16[0...name_len].sub(/\x00\x00$/, ''))
|
758
|
+
@type = if params[:type]
|
759
|
+
unless TYPE_MAP.values.include?(params[:type])
|
760
|
+
raise ArgumentError, "unknown type #{params[:type].inspect}"
|
706
761
|
end
|
707
|
-
|
762
|
+
params[:type]
|
708
763
|
else
|
709
764
|
TYPE_MAP[type_id] or raise FormatError, "unknown type_id #{type_id.inspect}"
|
710
765
|
end
|
711
766
|
|
712
767
|
# further extra type specific stuff
|
713
768
|
if file?
|
714
|
-
default_time = @ole.
|
769
|
+
default_time = @ole.params[:update_timestamps] ? Time.now : nil
|
715
770
|
@create_time ||= default_time
|
716
771
|
@modify_time ||= default_time
|
717
|
-
@create_time = Types.
|
718
|
-
@modify_time = Types.
|
772
|
+
@create_time = Types::Variant.load(Types::VT_FILETIME, create_time_str) if create_time_str
|
773
|
+
# decode the stored modification timestamp from its own field (modify_time_str),
# not from the creation timestamp field.
@modify_time = Types::Variant.load(Types::VT_FILETIME, modify_time_str) if modify_time_str
|
719
774
|
@children = nil
|
720
775
|
else
|
721
776
|
@create_time = nil
|
@@ -727,7 +782,7 @@ destroy things.
|
|
727
782
|
|
728
783
|
def open mode='r'
|
729
784
|
raise Errno::EISDIR unless file?
|
730
|
-
io = RangesIOMigrateable.new self
|
785
|
+
io = RangesIOMigrateable.new self, mode
|
731
786
|
# TODO work on the mode string stuff a bit more.
|
732
787
|
# maybe let the io object know about the mode, so it can refuse
|
733
788
|
# to work for read/write appropriately. maybe redefine all unusable
|
@@ -736,9 +791,12 @@ destroy things.
|
|
736
791
|
# i need to do 'a' etc.
|
737
792
|
case mode
|
738
793
|
when 'r', 'r+'
|
739
|
-
# as i don't enforce reading/writing, nothing changes here
|
794
|
+
# as i don't enforce reading/writing, nothing changes here. kind of
|
795
|
+
# need to enforce that if i want modify times to work better.
|
796
|
+
@modify_time = Time.now if mode == 'r+'
|
740
797
|
when 'w'
|
741
|
-
|
798
|
+
@modify_time = Time.now
|
799
|
+
#io.truncate 0
|
742
800
|
else
|
743
801
|
raise NotImplementedError, "unsupported mode - #{mode.inspect}"
|
744
802
|
end
|
@@ -763,13 +821,14 @@ destroy things.
|
|
763
821
|
!file?
|
764
822
|
end
|
765
823
|
|
824
|
+
# maybe need some options regarding case sensitivity.
|
766
825
|
def / name
|
767
826
|
children.find { |child| name === child.name }
|
768
827
|
end
|
769
828
|
|
770
829
|
def [] idx
|
771
830
|
if String === idx
|
772
|
-
warn 'String form of Dirent#[] is deprecated'
|
831
|
+
#warn 'String form of Dirent#[] is deprecated'
|
773
832
|
self / idx
|
774
833
|
else
|
775
834
|
super
|
@@ -778,7 +837,7 @@ destroy things.
|
|
778
837
|
|
779
838
|
# move to ruby-msg. and remove from here
|
780
839
|
def time
|
781
|
-
warn 'Dirent#time is deprecated'
|
840
|
+
#warn 'Dirent#time is deprecated'
|
782
841
|
create_time || modify_time
|
783
842
|
end
|
784
843
|
|
@@ -813,7 +872,7 @@ destroy things.
|
|
813
872
|
end
|
814
873
|
|
815
874
|
def to_s
|
816
|
-
tmp = Types::
|
875
|
+
tmp = Types::Variant.dump(Types::VT_LPWSTR, name)
|
817
876
|
tmp = tmp[0, 62] if tmp.length > 62
|
818
877
|
tmp += 0.chr * 2
|
819
878
|
self.name_len = tmp.length
|
@@ -824,9 +883,12 @@ destroy things.
|
|
824
883
|
# note not dir?, so as not to override root's first_block
|
825
884
|
self.first_block = Dirent::EOT if type == :dir
|
826
885
|
if file?
|
827
|
-
|
828
|
-
|
829
|
-
|
886
|
+
# this is messed up. it changes the time stamps regardless of whether the file
|
887
|
+
# was actually touched. instead, any open call with a writeable mode, should update
|
888
|
+
# the modify time. create time would be set in new.
|
889
|
+
if @ole.params[:update_timestamps]
|
890
|
+
self.create_time_str = Types::Variant.dump Types::VT_FILETIME, @create_time
|
891
|
+
self.modify_time_str = Types::Variant.dump Types::VT_FILETIME, @modify_time
|
830
892
|
end
|
831
893
|
else
|
832
894
|
self.create_time_str = 0.chr * 8
|
@@ -850,16 +912,6 @@ destroy things.
|
|
850
912
|
str + '>'
|
851
913
|
end
|
852
914
|
|
853
|
-
# --------
|
854
|
-
# and for creation of a dirent. don't like the name. is it a file or a directory?
|
855
|
-
# assign to type later? io will be empty.
|
856
|
-
def new_child type
|
857
|
-
child = Dirent.new ole, :type => type
|
858
|
-
children << child
|
859
|
-
yield child if block_given?
|
860
|
-
child
|
861
|
-
end
|
862
|
-
|
863
915
|
def delete child
|
864
916
|
# remove from our child array, so that on reflatten and re-creation of @dirents, it will be gone
|
865
917
|
raise ArgumentError, "#{child.inspect} not a child of #{self.inspect}" unless @children.delete child
|
@@ -874,7 +926,9 @@ destroy things.
|
|
874
926
|
dst.name = src.name
|
875
927
|
if src.dir?
|
876
928
|
src.children.each do |src_child|
|
877
|
-
|
929
|
+
dst_child = Dirent.new dst.ole, :type => src_child.type
|
930
|
+
dst.children << dst_child
|
931
|
+
Dirent.copy src_child, dst_child
|
878
932
|
end
|
879
933
|
else
|
880
934
|
src.open do |src_io|
|