ruby-ole 1.2.6 → 1.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +12 -0
- data/README +27 -0
- data/Rakefile +2 -1
- data/bin/oletool +7 -1
- data/lib/ole/file_system.rb +2 -424
- data/lib/ole/storage.rb +3 -949
- data/lib/ole/storage/base.rb +948 -0
- data/lib/ole/storage/file_system.rb +444 -0
- data/lib/ole/storage/meta_data.rb +142 -0
- data/lib/ole/support.rb +23 -27
- data/lib/ole/types.rb +2 -243
- data/lib/ole/types/base.rb +247 -0
- data/lib/ole/types/property_set.rb +165 -0
- data/test/test_filesystem.rb +12 -8
- data/test/test_mbat.rb +2 -2
- data/test/test_meta_data.rb +45 -0
- data/test/test_property_set.rb +13 -13
- data/test/test_ranges_io.rb +10 -0
- data/test/test_storage.rb +91 -4
- data/test/test_types.rb +18 -5
- metadata +56 -42
- data/lib/ole/property_set.rb +0 -172
@@ -0,0 +1,444 @@
|
|
1
|
+
#
|
2
|
+
# = Introduction
|
3
|
+
#
|
4
|
+
# This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
|
5
|
+
#
|
6
|
+
# Ideally, this will be the recommended interface, allowing Ole::Storage, Dir, and
|
7
|
+
# Zip::ZipFile to be used interchangeably. It should be possible to write recursive copy using
|
8
|
+
# the plain api, such that you can copy dirs/files agnostically between any of ole docs, dirs,
|
9
|
+
# and zip files.
|
10
|
+
#
|
11
|
+
# = Usage
|
12
|
+
#
|
13
|
+
# Currently you can do something like the following:
|
14
|
+
#
|
15
|
+
# Ole::Storage.open 'test.doc' do |ole|
|
16
|
+
# ole.dir.entries '/' # => [".", "..", "\001Ole", "1Table", "\001CompObj", ...]
|
17
|
+
# ole.file.read "\001CompObj" # => "\001\000\376\377\003\n\000\000\377\377..."
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# = Notes
|
21
|
+
#
|
22
|
+
# <tt>Ole::Storage</tt> files can have multiple files with the same name,
|
23
|
+
# or with / in the name, and other things that are probably invalid anyway.
|
24
|
+
# This API is unable to access those files, but of course the core, low-
|
25
|
+
# level API can.
|
26
|
+
#
|
27
|
+
# need to implement some more IO functions on RangesIO, like #puts, #print
|
28
|
+
# etc, like AbstractOutputStream from zipfile.
|
29
|
+
#
|
30
|
+
# = TODO
|
31
|
+
#
|
32
|
+
# - check Dir.mkdir, and File.open, and File.rename, to add in filename
|
33
|
+
# length checks (max 32 / 31 or something).
|
34
|
+
# do the automatic truncation, and add in any necessary warnings.
|
35
|
+
#
|
36
|
+
# - File.split('a/') == File.split('a') == ['.', 'a']
|
37
|
+
# the implication of this, is that things that try to force directory
|
38
|
+
# don't work. like, File.rename('a', 'b'), should work if a is a file
|
39
|
+
# or directory, but File.rename('a/', 'b') should only work if a is
|
40
|
+
# a directory. tricky, need to clean things up a bit more.
|
41
|
+
# i think a general path name => dirent method would work, with flags
|
42
|
+
# about what should raise an error.
|
43
|
+
#
|
44
|
+
# - Need to look at streamlining things after getting all the tests passing,
|
45
|
+
# as this file's getting pretty long - almost half the real implementation.
|
46
|
+
# and is probably more inefficient than necessary.
|
47
|
+
# too many exceptions in the expected path of certain functions.
|
48
|
+
#
|
49
|
+
# - should look at profiles before and after switching ruby-msg to use
|
50
|
+
# the filesystem api.
|
51
|
+
#
|
52
|
+
|
53
|
+
require 'ole/storage'
|
54
|
+
|
55
|
+
module Ole # :nodoc:
|
56
|
+
class Storage
|
57
|
+
# returns the File-like facade for this storage, building it on first
# use and handing back the same instance afterwards.
def file
  @file = FileClass.new(self) unless @file
  @file
end
|
60
|
+
|
61
|
+
# returns the Dir-like facade for this storage, building it on first
# use and handing back the same instance afterwards.
def dir
  @dir = DirClass.new(self) unless @dir
  @dir
end
|
64
|
+
|
65
|
+
# resolves +path+ to a dirent by walking down from the root, one path
# component at a time. returns nil as soon as a component is missing, or
# when the walk would have to descend through a file.
def dirent_from_path path
  absolute = file.expand_path path
  # strip leading / trailing slashes, then split on runs of slashes
  names = absolute.sub(/^\/*/, '').sub(/\/*$/, '').split(/\/+/)
  names.inject(@root) do |current, name|
    return nil if current.file?
    child = current / name
    return nil unless child
    child
  end
end
|
77
|
+
|
78
|
+
class FileClass
|
79
|
+
# a File::Stat look-alike built from a dirent. only the type, size and
# block figures are derived from the dirent; the remaining fields are
# fixed placeholder values.
class Stat
  attr_reader :ftype, :size, :blocks, :blksize
  attr_reader :nlink, :uid, :gid, :dev, :rdev, :ino

  # +dirent+ must respond to #size, #file? and #dir?, and for files also
  # to #ole and #first_block.
  def initialize dirent
    @dirent = dirent
    @size = dirent.size
    if file?
      @ftype = 'file'
      # block count / size come from whichever allocation table
      # bat_for_size selects for this size
      bat = dirent.ole.bat_for_size(dirent.size)
      @blocks = bat.chain(dirent.first_block).length
      @blksize = bat.block_size
    else
      @ftype = 'directory'
      @blocks = 0
      @blksize = 0
    end
    # a lot of these are bogus. ole file format has no analogs
    @nlink = 1
    @uid, @gid = 0, 0
    @dev, @rdev = 0, 0
    @ino = 0
    # need to add times - atime, mtime, ctime.
  end

  alias rdev_major :rdev
  alias rdev_minor :rdev

  def file?
    @dirent.file?
  end

  def directory?
    @dirent.dir?
  end

  # the size for files, nil for directories
  def size?
    size if file?
  end

  # lists every field except the wrapped dirent. instance_variables
  # returns strings on ruby 1.8 and symbols on later versions, so the
  # names are normalised to strings before @dirent is filtered out.
  def inspect
    names = instance_variables.map { |n| n.to_s }.reject { |n| n == '@dirent' }
    pairs = names.map do |n|
      "#{n[1..-1]}=#{instance_variable_get n}"
    end
    "#<#{self.class} #{pairs * ', '}>"
  end
end
|
125
|
+
|
126
|
+
# +ole+ is the storage object that every path lookup is delegated to.
def initialize(ole)
  @ole = ole
end
|
129
|
+
|
130
|
+
# turns +path+ into an absolute, normalised path inside the ole
# document. relative paths are resolved against the dir object's raw
# pwd value (which is blank for the root).
def expand_path path
  cwd = @ole.dir.instance_variable_get :@pwd
  # its only absolute if it starts with a '/'
  absolute = if path =~ /^\//
    path
  else
    "#{cwd}/#{path}"
  end
  # File.expand_path is used just for the .. and . handling
  expanded = File.expand_path absolute
  # when the alternate separator is a backslash, the first two
  # characters of the expanded path are skipped
  return expanded[2..-1] if File::ALT_SEPARATOR == "\\"
  expanded[0..-1]
end
|
140
|
+
|
141
|
+
# looks up the dirent for +path+, insisting that it is a file.
# raises Errno::ENOENT when nothing is found and Errno::EISDIR when the
# entry is a directory. +orig_path+, when given, is the path reported in
# those errors (the path as the caller originally requested it).
def dirent_from_path path, orig_path=nil
  reported = orig_path || path
  found = @ole.dirent_from_path path
  if !found
    raise Errno::ENOENT, reported
  elsif found.dir?
    raise Errno::EISDIR, reported
  end
  found
end
private :dirent_from_path
|
151
|
+
|
152
|
+
# true when +path+ resolves to any entry (file or directory), false
# otherwise.
def exists? path
  @ole.dirent_from_path(path) ? true : false
end
alias exist? :exists?
|
156
|
+
|
157
|
+
# whether +path+ names an existing file. the result is falsy (nil) when
# the path does not resolve at all.
def file? path
  entry = @ole.dirent_from_path path
  return entry unless entry
  entry.file?
end
|
161
|
+
|
162
|
+
# whether +path+ names an existing directory. the result is falsy (nil)
# when the path does not resolve at all.
def directory? path
  entry = @ole.dirent_from_path path
  return entry unless entry
  entry.dir?
end
|
166
|
+
|
167
|
+
# opens the file at +path+ with the given +mode+, forwarding any block
# to the dirent's own #open.
#
# when the mode is a creating one and the file does not exist, a new
# file dirent is appended to the parent directory first. otherwise a
# missing path raises Errno::ENOENT; a directory raises Errno::EISDIR in
# either case.
def open path, mode='r', &block
  creating = IO::Mode.new(mode).create?
  entry = begin
    dirent_from_path path
  rescue Errno::ENOENT
    raise unless creating
    # maybe instead of repeating this everywhere, i should have
    # a get_parent_dirent function.
    parent_path, leaf = File.split expand_path(path)
    folder = @ole.dir.send :dirent_from_path, parent_path, path
    siblings = folder.children
    created = Dirent.new(@ole, :type => :file, :name => leaf)
    siblings << created
    created
  end
  entry.open mode, &block
end
|
183
|
+
|
184
|
+
# same as #open, but written as an explicit wrapper rather than an alias
# so that a block given to #new is never passed on.
def new(path, mode='r')
  open(path, mode)
end
|
188
|
+
|
189
|
+
# size of the file at +path+. directories report 0 (kind of arbitrary:
# ::File gives 4096, but the zip tests want 0). a missing path raises
# Errno::ENOENT.
def size path
  begin
    dirent_from_path(path).size
  rescue Errno::EISDIR
    0
  end
end
|
196
|
+
|
197
|
+
# size of the file at +path+, or nil when the path is missing or is a
# directory.
def size? path
  begin
    dirent_from_path(path).size
  rescue Errno::ENOENT, Errno::EISDIR
    # any other exceptions i need to rescue?
    nil
  end
end
|
203
|
+
|
204
|
+
# builds a Stat for +path+. the storage is queried directly (rather
# than through the file-only lookup) so that directories are allowed.
# raises Errno::ENOENT when the path does not resolve.
def stat path
  entry = @ole.dirent_from_path path
  if entry
    Stat.new entry
  else
    raise Errno::ENOENT, path
  end
end
|
210
|
+
|
211
|
+
# opens +path+ and returns whatever the opened stream's #read returns.
def read path
  open(path) { |io| io.read }
end
|
214
|
+
|
215
|
+
# renames (and possibly moves) the entry at +from_path+ to +to_path+.
#
# most of the work this function does is moving the dirent between
# 2 parents. the actual name changing is quite simple.
# File.rename can move a file into another folder, which is why i've
# done it too, though i think its not always possible...
#
# an existing file at +to_path+ is unlinked first. renaming an entry
# onto itself is a no-op: the entry is neither unlinked nor reparented.
#
# raises Errno::ENOENT if +from_path+ doesn't exist, and whatever the
# directory lookup raises if either parent isn't a valid directory.
# returns 0.
#
# FIXME File.rename can be used for directories too....
def rename from_path, to_path
  # check what we want to rename from exists. do it this
  # way to allow directories.
  dirent = @ole.dirent_from_path from_path
  raise Errno::ENOENT, from_path unless dirent
  # both paths naming the same entry means there is nothing to do;
  # carrying on would unlink the very entry being renamed.
  return 0 if dirent.equal?(@ole.dirent_from_path(to_path))
  # resolve both parents up front, so that an invalid destination is
  # reported before anything gets unlinked
  from_parent_path, = File.split expand_path(from_path)
  to_parent_path, to_basename = File.split expand_path(to_path)
  from_parent = @ole.dir.send :dirent_from_path, from_parent_path, from_path
  to_parent = @ole.dir.send :dirent_from_path, to_parent_path, to_path
  # delete what we want to rename to if necessary
  begin
    unlink to_path
  rescue Errno::ENOENT
    # nothing at the destination - the usual case
  end
  # reparent the dirent, and also change its name
  from_parent.children.delete dirent
  dirent.name = to_basename
  to_parent.children << dirent
  0
end
|
245
|
+
|
246
|
+
# deletes each file named in +paths+ and returns the number of paths
# given. (crappy copy from Dir.)
#
# every file is first truncated to zero length, then detached from its
# parent's children so it won't be part of the dir tree at save time.
# the original comments note that the old data is currently left in
# place unmodified, and that a repack should probably zero free blocks.
# errors raised while opening a path (missing file, directory)
# propagate to the caller.
def unlink(*paths)
  paths.each do |path|
    victim = @ole.dirent_from_path path
    open(path) { |io| io.truncate 0 }
    parent_path = File.split(expand_path(path)).first
    folder = @ole.dir.send :dirent_from_path, parent_path, path
    folder.children.delete victim
  end
  paths.length # hmmm. as per ::File ?
end
alias delete :unlink
|
264
|
+
end
|
265
|
+
|
266
|
+
#
|
267
|
+
# an *instance* of this class is supposed to provide similar methods
|
268
|
+
# to the class methods of Dir itself.
|
269
|
+
#
|
270
|
+
# pretty complete. like zip/zipfilesystem's implementation, i provide
|
271
|
+
# everything except chroot and glob. glob could be done with a glob
|
272
|
+
# to regex conversion, and then simply match in the entries array... although
|
273
|
+
# recursive glob complicates that somewhat.
|
274
|
+
#
|
275
|
+
# Dir.chroot, Dir.glob, Dir.[], and Dir.tmpdir is the complete list.
|
276
|
+
class DirClass
|
277
|
+
# +ole+ is the storage being browsed. the working directory starts out
# as the root, which is stored as a blank string.
def initialize(ole)
  @ole, @pwd = ole, ''
end
|
281
|
+
|
282
|
+
# looks up the dirent for +path+, insisting that it is a directory.
# raises Errno::ENOENT when nothing is found and Errno::ENOTDIR when the
# entry is not a directory. +orig_path+, when given, is the path
# reported in those errors (the path as the caller originally requested
# it).
def dirent_from_path path, orig_path=nil
  reported = orig_path || path
  found = @ole.dirent_from_path path
  if !found
    raise Errno::ENOENT, reported
  elsif !found.dir?
    raise Errno::ENOTDIR, reported
  end
  found
end
private :dirent_from_path
|
292
|
+
|
293
|
+
# wraps the entries of the directory at +path+ in a Dir object. with a
# block, the Dir is yielded and the block's value returned; without
# one, the Dir itself is returned.
def open path
  handle = Dir.new path, entries(path)
  return handle unless block_given?
  yield handle
end
|
301
|
+
|
302
|
+
# same as #open, but written as an explicit wrapper rather than an alias
# so that a block given to #new is never passed on.
def new(path)
  open(path)
end
|
306
|
+
|
307
|
+
# the current working directory. internally it is stored without a
# trailing slash, which leaves the root as a blank string; that case is
# reported as '/'.
def pwd
  @pwd.empty? ? '/' : @pwd
end
alias getwd :pwd
|
317
|
+
|
318
|
+
# changes the working directory to +orig_path+.
#
# without a block the change is permanent and 0 is returned. with a
# block the change only lasts for the duration of the block, whose
# value is returned; the previous directory is restored even if the
# block raises. the lookup raises (leaving the working directory alone)
# if the target is not a valid directory.
def chdir orig_path
  # make path absolute, squeeze slashes, and remove trailing slash
  expanded = @ole.file.expand_path orig_path
  target = expanded.gsub(/\/+/, '/').sub(/\/$/, '')
  # this is just for the side effects of the exceptions if invalid
  dirent_from_path target, orig_path
  previous = @pwd
  @pwd = target
  return 0 unless block_given?
  begin
    yield
  ensure
    @pwd = previous
  end
end
|
336
|
+
|
337
|
+
# names of everything in the directory at +path+, preceded by "." and
# "..". a warning is logged for each name that can't be reached through
# this api: names containing a slash, and repeated names.
def entries path
  folder = dirent_from_path path
  # Not sure about adding on the dots...
  names = ['.', '..'].concat(folder.children.map { |child| child.name })
  # do some checks about un-reachable files
  names.each_with_object({}) do |name, seen|
    Log.warn "inaccessible file (filename contains slash) - #{name.inspect}" if name.include?('/')
    Log.warn "inaccessible file (duplicate filename) - #{name.inspect}" if seen.key?(name)
    seen[name] = true
  end
  names
end
|
350
|
+
|
351
|
+
# calls the block once for each entry name of the directory at +path+.
def foreach path, &block
  listing = entries path
  listing.each(&block)
end
|
354
|
+
|
355
|
+
# there are some other important ones, like:
|
356
|
+
# chroot (!), glob etc etc. for now, i think
|
357
|
+
# creates a new, empty directory at +path+ and returns 0.
#
# errors from looking up the parent directory are reported against the
# full +path+ (consistent with ::Dir). raises Errno::EEXIST if the
# parent already has a child of that name, file or directory.
def mkdir path
  absolute = @ole.file.expand_path path
  parent_path, leaf = File.split absolute
  parent = dirent_from_path parent_path, path
  # first ensure that it doesn't already exist
  existing = parent / leaf
  raise Errno::EEXIST, path if existing
  siblings = parent.children
  siblings << Dirent.new(@ole, :type => :dir, :name => leaf)
  0
end
|
369
|
+
|
370
|
+
# removes the empty directory at +path+ and returns 0. raises
# Errno::ENOTEMPTY if it still has children; lookup errors for a
# missing or non-directory path propagate.
#
# the canonical representation is the root tree and its children
# arrays, so removal means deleting the dirent from its parent's
# children. the path is expanded before being split so that for
# something like a/b/../c the parent is a, not a/b/.. or a/b.
# per the original notes, the change is expected to be reflected at
# save / repack time.
def rmdir path
  doomed = dirent_from_path path
  raise Errno::ENOTEMPTY, path unless doomed.children.empty?

  absolute = @ole.file.expand_path path
  parent_path = File.split(absolute).first
  # this shouldn't be able to fail if the above didn't
  folder = dirent_from_path parent_path
  folder.children.delete doomed
  0 # hmmm. as per ::Dir ?
end
alias delete :rmdir
alias unlink :rmdir
|
393
|
+
|
394
|
+
# sequential, dir-like access on top of a plain array of entry names.
# there is nothing remotely ole specific about this class.
# once #close has been called, #pos, #pos=, #each, #read and #rewind
# raise IOError.
class Dir
  include Enumerable

  attr_reader :path

  # +entries+ is the array of names to step through, starting at
  # position 0.
  def initialize path, entries
    @path = path
    @entries = entries
    @pos = 0
    @closed = false
  end

  # current read position
  def pos
    raise IOError if @closed
    @pos
  end

  # iterates over every entry, regardless of the read position
  def each(&block)
    raise IOError if @closed
    @entries.each(&block)
  end

  def close
    @closed = true
  end

  # returns the entry at the current position (nil once past the end),
  # advancing the position unless it is already at the end.
  def read
    raise IOError if @closed
    entry = @entries[@pos]
    @pos += 1 if @pos < @entries.length
    entry
  end

  # moves the read position, clamped to 0..entries.length
  def pos= pos
    raise IOError if @closed
    lower_bounded = [0, pos].max
    @pos = [lower_bounded, @entries.length].min
  end

  def rewind
    raise IOError if @closed
    @pos = 0
  end

  alias tell :pos
  alias seek :pos=
end
|
441
|
+
end
|
442
|
+
end
|
443
|
+
end
|
444
|
+
|
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'ole/types/property_set'
|
2
|
+
|
3
|
+
module Ole
|
4
|
+
class Storage
|
5
|
+
#
|
6
|
+
# The MetaData class is designed to be high level interface to all the
|
7
|
+
# underlying meta data stored within different sections, themselves within
|
8
|
+
# different property set streams.
|
9
|
+
#
|
10
|
+
# With this class, you can simply get properties using their names, without
|
11
|
+
# needing to know about the underlying guids, property ids etc.
|
12
|
+
#
|
13
|
+
# Example:
|
14
|
+
#
|
15
|
+
# Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author }
|
16
|
+
#
|
17
|
+
# TODO:
|
18
|
+
#
|
19
|
+
# * add write support
|
20
|
+
# * fix some of the missing type coercion (eg FileTime)
|
21
|
+
# * maybe add back the ability to access individual property sets as a unit
|
22
|
+
# directly. ie <tt>ole.summary_information</tt>. Is this useful?
|
23
|
+
# * full key support, for unknown keys, like
|
24
|
+
# <tt>ole.meta_data[myguid, myid]</tt>. probably needed for user-defined
|
25
|
+
# properties too.
|
26
|
+
#
|
27
|
+
class MetaData
|
28
|
+
include Enumerable
|
29
|
+
|
30
|
+
# maps a property set format id to the name of the root-level stream
# that is opened to read that property set.
FILE_MAP = {
  Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation",
  Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation"
}

# maps the file format string read from the "\001CompObj" stream to a
# document type (a key of MIME_TYPES).
FORMAT_MAP = {
  'MSWordDoc' => :doc
}

# root clsids that are recognised when working out the document type
CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}"
CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}"
CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}"
CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}"

# maps a root clsid to a document type (a key of MIME_TYPES)
CLSID_MAP = {
  CLSID_EXCEL97 => :xls,
  CLSID_EXCEL95 => :xls,
  CLSID_WORD97 => :doc,
  CLSID_WORD95 => :doc
}

# maps a document type to the mime type string reported by #mime_type
MIME_TYPES = {
  :xls => 'application/vnd.ms-excel',
  :doc => 'application/msword',
  :ppt => 'application/vnd.ms-powerpoint',
  :msg => 'application/vnd.ms-outlook' # not registered at IANA, but seems most common usage
}
|
57
|
+
|
58
|
+
# +ole+ is the storage whose root streams the meta data is read from.
def initialize(ole)
  @ole = ole
end
|
61
|
+
|
62
|
+
# i'm thinking of making file_format and mime_type available through
|
63
|
+
# #[], #each, and #to_h also, as calculated meta data (not assignable)
|
64
|
+
|
65
|
+
# parses the root "\001CompObj" stream into a hash with the keys
# :username, :file_format and :unknown. returns an empty hash when the
# stream doesn't exist.
#
# see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html
# the stream starts with a fixed header, which is skipped:
#   compobj_version: 0x0001
#   byte_order: 0xfffe
#   windows_version: 0x00000a03 (win31 apparently)
#   marker: 0xffffffff
#   clsid
# followed by a run of strings, each preceded by a 32 bit little endian
# length. the last byte counted by the length is dropped (presumably a
# terminator - not verified here).
def comp_obj
  return {} unless dirent = @ole.root["\001CompObj"]
  data = dirent.read
  strings = []
  # 2 + 2 + 4 + 4 header bytes plus the clsid - 28 assumes a 16 byte clsid
  i = 28
  # stop as soon as a full 4 byte length prefix is no longer available,
  # rather than failing on trailing or truncated bytes
  while i + 4 <= data.length
    len = data[i, 4].unpack('V').first
    i += 4
    strings << data[i, len - 1]
    i += len
  end
  # in the unknown chunk, you usually see something like 'Word.Document.6'
  {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]}
end
private :comp_obj
|
87
|
+
|
88
|
+
# the file format string taken from the CompObj data, or nil when there
# is none.
def file_format
  details = comp_obj
  details[:file_format]
end
|
91
|
+
|
92
|
+
# works out a mime type for the document, or nil if nothing matches.
# three sources are tried in order: the CompObj file format string, the
# root clsid, and finally the names of well-known root streams.
def mime_type
  # based on the CompObj stream contents
  by_format = FORMAT_MAP[file_format]
  return MIME_TYPES[by_format] if by_format

  # based on the root clsid
  by_clsid = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)]
  return MIME_TYPES[by_clsid] if by_clsid

  # fallback to heuristics - stream names are compared case-insensitively
  names = @ole.root.children.map { |d| d.name.downcase }
  heuristics = [
    [:msg, ['__nameid_version1.0', '__properties_version1.0']],
    [:doc, ['worddocument', 'document']],
    [:xls, ['workbook', 'book']]
  ]
  heuristics.each do |kind, streams|
    return MIME_TYPES[kind] if streams.any? { |s| names.include?(s) }
  end
  nil
end
|
107
|
+
|
108
|
+
# looks up the meta data property named +key+. returns nil when the
# name is unknown, when its property set has no associated stream, or
# when that stream isn't present in the document.
def [] key
  pair = Types::PropertySet::PROPERTY_MAP[key.to_s]
  return nil unless pair
  stream_name = FILE_MAP[pair.first]
  return nil unless stream_name
  stream = @ole.root[stream_name]
  return nil unless stream
  stream.open { |io| return Types::PropertySet.new(io)[key] }
end
|
114
|
+
|
115
|
+
# writing meta data is not supported yet; always raises
# NotImplementedError.
def []=(key, value)
  raise NotImplementedError, 'meta data writes not implemented'
end
|
118
|
+
|
119
|
+
# iterates over the properties of every known property set stream that
# is present in the document, passing the block on to each property
# set's own #each. streams that are missing are skipped.
def each(&block)
  FILE_MAP.values.each do |stream_name|
    stream = @ole.root[stream_name]
    next unless stream
    stream.open { |io| Types::PropertySet.new(io).each(&block) }
  end
end
|
125
|
+
|
126
|
+
# collects every property yielded by #each into a hash keyed by the
# property name as a symbol. a name yielded more than once keeps its
# last value.
def to_h
  each_with_object({}) do |(name, value), properties|
    properties[name.to_sym] = value
  end
end
|
129
|
+
|
130
|
+
# allows known property names to be used as reader methods, eg
# <tt>meta_data.doc_author</tt>. only calls without arguments whose name
# is a key of the property map are handled; everything else goes to
# the default behaviour (NoMethodError).
def method_missing name, *args, &block
  return super unless args.empty?
  return super unless Types::PropertySet::PROPERTY_MAP[name.to_s]
  self[name]
end

# keeps respond_to? in step with method_missing for the property names
# it handles.
def respond_to_missing? name, include_private=false
  Types::PropertySet::PROPERTY_MAP.key?(name.to_s) || super
end
|
135
|
+
end
|
136
|
+
|
137
|
+
# returns the MetaData accessor for this storage, building it on first
# use and handing back the same instance afterwards.
def meta_data
  @meta_data = MetaData.new(self) unless @meta_data
  @meta_data
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
|