ruby-ole 1.2.6 → 1.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,444 @@
1
+ #
2
+ # = Introduction
3
+ #
4
+ # This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
5
+ #
6
+ # Ideally, this will be the recommended interface, allowing Ole::Storage, Dir, and
7
+ # Zip::ZipFile to be used interchangeably. It should be possible to write a recursive copy using
8
+ # the plain api, such that you can copy dirs/files agnostically between any of ole docs, dirs,
9
+ # and zip files.
10
+ #
11
+ # = Usage
12
+ #
13
+ # Currently you can do something like the following:
14
+ #
15
+ # Ole::Storage.open 'test.doc' do |ole|
16
+ # ole.dir.entries '/' # => [".", "..", "\001Ole", "1Table", "\001CompObj", ...]
17
+ # ole.file.read "\001CompObj" # => "\001\000\376\377\003\n\000\000\377\377..."
18
+ # end
19
+ #
20
+ # = Notes
21
+ #
22
+ # <tt>Ole::Storage</tt> files can have multiple files with the same name,
23
+ # or with / in the name, and other things that are probably invalid anyway.
24
+ # This API is unable to access those files, but of course the core, low-
25
+ # level API can.
26
+ #
27
+ # need to implement some more IO functions on RangesIO, like #puts, #print
28
+ # etc, like AbstractOutputStream from zipfile.
29
+ #
30
+ # = TODO
31
+ #
32
+ # - check Dir.mkdir, and File.open, and File.rename, to add in filename
33
+ # length checks (max 32 / 31 or something).
34
+ # do the automatic truncation, and add in any necessary warnings.
35
+ #
36
+ # - File.split('a/') == File.split('a') == ['.', 'a']
37
+ # the implication of this, is that things that try to force directory
38
+ # don't work. like, File.rename('a', 'b'), should work if a is a file
39
+ # or directory, but File.rename('a/', 'b') should only work if a is
40
+ # a directory. tricky, need to clean things up a bit more.
41
+ # i think a general path name => dirent method would work, with flags
42
+ # about what should raise an error.
43
+ #
44
+ # - Need to look at streamlining things after getting all the tests passing,
45
+ # as this file's getting pretty long - almost half the real implementation.
46
+ # and is probably more inefficient than necessary.
47
+ # too many exceptions in the expected path of certain functions.
48
+ #
49
+ # - should look at profiles before and after switching ruby-msg to use
50
+ # the filesystem api.
51
+ #
52
+
53
+ require 'ole/storage'
54
+
55
+ module Ole # :nodoc:
56
+ class Storage
57
# Returns the File-like facade for this storage. The FileClass instance is
# built on first use and cached in @file for later calls.
def file
  return @file if @file
  @file = FileClass.new(self)
end
60
+
61
# Returns the Dir-like facade for this storage. The DirClass instance is
# built on first use and cached in @dir for later calls.
def dir
  return @dir if @dir
  @dir = DirClass.new(self)
end
64
+
65
# Resolves +path+ to a dirent by walking down from @root one path component
# at a time. The path is first made absolute via file.expand_path, then
# leading/trailing slashes are dropped and runs of slashes are collapsed.
#
# Returns the dirent, or nil when a component is missing or when the walk
# would have to descend through a file.
def dirent_from_path path
  absolute = file.expand_path(path)
  names = absolute.sub(/^\/*/, '').sub(/\/*$/, '').split(/\/+/)
  # an empty component list (the root path) simply yields @root
  names.inject(@root) do |current, name|
    # files have no children to descend into
    return nil if current.file?
    child = current / name
    return nil unless child
    child
  end
end
77
+
78
#
# An *instance* of this class provides methods similar to the class methods
# of ::File, but operating on the dirents of an Ole::Storage rather than on
# the real file system. Paths are resolved relative to the pwd kept by the
# storage's dir object.
#
class FileClass
  # A small ::File::Stat look-alike built from a dirent. Only the type, size
  # and block information comes from the dirent; the remaining fields are
  # fixed placeholder values.
  class Stat
    attr_reader :ftype, :size, :blocks, :blksize
    attr_reader :nlink, :uid, :gid, :dev, :rdev, :ino

    def initialize dirent
      @dirent = dirent
      @size = dirent.size
      if file?
        @ftype = 'file'
        # block figures come from whichever allocation table holds a stream
        # of this size
        bat = dirent.ole.bat_for_size(dirent.size)
        @blocks = bat.chain(dirent.first_block).length
        @blksize = bat.block_size
      else
        @ftype = 'directory'
        @blocks = 0
        @blksize = 0
      end
      # a lot of these are bogus. ole file format has no analogs
      @nlink = 1
      @uid, @gid = 0, 0
      @dev, @rdev = 0, 0
      @ino = 0
      # need to add times - atime, mtime, ctime.
    end

    alias rdev_major :rdev
    alias rdev_minor :rdev

    # true when the underlying dirent is a file
    def file?
      @dirent.file?
    end

    # true when the underlying dirent is a directory
    def directory?
      @dirent.dir?
    end

    # the size for files, nil for directories
    def size?
      size if file?
    end

    # Lists every field except the wrapped dirent. instance_variables holds
    # Strings on ruby 1.8 and Symbols on later versions, so the names are
    # normalised to Strings before @dirent is filtered out.
    def inspect
      names = instance_variables.map(&:to_s) - ['@dirent']
      pairs = names.map { |n| "#{n[1..-1]}=#{instance_variable_get n}" }
      "#<#{self.class} #{pairs * ', '}>"
    end
  end

  def initialize ole
    @ole = ole
  end

  # Returns +path+ as an absolute, normalised path. Relative paths are
  # interpreted against the current pwd of the storage's dir object.
  def expand_path path
    # get the raw stored pwd value (its blank for root)
    pwd = @ole.dir.instance_variable_get :@pwd
    # its only absolute if it starts with a '/'
    path = "#{pwd}/#{path}" unless path =~ /^\//
    # at this point its already absolute. we use File.expand_path
    # just for the .. and . handling. when the platform's alternate
    # separator is a backslash, the first 2 characters of the result are
    # dropped (presumably the drive prefix File.expand_path adds there).
    File.expand_path(path)[File::ALT_SEPARATOR == "\\" ? (2..-1) : (0..-1)]
  end

  # Looks up the dirent for +path+, which must be an existing file.
  # +orig_path+ is just so that we can use the requested path
  # in the error messages even if it has been already modified.
  # Raises Errno::ENOENT if nothing exists at +path+ and Errno::EISDIR if
  # it is a directory.
  def dirent_from_path path, orig_path=nil
    orig_path ||= path
    dirent = @ole.dirent_from_path path
    raise Errno::ENOENT, orig_path unless dirent
    raise Errno::EISDIR, orig_path if dirent.dir?
    dirent
  end
  private :dirent_from_path

  # true if +path+ names an existing file or directory
  def exists? path
    !!@ole.dirent_from_path(path)
  end
  alias exist? :exists?

  # truthy if +path+ names an existing file
  def file? path
    dirent = @ole.dirent_from_path path
    dirent and dirent.file?
  end

  # truthy if +path+ names an existing directory
  def directory? path
    dirent = @ole.dirent_from_path path
    dirent and dirent.dir?
  end

  # Opens the file at +path+ by delegating to the dirent's #open with
  # +mode+ and the block. When +mode+ is a creating mode and the file is
  # missing, a new file dirent is first added to the parent directory.
  # Raises Errno::ENOENT / Errno::EISDIR / Errno::ENOTDIR for bad paths.
  def open path, mode='r', &block
    if IO::Mode.new(mode).create?
      begin
        dirent = dirent_from_path path
      rescue Errno::ENOENT
        # maybe instead of repeating this everywhere, i should have
        # a get_parent_dirent function.
        parent_path, basename = File.split expand_path(path)
        parent = @ole.dir.send :dirent_from_path, parent_path, path
        parent.children << dirent = Dirent.new(@ole, :type => :file, :name => basename)
      end
    else
      dirent = dirent_from_path path
    end
    dirent.open mode, &block
  end

  # explicit wrapper instead of alias to inhibit block
  def new path, mode='r'
    open path, mode
  end

  # Size of the file at +path+. Directories report 0; a missing path
  # raises Errno::ENOENT.
  def size path
    dirent_from_path(path).size
  rescue Errno::EISDIR
    # kind of arbitrary. I'm getting 4096 from ::File, but
    # the zip tests want 0.
    0
  end

  # Size of the file at +path+, or nil if it is missing or a directory.
  def size? path
    dirent_from_path(path).size
    # any other exceptions i need to rescue?
  rescue Errno::ENOENT, Errno::EISDIR
    nil
  end

  # Returns a Stat for the file or directory at +path+.
  # Raises Errno::ENOENT if it does not exist.
  def stat path
    # we do this to allow dirs.
    dirent = @ole.dirent_from_path path
    raise Errno::ENOENT, path unless dirent
    Stat.new dirent
  end

  # Opens the file at +path+ and returns the result of calling read on it.
  def read path
    open path, &:read
  end

  # most of the work this function does is moving the dirent between
  # 2 parents. the actual name changing is quite simple.
  # File.rename can move a file into another folder, which is why i've
  # done it too, though i think its not always possible...
  #
  # Any existing file at +to_path+ is unlinked first. Returns 0.
  # Raises Errno::ENOENT if +from_path+ does not exist.
  #
  # FIXME File.rename can be used for directories too....
  def rename from_path, to_path
    # check what we want to rename from exists. do it this
    # way to allow directories.
    dirent = @ole.dirent_from_path from_path
    raise Errno::ENOENT, from_path unless dirent
    # renaming an entry onto itself must be a no-op. without this guard the
    # unlink below would truncate and detach the very entry being renamed.
    return 0 if dirent.equal?(@ole.dirent_from_path(to_path))
    # delete what we want to rename to if necessary
    begin
      unlink to_path
    rescue Errno::ENOENT
      # nothing exists at the destination, so there is nothing to replace
    end
    # reparent the dirent
    from_parent_path = File.split(expand_path(from_path)).first
    to_parent_path, to_basename = File.split expand_path(to_path)
    from_parent = @ole.dir.send :dirent_from_path, from_parent_path, from_path
    to_parent = @ole.dir.send :dirent_from_path, to_parent_path, to_path
    from_parent.children.delete dirent
    # and also change its name
    dirent.name = to_basename
    to_parent.children << dirent
    0
  end

  # Removes each of the given files: its data is truncated to 0 bytes and
  # its dirent is removed from the parent's children. Returns the number
  # of paths given. Raises Errno::ENOENT / Errno::EISDIR for bad paths.
  def unlink(*paths)
    paths.each do |path|
      dirent = @ole.dirent_from_path path
      # i think we should free all of our blocks from the
      # allocation table.
      # i think if you run repack, all free blocks should get zeroed,
      # but currently the original data is there unmodified.
      open(path) { |f| f.truncate 0 }
      # remove ourself from our parent, so we won't be part of the dir
      # tree at save time.
      parent_path, basename = File.split expand_path(path)
      parent = @ole.dir.send :dirent_from_path, parent_path, path
      parent.children.delete dirent
    end
    paths.length # hmmm. as per ::File ?
  end
  alias delete :unlink
end
265
+
266
#
# an *instance* of this class is supposed to provide similar methods
# to the class methods of Dir itself.
#
# pretty complete. like zip/zipfilesystem's implementation, i provide
# everything except chroot and glob. glob could be done with a glob
# to regex regex, and then simply match in the entries array... although
# recursive glob complicates that somewhat.
#
# Dir.chroot, Dir.glob, Dir.[], and Dir.tmpdir is the complete list.
class DirClass
  def initialize ole
    @ole = ole
    # stored without a trailing slash; blank means the root
    @pwd = ''
  end

  # Looks up the dirent for +path+, which must be an existing directory.
  # +orig_path+ is just so that we can use the requested path
  # in the error messages even if it has been already modified.
  # Raises Errno::ENOENT if nothing exists at +path+ and Errno::ENOTDIR if
  # it is not a directory.
  def dirent_from_path path, orig_path=nil
    orig_path ||= path
    dirent = @ole.dirent_from_path path
    raise Errno::ENOENT, orig_path unless dirent
    raise Errno::ENOTDIR, orig_path unless dirent.dir?
    dirent
  end
  private :dirent_from_path

  # Opens the directory at +path+. Without a block the Dir handle is
  # returned. With a block the handle is yielded, the block's value is
  # returned, and the handle is closed when the block exits (as ::Dir.open
  # does), even if the block raises.
  def open path
    dir = Dir.new path, entries(path)
    return dir unless block_given?
    begin
      yield dir
    ensure
      dir.close
    end
  end

  # as for file, explicit alias to inhibit block
  def new path
    open path
  end

  # pwd is always stored without the trailing slash. we handle
  # the root case here
  def pwd
    if @pwd.empty?
      '/'
    else
      @pwd
    end
  end
  alias getwd :pwd

  # Changes the working directory to +orig_path+. With a block, the change
  # only lasts for the duration of the block and the block's value is
  # returned; otherwise returns 0.
  # Raises Errno::ENOENT / Errno::ENOTDIR if the target is not a directory.
  def chdir orig_path
    # make path absolute, squeeze slashes, and remove trailing slash
    path = @ole.file.expand_path(orig_path).gsub(/\/+/, '/').sub(/\/$/, '')
    # this is just for the side effects of the exceptions if invalid
    dirent_from_path path, orig_path
    if block_given?
      old_pwd = @pwd
      begin
        @pwd = path
        yield
      ensure
        @pwd = old_pwd
      end
    else
      @pwd = path
      0
    end
  end

  # Returns the names in the directory at +path+, preceded by "." and "..".
  # Names that this api cannot reach (containing a slash, or duplicated)
  # are still returned, but a warning is logged for each.
  def entries path
    dirent = dirent_from_path path
    # Not sure about adding on the dots...
    entries = %w[. ..] + dirent.children.map(&:name)
    # do some checks about un-reachable files
    seen = {}
    entries.each do |n|
      Log.warn "inaccessible file (filename contains slash) - #{n.inspect}" if n['/']
      Log.warn "inaccessible file (duplicate filename) - #{n.inspect}" if seen[n]
      seen[n] = true
    end
    entries
  end

  # Calls the block once with each name returned by #entries.
  def foreach path, &block
    entries(path).each(&block)
  end

  # Creates a directory at +path+ and returns 0.
  # Raises Errno::EEXIST if a file or directory already has that name, and
  # Errno::ENOENT / Errno::ENOTDIR if the parent is not a directory.
  def mkdir path
    # as for rmdir below:
    parent_path, basename = File.split @ole.file.expand_path(path)
    # note that we will complain about the full path despite accessing
    # the parent path. this is consistent with ::Dir
    parent = dirent_from_path parent_path, path
    # now, we first should ensure that it doesn't already exist
    # either as a file or a directory.
    raise Errno::EEXIST, path if parent / basename
    parent.children << Dirent.new(@ole, :type => :dir, :name => basename)
    0
  end

  # Removes the empty directory at +path+ and returns 0.
  # Raises Errno::ENOTEMPTY if it still has children, and
  # Errno::ENOENT / Errno::ENOTDIR if +path+ is not a directory.
  def rmdir path
    dirent = dirent_from_path path
    raise Errno::ENOTEMPTY, path unless dirent.children.empty?

    # now delete it, how to do that? the canonical representation that is
    # maintained is the root tree, and the children array. we must remove it
    # from the children array.
    # we need the parent then. this sucks but anyway:
    # we need to split the path. but before we can do that, we need
    # to expand it first. eg. say we need the parent to unlink
    # a/b/../c. the parent should be a, not a/b/.., or a/b.
    parent_path, basename = File.split @ole.file.expand_path(path)
    # this shouldn't be able to fail if the above didn't
    parent = dirent_from_path parent_path
    # note that the way this currently works, on save and repack time this will get
    # reflected. to work properly, ie to make a difference now it would have to re-write
    # the dirent. i think that Ole::Storage#close will handle that. and maybe include a
    # #repack.
    parent.children.delete dirent
    0 # hmmm. as per ::Dir ?
  end
  alias delete :rmdir
  alias unlink :rmdir

  # note that there is nothing remotely ole specific about
  # this class. it simply provides the dir like sequential access
  # methods on top of an array.
  # every accessor except #close raises IOError once the handle is closed.
  class Dir
    include Enumerable

    attr_reader :path
    def initialize path, entries
      @path, @entries, @pos = path, entries, 0
      @closed = false
    end

    # current read position (index into the entries)
    def pos
      raise IOError if @closed
      @pos
    end

    # yields every entry, regardless of the current position
    def each(&block)
      raise IOError if @closed
      @entries.each(&block)
    end

    def close
      @closed = true
    end

    # Returns the entry at the current position and advances by one, or
    # returns nil (without advancing) once all entries have been read.
    def read
      raise IOError if @closed
      @entries[pos]
    ensure
      @pos += 1 if pos < @entries.length
    end

    # sets the position, clamped to 0..entries.length
    def pos= pos
      raise IOError if @closed
      @pos = [[0, pos].max, @entries.length].min
    end

    def rewind
      raise IOError if @closed
      @pos = 0
    end

    alias tell :pos
    alias seek :pos=
  end
end
442
+ end
443
+ end
444
+
@@ -0,0 +1,142 @@
1
+ require 'ole/types/property_set'
2
+
3
+ module Ole
4
+ class Storage
5
+ #
6
+ # The MetaData class is designed to be a high level interface to all the
7
+ # underlying meta data stored within different sections, themselves within
8
+ # different property set streams.
9
+ #
10
+ # With this class, you can simply get properties using their names, without
11
+ # needing to know about the underlying guids, property ids etc.
12
+ #
13
+ # Example:
14
+ #
15
+ # Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author }
16
+ #
17
+ # TODO:
18
+ #
19
+ # * add write support
20
+ # * fix some of the missing type coercion (eg FileTime)
21
+ # * maybe add back the ability to access individual property sets as a unit
22
+ # directly. ie <tt>ole.summary_information</tt>. Is this useful?
23
+ # * full key support, for unknown keys, like
24
+ # <tt>ole.meta_data[myguid, myid]</tt>. probably needed for user-defined
25
+ # properties too.
26
+ #
27
+ class MetaData
28
+ include Enumerable
29
+
30
+ FILE_MAP = { # property set format id => name of the root stream that holds that set
31
+ Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation",
32
+ Types::PropertySet::FMTID_DocSummaryInfo => "\005DocumentSummaryInformation"
33
+ }
34
+
35
+ FORMAT_MAP = { # file_format string (from the CompObj stream) => document type symbol
36
+ 'MSWordDoc' => :doc
37
+ }
38
+
39
+ CLSID_EXCEL97 = Types::Clsid.parse "{00020820-0000-0000-c000-000000000046}"
40
+ CLSID_EXCEL95 = Types::Clsid.parse "{00020810-0000-0000-c000-000000000046}"
41
+ CLSID_WORD97 = Types::Clsid.parse "{00020906-0000-0000-c000-000000000046}"
42
+ CLSID_WORD95 = Types::Clsid.parse "{00020900-0000-0000-c000-000000000046}"
43
+
44
+ CLSID_MAP = { # root clsid => document type symbol
45
+ CLSID_EXCEL97 => :xls,
46
+ CLSID_EXCEL95 => :xls,
47
+ CLSID_WORD97 => :doc,
48
+ CLSID_WORD95 => :doc
49
+ }
50
+
51
+ MIME_TYPES = { # document type symbol => mime type string
52
+ :xls => 'application/vnd.ms-excel',
53
+ :doc => 'application/msword',
54
+ :ppt => 'application/vnd.ms-powerpoint',
55
+ :msg => 'application/vnd.ms-outlook' # not registered at IANA, but seems most common usage
56
+ }
57
+
58
# Remembers the storage (+ole+) whose streams hold the meta data.
def initialize(ole)
  @ole = ole
end
61
+
62
+ # i'm thinking of making file_format and mime_type available through
63
+ # #[], #each, and #to_h also, as calculated meta data (not assignable)
64
+
65
# Parses the "\001CompObj" stream of the root entry.
#
# Returns a hash with :username, :file_format and :unknown (an array of any
# further strings), or an empty hash when the stream does not exist.
# A stream that ends in the middle of a length field is parsed up to that
# point instead of raising.
def comp_obj
  return {} unless dirent = @ole.root["\001CompObj"]
  data = dirent.read
  # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html
  # the stream starts with a fixed header which is not needed here:
  #   compobj_version: 0x0001 (v)
  #   byte_order: 0xffe (v)
  #   windows_version: 0x00000a03 (win31 apparently) (V)
  #   marker: 0xffffffff (V)
  #   clsid
  # the strings start at offset 28 - presumably 12 bytes of fields plus a
  # 16 byte clsid; confirm against Types::Clsid::SIZE.
  strings = []
  i = 28
  # what follows is a sequence of strings, each stored as a 32 bit little
  # endian length followed by that many bytes, of which the last one is
  # dropped (presumably a terminating nul).
  while i < data.length
    len = data[i, 4].unpack('V').first
    # fewer than 4 bytes left: the stream is truncated, stop parsing
    break unless len
    i += 4
    strings << data[i, len - 1]
    i += len
  end
  # in the unknown chunk, you usually see something like 'Word.Document.6'
  {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]}
end
private :comp_obj
87
+
88
# The :file_format string recorded in the CompObj stream, or nil when the
# stream is missing or has no such string.
def file_format
  details = comp_obj
  details[:file_format]
end
91
+
92
# Works out the mime type of the document, trying in order: the file format
# named in the CompObj stream, the clsid of the root entry, and finally the
# names of the root's children. Returns nil when nothing matches.
def mime_type
  # based on the CompObj stream contents
  by_format = FORMAT_MAP[file_format]
  return MIME_TYPES[by_format] if by_format

  # based on the root clsid
  by_clsid = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)]
  return MIME_TYPES[by_clsid] if by_clsid

  # fallback to heuristics: look for well known names, ignoring case
  names = @ole.root.children.map { |d| d.name.downcase }
  present = lambda { |*wanted| wanted.any? { |w| names.include? w } }
  if present.call('__nameid_version1.0', '__properties_version1.0')
    MIME_TYPES[:msg]
  elsif present.call('worddocument', 'document')
    MIME_TYPES[:doc]
  elsif present.call('workbook', 'book')
    MIME_TYPES[:xls]
  end
end
107
+
108
# Looks up a single property by name. +key+ is converted with to_s to find
# its entry in PROPERTY_MAP; the first element of that entry selects the
# stream (through FILE_MAP) which is then opened and queried.
# Returns nil for unknown keys and when the stream does not exist.
def [] key
  pair = Types::PropertySet::PROPERTY_MAP[key.to_s]
  return nil unless pair
  stream_name = FILE_MAP[pair.first]
  return nil unless stream_name
  dirent = @ole.root[stream_name]
  return nil unless dirent
  # return from inside the block so the property value is the result
  # whatever the dirent's open itself returns
  dirent.open { |io| return Types::PropertySet.new(io)[key] }
end
114
+
115
# Writing meta data is not supported; always raises NotImplementedError.
def []=(key, value)
  raise NotImplementedError.new('meta data writes not implemented')
end
118
+
119
# Passes the block to the each method of the property set found in every
# stream named by FILE_MAP. Streams missing from the root are skipped.
def each(&block)
  FILE_MAP.values.each do |stream_name|
    dirent = @ole.root[stream_name]
    next unless dirent
    dirent.open do |io|
      Types::PropertySet.new(io).each(&block)
    end
  end
end
125
+
126
# Collects every name/value pair produced by #each into a hash whose keys
# are the names converted to symbols. Later pairs overwrite earlier ones
# with the same name.
def to_h
  result = {}
  each { |name, value| result[name.to_sym] = value }
  result
end
129
+
130
# Lets known property names be read as methods, eg +doc_author+. Only calls
# without arguments whose name is a key of PROPERTY_MAP are handled; anything
# else goes to the default behaviour (NoMethodError / NameError).
def method_missing name, *args, &block
  return super unless args.empty?
  return super unless Types::PropertySet::PROPERTY_MAP[name.to_s]
  self[name]
end

# Keeps respond_to? in agreement with method_missing: property names found
# in PROPERTY_MAP are reported as supported.
def respond_to_missing? name, include_private = false
  Types::PropertySet::PROPERTY_MAP[name.to_s] ? true : super
end
135
+ end
136
+
137
# Returns the MetaData accessor for this storage. It is built on first use
# and cached in @meta_data for later calls.
def meta_data
  return @meta_data if @meta_data
  @meta_data = MetaData.new(self)
end
140
+ end
141
+ end
142
+