keeguon-ruby-ole 1.2.11.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,394 @@
1
+ # encoding: ASCII-8BIT
2
+
3
+ #
4
+ # = Introduction
5
+ #
6
+ # This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
7
+ #
8
+ # = TODO
9
+ #
10
+ # - need to implement some more IO functions on RangesIO, like #puts, #print
11
+ # etc, like AbstractOutputStream from zipfile.
12
+ #
13
+ # - check Dir.mkdir, and File.open, and File.rename, to add in filename
14
+ # length checks (max 32 / 31 or something).
15
+ # do the automatic truncation, and add in any necessary warnings.
16
+ #
17
+ # - File.split('a/') == File.split('a') == ['.', 'a']
18
+ # the implication of this, is that things that try to force directory
19
+ # don't work. like, File.rename('a', 'b'), should work if a is a file
20
+ # or directory, but File.rename('a/', 'b') should only work if a is
21
+ # a directory. tricky, need to clean things up a bit more.
22
+ # i think a general path name => dirent method would work, with flags
23
+ # about what should raise an error.
24
+ #
25
+ # - Need to look at streamlining things after getting all the tests passing,
26
+ # as this file's getting pretty long - almost half the real implementation.
27
+ # and is probably more inefficient than necessary.
28
+ # too many exceptions in the expected path of certain functions.
29
+ #
30
+ # - should look at profiles before and after switching ruby-msg to use
31
+ # the filesystem api.
32
+ #
33
+
34
+ module Ole # :nodoc:
35
+ class Storage
36
# Returns the FileClass facade bound to this storage. The facade is built
# on first use and the same instance is handed back on every later call.
def file
  return @file if @file

  @file = FileClass.new(self)
end
39
+
40
# Returns the DirClass facade bound to this storage. The facade is built
# on first use and the same instance is handed back on every later call.
def dir
  return @dir if @dir

  @dir = DirClass.new(self)
end
43
+
44
+ # tries to get a dirent for path. return nil if it doesn't exist
45
+ # (change it)
46
# Walks from @root down the '/'-separated components of +path+ (after it
# has been made absolute by file.expand_path) and returns the dirent found.
#
# Returns nil when a component is missing, or when the walk would have to
# descend through a dirent that reports itself as a file.
def dirent_from_path(path)
  # empty components come from the leading slash and from doubled slashes
  segments = file.expand_path(path).split('/').reject(&:empty?)
  segments.inject(@root) do |current, segment|
    return nil if current.file?
    child = current / segment
    return nil unless child
    child
  end
end
57
+
58
+ class FileClass
59
# Snapshot of a dirent's attributes, exposing readers named after those of
# ::File::Stat. Values are captured once, at construction time.
class Stat
  attr_reader :ftype, :size, :blocks, :blksize
  attr_reader :nlink, :uid, :gid, :dev, :rdev, :ino

  # +dirent+ must respond to #size, #file? and #dir?; for files it must
  # also respond to #ole (giving access to #bat_for_size) and #first_block.
  def initialize dirent
    @dirent = dirent
    @size = dirent.size
    if file?
      @ftype = 'file'
      # block count and block size come from whichever allocation table
      # the storage selects for a stream of this size
      bat = dirent.ole.bat_for_size(dirent.size)
      @blocks = bat.chain(dirent.first_block).length
      @blksize = bat.block_size
    else
      @ftype = 'directory'
      @blocks = 0
      @blksize = 0
    end
    # a lot of these are bogus. ole file format has no analogs
    @nlink = 1
    @uid, @gid = 0, 0
    @dev, @rdev = 0, 0
    @ino = 0
    # need to add times - atime, mtime, ctime.
  end

  alias rdev_major :rdev
  alias rdev_minor :rdev

  # True when the underlying dirent is a file.
  def file?
    @dirent.file?
  end

  # True when the underlying dirent is a directory.
  def directory?
    @dirent.dir?
  end

  # Returns the size for files, nil for directories.
  def size?
    size if file?
  end

  # Lists every captured attribute except the dirent itself.
  def inspect
    # instance_variables yields symbols on ruby >= 1.9 and strings on 1.8,
    # so normalise to strings before filtering out the dirent reference.
    names = instance_variables.map(&:to_s).reject { |n| n == '@dirent' }
    pairs = names.map { |n| "#{n[1..-1]}=#{instance_variable_get n}" }
    "#<#{self.class} #{pairs * ', '}>"
  end
end
105
+
106
# Remembers the storage that every later path lookup is delegated to.
def initialize(ole)
  @ole = ole
end
109
+
110
# Turns +path+ into an absolute, normalised path inside the storage.
#
# A path that does not begin with '/' is taken relative to the raw pwd
# stored on the storage's dir object (blank for root). '.' and '..'
# handling is delegated to ::File.expand_path.
def expand_path path
  path = path.to_s
  # Test the first character only: a regexp anchored with ^ would also
  # match a '/' that merely follows an embedded newline, and such a
  # relative path would then be expanded against the process cwd.
  unless path.start_with?('/')
    # get the raw stored pwd value (its blank for root)
    pwd = @ole.dir.instance_variable_get :@pwd
    path = "#{pwd}/#{path}"
  end
  # at this point its already absolute. we use File.expand_path
  # just for the .. and . handling
  expanded = File.expand_path(path)
  # where the alternate separator is a backslash, drop the first two
  # characters of the result (the drive prefix File.expand_path adds)
  File::ALT_SEPARATOR == "\\" ? expanded[2..-1] : expanded
end
126
+
127
+ # +orig_path+ is just so that we can use the requested path
128
+ # in the error messages even if it has been already modified
129
# Resolves +path+ to a non-directory dirent.
#
# Raises Errno::ENOENT when nothing exists at +path+ and Errno::EISDIR when
# the entry is a directory. +orig_path+, when given, is the text used in
# those error messages instead of +path+.
def dirent_from_path(path, orig_path = nil)
  reported = orig_path || path
  found = @ole.dirent_from_path(path)
  if !found
    raise Errno::ENOENT, reported
  elsif found.dir?
    raise Errno::EISDIR, reported
  end
  found
end
private :dirent_from_path
137
+
138
# True when the storage can resolve +path+ to any dirent, false otherwise.
def exists?(path)
  @ole.dirent_from_path(path) ? true : false
end
alias exist? :exists?
142
+
143
# Truthy when +path+ resolves to a dirent that reports itself as a file.
# Returns the falsy lookup result itself when nothing exists at +path+.
def file?(path)
  found = @ole.dirent_from_path(path)
  found && found.file?
end
147
+
148
# Truthy when +path+ resolves to a dirent that reports itself as a
# directory. Returns the falsy lookup result itself when nothing exists.
def directory?(path)
  found = @ole.dirent_from_path(path)
  found && found.dir?
end
152
+
153
# Opens the file at +path+ with +mode+, forwarding +block+ to the dirent's
# own #open and returning whatever that returns.
#
# When IOMode reports that +mode+ may create the file and +path+ does not
# exist, a new file dirent named after the last path component is appended
# to the parent directory first. Lookup errors (Errno::ENOENT,
# Errno::EISDIR, and whatever the parent directory lookup raises)
# propagate to the caller.
def open(path, mode = 'r', &block)
  unless IOMode.new(mode).create?
    return dirent_from_path(path).open(mode, &block)
  end

  target =
    begin
      dirent_from_path(path)
    rescue Errno::ENOENT
      parent_path, basename = File.split(expand_path(path))
      # errors from the parent lookup are reported against the full path
      parent = @ole.dir.send(:dirent_from_path, parent_path, path)
      created = Dirent.new(@ole, :type => :file, :name => basename)
      parent << created
      created
    end
  target.open(mode, &block)
end
169
+
170
+ # explicit wrapper instead of alias to inhibit block
171
# Same as #open, except that any block given by the caller is ignored;
# the opened object is always returned.
def new(path, mode = 'r')
  open(path, mode)
end
174
+
175
# Size of the file at +path+. Directories report 0 (the value the zip
# filesystem tests expect). A missing path raises Errno::ENOENT.
def size(path)
  begin
    dirent_from_path(path).size
  rescue Errno::EISDIR
    0
  end
end
182
+
183
# Size of the file at +path+, or nil when the path is missing or names a
# directory.
def size?(path)
  begin
    dirent_from_path(path).size
  rescue Errno::ENOENT, Errno::EISDIR
    nil
  end
end
189
+
190
# Builds a Stat for whatever exists at +path+. The lookup goes straight to
# the storage so that directories are accepted as well as files.
# Raises Errno::ENOENT when nothing exists at +path+.
def stat(path)
  found = @ole.dirent_from_path(path)
  return Stat.new(found) if found

  raise Errno::ENOENT, path
end
196
+
197
# Opens +path+ with the default mode and returns the result of calling
# #read on the object yielded by #open.
def read(path)
  open(path) { |io| io.read }
end
200
+
201
+ # most of the work this function does is moving the dirent between
202
+ # 2 parents. the actual name changing is quite simple.
203
+ # File.rename can move a file into another folder, which is why i've
204
+ # done it too, though i think its not always possible...
205
+ #
206
+ # FIXME File.rename can be used for directories too....
207
# Moves the entry at +from_path+ to +to_path+, replacing any file already
# at the destination, and returns 0.
#
# The source may be a file or a directory. The destination's parent must
# be an existing directory. Renaming an entry onto itself is a no-op.
#
# Raises Errno::ENOENT when the source does not exist, plus whatever the
# destination unlink (other than ENOENT) and the parent directory lookup
# raise.
def rename from_path, to_path
  # check what we want to rename from exists. do it this
  # way to allow directories.
  dirent = @ole.dirent_from_path from_path
  raise Errno::ENOENT, from_path unless dirent
  # Both paths naming the same entry must leave it untouched: unlinking
  # the "destination" would otherwise delete the very entry being renamed
  # before it is deleted from its parent a second time below.
  return 0 if dirent.equal?(@ole.dirent_from_path(to_path))
  # delete what we want to rename to if necessary
  begin
    unlink to_path
  rescue Errno::ENOENT
    # destination did not exist, so there is nothing to replace
  end
  # reparent the dirent
  to_parent_path, to_basename = File.split expand_path(to_path)
  from_parent = dirent.parent
  to_parent = @ole.dir.send :dirent_from_path, to_parent_path, to_path
  # false is passed as delete's second argument (presumably to keep the
  # stream contents intact -- confirm against Dirent#delete)
  from_parent.delete dirent, false
  # and also change its name
  dirent.name = to_basename
  to_parent << dirent
  0
end
230
+
231
# Deletes each file named in +paths+ from its parent, in order, and returns
# the number of paths given. A lookup error stops processing at that path;
# entries removed before it stay removed.
def unlink(*paths)
  paths.each do |target|
    victim = dirent_from_path(target)
    owner = victim.parent
    owner.delete(victim)
  end
  paths.size
end
alias delete :unlink
239
+ end
240
+
241
+ #
242
+ # An *instance* of this class is supposed to provide similar methods
243
+ # to the class methods of Dir itself.
244
+ #
245
+ # Fairly complete - like zip/zipfilesystem's implementation, i provide
246
+ # everything except chroot and glob. glob could be done with a glob
247
+ # to regex conversion, and then simply match in the entries array...
248
+ # although recursive glob complicates that somewhat.
249
+ #
250
+ # Dir.chroot, Dir.glob, Dir.[], and Dir.tmpdir is the complete list of
251
+ # methods still missing.
252
+ #
253
+ class DirClass
254
# Remembers the storage and starts with a blank pwd, which stands for the
# root directory.
def initialize(ole)
  @ole, @pwd = ole, ''
end
258
+
259
+ # +orig_path+ is just so that we can use the requested path
260
+ # in the error messages even if it has been already modified
261
# Resolves +path+ to a directory dirent.
#
# Raises Errno::ENOENT when nothing exists at +path+ and Errno::ENOTDIR
# when the entry is not a directory. +orig_path+, when given, is the text
# used in those error messages instead of +path+.
def dirent_from_path(path, orig_path = nil)
  reported = orig_path || path
  found = @ole.dirent_from_path(path)
  if !found
    raise Errno::ENOENT, reported
  elsif !found.dir?
    raise Errno::ENOTDIR, reported
  end
  found
end
private :dirent_from_path
269
+
270
# Builds a Dir over the entries of +path+. Without a block the Dir is
# returned; with a block the Dir is yielded and the block's value is
# returned instead.
def open(path)
  handle = Dir.new(path, entries(path))
  block_given? ? yield(handle) : handle
end
275
+
276
+ # as for file, explicit alias to inhibit block
277
# Same as #open, except that any block given by the caller is ignored;
# the Dir object is always returned.
def new(path)
  open(path)
end
280
+
281
+ # pwd is always stored without the trailing slash. we handle
282
+ # the root case here
283
# Current directory as an absolute path. The stored value never carries a
# trailing slash, so the blank root value is reported as '/'.
def pwd
  @pwd.empty? ? '/' : @pwd
end
alias getwd :pwd
288
+
289
# Changes the current directory to +orig_path+.
#
# Without a block the change is permanent and 0 is returned. With a block
# the change lasts only while the block runs (the previous pwd is restored
# even if the block raises) and the block's value is returned.
#
# Raises whatever the directory lookup raises when +orig_path+ is not an
# existing directory; the pwd is left untouched in that case.
def chdir(orig_path)
  # make path absolute, squeeze slashes, and remove trailing slash
  target = @ole.file.expand_path(orig_path).squeeze('/').sub(/\/$/, '')
  # called only so that an invalid target raises before anything changes
  dirent_from_path(target, orig_path)

  unless block_given?
    @pwd = target
    return 0
  end

  previous = @pwd
  @pwd = target
  begin
    yield
  ensure
    @pwd = previous
  end
end
307
+
308
# Names of everything in the directory at +path+, preceded by '.' and '..'.
#
# A warning is logged for each name that cannot be reached through this
# api: names containing a slash, and second or later occurrences of a
# name. The offending names are still included in the result.
def entries(path)
  names = %w[. ..] + dirent_from_path(path).children.map(&:name)
  names.each_with_object({}) do |name, seen|
    Log.warn "inaccessible file (filename contains slash) - #{name.inspect}" if name['/']
    Log.warn "inaccessible file (duplicate filename) - #{name.inspect}" if seen[name]
    seen[name] = true
  end
  names
end
321
+
322
# Passes each name returned by #entries for +path+ to +block+.
def foreach(path, &block)
  listing = entries(path)
  listing.each(&block)
end
325
+
326
# Creates a directory at +path+ and returns 0.
#
# The parent must already be a directory; errors from the parent lookup
# are reported against the full +path+, consistent with ::Dir.
# Raises Errno::EEXIST when the parent already has a child (file or
# directory) with the requested name.
def mkdir(path)
  container_path, leaf = File.split(@ole.file.expand_path(path))
  container = dirent_from_path(container_path, path)
  if container / leaf
    raise Errno::EEXIST, path
  end
  container << Dirent.new(@ole, :type => :dir, :name => leaf)
  0
end
337
+
338
# Removes the empty directory at +path+ from its parent and returns 0.
# Raises Errno::ENOTEMPTY when the directory still has children, plus
# whatever the directory lookup raises.
def rmdir(path)
  doomed = dirent_from_path(path)
  if doomed.children.empty?
    doomed.parent.delete(doomed)
    return 0
  end
  raise Errno::ENOTEMPTY, path
end
alias delete :rmdir
alias unlink :rmdir
346
+
347
+ # note that there is nothing remotely ole specific about
348
+ # this class. it simply provides the dir like sequential access
349
+ # methods on top of an array.
350
# Sequential, Dir-like cursor over a plain array of entry names.
# Once #close has been called, the position and iteration methods raise
# IOError.
class Dir
  include Enumerable

  attr_reader :path

  # +entries+ is the array of names this object reads from.
  def initialize(path, entries)
    @path = path
    @entries = entries
    @pos = 0
    @closed = false
  end

  # Index of the entry the next #read will return.
  def pos
    raise IOError if @closed
    @pos
  end

  # Yields every entry, regardless of the current position.
  def each(&block)
    raise IOError if @closed
    @entries.each(&block)
  end

  # Marks the object closed.
  def close
    @closed = true
  end

  # Returns the entry at the current position and advances by one, or
  # returns nil without moving once the end has been reached.
  def read
    raise IOError if @closed
    entry = @entries[@pos]
    @pos += 1 if @pos < @entries.length
    entry
  end

  # Moves to +pos+, limited to the range 0..number of entries.
  def pos=(pos)
    raise IOError if @closed
    lower_bounded = [0, pos].max
    @pos = [lower_bounded, @entries.length].min
  end
  alias tell :pos
  alias seek :pos=

  # Moves back to the first entry.
  def rewind
    seek(0)
  end
end
391
+ end
392
+ end
393
+ end
394
+
@@ -0,0 +1,150 @@
1
+ # encoding: ASCII-8BIT
2
+
3
+ require 'ole/types/property_set'
4
+
5
+ module Ole
6
+ class Storage
7
+ #
8
+ # The MetaData class is designed to be high level interface to all the
9
+ # underlying meta data stored within different sections, themselves within
10
+ # different property set streams.
11
+ #
12
+ # With this class, you can simply get properties using their names, without
13
+ # needing to know about the underlying guids, property ids etc.
14
+ #
15
+ # Example:
16
+ #
17
+ # Ole::Storage.open('test.doc') { |ole| p ole.meta_data.doc_author }
18
+ #
19
+ # TODO:
20
+ #
21
+ # * add write support
22
+ # * fix some of the missing type coercion (eg FileTime)
23
+ # * maybe add back the ability to access individual property sets as a unit
24
+ # directly. ie <tt>ole.summary_information</tt>. Is this useful?
25
+ # * full key support, for unknown keys, like
26
+ # <tt>ole.meta_data[myguid, myid]</tt>. probably needed for user-defined
27
+ # properties too.
28
+ #
29
+ class MetaData
30
+ include Enumerable
31
+
32
# Property-set format id => name of the root stream holding that set.
FILE_MAP = {
  Types::PropertySet::FMTID_SummaryInformation => "\005SummaryInformation",
  Types::PropertySet::FMTID_DocSummaryInfo     => "\005DocumentSummaryInformation"
}

# CompObj file format string => document type.
FORMAT_MAP = { 'MSWordDoc' => :doc }

# Root clsids looked up by #mime_type.
CLSID_EXCEL97 = Types::Clsid.parse('{00020820-0000-0000-c000-000000000046}')
CLSID_EXCEL95 = Types::Clsid.parse('{00020810-0000-0000-c000-000000000046}')
CLSID_WORD97  = Types::Clsid.parse('{00020906-0000-0000-c000-000000000046}')
CLSID_WORD95  = Types::Clsid.parse('{00020900-0000-0000-c000-000000000046}')

# Root clsid => document type.
CLSID_MAP = {
  CLSID_EXCEL97 => :xls,
  CLSID_EXCEL95 => :xls,
  CLSID_WORD97  => :doc,
  CLSID_WORD95  => :doc
}

# Document type => mime type string.
MIME_TYPES = {
  :xls => 'application/vnd.ms-excel',
  :doc => 'application/msword',
  :ppt => 'application/vnd.ms-powerpoint',
  # not registered at IANA, but seems most common usage
  :msg => 'application/vnd.ms-outlook',
  # default fallback option, also not registered at IANA.
  # file(1)'s default is application/msword, which is useless...
  nil  => 'application/x-ole-storage'
}
63
+
64
# Remembers the storage whose root streams the meta data is read from.
def initialize(ole)
  @ole = ole
end
67
+
68
+ # i'm thinking of making file_format and mime_type available through
69
+ # #[], #each, and #to_h also, as calculated meta data (not assignable)
70
+
71
# Parses the "\001CompObj" root stream into a hash with the keys
# :username, :file_format and :unknown (an array of any further strings,
# or nil when fewer than two strings were found).
#
# Returns an empty hash when the stream does not exist.
def comp_obj
  return {} unless dirent = @ole.root["\001CompObj"]
  data = dirent.read
  # see - https://gnunet.org/svn/Extractor/doc/StarWrite_File_Format.html
  # The first 28 bytes are a fixed header that is skipped here
  # (presumably version, byte order, windows version, marker and clsid).
  # What follows is a run of strings, each stored as a 32-bit
  # little-endian length followed by that many bytes; the final byte of
  # each string is dropped.
  strings = []
  i = 28
  # Require a complete 4-byte length prefix. Stray trailing bytes in a
  # truncated stream would otherwise unpack to nil and raise on `len - 1`.
  while i + 4 <= data.length
    len = data[i, 4].unpack('V').first
    i += 4
    strings << data[i, len - 1]
    i += len
  end
  # in the unknown chunk, you usually see something like 'Word.Document.6'
  {:username => strings[0], :file_format => strings[1], :unknown => strings[2..-1]}
end
private :comp_obj
93
+
94
# The :file_format entry of the parsed CompObj data, or nil when absent.
def file_format
  info = comp_obj
  info[:file_format]
end
97
+
98
# Best guess at the document's mime type. Three sources are tried in
# order; the first that yields a known type wins:
#
# 1. the file format string from the CompObj stream (FORMAT_MAP)
# 2. the clsid of the root dirent (CLSID_MAP)
# 3. the lower-cased names of the root's children
#
# Falls back to MIME_TYPES[nil] when nothing matches.
def mime_type
  by_format = FORMAT_MAP[file_format]
  return MIME_TYPES[by_format] if by_format

  by_clsid = CLSID_MAP[Types::Clsid.load(@ole.root.clsid)]
  return MIME_TYPES[by_clsid] if by_clsid

  # fallback to heuristics
  present = {}
  @ole.root.children.each { |child| present[child.name.downcase] = true }

  if present['__nameid_version1.0'] || present['__properties_version1.0']
    MIME_TYPES[:msg]
  elsif present['worddocument'] || present['document']
    MIME_TYPES[:doc]
  elsif present['workbook'] || present['book']
    MIME_TYPES[:xls]
  else
    MIME_TYPES[nil]
  end
end
115
+
116
# Looks up the property named +key+ (a string or symbol).
#
# Returns nil when the name is not in PROPERTY_MAP, when the first element
# of its map entry has no stream in FILE_MAP, or when that stream is
# missing from the root. Otherwise the stream is opened and the value is
# read from a PropertySet built on it.
def [](key)
  pair = Types::PropertySet::PROPERTY_MAP[key.to_s]
  return nil unless pair

  file = FILE_MAP[pair.first]
  return nil unless file

  dirent = @ole.root[file]
  return nil unless dirent

  dirent.open { |io| return Types::PropertySet.new(io)[key] }
end
122
+
123
# Writing meta data is not supported; always raises NotImplementedError.
def []=(key, value)
  raise(NotImplementedError, 'meta data writes not implemented')
end
126
+
127
# Passes +block+ to the #each of a PropertySet built on every FILE_MAP
# stream that exists under the root, in FILE_MAP order. Streams that are
# missing are skipped.
def each(&block)
  FILE_MAP.values.each do |file|
    dirent = @ole.root[file]
    next unless dirent

    dirent.open do |io|
      Types::PropertySet.new(io).each(&block)
    end
  end
end
133
+
134
# Collects every (name, value) pair yielded by #each into a hash keyed by
# the name converted to a symbol. Later pairs overwrite earlier ones with
# the same name.
def to_h
  each_with_object({}) do |(name, value), collected|
    collected[name.to_sym] = value
  end
end
137
+
138
# Lets every name listed in PROPERTY_MAP be read as an argument-less
# method, e.g. <tt>meta_data.doc_author</tt>. Any other call falls
# through to the default behaviour (NoMethodError).
def method_missing name, *args, &block
  return super unless args.empty?
  return super unless Types::PropertySet::PROPERTY_MAP[name.to_s]
  self[name]
end

# Keeps #respond_to? (and #method) in step with #method_missing: names
# listed in PROPERTY_MAP are reported as supported.
def respond_to_missing? name, include_private = false
  return true if Types::PropertySet::PROPERTY_MAP[name.to_s]
  super
end
143
+ end
144
+
145
# Returns the MetaData reader for this storage. It is built on first use
# and the same instance is handed back on every later call.
def meta_data
  return @meta_data if @meta_data

  @meta_data = MetaData.new(self)
end
148
+ end
149
+ end
150
+