ruby-ole 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,60 @@
+ require 'rake/rdoctask'
+ require 'rake/testtask'
+ require 'rake/packagetask'
+ require 'rake/gempackagetask'
+
+ require 'rbconfig'
+ require 'fileutils'
+
+ $:.unshift 'lib'
+
+ require 'ole/storage'
+
+ PKG_NAME = 'ruby-ole'
+ PKG_VERSION = Ole::Storage::VERSION
+
+ task :default => [:test]
+
+ Rake::TestTask.new(:test) do |t|
+   t.test_files = FileList["test/test_*.rb"]
+   t.warning = true
+   t.verbose = true
+ end
+
+ # RDocTask wasn't working for me
+ desc 'Build the rdoc HTML Files'
+ task :rdoc do
+   system "rdoc -S -N --main 'Ole::Storage' --tab-width 2 --title '#{PKG_NAME} documentation' lib"
+ end
+
+ spec = Gem::Specification.new do |s|
+   s.name = PKG_NAME
+   s.version = PKG_VERSION
+   s.summary = %q{Ruby OLE library.}
+   s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
+   s.authors = ["Charles Lowe"]
+   s.email = %q{aquasync@gmail.com}
+   s.homepage = %q{http://code.google.com/p/ruby-ole}
+   #s.rubyforge_project = %q{ruby-ole}
+
+   s.executables = ['oletool']
+   s.files = ['Rakefile']
+   s.files += Dir.glob("lib/**/*.rb")
+   s.files += Dir.glob("test/test_*.rb") + Dir.glob("test/*.doc")
+   s.files += Dir.glob("bin/*")
+
+   s.has_rdoc = true
+   s.rdoc_options += ['--main', 'Ole::Storage',
+                      '--title', "#{PKG_NAME} documentation",
+                      '--tab-width', '2']
+
+   s.autorequire = 'ole/storage'
+ end
+
+ Rake::GemPackageTask.new(spec) do |p|
+   p.gem_spec = spec
+   p.need_tar = true
+   p.need_zip = false
+   p.package_dir = 'build'
+ end
+
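Taken together, the Rakefile wires up the usual entry points: the default `rake` target runs the tests under test/, `rake rdoc` shells out to the rdoc command directly (the comment notes Rake::RDocTask wasn't working for the author), and the Rake::GemPackageTask block defines the packaging tasks, which should drop the built gem (and a .tgz, since need_tar is set) into the build/ directory configured by package_dir.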
@@ -0,0 +1,35 @@
+ #! /usr/bin/ruby
+
+ require 'optparse'
+ require 'rubygems'
+ require 'ole/storage'
+
+ def oletool
+   opts = {:verbose => false, :action => :tree}
+   op = OptionParser.new do |op|
+     op.banner = "Usage: oletool [options] [files]"
+     op.separator ''
+     op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
+     op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
+     op.separator ''
+     op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
+     op.on_tail('-h', '--help', 'Show this message') { puts op; exit }
+   end
+   files = op.parse ARGV
+   if files.empty?
+     puts 'Must specify 1 or more ole files.'
+     puts op
+     exit 1
+   end
+   Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
+   files.each do |file|
+     case opts[:action]
+     when :tree
+       Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
+     when :repack
+       Ole::Storage.open file, 'r+', &:repack
+     end
+   end
+ end
+
+ oletool
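In other words, the script is a thin command-line front end over Ole::Storage: `oletool --tree somefile.doc` opens each named file and prints `ole.root.to_tree`, while `oletool --repack somefile.doc` reopens it read-write and calls #repack (`somefile.doc` is just a placeholder name here); `-v` only raises the logging level from FATAL to WARN.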
@@ -0,0 +1,7 @@
+
+ require 'ole/support'
+
+ module Ole # :nodoc:
+   Log = Logger.new_with_callstack
+ end
+
@@ -0,0 +1,181 @@
+ #
+ # = Introduction
+ #
+ # This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
+ #
+ # Ideally, this will be the recommended interface, allowing Ole::Storage, Dir, and
+ # Zip::ZipFile to be used interchangeably. It should be possible to write a recursive copy using
+ # the plain api, such that you can copy dirs/files agnostically between any of ole docs, dirs,
+ # and zip files.
+ #
+ # = Usage
+ #
+ # Currently you can do something like the following:
+ #
+ #   Ole::Storage.open 'test.doc' do |ole|
+ #     ole.dir.entries '/' # => [".", "..", "\001Ole", "1Table", "\001CompObj", ...]
+ #     ole.file.read "\001CompObj" # => "\001\000\376\377\003\n\000\000\377\377..."
+ #   end
+ #
+ # = Notes
+ #
+ # *** This file is very incomplete
+ #
+ # i think it's okay to have an api like this on top, but there are certain things that ole
+ # does that aren't captured.
+ # <tt>Ole::Storage</tt> can have multiple files with the same name, for example, or with
+ # / in the name, and other things that are probably invalid anyway.
+ # i think this should remain an addon, built on top of my core api.
+ # but still the ideas can be reflected in the core, ie, changing the read/write semantics.
+ #
+ # once the core changes are complete, this will be a pretty straightforward file to complete.
+ #
+
+ require 'ole/base'
+
+ module Ole # :nodoc:
+   class Storage
+     def file
+       @file ||= FileParent.new self
+     end
+
+     def dir
+       @dir ||= DirParent.new self
+     end
+
+     def dirent_from_path path_str
+       path = path_str.sub(/^\/*/, '').sub(/\/*$/, '')
+       dirent = @root
+       return dirent if path.empty?
+       path = path.split /\/+/
+       until path.empty?
+         raise "invalid path #{path_str.inspect}" if dirent.file?
+         if tmp = dirent[path.shift]
+           dirent = tmp
+         else
+           # allow write etc later.
+           raise "invalid path #{path_str.inspect}"
+         end
+       end
+       dirent
+     end
+
+     class FileParent
+       def initialize ole
+         @ole = ole
+       end
+
+       def open path_str, mode='r', &block
+         dirent = @ole.dirent_from_path path_str
+         # like Errno::EISDIR
+         raise "#{path_str.inspect} is a directory" unless dirent.file?
+         dirent.open(&block)
+       end
+
+       alias new :open
+
+       def read path
+         open(path) { |f| f.read }
+       end
+
+       # crappy copy from Dir.
+       def unlink path
+         dirent = @ole.dirent_from_path path
+         # EPERM
+         raise "operation not permitted #{path.inspect}" unless dirent.file?
+         # i think we should free all of our blocks. i think the best way to do that would be
+         # like:
+         # open(path) { |f| f.truncate 0 }. which should free all our blocks from the
+         # allocation table. then if we remove ourself from our parent, we won't be part of
+         # the bat at save time.
+         # i think if you run repack, all free blocks should get zeroed.
+         open(path) { |f| f.truncate 0 }
+         parent = @ole.dirent_from_path(('/' + path).sub(/\/[^\/]+$/, ''))
+         parent.children.delete dirent
+         1 # hmmm. as per ::File ?
+       end
+     end
+
+     class DirParent
+       def initialize ole
+         @ole = ole
+       end
+
+       def open path_str
+         dirent = @ole.dirent_from_path path_str
+         # like Errno::ENOTDIR
+         raise "#{path_str.inspect} is not a directory" unless dirent.dir?
+         dir = Dir.new dirent, path_str
+         if block_given?
+           yield dir
+         else
+           dir
+         end
+       end
+
+       # certain Dir class methods proxy in this fashion:
+       def entries path
+         open(path) { |dir| dir.entries }
+       end
+
+       # there are some other important ones, like:
+       # chroot (!), mkdir, chdir, rmdir, glob etc etc. for now, i think
+       # mkdir, and rmdir are the main ones we'd need to support
+       def rmdir path
+         dirent = @ole.dirent_from_path path
+         # repeating myself
+         raise "#{path.inspect} is not a directory" unless dirent.dir?
+         # ENOTEMPTY:
+         raise "directory not empty #{path.inspect}" unless dirent.children.empty?
+         # now delete it, how to do that? the canonical representation that is
+         # maintained is the root tree, and the children array. we must remove it
+         # from the children array.
+         # we need the parent then. this sucks but anyway:
+         parent = @ole.dirent_from_path path.sub(/\/[^\/]+$/, '') || '/'
+         # note that the way this currently works, on save and repack time this will get
+         # reflected. to work properly, ie to make a difference now it would have to re-write
+         # the dirent. i think that Ole::Storage#close will handle that. and maybe include a
+         # #repack.
+         parent.children.delete dirent
+         0 # hmmm. as per ::Dir ?
+       end
+
+       class Dir
+         include Enumerable
+         attr_reader :dirent, :path, :entries, :pos
+
+         def initialize dirent, path
+           @dirent, @path = dirent, path
+           @pos = 0
+           # FIXME: hack, and probably not really desired
+           @entries = %w[. ..] + @dirent.children.map(&:name)
+         end
+
+         def each(&block)
+           @entries.each(&block)
+         end
+
+         def close
+         end
+
+         def read
+           @entries[@pos]
+         ensure
+           @pos += 1 if @pos < @entries.length
+         end
+
+         def pos= pos
+           @pos = [[0, pos].max, @entries.length].min
+         end
+
+         def rewind
+           @pos = 0
+         end
+
+         alias tell :pos
+         alias seek :pos=
+       end
+     end
+   end
+ end
+
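To make the filesystem emulation above a little more concrete, here is a rough usage sketch driven only by the methods defined in this hunk. The 'test.doc' filename and stream names are the same placeholder examples used in the header comment, and the destructive calls are left commented out since the file describes itself as very incomplete:

    require 'ole/storage'

    Ole::Storage.open 'test.doc' do |ole|
      # DirParent#entries proxies ::Dir.entries; "." and ".." are synthesised
      p ole.dir.entries('/')

      # DirParent#open yields an Enumerable Dir object over the child names
      ole.dir.open('/') { |dir| dir.each { |name| puts name } }

      # FileParent#read opens a stream dirent and returns its full contents
      data = ole.file.read "\001CompObj"
      puts data.length

      # FileParent#unlink truncates a stream and drops it from its parent;
      # DirParent#rmdir does the same for an empty storage:
      # ole.file.unlink '1Table'
      # ole.dir.rmdir 'SomeStorage'
    end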
@@ -0,0 +1,184 @@
+
+ # move to support?
+ class IO # :nodoc:
+   def self.copy src, dst
+     until src.eof?
+       buf = src.read(4096)
+       dst.write buf
+     end
+   end
+ end
+
+ #
+ # = Introduction
+ #
+ # +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
+ # slices of the input file by providing a list of ranges. Intended as an initial measure to curb
+ # inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
+ # no method to stream it.
+ #
+ # This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
+ # and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
+ # getting 16 bytes doesn't read the whole thing).
+ #
+ # In the simplest case it can be used with a single range to provide a limited io to a section of
+ # a file.
+ #
+ # = Limitations
+ #
+ # * No buffering, by design at the moment. Intended for large reads
+ #
+ # = TODO
+ #
+ # On further reflection, this class is something of a joining/optimization of
+ # two separate IO classes: a SubfileIO, for providing access to a range within
+ # a File as a separate IO object, and a ConcatIO, allowing the presentation of
+ # a bunch of io objects as a single unified whole.
+ #
+ # I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
+ # convert a whole mime message into an IO stream, that can be read from.
+ # It will just be the concatenation of a series of IO objects, corresponding to
+ # headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
+ # original message proper, or RangesIO as provided by the Attachment#data, that
+ # will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
+ # fly. Thus the attachment, in its plain or encoded form, and the message as a
+ # whole never exists as a single string in memory, as it does now. This is a
+ # fair bit of work to achieve, but generally useful I believe.
+ #
+ # This class isn't ole specific, maybe move it to my general ruby stream project.
+ #
+ class RangesIO
+   attr_reader :io, :ranges, :size, :pos
+   # +io+ is the parent io object that we are wrapping.
+   #
+   # +ranges+ are byte offsets, either
+   # 1. an array of ranges [1..2, 4..5, 6..8] or
+   # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
+   #    (think the way String indexing works)
+   # The +ranges+ provide sequential slices of the file that will be read. they can overlap.
+   def initialize io, ranges, opts={}
+     @opts = {:close_parent => false}.merge opts
+     @io = io
+     # convert ranges to arrays. check for negative ranges?
+     @ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
+     # calculate size
+     @size = @ranges.inject(0) { |total, (pos, len)| total + len }
+     # initial position in the file
+     @pos = 0
+   end
+
+   def pos= pos, whence=IO::SEEK_SET
+     # FIXME support other whence values
+     raise NotImplementedError, "#{whence.inspect} not supported" unless whence == IO::SEEK_SET
+     # just a simple pos calculation. invalidate buffers if we had them
+     @pos = pos
+   end
+
+   alias seek :pos=
+   alias tell :pos
+
+   def close
+     @io.close if @opts[:close_parent]
+   end
+
+   def range_and_offset pos
+     off = nil
+     r = ranges.inject(0) do |total, r|
+       to = total + r[1]
+       if pos <= to
+         off = pos - total
+         break r
+       end
+       to
+     end
+     # should be impossible for any valid pos, (0...size) === pos
+     raise "unable to find range for pos #{pos.inspect}" unless off
+     [r, off]
+   end
+
+   def eof?
+     @pos == @size
+   end
+
+   # read bytes from file, to a maximum of +limit+, or all available if unspecified.
+   def read limit=nil
+     data = ''
+     limit ||= size
+     # special case eof
+     return data if eof?
+     r, off = range_and_offset @pos
+     i = ranges.index r
+     # this may be conceptually nice (create sub-range starting where we are), but
+     # for a large range array its pretty wasteful. even the previous way was. but
+     # i'm not trying to optimize this atm. it may even go to c later if necessary.
+     ([[r[0] + off, r[1] - off]] + ranges[i+1..-1]).each do |pos, len|
+       @io.seek pos
+       if limit < len
+         # FIXME this += isn't correct if there is a read error
+         # or something.
+         @pos += limit
+         break data << @io.read(limit)
+       end
+       # this can also stuff up. if the ranges are beyond the size of the file, we can get
+       # nil here.
+       data << @io.read(len)
+       @pos += len
+       limit -= len
+     end
+     data
+   end
+
+   # you may override this call to update @ranges and @size, if applicable. then write
+   # support can grow below
+   def truncate size
+     raise NotImplementedError, 'truncate not supported'
+   end
+   # why not? :)
+   alias size= :truncate
+
+   def write data
+     # short cut. needed because truncate 0 may return no ranges, instead of empty range,
+     # thus range_and_offset fails.
+     return 0 if data.empty?
+     data_pos = 0
+     # if we don't have room, we can use the truncate hook to make more space.
+     if data.length > @size - @pos
+       begin
+         truncate @pos + data.length
+       rescue NotImplementedError
+         # FIXME maybe warn instead, then just truncate the data?
+         raise "unable to satisfy write of #{data.length} bytes"
+       end
+     end
+     r, off = range_and_offset @pos
+     i = ranges.index r
+     ([[r[0] + off, r[1] - off]] + ranges[i+1..-1]).each do |pos, len|
+       @io.seek pos
+       if data_pos + len > data.length
+         chunk = data[data_pos..-1]
+         @io.write chunk
+         @pos += chunk.length
+         data_pos = data.length
+         break
+       end
+       @io.write data[data_pos, len]
+       @pos += len
+       data_pos += len
+     end
+     data_pos
+   end
+
+   # this will be generalised to a module later
+   def each_read blocksize=4096
+     yield read(blocksize) until eof?
+   end
+
+   def inspect
+     # the rescue is for empty files
+     pos, len = *(range_and_offset(@pos)[0] rescue [nil, nil])
+     range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
+     "#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos "\
+     "current_range=#{range_str}>"
+   end
+ end
+
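As a rough illustration of the range handling documented above (a sketch only: the StringIO, its contents, and the offsets are invented, and both range forms from the initialize comment are shown):

    require 'stringio'

    io = StringIO.new 'abcdefghijklmnop'

    # Range form: converted with r.end - r.begin, so 4..8 becomes [4, 4] ("efgh")
    # and 0..2 becomes [0, 2] ("ab").
    rio = RangesIO.new io, [4..8, 0..2]
    # equivalently, as [offset, length] pairs:
    # rio = RangesIO.new io, [[4, 4], [0, 2]]

    rio.size     # => 6
    rio.read 3   # => "efg"
    rio.read     # => "hab" -- the read continues across the range boundary
    rio.eof?     # => true
    rio.seek 0
    rio.read     # => "efghab"

Writes walk the same range list, so when the parent io is opened read-write the same slices can be overwritten in place via #write.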