ruby-ole 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,60 @@
1
# Build configuration for the ruby-ole gem: test, rdoc and packaging tasks.
require 'rake/rdoctask'
require 'rake/testtask'
require 'rake/packagetask'
require 'rake/gempackagetask'

require 'rbconfig'
require 'fileutils'

$:.unshift 'lib'

require 'ole/storage'

PKG_NAME = 'ruby-ole'
PKG_VERSION = Ole::Storage::VERSION

task :default => [:test]

# Run the unit test suite (test/test_*.rb) verbosely, with warnings enabled.
Rake::TestTask.new(:test) do |task|
  task.test_files = FileList['test/test_*.rb']
  task.warning = true
  task.verbose = true
end

# RDocTask wasn't working for me, so shell out to the rdoc binary instead.
desc 'Build the rdoc HTML Files'
task :rdoc do
  system "rdoc -S -N --main 'Ole::Storage' --tab-width 2 --title '#{PKG_NAME} documentation' lib"
end

# Gem specification, consumed by the package task below.
spec = Gem::Specification.new do |s|
  s.name = PKG_NAME
  s.version = PKG_VERSION
  s.summary = 'Ruby OLE library.'
  s.description = 'A library for easy read/write access to OLE compound documents for Ruby.'
  s.authors = ['Charles Lowe']
  s.email = 'aquasync@gmail.com'
  s.homepage = 'http://code.google.com/p/ruby-ole'
  #s.rubyforge_project = %q{ruby-ole}

  s.executables = ['oletool']
  # package the build file, the library, the tests + fixtures, and the scripts
  s.files = ['Rakefile'] +
    Dir.glob('lib/**/*.rb') +
    Dir.glob('test/test_*.rb') + Dir.glob('test/*.doc') +
    Dir.glob('bin/*')

  s.has_rdoc = true
  s.rdoc_options += [
    '--main', 'Ole::Storage',
    '--title', "#{PKG_NAME} documentation",
    '--tab-width', '2'
  ]

  s.autorequire = 'ole/storage'
end

# Build the gem plus a tarball (no zip) into build/.
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = false
  p.package_dir = 'build'
end
60
+
@@ -0,0 +1,35 @@
1
#! /usr/bin/ruby

require 'optparse'
require 'rubygems'
require 'ole/storage'

# Command line tool for OLE compound documents: parses ARGV and applies the
# selected action to each named file - dump the dirent tree (default), or
# repack the file into canonical form.
def oletool
  opts = {:verbose => false, :action => :tree}
  # NOTE: the block parameter is named +o+ rather than +op+ so it no longer
  # shadows the outer local (previously triggered a ruby -w warning).
  op = OptionParser.new do |o|
    o.banner = "Usage: oletool [options] [files]"
    o.separator ''
    o.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
    o.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
    o.separator ''
    o.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
    o.on_tail('-h', '--help', 'Show this message') { puts op; exit }
  end
  files = op.parse ARGV
  if files.empty?
    puts 'Must specify 1 or more msg files.'
    puts op
    exit 1
  end
  # quieten the library logger unless -v was given
  Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
  files.each do |file|
    case opts[:action]
    when :tree
      Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
    when :repack
      Ole::Storage.open file, 'r+', &:repack
    end
  end
end

oletool
@@ -0,0 +1,7 @@
1
+
2
+ require 'ole/support'
3
+
4
module Ole # :nodoc:
  # Shared logger for the whole library. +new_with_callstack+ is the
  # Logger extension provided by ole/support - presumably it annotates
  # messages with caller information (defined outside this file).
  Log = Logger.new_with_callstack
end
7
+
@@ -0,0 +1,181 @@
1
+ #
2
+ # = Introduction
3
+ #
4
+ # This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
5
+ #
6
+ # Ideally, this will be the recommended interface, allowing Ole::Storage, Dir, and
7
+ # Zip::ZipFile to be used exchangeably. It should be possible to write recursive copy using
8
+ # the plain api, such that you can copy dirs/files agnostically between any of ole docs, dirs,
9
+ # and zip files.
10
+ #
11
+ # = Usage
12
+ #
13
+ # Currently you can do something like the following:
14
+ #
15
+ # Ole::Storage.open 'test.doc' do |ole|
16
+ # ole.dir.entries '/' # => [".", "..", "\001Ole", "1Table", "\001CompObj", ...]
17
+ # ole.file.read "\001CompObj" # => "\001\000\376\377\003\n\000\000\377\377..."
18
+ # end
19
+ #
20
+ # = Notes
21
+ #
22
+ # *** This file is very incomplete
23
+ #
24
+ # i think its okay to have an api like this on top, but there are certain things that ole
25
+ # does that aren't captured.
26
+ # <tt>Ole::Storage</tt> can have multiple files with the same name, for example, or with
27
+ # / in the name, and other things that are probably invalid anyway.
28
+ # i think this should remain an addon, built on top of my core api.
29
+ # but still the ideas can be reflected in the core, ie, changing the read/write semantics.
30
+ #
31
+ # once the core changes are complete, this will be a pretty straight forward file to complete.
32
+ #
33
+
34
+ require 'ole/base'
35
+
36
+ module Ole # :nodoc:
37
+ class Storage
38
+ def file
39
+ @file ||= FileParent.new self
40
+ end
41
+
42
+ def dir
43
+ @dir ||= DirParent.new self
44
+ end
45
+
46
+ def dirent_from_path path_str
47
+ path = path_str.sub(/^\/*/, '').sub(/\/*$/, '')
48
+ dirent = @root
49
+ return dirent if path.empty?
50
+ path = path.split /\/+/
51
+ until path.empty?
52
+ raise "invalid path #{path_str.inspect}" if dirent.file?
53
+ if tmp = dirent[path.shift]
54
+ dirent = tmp
55
+ else
56
+ # allow write etc later.
57
+ raise "invalid path #{path_str.inspect}"
58
+ end
59
+ end
60
+ dirent
61
+ end
62
+
63
+ class FileParent
64
+ def initialize ole
65
+ @ole = ole
66
+ end
67
+
68
+ def open path_str, mode='r', &block
69
+ dirent = @ole.dirent_from_path path_str
70
+ # like Errno::EISDIR
71
+ raise "#{path_str.inspect} is a directory" unless dirent.file?
72
+ dirent.open(&block)
73
+ end
74
+
75
+ alias new :open
76
+
77
+ def read path
78
+ open(path) { |f| f.read }
79
+ end
80
+
81
+ # crappy copy from Dir.
82
+ def unlink path
83
+ dirent = @ole.dirent_from_path path
84
+ # EPERM
85
+ raise "operation not permitted #{path.inspect}" unless dirent.file?
86
+ # i think we should free all of our blocks. i think the best way to do that would be
87
+ # like:
88
+ # open(path) { |f| f.truncate 0 }. which should free all our blocks from the
89
+ # allocation table. then if we remove ourself from our parent, we won't be part of
90
+ # the bat at save time.
91
+ # i think if you run repack, all free blocks should get zeroed.
92
+ open(path) { |f| f.truncate 0 }
93
+ parent = @ole.dirent_from_path(('/' + path).sub(/\/[^\/]+$/, ''))
94
+ parent.children.delete dirent
95
+ 1 # hmmm. as per ::File ?
96
+ end
97
+ end
98
+
99
+ class DirParent
100
+ def initialize ole
101
+ @ole = ole
102
+ end
103
+
104
+ def open path_str
105
+ dirent = @ole.dirent_from_path path_str
106
+ # like Errno::ENOTDIR
107
+ raise "#{path_str.inspect} is not a directory" unless dirent.dir?
108
+ dir = Dir.new dirent, path_str
109
+ if block_given?
110
+ yield dir
111
+ else
112
+ dir
113
+ end
114
+ end
115
+
116
+ # certain Dir class methods proxy in this fashion:
117
+ def entries path
118
+ open(path) { |dir| dir.entries }
119
+ end
120
+
121
+ # there are some other important ones, like:
122
+ # chroot (!), mkdir, chdir, rmdir, glob etc etc. for now, i think
123
+ # mkdir, and rmdir are the main ones we'd need to support
124
+ def rmdir path
125
+ dirent = @ole.dirent_from_path path
126
+ # repeating myself
127
+ raise "#{path.inspect} is not a directory" unless dirent.dir?
128
+ # ENOTEMPTY:
129
+ raise "directory not empty #{path.inspect}" unless dirent.children.empty?
130
+ # now delete it, how to do that? the canonical representation that is
131
+ # maintained is the root tree, and the children array. we must remove it
132
+ # from the children array.
133
+ # we need the parent then. this sucks but anyway:
134
+ parent = @ole.dirent_from_path path.sub(/\/[^\/]+$/, '') || '/'
135
+ # note that the way this currently works, on save and repack time this will get
136
+ # reflected. to work properly, ie to make a difference now it would have to re-write
137
+ # the dirent. i think that Ole::Storage#close will handle that. and maybe include a
138
+ # #repack.
139
+ parent.children.delete dirent
140
+ 0 # hmmm. as per ::Dir ?
141
+ end
142
+
143
+ class Dir
144
+ include Enumerable
145
+ attr_reader :dirent, :path, :entries, :pos
146
+
147
+ def initialize dirent, path
148
+ @dirent, @path = dirent, path
149
+ @pos = 0
150
+ # FIXME: hack, and probably not really desired
151
+ @entries = %w[. ..] + @dirent.children.map(&:name)
152
+ end
153
+
154
+ def each(&block)
155
+ @entries.each(&block)
156
+ end
157
+
158
+ def close
159
+ end
160
+
161
+ def read
162
+ @entries[@pos]
163
+ ensure
164
+ @pos += 1 if @pos < @entries.length
165
+ end
166
+
167
+ def pos= pos
168
+ @pos = [[0, pos].max, @entries.length].min
169
+ end
170
+
171
+ def rewind
172
+ @pos = 0
173
+ end
174
+
175
+ alias tell :pos
176
+ alias seek :pos=
177
+ end
178
+ end
179
+ end
180
+ end
181
+
@@ -0,0 +1,184 @@
1
+
2
+ # move to support?
3
class IO # :nodoc:
  # Streams all remaining data from +src+ to +dst+, 4096 bytes at a time.
  # Both arguments are duck-typed: anything responding to eof?/read and
  # write respectively will do.
  def self.copy src, dst
    dst.write src.read(4096) until src.eof?
  end
end
11
+
12
+ #
13
+ # = Introduction
14
+ #
15
+ # +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
16
+ # slices of the input file by providing a list of ranges. Intended as an initial measure to curb
17
+ # inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
18
+ # no method to stream it.
19
+ #
20
+ # This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
21
+ # and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
22
+ # getting 16 bytes doesn't read the whole thing).
23
+ #
24
+ # In the simplest case it can be used with a single range to provide a limited io to a section of
25
+ # a file.
26
+ #
27
+ # = Limitations
28
+ #
29
+ # * No buffering. by design at the moment. Intended for large reads
30
+ #
31
+ # = TODO
32
+ #
33
+ # On further reflection, this class is something of a joining/optimization of
34
+ # two separate IO classes. a SubfileIO, for providing access to a range within
35
+ # a File as a separate IO object, and a ConcatIO, allowing the presentation of
36
+ # a bunch of io objects as a single unified whole.
37
+ #
38
+ # I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
39
+ # convert a whole mime message into an IO stream, that can be read from.
40
+ # It will just be the concatenation of a series of IO objects, corresponding to
41
+ # headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
42
+ # original message proper, or RangesIO as provided by the Attachment#data, that
43
+ # will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
44
+ # fly. Thus the attachment, in its plain or encoded form, and the message as a
45
+ # whole never exists as a single string in memory, as it does now. This is a
46
+ # fair bit of work to achieve, but generally useful I believe.
47
+ #
48
+ # This class isn't ole specific, maybe move it to my general ruby stream project.
49
+ #
50
class RangesIO
  attr_reader :io, :ranges, :size, :pos

  # +io+ is the parent io object that we are wrapping.
  #
  # +ranges+ are byte offsets, either
  # 1. an array of ranges [1..2, 4..5, 6..8] or
  # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
  #    (think the way String indexing works)
  # The +ranges+ provide sequential slices of the file that will be read. they can overlap.
  def initialize io, ranges, opts={}
    @opts = {:close_parent => false}.merge opts
    @io = io
    # normalise Range objects to [offset, length] pairs. check for negative ranges?
    @ranges = ranges.map { |x| Range === x ? [x.begin, x.end - x.begin] : x }
    # total virtual stream length is the sum of all the slice lengths
    @size = @ranges.inject(0) { |sum, (_, len)| sum + len }
    # virtual position within the concatenated slices
    @pos = 0
  end

  def pos= pos, whence=IO::SEEK_SET
    # FIXME support other whence values
    raise NotImplementedError, "#{whence.inspect} not supported" unless whence == IO::SEEK_SET
    # just a simple pos calculation. invalidate buffers if we had them
    @pos = pos
  end

  alias seek :pos=
  alias tell :pos

  def close
    @io.close if @opts[:close_parent]
  end

  # Maps a virtual +pos+ onto [range, offset], where range is the
  # [offset, length] pair containing pos, and offset is pos's displacement
  # within it.
  def range_and_offset pos
    found = nil
    offset = nil
    total = 0
    ranges.each do |range|
      upto = total + range[1]
      if pos <= upto
        found = range
        offset = pos - total
        break
      end
      total = upto
    end
    # should be impossible for any valid pos, (0...size) === pos
    raise "unable to find range for pos #{pos.inspect}" unless offset
    [found, offset]
  end

  def eof?
    @pos == @size
  end

  # read bytes from file, to a maximum of +limit+, or all available if unspecified.
  def read limit=nil
    result = ''
    limit ||= size
    # special case eof
    return result if eof?
    current, skip = range_and_offset @pos
    idx = ranges.index current
    # this may be conceptually nice (create sub-range starting where we are), but
    # for a large range array its pretty wasteful. even the previous way was. but
    # i'm not trying to optimize this atm. it may even go to c later if necessary.
    tail = [[current[0] + skip, current[1] - skip]] + ranges[idx + 1..-1]
    tail.each do |offset, len|
      @io.seek offset
      if limit < len
        # FIXME this += isn't correct if there is a read error
        # or something.
        @pos += limit
        break result << @io.read(limit)
      end
      # this can also stuff up. if the ranges are beyond the size of the file, we can get
      # nil here.
      result << @io.read(len)
      @pos += len
      limit -= len
    end
    result
  end

  # you may override this call to update @ranges and @size, if applicable. then write
  # support can grow below
  def truncate size
    raise NotImplementedError, 'truncate not supported'
  end
  # why not? :)
  alias size= :truncate

  # Writes +data+ at the current position, spanning range boundaries as
  # needed, and returns the number of bytes written.
  def write data
    # short cut. needed because truncate 0 may return no ranges, instead of empty range,
    # thus range_and_offset fails.
    return 0 if data.empty?
    written = 0
    # if we don't have room, we can use the truncate hook to make more space.
    if data.length > @size - @pos
      begin
        truncate @pos + data.length
      rescue NotImplementedError
        # FIXME maybe warn instead, then just truncate the data?
        raise "unable to satisfy write of #{data.length} bytes"
      end
    end
    current, skip = range_and_offset @pos
    idx = ranges.index current
    tail = [[current[0] + skip, current[1] - skip]] + ranges[idx + 1..-1]
    tail.each do |offset, len|
      @io.seek offset
      if written + len > data.length
        chunk = data[written..-1]
        @io.write chunk
        @pos += chunk.length
        written = data.length
        break
      end
      @io.write data[written, len]
      @pos += len
      written += len
    end
    written
  end

  # this will be generalised to a module later
  def each_read blocksize=4096
    yield read(blocksize) until eof?
  end

  def inspect
    # the rescue is for empty files
    pos, len = *(range_and_offset(@pos)[0] rescue [nil, nil])
    range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
    "#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos "\
    "current_range=#{range_str}>"
  end
end
184
+