ruby-ole 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +60 -0
- data/bin/oletool +35 -0
- data/lib/ole/base.rb +7 -0
- data/lib/ole/file_system.rb +181 -0
- data/lib/ole/io_helpers.rb +184 -0
- data/lib/ole/storage.rb +925 -0
- data/lib/ole/support.rb +51 -0
- data/lib/ole/types.rb +36 -0
- data/test/test_storage.rb +139 -0
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
- metadata +62 -0
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Rakefile for the ruby-ole gem: defines the test task (the default), an rdoc task,
# and the gem packaging task.
require 'rake/rdoctask'
require 'rake/testtask'
require 'rake/packagetask'
require 'rake/gempackagetask'

require 'rbconfig'
require 'fileutils'

# put the local lib directory first on the load path, ahead of the require below
$:.unshift 'lib'

require 'ole/storage'

PKG_NAME = 'ruby-ole'
# the package version is read from the library itself
PKG_VERSION = Ole::Storage::VERSION

# running rake with no arguments runs the tests
task :default => [:test]

# runs every test/test_*.rb file, with warnings and verbose output switched on
Rake::TestTask.new(:test) do |t|
  t.test_files = FileList["test/test_*.rb"]
  t.warning = true
  t.verbose = true
end

# RDocTask wasn't working for me
# so the rdoc executable is invoked through the shell instead
desc 'Build the rdoc HTML Files'
task :rdoc do
  system "rdoc -S -N --main 'Ole::Storage' --tab-width 2 --title '#{PKG_NAME} documentation' lib"
end

# gem metadata, shared with the packaging task below
spec = Gem::Specification.new do |s|
  s.name = PKG_NAME
  s.version = PKG_VERSION
  s.summary = %q{Ruby OLE library.}
  s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
  s.authors = ["Charles Lowe"]
  s.email = %q{aquasync@gmail.com}
  s.homepage = %q{http://code.google.com/p/ruby-ole}
  #s.rubyforge_project = %q{ruby-ole}

  s.executables = ['oletool']
  # packaged files: this Rakefile, the library sources, the tests with their
  # sample .doc fixtures, and everything under bin
  s.files = ['Rakefile']
  s.files += Dir.glob("lib/**/*.rb")
  s.files += Dir.glob("test/test_*.rb") + Dir.glob("test/*.doc")
  s.files += Dir.glob("bin/*")

  s.has_rdoc = true
  # same rdoc options as passed on the command line in the :rdoc task above
  s.rdoc_options += ['--main', 'Ole::Storage',
                     '--title', "#{PKG_NAME} documentation",
                     '--tab-width', '2']

  s.autorequire = 'ole/storage'
end

# builds the gem plus a tarball (no zip) into the build directory
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = true
  p.need_zip = false
  p.package_dir = 'build'
end
|
60
|
+
|
data/bin/oletool
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'ole/storage'
|
6
|
+
|
7
|
+
# Command line entry point. Parses ARGV and then, for each file named on the command
# line, either prints its ole tree (the default action) or opens it in 'r+' mode and
# calls repack on it.
#
# Prints a message followed by the usage text and exits with status 1 when the options
# cannot be parsed or when no files are given. <tt>-h</tt> / <tt>--help</tt> prints the
# usage text and exits.
def oletool
  opts = {:verbose => false, :action => :tree}
  # the block parameter has its own name so that it doesn't shadow the outer +op+
  op = OptionParser.new do |o|
    o.banner = "Usage: oletool [options] [files]"
    o.separator ''
    o.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
    o.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
    o.separator ''
    o.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
    o.on_tail('-h', '--help', 'Show this message') { puts o; exit }
  end
  begin
    files = op.parse ARGV
  rescue OptionParser::ParseError => e
    # report bad options the same way as a missing file list, rather than
    # letting the exception escape as a backtrace
    puts e.message
    puts op
    exit 1
  end
  if files.empty?
    puts 'Must specify 1 or more ole files.'
    puts op
    exit 1
  end
  # warnings are only shown in verbose mode
  Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
  files.each do |file|
    case opts[:action]
    when :tree
      Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
    when :repack
      Ole::Storage.open(file, 'r+') { |ole| ole.repack }
    end
  end
end
|
34
|
+
|
35
|
+
oletool
|
data/lib/ole/base.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
#
|
2
|
+
# = Introduction
|
3
|
+
#
|
4
|
+
# This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
|
5
|
+
#
|
6
|
+
# Ideally, this will be the recommended interface, allowing Ole::Storage, Dir, and
|
7
|
+
# Zip::ZipFile to be used interchangeably. It should be possible to write recursive copy using
|
8
|
+
# the plain api, such that you can copy dirs/files agnostically between any of ole docs, dirs,
|
9
|
+
# and zip files.
|
10
|
+
#
|
11
|
+
# = Usage
|
12
|
+
#
|
13
|
+
# Currently you can do something like the following:
|
14
|
+
#
|
15
|
+
# Ole::Storage.open 'test.doc' do |ole|
|
16
|
+
# ole.dir.entries '/' # => [".", "..", "\001Ole", "1Table", "\001CompObj", ...]
|
17
|
+
# ole.file.read "\001CompObj" # => "\001\000\376\377\003\n\000\000\377\377..."
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# = Notes
|
21
|
+
#
|
22
|
+
# *** This file is very incomplete
|
23
|
+
#
|
24
|
+
# i think its okay to have an api like this on top, but there are certain things that ole
|
25
|
+
# does that aren't captured.
|
26
|
+
# <tt>Ole::Storage</tt> can have multiple files with the same name, for example, or with
|
27
|
+
# / in the name, and other things that are probably invalid anyway.
|
28
|
+
# i think this should remain an addon, built on top of my core api.
|
29
|
+
# but still the ideas can be reflected in the core, ie, changing the read/write semantics.
|
30
|
+
#
|
31
|
+
# once the core changes are complete, this will be a pretty straight forward file to complete.
|
32
|
+
#
|
33
|
+
|
34
|
+
require 'ole/base'
|
35
|
+
|
36
|
+
module Ole # :nodoc:
|
37
|
+
class Storage
|
38
|
+
# Returns the FileParent object for this storage. It is created on the first call and
# the same object is handed back on every later call.
def file
  @file = FileParent.new(self) unless @file
  @file
end
|
41
|
+
|
42
|
+
# Returns the DirParent object for this storage. It is created on the first call and
# the same object is handed back on every later call.
def dir
  @dir = DirParent.new(self) unless @dir
  @dir
end
|
45
|
+
|
46
|
+
# Resolves +path_str+ to a dirent by walking down from the root one path component at
# a time.
#
# Leading and trailing slashes are ignored, and a run of slashes counts as a single
# separator. A path with no components at all yields the root.
#
# Raises a RuntimeError if a component can't be found, or if the walk would have to
# descend through a file.
def dirent_from_path path_str
  trimmed = path_str.sub(/^\/*/, '').sub(/\/*$/, '')
  trimmed.split(/\/+/).inject(@root) do |current, name|
    # a file has nothing below it
    raise "invalid path #{path_str.inspect}" if current.file?
    child = current[name]
    # allow write etc later.
    raise "invalid path #{path_str.inspect}" unless child
    child
  end
end
|
62
|
+
|
63
|
+
# Offers a few ::File style class methods (open / new, read, unlink) that operate on
# the dirents of an ole storage, addressed by path.
class FileParent
  # +ole+ is the storage whose dirents are looked up; it must respond to
  # dirent_from_path.
  def initialize ole
    @ole = ole
  end

  # Looks up the dirent at +path_str+ and opens it, passing +block+ through to the
  # dirent's own open. Raises a RuntimeError if the dirent is not a file.
  #
  # +mode+ is accepted but is not used at present.
  def open path_str, mode='r', &block
    dirent = @ole.dirent_from_path path_str
    # like Errno::EISDIR
    raise "#{path_str.inspect} is a directory" unless dirent.file?
    dirent.open(&block)
  end

  alias new :open

  # Returns whatever read gives back for the file at +path+.
  def read path
    open(path) { |f| f.read }
  end

  # Removes the file at +path+: truncates it to zero length, then deletes its dirent
  # from the parent's children. Returns 1. Raises a RuntimeError if +path+ is not a
  # file.
  #
  # crappy copy from Dir.
  def unlink path
    dirent = @ole.dirent_from_path path
    # EPERM
    raise "operation not permitted #{path.inspect}" unless dirent.file?
    # i think we should free all of our blocks. i think the best way to do that would be
    # like:
    # open(path) { |f| f.truncate 0 }. which should free all our blocks from the
    # allocation table. then if we remove ourself from our parent, we won't be part of
    # the bat at save time.
    # i think if you run repack, all free blocks should get zeroed.
    open(path) { |f| f.truncate 0 }
    parent = @ole.dirent_from_path parent_path(path)
    parent.children.delete dirent
    1 # hmmm. as per ::File ?
  end

  private

  # Strips the last component from +path+, along with the slashes on either side of
  # it. dirent_from_path accepts trailing and repeated slashes, so they have to be
  # accepted here too, otherwise the file itself would be looked up as its own parent.
  # The result is empty (which resolves to the root) for a top level entry.
  def parent_path path
    ('/' + path).sub(/\/+[^\/]+\/*$/, '')
  end
end
|
98
|
+
|
99
|
+
# Offers a few ::Dir style class methods (open, entries, rmdir) that operate on the
# dirents of an ole storage, addressed by path.
class DirParent
  # +ole+ is the storage whose dirents are looked up; it must respond to
  # dirent_from_path.
  def initialize ole
    @ole = ole
  end

  # Wraps the directory at +path_str+ in a Dir object. With a block, yields the Dir
  # and returns the block's value; without one, returns the Dir. Raises a
  # RuntimeError if the dirent is not a directory.
  def open path_str
    dirent = @ole.dirent_from_path path_str
    # like Errno::ENOTDIR
    raise "#{path_str.inspect} is not a directory" unless dirent.dir?
    dir = Dir.new dirent, path_str
    if block_given?
      yield dir
    else
      dir
    end
  end

  # certain Dir class methods proxy in this fashion:
  def entries path
    open(path) { |dir| dir.entries }
  end

  # there are some other important ones, like:
  # chroot (!), mkdir, chdir, rmdir, glob etc etc. for now, i think
  # mkdir, and rmdir are the main ones we'd need to support
  #
  # Removes the empty directory at +path+ from its parent's children and returns 0.
  # Raises a RuntimeError if +path+ is not a directory, or still has children.
  def rmdir path
    dirent = @ole.dirent_from_path path
    # repeating myself
    raise "#{path.inspect} is not a directory" unless dirent.dir?
    # ENOTEMPTY:
    raise "directory not empty #{path.inspect}" unless dirent.children.empty?
    # now delete it, how to do that? the canonical representation that is
    # maintained is the root tree, and the children array. we must remove it
    # from the children array.
    # we need the parent then. this sucks but anyway:
    # a slash is prepended so that a relative path like 'sub' still has its last
    # component stripped, and slashes on either side of that component go with it.
    # otherwise the directory would be looked up as its own parent, and nothing
    # would be removed. an empty result resolves to the root.
    parent = @ole.dirent_from_path(('/' + path).sub(/\/+[^\/]+\/*$/, ''))
    # note that the way this currently works, on save and repack time this will get
    # reflected. to work properly, ie to make a difference now it would have to re-write
    # the dirent. i think that Ole::Storage#close will handle that. and maybe include a
    # #repack.
    parent.children.delete dirent
    0 # hmmm. as per ::Dir ?
  end

  # A snapshot of a directory's entry names, with a read position, in the manner
  # of ::Dir.
  class Dir
    include Enumerable
    attr_reader :dirent, :path, :entries, :pos

    # The entry names are collected once, here, from the dirent's children.
    def initialize dirent, path
      @dirent, @path = dirent, path
      @pos = 0
      # FIXME: hack, and probably not really desired
      @entries = %w[. ..] + @dirent.children.map { |child| child.name }
    end

    # Yields each entry name in turn.
    def each(&block)
      @entries.each(&block)
    end

    # Nothing to release.
    def close
    end

    # Returns the entry at the current position and moves on by one, or returns
    # nil once the position is at the end.
    def read
      entry = @entries[@pos]
      @pos += 1 if @pos < @entries.length
      entry
    end

    # Sets the position, clamped to lie between 0 and the number of entries.
    def pos= pos
      @pos = [[0, pos].max, @entries.length].min
    end

    # Moves the position back to the first entry.
    def rewind
      @pos = 0
    end

    alias tell :pos
    alias seek :pos=
  end
end
|
179
|
+
end
|
180
|
+
end
|
181
|
+
|
@@ -0,0 +1,184 @@
|
|
1
|
+
|
2
|
+
# move to support?
|
3
|
+
class IO # :nodoc:
  # Copies the rest of +src+ into +dst+ in reads of up to 4096 bytes, stopping once
  # +src+ reports eof. Only eof? and read are called on +src+, and only write on
  # +dst+. Returns nil.
  def self.copy src, dst
    dst.write(src.read(4096)) until src.eof?
  end
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# = Introduction
|
14
|
+
#
|
15
|
+
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
|
16
|
+
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
|
17
|
+
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
|
18
|
+
# no method to stream it.
|
19
|
+
#
|
20
|
+
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
|
21
|
+
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
|
22
|
+
# getting 16 bytes doesn't read the whole thing).
|
23
|
+
#
|
24
|
+
# In the simplest case it can be used with a single range to provide a limited io to a section of
|
25
|
+
# a file.
|
26
|
+
#
|
27
|
+
# = Limitations
|
28
|
+
#
|
29
|
+
# * No buffering. by design at the moment. Intended for large reads
|
30
|
+
#
|
31
|
+
# = TODO
|
32
|
+
#
|
33
|
+
# On further reflection, this class is something of a joining/optimization of
|
34
|
+
# two separate IO classes. a SubfileIO, for providing access to a range within
|
35
|
+
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
|
36
|
+
# a bunch of io objects as a single unified whole.
|
37
|
+
#
|
38
|
+
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
|
39
|
+
# convert a whole mime message into an IO stream, that can be read from.
|
40
|
+
# It will just be the concatenation of a series of IO objects, corresponding to
|
41
|
+
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
|
42
|
+
# original message proper, or RangesIO as provided by the Attachment#data, that
|
43
|
+
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
|
44
|
+
# fly. Thus the attachment, in its plain or encoded form, and the message as a
|
45
|
+
# whole never exists as a single string in memory, as it does now. This is a
|
46
|
+
# fair bit of work to achieve, but generally useful I believe.
|
47
|
+
#
|
48
|
+
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
49
|
+
#
|
50
|
+
class RangesIO
  attr_reader :io, :ranges, :size, :pos

  # +io+ is the parent io object that we are wrapping.
  #
  # +ranges+ are byte offsets, either
  # 1. an array of ranges [1..2, 4..5, 6..8] or
  # 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
  #    (think the way String indexing works)
  # The +ranges+ provide sequential slices of the file that will be read. they can overlap.
  #
  # Note that a Range is converted using end - begin as the length, so its end is
  # effectively exclusive.
  #
  # +opts+ may contain <tt>:close_parent</tt> (default false); when true, #close also
  # closes +io+.
  def initialize io, ranges, opts={}
    @opts = {:close_parent => false}.merge opts
    @io = io
    # convert ranges to arrays. check for negative ranges?
    @ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
    # calculate size
    @size = @ranges.inject(0) { |total, r| total + r[1] }
    # initial position in the file
    @pos = 0
  end

  # Sets the position. Only IO::SEEK_SET is supported for +whence+ (which can only be
  # passed through the #seek alias); anything else raises NotImplementedError.
  # The position is not checked against the size.
  def pos= pos, whence=IO::SEEK_SET
    # FIXME support other whence values
    raise NotImplementedError, "#{whence.inspect} not supported" unless whence == IO::SEEK_SET
    # just a simple pos calculation. invalidate buffers if we had them
    @pos = pos
  end

  alias seek :pos=
  alias tell :pos

  # Closes the parent io, but only if the <tt>:close_parent</tt> option was given.
  def close
    @io.close if @opts[:close_parent]
  end

  # Returns the [offset, length] range that contains +pos+, together with the offset
  # of +pos+ within that range. A +pos+ sitting exactly on the boundary between two
  # ranges is reported as the end of the earlier one. Raises a RuntimeError if +pos+
  # is beyond the last range.
  def range_and_offset pos
    i, off = range_index_and_offset pos
    [ranges[i], off]
  end

  # True once the position has reached the size.
  def eof?
    @pos == @size
  end

  # read bytes from file, to a maximum of +limit+, or all available if unspecified.
  # Returns an empty string at eof.
  def read limit=nil
    data = ''
    limit ||= size
    # special case eof
    return data if eof?
    # the range is located by its index rather than by looking up an equal range
    # afterwards, as ranges are allowed to overlap and so two of them can be equal
    i, off = range_index_and_offset @pos
    start, length = ranges[i]
    # this may be conceptually nice (create sub-range starting where we are), but
    # for a large range array its pretty wasteful. even the previous way was. but
    # i'm not trying to optimize this atm. it may even go to c later if necessary.
    ([[start + off, length - off]] + ranges[i+1..-1]).each do |offset, len|
      @io.seek offset
      if limit < len
        # FIXME this += isn't correct if there is a read error
        # or something.
        @pos += limit
        break data << @io.read(limit)
      end
      # this can also stuff up. if the ranges are beyond the size of the file, we can get
      # nil here.
      data << @io.read(len)
      @pos += len
      limit -= len
    end
    data
  end

  # you may override this call to update @ranges and @size, if applicable. then write
  # support can grow below
  def truncate size
    raise NotImplementedError, 'truncate not supported'
  end
  # why not? :)
  alias size= :truncate

  # Writes +data+ at the current position, spreading it over as many ranges as it
  # takes, and returns the number of bytes written. If +data+ doesn't fit in what is
  # left, #truncate is called to make room; a RuntimeError is raised if that isn't
  # supported.
  def write data
    # short cut. needed because truncate 0 may return no ranges, instead of empty range,
    # thus range_and_offset fails.
    return 0 if data.empty?
    data_pos = 0
    # if we don't have room, we can use the truncate hook to make more space.
    if data.length > @size - @pos
      begin
        truncate @pos + data.length
      rescue NotImplementedError
        # FIXME maybe warn instead, then just truncate the data?
        raise "unable to satisfy write of #{data.length} bytes"
      end
    end
    # located by index for the same reason as in #read
    i, off = range_index_and_offset @pos
    start, length = ranges[i]
    ([[start + off, length - off]] + ranges[i+1..-1]).each do |offset, len|
      @io.seek offset
      if data_pos + len > data.length
        # the rest of the data fits inside this range
        chunk = data[data_pos..-1]
        @io.write chunk
        @pos += chunk.length
        data_pos = data.length
        break
      end
      @io.write data[data_pos, len]
      @pos += len
      data_pos += len
    end
    data_pos
  end

  # this will be generalised to a module later
  #
  # Yields successive reads of up to +blocksize+ bytes until eof.
  def each_read blocksize=4096
    yield read(blocksize) until eof?
  end

  def inspect
    # the rescue is for empty files
    pos, len = *(range_and_offset(@pos)[0] rescue [nil, nil])
    range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
    "#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos "\
    "current_range=#{range_str}>"
  end

  private

  # Returns the index into +ranges+ of the range that contains +pos+, together with
  # the offset of +pos+ within that range. Raises a RuntimeError if there isn't one.
  def range_index_and_offset pos
    total = 0
    ranges.each_with_index do |r, i|
      upto = total + r[1]
      return [i, pos - total] if pos <= upto
      total = upto
    end
    # should be impossible for any valid pos, (0...size) === pos
    raise "unable to find range for pos #{pos.inspect}"
  end
end
|
184
|
+
|