ruby-ole 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +60 -0
- data/bin/oletool +35 -0
- data/lib/ole/base.rb +7 -0
- data/lib/ole/file_system.rb +181 -0
- data/lib/ole/io_helpers.rb +184 -0
- data/lib/ole/storage.rb +925 -0
- data/lib/ole/support.rb +51 -0
- data/lib/ole/types.rb +36 -0
- data/test/test_storage.rb +139 -0
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
- metadata +62 -0
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Build script for the ruby-ole gem: test, rdoc and gem/tar packaging tasks.
require 'rake/rdoctask'
require 'rake/testtask'
require 'rake/packagetask'
require 'rake/gempackagetask'

require 'rbconfig'
require 'fileutils'

# make the in-tree lib/ visible so the version constant below can be loaded
# without installing the gem first.
$:.unshift 'lib'

require 'ole/storage'

PKG_NAME = 'ruby-ole'
# single source of truth for the version — taken from the library itself.
PKG_VERSION = Ole::Storage::VERSION

task :default => [:test]

# run all test/test_*.rb files, with ruby warnings enabled.
Rake::TestTask.new(:test) do |t|
	t.test_files = FileList["test/test_*.rb"]
	t.warning = true
	t.verbose = true
end

# RDocTask wasn't working for me
desc 'Build the rdoc HTML Files'
task :rdoc do
	system "rdoc -S -N --main 'Ole::Storage' --tab-width 2 --title '#{PKG_NAME} documentation' lib"
end

# gem specification: metadata plus an explicit file manifest built from globs.
spec = Gem::Specification.new do |s|
	s.name = PKG_NAME
	s.version = PKG_VERSION
	s.summary = %q{Ruby OLE library.}
	s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
	s.authors = ["Charles Lowe"]
	s.email = %q{aquasync@gmail.com}
	s.homepage = %q{http://code.google.com/p/ruby-ole}
	#s.rubyforge_project = %q{ruby-ole}

	s.executables = ['oletool']
	s.files = ['Rakefile']
	s.files += Dir.glob("lib/**/*.rb")
	s.files += Dir.glob("test/test_*.rb") + Dir.glob("test/*.doc")
	s.files += Dir.glob("bin/*")

	s.has_rdoc = true
	s.rdoc_options += ['--main', 'Ole::Storage',
			'--title', "#{PKG_NAME} documentation",
			'--tab-width', '2']

	s.autorequire = 'ole/storage'
end

# produce build/<name>-<version>.gem and a .tgz, but no zip.
Rake::GemPackageTask.new(spec) do |p|
	p.gem_spec = spec
	p.need_tar = true
	p.need_zip = false
	p.package_dir = 'build'
end
|
60
|
+
|
data/bin/oletool
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#! /usr/bin/ruby

require 'optparse'
require 'rubygems'
require 'ole/storage'

# Command-line driver: parse options, then either dump the ole dirent tree
# of each file (default) or repack each file in canonical form.
def oletool
	opts = {:verbose => false, :action => :tree}
	op = OptionParser.new do |op|
		op.banner = "Usage: oletool [options] [files]"
		op.separator ''
		op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
		op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
		op.separator ''
		op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
		op.on_tail('-h', '--help', 'Show this message') { puts op; exit }
	end
	# parse returns the non-option arguments, i.e. the file list.
	files = op.parse ARGV
	if files.empty?
		puts 'Must specify 1 or more msg files.'
		puts op
		exit 1
	end
	# even "verbose" only raises the level to WARN; FATAL otherwise keeps the
	# library quiet.
	Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
	files.each do |file|
		case opts[:action]
		when :tree
			# read-only open; block form closes the storage afterwards.
			Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
		when :repack
			# read-write open; &:repack rewrites the file in canonical layout.
			Ole::Storage.open file, 'r+', &:repack
		end
	end
end

oletool
|
data/lib/ole/file_system.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
#
|
2
|
+
# = Introduction
|
3
|
+
#
|
4
|
+
# This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
|
5
|
+
#
|
6
|
+
# Ideally, this will be the recommended interface, allowing Ole::Storage, Dir, and
|
7
|
+
# Zip::ZipFile to be used interchangeably. It should be possible to write recursive copy using
|
8
|
+
# the plain api, such that you can copy dirs/files agnostically between any of ole docs, dirs,
|
9
|
+
# and zip files.
|
10
|
+
#
|
11
|
+
# = Usage
|
12
|
+
#
|
13
|
+
# Currently you can do something like the following:
|
14
|
+
#
|
15
|
+
# Ole::Storage.open 'test.doc' do |ole|
|
16
|
+
# ole.dir.entries '/' # => [".", "..", "\001Ole", "1Table", "\001CompObj", ...]
|
17
|
+
# ole.file.read "\001CompObj" # => "\001\000\376\377\003\n\000\000\377\377..."
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# = Notes
|
21
|
+
#
|
22
|
+
# *** This file is very incomplete
|
23
|
+
#
|
24
|
+
# i think its okay to have an api like this on top, but there are certain things that ole
|
25
|
+
# does that aren't captured.
|
26
|
+
# <tt>Ole::Storage</tt> can have multiple files with the same name, for example, or with
|
27
|
+
# / in the name, and other things that are probably invalid anyway.
|
28
|
+
# i think this should remain an addon, built on top of my core api.
|
29
|
+
# but still the ideas can be reflected in the core, ie, changing the read/write semantics.
|
30
|
+
#
|
31
|
+
# once the core changes are complete, this will be a pretty straight forward file to complete.
|
32
|
+
#
|
33
|
+
|
34
|
+
require 'ole/base'
|
35
|
+
|
36
|
+
module Ole # :nodoc:
	class Storage
		# Returns a ::File-like facade (FileParent) over this storage, a la
		# <tt>zip/zipfilesystem</tt>. Memoized per storage instance.
		def file
			@file ||= FileParent.new self
		end

		# Returns a ::Dir-like facade (DirParent) over this storage. Memoized.
		def dir
			@dir ||= DirParent.new self
		end

		# Resolve +path_str+ (eg "/some/dir/file") to a dirent, walking down
		# from @root. Leading/trailing slashes are ignored, so "" and "/" both
		# yield the root. Raises if a component is missing, or if a
		# non-terminal component is a file.
		def dirent_from_path path_str
			path = path_str.sub(/^\/*/, '').sub(/\/*$/, '')
			dirent = @root
			return dirent if path.empty?
			path = path.split(/\/+/)
			until path.empty?
				raise "invalid path #{path_str.inspect}" if dirent.file?
				if tmp = dirent[path.shift]
					dirent = tmp
				else
					# allow write etc later.
					raise "invalid path #{path_str.inspect}"
				end
			end
			dirent
		end

		# Provides a subset of the ::File class-method interface, operating on
		# file entries within an Ole::Storage.
		class FileParent
			def initialize ole
				@ole = ole
			end

			# Open the file at +path_str+, yielding its io to the block (or
			# returning it when no block is given), as per the dirent's #open.
			# NOTE(review): +mode+ is accepted but currently unused — confirm
			# whether write modes should be honoured here.
			def open path_str, mode='r', &block
				dirent = @ole.dirent_from_path path_str
				# like Errno::EISDIR
				raise "#{path_str.inspect} is a directory" unless dirent.file?
				dirent.open(&block)
			end

			alias new :open

			# Slurp the entire contents of the file at +path+.
			def read path
				open(path) { |f| f.read }
			end

			# Delete the file at +path+. Returns 1, as per ::File.unlink.
			def unlink path
				dirent = @ole.dirent_from_path path
				# EPERM
				raise "operation not permitted #{path.inspect}" unless dirent.file?
				# truncate to 0 first so the file's blocks are freed in the
				# allocation table; removing the dirent from its parent then keeps
				# it out of the bat at save time. repack should zero free blocks.
				open(path) { |f| f.truncate 0 }
				# prepend '/' so a bare top-level name ("foo") resolves to the
				# root as parent, not to the dirent itself.
				parent = @ole.dirent_from_path(('/' + path).sub(/\/[^\/]+$/, ''))
				parent.children.delete dirent
				1 # hmmm. as per ::File ?
			end
		end

		# Provides a subset of the ::Dir interface, operating on directory
		# entries within an Ole::Storage.
		class DirParent
			def initialize ole
				@ole = ole
			end

			# Open the directory at +path_str+ as a Dir object; yields it when
			# a block is given, otherwise returns it.
			def open path_str
				dirent = @ole.dirent_from_path path_str
				# like Errno::ENOTDIR
				raise "#{path_str.inspect} is not a directory" unless dirent.dir?
				dir = Dir.new dirent, path_str
				if block_given?
					yield dir
				else
					dir
				end
			end

			# certain Dir class methods proxy in this fashion:
			def entries path
				open(path) { |dir| dir.entries }
			end

			# there are some other important ones, like:
			# chroot (!), mkdir, chdir, rmdir, glob etc etc. for now, i think
			# mkdir, and rmdir are the main ones we'd need to support
			def rmdir path
				dirent = @ole.dirent_from_path path
				raise "#{path.inspect} is not a directory" unless dirent.dir?
				# ENOTEMPTY:
				raise "directory not empty #{path.inspect}" unless dirent.children.empty?
				# the canonical representation maintained is the root tree and the
				# children arrays, so deletion means removing ourselves from our
				# parent's children. find the parent the same way unlink does:
				# prepend '/' so a bare top-level name ("foo") resolves to the
				# root, not back to the dirent itself. (the previous
				# `path.sub(...) || '/'` never fell back to '/' — `||` bound to
				# sub's never-nil result — making rmdir a silent no-op for such
				# paths.)
				parent = @ole.dirent_from_path(('/' + path).sub(/\/[^\/]+$/, ''))
				# note that the way this currently works, on save and repack time
				# this will get reflected. to make a difference now it would have
				# to re-write the dirent; Ole::Storage#close should handle that.
				parent.children.delete dirent
				0 # hmmm. as per ::Dir ?
			end

			# An opened directory: a snapshot of the entry names of a storage
			# dirent, readable in the style of ::Dir.
			class Dir
				include Enumerable
				attr_reader :dirent, :path, :entries, :pos

				def initialize dirent, path
					@dirent, @path = dirent, path
					@pos = 0
					# FIXME: hack, and probably not really desired
					@entries = %w[. ..] + @dirent.children.map(&:name)
				end

				def each(&block)
					@entries.each(&block)
				end

				# no-op; there is nothing to release.
				def close
				end

				# return the entry at the current position, then advance. past the
				# last entry, returns nil and the position stays clamped.
				def read
					@entries[@pos]
				ensure
					@pos += 1 if @pos < @entries.length
				end

				# seek, clamped to 0..entries.length.
				def pos= pos
					@pos = [[0, pos].max, @entries.length].min
				end

				def rewind
					@pos = 0
				end

				alias tell :pos
				alias seek :pos=
			end
		end
	end
end
|
181
|
+
|
data/lib/ole/io_helpers.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
|
2
|
+
# move to support?
|
3
|
+
class IO # :nodoc:
	# Stream the remaining contents of +src+ into +dst+, 4096 bytes at a
	# time, until +src+ is exhausted. Arguments need only be io-like
	# (respond to #eof?, #read and #write); nothing is returned.
	def self.copy src, dst
		dst.write src.read(4096) until src.eof?
	end
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# = Introduction
|
14
|
+
#
|
15
|
+
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
|
16
|
+
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
|
17
|
+
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
|
18
|
+
# no method to stream it.
|
19
|
+
#
|
20
|
+
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
|
21
|
+
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
|
22
|
+
# getting 16 bytes doesn't read the whole thing).
|
23
|
+
#
|
24
|
+
# In the simplest case it can be used with a single range to provide a limited io to a section of
|
25
|
+
# a file.
|
26
|
+
#
|
27
|
+
# = Limitations
|
28
|
+
#
|
29
|
+
# * No buffering. by design at the moment. Intended for large reads
|
30
|
+
#
|
31
|
+
# = TODO
|
32
|
+
#
|
33
|
+
# On further reflection, this class is something of a joining/optimization of
|
34
|
+
# two separate IO classes. a SubfileIO, for providing access to a range within
|
35
|
+
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
|
36
|
+
# a bunch of io objects as a single unified whole.
|
37
|
+
#
|
38
|
+
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
|
39
|
+
# convert a whole mime message into an IO stream, that can be read from.
|
40
|
+
# It will just be the concatenation of a series of IO objects, corresponding to
|
41
|
+
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
|
42
|
+
# original message proper, or RangesIO as provided by the Attachment#data, that
|
43
|
+
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
|
44
|
+
# fly. Thus the attachment, in its plain or encoded form, and the message as a
|
45
|
+
# whole never exists as a single string in memory, as it does now. This is a
|
46
|
+
# fair bit of work to achieve, but generally useful I believe.
|
47
|
+
#
|
48
|
+
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
49
|
+
#
|
50
|
+
class RangesIO
	attr_reader :io, :ranges, :size, :pos
	# +io+ is the parent io object that we are wrapping.
	#
	# +ranges+ are byte offsets, either
	# 1. an array of ranges [1..2, 4..5, 6..8] or
	# 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
	#    (think the way String indexing works)
	# The +ranges+ provide sequential slices of the file that will be read. they can overlap.
	#
	# +opts+ supports :close_parent (default false) — when true, #close also
	# closes the wrapped +io+.
	def initialize io, ranges, opts={}
		@opts = {:close_parent => false}.merge opts
		@io = io
		# convert ranges to arrays. check for negative ranges?
		@ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
		# calculate size: the sum of all the range lengths
		@size = @ranges.inject(0) { |total, (pos, len)| total + len }
		# initial position in the file (a *logical* offset into the
		# concatenated ranges, not an offset in the parent io)
		@pos = 0
	end

	# Seek to logical offset +pos+. Only absolute seeks are supported.
	# NOTE(review): +pos+ is not validated against 0...size here; an out of
	# range value will surface later as a range_and_offset failure.
	def pos= pos, whence=IO::SEEK_SET
		# FIXME support other whence values
		raise NotImplementedError, "#{whence.inspect} not supported" unless whence == IO::SEEK_SET
		# just a simple pos calculation. invalidate buffers if we had them
		@pos = pos
	end

	alias seek :pos=
	alias tell :pos

	# Close the parent io too, but only if we were asked to own it.
	def close
		@io.close if @opts[:close_parent]
	end

	# Map a logical offset +pos+ to [range, offset-within-range] by walking
	# the ranges and accumulating their lengths. Raises if +pos+ lies beyond
	# the last range.
	def range_and_offset pos
		off = nil
		r = ranges.inject(0) do |total, r|
			to = total + r[1]
			if pos <= to
				off = pos - total
				break r
			end
			to
		end
		# should be impossible for any valid pos, (0...size) === pos
		raise "unable to find range for pos #{pos.inspect}" unless off
		[r, off]
	end

	# True when the logical position has reached the total size.
	def eof?
		@pos == @size
	end

	# read bytes from file, to a maximum of +limit+, or all available if unspecified.
	def read limit=nil
		data = ''
		limit ||= size
		# special case eof
		return data if eof?
		r, off = range_and_offset @pos
		i = ranges.index r
		# this may be conceptually nice (create sub-range starting where we are), but
		# for a large range array its pretty wasteful. even the previous way was. but
		# i'm not trying to optimize this atm. it may even go to c later if necessary.
		([[r[0] + off, r[1] - off]] + ranges[i+1..-1]).each do |pos, len|
			@io.seek pos
			if limit < len
				# final, partial chunk: read only what was asked for.
				# FIXME this += isn't correct if there is a read error
				# or something.
				@pos += limit
				break data << @io.read(limit)
			end
			# this can also stuff up. if the ranges are beyond the size of the file, we can get
			# nil here.
			data << @io.read(len)
			@pos += len
			limit -= len
		end
		data
	end

	# you may override this call to update @ranges and @size, if applicable. then write
	# support can grow below
	def truncate size
		raise NotImplementedError, 'truncate not supported'
	end
	# why not? :)
	alias size= :truncate

	# Write +data+ at the current logical position, spanning range boundaries
	# as needed. Grows the io via the #truncate hook when there is not enough
	# room (raising if truncate is unsupported). Returns the number of bytes
	# written.
	def write data
		# short cut. needed because truncate 0 may return no ranges, instead of empty range,
		# thus range_and_offset fails.
		return 0 if data.empty?
		data_pos = 0
		# if we don't have room, we can use the truncate hook to make more space.
		if data.length > @size - @pos
			begin
				truncate @pos + data.length
			rescue NotImplementedError
				# FIXME maybe warn instead, then just truncate the data?
				raise "unable to satisfy write of #{data.length} bytes"
			end
		end
		r, off = range_and_offset @pos
		i = ranges.index r
		([[r[0] + off, r[1] - off]] + ranges[i+1..-1]).each do |pos, len|
			@io.seek pos
			if data_pos + len > data.length
				# final, partial chunk of data for this range.
				chunk = data[data_pos..-1]
				@io.write chunk
				@pos += chunk.length
				data_pos = data.length
				break
			end
			@io.write data[data_pos, len]
			@pos += len
			data_pos += len
		end
		data_pos
	end

	# this will be generalised to a module later
	def each_read blocksize=4096
		yield read(blocksize) until eof?
	end

	def inspect
		# the rescue is for empty files
		pos, len = *(range_and_offset(@pos)[0] rescue [nil, nil])
		range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
		"#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos "\
			"current_range=#{range_str}>"
	end
end
|
184
|
+
|