ruby-ole-patched-for-home_run 1.2.10.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +76 -0
- data/README +117 -0
- data/Rakefile +116 -0
- data/bin/oletool +40 -0
- data/data/propids.yaml +56 -0
- data/lib/ole/base.rb +7 -0
- data/lib/ole/file_system.rb +2 -0
- data/lib/ole/ranges_io.rb +260 -0
- data/lib/ole/storage/base.rb +948 -0
- data/lib/ole/storage/file_system.rb +394 -0
- data/lib/ole/storage/meta_data.rb +148 -0
- data/lib/ole/storage.rb +3 -0
- data/lib/ole/support.rb +256 -0
- data/lib/ole/types/base.rb +281 -0
- data/lib/ole/types/property_set.rb +165 -0
- data/lib/ole/types.rb +2 -0
- data/test/oleWithDirs.ole +0 -0
- data/test/test.doc +0 -0
- data/test/test_SummaryInformation +0 -0
- data/test/test_filesystem.rb +879 -0
- data/test/test_mbat.rb +41 -0
- data/test/test_meta_data.rb +45 -0
- data/test/test_property_set.rb +40 -0
- data/test/test_ranges_io.rb +103 -0
- data/test/test_storage.rb +220 -0
- data/test/test_support.rb +159 -0
- data/test/test_types.rb +67 -0
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
- metadata +108 -0
data/ChangeLog
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
== 1.2.10 / 2009-07-20
|
2
|
+
|
3
|
+
- Mostly more performance enhancements, significantly faster for
|
4
|
+
certain operations.
|
5
|
+
- Using lots of files is faster due to new hash lookup for dirents by name.
|
6
|
+
- Writes of many files are faster now too as Dirent & FileTime serialization
|
7
|
+
has been improved.
|
8
|
+
- Certain operations from the filesystem api have been profiled and sped up.
|
9
|
+
- Don't use syswrite on jruby to avoid the buffered stream warnings.
|
10
|
+
|
11
|
+
== 1.2.9 / 2009-07-14
|
12
|
+
|
13
|
+
- Lots of performance enhancements for RangesIO.
|
14
|
+
|
15
|
+
== 1.2.8.2 / 2009-01-01
|
16
|
+
|
17
|
+
- Update code to support ruby 1.9.1
|
18
|
+
|
19
|
+
== 1.2.8.1 / 2008-10-22
|
20
|
+
|
21
|
+
- Fix a couple of breakages when using $KCODE = 'UTF8'
|
22
|
+
|
23
|
+
== 1.2.8 / 2008-10-08
|
24
|
+
|
25
|
+
- Check in the new fixes to the mbat support.
|
26
|
+
- Update README to be a bit more useful.
|
27
|
+
|
28
|
+
== 1.2.7 / 2008-08-12
|
29
|
+
|
30
|
+
- Prepare Ole::Types::PropertySet for write support.
|
31
|
+
- Introduce Ole::Storage#meta_data as an easy interface to meta data stored
|
32
|
+
within various property sets.
|
33
|
+
- Add new --metadata action to oletool to dump said metadata.
|
34
|
+
- Add new --mimetype action to oletool, and corresponding Ole::Storage#mime_type
|
35
|
+
function to try to guess mime type of a file based on some simple heuristics.
|
36
|
+
- Restructure project files a bit, and pull in file_system & meta_data support
|
37
|
+
by default.
|
38
|
+
- More tests - now have 100% coverage.
|
39
|
+
|
40
|
+
== 1.2.6 / 2008-07-21
|
41
|
+
|
42
|
+
- Fix FileClass#expand_path to work properly on darwin (issue #2)
|
43
|
+
- Guard against Enumerable#sum clash with active support (issue #3)
|
44
|
+
|
45
|
+
== 1.2.5 / 2008-02-16
|
46
|
+
|
47
|
+
- Make all tests pass on ruby 1.9.
|
48
|
+
|
49
|
+
== 1.2.4 / 2008-01-09
|
50
|
+
|
51
|
+
- Make all tests pass on windows (issue #1).
|
52
|
+
- Make all tests pass on a power pc (running ubuntu).
|
53
|
+
- Property set convenience access functions.
|
54
|
+
|
55
|
+
== 1.2.3 / 2007-12-28
|
56
|
+
|
57
|
+
- MBAT write support re-implemented. Can now write files over ~8mb again.
|
58
|
+
- Minor fixes (truncation in #flush, file modification timestamps)
|
59
|
+
- More test coverage
|
60
|
+
- Initial (read-only) property set support.
|
61
|
+
- Complete filesystem api, to pass most of the rubyzip tests.
|
62
|
+
- Add a ChangeLog :).
|
63
|
+
|
64
|
+
== 1.2.2 / 2007-11-05
|
65
|
+
|
66
|
+
- Lots of test updates, 90% coverage.
|
67
|
+
- Fix +to_tree+ method to be more efficient, and stream output.
|
68
|
+
- Optimizations from benchmarks and profiling, mostly for writes. Fixed
|
69
|
+
AllocationTable#resize_chain, RangesIOResizable#truncate and
|
70
|
+
AllocationTable#free_block.
|
71
|
+
- Add in filesystem test file from rubyzip, and start working on a
|
72
|
+
filesystem api.
|
73
|
+
|
74
|
+
== 1.2.1 / 2007-08-20
|
75
|
+
|
76
|
+
- Separate out from ruby-msg as new project.
|
data/README
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
= Introduction
|
2
|
+
|
3
|
+
The ruby-ole library provides a variety of functions primarily for
|
4
|
+
working with OLE2 structured storage files, such as those produced by
|
5
|
+
Microsoft Office - eg *.doc, *.msg etc.
|
6
|
+
|
7
|
+
= Example Usage
|
8
|
+
|
9
|
+
Here are some examples of how to use the library functionality,
|
10
|
+
categorised roughly by purpose.
|
11
|
+
|
12
|
+
1. Reading and writing files within an OLE container
|
13
|
+
|
14
|
+
The recommended way to manipulate the contents is via the
|
15
|
+
"file_system" API, whereby you use Ole::Storage instance methods
|
16
|
+
similar to the regular File and Dir class methods.
|
17
|
+
|
18
|
+
ole = Ole::Storage.open('oleWithDirs.ole', 'rb+')
|
19
|
+
p ole.dir.entries('.') # => [".", "..", "dir1", "dir2", "file1"]
|
20
|
+
p ole.file.read('file1')[0, 25] # => "this is the entry 'file1'"
|
21
|
+
ole.dir.mkdir('newdir')
|
22
|
+
|
23
|
+
2. Accessing OLE meta data
|
24
|
+
|
25
|
+
Some convenience functions are provided for (currently read only)
|
26
|
+
access to OLE property sets and other sources of meta data.
|
27
|
+
|
28
|
+
ole = Ole::Storage.open('test_word_95.doc')
|
29
|
+
p ole.meta_data.file_format # => "MSWordDoc"
|
30
|
+
p ole.meta_data.mime_type # => "application/msword"
|
31
|
+
p ole.meta_data.doc_author.split.first # => "Charles"
|
32
|
+
|
33
|
+
3. Raw access to underlying OLE internals
|
34
|
+
|
35
|
+
This is probably of little interest to most developers using the
|
36
|
+
library, but for some use cases you may need to drop down to the
|
37
|
+
lower level API on which the "file_system" API is constructed,
|
38
|
+
which exposes more of the format details.
|
39
|
+
|
40
|
+
<tt>Ole::Storage</tt> files can have multiple files with the same name,
|
41
|
+
or with a slash in the name, and other things that are probably
|
42
|
+
strictly invalid. This API is the only way to access those files.
|
43
|
+
|
44
|
+
You can access the header object directly:
|
45
|
+
|
46
|
+
p ole.header.num_sbat # => 1
|
47
|
+
p ole.header.magic.unpack('H*') # => ["d0cf11e0a1b11ae1"]
|
48
|
+
|
49
|
+
You can directly access the array of all Dirent objects,
|
50
|
+
including the root:
|
51
|
+
|
52
|
+
p ole.dirents.length # => 5
|
53
|
+
puts ole.root.to_tree
|
54
|
+
# =>
|
55
|
+
- #<Dirent:"Root Entry">
|
56
|
+
|- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000...">
|
57
|
+
|- #<Dirent:"\001CompObj" size=98 data="\001\000\376\377\003...">
|
58
|
+
|- #<Dirent:"WordDocument" size=2574 data="\334\245e\000-...">
|
59
|
+
\- #<Dirent:"\005SummaryInformation" size=54788 data="\376\377\000\000\001...">
|
60
|
+
|
61
|
+
You can access (through RangesIO methods, or by using the
|
62
|
+
relevant Dirent and AllocationTable methods) information like where within
|
63
|
+
the container a stream is located (these are offset/length pairs):
|
64
|
+
|
65
|
+
p ole.root["\001CompObj"].open { |io| io.ranges } # => [[0, 64], [64, 34]]
|
66
|
+
|
67
|
+
See the documentation for each class for more details.
|
68
|
+
|
69
|
+
= Thanks
|
70
|
+
|
71
|
+
* The code contained in this project was initially based on chicago's libole
|
72
|
+
(source available at http://prdownloads.sf.net/chicago/ole.tgz).
|
73
|
+
|
74
|
+
* It was later augmented with some corrections by inspecting pole, and (purely
|
75
|
+
for header definitions) gsf.
|
76
|
+
|
77
|
+
* The property set parsing code came from the apache java project POIFS.
|
78
|
+
|
79
|
+
* The excellent idea for using a pseudo file system style interface by providing
|
80
|
+
#file and #dir methods which mimic File and Dir, was borrowed (along with almost
|
81
|
+
unchanged tests!) from Thomas Sondergaard's rubyzip.
|
82
|
+
|
83
|
+
= TODO
|
84
|
+
|
85
|
+
== 1.2.11
|
86
|
+
|
87
|
+
* internal api cleanup
|
88
|
+
* add buffering to rangesio so that performance for small reads and writes
|
89
|
+
isn't so awful. maybe try and remove the bottlenecks of unbuffered first
|
90
|
+
with more profiling, then implement the buffering on top of that.
|
91
|
+
* fix mode strings - like truncate when using 'w+', supporting append
|
92
|
+
'a+' modes etc. done?
|
93
|
+
* make ranges io obey readable vs writeable modes.
|
94
|
+
* more RangesIO completion. ie, doesn't support #<< at the moment.
|
95
|
+
* maybe some oletool doc.
|
96
|
+
* make sure `rake test' runs tests both with $KCODE='UTF8', and without,
|
97
|
+
and maybe ensure i don't regress on 1.9 and jruby either now that they're
|
98
|
+
fixed.
|
99
|
+
|
100
|
+
== 1.3.1
|
101
|
+
|
102
|
+
* case insensitive open mode would be nice
|
103
|
+
* fix property sets a bit more. see TODO in Ole::Storage::MetaData
|
104
|
+
* ability to zero out padding and unused blocks
|
105
|
+
* case insensitive mode for ole/file_system?
|
106
|
+
* better tests for mbat support.
|
107
|
+
* further doc cleanup
|
108
|
+
* add in place testing for jruby and ruby1.9
|
109
|
+
|
110
|
+
== Longer term
|
111
|
+
|
112
|
+
* more benchmarking, profiling, and speed fixes. was thinking vs other
|
113
|
+
ruby filesystems (eg, vs File/Dir itself, and vs rubyzip), and vs other
|
114
|
+
ole implementations (maybe perl's, and poifs) just to check it's in the
|
115
|
+
ballpark, with no remaining silly bottlenecks.
|
116
|
+
* supposedly vba does something weird to ole files. test that.
|
117
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'rake/rdoctask'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/packagetask'
|
4
|
+
require 'rake/gempackagetask'
|
5
|
+
|
6
|
+
require 'rbconfig'
|
7
|
+
require 'fileutils'
|
8
|
+
|
9
|
+
$:.unshift 'lib'
|
10
|
+
|
11
|
+
require 'ole/storage'
|
12
|
+
|
13
|
+
# Package identity; the version is taken straight from the library itself.
PKG_NAME = 'ruby-ole-patched-for-home_run'
PKG_VERSION = Ole::Storage::VERSION

# Running plain `rake` runs the test suite.
task :default => [:test]

# Unit tests: every test/test_*.rb file, with ruby warnings switched on.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList["test/test_*.rb"]
  test_task.warning = true
  test_task.verbose = true
end

# Coverage report. rcov is optional, so a missing library is silently ignored.
begin
  require 'rcov/rcovtask'
  # NOTE: this will not do anything until you add some tests
  desc "Create a cross-referenced code coverage report"
  Rcov::RcovTask.new do |rcov_task|
    rcov_task.test_files = FileList['test/test*.rb']
    # make sure the library under test is loaded from ./lib
    rcov_task.ruby_opts << "-Ilib"
    # drop this line to disable cross-references
    rcov_task.rcov_opts << "--xrefs"
    rcov_task.verbose = true
  end
rescue LoadError
  # Rcov not available
end

# API documentation, generated into ./doc with README as the landing page.
Rake::RDocTask.new do |rdoc_task|
  rdoc_task.rdoc_dir = 'doc'
  rdoc_task.rdoc_files.include('lib/**/*.rb')
  rdoc_task.rdoc_files.include('README', 'ChangeLog')
  rdoc_task.title = "#{PKG_NAME} documentation"
  rdoc_task.options += %w[--line-numbers --inline-source --tab-width 2]
  rdoc_task.main = 'README'
end

# Gem specification describing the contents of the packaged gem.
spec = Gem::Specification.new do |gem|
  gem.name = PKG_NAME
  gem.version = PKG_VERSION
  gem.summary = %q{Ruby OLE library.}
  gem.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
  gem.authors = ['Charles Lowe (fixed for homerun by Alex Le)']
  gem.email = %q{nworld3d@yahoo.com}
  # gem.homepage = %q{http://code.google.com/p/ruby-ole}
  # gem.rubyforge_project = %q{ruby-ole}

  gem.executables = ['oletool']
  # top-level files first, then library, tests + fixtures, and executables
  gem.files = ['README', 'Rakefile', 'ChangeLog', 'data/propids.yaml']
  gem.files += FileList['lib/**/*.rb']
  gem.files += FileList['test/test_*.rb', 'test/*.doc']
  gem.files += FileList['test/oleWithDirs.ole', 'test/test_SummaryInformation']
  gem.files += FileList['bin/*']
  gem.test_files = FileList['test/test_*.rb']

  gem.has_rdoc = true
  gem.extra_rdoc_files = ['README', 'ChangeLog']
  gem.rdoc_options += [
    '--main', 'README',
    '--title', "#{PKG_NAME} documentation",
    '--tab-width', '2'
  ]
end

# Builds the .gem (no tarball, no zip) into ./build.
Rake::GemPackageTask.new(spec) do |package_task|
  package_task.gem_spec = spec
  package_task.need_tar = false
  package_task.need_zip = false
  package_task.package_dir = 'build'
end
|
80
|
+
|
81
|
+
desc 'Run various benchmarks'
task :benchmark do
  require 'benchmark'
  require 'tempfile'
  require 'ole/file_system'

  # Writes opts[:files] files of opts[:size] zero bytes each into a fresh ole
  # container backed by a temp file. Data is written in chunks of
  # opts[:block_size] bytes (default 100_000) followed by the remainder.
  #
  # This is a local lambda rather than a nested +def+: a +def+ inside the task
  # block would define a method on Object as a side effect of running the task.
  #
  # should probably add some read benchmarks too
  write_benchmark = lambda do |opts|
    files, size = opts[:files], opts[:size]
    block_size = opts[:block_size] || 100_000
    blocks, remaining = size.divmod block_size
    block = 0.chr * block_size
    tail = 0.chr * remaining
    Tempfile.open 'ole_storage_benchmark' do |temp|
      Ole::Storage.open temp do |ole|
        files.times do |i|
          ole.file.open "file_#{i}", 'w' do |f|
            blocks.times { f.write block }
            f.write tail
          end
        end
      end
    end
  end

  # the label width keeps the timing columns aligned with the header row
  Benchmark.bm 14 do |bm|
    # one 1mb file, 5 times over
    bm.report 'write_1mb_1x5' do
      5.times { write_benchmark.call :files => 1, :size => 1_000_000 }
    end

    # 1mb spread over a thousand 1kb files, 5 times over
    bm.report 'write_1mb_2x5' do
      5.times { write_benchmark.call :files => 1_000, :size => 1_000 }
    end
  end
end
|
116
|
+
|
data/bin/oletool
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ole/storage'
|
5
|
+
|
6
|
+
# Command line entry point: parses ARGV and applies the selected action
# (tree dump by default) to every ole file named on the command line.
#
# Prints usage and exits with status 1 when no files are given; -h/--help
# prints usage and exits normally.
def oletool
  opts = {:verbose => false, :action => :tree}
  # the block parameter is named differently from the local it is assigned to,
  # so that running with warnings enabled doesn't complain about shadowing.
  parser = OptionParser.new do |op|
    op.banner = "Usage: oletool [options] [files]"
    op.separator ''
    op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
    op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
    op.on('-m', '--mimetype', 'Print the guessed mime types') { opts[:action] = :mimetype }
    op.on('-y', '--metadata', 'Dump the internal meta data as YAML') { opts[:action] = :metadata }
    op.separator ''
    op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
    op.on_tail('-h', '--help', 'Show this message') { puts op; exit }
  end
  files = parser.parse ARGV
  if files.empty?
    puts 'Must specify 1 or more ole files.'
    puts parser
    exit 1
  end
  Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
  files.each do |file|
    case opts[:action]
    when :tree
      Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
    when :repack
      Ole::Storage.open file, 'rb+', &:repack
    when :metadata
      # Kernel#y is not generally available outside of irb, so serialize
      # explicitly. yaml is only needed for this action, hence the lazy require.
      require 'yaml'
      Ole::Storage.open(file) { |ole| puts ole.meta_data.to_h.to_yaml }
    when :mimetype
      puts Ole::Storage.open(file) { |ole| ole.meta_data.mime_type }
    end
  end
end
|
39
|
+
|
40
|
+
oletool
|
data/data/propids.yaml
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
"{f29f85e0-4ff9-1068-ab91-08002b27b3d9}":
|
2
|
+
- FMTID_SummaryInformation
|
3
|
+
- 2: doc_title
|
4
|
+
3: doc_subject
|
5
|
+
4: doc_author
|
6
|
+
5: doc_keywords
|
7
|
+
6: doc_comments
|
8
|
+
7: doc_template
|
9
|
+
8: doc_last_author
|
10
|
+
9: doc_rev_number
|
11
|
+
10: doc_edit_time
|
12
|
+
11: doc_last_printed
|
13
|
+
12: doc_created_time
|
14
|
+
13: doc_last_saved_time
|
15
|
+
14: doc_page_count
|
16
|
+
15: doc_word_count
|
17
|
+
16: doc_char_count
|
18
|
+
18: doc_app_name
|
19
|
+
19: security
|
20
|
+
|
21
|
+
"{d5cdd502-2e9c-101b-9397-08002b2cf9ae}":
|
22
|
+
- FMTID_DocSummaryInfo
|
23
|
+
- 2: doc_category
|
24
|
+
3: doc_presentation_target
|
25
|
+
4: doc_byte_count
|
26
|
+
5: doc_line_count
|
27
|
+
6: doc_para_count
|
28
|
+
7: doc_slide_count
|
29
|
+
8: doc_note_count
|
30
|
+
9: doc_hidden_count
|
31
|
+
10: mmclips
|
32
|
+
11: scale_crop
|
33
|
+
12: heading_pairs
|
34
|
+
13: doc_part_titles
|
35
|
+
14: doc_manager
|
36
|
+
15: doc_company
|
37
|
+
16: links_up_to_date
|
38
|
+
|
39
|
+
"{d5cdd505-2e9c-101b-9397-08002b2cf9ae}":
|
40
|
+
- FMTID_UserDefinedProperties
|
41
|
+
- {}
|
42
|
+
|
43
|
+
# just dumped these all here. if i can confirm any of these
|
44
|
+
# better, i can update this file so they're recognized.
|
45
|
+
#0b63e343-9ccc-11d0-bcdb-00805fccce04
|
46
|
+
#0b63e350-9ccc-11d0-bcdb-00805fccce04 NetLibrary propset?
|
47
|
+
#31f400a0-fd07-11cf-b9bd-00aa003db18e ScriptInfo propset?
|
48
|
+
#49691c90-7e17-101a-a91c-08002b2ecda9 Query propset?
|
49
|
+
#560c36c0-503a-11cf-baa1-00004c752a9a
|
50
|
+
#70eb7a10-55d9-11cf-b75b-00aa0051fe20 HTMLInfo propset
|
51
|
+
#85ac0960-1819-11d1-896f-00805f053bab message propset?
|
52
|
+
#aa568eec-e0e5-11cf-8fda-00aa00a14f93 NNTP SummaryInformation propset?
|
53
|
+
#b725f130-47ef-101a-a5f1-02608c9eebac Storage propset
|
54
|
+
#c82bf596-b831-11d0-b733-00aa00a1ebd2 NetLibraryInfo propset
|
55
|
+
#c82bf597-b831-11d0-b733-00aa00a1ebd2 LinkInformation propset?
|
56
|
+
#d1b5d3f0-c0b3-11cf-9a92-00a0c908dbf1 LinkInformation propset?
|
data/lib/ole/base.rb
ADDED
@@ -0,0 +1,260 @@
|
|
1
|
+
# need IO::Mode
|
2
|
+
require 'ole/support'
|
3
|
+
|
4
|
+
#
|
5
|
+
# = Introduction
|
6
|
+
#
|
7
|
+
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
|
8
|
+
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
|
9
|
+
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
|
10
|
+
# no method to stream it.
|
11
|
+
#
|
12
|
+
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
|
13
|
+
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
|
14
|
+
# getting 16 bytes doesn't read the whole thing).
|
15
|
+
#
|
16
|
+
# In the simplest case it can be used with a single range to provide a limited io to a section of
|
17
|
+
# a file.
|
18
|
+
#
|
19
|
+
# = Limitations
|
20
|
+
#
|
21
|
+
# * No buffering. by design at the moment. Intended for large reads
|
22
|
+
#
|
23
|
+
# = TODO
|
24
|
+
#
|
25
|
+
# On further reflection, this class is something of a joining/optimization of
|
26
|
+
# two separate IO classes. a SubfileIO, for providing access to a range within
|
27
|
+
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
|
28
|
+
# a bunch of io objects as a single unified whole.
|
29
|
+
#
|
30
|
+
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
|
31
|
+
# convert a whole mime message into an IO stream, that can be read from.
|
32
|
+
# It will just be the concatenation of a series of IO objects, corresponding to
|
33
|
+
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
|
34
|
+
# original message proper, or RangesIO as provided by the Attachment#data, that
|
35
|
+
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
|
36
|
+
# fly. Thus the attachment, in its plain or encoded form, and the message as a
|
37
|
+
# whole never exists as a single string in memory, as it does now. This is a
|
38
|
+
# fair bit of work to achieve, but generally useful I believe.
|
39
|
+
#
|
40
|
+
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
41
|
+
#
|
42
|
+
class RangesIO
  attr_reader :io, :mode, :ranges, :size, :pos

  # +io+:: the parent io object that we are wrapping.
  # +mode+:: the mode to use
  # +params+:: hash of params.
  # * :ranges - byte offsets, either:
  #   1. an array of ranges [1..2, 4..5, 6..8] or
  #   2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
  #      (think the way String indexing works)
  #   Defaults to a single range covering all of +io+.
  # * :close_parent - boolean to close parent when this object is closed
  # * :combine - pass false to keep adjacent ranges separate (see #ranges=)
  #
  # +mode+ may be omitted, ie <tt>RangesIO.new io, :ranges => ...</tt>.
  #
  # NOTE: the +ranges+ can overlap.
  def initialize(io, mode = 'r', params = {})
    mode, params = 'r', mode if Hash === mode
    ranges = params[:ranges]
    # @params must be set before ranges= is called, as it reads :combine
    @params = {:close_parent => false}.merge params
    @mode = IO::Mode.new mode
    @io = io
    # initial position in the file
    @pos = 0
    self.ranges = ranges || [[0, io.size]]
    # handle some mode flags
    truncate 0 if @mode.truncate?
    seek size if @mode.append?
  end

  # Like ::new, but with a block the new object is yielded, closed once the
  # block is done (even if it raises), and the block's value is returned.
  # TODO add test for this
  def self.open(*args, &block)
    ranges_io = new(*args)
    return ranges_io unless block_given?
    begin
      yield ranges_io
    ensure
      ranges_io.close
    end
  end

  # Replaces the list of ranges, recalculating #size and the internal table of
  # cumulative offsets, and then re-validating the current position.
  #
  # Range objects are converted to [pos, len] pairs using <tt>end - begin</tt>
  # as the length. Unless the :combine param was given as false, a range
  # starting exactly where the previous one ended is merged into it.
  def ranges=(ranges)
    # convert ranges to arrays. check for negative ranges?
    ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
    # combine ranges
    if @params[:combine] == false
      # might be useful for debugging...
      @ranges = ranges
    else
      @ranges = []
      next_pos = nil
      ranges.each do |pos, len|
        if next_pos == pos
          @ranges.last[1] += len
          next_pos += len
        else
          @ranges << [pos, len]
          next_pos = pos + len
        end
      end
    end
    # calculate cumulative offsets from range sizes
    @size = 0
    @offsets = []
    @ranges.each do |_pos, len|
      @offsets << @size
      @size += len
    end
    self.pos = @pos
  end

  # Moves to +pos+, interpreted according to +whence+ (IO::SEEK_SET,
  # IO::SEEK_CUR or IO::SEEK_END). Also available as #seek, which is the way
  # to pass +whence+.
  #
  # Raises Errno::EINVAL for an unknown +whence+, or if the resulting position
  # is outside of <tt>0..size</tt>.
  def pos=(pos, whence = IO::SEEK_SET)
    case whence
    when IO::SEEK_SET
    when IO::SEEK_CUR
      pos += @pos
    when IO::SEEK_END
      pos = @size + pos
    else raise Errno::EINVAL
    end
    raise Errno::EINVAL unless (0..@size) === pos
    @pos = pos

    # do a binary search through @offsets to find the active range.
    a, c, b = 0, 0, @offsets.length
    while a < b
      c = (a + b) / 2
      pivot = @offsets[c]
      if pos == pivot
        @active = c
        return
      elsif pos < pivot
        b = c
      else
        a = c + 1
      end
    end

    # no exact hit: the active range is the last one starting before pos
    @active = a - 1
  end

  alias seek :pos=
  alias tell :pos

  # Moves back to the start.
  def rewind
    seek 0
  end

  # Closes the parent io too, but only if the :close_parent param was set.
  def close
    @io.close if @params[:close_parent]
  end

  # True once the position has reached the combined size of all the ranges.
  def eof?
    @pos == @size
  end

  # read bytes from file, to a maximum of +limit+, or all available if unspecified.
  # Returns an empty string when already at the end.
  def read(limit = nil)
    data = ''
    return data if eof?
    limit ||= size
    # start part way through the active range
    pos, len = @ranges[@active]
    diff = @pos - @offsets[@active]
    pos += diff
    len -= diff
    loop do
      @io.seek pos
      if limit < len
        # the request is satisfied from within this range
        s = @io.read(limit).to_s
        @pos += s.length
        data << s
        break
      end
      s = @io.read(len).to_s
      @pos += s.length
      data << s
      # a short read means the parent io ran out of data
      break if s.length != len
      limit -= len
      break if @active == @ranges.length - 1
      @active += 1
      pos, len = @ranges[@active]
    end
    data
  end

  # you may override this call to update @ranges and @size, if applicable.
  # The base implementation always raises NotImplementedError.
  def truncate(size)
    raise NotImplementedError, 'truncate not supported'
  end

  # using explicit forward instead of an alias now for overriding.
  # should override truncate.
  def size=(size)
    truncate size
  end

  # Writes +data+ at the current position, spreading it over as many ranges as
  # needed, and returns the number of bytes written.
  #
  # If +data+ doesn't fit in what is left of the ranges, #truncate is called to
  # grow the object first. Raises IOError if truncate is not implemented.
  def write(data)
    return 0 if data.empty?
    data_pos = 0
    # if we don't have room, we can use the truncate hook to make more space.
    if data.length > @size - @pos
      begin
        truncate @pos + data.length
      rescue NotImplementedError
        raise IOError, "unable to grow #{inspect} to write #{data.length} bytes"
      end
    end
    # start part way through the active range
    pos, len = @ranges[@active]
    diff = @pos - @offsets[@active]
    pos += diff
    len -= diff
    loop do
      @io.seek pos
      if data_pos + len > data.length
        # what is left of the data fits within this range
        chunk = data[data_pos..-1]
        @io.write chunk
        @pos += chunk.length
        data_pos = data.length
        break
      end
      @io.write data[data_pos, len]
      @pos += len
      data_pos += len
      break if @active == @ranges.length - 1
      @active += 1
      pos, len = @ranges[@active]
    end
    data_pos
  end

  alias << write

  # Reads and returns the next line, including its trailing "\n" if it has
  # one. Returns nil at the end, like IO#gets.
  #
  # Data is read in chunks of 1024 bytes until a newline turns up, so lines of
  # any length work, as does a final line without a newline. The position is
  # left immediately after the returned line.
  #
  # i can wrap it in a buffered io stream that
  # provides gets, and appropriately handle pos,
  # truncate. mostly added just to pass the tests.
  # FIXME
  def gets
    return nil if eof?
    line = ''
    until eof?
      start = @pos
      chunk = read 1024
      # nothing came back although we aren't at the end: the parent io is
      # shorter than the ranges claim. stop rather than spin forever.
      break if chunk.empty?
      if (i = chunk.index("\n"))
        line << chunk[0..i]
        # give back whatever was read beyond the newline
        self.pos = start + i + 1
        break
      end
      line << chunk
    end
    line.empty? ? nil : line
  end
  # NOTE: unlike IO#readline, this returns nil at the end rather than raising.
  alias readline :gets

  def inspect
    "#<#{self.class} io=#{io.inspect}, size=#{@size}, pos=#{@pos}>"
  end
end
|
247
|
+
|
248
|
+
# this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
|
249
|
+
# only really needed for the allocation table writes etc. maybe just use explicit modes
|
250
|
+
# for those
|
251
|
+
# better yet write a test that breaks before I fix it. added nodoc for the
|
252
|
+
# time being.
|
253
|
+
class RangesIONonResizeable < RangesIO # :nodoc:
  # Same arguments as RangesIO#initialize (the mode may likewise be left out in
  # favour of the params hash), except that the IO::TRUNC flag is masked out of
  # the mode before it is handed on to the superclass.
  def initialize(io, mode = 'r', params = {})
    if mode.is_a?(Hash)
      params = mode
      mode = 'r'
    end
    non_truncating = IO::Mode.new(mode).flags & ~IO::TRUNC
    super(io, non_truncating, params)
  end
end
|
260
|
+
|