keeguon-ruby-ole 1.2.11.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/COPYING +20 -0
- data/ChangeLog +140 -0
- data/README +116 -0
- data/Rakefile +91 -0
- data/bin/oletool +40 -0
- data/lib/ole/base.rb +10 -0
- data/lib/ole/file_system.rb +7 -0
- data/lib/ole/ranges_io.rb +267 -0
- data/lib/ole/storage.rb +3 -0
- data/lib/ole/storage/base.rb +945 -0
- data/lib/ole/storage/file_system.rb +394 -0
- data/lib/ole/storage/meta_data.rb +150 -0
- data/lib/ole/storage/version.rb +8 -0
- data/lib/ole/support.rb +248 -0
- data/lib/ole/types.rb +2 -0
- data/lib/ole/types/base.rb +314 -0
- data/lib/ole/types/property_set.rb +202 -0
- data/ruby-ole.gemspec +32 -0
- data/test/oleWithDirs.ole +0 -0
- data/test/test.doc +0 -0
- data/test/test_SummaryInformation +0 -0
- data/test/test_filesystem.rb +932 -0
- data/test/test_mbat.rb +40 -0
- data/test/test_meta_data.rb +43 -0
- data/test/test_property_set.rb +40 -0
- data/test/test_ranges_io.rb +113 -0
- data/test/test_storage.rb +221 -0
- data/test/test_support.rb +155 -0
- data/test/test_types.rb +68 -0
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
- metadata +92 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 58fa07a027f1749169976efdfd28765389a64427
|
4
|
+
data.tar.gz: 2bab2adf00827a519565157b2ab29ff558f91bfd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 856d34198b96fb409778dac630f9ecc83a40eded7771e2551910a886be6ab2e2ce134e7fe3f4b71e6b2fd0968d3d4e509509d018791d1bb0168741196ebd6dfa
|
7
|
+
data.tar.gz: d76eab9e94ebf6cfa3c8f10af7d0e7ef4cf51fcf2090a6ec463dd2e2cdc4187457becfdef0c647f4fc63ba3089fceaa136d1c65e3487b362e53ac119773be5d5
|
data/COPYING
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2007-2010 Charles Lowe
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
20
|
+
|
data/ChangeLog
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
== 1.2.11.7 / 2013-06-24
|
2
|
+
|
3
|
+
- Various encoding fixes to make tests pass on current rubies.
|
4
|
+
- Fix RangesIO#write behaviour when passed an encoded string (github #14,
|
5
|
+
romuloceccon).
|
6
|
+
- Fix Dirent#each_child attempting iteration on file children (github #13).
|
7
|
+
- Unused variable fixes to avoid warnings (github #12, kachick).
|
8
|
+
|
9
|
+
== 1.2.11.6 / 2012-12-10
|
10
|
+
|
11
|
+
- Fix breakage of writable IO stream detection on Windows (github #11).
|
12
|
+
|
13
|
+
== 1.2.11.5 / 2012-11-06
|
14
|
+
|
15
|
+
- Fix breakage of IO.parse_mode on Rubinius (issue #10).
|
16
|
+
- Make tests pass on rubinius (issue #11).
|
17
|
+
- Improve RangesIO test coverage.
|
18
|
+
- Don't warn when mbat_start is AVAIL instead of EOC (github #9).
|
19
|
+
|
20
|
+
== 1.2.11.4 / 2012-07-03
|
21
|
+
|
22
|
+
- Embed PropertySet meta data GUIDs and field lists, to avoid hitting the
|
23
|
+
filesystem and remove dependency on YAML.
|
24
|
+
- Update Rakefile to avoid warnings about both deprecated tasks and space
|
25
|
+
before parentheses.
|
26
|
+
- Remove Dirent#children=.
|
27
|
+
|
28
|
+
== 1.2.11.3 / 2012-02-25
|
29
|
+
|
30
|
+
- Various fixes for ruby 1.9.3 - handle iconv deprecation and
|
31
|
+
fix FileTime.from_time (github #7, brawnski).
|
32
|
+
- Avoid constant redefinition warning in gemspec.
|
33
|
+
|
34
|
+
== 1.2.11.2 / 2011-09-07
|
35
|
+
|
36
|
+
- Remove empty dirents after constructing tree (fixes non-contiguous
|
37
|
+
allocation table usage).
|
38
|
+
- Fix fallback Symbol#to_proc to match activesupport definition in case
|
39
|
+
we get loaded first (github #2, lazylester).
|
40
|
+
- Use method_defined? for fallback guards to support newer versions of
|
41
|
+
ruby (jocker).
|
42
|
+
- Add guard on FileTime#initialize to skip for newer versions of ruby.
|
43
|
+
Missing required methods, but optimization no longer relevant
|
44
|
+
anyway (github #4, sagmor).
|
45
|
+
|
46
|
+
== 1.2.11.1 / 2010-10-24
|
47
|
+
|
48
|
+
- Add gemspec and docs to packages so tests can pass again.
|
49
|
+
- Build tarballs again in package task.
|
50
|
+
|
51
|
+
== 1.2.11 / 2010-10-17
|
52
|
+
|
53
|
+
- Add COPYING for packaging (issue #7)
|
54
|
+
- Make tests pass using home_run (github #1)
|
55
|
+
- Make tests pass using mathn.
|
56
|
+
- Updates to suppress warnings on 1.9.2.
|
57
|
+
- Split out gemspec into separate file and use for Rakefile.
|
58
|
+
|
59
|
+
== 1.2.10.1 / 2010-03-19
|
60
|
+
|
61
|
+
- Avoid use of DateTime#new! (issue #4)
|
62
|
+
- Remove warning about unexpected root names (issue #5)
|
63
|
+
|
64
|
+
== 1.2.10 / 2009-07-20
|
65
|
+
|
66
|
+
- Mostly more performance enhancements, significantly faster for
|
67
|
+
certain operations.
|
68
|
+
- Using lots of files is faster due to new hash lookup for dirents by name.
|
69
|
+
- Writes of many files are faster now too as Dirent & FileTime serialization
|
70
|
+
has been improved.
|
71
|
+
- Certain operations from the filesystem api have been profiled and sped up.
|
72
|
+
- Don't use syswrite on jruby to avoid the buffered stream warnings.
|
73
|
+
|
74
|
+
== 1.2.9 / 2009-07-14
|
75
|
+
|
76
|
+
- Lots of performance enhancements for RangesIO.
|
77
|
+
|
78
|
+
== 1.2.8.2 / 2009-01-01
|
79
|
+
|
80
|
+
- Update code to support ruby 1.9.1
|
81
|
+
|
82
|
+
== 1.2.8.1 / 2008-10-22
|
83
|
+
|
84
|
+
- Fix a couple of breakages when using $KCODE = 'UTF8'
|
85
|
+
|
86
|
+
== 1.2.8 / 2008-10-08
|
87
|
+
|
88
|
+
- Check in the new fixes to the mbat support.
|
89
|
+
- Update README to be a bit more useful.
|
90
|
+
|
91
|
+
== 1.2.7 / 2008-08-12
|
92
|
+
|
93
|
+
- Prepare Ole::Types::PropertySet for write support.
|
94
|
+
- Introduce Ole::Storage#meta_data as an easy interface to meta data stored
|
95
|
+
within various property sets.
|
96
|
+
- Add new --metadata action to oletool to dump said metadata.
|
97
|
+
- Add new --mimetype action to oletool, and corresponding Ole::Storage#mime_type
|
98
|
+
function to try to guess mime type of a file based on some simple heuristics.
|
99
|
+
- Restructure project files a bit, and pull in file_system & meta_data support
|
100
|
+
by default.
|
101
|
+
- More tests - now have 100% coverage.
|
102
|
+
|
103
|
+
== 1.2.6 / 2008-07-21
|
104
|
+
|
105
|
+
- Fix FileClass#expand_path to work properly on darwin (issue #2)
|
106
|
+
- Guard against Enumerable#sum clash with active support (issue #3)
|
107
|
+
|
108
|
+
== 1.2.5 / 2008-02-16
|
109
|
+
|
110
|
+
- Make all tests pass on ruby 1.9.
|
111
|
+
|
112
|
+
== 1.2.4 / 2008-01-09
|
113
|
+
|
114
|
+
- Make all tests pass on windows (issue #1).
|
115
|
+
- Make all tests pass on a power pc (running ubuntu).
|
116
|
+
- Property set convenience access functions.
|
117
|
+
|
118
|
+
== 1.2.3 / 2007-12-28
|
119
|
+
|
120
|
+
- MBAT write support re-implemented. Can now write files over ~8mb again.
|
121
|
+
- Minor fixes (truncation in #flush, file modification timestamps)
|
122
|
+
- More test coverage
|
123
|
+
- Initial (read-only) property set support.
|
124
|
+
- Complete filesystem api, to pass most of the rubyzip tests.
|
125
|
+
- Add a ChangeLog :).
|
126
|
+
|
127
|
+
== 1.2.2 / 2007-11-05
|
128
|
+
|
129
|
+
- Lots of test updates, 90% coverage.
|
130
|
+
- Fix +to_tree+ method to be more efficient, and stream output.
|
131
|
+
- Optimizations from benchmarks and profiling, mostly for writes. Fixed
|
132
|
+
AllocationTable#resize_chain, RangesIOResizable#truncate and
|
133
|
+
AllocationTable#free_block.
|
134
|
+
- Add in filesystem test file from rubyzip, and start working on a
|
135
|
+
filesystem api.
|
136
|
+
|
137
|
+
== 1.2.1 / 2007-08-20
|
138
|
+
|
139
|
+
- Separate out from ruby-msg as new project.
|
140
|
+
|
data/README
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
= Introduction
|
2
|
+
|
3
|
+
The ruby-ole library provides a variety of functions primarily for
|
4
|
+
working with OLE2 structured storage files, such as those produced by
|
5
|
+
Microsoft Office - eg *.doc, *.msg etc.
|
6
|
+
|
7
|
+
= Example Usage
|
8
|
+
|
9
|
+
Here are some examples of how to use the library functionality,
|
10
|
+
categorised roughly by purpose.
|
11
|
+
|
12
|
+
1. Reading and writing files within an OLE container
|
13
|
+
|
14
|
+
The recommended way to manipulate the contents is via the
|
15
|
+
"file_system" API, whereby you use Ole::Storage instance methods
|
16
|
+
similar to the regular File and Dir class methods.
|
17
|
+
|
18
|
+
ole = Ole::Storage.open('oleWithDirs.ole', 'rb+')
|
19
|
+
p ole.dir.entries('.') # => [".", "..", "dir1", "dir2", "file1"]
|
20
|
+
p ole.file.read('file1')[0, 25] # => "this is the entry 'file1'"
|
21
|
+
ole.dir.mkdir('newdir')
|
22
|
+
|
23
|
+
2. Accessing OLE meta data
|
24
|
+
|
25
|
+
Some convenience functions are provided for (currently read only)
|
26
|
+
access to OLE property sets and other sources of meta data.
|
27
|
+
|
28
|
+
ole = Ole::Storage.open('test_word_95.doc')
|
29
|
+
p ole.meta_data.file_format # => "MSWordDoc"
|
30
|
+
p ole.meta_data.mime_type # => "application/msword"
|
31
|
+
p ole.meta_data.doc_author.split.first # => "Charles"
|
32
|
+
|
33
|
+
3. Raw access to underlying OLE internals
|
34
|
+
|
35
|
+
This is probably of little interest to most developers using the
|
36
|
+
library, but for some use cases you may need to drop down to the
|
37
|
+
lower level API on which the "file_system" API is constructed,
|
38
|
+
which exposes more of the format details.
|
39
|
+
|
40
|
+
<tt>Ole::Storage</tt> files can have multiple files with the same name,
|
41
|
+
or with a slash in the name, and other things that are probably
|
42
|
+
strictly invalid. This API is the only way to access those files.
|
43
|
+
|
44
|
+
You can access the header object directly:
|
45
|
+
|
46
|
+
p ole.header.num_sbat # => 1
|
47
|
+
p ole.header.magic.unpack('H*') # => ["d0cf11e0a1b11ae1"]
|
48
|
+
|
49
|
+
You can directly access the array of all Dirent objects,
|
50
|
+
including the root:
|
51
|
+
|
52
|
+
p ole.dirents.length # => 5
|
53
|
+
puts ole.root.to_tree
|
54
|
+
# =>
|
55
|
+
- #<Dirent:"Root Entry">
|
56
|
+
|- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000...">
|
57
|
+
|- #<Dirent:"\001CompObj" size=98 data="\001\000\376\377\003...">
|
58
|
+
|- #<Dirent:"WordDocument" size=2574 data="\334\245e\000-...">
|
59
|
+
\- #<Dirent:"\005SummaryInformation" size=54788 data="\376\377\000\000\001...">
|
60
|
+
|
61
|
+
You can access (through RangesIO methods, or by using the
|
62
|
+
relevant Dirent and AllocationTable methods) information like where within
|
63
|
+
the container a stream is located (these are offset/length pairs):
|
64
|
+
|
65
|
+
p ole.root["\001CompObj"].open { |io| io.ranges } # => [[0, 64], [64, 34]]
|
66
|
+
|
67
|
+
See the documentation for each class for more details.
|
68
|
+
|
69
|
+
= Thanks
|
70
|
+
|
71
|
+
* The code contained in this project was initially based on chicago's libole
|
72
|
+
(source available at http://prdownloads.sf.net/chicago/ole.tgz).
|
73
|
+
|
74
|
+
* It was later augmented with some corrections by inspecting pole, and (purely
|
75
|
+
for header definitions) gsf.
|
76
|
+
|
77
|
+
* The property set parsing code came from the apache java project POIFS.
|
78
|
+
|
79
|
+
* The excellent idea for using a pseudo file system style interface by providing
|
80
|
+
#file and #dir methods which mimic File and Dir, was borrowed (along with almost
|
81
|
+
unchanged tests!) from Thomas Sondergaard's rubyzip.
|
82
|
+
|
83
|
+
= TODO
|
84
|
+
|
85
|
+
== 1.2.12
|
86
|
+
|
87
|
+
* internal api cleanup
|
88
|
+
* add buffering to rangesio so that performance for small reads and writes
|
89
|
+
isn't so awful. maybe try and remove the bottlenecks of unbuffered first
|
90
|
+
with more profiling, then implement the buffering on top of that.
|
91
|
+
* fix mode strings - like truncate when using 'w+', supporting append
|
92
|
+
'a+' modes etc. done?
|
93
|
+
* make ranges io obey readable vs writeable modes.
|
94
|
+
* more RangesIO completion. ie, doesn't support #<< at the moment.
|
95
|
+
* maybe some oletool doc.
|
96
|
+
* make sure `rake test' runs tests both with $KCODE='UTF8', and without,
|
97
|
+
and maybe ensure i don't regress on 1.9 and jruby either now that they're
|
98
|
+
fixed.
|
99
|
+
|
100
|
+
== 1.3.1
|
101
|
+
|
102
|
+
* case insensitive open mode would be nice
|
103
|
+
* fix property sets a bit more. see TODO in Ole::Storage::MetaData
|
104
|
+
* ability to zero out padding and unused blocks
|
105
|
+
* better tests for mbat support.
|
106
|
+
* further doc cleanup
|
107
|
+
* add in place testing for jruby and ruby1.9
|
108
|
+
|
109
|
+
== Longer term
|
110
|
+
|
111
|
+
* more benchmarking, profiling, and speed fixes. was thinking vs other
|
112
|
+
ruby filesystems (eg, vs File/Dir itself, and vs rubyzip), and vs other
|
113
|
+
ole implementations (maybe perl's, and poifs) just to check it's in the
|
114
|
+
ballpark, with no remaining silly bottlenecks.
|
115
|
+
* supposedly vba does something weird to ole files. test that.
|
116
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
require 'rbconfig'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
# The gem specification lives in its own file; evaluate it once so the
# packaging task below can reuse the resulting object.
spec = eval(File.read('ruby-ole.gemspec'))

# Running plain `rake` runs the test suite.
task :default => [:test]

# Unit tests: every test/test_*.rb file, with ruby warnings switched on.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList['test/test_*.rb']
  test_task.warning = true
  test_task.verbose = true
end

# Optional task: coverage report, only defined when rcov can be loaded.
begin
  require 'rcov/rcovtask'
  # NOTE: this will not do anything until you add some tests
  desc 'Create a cross-referenced code coverage report'
  Rcov::RcovTask.new do |rcov_task|
    rcov_task.test_files = FileList['test/test*.rb']
    rcov_task.ruby_opts << '-Ilib'    # in order to use this rcov
    rcov_task.rcov_opts << '--xrefs'  # comment to disable cross-references
    rcov_task.verbose = true
  end
rescue LoadError
  # Rcov not available
end

# Optional task: API documentation, only defined when rdoc/task can be loaded.
begin
  require 'rdoc/task'
  RDoc::Task.new do |rdoc_task|
    rdoc_task.rdoc_dir = 'doc'
    rdoc_task.rdoc_files.include('lib/**/*.rb')
    rdoc_task.rdoc_files.include('README', 'ChangeLog')
    # NOTE(review): PKG_NAME is not defined in this Rakefile; presumably it is
    # set as a side effect of evaluating the gemspec above -- TODO confirm.
    rdoc_task.title = "#{PKG_NAME} documentation"
    rdoc_task.options += %w[--line-numbers --inline-source --tab-width 2]
    rdoc_task.main = 'README'
  end
rescue LoadError
  # RDoc not available or too old (<2.4.2)
end

# Optional task: build the gem and a tarball (no zip) into build/.
begin
  require 'rubygems/package_task'
  Gem::PackageTask.new(spec) do |package_task|
    package_task.need_tar = true
    package_task.need_zip = false
    package_task.package_dir = 'build'
  end
rescue LoadError
  # RubyGems too old (<1.3.2)
end
|
55
|
+
|
56
|
+
desc 'Run various benchmarks'
task :benchmark do
  require 'benchmark'
  require 'tempfile'
  require 'ole/storage'

  # Writes opts[:files] streams of opts[:size] zero bytes each into a fresh
  # ole container backed by a temp file. Data is written in chunks of
  # opts[:block_size] bytes (default 100_000) followed by the remainder.
  #
  # should probably add some read benchmarks too
  def write_benchmark(opts = {})
    file_count = opts[:files]
    total_size = opts[:size]
    chunk_size = opts[:block_size] || 100_000
    full_chunks, tail_size = total_size.divmod(chunk_size)
    chunk = 0.chr * chunk_size
    tail = 0.chr * tail_size
    Tempfile.open('ole_storage_benchmark') do |temp|
      Ole::Storage.open(temp) do |ole|
        file_count.times do |index|
          ole.file.open("file_#{index}", 'w') do |stream|
            full_chunks.times { stream.write(chunk) }
            stream.write(tail)
          end
        end
      end
    end
  end

  Benchmark.bm do |bm|
    # one 1mb stream per container, repeated 5 times
    bm.report('write_1mb_1x5') do
      5.times { write_benchmark(:files => 1, :size => 1_000_000) }
    end

    # a thousand 1kb streams per container, repeated 5 times
    bm.report('write_1mb_2x5') do
      5.times { write_benchmark(:files => 1_000, :size => 1_000) }
    end
  end
end
|
91
|
+
|
data/bin/oletool
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ole/storage'
|
5
|
+
|
6
|
+
# Command line entry point: parses ARGV and runs the selected action
# (tree dump, repack, mime type guess or meta data dump) on every file given.
#
# Exits with status 1 (after printing the usage text) when no files are given.
def oletool
  opts = {:verbose => false, :action => :tree}
  # the block parameter is named differently from the local so that it does
  # not shadow it (ruby -w warns about that on older interpreters).
  parser = OptionParser.new do |op|
    op.banner = "Usage: oletool [options] [files]"
    op.separator ''
    op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
    op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
    op.on('-m', '--mimetype', 'Print the guessed mime types') { opts[:action] = :mimetype }
    op.on('-y', '--metadata', 'Dump the internal meta data as YAML') { opts[:action] = :metadata }
    op.separator ''
    op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
    op.on_tail('-h', '--help', 'Show this message') { puts op; exit }
  end
  files = parser.parse ARGV
  if files.empty?
    puts 'Must specify 1 or more ole files.'
    puts parser
    exit 1
  end
  Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
  files.each do |file|
    case opts[:action]
    when :tree
      Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
    when :repack
      Ole::Storage.open file, 'rb+', &:repack
    when :metadata
      # Kernel#y is only available inside irb on psych based rubies, so load
      # yaml on demand and dump explicitly instead.
      require 'yaml'
      Ole::Storage.open(file) { |ole| puts ole.meta_data.to_h.to_yaml }
    when :mimetype
      puts Ole::Storage.open(file) { |ole| ole.meta_data.mime_type }
    end
  end
end
|
39
|
+
|
40
|
+
oletool
|
data/lib/ole/base.rb
ADDED
@@ -0,0 +1,267 @@
|
|
1
|
+
# encoding: ASCII-8BIT
|
2
|
+
|
3
|
+
# need Ole::IOMode
|
4
|
+
require 'ole/support'
|
5
|
+
|
6
|
+
#
|
7
|
+
# = Introduction
|
8
|
+
#
|
9
|
+
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
|
10
|
+
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
|
11
|
+
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
|
12
|
+
# no method to stream it.
|
13
|
+
#
|
14
|
+
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
|
15
|
+
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
|
16
|
+
# getting 16 bytes doesn't read the whole thing).
|
17
|
+
#
|
18
|
+
# In the simplest case it can be used with a single range to provide a limited io to a section of
|
19
|
+
# a file.
|
20
|
+
#
|
21
|
+
# = Limitations
|
22
|
+
#
|
23
|
+
# * No buffering. by design at the moment. Intended for large reads
|
24
|
+
#
|
25
|
+
# = TODO
|
26
|
+
#
|
27
|
+
# On further reflection, this class is something of a joining/optimization of
|
28
|
+
# two separate IO classes. a SubfileIO, for providing access to a range within
|
29
|
+
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
|
30
|
+
# a bunch of io objects as a single unified whole.
|
31
|
+
#
|
32
|
+
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
|
33
|
+
# convert a whole mime message into an IO stream, that can be read from.
|
34
|
+
# It will just be the concatenation of a series of IO objects, corresponding to
|
35
|
+
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
|
36
|
+
# original message proper, or RangesIO as provided by the Attachment#data, that
|
37
|
+
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
|
38
|
+
# fly. Thus the attachment, in its plain or encoded form, and the message as a
|
39
|
+
# whole never exists as a single string in memory, as it does now. This is a
|
40
|
+
# fair bit of work to achieve, but generally useful I believe.
|
41
|
+
#
|
42
|
+
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
43
|
+
#
|
44
|
+
class RangesIO
  attr_reader :io, :mode, :ranges, :size, :pos

  # +io+:: the parent io object that we are wrapping.
  # +mode+:: the mode to use
  # +params+:: hash of params.
  # * :ranges - byte offsets, either:
  #   1. an array of ranges [1..2, 4..5, 6..8] or
  #   2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
  #      (think the way String indexing works)
  # * :close_parent - boolean to close parent when this object is closed
  # * :combine - pass false to keep adjacent ranges separate instead of merging them
  #
  # +mode+ may be omitted, in which case a hash given in its place is used as +params+.
  #
  # NOTE: the +ranges+ can overlap.
  def initialize(io, mode = 'r', params = {})
    mode, params = 'r', mode if Hash === mode
    ranges = params[:ranges]
    @params = {:close_parent => false}.merge(params)
    @mode = Ole::IOMode.new(mode)
    @io = io
    # initial position in the file
    @pos = 0
    # default to a single range covering the whole parent io
    self.ranges = ranges || [[0, io.size]]
    # handle some mode flags
    truncate 0 if @mode.truncate?
    seek size if @mode.append?
  end

  # Like +new+, but when a block is given the new object is yielded to it and
  # closed afterwards; the block's value is returned. Without a block the new
  # object itself is returned. TODO add test for this
  def self.open(*args, &block)
    ranges_io = new(*args)
    return ranges_io unless block_given?
    begin
      yield ranges_io
    ensure
      ranges_io.close
    end
  end

  # Replaces the list of ranges, recalculates +size+ and the cumulative
  # offsets, then re-seeks to the current position (which raises
  # Errno::EINVAL if that position is beyond the new size).
  def ranges=(ranges)
    # convert ranges to [pos, len] arrays. check for negative ranges?
    ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
    if @params[:combine] == false
      # might be useful for debugging...
      @ranges = ranges
    else
      # merge every range that starts exactly where the previous one ended
      @ranges = []
      next_pos = nil
      ranges.each do |pos, len|
        if next_pos == pos
          @ranges.last[1] += len
          next_pos += len
        else
          @ranges << [pos, len]
          next_pos = pos + len
        end
      end
    end
    # calculate cumulative offsets from range sizes
    @size = 0
    @offsets = []
    @ranges.each do |_, len|
      @offsets << @size
      @size += len
    end
    self.pos = @pos
  end

  # Moves the current position. +whence+ is one of IO::SEEK_SET, IO::SEEK_CUR
  # or IO::SEEK_END (only reachable through the +seek+ alias). Raises
  # Errno::EINVAL for an unknown +whence+ or a position outside 0..size.
  def pos=(pos, whence = IO::SEEK_SET)
    case whence
    when IO::SEEK_SET
    when IO::SEEK_CUR
      pos += @pos
    when IO::SEEK_END
      pos = @size + pos
    else raise Errno::EINVAL
    end
    raise Errno::EINVAL unless (0..@size) === pos
    @pos = pos

    # do a binary search through @offsets to find the active range.
    a, c, b = 0, 0, @offsets.length
    while a < b
      c = (a + b).div(2)
      pivot = @offsets[c]
      if pos == pivot
        @active = c
        return
      elsif pos < pivot
        b = c
      else
        a = c + 1
      end
    end

    # pos lies inside (or at the very end of) the range before index a
    @active = a - 1
  end

  alias seek :pos=
  alias tell :pos

  # Seeks back to the start.
  def rewind
    seek 0
  end

  # Closes the parent io, but only if :close_parent was requested.
  def close
    @io.close if @params[:close_parent]
  end

  def eof?
    @pos == @size
  end

  # read bytes from file, to a maximum of +limit+, or all available if unspecified.
  # returns an empty string (not nil) at end of file.
  def read(limit = nil)
    # String.new gives a binary (ASCII-8BIT) buffer regardless of the
    # source file's encoding comment.
    data = String.new
    return data if eof?
    limit ||= size
    pos, len = @ranges[@active]
    # skip the part of the active range that lies before the current position
    diff = @pos - @offsets[@active]
    pos += diff
    len -= diff
    loop do
      @io.seek pos
      if limit < len
        # request ends inside this range
        s = @io.read(limit).to_s
        @pos += s.length
        data << s
        break
      end
      s = @io.read(len).to_s
      @pos += s.length
      data << s
      # short read - the parent io has less data than the range claims
      break if s.length != len
      limit -= len
      break if @active == @ranges.length - 1
      @active += 1
      pos, len = @ranges[@active]
    end
    data
  end

  # you may override this call to update @ranges and @size, if applicable.
  def truncate(size)
    raise NotImplementedError, 'truncate not supported'
  end

  # using explicit forward instead of an alias now for overriding.
  # should override truncate.
  def size=(size)
    truncate size
  end

  # Writes +data+ at the current position, spreading it over as many ranges
  # as needed, and returns the number of bytes written. If there is not enough
  # room, +truncate+ is called to grow; IOError is raised when that is not
  # supported.
  def write(data)
    # duplicates object to avoid side effects for the caller, but do so only if
    # encoding isn't already ASCII-8BIT (slight optimization)
    if data.respond_to?(:encoding) && data.encoding != Encoding::ASCII_8BIT
      data = data.dup.force_encoding(Encoding::ASCII_8BIT)
    end
    return 0 if data.empty?
    data_pos = 0
    # if we don't have room, we can use the truncate hook to make more space.
    if data.length > @size - @pos
      begin
        truncate @pos + data.length
      rescue NotImplementedError
        raise IOError, "unable to grow #{inspect} to write #{data.length} bytes"
      end
    end
    pos, len = @ranges[@active]
    diff = @pos - @offsets[@active]
    pos += diff
    len -= diff
    loop do
      @io.seek pos
      if data_pos + len > data.length
        # the rest of the data fits inside this range
        chunk = data[data_pos..-1]
        @io.write chunk
        @pos += chunk.length
        data_pos = data.length
        break
      end
      @io.write data[data_pos, len]
      @pos += len
      data_pos += len
      break if @active == @ranges.length - 1
      @active += 1
      pos, len = @ranges[@active]
    end
    data_pos
  end

  alias << write

  # i can wrap it in a buffered io stream that
  # provides gets, and appropriately handle pos,
  # truncate. mostly added just to pass the tests.
  # FIXME
  #
  # Returns the next line including its trailing "\n", the remaining data if
  # the last line has no terminator, or nil at end of file. Data is read in
  # 1024 byte chunks and the position is moved back to just after the newline.
  def gets
    return nil if eof?
    line = String.new
    until eof?
      chunk = read 1024
      # parent io ran dry before the ranges did
      break if chunk.empty?
      newline_at = chunk.index "\n"
      if newline_at
        line << chunk[0..newline_at]
        # give back whatever was read past the newline
        self.pos -= chunk.length - (newline_at + 1)
        return line
      end
      line << chunk
    end
    line.empty? ? nil : line
  end

  # Like +gets+, but raises EOFError at end of file instead of returning nil.
  def readline
    gets || raise(EOFError, 'end of file reached')
  end

  def inspect
    "#<#{self.class} io=#{io.inspect}, size=#{@size}, pos=#{@pos}>"
  end
end
|
254
|
+
|
255
|
+
# this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
|
256
|
+
# only really needed for the allocation table writes etc. maybe just use explicit modes
|
257
|
+
# for those
|
258
|
+
# better yet write a test that breaks before I fix it. added nodoc for the
|
259
|
+
# time being.
|
260
|
+
class RangesIONonResizeable < RangesIO # :nodoc:
  # Takes the same arguments as the parent constructor, but strips the
  # IO::TRUNC flag from the mode before passing it on.
  def initialize(io, mode = 'r', params = {})
    # mode is optional: a hash in its place is really the params
    if Hash === mode
      params = mode
      mode = 'r'
    end
    non_truncating_flags = Ole::IOMode.new(mode).flags & ~IO::TRUNC
    super(io, non_truncating_flags, params)
  end
end
|
267
|
+
|