keeguon-ruby-ole 1.2.11.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/COPYING +20 -0
- data/ChangeLog +140 -0
- data/README +116 -0
- data/Rakefile +91 -0
- data/bin/oletool +40 -0
- data/lib/ole/base.rb +10 -0
- data/lib/ole/file_system.rb +7 -0
- data/lib/ole/ranges_io.rb +267 -0
- data/lib/ole/storage.rb +3 -0
- data/lib/ole/storage/base.rb +945 -0
- data/lib/ole/storage/file_system.rb +394 -0
- data/lib/ole/storage/meta_data.rb +150 -0
- data/lib/ole/storage/version.rb +8 -0
- data/lib/ole/support.rb +248 -0
- data/lib/ole/types.rb +2 -0
- data/lib/ole/types/base.rb +314 -0
- data/lib/ole/types/property_set.rb +202 -0
- data/ruby-ole.gemspec +32 -0
- data/test/oleWithDirs.ole +0 -0
- data/test/test.doc +0 -0
- data/test/test_SummaryInformation +0 -0
- data/test/test_filesystem.rb +932 -0
- data/test/test_mbat.rb +40 -0
- data/test/test_meta_data.rb +43 -0
- data/test/test_property_set.rb +40 -0
- data/test/test_ranges_io.rb +113 -0
- data/test/test_storage.rb +221 -0
- data/test/test_support.rb +155 -0
- data/test/test_types.rb +68 -0
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
- metadata +92 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 58fa07a027f1749169976efdfd28765389a64427
|
4
|
+
data.tar.gz: 2bab2adf00827a519565157b2ab29ff558f91bfd
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 856d34198b96fb409778dac630f9ecc83a40eded7771e2551910a886be6ab2e2ce134e7fe3f4b71e6b2fd0968d3d4e509509d018791d1bb0168741196ebd6dfa
|
7
|
+
data.tar.gz: d76eab9e94ebf6cfa3c8f10af7d0e7ef4cf51fcf2090a6ec463dd2e2cdc4187457becfdef0c647f4fc63ba3089fceaa136d1c65e3487b362e53ac119773be5d5
|
data/COPYING
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2007-2010 Charles Lowe
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4
|
+
of this software and associated documentation files (the "Software"), to deal
|
5
|
+
in the Software without restriction, including without limitation the rights
|
6
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7
|
+
copies of the Software, and to permit persons to whom the Software is
|
8
|
+
furnished to do so, subject to the following conditions:
|
9
|
+
|
10
|
+
The above copyright notice and this permission notice shall be included in
|
11
|
+
all copies or substantial portions of the Software.
|
12
|
+
|
13
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19
|
+
THE SOFTWARE.
|
20
|
+
|
data/ChangeLog
ADDED
@@ -0,0 +1,140 @@
|
|
1
|
+
== 1.2.11.7 / 2013-06-24
|
2
|
+
|
3
|
+
- Various encoding fixes to make tests pass on current rubies.
|
4
|
+
- Fix RangesIO#write behaviour when passed an encoded string (github #14,
|
5
|
+
romuloceccon).
|
6
|
+
- Fix Dirent#each_child attempting iteration on file children (github #13).
|
7
|
+
- Unused variable fixes to avoid warnings (github #12, kachick).
|
8
|
+
|
9
|
+
== 1.2.11.6 / 2012-12-10
|
10
|
+
|
11
|
+
- Fix breakage of writable IO stream detection on Windows (github #11).
|
12
|
+
|
13
|
+
== 1.2.11.5 / 2012-11-06
|
14
|
+
|
15
|
+
- Fix breakage of IO.parse_mode on Rubinius (issue #10).
|
16
|
+
- Make tests pass on rubinius (issue #11).
|
17
|
+
- Improve RangesIO test coverage.
|
18
|
+
- Don't warn when mbat_start is AVAIL instead of EOC (github #9).
|
19
|
+
|
20
|
+
== 1.2.11.4 / 2012-07-03
|
21
|
+
|
22
|
+
- Embed PropertySet meta data GUIDs and field lists, to avoid hitting the
|
23
|
+
filesystem and remove dependency on YAML.
|
24
|
+
- Update Rakefile to avoid warnings about both deprecated tasks and space
|
25
|
+
before parentheses.
|
26
|
+
- Remove Dirent#children=.
|
27
|
+
|
28
|
+
== 1.2.11.3 / 2012-02-25
|
29
|
+
|
30
|
+
- Various fixes for ruby 1.9.3 - handle iconv deprecation and
|
31
|
+
fix FileTime.from_time (github #7, brawnski).
|
32
|
+
- Avoid constant redefinition warning in gemspec.
|
33
|
+
|
34
|
+
== 1.2.11.2 / 2011-09-07
|
35
|
+
|
36
|
+
- Remove empty dirents after constructing tree (fixes non-contiguous
|
37
|
+
allocation table usage).
|
38
|
+
- Fix fallback Symbol#to_proc to match activesupport definition in case
|
39
|
+
we get loaded first (github #2, lazylester).
|
40
|
+
- Use method_defined? for fallback guards to support newer versions of
|
41
|
+
ruby (jocker).
|
42
|
+
- Add guard on FileTime#initialize to skip for newer versions of ruby.
|
43
|
+
Missing required methods, but optimization no longer relevant
|
44
|
+
anyway (github #4, sagmor).
|
45
|
+
|
46
|
+
== 1.2.11.1 / 2010-10-24
|
47
|
+
|
48
|
+
- Add gemspec and docs to packages so tests can pass again.
|
49
|
+
- Build tarballs again in package task.
|
50
|
+
|
51
|
+
== 1.2.11 / 2010-10-17
|
52
|
+
|
53
|
+
- Add COPYING for packaging (issue #7)
|
54
|
+
- Make tests pass using home_run (github #1)
|
55
|
+
- Make tests pass using mathn.
|
56
|
+
- Updates to suppress warnings on 1.9.2.
|
57
|
+
- Split out gemspec into separate file and use for Rakefile.
|
58
|
+
|
59
|
+
== 1.2.10.1 / 2010-03-19
|
60
|
+
|
61
|
+
- Avoid use of DateTime#new! (issue #4)
|
62
|
+
- Remove warning about unexpected root names (issue #5)
|
63
|
+
|
64
|
+
== 1.2.10 / 2009-07-20
|
65
|
+
|
66
|
+
- Mostly more performance enhancements, significantly faster for
|
67
|
+
certain operations.
|
68
|
+
- Using lots of files is faster due to new hash lookup for dirents by name.
|
69
|
+
- Writes of many files are faster now too as Dirent & FileTime serialization
|
70
|
+
has been improved.
|
71
|
+
- Certain operations from the filesystem api have been profiled and sped up.
|
72
|
+
- Don't use syswrite on jruby to avoid the buffered stream warnings.
|
73
|
+
|
74
|
+
== 1.2.9 / 2009-07-14
|
75
|
+
|
76
|
+
- Lots of performance enhancements for RangesIO.
|
77
|
+
|
78
|
+
== 1.2.8.2 / 2009-01-01
|
79
|
+
|
80
|
+
- Update code to support ruby 1.9.1
|
81
|
+
|
82
|
+
== 1.2.8.1 / 2008-10-22
|
83
|
+
|
84
|
+
- Fix a couple of breakages when using $KCODE = 'UTF8'
|
85
|
+
|
86
|
+
== 1.2.8 / 2008-10-08
|
87
|
+
|
88
|
+
- Check in the new fixes to the mbat support.
|
89
|
+
- Update README to be a bit more useful.
|
90
|
+
|
91
|
+
== 1.2.7 / 2008-08-12
|
92
|
+
|
93
|
+
- Prepare Ole::Types::PropertySet for write support.
|
94
|
+
- Introduce Ole::Storage#meta_data as an easy interface to meta data stored
|
95
|
+
within various property sets.
|
96
|
+
- Add new --metadata action to oletool to dump said metadata.
|
97
|
+
- Add new --mimetype action to oletool, and corresponding Ole::Storage#mime_type
|
98
|
+
function to try to guess mime type of a file based on some simple heuristics.
|
99
|
+
- Restructure project files a bit, and pull in file_system & meta_data support
|
100
|
+
by default.
|
101
|
+
- More tests - now have 100% coverage.
|
102
|
+
|
103
|
+
== 1.2.6 / 2008-07-21
|
104
|
+
|
105
|
+
- Fix FileClass#expand_path to work properly on darwin (issue #2)
|
106
|
+
- Guard against Enumerable#sum clash with active support (issue #3)
|
107
|
+
|
108
|
+
== 1.2.5 / 2008-02-16
|
109
|
+
|
110
|
+
- Make all tests pass on ruby 1.9.
|
111
|
+
|
112
|
+
== 1.2.4 / 2008-01-09
|
113
|
+
|
114
|
+
- Make all tests pass on windows (issue #1).
|
115
|
+
- Make all tests pass on a power pc (running ubuntu).
|
116
|
+
- Property set convenience access functions.
|
117
|
+
|
118
|
+
== 1.2.3 / 2007-12-28
|
119
|
+
|
120
|
+
- MBAT write support re-implemented. Can now write files over ~8mb again.
|
121
|
+
- Minor fixes (truncation in #flush, file modification timestamps)
|
122
|
+
- More test coverage
|
123
|
+
- Initial (read-only) property set support.
|
124
|
+
- Complete filesystem api, to pass most of the rubyzip tests.
|
125
|
+
- Add a ChangeLog :).
|
126
|
+
|
127
|
+
== 1.2.2 / 2007-11-05
|
128
|
+
|
129
|
+
- Lots of test updates, 90% coverage.
|
130
|
+
- Fix +to_tree+ method to be more efficient, and stream output.
|
131
|
+
- Optimizations from benchmarks and profiling, mostly for writes. Fixed
|
132
|
+
AllocationTable#resize_chain, RangesIOResizable#truncate and
|
133
|
+
AllocationTable#free_block.
|
134
|
+
- Add in filesystem test file from rubyzip, and start working on a
|
135
|
+
filesystem api.
|
136
|
+
|
137
|
+
== 1.2.1 / 2007-08-20
|
138
|
+
|
139
|
+
- Separate out from ruby-msg as new project.
|
140
|
+
|
data/README
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
= Introduction
|
2
|
+
|
3
|
+
The ruby-ole library provides a variety of functions primarily for
|
4
|
+
working with OLE2 structured storage files, such as those produced by
|
5
|
+
Microsoft Office - eg *.doc, *.msg etc.
|
6
|
+
|
7
|
+
= Example Usage
|
8
|
+
|
9
|
+
Here are some examples of how to use the library functionality,
|
10
|
+
categorised roughly by purpose.
|
11
|
+
|
12
|
+
1. Reading and writing files within an OLE container
|
13
|
+
|
14
|
+
The recommended way to manipulate the contents is via the
|
15
|
+
"file_system" API, whereby you use Ole::Storage instance methods
|
16
|
+
similar to the regular File and Dir class methods.
|
17
|
+
|
18
|
+
ole = Ole::Storage.open('oleWithDirs.ole', 'rb+')
|
19
|
+
p ole.dir.entries('.') # => [".", "..", "dir1", "dir2", "file1"]
|
20
|
+
p ole.file.read('file1')[0, 25] # => "this is the entry 'file1'"
|
21
|
+
ole.dir.mkdir('newdir')
|
22
|
+
|
23
|
+
2. Accessing OLE meta data
|
24
|
+
|
25
|
+
Some convenience functions are provided for (currently read only)
|
26
|
+
access to OLE property sets and other sources of meta data.
|
27
|
+
|
28
|
+
ole = Ole::Storage.open('test_word_95.doc')
|
29
|
+
p ole.meta_data.file_format # => "MSWordDoc"
|
30
|
+
p ole.meta_data.mime_type # => "application/msword"
|
31
|
+
p ole.meta_data.doc_author.split.first # => "Charles"
|
32
|
+
|
33
|
+
3. Raw access to underlying OLE internals
|
34
|
+
|
35
|
+
This is probably of little interest to most developers using the
|
36
|
+
library, but for some use cases you may need to drop down to the
|
37
|
+
lower level API on which the "file_system" API is constructed,
|
38
|
+
which exposes more of the format details.
|
39
|
+
|
40
|
+
<tt>Ole::Storage</tt> files can have multiple files with the same name,
|
41
|
+
or with a slash in the name, and other things that are probably
|
42
|
+
strictly invalid. This API is the only way to access those files.
|
43
|
+
|
44
|
+
You can access the header object directly:
|
45
|
+
|
46
|
+
p ole.header.num_sbat # => 1
|
47
|
+
p ole.header.magic.unpack('H*') # => ["d0cf11e0a1b11ae1"]
|
48
|
+
|
49
|
+
You can directly access the array of all Dirent objects,
|
50
|
+
including the root:
|
51
|
+
|
52
|
+
p ole.dirents.length # => 5
|
53
|
+
puts ole.root.to_tree
|
54
|
+
# =>
|
55
|
+
- #<Dirent:"Root Entry">
|
56
|
+
|- #<Dirent:"\001Ole" size=20 data="\001\000\000\002\000...">
|
57
|
+
|- #<Dirent:"\001CompObj" size=98 data="\001\000\376\377\003...">
|
58
|
+
|- #<Dirent:"WordDocument" size=2574 data="\334\245e\000-...">
|
59
|
+
\- #<Dirent:"\005SummaryInformation" size=54788 data="\376\377\000\000\001...">
|
60
|
+
|
61
|
+
You can access (through RangesIO methods, or by using the
|
62
|
+
relevant Dirent and AllocationTable methods) information like where within
|
63
|
+
the container a stream is located (these are offset/length pairs):
|
64
|
+
|
65
|
+
p ole.root["\001CompObj"].open { |io| io.ranges } # => [[0, 64], [64, 34]]
|
66
|
+
|
67
|
+
See the documentation for each class for more details.
|
68
|
+
|
69
|
+
= Thanks
|
70
|
+
|
71
|
+
* The code contained in this project was initially based on chicago's libole
|
72
|
+
(source available at http://prdownloads.sf.net/chicago/ole.tgz).
|
73
|
+
|
74
|
+
* It was later augmented with some corrections by inspecting pole, and (purely
|
75
|
+
for header definitions) gsf.
|
76
|
+
|
77
|
+
* The property set parsing code came from the apache java project POIFS.
|
78
|
+
|
79
|
+
* The excellent idea for using a pseudo file system style interface by providing
|
80
|
+
#file and #dir methods which mimic File and Dir, was borrowed (along with almost
|
81
|
+
unchanged tests!) from Thomas Sondergaard's rubyzip.
|
82
|
+
|
83
|
+
= TODO
|
84
|
+
|
85
|
+
== 1.2.12
|
86
|
+
|
87
|
+
* internal api cleanup
|
88
|
+
* add buffering to rangesio so that performance for small reads and writes
|
89
|
+
isn't so awful. maybe try and remove the bottlenecks of unbuffered first
|
90
|
+
with more profiling, then implement the buffering on top of that.
|
91
|
+
* fix mode strings - like truncate when using 'w+', supporting append
|
92
|
+
'a+' modes etc. done?
|
93
|
+
* make ranges io obey readable vs writeable modes.
|
94
|
+
* more RangesIO completion. ie, doesn't support #<< at the moment.
|
95
|
+
* maybe some oletool doc.
|
96
|
+
* make sure `rake test' runs tests both with $KCODE='UTF8', and without,
|
97
|
+
and maybe ensure i don't regress on 1.9 and jruby either now that they're
|
98
|
+
fixed.
|
99
|
+
|
100
|
+
== 1.3.1
|
101
|
+
|
102
|
+
* case insensitive open mode would be nice
|
103
|
+
* fix property sets a bit more. see TODO in Ole::Storage::MetaData
|
104
|
+
* ability to zero out padding and unused blocks
|
105
|
+
* better tests for mbat support.
|
106
|
+
* further doc cleanup
|
107
|
+
* add in place testing for jruby and ruby1.9
|
108
|
+
|
109
|
+
== Longer term
|
110
|
+
|
111
|
+
* more benchmarking, profiling, and speed fixes. was thinking vs other
|
112
|
+
ruby filesystems (eg, vs File/Dir itself, and vs rubyzip), and vs other
|
113
|
+
ole implementations (maybe perl's, and poifs) just to check it's in the
|
114
|
+
ballpark, with no remaining silly bottlenecks.
|
115
|
+
* supposedly vba does something weird to ole files. test that.
|
116
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake/testtask'
|
3
|
+
|
4
|
+
require 'rbconfig'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
# The gemspec is evaluated as Ruby code; the resulting specification object is
# handed to Gem::PackageTask further down.
spec = eval(File.read('ruby-ole.gemspec'))

# Plain `rake` runs the test suite.
task :default => [:test]

# Defines the test task over every test/test_*.rb file, with ruby warnings
# enabled and verbose output.
Rake::TestTask.new do |test_task|
  test_task.test_files = FileList["test/test_*.rb"]
  test_task.warning = true
  test_task.verbose = true
end

# Coverage task. Only defined when rcov can be loaded.
begin
  require 'rcov/rcovtask'
  # NOTE: this will not do anything until you add some tests
  desc "Create a cross-referenced code coverage report"
  Rcov::RcovTask.new do |rcov_task|
    rcov_task.test_files = FileList['test/test*.rb']
    rcov_task.ruby_opts << "-Ilib" # in order to use this rcov
    rcov_task.rcov_opts << "--xrefs" # comment to disable cross-references
    rcov_task.verbose = true
  end
rescue LoadError
  # Rcov not available
end

# Documentation task. Only defined when rdoc/task can be loaded.
begin
  require 'rdoc/task'
  RDoc::Task.new do |rdoc_task|
    rdoc_task.rdoc_dir = 'doc'
    rdoc_task.rdoc_files.include 'lib/**/*.rb'
    rdoc_task.rdoc_files.include 'README', 'ChangeLog'
    # NOTE(review): PKG_NAME is not defined in this Rakefile; presumably it is
    # set by the gemspec evaluated above - confirm.
    rdoc_task.title = "#{PKG_NAME} documentation"
    rdoc_task.options += %w[--line-numbers --inline-source --tab-width 2]
    rdoc_task.main = 'README'
  end
rescue LoadError
  # RDoc not available or too old (<2.4.2)
end

# Packaging task: builds the gem plus a tarball (no zip) into build/.
begin
  require 'rubygems/package_task'
  Gem::PackageTask.new(spec) do |package_task|
    package_task.need_tar = true
    package_task.need_zip = false
    package_task.package_dir = 'build'
  end
rescue LoadError
  # RubyGems too old (<1.3.2)
end
|
55
|
+
|
56
|
+
desc 'Run various benchmarks'
task :benchmark do
  require 'benchmark'
  require 'tempfile'
  require 'ole/storage'

  # should probably add some read benchmarks too
  #
  # Writes opts[:files] streams of opts[:size] zero bytes each into an ole
  # container backed by a temp file. Data is written in pieces of
  # opts[:block_size] bytes (100_000 when not given), followed by whatever
  # remainder is left over.
  def write_benchmark opts={}
    file_count = opts[:files]
    total_size = opts[:size]
    chunk_size = opts[:block_size] || 100_000
    full_chunks, tail_size = total_size.divmod chunk_size
    chunk = 0.chr * chunk_size
    tail = 0.chr * tail_size
    Tempfile.open 'ole_storage_benchmark' do |temp|
      Ole::Storage.open temp do |ole|
        file_count.times do |index|
          ole.file.open "file_#{index}", 'w' do |stream|
            full_chunks.times { stream.write chunk }
            stream.write tail
          end
        end
      end
    end
  end

  Benchmark.bm do |bm|
    # one stream of 1_000_000 bytes, five times over
    bm.report('write_1mb_1x5') do
      5.times { write_benchmark :files => 1, :size => 1_000_000 }
    end

    # 1_000 streams of 1_000 bytes each, five times over
    bm.report('write_1mb_2x5') do
      5.times { write_benchmark :files => 1_000, :size => 1_000 }
    end
  end
end
|
91
|
+
|
data/bin/oletool
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'ole/storage'
|
5
|
+
|
6
|
+
# Command line entry point. Parses ARGV, then runs the selected action
# (:tree by default) on every file named on the command line.
#
# Prints the usage text and exits with status 1 when no files are given.
def oletool
  opts = {:verbose => false, :action => :tree}
  # the block parameter gets its own name so it does not shadow the outer +op+
  op = OptionParser.new do |o|
    o.banner = "Usage: oletool [options] [files]"
    o.separator ''
    o.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
    o.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
    o.on('-m', '--mimetype', 'Print the guessed mime types') { opts[:action] = :mimetype }
    o.on('-y', '--metadata', 'Dump the internal meta data as YAML') { opts[:action] = :metadata }
    o.separator ''
    o.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
    o.on_tail('-h', '--help', 'Show this message') { puts o; exit }
  end
  files = op.parse ARGV
  if files.empty?
    puts 'Must specify 1 or more ole files.'
    puts op
    exit 1
  end
  Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
  files.each do |file|
    case opts[:action]
    when :tree
      Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
    when :repack
      Ole::Storage.open file, 'rb+', &:repack
    when :metadata
      # Kernel#y is only defined by Syck / IRB, so load YAML explicitly (and
      # only when this action is actually used) and dump the hash ourselves.
      require 'yaml'
      Ole::Storage.open(file) { |ole| puts ole.meta_data.to_h.to_yaml }
    when :mimetype
      puts Ole::Storage.open(file) { |ole| ole.meta_data.mime_type }
    end
  end
end
|
39
|
+
|
40
|
+
oletool
|
data/lib/ole/base.rb
ADDED
@@ -0,0 +1,267 @@
|
|
1
|
+
# encoding: ASCII-8BIT
|
2
|
+
|
3
|
+
# need Ole::IOMode
|
4
|
+
require 'ole/support'
|
5
|
+
|
6
|
+
#
|
7
|
+
# = Introduction
|
8
|
+
#
|
9
|
+
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
|
10
|
+
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
|
11
|
+
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
|
12
|
+
# no method to stream it.
|
13
|
+
#
|
14
|
+
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
|
15
|
+
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
|
16
|
+
# getting 16 bytes doesn't read the whole thing).
|
17
|
+
#
|
18
|
+
# In the simplest case it can be used with a single range to provide a limited io to a section of
|
19
|
+
# a file.
|
20
|
+
#
|
21
|
+
# = Limitations
|
22
|
+
#
|
23
|
+
# * No buffering. by design at the moment. Intended for large reads
|
24
|
+
#
|
25
|
+
# = TODO
|
26
|
+
#
|
27
|
+
# On further reflection, this class is something of a joining/optimization of
|
28
|
+
# two separate IO classes. a SubfileIO, for providing access to a range within
|
29
|
+
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
|
30
|
+
# a bunch of io objects as a single unified whole.
|
31
|
+
#
|
32
|
+
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
|
33
|
+
# convert a whole mime message into an IO stream, that can be read from.
|
34
|
+
# It will just be the concatenation of a series of IO objects, corresponding to
|
35
|
+
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
|
36
|
+
# original message proper, or RangesIO as provided by the Attachment#data, that
|
37
|
+
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
|
38
|
+
# fly. Thus the attachment, in its plain or encoded form, and the message as a
|
39
|
+
# whole never exists as a single string in memory, as it does now. This is a
|
40
|
+
# fair bit of work to achieve, but generally useful I believe.
|
41
|
+
#
|
42
|
+
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
43
|
+
#
|
44
|
+
class RangesIO
  attr_reader :io, :mode, :ranges, :size, :pos

  # +io+:: the parent io object that we are wrapping.
  # +mode+:: the mode to use
  # +params+:: hash of params.
  # * :ranges - byte offsets, either:
  #   1. an array of ranges [1..2, 4..5, 6..8] or
  #   2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
  #      (think the way String indexing works)
  #   Defaults to a single range covering all of +io+.
  # * :close_parent - boolean to close parent when this object is closed
  # * :combine - pass false to keep adjacent ranges separate (see #ranges=)
  #
  # +mode+ may be omitted, ie <tt>RangesIO.new io, :ranges => [...]</tt>.
  #
  # NOTE: the +ranges+ can overlap.
  def initialize io, mode='r', params={}
    mode, params = 'r', mode if Hash === mode
    ranges = params[:ranges]
    @params = {:close_parent => false}.merge params
    @mode = Ole::IOMode.new mode
    @io = io
    # initial position in the file
    @pos = 0
    self.ranges = ranges || [[0, io.size]]
    # handle some mode flags
    truncate 0 if @mode.truncate?
    seek size if @mode.append?
  end

  # add block form. TODO add test for this
  #
  # Without a block this is the same as +new+. With a block, the new object is
  # yielded, closed afterwards (even if the block raises), and the block's
  # value is returned.
  def self.open(*args, &block)
    ranges_io = new(*args)
    return ranges_io unless block_given?
    begin
      yield ranges_io
    ensure
      ranges_io.close
    end
  end

  # Replaces the list of ranges, and recalculates +size+ and the internal
  # offset table. The current position is kept, so Errno::EINVAL is raised if
  # it no longer fits within the new size.
  def ranges= ranges
    # convert ranges to arrays. check for negative ranges?
    ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
    # combine ranges
    if @params[:combine] == false
      # might be useful for debugging...
      @ranges = ranges
    else
      @ranges = []
      next_pos = nil
      ranges.each do |pos, len|
        if next_pos == pos
          # directly follows the previous range, so just extend that one
          @ranges.last[1] += len
          next_pos += len
        else
          @ranges << [pos, len]
          next_pos = pos + len
        end
      end
    end
    # calculate cumulative offsets from range sizes
    @size = 0
    @offsets = []
    @ranges.each do |_pos, len|
      @offsets << @size
      @size += len
    end
    # re-validates @pos against the new size and recalculates @active
    self.pos = @pos
  end

  # Moves to +pos+, interpreted according to +whence+ (IO::SEEK_SET,
  # IO::SEEK_CUR or IO::SEEK_END). Raises Errno::EINVAL for an unknown
  # +whence+, or if the resulting position is outside 0..size.
  def pos= pos, whence=IO::SEEK_SET
    case whence
    when IO::SEEK_SET
    when IO::SEEK_CUR
      pos += @pos
    when IO::SEEK_END
      pos = @size + pos
    else raise Errno::EINVAL
    end
    raise Errno::EINVAL unless (0..@size) === pos
    @pos = pos

    # do a binary search through @offsets to find the active range.
    a, b = 0, @offsets.length
    while a < b
      c = (a + b).div(2)
      pivot = @offsets[c]
      if pos == pivot
        @active = c
        return
      elsif pos < pivot
        b = c
      else
        a = c + 1
      end
    end

    # no exact hit: the range that starts before pos is the active one
    @active = a - 1
  end

  alias seek :pos=
  alias tell :pos

  # Moves back to the start.
  def rewind
    seek 0
  end

  # Closes the parent io, but only if :close_parent was given.
  def close
    @io.close if @params[:close_parent]
  end

  # true once the position has reached +size+.
  def eof?
    @pos == @size
  end

  # read bytes from file, to a maximum of +limit+, or all available if unspecified.
  #
  # Returns an empty string at end of file. Fewer than +limit+ bytes are
  # returned when the ranges run out, or the parent io comes up short.
  def read limit=nil
    # String.new gives a fresh mutable buffer regardless of the source
    # encoding or frozen string literal settings
    data = String.new
    return data if eof?
    limit ||= size
    # start part way through the active range
    pos, len = @ranges[@active]
    diff = @pos - @offsets[@active]
    pos += diff
    len -= diff
    loop do
      @io.seek pos
      want = limit < len ? limit : len
      s = @io.read(want).to_s
      @pos += s.length
      data << s
      # done if this range satisfied the request, or the parent io was short
      break if limit < len || s.length != len
      limit -= len
      break if @active == @ranges.length - 1
      @active += 1
      pos, len = @ranges[@active]
    end
    data
  end

  # you may override this call to update @ranges and @size, if applicable.
  def truncate size
    raise NotImplementedError, 'truncate not supported'
  end

  # using explicit forward instead of an alias now for overriding.
  # should override truncate.
  def size= size
    truncate size
  end

  # Writes +data+ at the current position, spreading it over the ranges, and
  # returns the number of bytes written. If there isn't enough room, #truncate
  # is asked to grow the object; IOError is raised if it can't.
  def write data
    # duplicates object to avoid side effects for the caller, but do so only if
    # encoding isn't already ASCII-8BIT (slight optimization)
    if data.respond_to?(:encoding) && data.encoding != Encoding::ASCII_8BIT
      data = data.dup.force_encoding(Encoding::ASCII_8BIT)
    end
    return 0 if data.empty?
    data_pos = 0
    # if we don't have room, we can use the truncate hook to make more space.
    if data.length > @size - @pos
      begin
        truncate @pos + data.length
      rescue NotImplementedError
        raise IOError, "unable to grow #{inspect} to write #{data.length} bytes"
      end
    end
    # start part way through the active range
    pos, len = @ranges[@active]
    diff = @pos - @offsets[@active]
    pos += diff
    len -= diff
    loop do
      @io.seek pos
      if data_pos + len > data.length
        # the rest of the data fits within this range
        chunk = data[data_pos..-1]
        @io.write chunk
        @pos += chunk.length
        data_pos = data.length
        break
      end
      @io.write data[data_pos, len]
      @pos += len
      data_pos += len
      break if @active == @ranges.length - 1
      @active += 1
      pos, len = @ranges[@active]
    end
    data_pos
  end

  alias << write

  # i can wrap it in a buffered io stream that
  # provides gets, and appropriately handle pos,
  # truncate. mostly added just to pass the tests.
  # FIXME
  #
  # Returns the next line including its trailing "\n" (the last line may lack
  # one), or nil at end of file. Reads 1024 bytes at a time until a newline
  # turns up, then moves the position back to just after it.
  def gets
    return nil if eof?
    line = String.new
    until eof?
      chunk = read 1024
      # parent io is shorter than the ranges claim - nothing more to get
      break if chunk.empty?
      i = chunk.index "\n"
      if i
        # un-read whatever followed the newline
        self.pos -= chunk.length - (i + 1)
        line << chunk[0..i]
        break
      end
      line << chunk
    end
    line.empty? ? nil : line
  end

  # Like #gets, but raises EOFError at end of file instead of returning nil.
  def readline
    gets || raise(EOFError, 'end of file reached')
  end

  def inspect
    "#<#{self.class} io=#{io.inspect}, size=#{@size}, pos=#{@pos}>"
  end
end
|
254
|
+
|
255
|
+
# this subclass of ranges io explicitly ignores the truncate part of 'w' modes.
|
256
|
+
# only really needed for the allocation table writes etc. maybe just use explicit modes
|
257
|
+
# for those
|
258
|
+
# better yet write a test that breaks before I fix it. added nodoc for the
|
259
|
+
# time being.
|
260
|
+
class RangesIONonResizeable < RangesIO # :nodoc:
  # Takes the same arguments as RangesIO#initialize (+mode+ may be omitted in
  # favour of the params hash), but clears the IO::TRUNC bit from the mode
  # flags before handing over to the superclass.
  def initialize io, mode='r', params={}
    if Hash === mode
      params = mode
      mode = 'r'
    end
    flags_without_trunc = Ole::IOMode.new(mode).flags & ~IO::TRUNC
    super io, flags_without_trunc, params
  end
end
|
267
|
+
|