parseexcel 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README +87 -0
- data/lib/parseexcel/format.rb +109 -0
- data/lib/parseexcel/olestorage.rb +355 -0
- data/lib/parseexcel/parseexcel.rb +14 -0
- data/lib/parseexcel/parser.rb +1032 -0
- data/lib/parseexcel/workbook.rb +70 -0
- data/lib/parseexcel/worksheet.rb +152 -0
- data/test/data/annotation.xls +0 -0
- data/test/data/bar.xls +0 -0
- data/test/data/comment.5.0.xls +0 -0
- data/test/data/comment.xls +0 -0
- data/test/data/dates.xls +0 -0
- data/test/data/float.5.0.xls +0 -0
- data/test/data/float.xls +0 -0
- data/test/data/foo.xls +0 -0
- data/test/data/image.xls +0 -0
- data/test/data/nil.xls +0 -0
- data/test/data/umlaut.5.0.xls +0 -0
- data/test/data/umlaut.biff8.xls +0 -0
- data/test/data/uncompressed.str.xls +0 -0
- data/test/suite.rb +9 -0
- data/test/test_format.rb +63 -0
- data/test/test_olestorage.rb +175 -0
- data/test/test_parser.rb +733 -0
- data/test/test_workbook.rb +70 -0
- data/test/test_worksheet.rb +183 -0
- metadata +72 -0
data/README
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
Spreadsheet::ParseExcel - Get information from an Excel file.
|
2
|
+
============
|
3
|
+
Version: 0.5.1
|
4
|
+
Date: 2006-05-18
|
5
|
+
|
6
|
+
Short Description:
|
7
|
+
Spreadsheet::ParseExcel allows you to get information out of a
|
8
|
+
simple Excel file
|
9
|
+
This Package is an - as of today incomplete - translation of
|
10
|
+
Kawai Takanori's Perl module.
|
11
|
+
|
12
|
+
|
13
|
+
Requirements
|
14
|
+
------------
|
15
|
+
|
16
|
+
* ruby 1.8
|
17
|
+
|
18
|
+
Install
|
19
|
+
-------
|
20
|
+
|
21
|
+
Decompress the archive and enter its top directory.
|
22
|
+
Then type:
|
23
|
+
|
24
|
+
$ ruby setup.rb config
|
25
|
+
$ ruby setup.rb setup
|
26
|
+
($ su)
|
27
|
+
# ruby setup.rb install
|
28
|
+
|
29
|
+
You can also install files into your favorite directory
|
30
|
+
by supplying setup.rb some options. Try "ruby setup.rb --help".
|
31
|
+
|
32
|
+
|
33
|
+
Usage
|
34
|
+
-----
|
35
|
+
#!/usr/bin/env ruby
|
36
|
+
|
37
|
+
require 'parseexcel/parser'
|
38
|
+
|
39
|
+
# your first step is always reading in the file.
|
40
|
+
# that gives you a workbook-object, which has one or more worksheets,
|
41
|
+
# just like in Excel you have the possibility of multiple worksheets.
|
42
|
+
workbook = Spreadsheet::ParseExcel.parse(path_to_file)
|
43
|
+
|
44
|
+
# usually, you want the first worksheet:
|
45
|
+
worksheet = workbook.worksheet(0)
|
46
|
+
|
47
|
+
# now you can either iterate over all rows, skipping the first number of
|
48
|
+
# rows (in case you know they just contain column headers)
|
49
|
+
skip = 2
|
50
|
+
worksheet.each(skip) { |row|
|
51
|
+
# a row is actually just an Array of Cells..
|
52
|
+
first_cell = row.at(0)
|
53
|
+
|
54
|
+
# how you get data out of the cell depends on what datatype you
|
55
|
+
# expect:
|
56
|
+
|
57
|
+
# if you expect a String, you can pass an encoding and (iconv
|
58
|
+
# required) the content of the cell will be converted.
|
59
|
+
str = row.at(1).to_s('latin1')
|
60
|
+
|
61
|
+
# if you expect a Float:
|
62
|
+
float = row.at(2).to_f
|
63
|
+
|
64
|
+
# if you expect an Integer:
|
65
|
+
int = row.at(3).to_i
|
66
|
+
|
67
|
+
# if you expect a Date:
|
68
|
+
date = row.at(4).date
|
69
|
+
|
70
|
+
# ParseExcel makes a guess at what Datatype a cell has. At the moment,
|
71
|
+
# possible values are: :date, :numeric, :text
|
72
|
+
celltype = first_cell.type
|
73
|
+
}
|
74
|
+
|
75
|
+
# if you know exactly which row your data resides in, you may just
|
76
|
+
# retrieve that row, which is again simply an Array of Cells
|
77
|
+
row = worksheet.row(26)
|
78
|
+
|
79
|
+
|
80
|
+
License
|
81
|
+
-------
|
82
|
+
|
83
|
+
LGPL
|
84
|
+
|
85
|
+
|
86
|
+
URL: http://download.ywesee.com/parseexcel
|
87
|
+
Author: Hannes Wyss <hwyss@ywesee.com>
|
@@ -0,0 +1,109 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Spreadsheet::ParseExcel -- Extract Data from an Excel File
|
4
|
+
# Copyright (C) 2003 ywesee -- intellectual capital connected
|
5
|
+
#
|
6
|
+
# This library is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
8
|
+
# License as published by the Free Software Foundation; either
|
9
|
+
# version 2.1 of the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with this library; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
#
|
20
|
+
# ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
|
21
|
+
# hwyss@ywesee.com
|
22
|
+
#
|
23
|
+
# Format -- Spreadsheet::ParseExcel -- 10.06.2003 -- hwyss@ywesee.com
|
24
|
+
|
25
|
+
module Spreadsheet
  module ParseExcel
    # Formatting information attached to a cell: the attributes below plus
    # the index of the number-format string that tells dates from numbers.
    class Format
      # Number-format strings keyed by format index. The table is shared by
      # all Format instances; add_text_format adds entries to it.
      @@fmt_strs = {
        0x00 => '@',
        0x01 => '0',
        0x02 => '0.00',
        0x03 => '#,##0',
        0x04 => '#,##0.00',
        0x05 => '($#,##0_);($#,##0)',
        0x06 => '($#,##0_);[RED]($#,##0)',
        0x07 => '($#,##0.00_);($#,##0.00_)',
        0x08 => '($#,##0.00_);[RED]($#,##0.00_)',
        0x09 => '0%',
        0x0A => '0.00%',
        0x0B => '0.00E+00',
        0x0C => '# ?/?',
        0x0D => '# ??/??',
        0x0E => 'm-d-yy',
        0x0F => 'd-mmm-yy',
        0x10 => 'd-mmm',
        0x11 => 'mmm-yy',
        0x12 => 'h:mm AM/PM',
        0x13 => 'h:mm:ss AM/PM',
        0x14 => 'h:mm',
        0x15 => 'h:mm:ss',
        0x16 => 'm-d-yy h:mm',
        # 0x17-0x24 are deliberately absent (original note: they differ
        # between national versions).
        0x25 => '(#,##0_);(#,##0)',
        0x26 => '(#,##0_);[RED](#,##0)',
        0x27 => '(#,##0.00);(#,##0.00)',
        0x28 => '(#,##0.00);[RED](#,##0.00)',
        0x29 => '_(*#,##0_);_(*(#,##0);_(*"-"_);_(@_)',
        0x2A => '_($*#,##0_);_($*(#,##0);_(*"-"_);_(@_)',
        0x2B => '_(*#,##0.00_);_(*(#,##0.00);_(*"-"??_);_(@_)',
        0x2C => '_($*#,##0.00_);_($*(#,##0.00);_(*"-"??_);_(@_)',
        0x2D => 'mm:ss',
        0x2E => '[h]:mm:ss',
        0x2F => 'mm:ss.0',
        0x30 => '##0.0E+0',
        0x31 => '@',
      }

      # Matches date/time placeholders in a format string. Each letter may be
      # preceded by a NUL byte -- presumably so that format strings stored
      # with two bytes per character match as well (confirm against parser).
      @@date_pattern = /(\0?d\0?d|\0?m\0?m|\0?y\0?y|\0?h|\0?s\0?s)/i

      attr_accessor :font_no, :fmt_idx, :lock, :hidden, :style, :key_123
      attr_accessor :align_h, :wrap, :align_v, :just_last, :rotate, :indent
      attr_accessor :shrink, :merge, :read_dir, :encoding
      attr_accessor :border_style, :border_color, :border_diag, :fill

      # params:: Hash of attribute name => value. Keys without a matching
      #          writer method are silently ignored.
      def initialize(params={})
        params.each do |name, value|
          writer = "#{name}="
          send(writer, value) if respond_to?(writer)
        end
      end

      # Registers +fmt_str+ under +idx+ in the shared format table.
      def add_text_format(idx, fmt_str)
        @@fmt_strs.store(idx, fmt_str)
      end

      # Guesses the datatype of +cell+ (anything responding to #numeric).
      # Returns :text for non-numeric cells, :date when the format index is
      # in one of the date/time ranges or its format string contains a
      # date/time placeholder, and :numeric otherwise.
      def cell_type(cell)
        return :text unless cell.numeric

        index = @fmt_idx.to_i
        return :date if (0x0E..0x16).include?(index) || (0x2D..0x2F).include?(index)

        pattern = @@fmt_strs[@fmt_idx]
        (pattern && @@date_pattern.match(pattern)) ? :date : :numeric
      end

      # Returns +str+ untouched for the default :_native_ code; for any other
      # code the string is read as 16-bit big-endian values and each value is
      # packed back into a single byte.
      def text_format(str, code=:_native_)
        if code == :_native_
          str
        else
          str.unpack('n*').pack('C*')
        end
      end

      # Returns a copy of this format's format string ('' when the index is
      # unknown). With +target_encoding+ the string is converted from
      # @encoding through Iconv, which must have been loaded elsewhere.
      def to_s(target_encoding=nil)
        pattern = @@fmt_strs[@fmt_idx].to_s
        return pattern.dup unless target_encoding

        Iconv.new(target_encoding, @encoding).iconv(pattern)
      end
    end
  end
end
|
@@ -0,0 +1,355 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Spreadsheet::ParseExcel -- Extract Data from an Excel File
|
4
|
+
# Copyright (C) 2003 ywesee -- intellectual capital connected
|
5
|
+
#
|
6
|
+
# This library is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
8
|
+
# License as published by the Free Software Foundation; either
|
9
|
+
# version 2.1 of the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with this library; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
#
|
20
|
+
# ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
|
21
|
+
# hwyss@ywesee.com
|
22
|
+
#
|
23
|
+
# OLEReader -- Spreadsheet::ParseExcel -- 05.06.2003 -- hwyss@ywesee.com
|
24
|
+
|
25
|
+
require 'date'
|
26
|
+
|
27
|
+
# Minimal reader for OLE compound documents: the file starts with a fixed
# header, followed by equally sized "big blocks" that hold the block
# allocation table, the directory entries (PPS) and the stream data.
module OLE
  # Raised when a mandatory header field cannot be read from the file.
  class UnknownFormatError < RuntimeError; end

  # Broken-down timestamp of a directory entry. This is OLE::DateTime, not
  # the standard library's ::DateTime.
  class DateTime
    attr_reader :year, :month, :day, :hour, :min, :sec, :msec

    def initialize(year, month=1, day=1, hour=0, min=0, sec=0, msec=0)
      @year = year
      @month = month
      @day = day
      @hour = hour
      @min = min
      @sec = sec
      @msec = msec
    end

    # Calendar date of this timestamp, or nil when year/month/day do not
    # form a valid date.
    def date
      Date.new(@year, @month, @day)
    rescue ArgumentError
      nil
    end

    class << self
      # Number of days in +month+ (1..12; 12 is handled via the 0 remainder).
      def month_days(month, year)
        case month % 12
        when 0, 1, 3, 5, 7, 8, 10
          31
        when 4, 6, 9, 11
          30
        else
          Date.leap?(year) ? 29 : 28
        end
      end

      # Decodes +datetime+, a binary String holding a little-endian integer
      # (least significant byte first), into an OLE::DateTime.
      #
      # The integer counts units of 1/10_000_000 second since 1601-01-01
      # 00:00:00. The sub-second remainder is stored unscaled in #msec, so
      # despite its name it ranges over 0...10_000_000.
      def parse(datetime)
        # 1. Split into day count and time of day. The numeric value of each
        #    byte is needed here; String#to_i would parse decimal digits.
        ticks = datetime.unpack('C*').reverse.inject(0) { |sum, byte|
          (sum * 0x100) + byte
        }
        msec = ticks % 10000000
        seconds = ticks / 10000000
        day = (seconds / (24*60*60)) + 1
        time = seconds % (24*60*60)

        # 2. Walk forward from 1601, first by whole years, then by months.
        year = 1601
        ydays = year_days(year)
        while day > ydays
          day -= ydays
          year += 1
          ydays = year_days(year)
        end
        # An explicit counter (not a block parameter) so the result is the
        # same on every Ruby version and December comes out as 12.
        month = 1
        while month < 12
          mdays = month_days(month, year)
          break if day <= mdays
          day -= mdays
          month += 1
        end

        # 3. Time of day.
        hour = time / 3600
        min = (time % 3600) / 60
        sec = time % 60
        new(year, month, day, hour, min, sec, msec)
      end

      def year_days(year)
        Date.leap?(year) ? 366 : 365
      end
    end
  end

  # A compound document opened read-only in binary mode. #header is nil when
  # the file does not start with the compound-document signature.
  class Storage < File
    PpsType_Root = 5
    PpsType_Dir = 1
    PpsType_File = 2
    DataSizeSmall = 0x1000
    LongIntSize = 4
    PpsSize = 0x80
    # File signature, built with pack so that it is a binary string whatever
    # the source encoding is; a "\xD0..." literal in a UTF-8 source never
    # compares equal to data read in binary mode.
    HeaderID = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1].pack('C*').freeze

    attr_reader :header

    def initialize(filename)
      super(filename, "r")
      binmode
      @header = get_header
    end

    # Directory entries ("property storages").
    module PPS
      # Common part of every directory entry, decoded from its 0x80-byte
      # record.
      class Node
        attr_reader :no, :type, :prev_pps, :next_pps, :data
        attr_reader :dir_pps, :time_1st, :time_2nd, :start_block, :size
        attr_reader :name

        # no::      position of the entry in the directory
        # datastr:: the raw record (PpsSize bytes)
        def initialize(no, datastr)
          @no = no
          # 0x40: name size (2 bytes), 0x42: type (1 byte), 0x43: one byte
          # that is skipped ('x') so it cannot leak into @type, then three
          # 32-bit entry numbers.
          nm_size, @type, @prev_pps,
            @next_pps, @dir_pps = datastr[0x40, 16].unpack('vCxVVV')
          @time_1st = DateTime.parse(datastr[0x64, 8])
          @time_2nd = DateTime.parse(datastr[0x6C, 8])
          @start_block, @size = datastr[0x74, 8].unpack('VV')
          # the stored size includes a two-byte terminator
          nm_size -= 2 if nm_size > 2
          @name = datastr[0, nm_size]
        end

        # Loads the entry's data through +header+. Plain nodes carry none.
        def get_data(header)
        end
      end

      class Root < Node
        # The root entry's data is always read from big blocks.
        def get_data(header)
          @data = header.get_big_data(@start_block, @size)
        end
      end

      class Dir < Node
      end

      class File < Node
        # Streams shorter than DataSizeSmall are read from small blocks,
        # all others from big blocks.
        def get_data(header)
          @data = if @size < DataSizeSmall
            header.get_small_data(@start_block, @size)
          else
            header.get_big_data(@start_block, @size)
          end
        end
      end
    end

    class << self
      # False for the reserved block numbers (0xFFFFFFFC and above).
      def is_normal_block?(block)
        block < 0xFFFFFFFC
      end

      # Builds the PPS object matching the type byte of +datastr+; raises
      # RuntimeError for an unknown type.
      def pps_factory(pos, datastr)
        nm_size, type = datastr[0x40, 4].unpack('vC')
        nm_size -= 2 if nm_size > 2
        nm = datastr[0, nm_size]
        klass = {
          PpsType_Root => PPS::Root,
          PpsType_Dir => PPS::Dir,
          PpsType_File => PPS::File,
        }[type] or raise("unknown pps_type: #{type} / #{nm}")
        klass.new(pos, datastr)
      end
    end

    # Decoded file header plus the block-chain lookups built on top of it.
    class Header
      attr_reader :big_block_size, :small_block_size, :bdb_count, :root_start
      attr_reader :sbd_start, :sbd_count, :extra_bbd_start, :extra_bbd_count
      attr_reader :bbd_info

      # fh:: the open file to read from. Raises UnknownFormatError when one
      #      of the header fields cannot be read.
      def initialize(fh)
        @fh = fh
        @pps_table = {}
        #BIG BLOCK SIZE
        exp = get_info(0x1E, 2, 'v')
        raise UnknownFormatError.new if exp.nil?
        @big_block_size = (2 ** exp)
        #SMALL BLOCK SIZE
        exp = get_info(0x20, 2, 'v')
        raise UnknownFormatError.new if exp.nil?
        @small_block_size = (2 ** exp)
        #BDB Count
        @bdb_count = get_info(0x2C, 4, 'V') or raise UnknownFormatError.new
        #START BLOCK
        @root_start = get_info(0x30, 4, 'V') or raise UnknownFormatError.new
        #SMALL BD START
        @sbd_start = get_info(0x3C, 4, 'V') or raise UnknownFormatError.new
        #SMALL BD COUNT
        @sbd_count = get_info(0x40, 4, 'V') or raise UnknownFormatError.new
        #EXTRA BBD START
        @extra_bbd_start = get_info(0x44, 4, 'V') or raise UnknownFormatError.new
        #EXTRA BBD COUNT
        @extra_bbd_count = get_info(0x48, 4, 'V') or raise UnknownFormatError.new
        #GET BBD INFO
        @bbd_info = get_bbd_info
        #GET ROOT PPS
        @root = get_nth_pps(0)
      end

      # Builds the sparse successor table { block => next block }. Entries
      # whose successor is simply block + 1 are left out; get_next_block_no
      # supplies that default.
      def get_bbd_info
        bdb_count = @bdb_count
        first_count = (@big_block_size - 0x4C) / LongIntSize
        bdl_count = (@big_block_size / LongIntSize) - 1
        #1. 1st BDlist
        @fh.seek(0x4C)
        get_count = [first_count, bdb_count].min
        buff = @fh.read(LongIntSize * get_count)
        bdl_list = buff.unpack("V#{get_count}")
        bdb_count -= get_count
        #2. Extra BDList
        block = @extra_bbd_start
        while (bdb_count > 0) && Storage.is_normal_block?(block)
          set_file_pos(block, 0)
          get_count = [bdb_count, bdl_count].min
          buff = @fh.read(LongIntSize * get_count)
          bdl_list += buff.unpack("V#{get_count}")
          bdb_count -= get_count
          # The long that follows is the number of the next list block.
          # unpack returns an Array, so take its only element.
          buff = @fh.read(LongIntSize)
          block = buff.unpack('V').first
          break if block.nil?
        end
        #3.Get BDs
        bd_table = {}
        block_no = 0
        bd_count = @big_block_size / LongIntSize
        bdl_list.each { |bdl|
          set_file_pos(bdl, 0)
          buff = @fh.read(@big_block_size)
          array = buff.unpack("V#{bd_count}")
          bd_count.times { |idx|
            bd_table.store(block_no, array[idx]) unless array[idx] == block_no.next
            block_no += 1
          }
        }
        bd_table
      end

      # Reads +size+ bytes of a big-block chain starting at +block+. Runs of
      # consecutive blocks are fetched with a single read. Returns what
      # could be read if the chain or the file ends early.
      def get_big_data(block, size)
        result = String.new
        return result unless Storage.is_normal_block?(block)
        rest = size
        keys = @bbd_info.keys.sort
        while rest > 0
          break unless block && Storage.is_normal_block?(block)
          # first table entry at or after +block+ marks the end of the run
          nkey = keys.find { |key| key >= block }
          break if nkey.nil?
          run_length = (nkey - block).next
          nxt = @bbd_info[nkey]
          set_file_pos(block, 0)
          get_size = [rest, @big_block_size * run_length].min
          chunk = @fh.read(get_size)
          break if chunk.nil?
          result << chunk
          rest -= get_size
          block = nxt
        end
        result
      end

      # Reads +len+ bytes at absolute offset +pos+ and returns the first
      # value unpacked with +fmt+, or nil at end of file.
      def get_info(pos, len, fmt)
        @fh.seek(pos)
        if (buff = @fh.read(len))
          buff.unpack(fmt).first
        end
      end

      def get_next_block_no(block)
        @bbd_info[block] || block.next
      end

      # Looks up the successor of small block +block+ in the small-block
      # table, which itself lives in a big-block chain at @sbd_start.
      def get_next_small_block_no(block)
        base = @big_block_size / LongIntSize
        nth = block / base
        pos = block % base
        blk = get_nth_block_no(@sbd_start, nth)
        set_file_pos(blk, pos * LongIntSize)
        @fh.read(LongIntSize).unpack('V').first
      end

      # Follows the big-block chain +nth+ steps from +start_block+; nil when
      # the chain ends first.
      def get_nth_block_no(start_block, nth)
        nxt = start_block
        nth.times {
          nxt = get_next_block_no(nxt)
          return nil unless Storage.is_normal_block?(nxt)
        }
        nxt
      end

      # Returns directory entry number +pos+ with its data loaded (cached
      # after the first call), or nil when it lies outside the file.
      def get_nth_pps(pos)
        return @pps_table[pos] if @pps_table.key?(pos)
        base_count = @big_block_size / PpsSize
        block = get_nth_block_no(@root_start, pos / base_count) or return
        set_file_pos(block, PpsSize * (pos % base_count))
        buff = @fh.read(PpsSize) or return
        pps = Storage.pps_factory(pos, buff)
        pps.get_data(self)
        @pps_table.store(pos, pps)
      end

      # Reads +size+ bytes of a small-block chain starting at +block+.
      # Returns what could be read if the file ends early.
      def get_small_data(block, size)
        result = String.new
        rest = size
        while rest > 0
          set_file_pos_small(block)
          get_size = [rest, @small_block_size].min
          chunk = @fh.read(get_size)
          break if chunk.nil?
          result << chunk
          rest -= get_size
          # no successor lookup once everything has been read
          break if rest <= 0
          block = get_next_small_block_no(block)
        end
        result
      end

      def sb_start
        @root.start_block
      end

      def sb_size
        @root.size
      end

      # Block 0 starts right after the header, which occupies one big block.
      def set_file_pos(block, pos)
        @fh.seek((block + 1) * @big_block_size + pos)
      end

      # Small blocks are packed into the big-block chain at sb_start.
      def set_file_pos_small(block)
        base = @big_block_size / @small_block_size
        nth = block / base
        pos = block % base
        blk = get_nth_block_no(sb_start, nth)
        set_file_pos(blk, pos * @small_block_size)
      end
    end

    # Collects every directory entry whose name is one of +names+.
    #
    # names:: Array of entry names in the on-disk form (see OLE.asc2ucs)
    # cse::   when true, names are compared ignoring case
    # no::    entry number to start from (0 is the root)
    # done::  entry numbers already visited; guards against cycles
    def search_pps(names, cse=false, no=0, done=[])
      #1. Check it self
      return [] if done.include?(no)
      done.push(no)
      pps = @header.get_nth_pps(no) or return []
      cond = if cse
        Proc.new { |name|
          /^#{Regexp.escape pps.name}$/i.match(name)
        }
      else
        Proc.new { |name| name == pps.name }
      end
      result = names.any? { |name| cond.call(name) } ? [pps] : []
      #2. Check Child, Previous, Next PPSs
      [pps.dir_pps, pps.prev_pps, pps.next_pps].each { |node|
        unless node == 0xFFFFFFFF
          result += search_pps(names, cse, node, done)
        end
      }
      result
    end

    private

    # Returns the parsed Header, or nil when the signature does not match.
    def get_header
      #0. Check ID
      rewind
      return unless read(8) == HeaderID
      Header.new(self)
    end
  end

  # Converts an ASCII string to the two-bytes-per-character form used for
  # entry names by appending a NUL byte to every character.
  def asc2ucs(str)
    str.split(//).join("\000") + "\000"
  end
  module_function :asc2ucs
end
|
352
|
+
|
353
|
+
=begin
|
354
|
+
ToDo: Merge with Daniel J. Berger's OLEWriter
|
355
|
+
=end
|