parseexcel_mod 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/COPYING +515 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +22 -0
- data/README +95 -0
- data/README.md +29 -0
- data/Rakefile +1 -0
- data/lib/parseexcel_mod.rb +4 -0
- data/lib/parseexcel_mod/format.rb +119 -0
- data/lib/parseexcel_mod/olestorage.rb +365 -0
- data/lib/parseexcel_mod/parseexcel_mod.rb +14 -0
- data/lib/parseexcel_mod/parser.rb +1013 -0
- data/lib/parseexcel_mod/version.rb +4 -0
- data/lib/parseexcel_mod/workbook.rb +74 -0
- data/lib/parseexcel_mod/worksheet.rb +170 -0
- data/parseexcel_mod.gemspec +23 -0
- data/test/data/annotation.xls +0 -0
- data/test/data/bar.xls +0 -0
- data/test/data/comment.5.0.xls +0 -0
- data/test/data/comment.xls +0 -0
- data/test/data/dates.xls +0 -0
- data/test/data/float.5.0.xls +0 -0
- data/test/data/float.xls +0 -0
- data/test/data/foo.xls +0 -0
- data/test/data/image.xls +0 -0
- data/test/data/nil.xls +0 -0
- data/test/data/umlaut.5.0.xls +0 -0
- data/test/data/umlaut.biff8.xls +0 -0
- data/test/data/uncompressed.str.xls +0 -0
- data/test/suite.rb +9 -0
- data/test/test_format.rb +63 -0
- data/test/test_olestorage.rb +180 -0
- data/test/test_parser.rb +745 -0
- data/test/test_workbook.rb +70 -0
- data/test/test_worksheet.rb +183 -0
- data/usage-en.txt +112 -0
- metadata +134 -0
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2013 t0pep0
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
Spreadsheet::ParseExcel - Get information from an Excel file.
|
2
|
+
|
3
|
+
============
|
4
|
+
Version: 0.5.3
|
5
|
+
Date: 2011-04-22
|
6
|
+
|
7
|
+
Short Description:
|
8
|
+
This is databasically's fork of the parseexcel gem. Like the original gem, it is not actively maintained. It was created to fix a bug that occurred when reading Unicode characters and produced the following error:
|
9
|
+
|
10
|
+
undefined method `&' for "\x00":String
|
11
|
+
|
12
|
+
This version of the library resolves this error.
|
13
|
+
|
14
|
+
Spreadsheet::ParseExcel allows you to get information out of a
|
15
|
+
simple Excel file
|
16
|
+
This Package is an - as of today incomplete - translation of
|
17
|
+
Kawai Takanori's Perl module.
|
18
|
+
|
19
|
+
|
20
|
+
Requirements
|
21
|
+
------------
|
22
|
+
|
23
|
+
* ruby 1.8
|
24
|
+
|
25
|
+
Install
|
26
|
+
-------
|
27
|
+
|
28
|
+
De-Compress archive and enter its top directory.
|
29
|
+
Then type:
|
30
|
+
|
31
|
+
$ ruby install.rb config
|
32
|
+
$ ruby install.rb setup
|
33
|
+
($ su)
|
34
|
+
# ruby install.rb install
|
35
|
+
|
36
|
+
You can also install files into your favorite directory
|
37
|
+
by supplying install.rb with some options. Try "ruby install.rb --help".
|
38
|
+
More information can be found in the redistributed file usage-en.txt
|
39
|
+
|
40
|
+
|
41
|
+
Usage
|
42
|
+
-----
|
43
|
+
#!/usr/bin/env ruby
|
44
|
+
|
45
|
+
require 'parseexcel'
|
46
|
+
|
47
|
+
# your first step is always reading in the file.
|
48
|
+
# that gives you a workbook-object, which has one or more worksheets,
|
49
|
+
# just like in Excel you have the possibility of multiple worksheets.
|
50
|
+
workbook = Spreadsheet::ParseExcel.parse(path_to_file)
|
51
|
+
|
52
|
+
# usually, you want the first worksheet:
|
53
|
+
worksheet = workbook.worksheet(0)
|
54
|
+
|
55
|
+
# now you can either iterate over all rows, skipping the first number of
|
56
|
+
# rows (in case you know they just contain column headers)
|
57
|
+
skip = 2
|
58
|
+
worksheet.each(skip) { |row|
|
59
|
+
# a row is actually just an Array of Cells..
|
60
|
+
first_cell = row.at(0)
|
61
|
+
|
62
|
+
# how you get data out of the cell depends on what datatype you
|
63
|
+
# expect:
|
64
|
+
|
65
|
+
# if you expect a String, you can pass an encoding and (iconv
|
66
|
+
# required) the content of the cell will be converted.
|
67
|
+
str = row.at(1).to_s('latin1')
|
68
|
+
|
69
|
+
# if you expect a Float:
|
70
|
+
float = row.at(2).to_f
|
71
|
+
|
72
|
+
# if you expect an Integer:
|
73
|
+
int = row.at(3).to_i
|
74
|
+
|
75
|
+
# if you expect a Date:
|
76
|
+
date = row.at(4).date
|
77
|
+
|
78
|
+
# ParseExcel makes a guess at what Datatype a cell has. At the moment,
|
79
|
+
# possible values are: :date, :numeric, :text
|
80
|
+
celltype = first_cell.type
|
81
|
+
}
|
82
|
+
|
83
|
+
# if you know exactly which row your data resides in, you may just
|
84
|
+
# retrieve that row, which is again simply an Array of Cells
|
85
|
+
row = worksheet.row(26)
|
86
|
+
|
87
|
+
|
88
|
+
License
|
89
|
+
-------
|
90
|
+
|
91
|
+
LGPL
|
92
|
+
|
93
|
+
|
94
|
+
URL: http://download.ywesee.com/parseexcel
|
95
|
+
Author: Hannes Wyss <hwyss@ywesee.com>
|
data/README.md
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
# ParseexcelMod
|
2
|
+
|
3
|
+
TODO: Write a gem description
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application's Gemfile:
|
8
|
+
|
9
|
+
gem 'parseexcel_mod'
|
10
|
+
|
11
|
+
And then execute:
|
12
|
+
|
13
|
+
$ bundle
|
14
|
+
|
15
|
+
Or install it yourself as:
|
16
|
+
|
17
|
+
$ gem install parseexcel_mod
|
18
|
+
|
19
|
+
## Usage
|
20
|
+
|
21
|
+
TODO: Write usage instructions here
|
22
|
+
|
23
|
+
## Contributing
|
24
|
+
|
25
|
+
1. Fork it
|
26
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
27
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
29
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Spreadsheet::ParseExcel -- Extract Data from an Excel File
|
4
|
+
# Copyright (C) 2003 ywesee -- intellectual capital connected
|
5
|
+
#
|
6
|
+
# This library is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
8
|
+
# License as published by the Free Software Foundation; either
|
9
|
+
# version 2.1 of the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with this library; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
#
|
20
|
+
# ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
|
21
|
+
# hwyss@ywesee.com
|
22
|
+
#
|
23
|
+
# Format -- Spreadsheet::ParseExcel -- 10.06.2003 -- hwyss@ywesee.com
|
24
|
+
|
25
|
+
module Spreadsheet
|
26
|
+
module ParseExcel
|
27
|
+
# Holds the formatting attributes of a cell and maps number-format
# indices to their format strings.
#
# The table of format strings is stored in a class variable, so formats
# registered through #add_text_format are visible to every Format instance.
class Format
  # Built-in number formats, keyed by format index.
  @@fmt_strs = {
    0x00 => "@",
    0x01 => "0",
    0x02 => "0.00",
    0x03 => "#,##0",
    0x04 => "#,##0.00",
    0x05 => "($#,##0_);($#,##0)",
    0x06 => "($#,##0_);[RED]($#,##0)",
    0x07 => "($#,##0.00_);($#,##0.00_)",
    0x08 => "($#,##0.00_);[RED]($#,##0.00_)",
    0x09 => "0%",
    0x0A => "0.00%",
    0x0B => "0.00E+00",
    0x0C => "# ?/?",
    0x0D => "# ??/??",
    0x0E => "m-d-yy",
    0x0F => "d-mmm-yy",
    0x10 => "d-mmm",
    0x11 => "mmm-yy",
    0x12 => "h:mm AM/PM",
    0x13 => "h:mm:ss AM/PM",
    0x14 => "h:mm",
    0x15 => "h:mm:ss",
    0x16 => "m-d-yy h:mm",
    #0x17-0x24 -- national differences?
    0x25 => "(#,##0_);(#,##0)",
    0x26 => "(#,##0_);[RED](#,##0)",
    0x27 => "(#,##0.00);(#,##0.00)",
    0x28 => "(#,##0.00);[RED](#,##0.00)",
    0x29 => "_(*#,##0_);_(*(#,##0);_(*\"-\"_);_(@_)",
    0x2A => "_($*#,##0_);_($*(#,##0);_(*\"-\"_);_(@_)",
    0x2B => "_(*#,##0.00_);_(*(#,##0.00);_(*\"-\"??_);_(@_)",
    0x2C => "_($*#,##0.00_);_($*(#,##0.00);_(*\"-\"??_);_(@_)",
    0x2D => "mm:ss",
    0x2E => "[h]:mm:ss",
    0x2F => "mm:ss.0",
    0x30 => "##0.0E+0",
    0x31 => "@",
  }
  # Re-encode the built-in format strings from Latin-1 to UTF-16LE when
  # iconv is available; otherwise keep them as they are and say so.
  begin
    require 'iconv'
    iconv = Iconv.new('utf16le', 'latin1')
    @@fmt_strs = @@fmt_strs.inject({}) { |memo, (key, val)|
      memo.store(key, iconv.iconv(val))
      memo
    }
  rescue LoadError, StandardError
    # LoadError must be named explicitly: it is a ScriptError, which a bare
    # `rescue` does not catch, so a missing iconv would otherwise abort the
    # loading of this class instead of falling back.
    warn("default formats are encoded in ISO-8859-1")
  end
  attr_accessor :font_no, :fmt_idx, :lock, :hidden, :style, :key_123
  attr_accessor :align_h, :wrap, :align_v, :just_last, :rotate, :indent
  attr_accessor :shrink, :merge, :read_dir, :encoding
  attr_accessor :border_style, :border_color, :border_diag, :fill
  # Matches date/time placeholders in a format string. The optional \0
  # before each letter lets the pattern match both single-byte strings and
  # strings in which every character is preceded by a NUL byte.
  @@date_pattern = /(\0?d\0?d|\0?m\0?m|\0?y\0?y|\0?h|\0?s\0?s)/i
  # Creates a Format from a hash of attribute names to values.
  # Keys without a matching attribute writer are silently ignored.
  def initialize(params={})
    params.each { |key, val|
      mthd = key.to_s + '='
      if(self.respond_to?(mthd))
        self.send(mthd, val)
      end
    }
  end
  # Registers +fmt_str+ under the format index +idx+ in the shared
  # (class-level) format table, replacing any previous entry.
  def add_text_format(idx, fmt_str)
    @@fmt_strs.store(idx, fmt_str)
  end
  # Guesses the data type of +cell+ and returns :date, :numeric or :text.
  #
  # A cell whose #numeric is truthy counts as :date when this format's index
  # lies in 0x0E..0x16 or 0x2D..0x2F, or when the format string registered
  # for the index contains a date/time placeholder; otherwise it is
  # :numeric. Every other cell is :text.
  def cell_type(cell)
    if(cell.numeric)
      if([0x0E..0x16, 0x2D..0x2F].any? { |range| range.include?(@fmt_idx.to_i) })
        :date
      elsif((fmt = @@fmt_strs[@fmt_idx]) && @@date_pattern.match(fmt))
        :date
      else
        :numeric
      end
    else
      :text
    end
  end
  # Returns +str+ unchanged when +code+ is :_native_. For any other +code+
  # the string is read as big-endian 16-bit units and each unit is reduced
  # to its low byte.
  def text_format(str, code=:_native_)
    (code == :_native_) ? str : str.unpack('n*').pack('C*')
  end
  # Returns a copy of the format string registered for this format's index
  # (an empty string when none is registered). When +target_encoding+ is
  # given, the string is converted from @encoding to it through Iconv,
  # which therefore has to be loadable in that case.
  def to_s(target_encoding=nil)
    fmt_str = @@fmt_strs[@fmt_idx].to_s
    if(target_encoding)
      Iconv.new(target_encoding, @encoding).iconv(fmt_str)
    else
      fmt_str.dup
    end
  end
end
|
118
|
+
end
|
119
|
+
end
|
@@ -0,0 +1,365 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# Spreadsheet::ParseExcel -- Extract Data from an Excel File
|
4
|
+
# Copyright (C) 2003 ywesee -- intellectual capital connected
|
5
|
+
#
|
6
|
+
# This library is free software; you can redistribute it and/or
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
8
|
+
# License as published by the Free Software Foundation; either
|
9
|
+
# version 2.1 of the License, or (at your option) any later version.
|
10
|
+
#
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
14
|
+
# Lesser General Public License for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
17
|
+
# License along with this library; if not, write to the Free Software
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
19
|
+
#
|
20
|
+
# ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
|
21
|
+
# hwyss@ywesee.com
|
22
|
+
#
|
23
|
+
# OLEReader -- Spreadsheet::ParseExcel -- 05.06.2003 -- hwyss@ywesee.com
|
24
|
+
|
25
|
+
require 'date'
|
26
|
+
require 'stringio'
|
27
|
+
|
28
|
+
module OLE
|
29
|
+
class UnknownFormatError < RuntimeError; end
|
30
|
+
class DateTime
|
31
|
+
attr_reader :year, :month, :day, :hour, :min, :sec, :msec
|
32
|
+
def initialize(year, month=1, day=1, hour=0, min=0, sec=0, msec=0)
|
33
|
+
@year = year
|
34
|
+
@month = month
|
35
|
+
@day = day
|
36
|
+
@hour = hour
|
37
|
+
@min = min
|
38
|
+
@sec = sec
|
39
|
+
@msec = msec
|
40
|
+
end
|
41
|
+
def date
|
42
|
+
begin
|
43
|
+
Date.new(@year, @month, @day)
|
44
|
+
rescue ArgumentError
|
45
|
+
end
|
46
|
+
end
|
47
|
+
class << self
|
48
|
+
def month_days(month, year)
|
49
|
+
case month % 12
|
50
|
+
when 0,1,3,5,7,8,10
|
51
|
+
31
|
52
|
+
when 4,6,9,11
|
53
|
+
30
|
54
|
+
else
|
55
|
+
Date.leap?(year) ? 29 : 28
|
56
|
+
end
|
57
|
+
end
|
58
|
+
# Decodes a little-endian binary timestamp into a DateTime.
#
# datetime - String holding the raw timestamp bytes, least significant
#            byte first (callers in this file pass 8 bytes). nil or an
#            empty string is treated as the value 0.
#
# The value is interpreted as a count of 1/10,000,000-second ticks since
# 1601-01-01 00:00:00. Returns a new instance built from year, month, day,
# hour, minute, second and the sub-second remainder (in ticks), which is
# passed on as +msec+.
def parse(datetime)
  #1. Divide Day and Time
  # Fold the bytes into one integer, most significant byte first. The byte
  # values are needed here; String#to_i would yield 0 for every byte that
  # is not an ASCII digit.
  ticks = datetime.to_s.unpack('C*').reverse.inject(0) { |acc, byte|
    acc * 0x100 + byte
  }
  msec = ticks % 10000000
  seconds = ticks / 10000000
  day = (seconds / (24*60*60)) + 1
  time = seconds % (24*60*60)
  #2. Year->Day: walk forward from 1601 one whole year at a time
  year = 1601
  ydays = year_days(year)
  while(day > ydays)
    day -= ydays
    year += 1
    ydays = year_days(year)
  end
  # Walk forward through the months of that year. An explicit counter is
  # used so the result survives the loop (a block parameter is block-local)
  # and so that running past November ends on month 12.
  month = 1
  while(month < 12)
    mdays = month_days(month, year)
    break if(day <= mdays)
    day -= mdays
    month += 1
  end
  #3. Hour->iSec
  hour = time / 3600
  min = (time % 3600) / 60
  sec = time % 60
  new(year, month, day, hour, min, sec, msec)
end
|
89
|
+
def year_days(year)
|
90
|
+
Date.leap?(year) ? 366 : 365
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
94
|
+
class Storage
|
95
|
+
PpsType_Root = 5
|
96
|
+
PpsType_Dir = 1
|
97
|
+
PpsType_File = 2
|
98
|
+
DataSizeSmall = 0x1000
|
99
|
+
LongIntSize = 4
|
100
|
+
PpsSize = 0x80
|
101
|
+
attr_reader :header
|
102
|
+
# Opens an OLE storage and reads its header.
#
# filename - either a path, which is opened here (and closed again by
#            #close), or an object responding to #seek, which is used
#            directly as the input handle and is not closed by this class.
#
# Any error raised while reading the header is propagated to the caller.
def initialize(filename)
  header_read = false
  if(filename.respond_to?(:seek))
    @fh_owner = false
    @fh = filename
  else
    @fh_owner = true
    # open in binary mode from the start so no newline or encoding
    # translation is applied to the data
    @fh = File.open(filename, "rb")
  end
  @fh.binmode
  @header = get_header
  header_read = true
ensure
  # Do not leak a handle that was opened here when reading the header
  # failed; a handle supplied by the caller stays the caller's business.
  @fh.close if(@fh_owner && @fh && !header_read)
end
|
113
|
+
def close
|
114
|
+
@fh.close if(@fh_owner)
|
115
|
+
end
|
116
|
+
module PPS
|
117
|
+
class Node
|
118
|
+
attr_reader :no, :type, :prev_pps, :next_pps, :data
|
119
|
+
attr_reader :dir_pps, :time_1st, :time_2nd, :start_block, :size
|
120
|
+
attr_reader :name
|
121
|
+
def initialize(no, datastr)
|
122
|
+
@no = no
|
123
|
+
#def init(datastr)
|
124
|
+
nm_size, @type, @prev_pps,
|
125
|
+
@next_pps, @dir_pps = datastr[0x40,16].unpack('vvVVV')
|
126
|
+
@time_1st = DateTime.parse(datastr[0x64, 8])
|
127
|
+
@time_2nd = DateTime.parse(datastr[0x6C, 8])
|
128
|
+
@start_block, @size = datastr[0x74,8].unpack('VV')
|
129
|
+
nm_size -= 2 if(nm_size > 2)
|
130
|
+
@name = datastr[0,nm_size]
|
131
|
+
#end
|
132
|
+
end
|
133
|
+
def get_data(header)
|
134
|
+
end
|
135
|
+
private
|
136
|
+
end
|
137
|
+
class Root < Node
|
138
|
+
def get_data(header)
|
139
|
+
@data = header.get_big_data(@start_block, @size)
|
140
|
+
end
|
141
|
+
end
|
142
|
+
class Dir < Node
|
143
|
+
end
|
144
|
+
class File < Node
|
145
|
+
def get_data(header)
|
146
|
+
@data = if(@size < DataSizeSmall)
|
147
|
+
header.get_small_data(@start_block, @size)
|
148
|
+
else
|
149
|
+
header.get_big_data(@start_block, @size)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
end
|
154
|
+
class << self
|
155
|
+
def is_normal_block?(block)
|
156
|
+
block < 0xFFFFFFFC
|
157
|
+
end
|
158
|
+
def pps_factory(pos, datastr)
|
159
|
+
nm_size, type = datastr[0x40,4].unpack('vC')
|
160
|
+
nm_size -= 2 if(nm_size > 2)
|
161
|
+
nm = datastr[0,nm_size]
|
162
|
+
klass = {
|
163
|
+
PpsType_Root => PPS::Root,
|
164
|
+
PpsType_Dir => PPS::Dir,
|
165
|
+
PpsType_File => PPS::File,
|
166
|
+
}[type] or raise("unknown pps_type: #{type} / #{nm}")
|
167
|
+
klass.new(pos, datastr)
|
168
|
+
end
|
169
|
+
end
|
170
|
+
class Header
|
171
|
+
attr_reader :big_block_size, :small_block_size, :bdb_count, :root_start
|
172
|
+
attr_reader :sbd_start, :sbd_count, :extra_bbd_start, :extra_bbd_count
|
173
|
+
attr_reader :bbd_info
|
174
|
+
def initialize(fh)
|
175
|
+
@fh = fh
|
176
|
+
@pps_table = {}
|
177
|
+
#BIG BLOCK SIZE
|
178
|
+
exp = get_info(0x1E, 2, 'v') or raise UnknownFormatError.new
|
179
|
+
@big_block_size = (2 ** exp)
|
180
|
+
#SMALL BLOCK SIZE
|
181
|
+
exp = get_info(0x20, 2, 'v') or raise UnknownFormatError.new
|
182
|
+
@small_block_size = (2 ** exp)
|
183
|
+
#BDB Count
|
184
|
+
@bdb_count = get_info(0x2C, 4, 'V') or raise UnknownFormatError.new
|
185
|
+
#START BLOCK
|
186
|
+
@root_start = get_info(0x30, 4, 'V') or raise UnknownFormatError.new
|
187
|
+
#SMALL BD START
|
188
|
+
@sbd_start = get_info(0x3C, 4, 'V') or raise UnknownFormatError.new
|
189
|
+
#SMALL BD COUNT
|
190
|
+
@sbd_count = get_info(0x40, 4, 'V') or raise UnknownFormatError.new
|
191
|
+
#EXTRA BBD START
|
192
|
+
@extra_bbd_start = get_info(0x44, 4, 'V') or raise UnknownFormatError.new
|
193
|
+
#EXTRA BBD COUNT
|
194
|
+
@extra_bbd_count = get_info(0x48, 4, 'V') or raise UnknownFormatError.new
|
195
|
+
#GET BBD INFO
|
196
|
+
@bbd_info = get_bbd_info
|
197
|
+
#GET ROOT PPS
|
198
|
+
@root = get_nth_pps(0)
|
199
|
+
end
|
200
|
+
def get_bbd_info
|
201
|
+
bdb_count = @bdb_count
|
202
|
+
first_count = (@big_block_size - 0x4C) / LongIntSize
|
203
|
+
bdl_count = (@big_block_size / LongIntSize) - 1
|
204
|
+
#1. 1st BDlist
|
205
|
+
@fh.seek(0x4C)
|
206
|
+
get_count = [first_count, bdb_count].min
|
207
|
+
buff = @fh.read(LongIntSize * get_count)
|
208
|
+
bdl_list = buff.unpack("V#{get_count}")
|
209
|
+
bdb_count -= get_count
|
210
|
+
#2. Extra BDList
|
211
|
+
block = @extra_bbd_start
|
212
|
+
while((bdb_count > 0) && Storage.is_normal_block?(block))
|
213
|
+
set_file_pos(block, 0)
|
214
|
+
get_count = [bdb_count, bdl_count].min
|
215
|
+
buff = @fh.read(LongIntSize * get_count)
|
216
|
+
bdl_list += buff.unpack("V#{get_count}")
|
217
|
+
bdb_count -= get_count
|
218
|
+
buff = @fh.read(LongIntSize)
|
219
|
+
block = buff.unpack('V').first
|
220
|
+
end
|
221
|
+
#3.Get BDs
|
222
|
+
bd_table = {}
|
223
|
+
block_no = 0
|
224
|
+
bd_count = @big_block_size / LongIntSize
|
225
|
+
bdl_list.each { |bdl|
|
226
|
+
set_file_pos(bdl, 0)
|
227
|
+
buff = @fh.read(@big_block_size)
|
228
|
+
array = buff.unpack("V#{bd_count}")
|
229
|
+
bd_count.times { |idx|
|
230
|
+
bd_table.store(block_no, array[idx]) unless(array[idx]==block_no.next)
|
231
|
+
block_no += 1
|
232
|
+
}
|
233
|
+
}
|
234
|
+
bd_table
|
235
|
+
end
|
236
|
+
# Reads up to +size+ bytes of a stream stored in big blocks.
#
# block - number of the first big block of the stream
# size  - number of bytes to read
#
# @bbd_info only holds entries for blocks whose successor is not simply
# the following block, so the distance from +block+ to the next key in
# that table is the length of a physically contiguous run, which is read
# in one go.
#
# Returns the bytes read as a String. The result is shorter than +size+
# (possibly empty) when +block+ is not a normal block, when the chain ends
# early, or when the file is truncated.
def get_big_data(block, size)
  result = String.new
  rest = size
  keys = @bbd_info.keys.sort
  while(rest > 0 && Storage.is_normal_block?(block))
    # last block of the contiguous run starting at +block+
    run_end = keys.find { |key| key >= block }
    break if(run_end.nil?)
    run_blocks = (run_end - block).next
    set_file_pos(block, 0)
    get_size = [rest, @big_block_size * run_blocks].min
    chunk = @fh.read(get_size)
    # read returns nil at end of file: nothing more to collect
    break if(chunk.nil?)
    result << chunk
    rest -= get_size
    block = @bbd_info[run_end]
  end
  result
end
|
254
|
+
def get_info(pos, len, fmt)
|
255
|
+
@fh.seek(pos)
|
256
|
+
if(buff = @fh.read(len))
|
257
|
+
buff.unpack(fmt).first
|
258
|
+
end
|
259
|
+
end
|
260
|
+
def get_next_block_no(block)
|
261
|
+
@bbd_info[block] || block.next
|
262
|
+
end
|
263
|
+
def get_next_small_block_no(block)
|
264
|
+
base = @big_block_size / LongIntSize
|
265
|
+
nth = block / base
|
266
|
+
pos = block % base
|
267
|
+
blk = get_nth_block_no(@sbd_start, nth)
|
268
|
+
set_file_pos(blk, pos * LongIntSize)
|
269
|
+
@fh.read(LongIntSize).unpack('V').first
|
270
|
+
end
|
271
|
+
def get_nth_block_no(start_block, nth)
|
272
|
+
nxt = start_block
|
273
|
+
nth.times { |idx|
|
274
|
+
nxt = get_next_block_no(nxt)
|
275
|
+
return nil unless Storage.is_normal_block?(nxt)
|
276
|
+
}
|
277
|
+
nxt
|
278
|
+
end
|
279
|
+
def get_nth_pps(pos)
|
280
|
+
@pps_table.fetch(pos) {
|
281
|
+
base_count = @big_block_size / PpsSize
|
282
|
+
pps_block = pos / base_count
|
283
|
+
pps_pos = pos % base_count
|
284
|
+
|
285
|
+
block = get_nth_block_no(@root_start, pps_block) or return
|
286
|
+
set_file_pos(block, PpsSize*pps_pos)
|
287
|
+
buff = @fh.read(PpsSize) or return
|
288
|
+
pps = Storage.pps_factory(pos, buff)
|
289
|
+
pps.get_data(self)
|
290
|
+
@pps_table.store(pos, pps)
|
291
|
+
}
|
292
|
+
end
|
293
|
+
# Reads up to +size+ bytes of a stream stored in small blocks.
#
# block - number of the first small block of the stream
# size  - number of bytes to read
#
# One small block is read per iteration; the following block number is
# looked up through #get_next_small_block_no.
#
# Returns the bytes read as a String. The result is shorter than +size+
# when the block chain ends before +size+ bytes were collected or when the
# file is truncated.
def get_small_data(block, size)
  result = String.new
  rest = size
  # stop at an end-of-chain/special block number instead of seeking to it
  while(rest > 0 && Storage.is_normal_block?(block))
    set_file_pos_small(block)
    get_size = [rest, @small_block_size].min
    chunk = @fh.read(get_size)
    # read returns nil at end of file: nothing more to collect
    break if(chunk.nil?)
    result << chunk
    rest -= @small_block_size
    block = get_next_small_block_no(block) if(rest > 0)
  end
  result
end
|
305
|
+
def sb_start
|
306
|
+
@root.start_block
|
307
|
+
end
|
308
|
+
def sb_size
|
309
|
+
@root.size
|
310
|
+
end
|
311
|
+
def set_file_pos(block, pos)
|
312
|
+
@fh.seek((block+1) * @big_block_size + pos)
|
313
|
+
end
|
314
|
+
def set_file_pos_small(block)
|
315
|
+
base = @big_block_size / @small_block_size
|
316
|
+
nth = block / base
|
317
|
+
pos = block % base
|
318
|
+
blk = get_nth_block_no(sb_start, nth)
|
319
|
+
set_file_pos(blk, pos * @small_block_size)
|
320
|
+
end
|
321
|
+
end
|
322
|
+
def search_pps(names, cse=false, no=0, done=[])
|
323
|
+
#1. Check it self
|
324
|
+
return [] if(done.include?(no))
|
325
|
+
done.push(no)
|
326
|
+
pps = @header.get_nth_pps(no) or return []
|
327
|
+
cond = if(cse)
|
328
|
+
Proc.new { |name|
|
329
|
+
/^#{Regexp.escape pps.name}$/i.match(name)
|
330
|
+
}
|
331
|
+
else
|
332
|
+
Proc.new { |name| name == pps.name }
|
333
|
+
end
|
334
|
+
result = if(names.any? { |name| cond.call(name) })
|
335
|
+
[pps]
|
336
|
+
else
|
337
|
+
[]
|
338
|
+
end
|
339
|
+
#2. Check Child, Previous, Next PPSs
|
340
|
+
[ pps.dir_pps, pps.prev_pps, pps.next_pps ].each { |node|
|
341
|
+
unless(node == 0xFFFFFFFF)
|
342
|
+
result += search_pps(names, cse, node, done)
|
343
|
+
end
|
344
|
+
}
|
345
|
+
result
|
346
|
+
end
|
347
|
+
private
|
348
|
+
def get_header
|
349
|
+
#0. Check ID
|
350
|
+
@fh.rewind
|
351
|
+
unless(@fh.read(8) == "\xD0\xCF\x11\xE0\xA1\xB1\x1A\xE1")
|
352
|
+
#raise UnknownFormatError
|
353
|
+
end
|
354
|
+
Header.new(@fh)
|
355
|
+
end
|
356
|
+
end
|
357
|
+
# Widens every byte of +str+ to a 16-bit little-endian unit, i.e. appends
# a NUL byte to each byte.
#
#   asc2ucs("ab")  # => "a\000b\000"
#
# Works on bytes rather than characters, so the result always has exactly
# twice as many bytes as the input; an empty string yields an empty string.
def asc2ucs(str)
  str.unpack('C*').pack('v*')
end
|
360
|
+
module_function :asc2ucs
|
361
|
+
end
|
362
|
+
|
363
|
+
=begin
|
364
|
+
ToDo: Merge with Daniel J. Berger's OLEWriter
|
365
|
+
=end
|