spreadsheet 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/GUIDE.txt +209 -0
- data/History.txt +8 -0
- data/LICENSE.txt +619 -0
- data/Manifest.txt +46 -0
- data/README.txt +54 -0
- data/Rakefile +15 -0
- data/lib/parseexcel.rb +27 -0
- data/lib/parseexcel/parseexcel.rb +75 -0
- data/lib/parseexcel/parser.rb +11 -0
- data/lib/spreadsheet.rb +79 -0
- data/lib/spreadsheet/datatypes.rb +99 -0
- data/lib/spreadsheet/encodings.rb +49 -0
- data/lib/spreadsheet/excel.rb +75 -0
- data/lib/spreadsheet/excel/error.rb +26 -0
- data/lib/spreadsheet/excel/internals.rb +322 -0
- data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
- data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
- data/lib/spreadsheet/excel/offset.rb +37 -0
- data/lib/spreadsheet/excel/reader.rb +798 -0
- data/lib/spreadsheet/excel/reader/biff5.rb +22 -0
- data/lib/spreadsheet/excel/reader/biff8.rb +168 -0
- data/lib/spreadsheet/excel/row.rb +67 -0
- data/lib/spreadsheet/excel/sst_entry.rb +45 -0
- data/lib/spreadsheet/excel/workbook.rb +76 -0
- data/lib/spreadsheet/excel/worksheet.rb +85 -0
- data/lib/spreadsheet/excel/writer.rb +1 -0
- data/lib/spreadsheet/excel/writer/biff8.rb +66 -0
- data/lib/spreadsheet/excel/writer/format.rb +270 -0
- data/lib/spreadsheet/excel/writer/workbook.rb +586 -0
- data/lib/spreadsheet/excel/writer/worksheet.rb +556 -0
- data/lib/spreadsheet/font.rb +86 -0
- data/lib/spreadsheet/format.rb +172 -0
- data/lib/spreadsheet/formula.rb +9 -0
- data/lib/spreadsheet/row.rb +87 -0
- data/lib/spreadsheet/workbook.rb +120 -0
- data/lib/spreadsheet/worksheet.rb +215 -0
- data/lib/spreadsheet/writer.rb +29 -0
- data/test/data/test_copy.xls +0 -0
- data/test/data/test_version_excel5.xls +0 -0
- data/test/data/test_version_excel95.xls +0 -0
- data/test/data/test_version_excel97.xls +0 -0
- data/test/excel/row.rb +29 -0
- data/test/font.rb +163 -0
- data/test/integration.rb +1021 -0
- data/test/workbook.rb +21 -0
- data/test/worksheet.rb +62 -0
- metadata +113 -0
@@ -0,0 +1,37 @@
|
|
1
|
+
module Spreadsheet
  module Excel
    ##
    # This module is used to keep track of offsets in modified Excel documents.
    # Considered internal and subject to change without notice.
    #
    # Classes that include it gain the class-level macro +offset+ as well as
    # the instance readers #changes and #offsets.
    module Offset
      # #changes: key => true for every key assigned through a generated
      # "key=" writer. #offsets: key => [pos, len] for every key assigned
      # through a generated "set_key" method.
      attr_reader :changes, :offsets

      # Forwards all arguments up the initializer chain, then starts with
      # empty change- and offset-tables.
      def initialize *args
        super
        @changes = {}
        @offsets = {}
      end

      # Runs when the module is included into +mod+ and installs the +offset+
      # class macro there.
      def Offset.append_features mod
        super
        mod.module_eval do
          class << self
            # For each key this defines
            # * a reader, unless a method of that name is already defined,
            # * "key=", which flags the key in #changes and stores the value,
            # * "set_key(value, pos, len)", which stores the value, records
            #   [pos, len] in #offsets and finally calls
            #   "have_set_key(value, pos, len)" if the instance responds to it.
            def offset *keys
              keys.each do |key|
                # method_defined? takes Strings and Symbols alike. The former
                # test, instance_methods.include?(key.to_s), can never match
                # on Ruby >= 1.9 where instance_methods returns Symbols, so
                # custom readers were silently replaced by attr_reader.
                attr_reader key unless method_defined? key
                define_method "#{key}=" do |value|
                  @changes.store key, true
                  instance_variable_set "@#{key}", value
                end
                define_method "set_#{key}" do |value, pos, len|
                  instance_variable_set "@#{key}", value
                  @offsets.store key, [pos, len]
                  havename = "have_set_#{key}"
                  send(havename, value, pos, len) if respond_to? havename
                end
              end
            end
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,798 @@
|
|
1
|
+
require 'spreadsheet/encodings'
|
2
|
+
require 'spreadsheet/font'
|
3
|
+
require 'spreadsheet/formula'
|
4
|
+
require 'spreadsheet/excel/error'
|
5
|
+
require 'spreadsheet/excel/internals'
|
6
|
+
require 'spreadsheet/excel/sst_entry'
|
7
|
+
require 'spreadsheet/excel/worksheet'
|
8
|
+
|
9
|
+
module Spreadsheet
|
10
|
+
module Excel
|
11
|
+
##
|
12
|
+
# Reader class for Excel Workbooks. Most read_* method correspond to an
|
13
|
+
# Excel-Record/Opcode. You should not need to call any of its methods
|
14
|
+
# directly. If you think you do, look at #read
|
15
|
+
class Reader
|
16
|
+
include Encodings
|
17
|
+
include Internals
|
18
|
+
# Number of bytes read as a record header by get_next_chunk
# (two 16-bit values: opcode and payload length).
OPCODE_SIZE = 4
# Opcodes that are considered part of a Row Block (see in_row_block?).
# Frozen so the shared list cannot be modified by accident.
ROW_BLOCK_OPS = [
  :blank, :boolerr, :dbcell, :formula, :label, :labelsst, :mulblank, :mulrk,
  :number, :rk, :row, :rstring,
].freeze
|
23
|
+
# Sets up an idle reader.
# opts is kept in @opts; the only key read here is :bigendian, which
# overrides the byte-order probe of the host platform.
def initialize opts = {}
  @opts = opts
  @pos = 0
  @current_row_block = {}
  # A native 32-bit 1 that does not start with the low byte means big-endian
  @bigendian = opts.fetch(:bigendian) do
    [1].pack('l') != "\001\000\000\000"
  end
  # Seed the number-format table with the built-in formats
  @formats = {}
  BUILTIN_FORMATS.each do |key, fmt|
    @formats[key] = client(fmt, 'UTF8')
  end
end
|
33
|
+
# Decodes a 4-byte RK value into an Integer or a Float.
#
#   Bit  Mask        Contents
#   0    0x00000001  0 = Value not changed  1 = Value is multiplied by 100
#   1    0x00000002  0 = Floating-point value  1 = Signed integer value
#   31-2 0xFFFFFFFC  Encoded value
#
# If bit 1 is cleared, the encoded value represents the 30 most significant
# bits of an IEEE 754 floating-point value (64-bit double precision). The
# 34 least significant bits must be set to zero. If bit 1 is set, the
# encoded value represents a signed 30-bit integer value. To get the
# correct integer, the encoded value has to be shifted right arithmetically
# by 2 bits. If bit 0 is set, the decoded value (both integer and
# floating-point) must be divided by 100 to get the final result.
def decode_rk work
  raw, = work.unpack 'V'
  cent = raw & 1
  if (raw & 2) == 0
    ## clear the two flag bits and use the rest as the high half of a double
    value, = ("\0\0\0\0" + [raw & 0xfffffffc].pack('V')).unpack EIGHT_BYTE_DOUBLE
  else
    ## drop the two flag bits ...
    value = raw >> 2
    ## ... and sign-extend from 30 bits. 'V' unpacks unsigned, so without
    #  this step negative integers came out as large positive numbers.
    value -= 0x40000000 if value >= 0x20000000
  end
  value /= 100.0 if cent == 1
  value
end
|
69
|
+
# Looks up the encoding registered in CODEPAGES for codepage_id.
# Returns an Encoding object on Ruby >= 1.9 and the plain name otherwise.
# Raises a RuntimeError for codepages missing from CODEPAGES.
def encoding codepage_id
  name = CODEPAGES.fetch(codepage_id) do
    raise "Unknown Codepage 0x%04x" % codepage_id
  end
  return name unless RUBY_VERSION >= '1.9'
  Encoding.find name
end
|
78
|
+
# True if op is one of the opcodes listed in ROW_BLOCK_OPS.
def in_row_block? op
  ROW_BLOCK_OPS.member? op
end
|
81
|
+
# Returns the value of the :memoization option this reader was created with.
def memoize?
  @opts[:memoization]
end
|
84
|
+
# Records [offset, length] for every worksheet in @workbook.offsets.
# A sheet extends up to the offset of the following sheet; the last one up
# to the size of the workbook's ole stream.
def postread_workbook
  sheets = @workbook.worksheets
  sheets.each_with_index do |sheet, idx|
    start = sheet.offset
    following = sheets[idx + 1]
    stop = following ? following.offset : @workbook.ole.size
    @workbook.offsets.store sheet, [start, stop - start]
  end
end
|
92
|
+
# Called by read_worksheet when the EOF record of a sheet is reached.
# Intentionally empty here.
def postread_worksheet worksheet
end
|
94
|
+
##
|
95
|
+
# The entry-point for reading Excel-documents. Reads the Biff-Version and
|
96
|
+
# loads additional reader-methods before proceeding with parsing the document.
|
97
|
+
def read io
  @ole = Ole::Storage.open io
  @workbook = Workbook.new io, {}
  # Try the "Book" stream first and fall back to "Workbook"
  @book = begin
    @ole.file.open("Book")
  rescue
    @ole.file.open("Workbook")
  end
  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  # Load the version specific reader- and internals-modules
  biff = @workbook.biff_version
  extend_reader biff
  extend_internals biff
  read_workbook
  @workbook
end
|
112
|
+
# Reads a BLANK record: a cell that carries a format but no value.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (see 6.115)
def read_blank worksheet, addr, work
  row, column, xf = work.unpack(binfmt(:blank))
  set_cell(worksheet, row, column, xf)
end
|
120
|
+
# Reads the next chunk as a BOF record: remembers its opcode in @bof and,
# if it introduces the workbook globals, the BIFF version in @version.
#
#   Offset  Size  Contents
#        0     2  BIFF version (always 0x0600 for BIFF8)
#        2     2  Type of the following data: 0x0005 = Workbook globals
#                                             0x0006 = Visual Basic module
#                                             0x0010 = Worksheet
#                                             0x0020 = Chart
#                                             0x0040 = Macro sheet
#                                             0x0100 = Workspace file
#        4     2  Build identifier
#        6     2  Build year
#        8     4  File history flags
#       12     4  Lowest Excel version that can read all records in this file
def read_bof
  _pos, @bof, _len, work = get_next_chunk
  ## version and datatype are common to all Excel-Versions. Later versions
  #  have additional information such as build-id and -year (from BIFF5).
  #  These are ignored for the time being.
  version, datatype = work.unpack('v2')
  @version = version if datatype == 0x5
end
|
142
|
+
# Reads a BOOLERR record and stores either true/false or an Error.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (see 6.115)
#        6     1  Boolean or error value (type depends on the following byte)
#        7     1  0 = Boolean value; 1 = Error code
def read_boolerr worksheet, addr, work
  row, column, xf, value, error = work.unpack 'v3C2'
  cell = if error == 0
           value > 0
         else
           Error.new(value)
         end
  set_cell worksheet, row, column, xf, cell
end
|
152
|
+
def read_boundsheet work, pos, len
|
153
|
+
# Offset Size Contents
|
154
|
+
# 0 4 Absolute stream position of the BOF record of the sheet
|
155
|
+
# represented by this record. This field is never encrypted
|
156
|
+
# in protected files.
|
157
|
+
# 4 1 Visibility: 0x00 = Visible
|
158
|
+
# 0x01 = Hidden
|
159
|
+
# 0x02 = Strong hidden (see below)
|
160
|
+
# 5 1 Sheet type: 0x00 = Worksheet
|
161
|
+
# 0x02 = Chart
|
162
|
+
# 0x06 = Visual Basic module
|
163
|
+
# 6 var. Sheet name: BIFF5/BIFF7: Byte string,
|
164
|
+
# 8-bit string length (➜ 3.3)
|
165
|
+
# BIFF8: Unicode string, 8-bit string length (➜ 3.4)
|
166
|
+
offset, visibility, type = work.unpack("VC2")
|
167
|
+
name = client read_string(work[6..-1]), @workbook.encoding
|
168
|
+
if @boundsheets
|
169
|
+
@boundsheets[0] += 1
|
170
|
+
@boundsheets[2] += len
|
171
|
+
else
|
172
|
+
@boundsheets = [1, pos, len]
|
173
|
+
end
|
174
|
+
@workbook.set_boundsheets *@boundsheets
|
175
|
+
@workbook.add_worksheet Worksheet.new(:name => name,
|
176
|
+
:ole => @book,
|
177
|
+
:offset => offset,
|
178
|
+
:reader => self)
|
179
|
+
end
|
180
|
+
# Reads a CODEPAGE record: resolves the 16-bit codepage id via #encoding and
# hands the result to the workbook together with the record's position.
def read_codepage work, pos, len
  codepage = work.unpack('v').first
  @workbook.set_encoding(encoding(codepage), pos, len)
end
|
184
|
+
# Reads a DIMENSIONS record and passes the unpacked fields, together with
# the record's position and length, on to the worksheet.
#
#   Offset  Size  Contents
#        0     4  Index to first used row
#        4     4  Index to last used row, increased by 1
#        8     2  Index to first used column
#       10     2  Index to last used column, increased by 1
#       12     2  Not used
def read_dimensions worksheet, work, pos, len
  dimensions = work.unpack(binfmt(:dimensions))
  worksheet.set_dimensions dimensions, pos, len
end
|
193
|
+
# Reads a FONT record, builds a Font from it and adds it to the workbook.
#
#   Offset  Size  Contents
#        0     2  Height of the font (in twips = 1/20 of a point)
#        2     2  Option flags:
#                   0x0001  1 = Characters are bold (redundant, see weight)
#                   0x0002  1 = Characters are italic
#                   0x0004  1 = Characters are underlined
#                               (redundant, see underline type)
#                   0x0008  1 = Characters are struck out
#                   0x0010  1 = Characters are outlined (djberger)
#                   0x0020  1 = Characters are shadowed (djberger)
#        4     2  Colour index (see 6.70)
#        6     2  Font weight (100-1000). Standard values are
#                 0x0190 (400) for normal text and 0x02bc (700) for bold text.
#        8     2  Escapement type: 0x0000 = None, 0x0001 = Superscript,
#                                  0x0002 = Subscript
#       10     1  Underline type:  0x00 = None, 0x01 = Single, 0x02 = Double,
#                                  0x21 = Single accounting,
#                                  0x22 = Double accounting
#       11     1  Font family: 0x00 = None (unknown or don't care)
#                              0x01 = Roman (variable width, serifed)
#                              0x02 = Swiss (variable width, sans-serifed)
#                              0x03 = Modern (fixed width, serifed or
#                                     sans-serifed)
#                              0x04 = Script (cursive)
#                              0x05 = Decorative (specialised, for example
#                                     Old English, Fraktur)
#       12     1  Character set:
#                   0x00 ANSI Latin          0x01 System default
#                   0x02 Symbol              0x4d Apple Roman
#                   0x80 ANSI Japanese Shift-JIS
#                   0x81 ANSI Korean (Hangul)
#                   0x82 ANSI Korean (Johab)
#                   0x86 ANSI Chinese Simplified GBK
#                   0x88 ANSI Chinese Traditional BIG5
#                   0xa1 ANSI Greek          0xa2 ANSI Turkish
#                   0xa3 ANSI Vietnamese     0xb1 ANSI Hebrew
#                   0xb2 ANSI Arabic         0xba ANSI Baltic
#                   0xcc ANSI Cyrillic       0xde ANSI Thai
#                   0xee ANSI Latin II (Central European)
#                   0xff OEM Latin I
#       13     1  Not used
#       14  var.  Font name:
#                   BIFF5/BIFF7: Byte string, 8-bit string length (see 3.3)
#                   BIFF8: Unicode string, 8-bit string length (see 3.4)
def read_font work, pos, len
  name = client(read_string(work[14..-1]), @workbook.encoding)
  font = Font.new name
  size, opts, color, weight, escapement, underline,
    family, encoding = work.unpack(binfmt(:font))
  font.weight     = weight
  font.size       = size / TWIPS
  # The masked flag bits are assigned as they are (0 or the mask value)
  font.italic     = opts & 0x0002
  font.strikeout  = opts & 0x0008
  font.outline    = opts & 0x0010
  font.shadow     = opts & 0x0020
  font.color      = COLOR_CODES[color] || :text
  font.escapement = ESCAPEMENT_TYPES[escapement]
  font.underline  = UNDERLINE_TYPES[underline]
  font.family     = FONT_FAMILIES[family]
  font.encoding   = FONT_ENCODINGS[encoding]
  @workbook.add_font font
end
|
264
|
+
# Reads a FORMAT record and stores the number-format string in @formats
# under its index.
#
#   Offset  Size  Contents
#        0     2  Format index used in other records
#        2  var.  Number format string
#                 (Unicode string, 16-bit string length, see 3.4)
def read_format work, pos, len
  idx = work.unpack('v').first
  text = read_string(work[2..-1], 2)
  @formats[idx] = client(text, @workbook.encoding)
end
|
273
|
+
# Reads a FORMULA record into a Formula, stores it in the addressed cell
# (like the other cell readers do) and returns it. Previously the parsed
# Formula was built and then dropped.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (see 6.115)
#        6     8  Result of the formula. See below for details.
#       14     2  Option flags:
#                   0x0001  1 = Recalculate always
#                   0x0002  1 = Calculate on open
#                   0x0008  1 = Part of a shared formula
#       16     4  Not used
#       20  var.  Formula data (RPN token array, see 4)
#                 Offset  Size  Contents
#                      0     2  Size of the following formula data (sz)
#                      2    sz  Formula data (RPN token array)
#                 [2+sz]  var.  (optional) Additional data for specific
#                               tokens (see 4.1.6, for example tArray token,
#                               see 4.8.7)
#
# Result of the Formula
# Dependent on the type of value the formula returns, the result field has
# the following format:
#
# Result is a numeric value:
#   Offset  Size  Contents
#        0     8  IEEE 754 floating-point value (64-bit double precision)
#
# Result is a string (the string follows in a STRING record, see 6.98):
#   Offset  Size  Contents
#        0     1  0x00 (identifier for a string value)
#        1     5  Not used
#        6     2  0xffff
# Note: In BIFF8 the string must not be empty. For empty cells there is a
# special identifier defined (see below).
#
# Result is a Boolean value:
#   Offset  Size  Contents
#        0     1  0x01 (identifier for a Boolean value)
#        1     1  Not used
#        2     1  0 = FALSE, 1 = TRUE
#        3     3  Not used
#        6     2  0xffff
#
# Result is an error value:
#   Offset  Size  Contents
#        0     1  0x02 (identifier for an error value)
#        1     1  Not used
#        2     1  Error code (see 3.7)
#        3     3  Not used
#        6     2  0xffff
#
# Result is an empty cell (BIFF8), for example an empty string:
#   Offset  Size  Contents
#        0     1  0x03 (identifier for an empty cell)
#        1     5  Not used
#        6     2  0xffff
def read_formula worksheet, work, pos, len
  row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]
  if rcheck != 0xffff || rtype > 3
    # numeric result
    value, = work.unpack 'x6E'
    unless value
      # on architectures where sizeof(double) > 8
      value, = work.unpack 'x6e'
    end
    formula.value = value
  elsif rtype == 0
    # string result: the text is expected in the following STRING record
    chunk_pos, op, _chunk_len, chunk = get_next_chunk
    if op == :string
      formula.value = client read_string(chunk, 2), @workbook.encoding
    else
      warn "String Value expected after Formula, but got #{op}"
      formula.value = Error.new 0x2a
      # rewind so the unexpected record is read again by the caller; keep
      # the position untouched if there was no further record at all
      @pos = chunk_pos if chunk_pos
    end
  elsif rtype == 1
    formula.value = rval > 0
  elsif rtype == 2
    formula.value = Error.new rval
  else
    # leave the Formula value blank
  end
  set_cell worksheet, row, column, xf, formula
  formula
end
|
357
|
+
# Placeholder for the INDEX record; currently does nothing.
#
#   Offset  Size  Contents
#        0     4  Not used
#        4     4  Index to first used row (rf, 0-based)
#        8     4  Index to first row of unused tail of sheet
#                 (rl, last used row + 1, 0-based)
#       12     4  Absolute stream position of the
#                 DEFCOLWIDTH record (see 6.29) of the current sheet. If this
#                 record does not exist, the offset points to the record at
#                 the position where the DEFCOLWIDTH record would occur.
#       16  4*nm  Array of nm absolute stream positions to the
#                 DBCELL record (see 6.26) of each Row Block
def read_index worksheet, work, pos, len
  # TODO: use the index if it exists
  # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
end
|
372
|
+
# Reads a LABEL record: a cell holding an inline string.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (see 6.115)
#        6  var.  Unicode string, 16-bit string length (see 3.4)
def read_label worksheet, addr, work
  row, column, xf = work.unpack('v3')
  text = read_string(work[6..-1], 2)
  set_cell(worksheet, row, column, xf, client(text, @workbook.encoding))
end
|
382
|
+
# Reads a LABELSST record: a cell whose string is fetched from the
# worksheet via #shared_string.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (see 6.115)
#        6     4  Index into SST record (see 6.96)
def read_labelsst worksheet, addr, work
  row, column, xf, index = work.unpack(binfmt(:labelsst))
  text = worksheet.shared_string(index)
  set_cell worksheet, row, column, xf, text
end
|
391
|
+
# Reads a MULBLANK record: a run of adjacent blank cells in one row, each
# with its own format.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to first column (fc)
#        4  2*nc  List of nc=lc-fc+1 16-bit indexes to XF records (see 6.115)
#   4+2*nc     2  Index to last column (lc)
def read_mulblank worksheet, addr, work
  row, first_column, *xfs = work.unpack 'v*'
  xfs.pop # index to last column, not needed
  # The n-th XF index belongs to column fc + n. Previously every cell of the
  # run was written to the first column.
  xfs.each_with_index do |xf, idx|
    set_cell worksheet, row, first_column + idx, xf
  end
end
|
401
|
+
# Reads a MULRK record: a run of adjacent RK-encoded numbers in one row.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to first column (fc)
#        4  6*nc  List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
#                   Offset  Size  Contents
#                        0     2  Index to XF record (see 6.115)
#                        2     4  RK value (see 3.6)
#   4+6*nc     2  Index to last column (lc)
def read_mulrk worksheet, addr, work
  row, column = work.unpack 'v2'
  # Walk the 6-byte XF/RK structures; the trailing last-column index is
  # excluded by the upper bound.
  4.step(work.size - 6, 6) do |idx|
    xf, rk = work[idx, 6].unpack('va4')
    set_cell worksheet, row, column, xf, decode_rk(rk)
    column += 1
  end
end
|
417
|
+
# Reads a NUMBER record: a cell holding a floating-point value.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (see 6.115)
#        6     8  IEEE 754 floating-point value (64-bit double precision)
def read_number worksheet, addr, work
  row, column, xf, value = work.unpack 'v3E'
  # on architectures where sizeof(double) > 8
  value ||= work.unpack('x6e').first
  set_cell worksheet, row, column, xf, value
end
|
430
|
+
# Reads an RK record: a cell holding a single RK-encoded number.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (see 6.115)
#        6     4  RK value (see 3.6)
def read_rk worksheet, addr, work
  row, column, xf = work.unpack('v3')
  number = decode_rk(work[6, 4])
  set_cell worksheet, row, column, xf, number
end
|
439
|
+
# Returns the Row at addr[:index]. A row that is already part of the
# current row block is served from there; otherwise the block is dropped
# and the cell records starting at addr[:offset] are read afresh.
def read_row worksheet, addr
  row = addr[:index]
  @current_row_block.fetch row do
    @current_row_block.clear
    cells = @current_row_block[row] = Row.new(nil, row)
    @pos = addr[:offset]
    found = false
    while tuple = get_next_chunk
      pos, op, _len, work = tuple
      case op
      when :eof
        # EOF (6.36) - we should only get here if there is just one Row-Block
        @pos = pos
        return cells
      when :dbcell
        # DBCELL: stream offsets to the cell records of each row
        return cells
      when :row
        # ROW (6.83) records were already handled in read_worksheet; one
        # that follows cell records ends the block
        return cells if found
      when 0x0002
        # INTEGER (6.56, BIFF2 only)
        # TODO: implement for BIFF2 support
        found = true
      when :blank, :boolerr, :label, :labelsst, :mulblank, :mulrk,
           :number, :rk, :rstring
        # Cell records: each opcode has a reader method of the same name
        found = true
        send "read_#{op}", worksheet, addr, work
      end
    end
    cells
  end
end
|
494
|
+
# Reads an RSTRING record: a cell holding a rich-text string. Only the text
# is used; the formatting runs are not evaluated.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (see 6.115)
#        6    sz  Unformatted Unicode string, 16-bit string length (see 3.4)
#     6+sz     2  Number of Rich-Text formatting runs (rt)
#     8+sz  4*rt  List of rt formatting runs (see 3.2)
def read_rstring worksheet, addr, work
  row, column, xf = work.unpack('v3')
  text = read_string(work[6..-1], 2)
  set_cell(worksheet, row, column, xf, client(text, @workbook.encoding))
end
|
506
|
+
# Reads the workbook globals: dispatches every record up to the globals'
# EOF to its reader method. Records without a branch below are skipped.
def read_workbook
  previous_op = nil
  while tuple = get_next_chunk
    pos, op, len, work = tuple
    case op
    when @bof, :bof   # BOF Type = worksheet (6.8)
      # A further BOF means the globals are over; worksheet substreams are
      # not parsed by this method. (The statements that used to follow this
      # return could never run and have been removed.)
      return
    when :eof         # EOF (6.36)
      postread_workbook
      return
    when :datemode    # DATEMODE (6.25)
      flag, _ = work.unpack 'v'
      if flag == 1
        @workbook.date_base = Date.new 1904, 1, 1
      else
        @workbook.date_base = Date.new 1899, 12, 31
      end
    when :continue    # CONTINUE (6.22)
      # Only continuations of the shared string table are handled
      continue_sst work, pos, len if previous_op == :sst
    when :codepage    # CODEPAGE (6.17)
      read_codepage work, pos, len
    when :boundsheet  # BOUNDSHEET (6.12)
      read_boundsheet work, pos, len
    when :xf          # XF (6.115)
      read_xf work, pos, len
    when :sst         # Shared String Table (5.11), SST (6.96)
      read_sst work, pos, len
      # TODO: implement memory-efficient sst handling, possibly in conjunction
      #       with EXTSST
      # when :extsst  # EXTSST (6.40)
    when :style       # STYLE (6.99)
      read_style work, pos, len
    when :format      # FORMAT (Number Format) (6.45)
      read_format work, pos, len
    when :font
      read_font work, pos, len
    end
    # CONTINUE records extend the record before them
    previous_op = op unless op == :continue
  end
end
|
559
|
+
# Scans the records of a worksheet starting at offset. Only DIMENSIONS and
# ROW records are processed here; scanning ends at the sheet's EOF record.
def read_worksheet worksheet, offset
  @pos = offset
  while tuple = get_next_chunk
    pos, op, len, work = tuple
    # The first record outside of a row block closes the block that is
    # currently open: its length is stored next to its start position.
    block = @current_row_block_offset
    if block && !in_row_block?(op)
      @current_row_block_offset = nil
      block[1] = pos - block[0]
    end
    case op
    when :eof         # EOF (6.36)
      postread_worksheet worksheet
      return
    #when :uncalced   # UNCALCED (6.104)
      # TODO: Formula support. Values were not calculated before saving
      #warn <<-EOS
      #  Some fields containing formulas were saved without a computed value.
      #  Support Spreadsheet::Excel by implementing formula-calculations!
      #EOS
    #when :index      # INDEX (5.7 Row Blocks, 6.55)
      # TODO: if there are changes in rows, omit index when writing
      #read_index worksheet, work, pos, len
    when :dimensions  # DIMENSIONS (6.31)
      read_dimensions worksheet, work, pos, len
    when :row         # Row Blocks (5.7), ROW (6.83)
      set_row_address worksheet, work, pos, len
    end
  end
end
|
588
|
+
# Reads a STYLE record and assigns the style's name to the format it
# refers to.
#
# User-Defined Cell Styles:
#   Offset  Size  Contents
#        0     2  Bit   Mask    Contents
#                 11-0  0x0fff  Index to style XF record (see 6.115)
#                   15  0x8000  Always 0 for user-defined styles
#        2  var.  BIFF2-BIFF7:  Non-empty byte string,
#                               8-bit string length (see 3.3)
#                 BIFF8:        Non-empty Unicode string,
#                               16-bit string length (see 3.4)
#
# Built-In Cell Styles
#   Offset  Size  Contents
#        0     2  Bit   Mask    Contents
#                 11-0  0x0FFF  Index to style XF record (see 6.115)
#                   15  0x8000  Always 1 for built-in styles
#        2     1  Identifier of the built-in cell style:
#                   0x00 = Normal
#                   0x01 = RowLevel_lv (see next field)
#                   0x02 = ColLevel_lv (see next field)
#                   0x03 = Comma
#                   0x04 = Currency
#                   0x05 = Percent
#                   0x06 = Comma [0] (BIFF4-BIFF8)
#                   0x07 = Currency [0] (BIFF4-BIFF8)
#                   0x08 = Hyperlink (BIFF8)
#                   0x09 = Followed Hyperlink (BIFF8)
#        3     1  Level for RowLevel or ColLevel style (zero-based, lv),
#                 FFH otherwise
def read_style work, pos, len
  flags, = work.unpack 'v'
  xf = @workbook.format(flags & 0x0fff)
  if (flags & 0x8000) == 0
    # user-defined style: the name is part of the record
    xf.name = client read_string(work[2..-1], 2), @workbook.encoding
  else
    # built-in style: the name is looked up by its identifier
    id, level = work.unpack 'x2C2'
    if name = BUILTIN_STYLES[id]
      # String#sub returns a new String. Its result was discarded before,
      # so the "_lv" placeholder was never replaced by the level.
      xf.name = client name.sub('_lv', "_#{level}"), 'UTF8'
    end
  end
end
|
631
|
+
# Reads an XF record and adds a Format to the workbook. Of all the fields
# only the font index and the number-format index are evaluated so far.
#
#   Offset  Size  Contents
#        0     2  Index to FONT record (see 6.43)
#        2     2  Index to FORMAT record (see 6.45)
#        4     2  Bit   Mask    Contents
#                 2-0   0x0007  XF_TYPE_PROT - XF type, cell protection
#                               0x01  1 = Cell is locked
#                               0x02  1 = Formula is hidden
#                               0x04  0 = Cell XF; 1 = Style XF
#                 15-4  0xfff0  Index to parent style XF
#                               (always 0xfff in style XFs)
#        6     1  Bit   Mask    Contents
#                 2-0   0x07    XF_HOR_ALIGN - Horizontal alignment
#                               0x00 General   0x01 Left
#                               0x02 Centred   0x03 Right
#                               0x04 Filled    0x05 Justified (BIFF4-BIFF8X)
#                               0x06 Centred across selection (BIFF4-BIFF8X)
#                               0x07 Distributed (BIFF8X)
#                 3     0x08    1 = Text is wrapped at right border
#                 6-4   0x70    XF_VERT_ALIGN - Vertical alignment
#                               0x00 Top       0x01 Centred
#                               0x02 Bottom    0x03 Justified (BIFF5-BIFF8X)
#                               0x04 Distributed (BIFF8X)
#        7     1  XF_ROTATION: Text rotation angle
#                   0       Not rotated
#                   1-90    1 to 90 degrees counterclockwise
#                   91-180  1 to 90 degrees clockwise
#                   255     Letters are stacked top-to-bottom,
#                           but not rotated
#        8     1  Bit   Mask    Contents
#                 3-0   0x0f    Indent level
#                 4     0x10    1 = Shrink content to fit into cell
#                 5     0x40    1 = Merge Range (djberger)
#                 7-6   0xc0    Text direction (BIFF8X only)
#                               0 = According to context
#                               1 = Left-to-right
#                               2 = Right-to-left
#        9     1  Bit   Mask    Contents
#                 7-2   0xfc    XF_USED_ATTRIB - Used attributes
#                               Each bit describes the validity of a
#                               specific group of attributes. In cell XFs
#                               a cleared bit means the attributes of the
#                               parent style XF are used (but only if the
#                               attributes are valid there), a set bit
#                               means the attributes of this XF are used.
#                               In style XFs a cleared bit means the
#                               attribute setting is valid, a set bit
#                               means the attribute should be ignored.
#                               0x01  Flag for number format
#                               0x02  Flag for font
#                               0x04  Flag for horizontal and vertical
#                                     alignment, text wrap, indentation,
#                                     orientation, rotation, and text
#                                     direction
#                               0x08  Flag for border lines
#                               0x10  Flag for background area style
#                               0x20  Flag for cell protection (cell
#                                     locked and formula hidden)
#       10     4  Cell border lines and background area:
#                 Bit    Mask        Contents
#                  3- 0  0x0000000f  Left line style (see 3.10)
#                  7- 4  0x000000f0  Right line style (see 3.10)
#                 11- 8  0x00000f00  Top line style (see 3.10)
#                 15-12  0x0000f000  Bottom line style (see 3.10)
#                 22-16  0x007f0000  Colour index (see 6.70)
#                                    for left line colour
#                 29-23  0x3f800000  Colour index (see 6.70)
#                                    for right line colour
#                 30     0x40000000  1 = Diagonal line
#                                    from top left to right bottom
#                 31     0x80000000  1 = Diagonal line
#                                    from bottom left to right top
#       14     4  Bit    Mask        Contents
#                  6- 0  0x0000007f  Colour index (see 6.70)
#                                    for top line colour
#                 13- 7  0x00003f80  Colour index (see 6.70)
#                                    for bottom line colour
#                 20-14  0x001fc000  Colour index (see 6.70)
#                                    for diagonal line colour
#                 24-21  0x01e00000  Diagonal line style (see 3.10)
#                 31-26  0xfc000000  Fill pattern (see 3.11)
#       18     2  Bit    Mask    Contents
#                  6-0   0x007f  Colour index (see 6.70)
#                                for pattern colour
#                 13-7   0x3f80  Colour index (see 6.70)
#                                for pattern background
def read_xf work, pos, len
  fmt = Format.new
  # The remaining fields (type, alignment, rotation, indent, used
  # attributes, borders, border colours, pattern) are not evaluated yet.
  font_idx, numfmt, = work.unpack(binfmt(:xf))
  fmt.number_format = @formats[numfmt]
  fmt.font = @workbook.font(font_idx)
  @workbook.add_format fmt
end
|
735
|
+
# Stores value and the format with index xf at (row, column) of the current
# row block. The Row is created on first use. Returns value.
def set_cell worksheet, row, column, xf, value=nil
  cells = @current_row_block[row]
  cells = @current_row_block[row] = Row.new(nil, row) unless cells
  cells.formats[column] = @workbook.format(xf)
  cells[column] = value
end
|
740
|
+
# Reads a ROW record and tells the worksheet where the row can be found.
# The first ROW record of a row block opens the block at pos.
#
#   Offset  Size  Contents
#        0     2  Index of this row
#        2     2  Index to column of the first cell which
#                 is described by a cell record
#        4     2  Index to column of the last cell which is
#                 described by a cell record, increased by 1
#        6     2  Bit   Mask    Contents
#                 14-0  0x7fff  Height of the row, in twips = 1/20 of a point
#                   15  0x8000  0 = Row has custom height;
#                               1 = Row has default height
#        8     2  Not used
#       10     1  0 = No defaults written;
#                 1 = Default row attribute field and XF index occur below (fl)
#       11     2  Relative offset to calculate stream position of the first
#                 cell record for this row (see 5.7.1)
#     [13]     3  (written only if fl = 1) Default row attributes (see 3.12)
#     [16]     2  (written only if fl = 1) Index to XF record (see 6.115)
def set_row_address worksheet, work, pos, len
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  index, first_used, first_unused, _flags,
    hasdefaults, _first_cell = work.unpack(binfmt(:row))
  # TODO: read attributes from work[13,3], read flags
  if hasdefaults > 0
    # TODO: read row default XF
  end
  worksheet.set_row_address index, :first_used   => first_used,
                                   :first_unused => first_unused,
                                   :index        => index,
                                   :row_block    => block,
                                   :offset       => block[0]
                                   #:first_cell  => _first_cell
end
|
772
|
+
private
|
773
|
+
# Loads the Internals module for the given BIFF version and extends this
# reader with it. A missing file for that version is silently ignored.
def extend_internals version
  suffix = 'biff%i' % version
  begin
    require "spreadsheet/excel/internals/#{suffix}"
    extend Internals.const_get(suffix.capitalize)
  rescue LoadError
    # nothing version specific to load
  end
end
|
778
|
+
# Loads the Reader module for the given BIFF version and extends this
# reader with it. A missing file for that version is silently ignored.
def extend_reader version
  suffix = 'biff%i' % version
  begin
    require "spreadsheet/excel/reader/#{suffix}"
    extend Reader.const_get(suffix.capitalize)
  rescue LoadError
    # nothing version specific to load
  end
end
|
783
|
+
# Reads the record at @pos and moves @pos behind it.
# Returns [position, opcode, total length including the header, payload],
# where the opcode is translated through SEDOCPO when it is listed there
# and left as a number otherwise. Returns nil if no complete record header
# is left, including the case of @pos lying beyond the end of the data,
# which used to raise a NoMethodError on nil.
def get_next_chunk
  pos = @pos
  # String#[] yields nil for a start index beyond the end of the data
  header = @data[pos, OPCODE_SIZE] || ''
  op, len = header.unpack('v2')
  @pos += OPCODE_SIZE
  return unless len
  work = @data[@pos, len]
  @pos += len
  code = SEDOCPO.fetch(op, op)
  #puts "0x%04x/%-16s (0x%08x) %5i: %s" % [op, code.inspect, pos, len, work[0,16].inspect]
  #puts "0x%04x/%-16s %5i: %s" % [op, code.inspect, len, work[0,32].inspect]
  [pos, code, len + OPCODE_SIZE, work]
end
|
796
|
+
end
|
797
|
+
end
|
798
|
+
end
|