spreadsheet 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. data/GUIDE.txt +209 -0
  2. data/History.txt +8 -0
  3. data/LICENSE.txt +619 -0
  4. data/Manifest.txt +46 -0
  5. data/README.txt +54 -0
  6. data/Rakefile +15 -0
  7. data/lib/parseexcel.rb +27 -0
  8. data/lib/parseexcel/parseexcel.rb +75 -0
  9. data/lib/parseexcel/parser.rb +11 -0
  10. data/lib/spreadsheet.rb +79 -0
  11. data/lib/spreadsheet/datatypes.rb +99 -0
  12. data/lib/spreadsheet/encodings.rb +49 -0
  13. data/lib/spreadsheet/excel.rb +75 -0
  14. data/lib/spreadsheet/excel/error.rb +26 -0
  15. data/lib/spreadsheet/excel/internals.rb +322 -0
  16. data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
  17. data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
  18. data/lib/spreadsheet/excel/offset.rb +37 -0
  19. data/lib/spreadsheet/excel/reader.rb +798 -0
  20. data/lib/spreadsheet/excel/reader/biff5.rb +22 -0
  21. data/lib/spreadsheet/excel/reader/biff8.rb +168 -0
  22. data/lib/spreadsheet/excel/row.rb +67 -0
  23. data/lib/spreadsheet/excel/sst_entry.rb +45 -0
  24. data/lib/spreadsheet/excel/workbook.rb +76 -0
  25. data/lib/spreadsheet/excel/worksheet.rb +85 -0
  26. data/lib/spreadsheet/excel/writer.rb +1 -0
  27. data/lib/spreadsheet/excel/writer/biff8.rb +66 -0
  28. data/lib/spreadsheet/excel/writer/format.rb +270 -0
  29. data/lib/spreadsheet/excel/writer/workbook.rb +586 -0
  30. data/lib/spreadsheet/excel/writer/worksheet.rb +556 -0
  31. data/lib/spreadsheet/font.rb +86 -0
  32. data/lib/spreadsheet/format.rb +172 -0
  33. data/lib/spreadsheet/formula.rb +9 -0
  34. data/lib/spreadsheet/row.rb +87 -0
  35. data/lib/spreadsheet/workbook.rb +120 -0
  36. data/lib/spreadsheet/worksheet.rb +215 -0
  37. data/lib/spreadsheet/writer.rb +29 -0
  38. data/test/data/test_copy.xls +0 -0
  39. data/test/data/test_version_excel5.xls +0 -0
  40. data/test/data/test_version_excel95.xls +0 -0
  41. data/test/data/test_version_excel97.xls +0 -0
  42. data/test/excel/row.rb +29 -0
  43. data/test/font.rb +163 -0
  44. data/test/integration.rb +1021 -0
  45. data/test/workbook.rb +21 -0
  46. data/test/worksheet.rb +62 -0
  47. metadata +113 -0
@@ -0,0 +1,37 @@
1
+ module Spreadsheet
2
+ module Excel
3
##
# This module is used to keep track of offsets in modified Excel documents.
# Considered internal and subject to change without notice.
#
# Including classes gain:
# * #changes - Hash of attribute name => true for every attribute assigned
#              through a generated <tt>key=</tt> writer.
# * #offsets - Hash of attribute name => [pos, len] for every attribute
#              assigned through a generated <tt>set_key</tt> method.
# * a class-level +offset+ macro that generates those methods.
module Offset
  attr_reader :changes, :offsets
  def initialize *args
    super
    @changes = {}
    @offsets = {}
  end
  ##
  # Include-hook: adds the +offset+ class macro to _mod_.
  def Offset.append_features mod
    super
    mod.module_eval do
      class << self
        ##
        # For each key, defines:
        # * a reader, unless a public/protected method of that name already
        #   exists (checked with method_defined?, which behaves the same
        #   whether method names are reported as Strings or Symbols)
        # * <tt>key=(value)</tt>, which flags the key in #changes
        # * <tt>set_key(value, pos, len)</tt>, which records [pos, len] in
        #   #offsets and then calls <tt>have_set_key(value, pos, len)</tt>
        #   if the instance responds to it
        def offset *keys
          keys.each do |key|
            attr_reader key unless method_defined? key
            define_method "#{key}=" do |value|
              @changes.store key, true
              instance_variable_set "@#{key}", value
            end
            define_method "set_#{key}" do |value, pos, len|
              instance_variable_set "@#{key}", value
              @offsets.store key, [pos, len]
              havename = "have_set_#{key}"
              send(havename, value, pos, len) if respond_to? havename
            end
          end
        end
      end
    end
  end
end
36
+ end
37
+ end
@@ -0,0 +1,798 @@
1
+ require 'spreadsheet/encodings'
2
+ require 'spreadsheet/font'
3
+ require 'spreadsheet/formula'
4
+ require 'spreadsheet/excel/error'
5
+ require 'spreadsheet/excel/internals'
6
+ require 'spreadsheet/excel/sst_entry'
7
+ require 'spreadsheet/excel/worksheet'
8
+
9
+ module Spreadsheet
10
+ module Excel
11
+ ##
12
+ # Reader class for Excel Workbooks. Most read_* methods correspond to an
13
+ # Excel-Record/Opcode. You should not need to call any of its methods
14
+ # directly. If you think you do, look at #read
15
+ class Reader
16
+ include Encodings
17
+ include Internals
18
+ OPCODE_SIZE = 4
19
+ ROW_BLOCK_OPS = [
20
+ :blank, :boolerr, :dbcell, :formula, :label, :labelsst, :mulblank, :mulrk,
21
+ :number, :rk, :row, :rstring,
22
+ ]
23
##
# Prepares the parser state.
# opts is kept as-is in @opts. The only key consulted here is :bigendian;
# when it is absent the host byte order is probed by packing a native long.
# The format table is pre-seeded with every BUILTIN_FORMATS entry, passed
# through #client with the 'UTF8' source encoding.
def initialize opts = {}
  @opts = opts
  @pos = 0
  @current_row_block = {}
  @bigendian = opts.fetch(:bigendian) do
    [1].pack('l') != "\001\000\000\000"
  end
  @formats = {}
  BUILTIN_FORMATS.each do |key, fmt|
    @formats.store key, client(fmt, 'UTF8')
  end
end
33
##
# Decodes a 4-byte little-endian RK value.
#
# Bit   Mask        Contents
# 0     0x00000001  0 = Value not changed  1 = Value is multiplied by 100
# 1     0x00000002  0 = Floating-point value  1 = Signed integer value
# 31-2  0xFFFFFFFC  Encoded value
#
# If bit 1 is cleared, the encoded value represents the 30 most significant
# bits of an IEEE 754 floating-point value (64-bit double precision). The
# 34 least significant bits must be set to zero. If bit 1 is set, the
# encoded value represents a signed 30-bit integer value. To get the
# correct integer, the encoded value has to be shifted right arithmetically
# by 2 bits. If bit 0 is set, the decoded value (both integer and
# floating-point) must be divided by 100 to get the final result.
#
# work:: String holding the 4 RK bytes.
# Returns an Integer or a Float (always a Float when bit 0 is set).
def decode_rk work
  raw, = work.unpack 'V'
  cent = (raw & 0x01) == 1
  if (raw & 0x02) == 0
    # The upper 30 bits are the high half of a little-endian double; the low
    # 32 bits of that double are zero.
    value, = ("\0\0\0\0" + [raw & 0xfffffffc].pack('V')).unpack EIGHT_BYTE_DOUBLE
  else
    # Reinterpret the unsigned 32-bit value as two's complement *before*
    # shifting, so that the shift is arithmetic and negative integers keep
    # their sign. This is independent of the host byte order.
    raw -= 0x100000000 if raw >= 0x80000000
    value = raw >> 2
  end
  value /= 100.0 if cent
  value
end
69
##
# Maps a codepage id to its entry in CODEPAGES.
# On Ruby >= 1.9 the entry is resolved with Encoding.find; on older
# interpreters the raw CODEPAGES entry is returned.
# Raises a RuntimeError for ids missing from CODEPAGES.
def encoding codepage_id
  name = CODEPAGES.fetch(codepage_id) do
    raise "Unknown Codepage 0x%04x" % codepage_id
  end
  return name unless RUBY_VERSION >= '1.9'
  Encoding.find name
end
78
##
# True if _op_ is one of the opcodes listed in ROW_BLOCK_OPS.
def in_row_block? op
  ROW_BLOCK_OPS.any? do |candidate| candidate == op end
end
81
##
# Returns whatever was passed as the :memoization option to #initialize
# (nil when the option was not given).
def memoize?
  setting = @opts[:memoization]
  setting
end
84
##
# Records, for every worksheet, [offset, length] in @workbook.offsets.
# A sheet's length runs up to the next sheet's offset; for the last sheet it
# runs up to @workbook.ole.size.
def postread_workbook
  sheets = @workbook.worksheets
  sheets.each_with_index do |sheet, idx|
    start = sheet.offset
    following = sheets[idx + 1]
    stop = following ? following.offset : @workbook.ole.size
    @workbook.offsets.store sheet, [start, stop - start]
  end
end
92
##
# Called by #read_worksheet when the EOF record of a worksheet is reached.
# Does nothing here.
def postread_worksheet worksheet
  # intentionally empty
end
94
##
# The entry-point for reading Excel-documents. Reads the Biff-Version and
# loads additional reader-methods before proceeding with parsing the document.
#
# io is opened as an OLE storage; the stream named "Book" is used, falling
# back to "Workbook" if opening "Book" raises a StandardError.
# Returns the populated Workbook.
def read io
  @ole = Ole::Storage.open io
  @workbook = Workbook.new io, {}
  @book = begin
    @ole.file.open("Book")
  rescue
    @ole.file.open("Workbook")
  end
  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  biff = @workbook.biff_version
  # pull in the version-specific reader methods and constants, if any
  extend_reader biff
  extend_internals biff
  read_workbook
  @workbook
end
112
##
# BLANK record: registers an empty cell that only carries a format.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to column
#      4     2  Index to XF record (➜ 6.115)
def read_blank worksheet, addr, work
  fields = work.unpack binfmt(:blank)
  set_cell worksheet, *fields.values_at(0, 1, 2)
end
120
##
# Reads the next chunk as a BOF record. Its opcode is remembered in @bof and,
# if the record announces workbook globals (0x0005), its version in @version.
#
# Offset  Size  Contents
#      0     2  BIFF version (always 0x0600 for BIFF8)
#      2     2  Type of the following data: 0x0005 = Workbook globals
#                                           0x0006 = Visual Basic module
#                                           0x0010 = Worksheet
#                                           0x0020 = Chart
#                                           0x0040 = Macro sheet
#                                           0x0100 = Workspace file
#      4     2  Build identifier
#      6     2  Build year
#      8     4  File history flags
#     12     4  Lowest Excel version that can read all records in this file
def read_bof
  _pos, @bof, _len, work = get_next_chunk
  ## version and datatype are common to all Excel-Versions. Later versions
  #  have additional information such as build-id and -year (from BIFF5).
  #  These are ignored for the time being.
  version, datatype = work.unpack('v2')
  @version = version if datatype == 0x5
end
142
##
# BOOLERR record: stores true/false, or an Error built from the error code.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to column
#      4     2  Index to XF record (➜ 6.115)
#      6     1  Boolean or error value (type depends on the following byte)
#      7     1  0 = Boolean value; 1 = Error code
def read_boolerr worksheet, addr, work
  row, column, xf, value, error = work.unpack 'v3C2'
  cell = if error == 0
           value > 0
         else
           Error.new(value)
         end
  set_cell worksheet, row, column, xf, cell
end
152
##
# BOUNDSHEET record: registers a Worksheet (name, stream offset) with the
# workbook and extends the [count, position, total length] triple that
# describes the run of BOUNDSHEET records seen so far.
#
# Offset  Size  Contents
#      0     4  Absolute stream position of the BOF record of the sheet
#               represented by this record. This field is never encrypted
#               in protected files.
#      4     1  Visibility: 0x00 = Visible
#                           0x01 = Hidden
#                           0x02 = Strong hidden (see below)
#      5     1  Sheet type: 0x00 = Worksheet
#                           0x02 = Chart
#                           0x06 = Visual Basic module
#      6  var.  Sheet name: BIFF5/BIFF7: Byte string,
#                           8-bit string length (➜ 3.3)
#                           BIFF8: Unicode string, 8-bit string length (➜ 3.4)
def read_boundsheet work, pos, len
  # visibility and sheet type are unpacked but not used
  offset, _visibility, _type = work.unpack("VC2")
  name = client read_string(work[6..-1]), @workbook.encoding
  # the first record seeds the triple; every record counts itself in
  @boundsheets ||= [0, pos, 0]
  @boundsheets[0] += 1
  @boundsheets[2] += len
  @workbook.set_boundsheets(*@boundsheets)
  sheet = Worksheet.new(:name   => name,
                        :ole    => @book,
                        :offset => offset,
                        :reader => self)
  @workbook.add_worksheet sheet
end
180
##
# CODEPAGE record: resolves the 16-bit codepage id through #encoding and
# hands the result, with the record's position and length, to the workbook.
def read_codepage work, pos, len
  codepage = work.unpack('v').first
  @workbook.set_encoding encoding(codepage), pos, len
end
184
##
# DIMENSIONS record: passes the unpacked fields (as one Array) plus the
# record's position and length to worksheet.set_dimensions.
#
# Offset  Size  Contents
#      0     4  Index to first used row
#      4     4  Index to last used row, increased by 1
#      8     2  Index to first used column
#     10     2  Index to last used column, increased by 1
#     12     2  Not used
def read_dimensions worksheet, work, pos, len
  dimensions = work.unpack(binfmt(:dimensions))
  worksheet.set_dimensions dimensions, pos, len
end
193
##
# FONT record: builds a Font from the record and adds it to the workbook.
#
# Offset  Size  Contents
#      0     2  Height of the font (in twips = 1/20 of a point)
#      2     2  Option flags:
#               Bit  Mask    Contents
#                 0  0x0001  1 = Characters are bold (redundant, see below)
#                 1  0x0002  1 = Characters are italic
#                 2  0x0004  1 = Characters are underlined
#                                (redundant, see below)
#                 3  0x0008  1 = Characters are struck out
#                 4  0x0010  1 = Characters are outlined (djberger)
#                 5  0x0020  1 = Characters are shadowed (djberger)
#      4     2  Colour index (➜ 6.70)
#      6     2  Font weight (100-1000). Standard values are
#                    0x0190 (400) for normal text and
#                    0x02bc (700) for bold text.
#      8     2  Escapement type: 0x0000 = None
#                                0x0001 = Superscript
#                                0x0002 = Subscript
#     10     1  Underline type:  0x00 = None
#                                0x01 = Single
#                                0x02 = Double
#                                0x21 = Single accounting
#                                0x22 = Double accounting
#     11     1  Font family:
#                    0x00 = None (unknown or don't care)
#                    0x01 = Roman (variable width, serifed)
#                    0x02 = Swiss (variable width, sans-serifed)
#                    0x03 = Modern (fixed width, serifed or sans-serifed)
#                    0x04 = Script (cursive)
#                    0x05 = Decorative (specialised,
#                                       for example Old English, Fraktur)
#     12     1  Character set: 0x00 =   0 = ANSI Latin
#                              0x01 =   1 = System default
#                              0x02 =   2 = Symbol
#                              0x4d =  77 = Apple Roman
#                              0x80 = 128 = ANSI Japanese Shift-JIS
#                              0x81 = 129 = ANSI Korean (Hangul)
#                              0x82 = 130 = ANSI Korean (Johab)
#                              0x86 = 134 = ANSI Chinese Simplified GBK
#                              0x88 = 136 = ANSI Chinese Traditional BIG5
#                              0xa1 = 161 = ANSI Greek
#                              0xa2 = 162 = ANSI Turkish
#                              0xa3 = 163 = ANSI Vietnamese
#                              0xb1 = 177 = ANSI Hebrew
#                              0xb2 = 178 = ANSI Arabic
#                              0xba = 186 = ANSI Baltic
#                              0xcc = 204 = ANSI Cyrillic
#                              0xde = 222 = ANSI Thai
#                              0xee = 238 = ANSI Latin II (Central European)
#                              0xff = 255 = OEM Latin I
#     13     1  Not used
#     14  var.  Font name:
#               BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
#               BIFF8: Unicode string, 8-bit string length (➜ 3.4)
def read_font work, pos, len
  name = client read_string(work[14..-1]), @workbook.encoding
  font = Font.new name
  fields = work.unpack binfmt(:font)
  size, opts, color, weight, escapement, underline, family, charset = fields
  font.weight = weight
  font.size = size / TWIPS
  # the masked flag bits are handed to the Font writers unconverted
  font.italic = opts & 0x0002
  font.strikeout = opts & 0x0008
  font.outline = opts & 0x0010
  font.shadow = opts & 0x0020
  # colour indices missing from COLOR_CODES fall back to :text
  font.color = COLOR_CODES[color] || :text
  font.escapement = ESCAPEMENT_TYPES[escapement]
  font.underline = UNDERLINE_TYPES[underline]
  font.family = FONT_FAMILIES[family]
  font.encoding = FONT_ENCODINGS[charset]
  @workbook.add_font font
end
264
##
# FORMAT record: stores the number format string under its index in @formats.
#
# Offset  Size  Contents
#      0     2  Format index used in other records
#      2  var.  Number format string
#               (Unicode string, 16-bit string length, ➜ 3.4)
def read_format work, pos, len
  idx = work.unpack('v').first
  text = read_string work[2..-1], 2
  @formats.store idx, client(text, @workbook.encoding)
end
273
##
# FORMULA record: builds a Formula (token data, shared flag, cached result)
# and stores it as the value of the addressed cell.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to column
#      4     2  Index to XF record (➜ 6.115)
#      6     8  Result of the formula. See below for details.
#     14     2  Option flags:
#               Bit  Mask    Contents
#                 0  0x0001  1 = Recalculate always
#                 1  0x0002  1 = Calculate on open
#                 3  0x0008  1 = Part of a shared formula
#     16     4  Not used
#     20  var.  Formula data (RPN token array, ➜ 4)
#               Offset  Size  Contents
#                    0     2  Size of the following formula data (sz)
#                    2    sz  Formula data (RPN token array)
#               [2+sz]  var.  (optional) Additional data for specific tokens
#                             (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
#
# Result of the Formula
# Dependent on the type of value the formula returns, the result field has
# the following format:
#
# Result is a numeric value:
# Offset  Size  Contents
#      0     8  IEEE 754 floating-point value (64-bit double precision)
#
# Result is a string (the string follows in a STRING record, ➜ 6.98):
# Offset  Size  Contents
#      0     1  0x00 (identifier for a string value)
#      1     5  Not used
#      6     2  0xffff
# Note: In BIFF8 the string must not be empty. For empty cells there is a
# special identifier defined (see below).
#
# Result is a Boolean value:
# Offset  Size  Contents
#      0     1  0x01 (identifier for a Boolean value)
#      1     1  Not used
#      2     1  0 = FALSE, 1 = TRUE
#      3     3  Not used
#      6     2  0xffff
#
# Result is an error value:
# Offset  Size  Contents
#      0     1  0x02 (identifier for an error value)
#      1     1  Not used
#      2     1  Error code (➜ 3.7)
#      3     3  Not used
#      6     2  0xffff
#
# Result is an empty cell (BIFF8), for example an empty string:
# Offset  Size  Contents
#      0     1  0x03 (identifier for an empty cell)
#      1     5  Not used
#      6     2  0xffff
def read_formula worksheet, work, pos, len
  row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]
  if rcheck != 0xffff || rtype > 3
    # no 0xffff marker (or unknown type id): the result field is a double
    value, = work.unpack 'x6E'
    unless value
      # on architectures where sizeof(double) > 8
      value, = work.unpack 'x6e'
    end
    formula.value = value
  else
    case rtype
    when 0
      # the string result lives in the record that follows
      next_pos, op, _, string_data = get_next_chunk
      if op == :string
        formula.value = client read_string(string_data, 2), @workbook.encoding
      else
        warn "String Value expected after Formula, but got #{op}"
        formula.value = Error.new 0x2a
        # un-read the unexpected record; at end of stream there is no record
        # position to return to, so @pos is left alone instead of set to nil
        @pos = next_pos if next_pos
      end
    when 1
      formula.value = rval > 0
    when 2
      formula.value = Error.new rval
    else
      # leave the Formula value blank
    end
  end
  # without this the Formula built above would simply be thrown away
  set_cell worksheet, row, column, xf, formula
end
357
##
# INDEX record. Currently a no-op: the record is not evaluated.
#
# Offset  Size  Contents
#      0     4  Not used
#      4     4  Index to first used row (rf, 0-based)
#      8     4  Index to first row of unused tail of sheet
#               (rl, last used row + 1, 0-based)
#     12     4  Absolute stream position of the
#               DEFCOLWIDTH record (➜ 6.29) of the current sheet. If this
#               record does not exist, the offset points to the record at
#               the position where the DEFCOLWIDTH record would occur.
#     16  4∙nm  Array of nm absolute stream positions to the
#               DBCELL record (➜ 6.26) of each Row Block
def read_index worksheet, work, pos, len
  # TODO: use the index if it exists
  # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
end
372
##
# LABEL record: stores the inline string as the cell's value.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to column
#      4     2  Index to XF record (➜ 6.115)
#      6  var.  Unicode string, 16-bit string length (➜ 3.4)
def read_label worksheet, addr, work
  row, column, xf = work.unpack 'v3'
  text = read_string work[6..-1], 2
  set_cell worksheet, row, column, xf, client(text, @workbook.encoding)
end
382
##
# LABELSST record: the cell value is looked up through
# worksheet.shared_string using the index stored in the record.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to column
#      4     2  Index to XF record (➜ 6.115)
#      6     4  Index into SST record (➜ 6.96)
def read_labelsst worksheet, addr, work
  row, column, xf, index = work.unpack binfmt(:labelsst)
  text = worksheet.shared_string(index)
  set_cell worksheet, row, column, xf, text
end
391
##
# MULBLANK record: registers one empty, formatted cell per XF index, in
# consecutive columns starting at the first column.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to first column (fc)
#      4  2∙nc  List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
# 4+2∙nc     2  Index to last column (lc)
def read_mulblank worksheet, addr, work
  row, first_column, *xfs = work.unpack 'v*'
  xfs.pop # trailing last-column index, not needed
  # each XF belongs to its own column: advance from the first column
  xfs.each_with_index do |xf, idx|
    set_cell worksheet, row, first_column + idx, xf
  end
end
401
##
# MULRK record: stores one decoded RK number per XF/RK structure, in
# consecutive columns starting at the first column.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to first column (fc)
#      4  6∙nc  List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
#               Offset  Size  Contents
#                    0     2  Index to XF record (➜ 6.115)
#                    2     4  RK value (➜ 3.6)
# 4+6∙nc     2  Index to last column (lc)
def read_mulrk worksheet, addr, work
  row, column = work.unpack 'v2'
  limit = work.size - 6
  idx = 4
  while idx <= limit
    xf, = work.unpack "x#{idx}v"
    set_cell worksheet, row, column, xf, decode_rk(work[idx + 2, 4])
    column += 1
    idx += 6
  end
end
417
##
# NUMBER record: stores the floating-point value as the cell's value.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to column
#      4     2  Index to XF record (➜ 6.115)
#      6     8  IEEE 754 floating-point value (64-bit double precision)
def read_number worksheet, addr, work
  row, column, xf, value = work.unpack 'v3E'
  # on architectures where sizeof(double) > 8
  value ||= work.unpack('x6e').first
  set_cell worksheet, row, column, xf, value
end
430
##
# RK record: stores the decoded RK number as the cell's value.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to column
#      4     2  Index to XF record (➜ 6.115)
#      6     4  RK value (➜ 3.6)
def read_rk worksheet, addr, work
  row, column, xf = work.unpack 'v3'
  number = decode_rk(work[6,4])
  set_cell worksheet, row, column, xf, number
end
439
##
# Returns the Row stored for addr[:index] in the current row block. If the
# row is not cached, the cache is cleared and the row block starting at
# addr[:offset] is parsed: every cell record encountered is dispatched to
# its read_* method (which fill the cache through #set_cell) until the block
# ends.
#
# The block ends at
# * EOF (➜ 6.36) - we should only get here if there is just one Row-Block;
#   @pos is rewound to the EOF record
# * DBCELL (stream offsets to the cell records of each row)
# * a ROW record (➜ 6.83) that follows at least one cell record; ROW records
#   before the first cell are ignored, they were already handled in
#   read_worksheet
# * the end of the stream
def read_row worksheet, addr
  row = addr[:index]
  @current_row_block.fetch row do
    @current_row_block.clear
    cells = @current_row_block[row] = Row.new(nil, row)
    @pos = addr[:offset]
    found = false
    while (tuple = get_next_chunk)
      pos, op, _len, work = tuple
      case op
      when :eof
        @pos = pos
        return cells
      when :dbcell
        return cells
      when :row
        return cells if found
      when 0x0002
        # INTEGER ➜ 6.56 (BIFF2 only)
        # TODO: implement for BIFF2 support
        found = true
      when :blank,    # BLANK    ➜ 6.7
           :boolerr,  # BOOLERR  ➜ 6.10
           :label,    # LABEL    ➜ 6.59 (BIFF2-BIFF7)
           :labelsst, # LABELSST ➜ 6.61 (BIFF8 only)
           :mulblank, # MULBLANK ➜ 6.64 (BIFF5-BIFF8)
           :mulrk,    # MULRK    ➜ 6.65 (BIFF5-BIFF8)
           :number,   # NUMBER   ➜ 6.68
           :rk,       # RK       ➜ 6.82 (BIFF3-BIFF8)
           :rstring   # RSTRING  ➜ 6.84 (BIFF5/BIFF7)
        found = true
        send "read_#{op}", worksheet, addr, work
      end
    end
    cells
  end
end
494
##
# RSTRING record: stores the string as the cell's value. The formatting runs
# that follow the string are not evaluated here.
#
# Offset  Size  Contents
#      0     2  Index to row
#      2     2  Index to column
#      4     2  Index to XF record (➜ 6.115)
#      6    sz  Unformatted Unicode string, 16-bit string length (➜ 3.4)
#   6+sz     2  Number of Rich-Text formatting runs (rt)
#   8+sz  4·rt  List of rt formatting runs (➜ 3.2)
def read_rstring worksheet, addr, work
  row, column, xf = work.unpack 'v3'
  text = read_string work[6..-1], 2
  set_cell worksheet, row, column, xf, client(text, @workbook.encoding)
end
506
##
# Parses the workbook-globals records, dispatching each known opcode to its
# read_* method. Parsing stops
# * at the first further BOF record (worksheets are not parsed here), or
# * at EOF, after #postread_workbook has run, or
# * when the stream is exhausted.
#
# CONTINUE records are forwarded to #continue_sst when the preceding
# non-CONTINUE record was an SST; all other CONTINUE records are skipped.
def read_workbook
  previous_op = nil
  while tuple = get_next_chunk
    pos, op, len, work = tuple
    case op
    when @bof, :bof    # ●  BOF Type = worksheet (➜ 6.8)
      return
    when :eof          # ●  EOF ➜ 6.36
      postread_workbook
      return
    when :datemode     # ○  DATEMODE ➜ 6.25
      flag, = work.unpack 'v'
      @workbook.date_base = if flag == 1
                              Date.new 1904, 1, 1
                            else
                              Date.new 1899, 12, 31
                            end
    when :continue     # ○  CONTINUE ➜ 6.22
      continue_sst work, pos, len if previous_op == :sst # ● SST ➜ 6.96
    when :codepage     # ○  CODEPAGE ➜ 6.17
      read_codepage work, pos, len
    when :boundsheet   # ●● BOUNDSHEET ➜ 6.12
      read_boundsheet work, pos, len
    when :xf           # ●● XF ➜ 6.115
      read_xf work, pos, len
    when :sst          # ○  Shared String Table ➜ 5.11
                       # ●  SST ➜ 6.96
      read_sst work, pos, len
      # TODO: implement memory-efficient sst handling, possibly in conjunction
      #       with EXTSST
    # when :extsst     # ●  EXTSST ➜ 6.40
    when :style        # ●● STYLE ➜ 6.99
      read_style work, pos, len
    when :format       # ○○ FORMAT (Number Format) ➜ 6.45
      read_format work, pos, len
    when :font
      read_font work, pos, len
    end
    previous_op = op unless op == :continue
  end
end
559
##
# Scans the worksheet stream beginning at _offset_. DIMENSIONS records go to
# #read_dimensions and ROW records to #set_row_address; all other records
# are skipped. Returns after #postread_worksheet at EOF, or when the stream
# is exhausted.
#
# While a row block is open (@current_row_block_offset is set), the first
# record that is not a row-block record closes it: the block's length
# (record position minus block start) is written to slot 1 of the shared
# [start, length] Array.
def read_worksheet worksheet, offset
  @pos = offset
  while (tuple = get_next_chunk)
    pos, op, len, work = tuple
    block = @current_row_block_offset
    if block && !in_row_block?(op)
      @current_row_block_offset = nil
      block[1] = pos - block[0]
    end
    case op
    when :eof        # ●  EOF ➜ 6.36
      postread_worksheet worksheet
      return
    #when :uncalced  # ○  UNCALCED ➜ 6.104
      # TODO: Formula support. Values were not calculated before saving
      #warn <<-EOS
      #  Some fields containing formulas were saved without a computed value.
      #  Support Spreadsheet::Excel by implementing formula-calculations!
      #EOS
    #when :index     # ○  INDEX ➜ 5.7 (Row Blocks), ➜ 6.55
      # TODO: if there are changes in rows, omit index when writing
      #read_index worksheet, work, pos, len
    when :dimensions # ●  DIMENSIONS ➜ 6.31
      read_dimensions worksheet, work, pos, len
    when :row        # ○○ Row Blocks ➜ 5.7
                     # ●  ROW ➜ 6.83
      set_row_address worksheet, work, pos, len
    end
  end
end
588
##
# STYLE record: names the style XF the record refers to.
#
# User-Defined Cell Styles:
# Offset  Size  Contents
#      0     2  Bit   Mask    Contents
#               11-0  0x0fff  Index to style XF record (➜ 6.115)
#                 15  0x8000  Always 0 for user-defined styles
#      2  var.  BIFF2-BIFF7:  Non-empty byte string,
#                             8-bit string length (➜ 3.3)
#               BIFF8:        Non-empty Unicode string,
#                             16-bit string length (➜ 3.4)
#
# Built-In Cell Styles
# Offset  Size  Contents
#      0     2  Bit   Mask    Contents
#               11-0  0x0FFF  Index to style XF record (➜ 6.115)
#                 15  0x8000  Always 1 for built-in styles
#      2     1  Identifier of the built-in cell style:
#               0x00 = Normal
#               0x01 = RowLevel_lv (see next field)
#               0x02 = ColLevel_lv (see next field)
#               0x03 = Comma
#               0x04 = Currency
#               0x05 = Percent
#               0x06 = Comma [0] (BIFF4-BIFF8)
#               0x07 = Currency [0] (BIFF4-BIFF8)
#               0x08 = Hyperlink (BIFF8)
#               0x09 = Followed Hyperlink (BIFF8)
#      3     1  Level for RowLevel or ColLevel style (zero-based, lv),
#               FFH otherwise
#
# Built-in ids missing from BUILTIN_STYLES leave the format's name untouched.
def read_style work, pos, len
  flags, = work.unpack 'v'
  xf = @workbook.format(flags & 0x0fff)
  if (flags & 0x8000) == 0
    xf.name = client read_string(work[2..-1], 2), @workbook.encoding
  else
    id, level = work.unpack 'x2C2'
    if name = BUILTIN_STYLES[id]
      # String#sub returns a new String: use its result (and leave the
      # BUILTIN_STYLES entry itself unmodified)
      xf.name = client name.sub('_lv', "_#{level}"), 'UTF8'
    end
  end
end
631
##
# XF record: creates a Format carrying the referenced number format and
# font, and adds it to the workbook. The remaining fields are unpacked but
# not evaluated.
#
# Offset  Size  Contents
#      0     2  Index to FONT record (➜ 6.43)
#      2     2  Index to FORMAT record (➜ 6.45)
#      4     2  Bit   Mask    Contents
#               2-0   0x0007  XF_TYPE_PROT – XF type, cell protection
#                             Bit  Mask  Contents
#                               0  0x01  1 = Cell is locked
#                               1  0x02  1 = Formula is hidden
#                               2  0x04  0 = Cell XF; 1 = Style XF
#               15-4  0xfff0  Index to parent style XF
#                             (always 0xfff in style XFs)
#      6     1  Bit   Mask    Contents
#               2-0   0x07    XF_HOR_ALIGN – Horizontal alignment
#                             Value  Horizontal alignment
#                             0x00   General
#                             0x01   Left
#                             0x02   Centred
#                             0x03   Right
#                             0x04   Filled
#                             0x05   Justified (BIFF4-BIFF8X)
#                             0x06   Centred across selection
#                                    (BIFF4-BIFF8X)
#                             0x07   Distributed (BIFF8X)
#               3     0x08    1 = Text is wrapped at right border
#               6-4   0x70    XF_VERT_ALIGN – Vertical alignment
#                             Value  Vertical alignment
#                             0x00   Top
#                             0x01   Centred
#                             0x02   Bottom
#                             0x03   Justified (BIFF5-BIFF8X)
#                             0x04   Distributed (BIFF8X)
#      7     1  XF_ROTATION: Text rotation angle (see above)
#                 Value  Text rotation
#                 0      Not rotated
#                 1-90   1 to 90 degrees counterclockwise
#                 91-180 1 to 90 degrees clockwise
#                 255    Letters are stacked top-to-bottom,
#                        but not rotated
#      8     1  Bit   Mask    Contents
#               3-0   0x0f    Indent level
#               4     0x10    1 = Shrink content to fit into cell
#               5     0x40    1 = Merge Range (djberger)
#               7-6   0xc0    Text direction (BIFF8X only)
#                             0 = According to context
#                             1 = Left-to-right
#                             2 = Right-to-left
#      9     1  Bit   Mask    Contents
#               7-2   0xfc    XF_USED_ATTRIB – Used attributes
#                             Each bit describes the validity of a
#                             specific group of attributes. In cell XFs
#                             a cleared bit means the attributes of the
#                             parent style XF are used (but only if the
#                             attributes are valid there), a set bit
#                             means the attributes of this XF are used.
#                             In style XFs a cleared bit means the
#                             attribute setting is valid, a set bit
#                             means the attribute should be ignored.
#                             Bit  Mask  Contents
#                             0    0x01  Flag for number format
#                             1    0x02  Flag for font
#                             2    0x04  Flag for horizontal and
#                                        vertical alignment, text wrap,
#                                        indentation, orientation,
#                                        rotation, and text direction
#                             3    0x08  Flag for border lines
#                             4    0x10  Flag for background area style
#                             5    0x20  Flag for cell protection (cell
#                                        locked and formula hidden)
#     10     4  Cell border lines and background area:
#                 Bit    Mask        Contents
#                  3- 0  0x0000000f  Left line style (➜ 3.10)
#                  7- 4  0x000000f0  Right line style (➜ 3.10)
#                 11- 8  0x00000f00  Top line style (➜ 3.10)
#                 15-12  0x0000f000  Bottom line style (➜ 3.10)
#                 22-16  0x007f0000  Colour index (➜ 6.70)
#                                    for left line colour
#                 29-23  0x3f800000  Colour index (➜ 6.70)
#                                    for right line colour
#                 30     0x40000000  1 = Diagonal line
#                                    from top left to right bottom
#                 31     0x80000000  1 = Diagonal line
#                                    from bottom left to right top
#     14     4    Bit    Mask        Contents
#                  6- 0  0x0000007f  Colour index (➜ 6.70)
#                                    for top line colour
#                 13- 7  0x00003f80  Colour index (➜ 6.70)
#                                    for bottom line colour
#                 20-14  0x001fc000  Colour index (➜ 6.70)
#                                    for diagonal line colour
#                 24-21  0x01e00000  Diagonal line style (➜ 3.10)
#                 31-26  0xfc000000  Fill pattern (➜ 3.11)
#     18     2    Bit    Mask        Contents
#                  6-0   0x007f      Colour index (➜ 6.70)
#                                    for pattern colour
#                 13-7   0x3f80      Colour index (➜ 6.70)
#                                    for pattern background
def read_xf work, pos, len
  fmt = Format.new
  fields = work.unpack binfmt(:xf)
  # only the first two fields are used; the others (xf type, alignment,
  # rotation, indent, used attributes, borders, border colours, pattern)
  # are ignored for now
  font_idx, numfmt = fields
  fmt.number_format = @formats[numfmt]
  fmt.font = @workbook.font font_idx
  @workbook.add_format fmt
end
735
##
# Stores _value_ and the format with index _xf_ at _column_ of the Row cached
# for _row_ in the current row block, creating that Row if necessary.
# Returns _value_.
def set_cell worksheet, row, column, xf, value=nil
  cells = @current_row_block[row]
  unless cells
    cells = @current_row_block[row] = Row.new(nil, row)
  end
  cells.formats[column] = @workbook.format(xf)
  cells[column] = value
end
740
##
# ROW record: opens a row block at _pos_ if none is open, and registers with
# the worksheet where the row can be found.
#
# Offset  Size  Contents
#      0     2  Index of this row
#      2     2  Index to column of the first cell which
#               is described by a cell record
#      4     2  Index to column of the last cell which is
#               described by a cell record, increased by 1
#      6     2  Bit   Mask    Contents
#               14-0  0x7fff  Height of the row, in twips = 1/20 of a point
#                 15  0x8000  0 = Row has custom height;
#                             1 = Row has default height
#      8     2  Not used
#     10     1  0 = No defaults written;
#               1 = Default row attribute field and XF index occur below (fl)
#     11     2  Relative offset to calculate stream position of the first
#               cell record for this row (➜ 5.7.1)
#   [13]     3  (written only if fl = 1) Default row attributes (➜ 3.12)
#   [16]     2  (written only if fl = 1) Index to XF record (➜ 6.115)
def set_row_address worksheet, work, pos, len
  @current_row_block_offset ||= [pos]
  fields = work.unpack binfmt(:row)
  index, first_used, first_unused, _flags, hasdefaults, _first_cell = fields
  # TODO: read attributes from work[13,3], read flags
  if hasdefaults > 0
    # TODO: read row default XF
  end
  # :row_block is the shared Array of the open block; :offset is its start
  address = {
    :first_used   => first_used,
    :first_unused => first_unused,
    :index        => index,
    :row_block    => @current_row_block_offset,
    :offset       => @current_row_block_offset[0],
    #:first_cell  => offset
  }
  worksheet.set_row_address index, address
end
772
+ private
773
##
# Tries to load the Internals::Biff<version> module and extend this reader
# with it. A LoadError is swallowed (the method then returns nil), i.e.
# missing version-specific internals are not an error.
def extend_internals version
  begin
    require format('spreadsheet/excel/internals/biff%i', version)
    extend Internals.const_get(format('Biff%i', version))
  rescue LoadError
  end
end
778
##
# Tries to load the Reader::Biff<version> module and extend this reader with
# it. A LoadError is swallowed (the method then returns nil), i.e. missing
# version-specific reader methods are not an error.
def extend_reader version
  begin
    require format('spreadsheet/excel/reader/biff%i', version)
    extend Reader.const_get(format('Biff%i', version))
  rescue LoadError
  end
end
783
##
# Reads the record at @pos and advances @pos past it.
#
# Returns [pos, code, len, work] where
# * pos  is the position of the record header,
# * code is the opcode translated through SEDOCPO (the raw opcode if there
#   is no translation),
# * len  is the total record length including the OPCODE_SIZE header bytes,
# * work is the record payload (shorter than announced if the stream is
#   truncated).
# Returns nil when no complete record header is left - including the case
# where @pos already lies beyond the end of the data, e.g. after a truncated
# record. @pos is advanced by OPCODE_SIZE in that case, too.
def get_next_chunk
  pos = @pos
  # slicing beyond the end of a String yields nil: treat it as "no header"
  header = @data[pos, OPCODE_SIZE] || ''
  op, len = header.unpack('v2')
  @pos += OPCODE_SIZE
  return unless len
  work = @data[@pos, len]
  @pos += len
  code = SEDOCPO.fetch(op, op)
  #puts "0x%04x/%-16s (0x%08x) %5i: %s" % [op, code.inspect, pos, len, work[0,16].inspect]
  #puts "0x%04x/%-16s %5i: %s" % [op, code.inspect, len, work[0,32].inspect]
  [pos, code, len + OPCODE_SIZE, work]
end
796
+ end
797
+ end
798
+ end