spreadsheet 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/GUIDE.txt +209 -0
  2. data/History.txt +8 -0
  3. data/LICENSE.txt +619 -0
  4. data/Manifest.txt +46 -0
  5. data/README.txt +54 -0
  6. data/Rakefile +15 -0
  7. data/lib/parseexcel.rb +27 -0
  8. data/lib/parseexcel/parseexcel.rb +75 -0
  9. data/lib/parseexcel/parser.rb +11 -0
  10. data/lib/spreadsheet.rb +79 -0
  11. data/lib/spreadsheet/datatypes.rb +99 -0
  12. data/lib/spreadsheet/encodings.rb +49 -0
  13. data/lib/spreadsheet/excel.rb +75 -0
  14. data/lib/spreadsheet/excel/error.rb +26 -0
  15. data/lib/spreadsheet/excel/internals.rb +322 -0
  16. data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
  17. data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
  18. data/lib/spreadsheet/excel/offset.rb +37 -0
  19. data/lib/spreadsheet/excel/reader.rb +798 -0
  20. data/lib/spreadsheet/excel/reader/biff5.rb +22 -0
  21. data/lib/spreadsheet/excel/reader/biff8.rb +168 -0
  22. data/lib/spreadsheet/excel/row.rb +67 -0
  23. data/lib/spreadsheet/excel/sst_entry.rb +45 -0
  24. data/lib/spreadsheet/excel/workbook.rb +76 -0
  25. data/lib/spreadsheet/excel/worksheet.rb +85 -0
  26. data/lib/spreadsheet/excel/writer.rb +1 -0
  27. data/lib/spreadsheet/excel/writer/biff8.rb +66 -0
  28. data/lib/spreadsheet/excel/writer/format.rb +270 -0
  29. data/lib/spreadsheet/excel/writer/workbook.rb +586 -0
  30. data/lib/spreadsheet/excel/writer/worksheet.rb +556 -0
  31. data/lib/spreadsheet/font.rb +86 -0
  32. data/lib/spreadsheet/format.rb +172 -0
  33. data/lib/spreadsheet/formula.rb +9 -0
  34. data/lib/spreadsheet/row.rb +87 -0
  35. data/lib/spreadsheet/workbook.rb +120 -0
  36. data/lib/spreadsheet/worksheet.rb +215 -0
  37. data/lib/spreadsheet/writer.rb +29 -0
  38. data/test/data/test_copy.xls +0 -0
  39. data/test/data/test_version_excel5.xls +0 -0
  40. data/test/data/test_version_excel95.xls +0 -0
  41. data/test/data/test_version_excel97.xls +0 -0
  42. data/test/excel/row.rb +29 -0
  43. data/test/font.rb +163 -0
  44. data/test/integration.rb +1021 -0
  45. data/test/workbook.rb +21 -0
  46. data/test/worksheet.rb +62 -0
  47. metadata +113 -0
@@ -0,0 +1,37 @@
1
module Spreadsheet
  module Excel
    ##
    # This module is used to keep track of offsets in modified Excel documents.
    # Considered internal and subject to change without notice.
    #
    # Including a class gives its instances the #changes and #offsets hashes
    # and gives the class itself an +offset+ macro. For every key passed to
    # +offset+ the class receives:
    #
    # key::        a plain reader (only if no such method exists yet)
    # key=::       a writer that flags the key in #changes
    # set_key::    a (value, pos, len) writer that stores [pos, len] in
    #              #offsets and afterwards calls the optional hook
    #              have_set_key(value, pos, len) when the instance responds
    #              to it
    module Offset
      attr_reader :changes, :offsets

      def initialize(*args)
        super
        @changes = {}
        @offsets = {}
      end

      def self.append_features(mod)
        super
        mod.module_eval do
          class << self
            def offset(*keys)
              keys.each do |key|
                # method_defined? works for both the String lists of Ruby 1.8
                # and the Symbol lists of Ruby >= 1.9; the former
                # instance_methods.include?(key.to_s) test never matched on
                # 1.9+, which clobbered hand-written readers.
                attr_reader key unless method_defined?(key)
                define_method "#{key}=" do |value|
                  @changes.store key, true
                  instance_variable_set "@#{key}", value
                end
                define_method "set_#{key}" do |value, pos, len|
                  instance_variable_set "@#{key}", value
                  @offsets.store key, [pos, len]
                  havename = "have_set_#{key}"
                  send(havename, value, pos, len) if respond_to? havename
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,798 @@
1
+ require 'spreadsheet/encodings'
2
+ require 'spreadsheet/font'
3
+ require 'spreadsheet/formula'
4
+ require 'spreadsheet/excel/error'
5
+ require 'spreadsheet/excel/internals'
6
+ require 'spreadsheet/excel/sst_entry'
7
+ require 'spreadsheet/excel/worksheet'
8
+
9
+ module Spreadsheet
10
+ module Excel
11
+ ##
12
+ # Reader class for Excel Workbooks. Most read_* methods correspond to an
13
+ # Excel-Record/Opcode. You should not need to call any of its methods
14
+ # directly. If you think you do, look at #read
15
+ class Reader
16
+ include Encodings
17
+ include Internals
18
+ OPCODE_SIZE = 4
19
+ ROW_BLOCK_OPS = [
20
+ :blank, :boolerr, :dbcell, :formula, :label, :labelsst, :mulblank, :mulrk,
21
+ :number, :rk, :row, :rstring,
22
+ ]
23
##
# Prepares the parser state.
#
# opts:: option hash, kept in @opts. When it contains :bigendian that value
#        is used as the byte-order flag; otherwise the flag is detected by
#        packing a native long.
#
# Every entry of BUILTIN_FORMATS is converted with #client (from 'UTF8') and
# stored in the @formats table.
def initialize opts = {}
  @opts = opts
  @pos = 0
  @current_row_block = {}
  @bigendian = opts.fetch(:bigendian) do
    # a little-endian host packs 1 as 01 00 00 00
    [1].pack('l') != "\001\000\000\000"
  end
  @formats = {}
  BUILTIN_FORMATS.each do |key, fmt|
    @formats.store key, client(fmt, 'UTF8')
  end
end
33
##
# Decodes a 4-byte little-endian RK value into a Ruby number.
#
#   Bit   Mask        Contents
#   0     0x00000001  0 = Value not changed  1 = Value is multiplied by 100
#   1     0x00000002  0 = Floating-point value  1 = Signed integer value
#   31-2  0xFFFFFFFC  Encoded value
#
# If bit 1 is cleared, the encoded value represents the 30 most significant
# bits of an IEEE 754 floating-point value (64-bit double precision); the 34
# least significant bits are zero. If bit 1 is set, the encoded value is a
# signed 30-bit integer that has to be shifted right arithmetically by 2
# bits. If bit 0 is set, the decoded value (integer or floating-point) must
# be divided by 100 to get the final result.
#
# work:: String holding at least the four bytes of the RK value
#
# Returns an Integer, or a Float when the value is floating-point or scaled.
def decode_rk work
  raw, = work.unpack 'V'
  scaled = (raw & 0x01) != 0
  if (raw & 0x02) == 0
    # high half of a double: prepend the 32 zero bits of the low half
    value, = ("\0\0\0\0" + [raw & 0xfffffffc].pack('V')).unpack EIGHT_BYTE_DOUBLE
  else
    # Reinterpret the unsigned word as signed *before* shifting, so that the
    # shift is arithmetic and negative integers keep their sign. This is
    # independent of the host byte order.
    raw -= 0x1_0000_0000 if raw >= 0x8000_0000
    value = raw >> 2
  end
  value /= 100.0 if scaled
  value
end
69
##
# Translates a numeric codepage identifier into an encoding.
#
# codepage_id:: Integer key into CODEPAGES
#
# Returns the Encoding object on Ruby >= 1.9, the plain encoding name on
# older interpreters. Raises a RuntimeError for identifiers that are not
# listed in CODEPAGES.
def encoding codepage_id
  name = CODEPAGES.fetch(codepage_id) do
    raise "Unknown Codepage 0x%04x" % codepage_id
  end
  return name unless RUBY_VERSION >= '1.9'
  Encoding.find name
end
78
##
# Tells whether +op+ is one of the opcodes listed in ROW_BLOCK_OPS.
def in_row_block? op
  !ROW_BLOCK_OPS.index(op).nil?
end
81
##
# Returns the value of the :memoization entry of the options this reader
# was created with.
def memoize?
  options = @opts
  options[:memoization]
end
84
##
# Records, for every worksheet of the workbook, where it starts and how many
# bytes it spans: a sheet ends where the next one starts, the last sheet
# ends at the size of the workbook's ole stream. The [start, length] pairs
# are stored in @workbook.offsets, keyed by the worksheet.
def postread_workbook
  sheets = @workbook.worksheets
  sheets.each_with_index do |sheet, idx|
    start = sheet.offset
    following = sheets[idx + 1]
    stop = following ? following.offset : @workbook.ole.size
    @workbook.offsets.store sheet, [start, stop - start]
  end
end
92
##
# Called by #read_worksheet when the EOF record of a sheet is reached.
# Intentionally does nothing here.
def postread_worksheet worksheet
  nil
end
94
+ ##
95
+ # The entry-point for reading Excel-documents. Reads the Biff-Version and
96
+ # loads additional reader-methods before proceeding with parsing the document.
97
def read io
  @ole = Ole::Storage.open io
  @workbook = Workbook.new io, {}
  # The workbook stream is tried as "Book" first, with "Workbook" as the
  # fallback.
  @book = begin
    @ole.file.open("Book")
  rescue StandardError
    @ole.file.open("Workbook")
  end
  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  # pull in the version-specific reader methods and constants before parsing
  biff = @workbook.biff_version
  extend_reader biff
  extend_internals biff
  read_workbook
  @workbook
end
112
##
# BLANK record: an empty cell that only carries formatting.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (see 6.115)
def read_blank worksheet, addr, work
  row_idx, col_idx, xf_idx = work.unpack(binfmt(:blank))
  set_cell(worksheet, row_idx, col_idx, xf_idx)
end
120
##
# Reads the next chunk as a BOF record, remembering its opcode in @bof and,
# for workbook globals (type 0x0005), the BIFF version in @version.
#
#   Offset  Size  Contents
#   0       2     BIFF version (always 0x0600 for BIFF8)
#   2       2     Type of the following data:
#                   0x0005 = Workbook globals
#                   0x0006 = Visual Basic module
#                   0x0010 = Worksheet
#                   0x0020 = Chart
#                   0x0040 = Macro sheet
#                   0x0100 = Workspace file
#   4       2     Build identifier
#   6       2     Build year
#   8       4     File history flags
#   12      4     Lowest Excel version that can read all records in this file
#
# Version and datatype are common to all Excel versions. Later versions
# carry additional information such as build-id and -year (from BIFF5);
# these are ignored for the time being.
def read_bof
  _pos, @bof, _len, payload = get_next_chunk
  version, datatype = payload.unpack('v2')
  @version = version if datatype == 0x5
end
142
##
# BOOLERR record: a cell holding either a Boolean or an error code.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (see 6.115)
#   6       1     Boolean or error value (type depends on the following byte)
#   7       1     0 = Boolean value; 1 = Error code
def read_boolerr worksheet, addr, work
  row, column, xf, raw, kind = work.unpack 'v3C2'
  cell = if kind == 0
           raw > 0
         else
           Error.new(raw)
         end
  set_cell worksheet, row, column, xf, cell
end
152
##
# BOUNDSHEET record: announces one sheet of the workbook. Adds a Worksheet
# to @workbook and keeps a running [count, position of first record, total
# length] triple for all BOUNDSHEET records seen so far, which is passed on
# to the workbook via set_boundsheets.
#
#   Offset  Size  Contents
#   0       4     Absolute stream position of the BOF record of the sheet
#                 represented by this record. This field is never encrypted
#                 in protected files.
#   4       1     Visibility: 0x00 = Visible
#                             0x01 = Hidden
#                             0x02 = Strong hidden
#   5       1     Sheet type: 0x00 = Worksheet
#                             0x02 = Chart
#                             0x06 = Visual Basic module
#   6       var.  Sheet name:
#                 BIFF5/BIFF7: Byte string, 8-bit string length (see 3.3)
#                 BIFF8: Unicode string, 8-bit string length (see 3.4)
def read_boundsheet work, pos, len
  # visibility and sheet type are unpacked but not used yet
  sheet_offset, _visibility, _type = work.unpack("VC2")
  sheet_name = client read_string(work[6..-1]), @workbook.encoding
  if @boundsheets.nil? || @boundsheets == false
    @boundsheets = [1, pos, len]
  else
    @boundsheets[0] += 1
    @boundsheets[2] += len
  end
  @workbook.set_boundsheets(*@boundsheets)
  sheet = Worksheet.new(:name => sheet_name,
                        :ole => @book,
                        :offset => sheet_offset,
                        :reader => self)
  @workbook.add_worksheet sheet
end
180
##
# CODEPAGE record: the first 16-bit word is the codepage identifier. The
# matching encoding (see #encoding) is stored on the workbook together with
# the position and length of the record.
def read_codepage work, pos, len
  codepage = work.unpack('v').first
  @workbook.set_encoding encoding(codepage), pos, len
end
184
##
# DIMENSIONS record: the used range of a sheet. The unpacked fields are
# handed to the worksheet as one Array, together with the position and
# length of the record.
#
#   Offset  Size  Contents
#   0       4     Index to first used row
#   4       4     Index to last used row, increased by 1
#   8       2     Index to first used column
#   10      2     Index to last used column, increased by 1
#   12      2     Not used
def read_dimensions worksheet, work, pos, len
  bounds = work.unpack(binfmt(:dimensions))
  worksheet.set_dimensions bounds, pos, len
end
193
##
# FONT record: builds a Font from the record and adds it to the workbook.
#
#   Offset  Size  Contents
#   0       2     Height of the font (in twips = 1/20 of a point)
#   2       2     Option flags:
#                   Bit  Mask    Contents
#                   0    0x0001  1 = Characters are bold (redundant)
#                   1    0x0002  1 = Characters are italic
#                   2    0x0004  1 = Characters are underlined (redundant)
#                   3    0x0008  1 = Characters are struck out
#                   4    0x0010  1 = Characters are outlined (djberger)
#                   5    0x0020  1 = Characters are shadowed (djberger)
#   4       2     Colour index (see 6.70)
#   6       2     Font weight (100-1000). Standard values are
#                 0x0190 (400) for normal text and 0x02bc (700) for bold text.
#   8       2     Escapement type: 0x0000 = None
#                                  0x0001 = Superscript
#                                  0x0002 = Subscript
#   10      1     Underline type:  0x00 = None
#                                  0x01 = Single
#                                  0x02 = Double
#                                  0x21 = Single accounting
#                                  0x22 = Double accounting
#   11      1     Font family:
#                   0x00 = None (unknown or don't care)
#                   0x01 = Roman (variable width, serifed)
#                   0x02 = Swiss (variable width, sans-serifed)
#                   0x03 = Modern (fixed width, serifed or sans-serifed)
#                   0x04 = Script (cursive)
#                   0x05 = Decorative (specialised, for example Old English,
#                          Fraktur)
#   12      1     Character set:
#                   0x00 =   0 = ANSI Latin
#                   0x01 =   1 = System default
#                   0x02 =   2 = Symbol
#                   0x4d =  77 = Apple Roman
#                   0x80 = 128 = ANSI Japanese Shift-JIS
#                   0x81 = 129 = ANSI Korean (Hangul)
#                   0x82 = 130 = ANSI Korean (Johab)
#                   0x86 = 134 = ANSI Chinese Simplified GBK
#                   0x88 = 136 = ANSI Chinese Traditional BIG5
#                   0xa1 = 161 = ANSI Greek
#                   0xa2 = 162 = ANSI Turkish
#                   0xa3 = 163 = ANSI Vietnamese
#                   0xb1 = 177 = ANSI Hebrew
#                   0xb2 = 178 = ANSI Arabic
#                   0xba = 186 = ANSI Baltic
#                   0xcc = 204 = ANSI Cyrillic
#                   0xde = 222 = ANSI Thai
#                   0xee = 238 = ANSI Latin II (Central European)
#                   0xff = 255 = OEM Latin I
#   13      1     Not used
#   14      var.  Font name:
#                 BIFF5/BIFF7: Byte string, 8-bit string length (see 3.3)
#                 BIFF8: Unicode string, 8-bit string length (see 3.4)
def read_font work, pos, len
  font_name = client read_string(work[14..-1]), @workbook.encoding
  font = Font.new font_name
  height, flags, color, weight, escapement, underline, family, charset =
    work.unpack binfmt(:font)
  # bold is not read from the flags; the weight carries that information
  font.weight = weight
  font.size = height / TWIPS
  font.italic = flags & 0x0002
  font.strikeout = flags & 0x0008
  font.outline = flags & 0x0010
  font.shadow = flags & 0x0020
  font.color = COLOR_CODES[color] || :text
  font.escapement = ESCAPEMENT_TYPES[escapement]
  font.underline = UNDERLINE_TYPES[underline]
  font.family = FONT_FAMILIES[family]
  font.encoding = FONT_ENCODINGS[charset]
  @workbook.add_font font
end
264
##
# FORMAT record: registers a number format string under its index in the
# @formats table.
#
#   Offset  Size  Contents
#   0       2     Format index used in other records
#   2       var.  Number format string
#                 (Unicode string, 16-bit string length, see 3.4)
def read_format work, pos, len
  index = work.unpack('v').first
  text = read_string(work[2..-1], 2)
  @formats.store index, client(text, @workbook.encoding)
end
273
##
# FORMULA record: builds a Formula (token data plus cached result) and
# stores it as the value of the addressed cell.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (see 6.115)
#   6       8     Result of the formula. See below for details.
#   14      2     Option flags:
#                   Bit  Mask    Contents
#                   0    0x0001  1 = Recalculate always
#                   1    0x0002  1 = Calculate on open
#                   3    0x0008  1 = Part of a shared formula
#   16      4     Not used
#   20      var.  Formula data (RPN token array, see 4)
#                   Offset  Size  Contents
#                   0       2     Size of the following formula data (sz)
#                   2       sz    Formula data (RPN token array)
#                   [2+sz]  var.  (optional) Additional data for specific
#                                 tokens (see 4.1.6, e.g. tArray, 4.8.7)
#
# Result of the formula, depending on the type of value it returns:
#
#   Numeric value:
#     0  8  IEEE 754 floating-point value (64-bit double precision)
#   String (the string follows in a STRING record, see 6.98):
#     0  1  0x00 (identifier for a string value)
#     1  5  Not used
#     6  2  0xffff
#     Note: In BIFF8 the string must not be empty. For empty cells there is
#     a special identifier defined (see below).
#   Boolean value:
#     0  1  0x01 (identifier for a Boolean value)
#     1  1  Not used
#     2  1  0 = FALSE, 1 = TRUE
#     3  3  Not used
#     6  2  0xffff
#   Error value:
#     0  1  0x02 (identifier for an error value)
#     1  1  Not used
#     2  1  Error code (see 3.7)
#     3  3  Not used
#     6  2  0xffff
#   Empty cell (BIFF8), for example an empty string:
#     0  1  0x03 (identifier for an empty cell)
#     1  5  Not used
#     6  2  0xffff
#
# worksheet:: the sheet being read
# work::      payload of the FORMULA record
# pos, len::  position and length of the record (currently unused)
def read_formula worksheet, work, pos, len
  row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]
  if rcheck != 0xffff || rtype > 3
    value, = work.unpack 'x6E'
    unless value
      # on architectures where sizeof(double) > 8
      value, = work.unpack 'x6e'
    end
    formula.value = value
  elsif rtype == 0
    # The cached string lives in the record that follows. Remember where
    # that record starts so that we can rewind if it is something else
    # (or if the stream ends here and there is no record at all).
    next_start = @pos
    _chunk_pos, op, _chunk_len, chunk = get_next_chunk
    if op == :string
      formula.value = client read_string(chunk, 2), @workbook.encoding
    else
      warn "String Value expected after Formula, but got #{op}"
      formula.value = Error.new 0x2a
      @pos = next_start
    end
  elsif rtype == 1
    formula.value = rval > 0
  elsif rtype == 2
    formula.value = Error.new rval
  else
    # leave the Formula value blank
  end
  # without this the parsed formula would simply be thrown away
  set_cell worksheet, row, column, xf, formula
end
357
##
# INDEX record. Not evaluated yet; this method is a placeholder that does
# nothing.
#
#   Offset  Size  Contents
#   0       4     Not used
#   4       4     Index to first used row (rf, 0-based)
#   8       4     Index to first row of unused tail of sheet
#                 (rl, last used row + 1, 0-based)
#   12      4     Absolute stream position of the DEFCOLWIDTH record
#                 (see 6.29) of the current sheet. If this record does not
#                 exist, the offset points to the record at the position
#                 where the DEFCOLWIDTH record would occur.
#   16      4*nm  Array of nm absolute stream positions to the DBCELL record
#                 (see 6.26) of each Row Block
#
# TODO: use the index if it exists
# _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
def read_index worksheet, work, pos, len
  nil
end
372
##
# LABEL record: a cell holding an inline string.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (see 6.115)
#   6       var.  Unicode string, 16-bit string length (see 3.4)
def read_label worksheet, addr, work
  row, column, xf = work.unpack 'v3'
  raw = read_string(work[6..-1], 2)
  set_cell worksheet, row, column, xf, client(raw, @workbook.encoding)
end
382
##
# LABELSST record: a cell whose string is looked up on the worksheet by its
# index into the shared string table.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (see 6.115)
#   6       4     Index into SST record (see 6.96)
def read_labelsst worksheet, addr, work
  row, column, xf, sst_index = work.unpack(binfmt(:labelsst))
  text = worksheet.shared_string(sst_index)
  set_cell worksheet, row, column, xf, text
end
391
##
# MULBLANK record: a run of adjacent blank (formatting-only) cells in one
# row.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to first column (fc)
#   4       2*nc  List of nc=lc-fc+1 16-bit indexes to XF records
#                 (see 6.115)
#   4+2*nc  2     Index to last column (lc)
#
# Each XF index belongs to its own column, starting at fc; the cells are
# registered via #set_cell without a value.
def read_mulblank worksheet, addr, work
  row, first_column, *xfs = work.unpack 'v*'
  xfs.pop # trailing "index to last column"; implied by the number of XFs
  xfs.each_with_index do |xf, idx|
    # advance the column for every XF, otherwise all blanks would pile up
    # in the first column
    set_cell worksheet, row, first_column + idx, xf
  end
end
401
##
# MULRK record: a run of adjacent RK-encoded numbers in one row.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to first column (fc)
#   4       6*nc  List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
#                   Offset  Size  Contents
#                   0       2     Index to XF record (see 6.115)
#                   2       4     RK value (see 3.6)
#   4+6*nc  2     Index to last column (lc)
def read_mulrk worksheet, addr, work
  row, column = work.unpack 'v2'
  # the last XF/RK structure starts six bytes before the end of the record
  last_start = work.size - 6
  4.step(last_start, 6) do |start|
    xf = work.unpack("x#{start}v").first
    number = decode_rk(work[start + 2, 4])
    set_cell worksheet, row, column, xf, number
    column += 1
  end
end
417
##
# NUMBER record: a cell holding a floating-point value.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (see 6.115)
#   6       8     IEEE 754 floating-point value (64-bit double precision)
def read_number worksheet, addr, work
  row, column, xf, number = work.unpack 'v3E'
  # on architectures where sizeof(double) > 8
  number ||= work.unpack('x6e').first
  set_cell worksheet, row, column, xf, number
end
430
##
# RK record: a cell holding an RK-encoded number (see #decode_rk).
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (see 6.115)
#   6       4     RK value (see 3.6)
def read_rk worksheet, addr, work
  row, column, xf = work.unpack 'v3'
  number = decode_rk(work[6, 4])
  set_cell worksheet, row, column, xf, number
end
439
##
# Returns the cells of the row described by +addr+ (a Hash with at least
# :index and :offset). If the row is part of the row block currently held
# in @current_row_block it is served from there. Otherwise the block is
# dropped and the stream is re-read from addr[:offset]: cell records are
# dispatched to their read_* method (which fill @current_row_block through
# #set_cell) until the block ends.
#
# Reading stops at:
# EOF::     we should only get here if there is just one Row-Block; the
#           position is reset to the start of the EOF record
# DBCELL::  stream offsets to the cell records of each row
# ROW::     only once at least one cell record has been seen - the leading
#           ROW records of the block are skipped
def read_row worksheet, addr
  row = addr[:index]
  @current_row_block.fetch row do
    @current_row_block.clear
    cells = @current_row_block[row] = Row.new(nil, row)
    @pos = addr[:offset]
    seen_cell = false
    while tuple = get_next_chunk
      pos, op, len, work = tuple
      case op
      when :eof      # EOF, see 6.36
        @pos = pos
        return cells
      when :dbcell   # DBCELL
        return cells
      when :row      # ROW, see 6.83 (Row Blocks, see 5.7)
        return cells if seen_cell
      when 0x0002    # INTEGER, see 6.56 (BIFF2 only)
        seen_cell = true
        # TODO: implement for BIFF2 support
      when :blank,    # BLANK    see 6.7
           :boolerr,  # BOOLERR  see 6.10
           :label,    # LABEL    see 6.59 (BIFF2-BIFF7)
           :labelsst, # LABELSST see 6.61 (BIFF8 only)
           :mulblank, # MULBLANK see 6.64 (BIFF5-BIFF8)
           :mulrk,    # MULRK    see 6.65 (BIFF5-BIFF8)
           :number,   # NUMBER   see 6.68
           :rk,       # RK       see 6.82 (BIFF3-BIFF8)
           :rstring   # RSTRING  see 6.84 (BIFF5/BIFF7)
        seen_cell = true
        send "read_#{op}", worksheet, addr, work
      end
    end
    cells
  end
end
494
##
# RSTRING record: a cell holding a rich-text string. Only the plain text is
# read; the formatting runs are ignored.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (see 6.115)
#   6       sz    Unformatted Unicode string, 16-bit string length (see 3.4)
#   6+sz    2     Number of Rich-Text formatting runs (rt)
#   8+sz    4*rt  List of rt formatting runs (see 3.2)
def read_rstring worksheet, addr, work
  row, column, xf = work.unpack 'v3'
  raw = read_string(work[6..-1], 2)
  set_cell worksheet, row, column, xf, client(raw, @workbook.encoding)
end
506
##
# Reads the workbook-globals substream, dispatching every known record to
# its read_* method. Parsing ends at the EOF record (after calling
# #postread_workbook) or, as a safeguard, as soon as another BOF record is
# encountered. Worksheets are not read here.
#
# CONTINUE records are routed according to the record that preceded them;
# only continued SST records are handled.
def read_workbook
  previous_op = nil
  while tuple = get_next_chunk
    pos, op, len, work = tuple
    case op
    when @bof, :bof  # BOF Type = worksheet (see 6.8)
      return
    when :eof        # EOF, see 6.36
      postread_workbook
      return
    when :datemode   # DATEMODE, see 6.25
      flag, _ = work.unpack 'v'
      if flag == 1
        @workbook.date_base = Date.new 1904, 1, 1
      else
        @workbook.date_base = Date.new 1899, 12, 31
      end
    when :continue   # CONTINUE, see 6.22
      case previous_op
      when :sst      # SST, see 6.96
        continue_sst work, pos, len
      end
    when :codepage   # CODEPAGE, see 6.17
      read_codepage work, pos, len
    when :boundsheet # BOUNDSHEET, see 6.12
      read_boundsheet work, pos, len
    when :xf         # XF, see 6.115
      read_xf work, pos, len
    when :sst        # Shared String Table, see 5.11 / SST, see 6.96
      read_sst work, pos, len
      # TODO: implement memory-efficient sst handling, possibly in conjunction
      #       with EXTSST
      # when :extsst # EXTSST, see 6.40
    when :style      # STYLE, see 6.99
      read_style work, pos, len
    when :format     # FORMAT (Number Format), see 6.45
      read_format work, pos, len
    when :font
      read_font work, pos, len
    end
    # remember what a following CONTINUE record belongs to
    previous_op = op unless op == :continue
  end
end
559
##
# Scans the substream of one worksheet, starting at +offset+, until its EOF
# record. Only the DIMENSIONS and ROW records are evaluated: cell records
# are read later, on demand, through the row addresses registered here.
#
# While ROW records are being registered, @current_row_block_offset holds
# the [start] of the current row block; the first record that is not part
# of a row block completes it to [start, length].
def read_worksheet worksheet, offset
  @pos = offset
  while tuple = get_next_chunk
    pos, op, len, work = tuple
    block = @current_row_block_offset
    if block && !in_row_block?(op)
      @current_row_block_offset = nil
      block[1] = pos - block[0]
    end
    case op
    when :eof        # EOF, see 6.36
      postread_worksheet worksheet
      return
    #when :uncalced   # UNCALCED, see 6.104
      # TODO: Formula support. Values were not calculated before saving
      #warn <<-EOS
      #  Some fields containing formulas were saved without a computed value.
      #  Support Spreadsheet::Excel by implementing formula-calculations!
      #EOS
    #when :index      # INDEX, see 5.7 (Row Blocks), 6.55
      # TODO: if there are changes in rows, omit index when writing
      #read_index worksheet, work, pos, len
    when :dimensions # DIMENSIONS, see 6.31
      read_dimensions worksheet, work, pos, len
    when :row        # ROW, see 6.83 (Row Blocks, see 5.7)
      set_row_address worksheet, work, pos, len
    end
  end
end
588
##
# STYLE record: gives a name to the style XF it refers to.
#
# User-Defined Cell Styles:
#   Offset  Size  Contents
#   0       2     Bit   Mask    Contents
#                 11-0  0x0fff  Index to style XF record (see 6.115)
#                 15    0x8000  Always 0 for user-defined styles
#   2       var.  BIFF2-BIFF7: Non-empty byte string,
#                              8-bit string length (see 3.3)
#                 BIFF8:       Non-empty Unicode string,
#                              16-bit string length (see 3.4)
#
# Built-In Cell Styles:
#   Offset  Size  Contents
#   0       2     Bit   Mask    Contents
#                 11-0  0x0fff  Index to style XF record (see 6.115)
#                 15    0x8000  Always 1 for built-in styles
#   2       1     Identifier of the built-in cell style:
#                   0x00 = Normal
#                   0x01 = RowLevel_lv (see next field)
#                   0x02 = ColLevel_lv (see next field)
#                   0x03 = Comma
#                   0x04 = Currency
#                   0x05 = Percent
#                   0x06 = Comma [0] (BIFF4-BIFF8)
#                   0x07 = Currency [0] (BIFF4-BIFF8)
#                   0x08 = Hyperlink (BIFF8)
#                   0x09 = Followed Hyperlink (BIFF8)
#   3       1     Level for RowLevel or ColLevel style (zero-based, lv),
#                 0xff otherwise
#
# Built-in identifiers that are missing from BUILTIN_STYLES are ignored.
def read_style work, pos, len
  flags, = work.unpack 'v'
  xf = @workbook.format(flags & 0x0fff)
  if (flags & 0x8000) == 0
    xf.name = client read_string(work[2..-1], 2), @workbook.encoding
  else
    id, level = work.unpack 'x2C2'
    if name = BUILTIN_STYLES[id]
      # String#sub returns a new string (the shared constant stays
      # untouched), so the result has to be kept - otherwise the "_lv"
      # placeholder ends up in the style name.
      name = name.sub '_lv', "_#{level}"
      xf.name = client name, 'UTF8'
    end
  end
end
631
##
# XF record: creates a Format from the record and adds it to the workbook.
# Only the font and the number format are evaluated so far; the remaining
# fields are documented here for reference.
#
#   Offset  Size  Contents
#   0       2     Index to FONT record (see 6.43)
#   2       2     Index to FORMAT record (see 6.45)
#   4       2     Bit   Mask    Contents
#                 2-0   0x0007  XF_TYPE_PROT - XF type, cell protection
#                                 0  0x01  1 = Cell is locked
#                                 1  0x02  1 = Formula is hidden
#                                 2  0x04  0 = Cell XF; 1 = Style XF
#                 15-4  0xfff0  Index to parent style XF
#                               (always 0xfff in style XFs)
#   6       1     Bit   Mask    Contents
#                 2-0   0x07    XF_HOR_ALIGN - Horizontal alignment
#                                 0x00 General
#                                 0x01 Left
#                                 0x02 Centred
#                                 0x03 Right
#                                 0x04 Filled
#                                 0x05 Justified (BIFF4-BIFF8X)
#                                 0x06 Centred across selection
#                                      (BIFF4-BIFF8X)
#                                 0x07 Distributed (BIFF8X)
#                 3     0x08    1 = Text is wrapped at right border
#                 6-4   0x70    XF_VERT_ALIGN - Vertical alignment
#                                 0x00 Top
#                                 0x01 Centred
#                                 0x02 Bottom
#                                 0x03 Justified (BIFF5-BIFF8X)
#                                 0x04 Distributed (BIFF8X)
#   7       1     XF_ROTATION: Text rotation angle
#                   0       Not rotated
#                   1-90    1 to 90 degrees counterclockwise
#                   91-180  1 to 90 degrees clockwise
#                   255     Letters are stacked top-to-bottom,
#                           but not rotated
#   8       1     Bit   Mask    Contents
#                 3-0   0x0f    Indent level
#                 4     0x10    1 = Shrink content to fit into cell
#                 5     0x40    1 = Merge Range (djberger)
#                 7-6   0xc0    Text direction (BIFF8X only)
#                                 0 = According to context
#                                 1 = Left-to-right
#                                 2 = Right-to-left
#   9       1     Bit   Mask    Contents
#                 7-2   0xfc    XF_USED_ATTRIB - Used attributes
#                               Each bit describes the validity of a specific
#                               group of attributes. In cell XFs a cleared
#                               bit means the attributes of the parent style
#                               XF are used (but only if the attributes are
#                               valid there), a set bit means the attributes
#                               of this XF are used. In style XFs a cleared
#                               bit means the attribute setting is valid, a
#                               set bit means the attribute should be
#                               ignored.
#                                 0  0x01  Flag for number format
#                                 1  0x02  Flag for font
#                                 2  0x04  Flag for horizontal and vertical
#                                          alignment, text wrap, indentation,
#                                          orientation, rotation, and text
#                                          direction
#                                 3  0x08  Flag for border lines
#                                 4  0x10  Flag for background area style
#                                 5  0x20  Flag for cell protection (cell
#                                          locked and formula hidden)
#   10      4     Cell border lines and background area:
#                 3- 0  0x0000000f  Left line style (see 3.10)
#                 7- 4  0x000000f0  Right line style (see 3.10)
#                 11- 8 0x00000f00  Top line style (see 3.10)
#                 15-12 0x0000f000  Bottom line style (see 3.10)
#                 22-16 0x007f0000  Colour index (see 6.70)
#                                   for left line colour
#                 29-23 0x3f800000  Colour index (see 6.70)
#                                   for right line colour
#                 30    0x40000000  1 = Diagonal line
#                                   from top left to right bottom
#                 31    0x80000000  1 = Diagonal line
#                                   from bottom left to right top
#   14      4     6- 0  0x0000007f  Colour index (see 6.70)
#                                   for top line colour
#                 13- 7 0x00003f80  Colour index (see 6.70)
#                                   for bottom line colour
#                 20-14 0x001fc000  Colour index (see 6.70)
#                                   for diagonal line colour
#                 24-21 0x01e00000  Diagonal line style (see 3.10)
#                 31-26 0xfc000000  Fill pattern (see 3.11)
#   18      2     6-0   0x007f      Colour index (see 6.70)
#                                   for pattern colour
#                 13-7  0x3f80      Colour index (see 6.70)
#                                   for pattern background
def read_xf work, pos, len
  fmt = Format.new
  # the fields after the number format index are not evaluated yet
  font_idx, numfmt, = work.unpack(binfmt(:xf))
  fmt.number_format = @formats[numfmt]
  fmt.font = @workbook.font(font_idx)
  @workbook.add_format fmt
end
735
##
# Stores +value+ and the format with index +xf+ at +column+ of row +row+ in
# the current row block, creating the Row on first use.
#
# Returns +value+.
def set_cell worksheet, row, column, xf, value=nil
  cells = @current_row_block[row]
  unless cells
    cells = Row.new(nil, row)
    @current_row_block[row] = cells
  end
  cells.formats[column] = @workbook.format(xf)
  cells[column] = value
end
740
##
# ROW record: registers on the worksheet where the cells of a row can be
# found. The first ROW record of a row block opens
# @current_row_block_offset with its own position; every row of the block
# is registered with that shared start position.
#
#   Offset  Size  Contents
#   0       2     Index of this row
#   2       2     Index to column of the first cell which
#                 is described by a cell record
#   4       2     Index to column of the last cell which is
#                 described by a cell record, increased by 1
#   6       2     Bit   Mask    Contents
#                 14-0  0x7fff  Height of the row, in twips = 1/20 of a point
#                 15    0x8000  0 = Row has custom height;
#                               1 = Row has default height
#   8       2     Not used
#   10      1     0 = No defaults written;
#                 1 = Default row attribute field and XF index occur
#                     below (fl)
#   11      2     Relative offset to calculate stream position of the first
#                 cell record for this row (see 5.7.1)
#   [13]    3     (written only if fl = 1) Default row attributes (see 3.12)
#   [16]    2     (written only if fl = 1) Index to XF record (see 6.115)
def set_row_address worksheet, work, pos, len
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  index, first_used, first_unused, _flags, hasdefaults, _first_cell =
    work.unpack(binfmt(:row))
  # TODO: read attributes from work[13,3], read flags
  if hasdefaults > 0
    # TODO: read row default XF
  end
  worksheet.set_row_address index, :first_used => first_used,
                                   :first_unused => first_unused,
                                   :index => index,
                                   :row_block => block,
                                   :offset => block[0]
                                   #:first_cell => _first_cell
end
772
+ private
773
##
# Loads the version-specific Internals module for BIFF +version+ and
# extends this reader with it. A missing file is not an error: the LoadError
# is swallowed and the generic internals stay in effect.
def extend_internals version
  begin
    require('spreadsheet/excel/internals/biff%i' % version)
    extend Internals.const_get('Biff%i' % version)
  rescue LoadError
    # nothing version-specific available
  end
end
778
##
# Loads the version-specific Reader module for BIFF +version+ and extends
# this reader with it. A missing file is not an error: the LoadError is
# swallowed and the generic reader methods stay in effect.
def extend_reader version
  begin
    require('spreadsheet/excel/reader/biff%i' % version)
    extend Reader.const_get('Biff%i' % version)
  rescue LoadError
    # nothing version-specific available
  end
end
783
##
# Reads the record at the current stream position and advances @pos past
# it. Every record starts with a header of OPCODE_SIZE bytes: a 16-bit
# opcode followed by the 16-bit length of the payload.
#
# Returns [position, opcode, total length, payload], where the opcode has
# been translated through SEDOCPO when it is known (the raw number
# otherwise) and the total length includes the header. Returns nil when
# there is no further record, which also covers a position that lies
# beyond the end of the data (e.g. after a record whose declared length
# overran the stream).
def get_next_chunk
  pos = @pos
  header = @data[@pos, OPCODE_SIZE]
  # String#[] yields nil for a start beyond the end of the string
  return unless header
  op, len = header.unpack('v2')
  @pos += OPCODE_SIZE
  return unless len
  work = @data[@pos, len]
  @pos += len
  code = SEDOCPO.fetch(op, op)
  #puts "0x%04x/%-16s (0x%08x) %5i: %s" % [op, code.inspect, pos, len, work[0,16].inspect]
  #puts "0x%04x/%-16s %5i: %s" % [op, code.inspect, len, work[0,32].inspect]
  [pos, code, len + OPCODE_SIZE, work]
end
796
+ end
797
+ end
798
+ end