ttb-spreadsheet 0.6.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. data/GUIDE.txt +267 -0
  2. data/Gemfile +3 -0
  3. data/Gemfile.lock +18 -0
  4. data/History.txt +365 -0
  5. data/LICENSE.txt +619 -0
  6. data/Manifest.txt +62 -0
  7. data/README.txt +107 -0
  8. data/Rakefile +0 -0
  9. data/bin/xlsopcodes +18 -0
  10. data/lib/parseexcel.rb +27 -0
  11. data/lib/parseexcel/parseexcel.rb +75 -0
  12. data/lib/parseexcel/parser.rb +11 -0
  13. data/lib/spreadsheet.rb +79 -0
  14. data/lib/spreadsheet/column.rb +71 -0
  15. data/lib/spreadsheet/compatibility.rb +23 -0
  16. data/lib/spreadsheet/datatypes.rb +106 -0
  17. data/lib/spreadsheet/encodings.rb +57 -0
  18. data/lib/spreadsheet/excel.rb +88 -0
  19. data/lib/spreadsheet/excel/error.rb +26 -0
  20. data/lib/spreadsheet/excel/internals.rb +365 -0
  21. data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
  22. data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
  23. data/lib/spreadsheet/excel/offset.rb +41 -0
  24. data/lib/spreadsheet/excel/reader.rb +1173 -0
  25. data/lib/spreadsheet/excel/reader/biff5.rb +22 -0
  26. data/lib/spreadsheet/excel/reader/biff8.rb +199 -0
  27. data/lib/spreadsheet/excel/row.rb +92 -0
  28. data/lib/spreadsheet/excel/sst_entry.rb +46 -0
  29. data/lib/spreadsheet/excel/workbook.rb +80 -0
  30. data/lib/spreadsheet/excel/worksheet.rb +100 -0
  31. data/lib/spreadsheet/excel/writer.rb +1 -0
  32. data/lib/spreadsheet/excel/writer/biff8.rb +75 -0
  33. data/lib/spreadsheet/excel/writer/format.rb +253 -0
  34. data/lib/spreadsheet/excel/writer/workbook.rb +690 -0
  35. data/lib/spreadsheet/excel/writer/worksheet.rb +891 -0
  36. data/lib/spreadsheet/font.rb +92 -0
  37. data/lib/spreadsheet/format.rb +177 -0
  38. data/lib/spreadsheet/formula.rb +9 -0
  39. data/lib/spreadsheet/helpers.rb +11 -0
  40. data/lib/spreadsheet/link.rb +43 -0
  41. data/lib/spreadsheet/row.rb +132 -0
  42. data/lib/spreadsheet/workbook.rb +126 -0
  43. data/lib/spreadsheet/worksheet.rb +287 -0
  44. data/lib/spreadsheet/writer.rb +30 -0
  45. data/spreadsheet.gemspec +20 -0
  46. data/test/data/test_changes.xls +0 -0
  47. data/test/data/test_copy.xls +0 -0
  48. data/test/data/test_datetime.xls +0 -0
  49. data/test/data/test_empty.xls +0 -0
  50. data/test/data/test_formula.xls +0 -0
  51. data/test/data/test_long_sst_record.xls +0 -0
  52. data/test/data/test_missing_row.xls +0 -0
  53. data/test/data/test_version_excel5.xls +0 -0
  54. data/test/data/test_version_excel95.xls +0 -0
  55. data/test/data/test_version_excel97.xls +0 -0
  56. data/test/excel/row.rb +35 -0
  57. data/test/excel/writer/workbook.rb +23 -0
  58. data/test/excel/writer/worksheet.rb +24 -0
  59. data/test/font.rb +163 -0
  60. data/test/integration.rb +1311 -0
  61. data/test/row.rb +33 -0
  62. data/test/suite.rb +17 -0
  63. data/test/workbook.rb +29 -0
  64. data/test/worksheet.rb +80 -0
  65. metadata +151 -0
@@ -0,0 +1,17 @@
1
module Spreadsheet
  module Excel
    module Internals
      ##
      # Binary formats and other configuration specific to BIFF5. A format
      # listed in BINARY_FORMATS wins; every other key is handed on to the next
      # #binfmt implementation in the ancestor chain. This module is likely to
      # be expanded as support for older versions of Excel grows.
      module Biff5
        # Record name => String#unpack format overriding the generic one.
        BINARY_FORMATS = {
          :dimensions => 'v5',
        }
        def binfmt(key) # :nodoc:
          return BINARY_FORMATS[key] if BINARY_FORMATS.key?(key)
          super
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Spreadsheet
  module Excel
    module Internals
      ##
      # Binary formats and other configuration specific to BIFF8. A format
      # listed in BINARY_FORMATS wins; every other key is handed on to the next
      # #binfmt implementation in the ancestor chain. This module is likely to
      # be expanded as support for older versions of Excel grows and more
      # binary formats are moved here for disambiguation.
      module Biff8
        # Record name => String#unpack format overriding the generic one.
        BINARY_FORMATS = {
          :bof        => 'v4V2',
          :dimensions => 'V2v2x2',
        }
        def binfmt(key) # :nodoc:
          return BINARY_FORMATS[key] if BINARY_FORMATS.key?(key)
          super
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'spreadsheet/compatibility'
2
+
3
module Spreadsheet
  module Excel
    ##
    # Keeps track of offsets in modified Excel documents. Considered internal
    # and subject to change without notice.
    #
    # Including classes gain two hashes, #changes and #offsets, plus a
    # class-level +offset+ macro that generates accessors feeding them.
    module Offset
      include Compatibility
      attr_reader :changes, :offsets
      def initialize(*args)
        super
        @changes = {}
        @offsets = {}
      end
      # Besides the regular mixin behaviour, teach the including module the
      # +offset+ class macro.
      def self.append_features(mod)
        super
        mod.module_eval do
          class << self
            include Compatibility
            # For every key this defines:
            # * a reader, unless an instance method of that name already exists
            # * <key>=     - stores the value and flags the key in @changes
            # * set_<key>  - stores the value, records [pos, len] in @offsets
            #                and calls have_set_<key> when the instance
            #                responds to it
            def offset(*keys)
              keys.each do |name|
                unless instance_methods.include?(method_name(name))
                  attr_reader name
                end
                define_method "#{name}=" do |value|
                  @changes.store name, true
                  instance_variable_set ivar_name(name), value
                end
                define_method "set_#{name}" do |value, pos, len|
                  instance_variable_set ivar_name(name), value
                  @offsets.store name, [pos, len]
                  hook = "have_set_#{name}"
                  send(hook, value, pos, len) if respond_to?(hook)
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,1173 @@
1
+ require 'spreadsheet/encodings'
2
+ require 'spreadsheet/font'
3
+ require 'spreadsheet/formula'
4
+ require 'spreadsheet/link'
5
+ require 'spreadsheet/excel/error'
6
+ require 'spreadsheet/excel/internals'
7
+ require 'spreadsheet/excel/sst_entry'
8
+ require 'spreadsheet/excel/worksheet'
9
+
10
+ module Spreadsheet
11
+ module Excel
12
+ ##
13
+ # Reader class for Excel Workbooks. Most read_* method correspond to an
14
+ # Excel-Record/Opcode. You should not need to call any of its methods
15
+ # directly. If you think you do, look at #read
16
+ class Reader
17
+ include Spreadsheet::Encodings
18
+ include Spreadsheet::Excel::Internals
19
+ ROW_BLOCK_OPS = {
20
+ :blank => true, :boolerr => true, :dbcell => true, :formula => true,
21
+ :label => true, :labelsst => true, :mulblank => true, :mulrk => true,
22
+ :number => true, :rk => true, :rstring => true,
23
+ }
24
# Sets up an idle reader.
#
# opts is kept as-is in @opts; this class itself looks at :bigendian (here),
# :print_opcodes (#get_next_chunk) and :memoization (#memoize?). When
# :bigendian is not supplied, the host byte order is detected by packing the
# integer 1 as a native 32-bit value.
def initialize(opts = {})
  @opts = opts
  @pos = 0
  @bigendian = opts.fetch(:bigendian) do
    [1].pack('l') != "\001\000\000\000"
  end
  @boundsheets = nil
  @current_row_block_offset = nil
  @current_row_block = {}
  # Seed the number-format table with the builtin formats.
  @formats = {}
  BUILTIN_FORMATS.each do |key, fmt|
    @formats[key] = client(fmt, 'UTF-8')
  end
end
36
# Decodes a 4-byte RK value into an Integer or Float.
#
#   Bit   Mask        Contents
#   0     0x00000001  0 = value not changed    1 = value is multiplied by 100
#   1     0x00000002  0 = floating-point value 1 = signed integer value
#   31-2  0xFFFFFFFC  encoded value
#
# With bit 1 cleared, the encoded value holds the 30 most significant bits of
# an IEEE 754 double whose remaining 34 bits are zero. With bit 1 set, it is a
# signed 30-bit integer, obtained by shifting right arithmetically by 2 bits.
# With bit 0 set, the decoded value is divided by 100 (which always yields a
# Float).
def decode_rk(work)
  flag_byte, = work.unpack 'C'
  per_cent   = (flag_byte & 1) == 1
  is_integer = (flag_byte & 2) != 0
  value = if is_integer
    # Read as a native signed 32-bit integer, reversing the bytes on
    # big-endian hosts, then drop the two flag bits.
    raw, = (@bigendian ? work.reverse : work).unpack 'l'
    raw >> 2
  else
    # Mask out the flag bits and use the rest as the upper half of a
    # little-endian double.
    upper, = work.unpack 'V'
    upper &= 0xfffffffc
    ("\0\0\0\0" + [upper].pack('V')).unpack(EIGHT_BYTE_DOUBLE).first
  end
  per_cent ? value / 100.0 : value
end
71
# Translates a numeric codepage id through CODEPAGES. On Ruby 1.9 and later
# the matching Encoding object is returned, on older Rubies just the name.
# Raises a RuntimeError for ids missing from CODEPAGES.
def encoding(codepage_id)
  name = CODEPAGES.fetch(codepage_id) do
    raise "Unknown Codepage 0x%04x" % codepage_id
  end
  return name if RUBY_VERSION < '1.9'
  Encoding.find name
end
80
# Reads the record at @pos and moves @pos past it.
#
# Returns [start position, opcode, total length including header, payload],
# where the opcode is looked up in SEDOCPO and falls back to the raw number.
# Returns nil at the end of @data or when no complete header could be read.
# If the :print_opcodes option holds an IO, a line describing the record is
# written to it.
def get_next_chunk
  start = @pos
  return nil unless start < @data.size
  op, len = @data[start, OPCODE_SIZE].unpack('v2')
  @pos += OPCODE_SIZE
  return nil unless len
  payload = @data[@pos, len]
  @pos += len
  code = SEDOCPO.fetch(op, op)
  io = @opts[:print_opcodes]
  if io
    io.puts sprintf("0x%04x/%-16s %5i: %s",
                    op, code.inspect, len, payload.inspect)
  end
  [start, code, len + OPCODE_SIZE, payload]
end
97
# Tells whether opcode +op+ belongs to a row block. A :row record only counts
# when the previous opcode was :row as well; any other opcode counts when it
# is listed in ROW_BLOCK_OPS.
def in_row_block?(op, previous)
  return previous == op if op == :row
  ROW_BLOCK_OPS.include?(op)
end
104
# The value of the :memoization option this reader was created with
# (nil when the option was not given).
def memoize?
  @opts[:memoization]
end
107
# Records [offset, length] for every worksheet in @workbook.offsets. A sheet
# extends up to the offset of the sheet after it; the last one extends up to
# the size of the workbook's ole object.
def postread_workbook
  sheets = @workbook.worksheets
  sheets.each_with_index do |sheet, idx|
    start = sheet.offset
    successor = sheets[idx + 1]
    stop = successor ? successor.offset : @workbook.ole.size
    @workbook.offsets.store sheet, [start, stop - start]
  end
end
115
# Per-worksheet counterpart of #postread_workbook. Intentionally a no-op.
def postread_worksheet(worksheet)
end
117
##
# The entry point for reading Excel documents: prepares the reader for +io+
# (#setup), parses the workbook (#read_workbook), makes format 0 the
# workbook's default format, forgets the changes recorded while reading and
# returns the workbook.
def read(io)
  setup io
  read_workbook
  book = @workbook
  book.default_format = book.format(0)
  book.changes.clear
  book
end
127
# BLANK record: a cell that has formatting but no value.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
def read_blank(worksheet, addr, work)
  fields = work.unpack binfmt(:blank)
  row, column, xf = fields
  set_cell worksheet, row, column, xf
end
135
# Reads the next record as a BOF record. Its opcode is stored in @bof; the
# BIFF version is stored in @version, but only when the record announces
# workbook globals.
#
#   Offset  Size  Contents
#   0       2     BIFF version (always 0x0600 for BIFF8)
#   2       2     Type of the following data:
#                   0x0005 = Workbook globals
#                   0x0006 = Visual Basic module
#                   0x0010 = Worksheet
#                   0x0020 = Chart
#                   0x0040 = Macro sheet
#                   0x0100 = Workspace file
#   4       2     Build identifier
#   6       2     Build year
#   8       4     File history flags
#   12      4     Lowest Excel version that can read all records in this file
#
# Version and data type are common to all Excel versions. The later fields
# (build identifier and year, from BIFF5 on) are ignored for the time being.
def read_bof
  _, @bof, _, work = get_next_chunk
  version, datatype = work.unpack('v2')
  @version = version if datatype == 0x5
end
157
# BOOLERR record: a cell holding a Boolean or an error code.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       1     Boolean or error value (type depends on the following byte)
#   7       1     0 = Boolean value; 1 = Error code
def read_boolerr(worksheet, addr, work)
  row, column, xf, payload, kind = work.unpack 'v3C2'
  cell = if kind == 0
    payload > 0
  else
    Error.new(payload)
  end
  set_cell worksheet, row, column, xf, cell
end
167
+ def read_boundsheet work, pos, len
168
+ # Offset Size Contents
169
+ # 0 4 Absolute stream position of the BOF record of the sheet
170
+ # represented by this record. This field is never encrypted
171
+ # in protected files.
172
+ # 4 1 Visibility: 0x00 = Visible
173
+ # 0x01 = Hidden
174
+ # 0x02 = Strong hidden (see below)
175
+ # 5 1 Sheet type: 0x00 = Worksheet
176
+ # 0x02 = Chart
177
+ # 0x06 = Visual Basic module
178
+ # 6 var. Sheet name: BIFF5/BIFF7: Byte string,
179
+ # 8-bit string length (➜ 3.3)
180
+ # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
181
+ offset, visibility, type = work.unpack("VC2")
182
+ name = client read_string(work[6..-1]), @workbook.encoding
183
+ if @boundsheets
184
+ @boundsheets[0] += 1
185
+ @boundsheets[2] += len
186
+ else
187
+ @boundsheets = [1, pos, len]
188
+ end
189
+ @workbook.set_boundsheets(*@boundsheets)
190
+ @workbook.add_worksheet Worksheet.new(:name => name,
191
+ :ole => @book,
192
+ :offset => offset,
193
+ :reader => self)
194
+ end
195
# CODEPAGE record: resolves the 16-bit codepage id through #encoding and
# hands the result, together with the record's position and length, to the
# workbook.
def read_codepage(work, pos, len)
  codepage_id = work.unpack('v').first
  @workbook.set_encoding encoding(codepage_id), pos, len
end
199
# COLINFO record: width and default formatting for a range of columns. One
# Column object per column in the range is created and stored in
# worksheet.columns.
#
#   Offset  Size  Contents
#   0       2     Index to first column in the range
#   2       2     Index to last column in the range
#   4       2     Width of the columns in 1/256 of the width of the zero
#                 character, using default font (first FONT record in the
#                 file)
#   6       2     Index to XF record (➜ 6.115) for default column formatting
#   8       2     Option flags:
#                 Bits  Mask    Contents
#                 0     0x0001  1 = Columns are hidden
#                 10-8  0x0700  Outline level of the columns (0 = no outline)
#                 12    0x1000  1 = Columns are collapsed
#   10      2     Not used
def read_colinfo(worksheet, work, pos, len)
  # The last character of the format string is dropped before unpacking.
  format = binfmt(:colinfo)[0..-2]
  first, last, raw_width, xf, flags = work.unpack format
  width     = raw_width.to_f / 256
  hidden    = (flags & 0x0001) > 0
  collapsed = (flags & 0x1000) > 0
  level     = (flags & 0x0700) / 256
  first.upto(last) do |idx|
    column = Column.new idx, @workbook.format(xf),
                        :width         => width,
                        :hidden        => hidden,
                        :collapsed     => collapsed,
                        :outline_level => level
    column.worksheet = worksheet
    worksheet.columns[idx] = column
  end
end
224
# DIMENSIONS record: the used range of the sheet. The unpacked fields are
# passed on to the worksheet along with the record's position and length.
# The layout is version specific and comes from binfmt(:dimensions).
#
#   Offset  Size  Contents
#   0       4     Index to first used row
#   4       4     Index to last used row, increased by 1
#   8       2     Index to first used column
#   10      2     Index to last used column, increased by 1
#   12      2     Not used
def read_dimensions(worksheet, work, pos, len)
  dimensions = work.unpack(binfmt(:dimensions))
  worksheet.set_dimensions dimensions, pos, len
end
233
# FONT record: builds a Font from the record and registers it with the
# workbook.
#
#   Offset  Size  Contents
#   0       2     Height of the font (in twips = 1/20 of a point)
#   2       2     Option flags:
#                 Bit  Mask    Contents
#                 0    0x0001  1 = Characters are bold (redundant, see weight)
#                 1    0x0002  1 = Characters are italic
#                 2    0x0004  1 = Characters are underlined
#                              (redundant, see underline type)
#                 3    0x0008  1 = Characters are struck out
#                 4    0x0010  1 = Characters are outlined (djberger)
#                 5    0x0020  1 = Characters are shadowed (djberger)
#   4       2     Colour index (➜ 6.70)
#   6       2     Font weight (100-1000). Standard values are
#                 0x0190 (400) for normal text and 0x02bc (700) for bold text.
#   8       2     Escapement type: 0x0000 = None
#                                  0x0001 = Superscript
#                                  0x0002 = Subscript
#   10      1     Underline type:  0x00 = None
#                                  0x01 = Single
#                                  0x02 = Double
#                                  0x21 = Single accounting
#                                  0x22 = Double accounting
#   11      1     Font family:
#                 0x00 = None (unknown or don't care)
#                 0x01 = Roman (variable width, serifed)
#                 0x02 = Swiss (variable width, sans-serifed)
#                 0x03 = Modern (fixed width, serifed or sans-serifed)
#                 0x04 = Script (cursive)
#                 0x05 = Decorative (specialised, e.g. Old English, Fraktur)
#   12      1     Character set:
#                 0x00 =   0 = ANSI Latin
#                 0x01 =   1 = System default
#                 0x02 =   2 = Symbol
#                 0x4d =  77 = Apple Roman
#                 0x80 = 128 = ANSI Japanese Shift-JIS
#                 0x81 = 129 = ANSI Korean (Hangul)
#                 0x82 = 130 = ANSI Korean (Johab)
#                 0x86 = 134 = ANSI Chinese Simplified GBK
#                 0x88 = 136 = ANSI Chinese Traditional BIG5
#                 0xa1 = 161 = ANSI Greek
#                 0xa2 = 162 = ANSI Turkish
#                 0xa3 = 163 = ANSI Vietnamese
#                 0xb1 = 177 = ANSI Hebrew
#                 0xb2 = 178 = ANSI Arabic
#                 0xba = 186 = ANSI Baltic
#                 0xcc = 204 = ANSI Cyrillic
#                 0xde = 222 = ANSI Thai
#                 0xee = 238 = ANSI Latin II (Central European)
#                 0xff = 255 = OEM Latin I
#   13      1     Not used
#   14      var.  Font name:
#                 BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
#                 BIFF8: Unicode string, 8-bit string length (➜ 3.4)
def read_font(work, pos, len)
  font = Font.new client(read_string(work[14..-1]), @workbook.encoding)
  twips, flags, color_idx, weight, escapement_id, underline_id,
    family_id, charset_id = work.unpack binfmt(:font)
  font.weight     = weight
  font.size       = twips / TWIPS
  # The flag bits are handed over as masked integers; the bold and underline
  # bits are skipped because weight and underline type carry the same
  # information.
  font.italic     = flags & 0x0002
  font.strikeout  = flags & 0x0008
  font.outline    = flags & 0x0010
  font.shadow     = flags & 0x0020
  font.color      = COLOR_CODES[color_idx] || :text
  font.escapement = ESCAPEMENT_TYPES[escapement_id]
  font.underline  = UNDERLINE_TYPES[underline_id]
  font.family     = FONT_FAMILIES[family_id]
  font.encoding   = FONT_ENCODINGS[charset_id]
  @workbook.add_font font
end
304
# FORMAT record: stores a number format string in @formats under its index.
#
#   Offset  Size  Contents
#   0       2     Format index used in other records
#   2       var.  Number format string
#                 (Unicode string, 16-bit string length, ➜ 3.4)
def read_format(work, pos, len)
  index = work.unpack('v').first
  pattern = read_string work[2..-1], 2
  @formats[index] = client(pattern, @workbook.encoding)
end
313
# FORMULA record: creates a Formula carrying the raw token data and the
# cached result, and puts it into the cell.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       8     Result of the formula, see below
#   14      2     Option flags:
#                 Bit  Mask    Contents
#                 0    0x0001  1 = Recalculate always
#                 1    0x0002  1 = Calculate on open
#                 3    0x0008  1 = Part of a shared formula
#   16      4     Not used
#   20      var.  Formula data (RPN token array, ➜ 4):
#                 0       2   Size of the following formula data (sz)
#                 2       sz  Formula data (RPN token array)
#                 [2+sz]  var. (optional) Additional data for specific tokens
#                             (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
#
# The 8-byte result field is one of:
#
#   numeric  0  8  IEEE 754 floating-point value (64-bit double precision)
#
#   string   0  1  0x00; 1  5  not used; 6  2  0xffff
#            (the string itself follows in a STRING record, ➜ 6.98; in BIFF8
#            it must not be empty - empty cells use the identifier 0x03)
#
#   Boolean  0  1  0x01; 1  1  not used; 2  1  0 = FALSE, 1 = TRUE;
#            3  3  not used; 6  2  0xffff
#
#   error    0  1  0x02; 1  1  not used; 2  1  error code (➜ 3.7);
#            3  3  not used; 6  2  0xffff
#
#   empty    0  1  0x03; 1  5  not used; 6  2  0xffff   (BIFF8)
def read_formula(worksheet, addr, work)
  row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]
  if rcheck != 0xffff || rtype > 3
    # No 0xffff marker (or an unknown type byte): the field is a plain double.
    number, = work.unpack 'x6E'
    # on architectures where sizeof(double) > 8
    number, = work.unpack 'x6e' unless number
    formula.value = number
  else
    case rtype
    when 0
      # The string result lives in the next record, possibly preceded by a
      # SHAREDFMLA record, which is skipped.
      pos, op, len, work = get_next_chunk
      if op == :sharedfmla
        ## TODO: formula-support in 0.8.0
        pos, op, len, work = get_next_chunk
      end
      if op == :string
        formula.value = client read_string(work, 2), @workbook.encoding
      else
        warn "String Value expected after Formula, but got #{op}"
        formula.value = Error.new 0x2a
        # Step back so the unexpected record gets processed normally.
        @pos = pos
      end
    when 1
      formula.value = rval > 0
    when 2
      formula.value = Error.new rval
    end
    # rtype 3 (empty cell): the value stays blank.
  end
  set_cell worksheet, row, column, xf, formula
end
402
# HLINK record: builds a Link and attaches it to every cell of the record's
# cell range.
#
# 6.53.1 Common record contents
#   Offset  Size  Contents
#   0       8     Cell range address of all cells containing this hyperlink
#                 (➜ 3.13.1)
#   8       16    GUID of StdLink:
#                 D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
#                 (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
#   24      4     Unknown value: 0x00000002
#   28      4     Option flags:
#                 Bit      Mask        Contents
#                 0        0x00000001  0 = No link extant
#                                      1 = File link or URL
#                 1        0x00000002  0 = Relative file path
#                                      1 = Absolute path or URL
#                 2 and 4  0x00000014  0 = No description
#                                      1 (both bits) = Description
#                 3        0x00000008  0 = No text mark
#                                      1 = Text mark
#                 7        0x00000080  0 = No target frame
#                                      1 = Target frame
#                 8        0x00000100  0 = File link or URL
#                                      1 = UNC path (incl. server name)
#   [32]    4     (optional) Character count of description text, including
#                 trailing zero word (dl)
#   [36]    2∙dl  (optional) Description text, no Unicode string header,
#                 always 16-bit characters, zero-terminated
#   [var.]  4     (optional) Character count of target frame, including
#                 trailing zero word (fl)
#   [var.]  2∙fl  (optional) Target frame, same string format
#   var.    var.  Special data (URL, local file or UNC path, see below)
#   [var.]  4     (optional) Character count of the text mark, including
#                 trailing zero word (tl)
#   [var.]  2∙tl  (optional) Text mark without "#" sign, same string format
#
# The +pos+ and +len+ arguments are not used; +pos+ is reused as the read
# cursor within +work+.
def read_hlink(worksheet, work, pos, len)
  firstrow, lastrow, firstcol, lastcol, _guid, opts = work.unpack 'v4H32x4V'
  has_link = opts & 0x0001
  desc     = opts & 0x0014
  textmark = opts & 0x0008
  target   = opts & 0x0080
  unc      = opts & 0x0100
  link = Link.new
  # The optional fields start right behind the 32-byte common header.
  pos = 32
  if desc > 0
    description, pos = read_hlink_string work, pos
    link << description
  end
  if target > 0
    link.target_frame, pos = read_hlink_string work, pos
  end
  if unc > 0
    # 6.53.4 Hyperlink to a file with UNC path (e.g. "\\server\path\file.xls")
    #   0  4     Character count of the UNC, including trailing zero word (fl)
    #   4  2∙fl  Character array of the UNC, no Unicode string header,
    #            always 16-bit characters, zero-terminated
    link.url, pos = read_hlink_string work, pos
  elsif has_link > 0
    # The moniker GUID tells URLs from local files.
    uid, = work.unpack "x#{pos}H32"
    pos += 16
    if uid == "e0c9ea79f9bace118c8200aa004ba90b"
      # 6.53.2 Hyperlink containing a URL (HTTP, FTP, e-mail, ...)
      #   0   16  GUID of URL Moniker:
      #           E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
      #           (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
      #   16  4   Size of character array of the URL, including trailing
      #           zero word (us). There are us/2-1 characters in the string.
      #   20  us  Character array of the URL, no Unicode string header,
      #           always 16-bit characters, zero-terminated
      size, = work.unpack "x#{pos}V"
      pos += 4
      data = work[pos, size].chomp "\000\000"
      link.url = client data
      pos += size
    else
      # 6.53.3 Hyperlink to a local file (absolute, or relative to the
      # workbook)
      #   0        16  GUID of File Moniker:
      #                03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
      #                (00000303-0000-0000-C000-000000000046)
      #   16       2   Directory up-level count. Each leading "..\" in the
      #                file link is deleted and increases this counter.
      #   18       4   Character count of the shortened file path and name,
      #                including trailing zero byte (sl)
      #   22       sl  Shortened file path and name in 8.3-DOS-format (may
      #                hold a long file name too), 8-bit characters,
      #                zero-terminated
      #   22+sl    24  Unknown byte sequence:
      #                FF FF AD DE 00 00 00 00
      #                00 00 00 00 00 00 00 00
      #                00 00 00 00 00 00 00 00
      #   46+sl    4   Size of the following file link field including string
      #                length field and additional data field (sz). If sz is
      #                zero, nothing will follow (except a text mark).
      #   [50+sl]  4   (optional) Size of character array of the extended
      #                file path and name (xl), xl/2 characters
      #   [54+sl]  2   (optional) Unknown byte sequence: 03 00
      #   [56+sl]  xl  (optional) Extended file path and name, 16-bit
      #                characters, not zero-terminated
      uplevel, count = work.unpack "x#{pos}vV"
      pos += 6
      # TODO: short file path may have any of the OEM encodings. Find out which
      #       and use the #client method to convert the encoding.
      prefix = internal('..\\', 'UTF-8') * uplevel
      link.dos = link.url = prefix << work[pos, count].chomp("\000")
      pos += count + 24
      total, = work.unpack "x#{pos}V"
      pos += 4
      # The xl field and the 03 00 marker exist only when sz is non-zero.
      # Skipping them unconditionally would leave the cursor 6 bytes past the
      # start of a following text mark.
      if total > 0
        size, = work.unpack "x#{pos}V"
        pos += 6
        link.url = client work[pos, size]
        pos += size
      end
    end
  else
    # 6.53.5 Hyperlink to the current workbook
    # Only the text mark field is present (optionally with description).
    # Example: the URL "#Sheet2!B1:C2" refers to the given range in the
    # current workbook.
  end
  if textmark > 0
    link.fragment, _ = read_hlink_string work, pos
  end
  # Without a description, the link's href is used as its text.
  link << link.href if link.empty?
  firstrow.upto lastrow do |row|
    firstcol.upto lastcol do |col|
      worksheet.add_link row, col, link
    end
  end
end
564
# Reads one of the strings embedded in a HLINK record: a 32-bit character
# count at +pos+, followed by that many 16-bit characters. A trailing zero
# word is cut off. Returns [converted string, position behind the string].
def read_hlink_string(work, pos)
  chars, = work.unpack "x#{pos}V"
  start = pos + 4
  bytes = chars * 2
  raw = work[start, bytes].chomp "\000\000"
  [client(raw, 'UTF-16LE'), start + bytes]
end
572
# INDEX record. Currently ignored, this method does nothing.
#
#   Offset  Size  Contents
#   0       4     Not used
#   4       4     Index to first used row (rf, 0-based)
#   8       4     Index to first row of unused tail of sheet
#                 (rl, last used row + 1, 0-based)
#   12      4     Absolute stream position of the DEFCOLWIDTH record
#                 (➜ 6.29) of the current sheet. If this record does not
#                 exist, the offset points to the record at the position
#                 where the DEFCOLWIDTH record would occur.
#   16      4∙nm  Array of nm absolute stream positions to the
#                 DBCELL record (➜ 6.26) of each Row Block
def read_index(worksheet, work, pos, len)
  # TODO: use the index if it exists
  # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
end
587
# LABEL record (BIFF2-BIFF7): a cell with an inline string.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       var.  Unicode string, 16-bit string length (➜ 3.4)
def read_label(worksheet, addr, work)
  row, column, xf = work.unpack 'v3'
  raw = read_string work[6..-1], 2
  text = client raw, @workbook.encoding
  set_cell worksheet, row, column, xf, text
end
597
# LABELSST record (BIFF8 only): a cell whose string is looked up in the
# shared string table by index.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       4     Index into SST record (➜ 6.96)
def read_labelsst(worksheet, addr, work)
  row, column, xf, sst_index = work.unpack binfmt(:labelsst)
  text = worksheet.shared_string(sst_index)
  set_cell worksheet, row, column, xf, text
end
606
# MULBLANK record (BIFF5-BIFF8): a run of blank cells in one row.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to first column (fc)
#   4       2∙nc  List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
#   4+2∙nc  2     Index to last column (lc)
def read_mulblank(worksheet, addr, work)
  row, first_column, *xfs = work.unpack 'v*'
  # The trailing word is the last column index, which is not needed.
  xfs.pop
  xfs.each_with_index do |xf, distance|
    set_cell worksheet, row, first_column + distance, xf
  end
end
616
# MULRK record (BIFF5-BIFF8): a run of RK-encoded numbers in one row.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to first column (fc)
#   4       6∙nc  List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
#                   0  2  Index to XF record (➜ 6.115)
#                   2  4  RK value (➜ 3.6)
#   4+6∙nc  2     Index to last column (lc)
def read_mulrk(worksheet, addr, work)
  row, first_column = work.unpack 'v2'
  column = first_column
  # Walk the 6-byte XF/RK structures; the limit keeps the trailing
  # last-column word out of the loop.
  4.step(work.size - 6, 6) do |offset|
    xf = work.unpack("x#{offset}v").first
    rk = work[offset + 2, 4]
    set_cell worksheet, row, column, xf, decode_rk(rk)
    column += 1
  end
end
632
# NUMBER record: a cell holding a floating-point number.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       8     IEEE 754 floating-point value (64-bit double precision)
def read_number(worksheet, addr, work)
  fields = work.unpack binfmt(:number)
  row, column, xf, number = fields
  set_cell worksheet, row, column, xf, number
end
641
# RK record (BIFF3-BIFF8): a cell holding an RK-encoded number, decoded by
# #decode_rk.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       4     RK value (➜ 3.6)
def read_rk(worksheet, addr, work)
  row, column, xf = work.unpack 'v3'
  number = decode_rk(work[6, 4])
  set_cell worksheet, row, column, xf, number
end
650
# Reads the cells of the row block described by +addr+ (a hash with the
# row's :index and the stream :offset to start reading from) and returns the
# Row object for that row.
#
# The result is cached in @current_row_block under [worksheet, row index];
# asking for the cached row again returns it without touching the stream,
# while asking for any other row discards the cache first.
#
# Records are consumed until one of these is met:
# * EOF (➜ 6.36) - should only happen if there is just one row block; the
#   stream is rewound to the start of that record
# * DBCELL - stream offsets to the cell records of each row
# * a ROW record (➜ 6.83) following at least one cell record; ROW records
#   preceding the cells were already handled when reading the worksheet
def read_row(worksheet, addr)
  index = addr[:index]
  key = [worksheet, index]
  return @current_row_block[key] if @current_row_block.key?(key)

  @current_row_block.clear
  cells = @current_row_block[key] = Row.new(nil, index)
  @pos = addr[:offset]
  found = false
  while (chunk = get_next_chunk)
    pos, op, _len, work = chunk
    case op
    when :eof
      @pos = pos
      return cells
    when :dbcell
      return cells
    when :row
      return cells if found
    when 0x0002
      # INTEGER ➜ 6.56 (BIFF2 only)
      # TODO: implement for BIFF2 support
      found = true
    when :blank,     # BLANK    ➜ 6.7
         :boolerr,   # BOOLERR  ➜ 6.10
         :formula,   # FORMULA  ➜ 6.46
         :label,     # LABEL    ➜ 6.59 (BIFF2-BIFF7)
         :labelsst,  # LABELSST ➜ 6.61 (BIFF8 only)
         :mulblank,  # MULBLANK ➜ 6.64 (BIFF5-BIFF8)
         :mulrk,     # MULRK    ➜ 6.65 (BIFF5-BIFF8)
         :number,    # NUMBER   ➜ 6.68
         :rk,        # RK       ➜ 6.82 (BIFF3-BIFF8)
         :rstring    # RSTRING  ➜ 6.84 (BIFF5/BIFF7)
      found = true
      # Every cell opcode has a read_<opcode> method with the same signature.
      send "read_#{op}", worksheet, addr, work
    end
  end
  cells
end
708
##
# Reads an RSTRING record and stores its text in the addressed cell. Only the
# string itself is read; the Rich-Text formatting runs that follow it are not
# evaluated here. _addr_ is not used by this reader.
#
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (➜ 6.115)
#        6    sz  Unformatted Unicode string, 16-bit string length (➜ 3.4)
#     6+sz     2  Number of Rich-Text formatting runs (rt)
#     8+sz  4·rt  List of rt formatting runs (➜ 3.2)
def read_rstring(worksheet, addr, work)
  row, column, xf = work.unpack('v3')
  raw = read_string(work[6..-1], 2)
  text = client(raw, @workbook.encoding)
  set_cell(worksheet, row, column, xf, text)
end
720
##
# Reads a WINDOW2 record. Of all the settings described below only the
# "sheet selected" flag (bit 9) is evaluated; it is stored in
# worksheet.selected. _pos_ and _len_ are not used.
#
# This record contains additional settings for the document window
# (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8).
# It is part of the Sheet View Settings Block (➜ 4.5).
#
#   Offset  Size  Contents
#        0     2  Option flags:
#                 Bit  Mask    Contents
#                   0  0x0001  0 = Show formula results
#                              1 = Show formulas
#                   1  0x0002  0 = Do not show grid lines
#                              1 = Show grid lines
#                   2  0x0004  0 = Do not show sheet headers
#                              1 = Show sheet headers
#                   3  0x0008  0 = Panes are not frozen
#                              1 = Panes are frozen (freeze)
#                   4  0x0010  0 = Show zero values as empty cells
#                              1 = Show zero values
#                   5  0x0020  0 = Manual grid line colour
#                              1 = Automatic grid line colour
#                   6  0x0040  0 = Columns from left to right
#                              1 = Columns from right to left
#                   7  0x0080  0 = Do not show outline symbols
#                              1 = Show outline symbols
#                   8  0x0100  0 = Keep splits if pane freeze is removed
#                              1 = Remove splits if pane freeze is removed
#                   9  0x0200  0 = Sheet not selected
#                              1 = Sheet selected (BIFF5-BIFF8)
#                  10  0x0400  0 = Sheet not active
#                              1 = Sheet active (BIFF5-BIFF8)
#                  11  0x0800  0 = Show in normal view
#                              1 = Show in page break preview (BIFF8)
#        2     2  Index to first visible row
#        4     2  Index to first visible column
#        6     2  Colour index of grid line colour (➜ 5.74).
#                 Note that in BIFF2-BIFF5 an RGB colour is written instead.
#        8     2  Not used
#       10     2  Cached magnification factor in page break preview
#                 (in percent); 0 = Default (60%)
#       12     2  Cached magnification factor in normal view (in percent);
#                 0 = Default (100%)
#       14     4  Not used
def read_window2(worksheet, work, pos, len)
  flags = work.unpack('v').first
  worksheet.selected = !(flags & 0x0200).zero?
end
764
##
# Reads the records of the workbook stream from the current position and
# dispatches the ones that are understood to their readers. Unknown records
# are skipped. Reading ends
# * at the next BOF record (Type = worksheet, ➜ 6.8), or
# * at EOF (➜ 6.36), after postread_workbook has been called.
#
# The opcode of the last record that was not a CONTINUE is remembered, so
# that a CONTINUE record (➜ 6.22) can be attributed to the record it extends.
# Only continuations of SST are handled.
def read_workbook
  previous_op = nil
  while (chunk = get_next_chunk)
    pos, op, len, work = chunk
    case op
    when @bof, :bof # ● BOF Type = worksheet (➜ 6.8)
      return
    when :eof # ● EOF ➜ 6.36
      postread_workbook
      return
    when :datemode # ○ DATEMODE ➜ 6.25
      flag = work.unpack('v').first
      @workbook.date_base =
        flag == 1 ? DateTime.new(1904, 1, 1) : DateTime.new(1899, 12, 31)
    when :continue # ○ CONTINUE ➜ 6.22
      # ● SST ➜ 6.96 is the only record whose continuation is read
      continue_sst(work, pos, len) if previous_op == :sst
    when :codepage # ○ CODEPAGE ➜ 6.17
      read_codepage(work, pos, len)
    when :boundsheet # ●● BOUNDSHEET ➜ 6.12
      read_boundsheet(work, pos, len)
    when :xf # ●● XF ➜ 6.115
      read_xf(work, pos, len)
    when :sst # ○ Shared String Table ➜ 5.11, ● SST ➜ 6.96
      # TODO: implement memory-efficient sst handling, possibly in
      #       conjunction with EXTSST
      read_sst(work, pos, len)
    when :extsst # ● EXTSST ➜ 6.40
      read_extsst(work, pos, len)
    when :style # ●● STYLE ➜ 6.99
      read_style(work, pos, len)
    when :format # ○○ FORMAT (Number Format) ➜ 6.45
      read_format(work, pos, len)
    when :font # ●● FONT ➜ 6.43
      read_font(work, pos, len)
    end
    previous_op = op unless op == :continue
  end
end
810
##
# Scans the substream of _worksheet_, starting at stream position _offset_,
# until EOF (➜ 6.36) is found, at which point postread_worksheet is called.
#
# Sheet-level records (GUTS, COLINFO, DIMENSIONS, HLINK, WINDOW2) are read
# immediately. Cell data is not read at this stage: for ROW records, and for
# cell records listed in ROW_BLOCK_OPS, only the address of the row is
# registered with the worksheet.
#
# @current_row_block_offset holds [start] while a Row Block is open. As soon
# as a record is found for which in_row_block? is false, the block is closed
# by storing its length in the second slot, giving [start, length].
def read_worksheet(worksheet, offset)
  @pos = offset
  @detected_rows = {}
  previous = nil
  while (chunk = get_next_chunk)
    pos, op, len, work = chunk
    open_block = @current_row_block_offset
    if open_block && !in_row_block?(op, previous)
      @current_row_block_offset = nil
      open_block[1] = pos - open_block[0]
    end
    case op
    when :eof # ● EOF ➜ 6.36
      postread_worksheet(worksheet)
      return
    #when :uncalced # ○ UNCALCED ➜ 6.104
    # TODO: Formula support. Values were not calculated before saving
    #warn <<-EOS
    # Some fields containing formulas were saved without a computed value.
    # Support Spreadsheet::Excel by implementing formula-calculations!
    #EOS
    #when :index # ○ INDEX ➜ 5.7 (Row Blocks), ➜ 6.55
    # TODO: if there are changes in rows, omit index when writing
    #read_index worksheet, work, pos, len
    when :guts # GUTS 5.53
      read_guts(worksheet, work, pos, len)
    when :colinfo # ○○ COLINFO ➜ 6.18
      read_colinfo(worksheet, work, pos, len)
    when :dimensions # ● DIMENSIONS ➜ 6.31
      read_dimensions(worksheet, work, pos, len)
    when :row # ○○ Row Blocks ➜ 5.7, ● ROW ➜ 6.83
      set_row_address(worksheet, work, pos, len)
    when :hlink
      read_hlink(worksheet, work, pos, len)
    when :window2
      read_window2(worksheet, work, pos, len)
    else
      # a cell record: make sure its row is known even without a ROW record
      set_missing_row_address(worksheet, work, pos, len) if ROW_BLOCK_OPS.include?(op)
    end
    previous = op
  end
end
854
##
# Reads a GUTS record and copies its four fields into worksheet.guts under
# the keys :width, :height, :row_level and :col_level. _pos_ and _len_ are
# not used.
#
#   Offset  Size  Contents
#        0     2  Width of the area to display row outlines (left of the
#                 sheet), in pixel
#        2     2  Height of the area to display column outlines (above the
#                 sheet), in pixel
#        4     2  Number of visible row outline levels (used row levels + 1;
#                 or 0, if not used)
#        6     2  Number of visible column outline levels (used column
#                 levels + 1; or 0, if not used)
def read_guts(worksheet, work, pos, len)
  fields = [:width, :height, :row_level, :col_level]
  values = work.unpack('v4')
  fields.each_with_index do |field, idx|
    worksheet.guts[field] = values[idx]
  end
  values[3]
end
866
##
# Reads a STYLE record and assigns the style's name to the XF record (as
# returned by @workbook.format) it refers to. _pos_ and _len_ are not used.
#
# For user-defined styles the name is read from the record. For built-in
# styles it is looked up in BUILTIN_STYLES; the '_lv' placeholder of the
# RowLevel/ColLevel names is replaced by '_' followed by the level stored in
# the record. Built-in identifiers missing from BUILTIN_STYLES leave the
# name untouched.
#
# User-Defined Cell Styles:
#   Offset  Size  Contents
#        0     2  Bit    Mask    Contents
#                 11-0   0x0fff  Index to style XF record (➜ 6.115)
#                   15   0x8000  Always 0 for user-defined styles
#        2  var.  BIFF2-BIFF7: Non-empty byte string,
#                              8-bit string length (➜ 3.3)
#                 BIFF8:       Non-empty Unicode string,
#                              16-bit string length (➜ 3.4)
#
# Built-In Cell Styles:
#   Offset  Size  Contents
#        0     2  Bit    Mask    Contents
#                 11-0   0x0fff  Index to style XF record (➜ 6.115)
#                   15   0x8000  Always 1 for built-in styles
#        2     1  Identifier of the built-in cell style:
#                 0x00 = Normal
#                 0x01 = RowLevel_lv (see next field)
#                 0x02 = ColLevel_lv (see next field)
#                 0x03 = Comma
#                 0x04 = Currency
#                 0x05 = Percent
#                 0x06 = Comma [0] (BIFF4-BIFF8)
#                 0x07 = Currency [0] (BIFF4-BIFF8)
#                 0x08 = Hyperlink (BIFF8)
#                 0x09 = Followed Hyperlink (BIFF8)
#        3     1  Level for RowLevel or ColLevel style (zero-based, lv),
#                 FFH otherwise
def read_style(work, pos, len)
  flags, = work.unpack('v')
  xf = @workbook.format(flags & 0x0fff)
  if (flags & 0x8000) == 0
    xf.name = client(read_string(work[2..-1], 2), @workbook.encoding)
  else
    id, level = work.unpack('x2C2')
    if (name = BUILTIN_STYLES[id])
      # String#sub returns a new string: its result has to be used (it was
      # discarded before), and the constant must not be modified in place.
      xf.name = client(name.sub('_lv', "_#{level}"), 'UTF-8')
    end
  end
end
909
##
# Reads an XF (extended format) record, builds a Format from it and appends
# that Format to the workbook. _pos_ and _len_ are not used. The record is
# unpacked with the template returned by binfmt(:xf).
#
# NOTE(review): several multi-bit fields that do not start at bit 0 (vertical
# alignment 0x70, text direction 0xc0, the line colour indices, the fill
# pattern) are used as lookup keys / stored after masking only, without being
# shifted down. Confirm that NGILA_V_FX, NOITCERID_TXET_FX and COLOR_CODES
# are keyed accordingly.
#
#   Offset  Size  Contents
#        0     2  Index to FONT record (➜ 6.43)
#        2     2  Index to FORMAT record (➜ 6.45)
#        4     2  Bit    Mask    Contents
#                  2-0   0x0007  XF_TYPE_PROT – XF type, cell protection
#                                Bit  Mask  Contents
#                                  0  0x01  1 = Cell is locked
#                                  1  0x02  1 = Formula is hidden
#                                  2  0x04  0 = Cell XF; 1 = Style XF
#                 15-4   0xfff0  Index to parent style XF
#                                (always 0xfff in style XFs)
#        6     1  Bit    Mask    Contents
#                  2-0   0x07    XF_HOR_ALIGN – Horizontal alignment
#                                Value  Horizontal alignment
#                                0x00   General
#                                0x01   Left
#                                0x02   Centred
#                                0x03   Right
#                                0x04   Filled
#                                0x05   Justified (BIFF4-BIFF8X)
#                                0x06   Centred across selection
#                                       (BIFF4-BIFF8X)
#                                0x07   Distributed (BIFF8X)
#                    3   0x08    1 = Text is wrapped at right border
#                  6-4   0x70    XF_VERT_ALIGN – Vertical alignment
#                                Value  Vertical alignment
#                                0x00   Top
#                                0x01   Centred
#                                0x02   Bottom
#                                0x03   Justified (BIFF5-BIFF8X)
#                                0x04   Distributed (BIFF8X)
#        7     1  XF_ROTATION: Text rotation angle (see above)
#                 Value   Text rotation
#                 0       Not rotated
#                 1-90    1 to 90 degrees counterclockwise
#                 91-180  1 to 90 degrees clockwise
#                 255     Letters are stacked top-to-bottom,
#                         but not rotated
#        8     1  Bit    Mask    Contents
#                  3-0   0x0f    Indent level
#                    4   0x10    1 = Shrink content to fit into cell
#                    5   0x20    1 = Merge Range (djberger)
#                  7-6   0xc0    Text direction (BIFF8X only)
#                                0 = According to context
#                                1 = Left-to-right
#                                2 = Right-to-left
#        9     1  Bit    Mask    Contents
#                  7-2   0xfc    XF_USED_ATTRIB – Used attributes
#                                Each bit describes the validity of a
#                                specific group of attributes. In cell XFs
#                                a cleared bit means the attributes of the
#                                parent style XF are used (but only if the
#                                attributes are valid there), a set bit
#                                means the attributes of this XF are used.
#                                In style XFs a cleared bit means the
#                                attribute setting is valid, a set bit
#                                means the attribute should be ignored.
#                                Bit  Mask  Contents
#                                  0  0x01  Flag for number format
#                                  1  0x02  Flag for font
#                                  2  0x04  Flag for horizontal and
#                                           vertical alignment, text wrap,
#                                           indentation, orientation,
#                                           rotation, and text direction
#                                  3  0x08  Flag for border lines
#                                  4  0x10  Flag for background area style
#                                  5  0x20  Flag for cell protection (cell
#                                           locked and formula hidden)
#       10     4  Cell border lines and background area:
#                 Bit    Mask        Contents
#                  3- 0  0x0000000f  Left line style (➜ 3.10)
#                  7- 4  0x000000f0  Right line style (➜ 3.10)
#                 11- 8  0x00000f00  Top line style (➜ 3.10)
#                 15-12  0x0000f000  Bottom line style (➜ 3.10)
#                 22-16  0x007f0000  Colour index (➜ 6.70)
#                                    for left line colour
#                 29-23  0x3f800000  Colour index (➜ 6.70)
#                                    for right line colour
#                    30  0x40000000  1 = Diagonal line
#                                    from top left to right bottom
#                    31  0x80000000  1 = Diagonal line
#                                    from bottom left to right top
#       14     4  Bit    Mask        Contents
#                  6- 0  0x0000007f  Colour index (➜ 6.70)
#                                    for top line colour
#                 13- 7  0x00003f80  Colour index (➜ 6.70)
#                                    for bottom line colour
#                 20-14  0x001fc000  Colour index (➜ 6.70)
#                                    for diagonal line colour
#                 24-21  0x01e00000  Diagonal line style (➜ 3.10)
#                 31-26  0xfc000000  Fill pattern (➜ 3.11)
#       18     2  Bit    Mask    Contents
#                  6-0   0x007f  Colour index (➜ 6.70)
#                                for pattern colour
#                 13-7   0x3f80  Colour index (➜ 6.70)
#                                for pattern background
def read_xf(work, pos, len)
  fmt = Format.new
  # The XF type/protection and used-attribute fields are unpacked but not
  # evaluated.
  font_index, format_index, _type_prot, align, rotation, indent, _used_attr,
    borders, border_colors, pattern = work.unpack(binfmt(:xf))

  fmt.number_format = @formats[format_index]
  ## this appears to be undocumented: the first 4 fonts seem to be accessed
  #  with a 0-based index, but all subsequent font indices are 1-based.
  fmt.font = @workbook.font(font_index > 3 ? font_index - 1 : font_index)

  # alignment and orientation
  fmt.horizontal_align = NGILA_H_FX[align & 0x07]
  fmt.text_wrap = (align & 0x08) > 0
  fmt.vertical_align = NGILA_V_FX[align & 0x70]
  fmt.rotation = case
                 when rotation == 255 then :stacked
                 when rotation > 90 then 90 - rotation # clockwise => negative
                 else rotation
                 end
  fmt.indent_level = indent & 0x0f
  fmt.shrink = (indent & 0x10) > 0
  fmt.text_direction = NOITCERID_TXET_FX[indent & 0xc0]

  # border lines: only presence of a line style is recorded
  fmt.left = (borders & 0x0000000f) > 0
  fmt.right = (borders & 0x000000f0) > 0
  fmt.top = (borders & 0x00000f00) > 0
  fmt.bottom = (borders & 0x0000f000) > 0
  fmt.left_color = COLOR_CODES[borders & 0x007f0000] || :border
  fmt.right_color = COLOR_CODES[borders & 0x3f800000] || :border
  fmt.cross_down = (borders & 0x40000000) > 0
  fmt.cross_up = (borders & 0x80000000) > 0
  fmt.top_color = COLOR_CODES[border_colors & 0x0000007f] || :border
  fmt.bottom_color = COLOR_CODES[border_colors & 0x00003f80] || :border
  fmt.diagonal_color = COLOR_CODES[border_colors & 0x001fc000] || :border
  #fmt.diagonal_style = COLOR_CODES[border_colors & 0x01e00000]

  # background area
  fmt.pattern = border_colors & 0xfc000000
  fmt.pattern_fg_color = COLOR_CODES[pattern & 0x007f] || :border
  fmt.pattern_bg_color = COLOR_CODES[pattern & 0x3f80] || :pattern_bg

  @workbook.add_format(fmt)
end
1043
##
# Stores _value_ at _column_ of the cached row [worksheet, row], creating the
# Row in @current_row_block if it is not there yet. Unless the XF index _xf_
# is 0, the corresponding workbook format is recorded for the cell as well.
def set_cell(worksheet, row, column, xf, value=nil)
  key = [worksheet, row]
  cells = (@current_row_block[key] ||= Row.new(nil, row))
  if xf != 0
    cells.formats[column] = @workbook.format(xf)
  end
  cells[column] = value
end
1048
##
# Registers the address of a row for which a cell record was found although
# the worksheet has no offset for it yet. Rows that already have an entry in
# worksheet.offsets are left alone. If no Row Block is open, one is opened at
# _pos_. _len_ is not used.
#
#   Offset  Size  Contents
#        0     2  Index of this row
#        2     2  Index to this column
def set_missing_row_address(worksheet, work, pos, len)
  row_index, _column_index = work.unpack('v2')
  return if worksheet.offsets[row_index]

  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  worksheet.set_row_address(row_index,
                            :index     => row_index,
                            :row_block => block,
                            :offset    => block[0])
end
1063
##
# Reads a ROW record (unpacked with the template from binfmt(:row)) and
# registers the row's attributes with the worksheet. No cell data is read
# here. If no Row Block is open, one is opened at _pos_. _len_ is not used.
#
# :height is only stored if bit 6 of the option flags is set, and
# :default_format only if bit 7 is set; otherwise :default_format is nil.
#
#   Offset  Size  Contents
#        0     2  Index of this row
#        2     2  Index to column of the first cell which
#                 is described by a cell record
#        4     2  Index to column of the last cell which is
#                 described by a cell record, increased by 1
#        6     2  Bit    Mask    Contents
#                 14-0   0x7fff  Height of the row, in twips = 1/20 of a point
#                   15   0x8000  0 = Row has custom height;
#                                1 = Row has default height
#        8     2  Not used
#       10     2  In BIFF3-BIFF4 this field contains a relative offset to
#                 calculate stream position of the first cell record for this
#                 row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
#                 anymore, but the DBCELL record (➜ 6.26) instead.
#       12     4  Option flags and default row formatting:
#                 Bit    Mask        Contents
#                  2-0   0x00000007  Outline level of the row
#                    4   0x00000010  1 = Outline group starts or ends here
#                                        (depending on where the outline
#                                        buttons are located, see WSBOOL
#                                        record, ➜ 6.113), and is collapsed
#                    5   0x00000020  1 = Row is hidden (manually, or by a
#                                        filter or outline group)
#                    6   0x00000040  1 = Row height and default font height
#                                        do not match
#                    7   0x00000080  1 = Row has explicit default format (fl)
#                    8   0x00000100  Always 1
#                27-16   0x0fff0000  If fl = 1: Index to default XF record
#                                    (➜ 6.115)
#                   28   0x10000000  1 = Additional space above the row.
#                                        This flag is set, if the upper
#                                        border of at least one cell in this
#                                        row or if the lower border of at
#                                        least one cell in the row above is
#                                        formatted with a thick line style.
#                                        Thin and medium line styles are not
#                                        taken into account.
#                   29   0x20000000  1 = Additional space below the row.
#                                        This flag is set, if the lower
#                                        border of at least one cell in this
#                                        row or if the upper border of at
#                                        least one cell in the row below is
#                                        formatted with a medium or thick
#                                        line style. Thin line styles are
#                                        not taken into account.
def set_row_address(worksheet, work, pos, len)
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  index, first_used, first_unused, raw_height, flags =
    work.unpack(binfmt(:row))
  twips = raw_height & 0x7fff # drop the "default height" bit
  # TODO: read attributes from work[13,3], read flags
  attrs = {
    :default_format => nil,
    :first_used     => first_used,
    :first_unused   => first_unused,
    :index          => index,
    :row_block      => block,
    :offset         => block[0],
    :outline_level  => flags & 0x00000007,
    :collapsed      => (flags & 0x0000010) > 0,
    :hidden         => (flags & 0x0000020) > 0,
  }
  attrs[:height] = twips / TWIPS if (flags & 0x00000040) > 0
  if (flags & 0x00000080) > 0
    xf_index = (flags & 0x0fff0000) >> 16
    attrs[:default_format] = @workbook.format(xf_index)
  end
  # TODO: Row spacing
  worksheet.set_row_address(index, attrs)
end
1136
##
# Prepares the reader for _io_: opens it as an OLE storage, creates the
# Workbook, locates the workbook stream under one of its known names, loads
# it into @data, reads the BOF record and finally extends the reader with the
# version-specific modules for the detected BIFF version.
def setup(io)
  ## Reading from StringIO fails without forced encoding
  buffer = io.respond_to?(:string) && io.string
  if buffer && buffer.respond_to?(:force_encoding)
    buffer.force_encoding('ASCII-8BIT')
  end
  ##
  io.rewind
  @ole = Ole::Storage.open(io)
  @workbook = Workbook.new(io, {})
  # Take the first stream name that can be opened; failures to open a name
  # are ignored and leave @book false.
  %w{Book Workbook BOOK WORKBOOK book workbook}.each do |name|
    @book = begin
      @ole.file.open(name)
    rescue StandardError
      false
    end
    break if @book
  end
  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  biff = @workbook.biff_version
  extend_reader(biff)
  extend_internals(biff)
end
1158
+ private
1159
##
# Extends this reader with the Internals::Biff<version> module, loading its
# file first. If that file cannot be loaded, nothing is extended and nil is
# returned.
def extend_internals(version)
  mod_name = 'Biff%i' % version
  require "spreadsheet/excel/internals/#{mod_name.downcase}"
  extend(Internals.const_get(mod_name))
  ## spreadsheets may not include a codepage record.
  if version < 8
    @workbook.encoding = encoding(850)
  end
rescue LoadError
end
1166
##
# Extends this reader with the Reader::Biff<version> module, loading its file
# first. If that file cannot be loaded, nothing is extended and nil is
# returned.
def extend_reader(version)
  mod_name = 'Biff%i' % version
  require "spreadsheet/excel/reader/#{mod_name.downcase}"
  extend(Reader.const_get(mod_name))
rescue LoadError
end
1171
+ end
1172
+ end
1173
+ end