spreadsheet 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65):
  1. checksums.yaml +4 -4
  2. data/bin/irb +29 -0
  3. data/bin/racc +29 -0
  4. data/bin/rdbg +29 -0
  5. data/bin/rdoc +29 -0
  6. data/bin/ri +29 -0
  7. data/bin/rubocop +29 -0
  8. data/bin/ruby-parse +29 -0
  9. data/bin/ruby-rewrite +29 -0
  10. data/bin/standardrb +29 -0
  11. data/bin/test-unit +29 -0
  12. data/lib/parseexcel/parseexcel.rb +66 -58
  13. data/lib/parseexcel/parser.rb +1 -1
  14. data/lib/parseexcel.rb +1 -1
  15. data/lib/spreadsheet/column.rb +13 -10
  16. data/lib/spreadsheet/compatibility.rb +3 -1
  17. data/lib/spreadsheet/datatypes.rb +150 -147
  18. data/lib/spreadsheet/encodings.rb +20 -16
  19. data/lib/spreadsheet/errors.rb +2 -2
  20. data/lib/spreadsheet/excel/error.rb +23 -22
  21. data/lib/spreadsheet/excel/internals/biff5.rb +11 -11
  22. data/lib/spreadsheet/excel/internals/biff8.rb +13 -13
  23. data/lib/spreadsheet/excel/internals.rb +451 -451
  24. data/lib/spreadsheet/excel/offset.rb +34 -31
  25. data/lib/spreadsheet/excel/password_hash.rb +18 -18
  26. data/lib/spreadsheet/excel/reader/biff5.rb +34 -35
  27. data/lib/spreadsheet/excel/reader/biff8.rb +235 -222
  28. data/lib/spreadsheet/excel/reader.rb +1331 -1274
  29. data/lib/spreadsheet/excel/rgb.rb +91 -91
  30. data/lib/spreadsheet/excel/row.rb +99 -91
  31. data/lib/spreadsheet/excel/sst_entry.rb +41 -38
  32. data/lib/spreadsheet/excel/workbook.rb +87 -76
  33. data/lib/spreadsheet/excel/worksheet.rb +126 -107
  34. data/lib/spreadsheet/excel/writer/biff8.rb +57 -55
  35. data/lib/spreadsheet/excel/writer/format.rb +274 -256
  36. data/lib/spreadsheet/excel/writer/n_worksheet.rb +838 -798
  37. data/lib/spreadsheet/excel/writer/workbook.rb +672 -635
  38. data/lib/spreadsheet/excel/writer/worksheet.rb +899 -861
  39. data/lib/spreadsheet/excel/writer.rb +1 -1
  40. data/lib/spreadsheet/excel.rb +19 -12
  41. data/lib/spreadsheet/font.rb +31 -26
  42. data/lib/spreadsheet/format.rb +75 -59
  43. data/lib/spreadsheet/link.rb +8 -5
  44. data/lib/spreadsheet/note.rb +7 -6
  45. data/lib/spreadsheet/noteObject.rb +6 -5
  46. data/lib/spreadsheet/row.rb +35 -24
  47. data/lib/spreadsheet/version.rb +1 -1
  48. data/lib/spreadsheet/workbook.rb +28 -13
  49. data/lib/spreadsheet/worksheet.rb +103 -68
  50. data/lib/spreadsheet/writer.rb +3 -0
  51. data/lib/spreadsheet.rb +12 -15
  52. data/test/data/test_row_record_empty_range.xls +0 -0
  53. data/test/excel/reader.rb +8 -8
  54. data/test/excel/row.rb +35 -31
  55. data/test/excel/writer/workbook.rb +18 -16
  56. data/test/excel/writer/worksheet.rb +10 -8
  57. data/test/font.rb +44 -32
  58. data/test/format.rb +38 -33
  59. data/test/integration.rb +641 -598
  60. data/test/row.rb +5 -3
  61. data/test/suite.rb +7 -7
  62. data/test/workbook.rb +15 -14
  63. data/test/workbook_protection.rb +5 -5
  64. data/test/worksheet.rb +36 -34
  65. metadata +59 -6
@@ -1,1314 +1,1371 @@
1
- require 'spreadsheet/encodings'
2
- require 'spreadsheet/font'
3
- require 'spreadsheet/formula'
4
- require 'spreadsheet/link'
5
- require 'spreadsheet/note'
6
- require 'spreadsheet/noteObject'
7
- require 'spreadsheet/excel/error'
8
- require 'spreadsheet/excel/internals'
9
- require 'spreadsheet/excel/sst_entry'
10
- require 'spreadsheet/excel/worksheet'
1
+ require "spreadsheet/encodings"
2
+ require "spreadsheet/font"
3
+ require "spreadsheet/formula"
4
+ require "spreadsheet/link"
5
+ require "spreadsheet/note"
6
+ require "spreadsheet/noteObject"
7
+ require "spreadsheet/excel/error"
8
+ require "spreadsheet/excel/internals"
9
+ require "spreadsheet/excel/sst_entry"
10
+ require "spreadsheet/excel/worksheet"
11
11
 
12
12
  module Spreadsheet
13
13
  module Excel
14
- ##
15
- # Reader class for Excel Workbooks. Most read_* method correspond to an
16
- # Excel-Record/Opcode. You should not need to call any of its methods
17
- # directly. If you think you do, look at #read
18
- class Reader
19
- include Spreadsheet::Encodings
20
- include Spreadsheet::Excel::Internals
21
- ROW_BLOCK_OPS = {
22
- :blank => true, :boolerr => true, :dbcell => true, :formula => true,
23
- :label => true, :labelsst => true, :mulblank => true, :mulrk => true,
24
- :number => true, :rk => true, :rstring => true,
25
- }
26
- def initialize opts = {}
27
- @pos = 0
28
- @bigendian = opts.fetch(:bigendian) {
29
- [1].pack('l') != "\001\000\000\000"
30
- }
31
- @opts = opts
32
- @boundsheets = nil
33
- @current_row_block = {}
34
- @current_row_block_offset = nil
35
- @formats = {}
36
- BUILTIN_FORMATS.each do |key, fmt| @formats.store key, client(fmt, 'UTF-8') end
37
- end
38
- def decode_rk work
39
- # Bit Mask Contents
40
- # 0 0x00000001 0 = Value not changed 1 = Value is multiplied by 100
41
- # 1 0x00000002 0 = Floating-point value 1 = Signed integer value
42
- # 31-2 0xFFFFFFFC Encoded value
43
- #
44
- # If bit 1 is cleared, the encoded value represents the 30 most significant
45
- # bits of an IEEE 754 floating-point value (64-bit double precision). The
46
- # 34 least significant bits must be set to zero. If bit 1 is set, the
47
- # encoded value represents a signed 30-bit integer value. To get the
48
- # correct integer, the encoded value has to be shifted right arithmetically
49
- # by 2 bits. If bit 0 is set, the decoded value (both integer and
50
- # floating-point) must be divided by 100 to get the final result.
51
- flags, = work.unpack 'C'
52
- cent = flags & 1
53
- int = flags & 2
54
- value = 0
55
- if int == 0
56
- ## remove two bits
57
- integer, = work.unpack 'V'
58
- integer &= 0xfffffffc
59
- value, = ("\0\0\0\0" + [integer].pack('V')).unpack EIGHT_BYTE_DOUBLE
60
- else
61
- ## I can't find a format for unpacking a little endian signed integer.
62
- # 'V' works for packing, but not for unpacking. But the following works
63
- # fine afaics:
64
- unsigned, = (@bigendian ? work.reverse : work).unpack 'l'
65
- ## remove two bits
66
- value = unsigned >> 2
67
- end
68
- if cent == 1
69
- value /= 100.0
70
- end
71
- value
72
- end
73
- def encoding codepage_id
74
- name = CODEPAGES.fetch(codepage_id) do
75
- raise Spreadsheet::Errors::UnknownCodepage, "Unknown Codepage 0x%04x" % codepage_id
76
- end
14
+ ##
15
+ # Reader class for Excel Workbooks. Most read_* method correspond to an
16
+ # Excel-Record/Opcode. You should not need to call any of its methods
17
+ # directly. If you think you do, look at #read
18
+ class Reader
19
+ include Spreadsheet::Encodings
20
+ include Spreadsheet::Excel::Internals
77
21
 
78
- if RUBY_VERSION >= '1.9'
79
- begin
80
- Encoding.find name
81
- rescue ArgumentError
82
- raise Spreadsheet::Errors::UnsupportedEncoding, "Unsupported encoding with name '#{name}'"
22
+ ROW_BLOCK_OPS = {
23
+ blank: true, boolerr: true, dbcell: true, formula: true,
24
+ label: true, labelsst: true, mulblank: true, mulrk: true,
25
+ number: true, rk: true, rstring: true
26
+ }
27
+ def initialize opts = {}
28
+ @pos = 0
29
+ @bigendian = opts.fetch(:bigendian) {
30
+ [1].pack("l") != "\001\000\000\000"
31
+ }
32
+ @opts = opts
33
+ @boundsheets = nil
34
+ @current_row_block = {}
35
+ @current_row_block_offset = nil
36
+ @formats = {}
37
+ BUILTIN_FORMATS.each { |key, fmt| @formats.store key, client(fmt, "UTF-8") }
83
38
  end
84
- else
85
- name
86
- end
87
- end
88
- def get_next_chunk
89
- pos = @pos
90
- if pos < @data.size
91
- op, len = @data[@pos,OPCODE_SIZE].unpack('v2')
92
- @pos += OPCODE_SIZE
93
- if len
94
- work = @data[@pos,len]
95
- @pos += len
96
- code = SEDOCPO.fetch(op, op)
97
- if io = @opts[:print_opcodes]
98
- io.puts sprintf("0x%04x/%-16s %5i: %s",
99
- op, code.inspect, len, work.inspect)
39
+
40
+ def decode_rk work
41
+ # Bit Mask Contents
42
+ # 0 0x00000001 0 = Value not changed 1 = Value is multiplied by 100
43
+ # 1 0x00000002 0 = Floating-point value 1 = Signed integer value
44
+ # 31-2 0xFFFFFFFC Encoded value
45
+ #
46
+ # If bit 1 is cleared, the encoded value represents the 30 most significant
47
+ # bits of an IEEE 754 floating-point value (64-bit double precision). The
48
+ # 34 least significant bits must be set to zero. If bit 1 is set, the
49
+ # encoded value represents a signed 30-bit integer value. To get the
50
+ # correct integer, the encoded value has to be shifted right arithmetically
51
+ # by 2 bits. If bit 0 is set, the decoded value (both integer and
52
+ # floating-point) must be divided by 100 to get the final result.
53
+ flags, = work.unpack "C"
54
+ cent = flags & 1
55
+ int = flags & 2
56
+ value = 0
57
+ if int == 0
58
+ ## remove two bits
59
+ integer, = work.unpack "V"
60
+ integer &= 0xfffffffc
61
+ value, = ("\0\0\0\0" + [integer].pack("V")).unpack EIGHT_BYTE_DOUBLE
62
+ else
63
+ ## I can't find a format for unpacking a little endian signed integer.
64
+ # 'V' works for packing, but not for unpacking. But the following works
65
+ # fine afaics:
66
+ unsigned, = (@bigendian ? work.reverse : work).unpack "l"
67
+ ## remove two bits
68
+ value = unsigned >> 2
69
+ end
70
+ if cent == 1
71
+ value /= 100.0
100
72
  end
101
- [ pos, code, len + OPCODE_SIZE, work]
73
+ value
102
74
  end
103
- end
104
- end
105
- def in_row_block? op, previous
106
- if op == :row
107
- previous == op
108
- else
109
- ROW_BLOCK_OPS.include?(op)
110
- end
111
- end
112
- def memoize?
113
- @opts[:memoization]
114
- end
115
- def postread_workbook
116
- sheets = @workbook.worksheets
117
- sheets.each_with_index do |sheet, idx|
118
- offset = sheet.offset
119
- nxt = (nxtsheet = sheets[idx + 1]) ? nxtsheet.offset : @workbook.ole.size
120
- @workbook.offsets.store sheet, [offset, nxt - offset]
121
- end
122
- end
123
- def postread_worksheet worksheet
124
- #We now have a lot of Note and NoteObjects, but they're not linked
125
- #So link the noteObject(text) to the note (with author, position)
126
- #TODO
127
- @noteList.each do |i|
128
- matching_objs = @noteObjList.select { |j| j.objID == i.objID }
129
- if matching_objs.length > 1
130
- puts "ERROR - more than one matching object ID!"
75
+
76
+ def encoding codepage_id
77
+ name = CODEPAGES.fetch(codepage_id) do
78
+ raise Spreadsheet::Errors::UnknownCodepage, "Unknown Codepage 0x%04x" % codepage_id
79
+ end
80
+
81
+ if RUBY_VERSION >= "1.9"
82
+ begin
83
+ Encoding.find name
84
+ rescue ArgumentError
85
+ raise Spreadsheet::Errors::UnsupportedEncoding, "Unsupported encoding with name '#{name}'"
86
+ end
87
+ else
88
+ name
131
89
  end
132
- matching_obj = matching_objs.first
133
- i.text = matching_obj.nil? ? '' : matching_obj.text
134
- worksheet.add_note i.row, i.col, i.text
135
- end
136
- end
137
- ##
138
- # The entry-point for reading Excel-documents. Reads the Biff-Version and
139
- # loads additional reader-methods before proceeding with parsing the document.
140
- def read io
141
- setup io
142
- read_workbook
143
- @workbook.default_format = @workbook.format 0
144
- @workbook.changes.clear
145
- @workbook
146
- end
147
- def read_blank worksheet, addr, work
148
- # Offset Size Contents
149
- # 0 2 Index to row
150
- # 2 2 Index to column
151
- # 4 2 Index to XF record (➜ 6.115)
152
- row, column, xf = work.unpack binfmt(:blank)
153
- set_cell worksheet, row, column, xf
154
- end
155
- def read_bof
156
- # Offset Size Contents
157
- # 0 2 BIFF version (always 0x0600 for BIFF8)
158
- # 2 2 Type of the following data: 0x0005 = Workbook globals
159
- # 0x0006 = Visual Basic module
160
- # 0x0010 = Worksheet
161
- # 0x0020 = Chart
162
- # 0x0040 = Macro sheet
163
- # 0x0100 = Workspace file
164
- # 4 2 Build identifier
165
- # 6 2 Build year
166
- # 8 4 File history flags
167
- # 12 4 Lowest Excel version that can read all records in this file
168
- _, @bof, _, work = get_next_chunk
169
- ## version and datatype are common to all Excel-Versions. Later versions
170
- # have additional information such as build-id and -year (from BIFF5).
171
- # These are ignored for the time being.
172
- version, datatype = work.unpack('v2')
173
- if datatype == 0x5
174
- @version = version
175
- end
176
- end
177
- def read_boolerr worksheet, addr, work
178
- # Offset Size Contents
179
- # 0 2 Index to row
180
- # 2 2 Index to column
181
- # 4 2 Index to XF record (➜ 6.115)
182
- # 6 1 Boolean or error value (type depends on the following byte)
183
- # 7 1 0 = Boolean value; 1 = Error code
184
- row, column, xf, value, error = work.unpack 'v3C2'
185
- set_cell worksheet, row, column, xf, error == 0 ? value > 0 : Error.new(value)
186
- end
187
- def read_boundsheet work, pos, len
188
- # Offset Size Contents
189
- # 0 4 Absolute stream position of the BOF record of the sheet
190
- # represented by this record. This field is never encrypted
191
- # in protected files.
192
- # 4 1 Visibility: 0x00 = Visible
193
- # 0x01 = Hidden
194
- # 0x02 = Strong hidden (see below)
195
- # 5 1 Sheet type: 0x00 = Worksheet
196
- # 0x02 = Chart
197
- # 0x06 = Visual Basic module
198
- # 6 var. Sheet name: BIFF5/BIFF7: Byte string,
199
- # 8-bit string length (➜ 3.3)
200
- # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
201
- offset, visibility, _ = work.unpack("VC2")
202
- name = client read_string(work[6..-1]), @workbook.encoding
203
- if @boundsheets
204
- @boundsheets[0] += 1
205
- @boundsheets[2] += len
206
- else
207
- @boundsheets = [1, pos, len]
208
- end
209
- @workbook.set_boundsheets(*@boundsheets)
210
- @workbook.add_worksheet Worksheet.new(:name => name,
211
- :ole => @book,
212
- :offset => offset,
213
- :reader => self,
214
- :visibility => WORKSHEET_VISIBILITIES[visibility])
215
- end
216
- def read_codepage work, pos, len
217
- codepage, _ = work.unpack 'v'
218
- @workbook.set_encoding encoding(codepage), pos, len
219
- end
220
- def read_colinfo worksheet, work, pos, len
221
- # Offset Size Contents
222
- # 0 2 Index to first column in the range
223
- # 2 2 Index to last column in the range
224
- # 4 2 Width of the columns in 1/256 of the width of the zero
225
- # character, using default font (first FONT record in the
226
- # file)
227
- # 6 2 Index to XF record (➜ 6.115) for default column formatting
228
- # 8 2 Option flags:
229
- # Bits Mask Contents
230
- # 0 0x0001 1 = Columns are hidden
231
- # 10-8 0x0700 Outline level of the columns (0 = no outline)
232
- # 12 0x1000 1 = Columns are collapsed
233
- # 10 2 Not used
234
- first, last, width, xf, opts = work.unpack binfmt(:colinfo)[0..-2]
235
- first.upto last do |col|
236
- column = Column.new col, @workbook.format(xf),
237
- :width => width.to_f / 256,
238
- :hidden => (opts & 0x0001) > 0,
239
- :collapsed => (opts & 0x1000) > 0,
240
- :outline_level => (opts & 0x0700) / 256
241
- column.worksheet = worksheet
242
- worksheet.columns[col] = column
243
- end
244
- end
245
- def read_dimensions worksheet, work, pos, len
246
- # Offset Size Contents
247
- # 0 4 Index to first used row
248
- # 4 4 Index to last used row, increased by 1
249
- # 8 2 Index to first used column
250
- # 10 2 Index to last used column, increased by 1
251
- # 12 2 Not used
252
- worksheet.set_dimensions work.unpack(binfmt(:dimensions)), pos, len
253
- end
254
- def read_font work, pos, len
255
- # Offset Size Contents
256
- # 0 2 Height of the font (in twips = 1/20 of a point)
257
- # 2 2 Option flags:
258
- # Bit Mask Contents
259
- # 0 0x0001 1 = Characters are bold (redundant, see below)
260
- # 1 0x0002 1 = Characters are italic
261
- # 2 0x0004 1 = Characters are underlined
262
- # (redundant, see below)
263
- # 3 0x0008 1 = Characters are struck out
264
- # 4 0x0010 1 = Characters are outlined (djberger)
265
- # 5 0x0020 1 = Characters are shadowed (djberger)
266
- # 4 2 Colour index (➜ 6.70)
267
- # 6 2 Font weight (100-1000). Standard values are
268
- # 0x0190 (400) for normal text and
269
- # 0x02bc (700) for bold text.
270
- # 8 2 Escapement type: 0x0000 = None
271
- # 0x0001 = Superscript
272
- # 0x0002 = Subscript
273
- # 10 1 Underline type: 0x00 = None
274
- # 0x01 = Single
275
- # 0x02 = Double
276
- # 0x21 = Single accounting
277
- # 0x22 = Double accounting
278
- # 11 1 Font family:
279
- # 0x00 = None (unknown or don't care)
280
- # 0x01 = Roman (variable width, serifed)
281
- # 0x02 = Swiss (variable width, sans-serifed)
282
- # 0x03 = Modern (fixed width, serifed or sans-serifed)
283
- # 0x04 = Script (cursive)
284
- # 0x05 = Decorative (specialised,
285
- # for example Old English, Fraktur)
286
- # 12 1 Character set: 0x00 = 0 = ANSI Latin
287
- # 0x01 = 1 = System default
288
- # 0x02 = 2 = Symbol
289
- # 0x4d = 77 = Apple Roman
290
- # 0x80 = 128 = ANSI Japanese Shift-JIS
291
- # 0x81 = 129 = ANSI Korean (Hangul)
292
- # 0x82 = 130 = ANSI Korean (Johab)
293
- # 0x86 = 134 = ANSI Chinese Simplified GBK
294
- # 0x88 = 136 = ANSI Chinese Traditional BIG5
295
- # 0xa1 = 161 = ANSI Greek
296
- # 0xa2 = 162 = ANSI Turkish
297
- # 0xa3 = 163 = ANSI Vietnamese
298
- # 0xb1 = 177 = ANSI Hebrew
299
- # 0xb2 = 178 = ANSI Arabic
300
- # 0xba = 186 = ANSI Baltic
301
- # 0xcc = 204 = ANSI Cyrillic
302
- # 0xde = 222 = ANSI Thai
303
- # 0xee = 238 = ANSI Latin II (Central European)
304
- # 0xff = 255 = OEM Latin I
305
- # 13 1 Not used
306
- # 14 var. Font name:
307
- # BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
308
- # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
309
- name = client read_string(work[14..-1]), @workbook.encoding
310
- font = Font.new name
311
- size, opts, color, font.weight, escapement, underline,
312
- family, encoding = work.unpack binfmt(:font)
313
- font.size = size / TWIPS
314
- font.italic = opts & 0x0002
315
- font.strikeout = opts & 0x0008
316
- font.outline = opts & 0x0010
317
- font.shadow = opts & 0x0020
318
- font.color = COLOR_CODES[color] || :text
319
- font.escapement = ESCAPEMENT_TYPES[escapement]
320
- font.underline = UNDERLINE_TYPES[underline]
321
- font.family = FONT_FAMILIES[family]
322
- font.encoding = FONT_ENCODINGS[encoding]
323
- @workbook.add_font font
324
- end
325
- def read_format work, pos, len
326
- # Offset Size Contents
327
- # 0 2 Format index used in other records
328
- # 2 var. Number format string
329
- # (Unicode string, 16-bit string length, ➜ 3.4)
330
- idx, = work.unpack 'v'
331
- value = read_string work[2..-1], 2
332
- @formats.store idx, client(value, @workbook.encoding)
333
- end
334
- def read_formula worksheet, addr, work
335
- # Offset Size Contents
336
- # 0 2 Index to row
337
- # 2 2 Index to column
338
- # 4 2 Index to XF record (➜ 6.115)
339
- # 6 8 Result of the formula. See below for details.
340
- # 14 2 Option flags:
341
- # Bit Mask Contents
342
- # 0 0x0001 1 = Recalculate always
343
- # 1 0x0002 1 = Calculate on open
344
- # 3 0x0008 1 = Part of a shared formula
345
- # 16 4 Not used
346
- # 20 var. Formula data (RPN token array, ➜ 4)
347
- # Offset Size Contents
348
- # 0 2 Size of the following formula data (sz)
349
- # 2 sz Formula data (RPN token array)
350
- # [2+sz] var. (optional) Additional data for specific tokens
351
- # (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
352
- #
353
- # Result of the Formula
354
- # Dependent on the type of value the formula returns, the result field has
355
- # the following format:
356
- #
357
- # Result is a numeric value:
358
- # Offset Size Contents
359
- # 0 8 IEEE 754 floating-point value (64-bit double precision)
360
- #
361
- # Result is a string (the string follows in a STRING record, ➜ 6.98):
362
- # Offset Size Contents
363
- # 0 1 0x00 (identifier for a string value)
364
- # 1 5 Not used
365
- # 6 2 0xffff
366
- # Note: In BIFF8 the string must not be empty. For empty cells there is a
367
- # special identifier defined (see below).
368
- #
369
- # Result is a Boolean value:
370
- # Offset Size Contents
371
- # 0 1 0x01 (identifier for a Boolean value)
372
- # 1 1 Not used
373
- # 2 1 0 = FALSE, 1 = TRUE
374
- # 3 3 Not used
375
- # 6 2 0xffff
376
- #
377
- # Result is an error value:
378
- # Offset Size Contents
379
- # 0 1 0x02 (identifier for an error value)
380
- # 1 1 Not used
381
- # 2 1 Error code (➜ 3.7)
382
- # 3 3 Not used
383
- # 6 2 0xffff
384
- #
385
- # Result is an empty cell (BIFF8), for example an empty string:
386
- # Offset Size Contents
387
- # 0 1 0x03 (identifier for an empty cell)
388
- # 1 5 Not used
389
- # 6 2 0xffff
390
- row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
391
- formula = Formula.new
392
- formula.shared = (opts & 0x08) > 0
393
- formula.data = work[20..-1]
394
- if rcheck != 0xffff || rtype > 3
395
- value, = work.unpack 'x6E'
396
- unless value
397
- # on architectures where sizeof(double) > 8
398
- value, = work.unpack 'x6e'
399
90
  end
400
- formula.value = value
401
- elsif rtype == 0
402
- pos, op, _len, work = get_next_chunk
403
- if op == :sharedfmla
404
- ## TODO: formula-support in 0.8.0
405
- pos, op, _len, work = get_next_chunk
91
+
92
+ def get_next_chunk
93
+ pos = @pos
94
+ if pos < @data.size
95
+ op, len = @data[@pos, OPCODE_SIZE].unpack("v2")
96
+ @pos += OPCODE_SIZE
97
+ if len
98
+ work = @data[@pos, len]
99
+ @pos += len
100
+ code = SEDOCPO.fetch(op, op)
101
+ if (io = @opts[:print_opcodes])
102
+ io.puts sprintf("0x%04x/%-16s %5i: %s",
103
+ op, code.inspect, len, work.inspect)
104
+ end
105
+ [pos, code, len + OPCODE_SIZE, work]
106
+ end
107
+ end
406
108
  end
407
- if op == :string
408
- formula.value = client read_string(work, 2), @workbook.encoding
409
- else
410
- warn "String Value expected after Formula, but got #{op}"
411
- formula.value = Error.new 0x2a
412
- @pos = pos
109
+
110
+ def in_row_block? op, previous
111
+ if op == :row
112
+ previous == op
113
+ else
114
+ ROW_BLOCK_OPS.include?(op)
115
+ end
413
116
  end
414
- elsif rtype == 1
415
- formula.value = rval > 0
416
- elsif rtype == 2
417
- formula.value = Error.new rval
418
- else
419
- # leave the Formula value blank
420
- end
421
- set_cell worksheet, row, column, xf, formula
422
- end
423
- def read_hlink worksheet, work, pos, len
424
- # 6.53.1 Common Record Contents
425
- # Offset Size Contents
426
- # 0 8 Cell range address of all cells containing this hyperlink
427
- # (➜ 3.13.1)
428
- # 8 16 GUID of StdLink:
429
- # D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
430
- # (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
431
- # 24 4 Unknown value: 0x00000002
432
- # 28 4 Option flags (see below)
433
- # Bit Mask Contents
434
- # 0 0x00000001 0 = No link extant
435
- # 1 = File link or URL
436
- # 1 0x00000002 0 = Relative file path
437
- # 1 = Absolute path or URL
438
- # 2 and 4 0x00000014 0 = No description
439
- # 1 (both bits) = Description
440
- # 3 0x00000008 0 = No text mark
441
- # 1 = Text mark
442
- # 7 0x00000080 0 = No target frame
443
- # 1 = Target frame
444
- # 8 0x00000100 0 = File link or URL
445
- # 1 = UNC path (incl. server name)
446
- #--------------------------------------------------------------------------
447
- # [32] 4 (optional, see option flags) Character count of description
448
- # text, including trailing zero word (dl)
449
- # [36] 2∙dl (optional, see option flags) Character array of description
450
- # text, no Unicode string header, always 16-bit characters,
451
- # zero-terminated
452
- #--------------------------------------------------------------------------
453
- # [var.] 4 (optional, see option flags) Character count of target
454
- # frame, including trailing zero word (fl)
455
- # [var.] 2∙fl (optional, see option flags) Character array of target
456
- # frame, no Unicode string header, always 16-bit characters,
457
- # zero-terminated
458
- #--------------------------------------------------------------------------
459
- # var. var. Special data (➜ 6.53.2 and following)
460
- #--------------------------------------------------------------------------
461
- # [var.] 4 (optional, see option flags) Character count of the text
462
- # mark, including trailing zero word (tl)
463
- # [var.] 2∙tl (optional, see option flags) Character array of the text
464
- # mark without “#” sign, no Unicode string header, always
465
- # 16-bit characters, zero-terminated
466
- firstrow, lastrow, firstcol, lastcol, _, opts = work.unpack 'v4H32x4V'
467
- has_link = opts & 0x0001
468
- desc = opts & 0x0014
469
- textmark = opts & 0x0008
470
- target = opts & 0x0080
471
- unc = opts & 0x0100
472
- link = Link.new
473
- _, description = nil
474
- pos = 32
475
- if desc > 0
476
- description, pos = read_hlink_string work, pos
477
- link << description
478
- end
479
- if target > 0
480
- link.target_frame, pos = read_hlink_string work, pos
481
- end
482
- if unc > 0
483
- # 6.53.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
484
- # These data fields are for UNC paths containing a server name (for
485
- # instance “\\server\path\file.xls”). The lower 9 bits of the option
486
- # flags field must be 1.x00x.xx112.
487
- # Offset Size Contents
488
- # 0 4 Character count of the UNC,
489
- # including trailing zero word (fl)
490
- # 4 2∙fl Character array of the UNC, no Unicode string header,
491
- # always 16-bit characters, zeroterminated.
492
- link.url, pos = read_hlink_string work, pos
493
- elsif has_link > 0
494
- uid, = work.unpack "x#{pos}H32"
495
- pos += 16
496
- if uid == "e0c9ea79f9bace118c8200aa004ba90b"
497
- # 6.53.2 Hyperlink containing a URL (Uniform Resource Locator)
498
- # These data fields occur for links which are not local files or files
499
- # in the local network (for instance HTTP and FTP links and e-mail
500
- # addresses). The lower 9 bits of the option flags field must be
501
- # 0.x00x.xx112 (x means optional, depending on hyperlink content). The
502
- # GUID could be used to distinguish a URL from a file link.
117
+
118
+ def memoize?
119
+ @opts[:memoization]
120
+ end
121
+
122
+ def postread_workbook
123
+ sheets = @workbook.worksheets
124
+ sheets.each_with_index do |sheet, idx|
125
+ offset = sheet.offset
126
+ nxt = (nxtsheet = sheets[idx + 1]) ? nxtsheet.offset : @workbook.ole.size
127
+ @workbook.offsets.store sheet, [offset, nxt - offset]
128
+ end
129
+ end
130
+
131
+ def postread_worksheet worksheet
132
+ # We now have a lot of Note and NoteObjects, but they're not linked
133
+ # So link the noteObject(text) to the note (with author, position)
134
+ # TODO
135
+ @note_list.each do |i|
136
+ matching_objs = @note_ob_list.select { |j| j.obj_id == i.obj_id }
137
+ if matching_objs.length > 1
138
+ puts "ERROR - more than one matching object ID!"
139
+ end
140
+ matching_obj = matching_objs.first
141
+ i.text = matching_obj.nil? ? "" : matching_obj.text
142
+ worksheet.add_note i.row, i.col, i.text
143
+ end
144
+ end
145
+
146
+ ##
147
+ # The entry-point for reading Excel-documents. Reads the Biff-Version and
148
+ # loads additional reader-methods before proceeding with parsing the document.
149
+ def read io
150
+ setup io
151
+ read_workbook
152
+ @workbook.default_format = @workbook.format 0
153
+ @workbook.changes.clear
154
+ @workbook
155
+ end
156
+
157
+ def read_blank worksheet, addr, work
503
158
  # Offset Size Contents
504
- # 0 16 GUID of URL Moniker:
505
- # E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
506
- # (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
507
- # 16 4 Size of character array of the URL, including trailing
508
- # zero word (us). There are us/2-1 characters in the
509
- # following string.
510
- # 20 us Character array of the URL, no Unicode string header,
511
- # always 16-bit characters, zeroterminated
512
- size, = work.unpack "x#{pos}V"
513
- pos += 4
514
- data = work[pos, size].chomp "\000\000"
515
- link.url = client data
516
- pos += size
517
- else
518
- # 6.53.3 Hyperlink to a Local File
519
- # These data fields are for links to files on local drives. The path of
520
- # the file can be complete with drive letter (absolute) or relative to
521
- # the location of the workbook. The lower 9 bits of the option flags
522
- # field must be 0.x00x.xxx12. The GUID could be used to distinguish a
523
- # URL from a file link.
159
+ # 0 2 Index to row
160
+ # 2 2 Index to column
161
+ # 4 2 Index to XF record (➜ 6.115)
162
+ row, column, xf = work.unpack binfmt(:blank)
163
+ set_cell worksheet, row, column, xf
164
+ end
165
+
166
+ def read_bof
524
167
  # Offset Size Contents
525
- # 0 16 GUID of File Moniker:
526
- # 03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
527
- # (00000303-0000-0000-C000-000000000046)
528
- # 16 2 Directory up-level count. Each leading “..\” in the
529
- # file link is deleted and increases this counter.
530
- # 18 4 Character count of the shortened file path and name,
531
- # including trailing zero byte (sl)
532
- # 22 sl Character array of the shortened file path and name in
533
- # 8.3-DOS-format. This field can be filled with a long
534
- # file name too. No Unicode string header, always 8-bit
535
- # characters, zeroterminated.
536
- # 22+sl 24 Unknown byte sequence:
537
- # FF FF AD DE 00 00 00 00
538
- # 00 00 00 00 00 00 00 00
539
- # 00 00 00 00 00 00 00 00
540
- # 46+sl 4 Size of the following file link field including string
541
- # length field and additional data field (sz). If sz is
542
- # zero, nothing will follow (except a text mark).
543
- # [50+sl] 4 (optional) Size of character array of the extended file
544
- # path and name (xl). There are xl/2 characters in the
545
- # following string.
546
- # [54+sl] 2 (optional) Unknown byte sequence: 03 00
547
- # [56+sl] xl (optional) Character array of the extended file path
548
- # and name (xl), no Unicode string header, always 16-bit
549
- # characters, not zero-terminated
550
- uplevel, count = work.unpack "x#{pos}vV"
551
- pos += 6
552
- # TODO: short file path may have any of the OEM encodings. Find out which
553
- # and use the #client method to convert the encoding.
554
- prefix = internal('..\\', 'UTF-8') * uplevel
555
- link.dos = link.url = prefix << work[pos, count].chomp("\000")
556
- pos += count + 24
557
- total, size = work.unpack "x#{pos}V2"
558
- pos += 10
559
- if total > 0
560
- link.url = client work[pos, size]
561
- pos += size
168
+ # 0 2 BIFF version (always 0x0600 for BIFF8)
169
+ # 2 2 Type of the following data: 0x0005 = Workbook globals
170
+ # 0x0006 = Visual Basic module
171
+ # 0x0010 = Worksheet
172
+ # 0x0020 = Chart
173
+ # 0x0040 = Macro sheet
174
+ # 0x0100 = Workspace file
175
+ # 4 2 Build identifier
176
+ # 6 2 Build year
177
+ # 8 4 File history flags
178
+ # 12 4 Lowest Excel version that can read all records in this file
179
+ _, @bof, _, work = get_next_chunk
180
+ ## version and datatype are common to all Excel-Versions. Later versions
181
+ # have additional information such as build-id and -year (from BIFF5).
182
+ # These are ignored for the time being.
183
+ version, datatype = work.unpack("v2")
184
+ if datatype == 0x5
185
+ @version = version
562
186
  end
563
187
  end
564
- else
565
- # 6.53.5 Hyperlink to the Current Workbook
566
- # In this case only the text mark field is present (optional with
567
- # description).
568
- # Example: The URL “#Sheet2!B1:C2” refers to the given range in the
569
- # current workbook.
570
- # The lower 9 bits of the option flags field must be 0.x00x.1x002.
571
- end
572
- if textmark > 0
573
- link.fragment, _ = read_hlink_string work, pos
574
- end
575
- if link.empty?
576
- link << link.href
577
- end
578
- firstrow.upto lastrow do |row|
579
- firstcol.upto lastcol do |col|
580
- worksheet.add_link row, col, link
188
+
189
+ def read_boolerr worksheet, addr, work
190
+ # Offset Size Contents
191
+ # 0 2 Index to row
192
+ # 2 2 Index to column
193
+ # 4 2 Index to XF record (➜ 6.115)
194
+ # 6 1 Boolean or error value (type depends on the following byte)
195
+ # 7 1 0 = Boolean value; 1 = Error code
196
+ row, column, xf, value, error = work.unpack "v3C2"
197
+ set_cell worksheet, row, column, xf, (error == 0) ? value > 0 : Error.new(value)
581
198
  end
582
- end
583
- end
584
- def read_hlink_string work, pos
585
- count, = work.unpack "x#{pos}V"
586
- len = count * 2
587
- pos += 4
588
- data = work[pos, len].chomp "\000\000"
589
- pos += len
590
- [client(data, 'UTF-16LE'), pos]
591
- end
592
- def read_index worksheet, work, pos, len
593
- # Offset Size Contents
594
- # 0 4 Not used
595
- # 4 4 Index to first used row (rf, 0-based)
596
- # 8 4 Index to first row of unused tail of sheet
597
- # (rl, last used row + 1, 0-based)
598
- # 12 4 Absolute stream position of the
599
- # DEFCOLWIDTH record (➜ 6.29) of the current sheet. If this
600
- # record does not exist, the offset points to the record at
601
- # the position where the DEFCOLWIDTH record would occur.
602
- # 16 4∙nm Array of nm absolute stream positions to the
603
- # DBCELL record (➜ 6.26) of each Row Block
604
- # TODO: use the index if it exists
605
- # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
606
- end
607
- def read_label worksheet, addr, work
608
- # Offset Size Contents
609
- # 0 2 Index to row
610
- # 2 2 Index to column
611
- # 4 2 Index to XF record (➜ 6.115)
612
- # 6 var. Unicode string, 16-bit string length (➜ 3.4)
613
- row, column, xf = work.unpack 'v3'
614
- value = client read_string(work[6..-1], 2), @workbook.encoding
615
- set_cell worksheet, row, column, xf, value
616
- end
617
- def read_labelsst worksheet, addr, work
618
- # Offset Size Contents
619
- # 0 2 Index to row
620
- # 2 2 Index to column
621
- # 4 2 Index to XF record (➜ 6.115)
622
- # 6 4 Index into SST record (➜ 6.96)
623
- row, column, xf, index = work.unpack binfmt(:labelsst)
624
- set_cell worksheet, row, column, xf, worksheet.shared_string(index)
625
- end
626
- def read_mulblank worksheet, addr, work
627
- # Offset Size Contents
628
- # 0 2 Index to row
629
- # 2 2 Index to first column (fc)
630
- # 4 2∙nc List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
631
- # 4+2∙nc 2 Index to last column (lc)
632
- row, column, *xfs = work.unpack 'v*'
633
- xfs.pop #=> last_column
634
- xfs.each_with_index do |xf, idx| set_cell worksheet, row, column + idx, xf end
635
- end
636
- def read_mulrk worksheet, addr, work
637
- # Offset Size Contents
638
- # 0 2 Index to row
639
- # 2 2 Index to first column (fc)
640
- # 4 6∙nc List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
641
- # Offset Size Contents
642
- # 0 2 Index to XF record (➜ 6.115)
643
- # 2 4 RK value (➜ 3.6)
644
- # 4+6∙nc 2 Index to last column (lc)
645
- row, column = work.unpack 'v2'
646
- 4.step(work.size - 6, 6) do |idx|
647
- xf, = work.unpack "x#{idx}v"
648
- set_cell worksheet, row, column, xf, decode_rk(work[idx + 2, 4])
649
- column += 1
650
- end
651
- end
652
- def read_number worksheet, addr, work
653
- # Offset Size Contents
654
- # 0 2 Index to row
655
- # 2 2 Index to column
656
- # 4 2 Index to XF record (➜ 6.115)
657
- # 6 8 IEEE 754 floating-point value (64-bit double precision)
658
- row, column, xf, value = work.unpack binfmt(:number)
659
- set_cell worksheet, row, column, xf, value
660
- end
661
- def read_rk worksheet, addr, work
662
- # Offset Size Contents
663
- # 0 2 Index to row
664
- # 2 2 Index to column
665
- # 4 2 Index to XF record (➜ 6.115)
666
- # 6 4 RK value (➜ 3.6)
667
- row, column, xf = work.unpack 'v3'
668
- set_cell worksheet, row, column, xf, decode_rk(work[6,4])
669
- end
670
- def read_row worksheet, addr
671
- row = addr[:index]
672
- @current_row_block.fetch [worksheet, row] do
673
- @current_row_block.clear
674
- cells = @current_row_block[[worksheet, row]] = Row.new(nil, row)
675
- @pos = addr[:offset]
676
- found = false
677
- while tuple = get_next_chunk
678
- pos, op, _, work = tuple
679
- case op
680
- when :eof # ● EOF ➜ 6.36 - we should only get here if there is just
681
- # one Row-Block
682
- @pos = pos
683
- return cells
684
- when :dbcell # ○ DBCELL Stream offsets to the cell records of each row
685
- return cells
686
- when :row # ○○ Row Blocks ➜ 5.7
687
- # ● ROW ➜ 6.83
688
- # ignore, we already did these in read_worksheet
689
- return cells if found
690
- when :blank # BLANK ➜ 6.7
691
- found = true
692
- read_blank worksheet, addr, work
693
- when :boolerr # BOOLERR ➜ 6.10
694
- found = true
695
- read_boolerr worksheet, addr, work
696
- when 0x0002 # INTEGER ➜ 6.56 (BIFF2 only)
697
- found = true
698
- # TODO: implement for BIFF2 support
699
- when :formula # FORMULA ➜ 6.46
700
- found = true
701
- read_formula worksheet, addr, work
702
- when :label # LABEL ➜ 6.59 (BIFF2-BIFF7)
703
- found = true
704
- read_label worksheet, addr, work
705
- when :labelsst # LABELSST ➜ 6.61 (BIFF8 only)
706
- found = true
707
- read_labelsst worksheet, addr, work
708
- when :mulblank # MULBLANK ➜ 6.64 (BIFF5-BIFF8)
709
- found = true
710
- read_mulblank worksheet, addr, work
711
- when :mulrk # MULRK ➜ 6.65 (BIFF5-BIFF8)
712
- found = true
713
- read_mulrk worksheet, addr, work
714
- when :number # NUMBER ➜ 6.68
715
- found = true
716
- read_number worksheet, addr, work
717
- when :rk # RK ➜ 6.82 (BIFF3-BIFF8)
718
- found = true
719
- read_rk worksheet, addr, work
720
- when :rstring # RSTRING ➜ 6.84 (BIFF5/BIFF7)
721
- found = true
722
- read_rstring worksheet, addr, work
199
+
200
+ def read_boundsheet work, pos, len
201
+ # Offset Size Contents
202
+ # 0 4 Absolute stream position of the BOF record of the sheet
203
+ # represented by this record. This field is never encrypted
204
+ # in protected files.
205
+ # 4 1 Visibility: 0x00 = Visible
206
+ # 0x01 = Hidden
207
+ # 0x02 = Strong hidden (see below)
208
+ # 5 1 Sheet type: 0x00 = Worksheet
209
+ # 0x02 = Chart
210
+ # 0x06 = Visual Basic module
211
+ # 6 var. Sheet name: BIFF5/BIFF7: Byte string,
212
+ # 8-bit string length ( 3.3)
213
+ # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
214
+ offset, visibility, _ = work.unpack("VC2")
215
+ name = client read_string(work[6..]), @workbook.encoding
216
+ if @boundsheets
217
+ @boundsheets[0] += 1
218
+ @boundsheets[2] += len
219
+ else
220
+ @boundsheets = [1, pos, len]
723
221
  end
222
+ @workbook.set_boundsheets(*@boundsheets)
223
+ @workbook.add_worksheet Worksheet.new(name: name,
224
+ ole: @book,
225
+ offset: offset,
226
+ reader: self,
227
+ visibility: WORKSHEET_VISIBILITIES[visibility])
228
+ end
229
+
230
+ def read_codepage work, pos, len
231
+ codepage, _ = work.unpack "v"
232
+ @workbook.set_encoding encoding(codepage), pos, len
724
233
  end
725
- cells
726
- end
727
- end
728
- def read_rstring worksheet, addr, work
729
- # Offset Size Contents
730
- # 0 2 Index to row
731
- # 2 2 Index to column
732
- # 4 2 Index to XF record (➜ 6.115)
733
- # 6 sz Unformatted Unicode string, 16-bit string length (➜ 3.4)
734
- # 6+sz 2 Number of Rich-Text formatting runs (rt)
735
- # 8+sz 4·rt List of rt formatting runs (➜ 3.2)
736
- row, column, xf = work.unpack 'v3'
737
- value = client read_string(work[6..-1], 2), @workbook.encoding
738
- set_cell worksheet, row, column, xf, value
739
- end
740
- def read_window2 worksheet, work, pos, len
741
- # This record contains additional settings for the document window
742
- # (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8).
743
- # It is part of the Sheet View Settings Block (➜ 4.5).
744
- # Offset Size Contents
745
- # 0 2 Option flags:
746
- # Bits Mask Contents
747
- # 0 0x0001 0 = Show formula results
748
- # 1 = Show formulas
749
- # 1 0x0002 0 = Do not show grid lines
750
- # 1 = Show grid lines
751
- # 2 0x0004 0 = Do not show sheet headers
752
- # 1 = Show sheet headers
753
- # 3 0x0008 0 = Panes are not frozen
754
- # 1 = Panes are frozen (freeze)
755
- # 4 0x0010 0 = Show zero values as empty cells
756
- # 1 = Show zero values
757
- # 5 0x0020 0 = Manual grid line colour
758
- # 1 = Automatic grid line colour
759
- # 6 0x0040 0 = Columns from left to right
760
- # 1 = Columns from right to left
761
- # 7 0x0080 0 = Do not show outline symbols
762
- # 1 = Show outline symbols
763
- # 8 0x0100 0 = Keep splits if pane freeze is removed
764
- # 1 = Remove splits if pane freeze is removed
765
- # 9 0x0200 0 = Sheet not selected
766
- # 1 = Sheet selected (BIFF5-BIFF8)
767
- # 10 0x0400 0 = Sheet not active
768
- # 1 = Sheet active (BIFF5-BIFF8)
769
- # 11 0x0800 0 = Show in normal view
770
- # 1 = Show in page break preview (BIFF8)
771
- # 2 2 Index to first visible row
772
- # 4 2 Index to first visible column
773
- # 6 2 Colour index of grid line colour (➜ 5.74).
774
- # Note that in BIFF2-BIFF5 an RGB colour is written instead.
775
- # 8 2 Not used
776
- # 10 2 Cached magnification factor in page break preview (in percent)
777
- # 0 = Default (60%)
778
- # 12 2 Cached magnification factor in normal view (in percent)
779
- # 0 = Default (100%)
780
- # 14 4 Not used
781
- flags, _ = work.unpack 'v'
782
- worksheet.selected = flags & 0x0200 > 0
783
- end
784
234
 
785
- def read_merged_cells worksheet, work, pos, len
786
- # This record contains the addresses of merged cell ranges in the current sheet.
787
- # Record MERGEDCELLS, BIFF8:
788
- # Offset Size Contents
789
- # 0 var. Cell range address list with merged ranges (➜ 2.5.15)
790
- # If the record size exceeds the limit, it is not continued with a CONTINUE record,
791
- # but another self-contained MERGEDCELLS record is started. The limit of 8224 bytes
792
- # per record results in a maximum number of 1027 merged ranges.
235
+ def read_colinfo worksheet, work, pos, len
236
+ # Offset Size Contents
237
+ # 0 2 Index to first column in the range
238
+ # 2 2 Index to last column in the range
239
+ # 4 2 Width of the columns in 1/256 of the width of the zero
240
+ # character, using default font (first FONT record in the
241
+ # file)
242
+ # 6 2 Index to XF record (➜ 6.115) for default column formatting
243
+ # 8 2 Option flags:
244
+ # Bits Mask Contents
245
+ # 0 0x0001 1 = Columns are hidden
246
+ # 10-8 0x0700 Outline level of the columns (0 = no outline)
247
+ # 12 0x1000 1 = Columns are collapsed
248
+ # 10 2 Not used
249
+ first, last, width, xf, opts = work.unpack binfmt(:colinfo)[0..-2]
250
+ first.upto last do |col|
251
+ column = Column.new col, @workbook.format(xf),
252
+ width: width.to_f / 256,
253
+ hidden: (opts & 0x0001) > 0,
254
+ collapsed: (opts & 0x1000) > 0,
255
+ outline_level: (opts & 0x0700) / 256
256
+ column.worksheet = worksheet
257
+ worksheet.columns[col] = column
258
+ end
259
+ end
793
260
 
794
- worksheet.merged_cells.push(*read_range_address_list(work, len))
795
- #
796
- # A cell range address list consists of a field with the number of ranges and the list
797
- # of the range addresses.
798
- # Cell range address list, BIFF2-BIFF8:
799
- # Offset Size Contents
800
- # 0 2 Number of following cell range addresses (nm)
801
- # 2 6∙nm or 8∙nm List of nm cell range addresses (➜ 2.5.14)
802
- #
803
- end
261
+ def read_dimensions worksheet, work, pos, len
262
+ # Offset Size Contents
263
+ # 0 4 Index to first used row
264
+ # 4 4 Index to last used row, increased by 1
265
+ # 8 2 Index to first used column
266
+ # 10 2 Index to last used column, increased by 1
267
+ # 12 2 Not used
268
+ worksheet.set_dimensions work.unpack(binfmt(:dimensions)), pos, len
269
+ end
804
270
 
805
- def read_workbook
806
- previous_op = nil
807
- while tuple = get_next_chunk
808
- pos, op, len, work = tuple
809
- case op
810
- when @bof, :bof # BOF Type = worksheet ( 6.8)
811
- return
812
- when :eof # EOF 6.36
813
- postread_workbook
814
- return
815
- when :datemode # DATEMODE 6.25
816
- flag, _ = work.unpack 'v'
817
- if flag == 1
818
- @workbook.date_base = DateTime.new 1904, 1, 1
271
+ def read_font work, pos, len
272
+ # Offset Size Contents
273
+ # 0 2 Height of the font (in twips = 1/20 of a point)
274
+ # 2 2 Option flags:
275
+ # Bit Mask Contents
276
+ # 0 0x0001 1 = Characters are bold (redundant, see below)
277
+ # 1 0x0002 1 = Characters are italic
278
+ # 2 0x0004 1 = Characters are underlined
279
+ # (redundant, see below)
280
+ # 3 0x0008 1 = Characters are struck out
281
+ # 4 0x0010 1 = Characters are outlined (djberger)
282
+ # 5 0x0020 1 = Characters are shadowed (djberger)
283
+ # 4 2 Colour index (➜ 6.70)
284
+ # 6 2 Font weight (100-1000). Standard values are
285
+ # 0x0190 (400) for normal text and
286
+ # 0x02bc (700) for bold text.
287
+ # 8 2 Escapement type: 0x0000 = None
288
+ # 0x0001 = Superscript
289
+ # 0x0002 = Subscript
290
+ # 10 1 Underline type: 0x00 = None
291
+ # 0x01 = Single
292
+ # 0x02 = Double
293
+ # 0x21 = Single accounting
294
+ # 0x22 = Double accounting
295
+ # 11 1 Font family:
296
+ # 0x00 = None (unknown or don't care)
297
+ # 0x01 = Roman (variable width, serifed)
298
+ # 0x02 = Swiss (variable width, sans-serifed)
299
+ # 0x03 = Modern (fixed width, serifed or sans-serifed)
300
+ # 0x04 = Script (cursive)
301
+ # 0x05 = Decorative (specialised,
302
+ # for example Old English, Fraktur)
303
+ # 12 1 Character set: 0x00 = 0 = ANSI Latin
304
+ # 0x01 = 1 = System default
305
+ # 0x02 = 2 = Symbol
306
+ # 0x4d = 77 = Apple Roman
307
+ # 0x80 = 128 = ANSI Japanese Shift-JIS
308
+ # 0x81 = 129 = ANSI Korean (Hangul)
309
+ # 0x82 = 130 = ANSI Korean (Johab)
310
+ # 0x86 = 134 = ANSI Chinese Simplified GBK
311
+ # 0x88 = 136 = ANSI Chinese Traditional BIG5
312
+ # 0xa1 = 161 = ANSI Greek
313
+ # 0xa2 = 162 = ANSI Turkish
314
+ # 0xa3 = 163 = ANSI Vietnamese
315
+ # 0xb1 = 177 = ANSI Hebrew
316
+ # 0xb2 = 178 = ANSI Arabic
317
+ # 0xba = 186 = ANSI Baltic
318
+ # 0xcc = 204 = ANSI Cyrillic
319
+ # 0xde = 222 = ANSI Thai
320
+ # 0xee = 238 = ANSI Latin II (Central European)
321
+ # 0xff = 255 = OEM Latin I
322
+ # 13 1 Not used
323
+ # 14 var. Font name:
324
+ # BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
325
+ # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
326
+ name = client read_string(work[14..]), @workbook.encoding
327
+ font = Font.new name
328
+ size, opts, color, font.weight, escapement, underline,
329
+ family, encoding = work.unpack binfmt(:font)
330
+ font.size = size / TWIPS
331
+ font.italic = opts & 0x0002
332
+ font.strikeout = opts & 0x0008
333
+ font.outline = opts & 0x0010
334
+ font.shadow = opts & 0x0020
335
+ font.color = COLOR_CODES[color] || :text
336
+ font.escapement = ESCAPEMENT_TYPES[escapement]
337
+ font.underline = UNDERLINE_TYPES[underline]
338
+ font.family = FONT_FAMILIES[family]
339
+ font.encoding = FONT_ENCODINGS[encoding]
340
+ @workbook.add_font font
341
+ end
342
+
343
+ def read_format work, pos, len
344
+ # Offset Size Contents
345
+ # 0 2 Format index used in other records
346
+ # 2 var. Number format string
347
+ # (Unicode string, 16-bit string length, ➜ 3.4)
348
+ idx, = work.unpack "v"
349
+ value = read_string work[2..], 2
350
+ @formats.store idx, client(value, @workbook.encoding)
351
+ end
352
+
353
+ def read_formula worksheet, addr, work
354
+ # Offset Size Contents
355
+ # 0 2 Index to row
356
+ # 2 2 Index to column
357
+ # 4 2 Index to XF record (➜ 6.115)
358
+ # 6 8 Result of the formula. See below for details.
359
+ # 14 2 Option flags:
360
+ # Bit Mask Contents
361
+ # 0 0x0001 1 = Recalculate always
362
+ # 1 0x0002 1 = Calculate on open
363
+ # 3 0x0008 1 = Part of a shared formula
364
+ # 16 4 Not used
365
+ # 20 var. Formula data (RPN token array, ➜ 4)
366
+ # Offset Size Contents
367
+ # 0 2 Size of the following formula data (sz)
368
+ # 2 sz Formula data (RPN token array)
369
+ # [2+sz] var. (optional) Additional data for specific tokens
370
+ # (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
371
+ #
372
+ # Result of the Formula
373
+ # Dependent on the type of value the formula returns, the result field has
374
+ # the following format:
375
+ #
376
+ # Result is a numeric value:
377
+ # Offset Size Contents
378
+ # 0 8 IEEE 754 floating-point value (64-bit double precision)
379
+ #
380
+ # Result is a string (the string follows in a STRING record, ➜ 6.98):
381
+ # Offset Size Contents
382
+ # 0 1 0x00 (identifier for a string value)
383
+ # 1 5 Not used
384
+ # 6 2 0xffff
385
+ # Note: In BIFF8 the string must not be empty. For empty cells there is a
386
+ # special identifier defined (see below).
387
+ #
388
+ # Result is a Boolean value:
389
+ # Offset Size Contents
390
+ # 0 1 0x01 (identifier for a Boolean value)
391
+ # 1 1 Not used
392
+ # 2 1 0 = FALSE, 1 = TRUE
393
+ # 3 3 Not used
394
+ # 6 2 0xffff
395
+ #
396
+ # Result is an error value:
397
+ # Offset Size Contents
398
+ # 0 1 0x02 (identifier for an error value)
399
+ # 1 1 Not used
400
+ # 2 1 Error code (➜ 3.7)
401
+ # 3 3 Not used
402
+ # 6 2 0xffff
403
+ #
404
+ # Result is an empty cell (BIFF8), for example an empty string:
405
+ # Offset Size Contents
406
+ # 0 1 0x03 (identifier for an empty cell)
407
+ # 1 5 Not used
408
+ # 6 2 0xffff
409
+ row, column, xf, rtype, rval, rcheck, opts = work.unpack "v3CxCx3v2"
410
+ formula = Formula.new
411
+ formula.shared = (opts & 0x08) > 0
412
+ formula.data = work[20..]
413
+ if rcheck != 0xffff || rtype > 3
414
+ value, = work.unpack "x6E"
415
+ unless value
416
+ # on architectures where sizeof(double) > 8
417
+ value, = work.unpack "x6e"
418
+ end
419
+ formula.value = value
420
+ elsif rtype == 0
421
+ pos, op, _len, work = get_next_chunk
422
+ if op == :sharedfmla
423
+ ## TODO: formula-support in 0.8.0
424
+ pos, op, _len, work = get_next_chunk
425
+ end
426
+ if op == :string
427
+ formula.value = client read_string(work, 2), @workbook.encoding
428
+ else
429
+ warn "String Value expected after Formula, but got #{op}"
430
+ formula.value = Error.new 0x2a
431
+ @pos = pos
432
+ end
433
+ elsif rtype == 1
434
+ formula.value = rval > 0
435
+ elsif rtype == 2
436
+ formula.value = Error.new rval
819
437
  else
820
- @workbook.date_base = DateTime.new 1899, 12, 31
438
+ # leave the Formula value blank
821
439
  end
822
- when :continue # ○ CONTINUE 6.22
823
- case previous_op
824
- when :sst # ● SST ➜ 6.96
825
- continue_sst work, pos, len
440
+ set_cell worksheet, row, column, xf, formula
441
+ end
442
+
443
+ def read_hlink worksheet, work, pos_unused, len
444
+ # 6.53.1 Common Record Contents
445
+ # Offset Size Contents
446
+ # 0 8 Cell range address of all cells containing this hyperlink
447
+ # (➜ 3.13.1)
448
+ # 8 16 GUID of StdLink:
449
+ # D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
450
+ # (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
451
+ # 24 4 Unknown value: 0x00000002
452
+ # 28 4 Option flags (see below)
453
+ # Bit Mask Contents
454
+ # 0 0x00000001 0 = No link extant
455
+ # 1 = File link or URL
456
+ # 1 0x00000002 0 = Relative file path
457
+ # 1 = Absolute path or URL
458
+ # 2 and 4 0x00000014 0 = No description
459
+ # 1 (both bits) = Description
460
+ # 3 0x00000008 0 = No text mark
461
+ # 1 = Text mark
462
+ # 7 0x00000080 0 = No target frame
463
+ # 1 = Target frame
464
+ # 8 0x00000100 0 = File link or URL
465
+ # 1 = UNC path (incl. server name)
466
+ #--------------------------------------------------------------------------
467
+ # [32] 4 (optional, see option flags) Character count of description
468
+ # text, including trailing zero word (dl)
469
+ # [36] 2∙dl (optional, see option flags) Character array of description
470
+ # text, no Unicode string header, always 16-bit characters,
471
+ # zero-terminated
472
+ #--------------------------------------------------------------------------
473
+ # [var.] 4 (optional, see option flags) Character count of target
474
+ # frame, including trailing zero word (fl)
475
+ # [var.] 2∙fl (optional, see option flags) Character array of target
476
+ # frame, no Unicode string header, always 16-bit characters,
477
+ # zero-terminated
478
+ #--------------------------------------------------------------------------
479
+ # var. var. Special data (➜ 6.53.2 and following)
480
+ #--------------------------------------------------------------------------
481
+ # [var.] 4 (optional, see option flags) Character count of the text
482
+ # mark, including trailing zero word (tl)
483
+ # [var.] 2∙tl (optional, see option flags) Character array of the text
484
+ # mark without “#” sign, no Unicode string header, always
485
+ # 16-bit characters, zero-terminated
486
+ firstrow, lastrow, firstcol, lastcol, _, opts = work.unpack "v4H32x4V"
487
+ has_link = opts & 0x0001
488
+ desc = opts & 0x0014
489
+ textmark = opts & 0x0008
490
+ target = opts & 0x0080
491
+ unc = opts & 0x0100
492
+ link = Link.new
493
+ _, _ = nil
494
+ pos = 32
495
+ if desc > 0
496
+ description, pos = read_hlink_string work, pos
497
+ link << description
498
+ end
499
+ if target > 0
500
+ link.target_frame, pos = read_hlink_string work, pos
501
+ end
502
+ if unc > 0
503
+ # 6.53.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
504
+ # These data fields are for UNC paths containing a server name (for
505
+ # instance “\\server\path\file.xls”). The lower 9 bits of the option
506
+ # flags field must be 1.x00x.xx112.
507
+ # Offset Size Contents
508
+ # 0 4 Character count of the UNC,
509
+ # including trailing zero word (fl)
510
+ # 4 2∙fl Character array of the UNC, no Unicode string header,
511
+ # always 16-bit characters, zeroterminated.
512
+ link.url, pos = read_hlink_string work, pos
513
+ elsif has_link > 0
514
+ uid, = work.unpack "x#{pos}H32"
515
+ pos += 16
516
+ if uid == "e0c9ea79f9bace118c8200aa004ba90b"
517
+ # 6.53.2 Hyperlink containing a URL (Uniform Resource Locator)
518
+ # These data fields occur for links which are not local files or files
519
+ # in the local network (for instance HTTP and FTP links and e-mail
520
+ # addresses). The lower 9 bits of the option flags field must be
521
+ # 0.x00x.xx112 (x means optional, depending on hyperlink content). The
522
+ # GUID could be used to distinguish a URL from a file link.
523
+ # Offset Size Contents
524
+ # 0 16 GUID of URL Moniker:
525
+ # E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
526
+ # (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
527
+ # 16 4 Size of character array of the URL, including trailing
528
+ # zero word (us). There are us/2-1 characters in the
529
+ # following string.
530
+ # 20 us Character array of the URL, no Unicode string header,
531
+ # always 16-bit characters, zeroterminated
532
+ size, = work.unpack "x#{pos}V"
533
+ pos += 4
534
+ data = work[pos, size].chomp "\000\000"
535
+ link.url = client data
536
+ pos += size
537
+ else
538
+ # 6.53.3 Hyperlink to a Local File
539
+ # These data fields are for links to files on local drives. The path of
540
+ # the file can be complete with drive letter (absolute) or relative to
541
+ # the location of the workbook. The lower 9 bits of the option flags
542
+ # field must be 0.x00x.xxx12. The GUID could be used to distinguish a
543
+ # URL from a file link.
544
+ # Offset Size Contents
545
+ # 0 16 GUID of File Moniker:
546
+ # 03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
547
+ # (00000303-0000-0000-C000-000000000046)
548
+ # 16 2 Directory up-level count. Each leading “..\” in the
549
+ # file link is deleted and increases this counter.
550
+ # 18 4 Character count of the shortened file path and name,
551
+ # including trailing zero byte (sl)
552
+ # 22 sl Character array of the shortened file path and name in
553
+ # 8.3-DOS-format. This field can be filled with a long
554
+ # file name too. No Unicode string header, always 8-bit
555
+ # characters, zeroterminated.
556
+ # 22+sl 24 Unknown byte sequence:
557
+ # FF FF AD DE 00 00 00 00
558
+ # 00 00 00 00 00 00 00 00
559
+ # 00 00 00 00 00 00 00 00
560
+ # 46+sl 4 Size of the following file link field including string
561
+ # length field and additional data field (sz). If sz is
562
+ # zero, nothing will follow (except a text mark).
563
+ # [50+sl] 4 (optional) Size of character array of the extended file
564
+ # path and name (xl). There are xl/2 characters in the
565
+ # following string.
566
+ # [54+sl] 2 (optional) Unknown byte sequence: 03 00
567
+ # [56+sl] xl (optional) Character array of the extended file path
568
+ # and name (xl), no Unicode string header, always 16-bit
569
+ # characters, not zero-terminated
570
+ uplevel, count = work.unpack "x#{pos}vV"
571
+ pos += 6
572
+ # TODO: short file path may have any of the OEM encodings. Find out which
573
+ # and use the #client method to convert the encoding.
574
+ prefix = internal("..\\", "UTF-8") * uplevel
575
+ link.dos = link.url = prefix << work[pos, count].chomp("\000")
576
+ pos += count + 24
577
+ total, size = work.unpack "x#{pos}V2"
578
+ pos += 10
579
+ if total > 0
580
+ link.url = client work[pos, size]
581
+ pos += size
582
+ end
583
+ end
584
+ else
585
+ # 6.53.5 Hyperlink to the Current Workbook
586
+ # In this case only the text mark field is present (optional with
587
+ # description).
588
+ # Example: The URL “#Sheet2!B1:C2” refers to the given range in the
589
+ # current workbook.
590
+ # The lower 9 bits of the option flags field must be 0.x00x.1x002.
591
+ end
592
+ if textmark > 0
593
+ link.fragment, _ = read_hlink_string work, pos
594
+ end
595
+ if link.empty?
596
+ link << link.href
597
+ end
598
+ firstrow.upto lastrow do |row|
599
+ firstcol.upto lastcol do |col|
600
+ worksheet.add_link row, col, link
601
+ end
826
602
  end
827
- when :codepage # ○ CODEPAGE ➜ 6.17
828
- read_codepage work, pos, len
829
- when :boundsheet # ●● BOUNDSHEET ➜ 6.12
830
- read_boundsheet work, pos, len
831
- when :xf # ●● XF ➜ 6.115
832
- read_xf work, pos, len
833
- when :sst # ○ Shared String Table ➜ 5.11
834
- # ● SST ➜ 6.96
835
- read_sst work, pos, len
836
- # TODO: implement memory-efficient sst handling, possibly in conjunction
837
- # with EXTSST
838
- when :extsst # ● EXTSST ➜ 6.40
839
- read_extsst work, pos, len
840
- when :style # ●● STYLE ➜ 6.99
841
- read_style work, pos, len
842
- when :format # ○○ FORMAT (Number Format) ➜ 6.45
843
- read_format work, pos, len
844
- when :font # ●● FONT ➜ 6.43
845
- read_font work, pos, len
846
603
  end
847
- previous_op = op unless op == :continue
848
- end
849
- end
850
- def read_worksheet worksheet, offset
851
- @pos = offset
852
- @detected_rows = {}
853
- @noteObjList = []
854
- @noteList = []
855
- @noteObject = nil
856
- previous = nil
857
- while tuple = get_next_chunk
858
- pos, op, len, work = tuple
859
- if((offset = @current_row_block_offset) && !in_row_block?(op, previous))
860
- @current_row_block_offset = nil
861
- offset[1] = pos - offset[0]
604
+
605
+ def read_hlink_string work, pos
606
+ count, = work.unpack "x#{pos}V"
607
+ len = count * 2
608
+ pos += 4
609
+ data = work[pos, len].chomp "\000\000"
610
+ pos += len
611
+ [client(data, "UTF-16LE"), pos]
612
+ end
613
+
614
+ def read_index worksheet, work, pos, len
615
+ # Offset Size Contents
616
+ # 0 4 Not used
617
+ # 4 4 Index to first used row (rf, 0-based)
618
+ # 8 4 Index to first row of unused tail of sheet
619
+ # (rl, last used row + 1, 0-based)
620
+ # 12 4 Absolute stream position of the
621
+ # DEFCOLWIDTH record (➜ 6.29) of the current sheet. If this
622
+ # record does not exist, the offset points to the record at
623
+ # the position where the DEFCOLWIDTH record would occur.
624
+ # 16 4∙nm Array of nm absolute stream positions to the
625
+ # DBCELL record (➜ 6.26) of each Row Block
626
+ # TODO: use the index if it exists
627
+ # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
628
+ end
629
+
630
+ def read_label worksheet, addr, work
631
+ # Offset Size Contents
632
+ # 0 2 Index to row
633
+ # 2 2 Index to column
634
+ # 4 2 Index to XF record (➜ 6.115)
635
+ # 6 var. Unicode string, 16-bit string length (➜ 3.4)
636
+ row, column, xf = work.unpack "v3"
637
+ value = client read_string(work[6..], 2), @workbook.encoding
638
+ set_cell worksheet, row, column, xf, value
639
+ end
640
+
641
+ def read_labelsst worksheet, addr, work
642
+ # Offset Size Contents
643
+ # 0 2 Index to row
644
+ # 2 2 Index to column
645
+ # 4 2 Index to XF record (➜ 6.115)
646
+ # 6 4 Index into SST record (➜ 6.96)
647
+ row, column, xf, index = work.unpack binfmt(:labelsst)
648
+ set_cell worksheet, row, column, xf, worksheet.shared_string(index)
649
+ end
650
+
651
+ def read_mulblank worksheet, addr, work
652
+ # Offset Size Contents
653
+ # 0 2 Index to row
654
+ # 2 2 Index to first column (fc)
655
+ # 4 2∙nc List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
656
+ # 4+2∙nc 2 Index to last column (lc)
657
+ row, column, *xfs = work.unpack "v*"
658
+ xfs.pop #=> last_column
659
+ xfs.each_with_index { |xf, idx| set_cell worksheet, row, column + idx, xf }
660
+ end
661
+
662
+ def read_mulrk worksheet, addr, work
663
+ # Offset Size Contents
664
+ # 0 2 Index to row
665
+ # 2 2 Index to first column (fc)
666
+ # 4 6∙nc List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
667
+ # Offset Size Contents
668
+ # 0 2 Index to XF record (➜ 6.115)
669
+ # 2 4 RK value (➜ 3.6)
670
+ # 4+6∙nc 2 Index to last column (lc)
671
+ row, column = work.unpack "v2"
672
+ 4.step(work.size - 6, 6) do |idx|
673
+ xf, = work.unpack "x#{idx}v"
674
+ set_cell worksheet, row, column, xf, decode_rk(work[idx + 2, 4])
675
+ column += 1
676
+ end
677
+ end
678
+
679
+ def read_number worksheet, addr, work
680
+ # Offset Size Contents
681
+ # 0 2 Index to row
682
+ # 2 2 Index to column
683
+ # 4 2 Index to XF record (➜ 6.115)
684
+ # 6 8 IEEE 754 floating-point value (64-bit double precision)
685
+ row, column, xf, value = work.unpack binfmt(:number)
686
+ set_cell worksheet, row, column, xf, value
687
+ end
688
+
689
+ def read_rk worksheet, addr, work
690
+ # Offset Size Contents
691
+ # 0 2 Index to row
692
+ # 2 2 Index to column
693
+ # 4 2 Index to XF record (➜ 6.115)
694
+ # 6 4 RK value (➜ 3.6)
695
+ row, column, xf = work.unpack "v3"
696
+ set_cell worksheet, row, column, xf, decode_rk(work[6, 4])
697
+ end
698
+
699
+ def read_row worksheet, addr
700
+ row = addr[:index]
701
+ @current_row_block.fetch [worksheet, row] do
702
+ @current_row_block.clear
703
+ cells = @current_row_block[[worksheet, row]] = Row.new(nil, row)
704
+ @pos = addr[:offset]
705
+ found = false
706
+ while (tuple = get_next_chunk)
707
+ pos, op, _, work = tuple
708
+ case op
709
+ when :eof # ● EOF ➜ 6.36 - we should only get here if there is just
710
+ # one Row-Block
711
+ @pos = pos
712
+ return cells
713
+ when :dbcell # ○ DBCELL Stream offsets to the cell records of each row
714
+ return cells
715
+ when :row # ○○ Row Blocks ➜ 5.7
716
+ # ● ROW ➜ 6.83
717
+ # ignore, we already did these in read_worksheet
718
+ return cells if found
719
+ when :blank # BLANK ➜ 6.7
720
+ found = true
721
+ read_blank worksheet, addr, work
722
+ when :boolerr # BOOLERR ➜ 6.10
723
+ found = true
724
+ read_boolerr worksheet, addr, work
725
+ when 0x0002 # INTEGER ➜ 6.56 (BIFF2 only)
726
+ found = true
727
+ # TODO: implement for BIFF2 support
728
+ when :formula # FORMULA ➜ 6.46
729
+ found = true
730
+ read_formula worksheet, addr, work
731
+ when :label # LABEL ➜ 6.59 (BIFF2-BIFF7)
732
+ found = true
733
+ read_label worksheet, addr, work
734
+ when :labelsst # LABELSST ➜ 6.61 (BIFF8 only)
735
+ found = true
736
+ read_labelsst worksheet, addr, work
737
+ when :mulblank # MULBLANK ➜ 6.64 (BIFF5-BIFF8)
738
+ found = true
739
+ read_mulblank worksheet, addr, work
740
+ when :mulrk # MULRK ➜ 6.65 (BIFF5-BIFF8)
741
+ found = true
742
+ read_mulrk worksheet, addr, work
743
+ when :number # NUMBER ➜ 6.68
744
+ found = true
745
+ read_number worksheet, addr, work
746
+ when :rk # RK ➜ 6.82 (BIFF3-BIFF8)
747
+ found = true
748
+ read_rk worksheet, addr, work
749
+ when :rstring # RSTRING ➜ 6.84 (BIFF5/BIFF7)
750
+ found = true
751
+ read_rstring worksheet, addr, work
752
+ end
753
+ end
754
+ cells
755
+ end
756
+ end
757
+
758
def read_rstring worksheet, addr, work
  # RSTRING record (BIFF5/BIFF7): a cell holding a rich-text string.
  #   Offset  Size  Contents
  #   0       2     Index to row
  #   2       2     Index to column
  #   4       2     Index to XF record (➜ 6.115)
  #   6       sz    Unformatted Unicode string, 16-bit string length (➜ 3.4)
  #   6+sz    2     Number of Rich-Text formatting runs (rt)
  #   8+sz    4·rt  List of rt formatting runs (➜ 3.2)
  # Only the plain text is stored in the cell; the formatting runs that
  # follow the string are not read here. `addr` is not used.
  row_idx, col_idx, xf_idx = work.unpack("v3")
  raw_text = read_string(work[6..], 2)
  set_cell(worksheet, row_idx, col_idx, xf_idx, client(raw_text, @workbook.encoding))
end
770
+
771
def read_window2 worksheet, work, pos, len
  # WINDOW2 record: additional settings for the document window
  # (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8).
  # It is part of the Sheet View Settings Block (➜ 4.5).
  #   Offset  Size  Contents
  #   0       2     Option flags (bit: mask, meaning when set / cleared):
  #                    0: 0x0001  show formulas / show formula results
  #                    1: 0x0002  show grid lines / hide them
  #                    2: 0x0004  show sheet headers / hide them
  #                    3: 0x0008  panes are frozen / not frozen
  #                    4: 0x0010  show zero values / show them as empty cells
  #                    5: 0x0020  automatic / manual grid line colour
  #                    6: 0x0040  columns right-to-left / left-to-right
  #                    7: 0x0080  show outline symbols / hide them
  #                    8: 0x0100  remove / keep splits if pane freeze is removed
  #                    9: 0x0200  sheet selected / not selected (BIFF5-BIFF8)
  #                   10: 0x0400  sheet active / not active (BIFF5-BIFF8)
  #                   11: 0x0800  page break preview / normal view (BIFF8)
  #   2       2     Index to first visible row
  #   4       2     Index to first visible column
  #   6       2     Colour index of grid line colour (➜ 5.74); in
  #                 BIFF2-BIFF5 an RGB colour is written instead
  #   8       2     Not used
  #   10      2     Cached magnification in page break preview, in percent
  #                 (0 = default, 60%)
  #   12      2     Cached magnification in normal view, in percent
  #                 (0 = default, 100%)
  #   14      4     Not used
  # Only the "sheet selected" bit is evaluated; everything else is ignored.
  option_flags = work.unpack1("v")
  worksheet.selected = (option_flags & 0x0200) > 0
end
815
+
816
def read_merged_cells worksheet, work, pos, len
  # MERGEDCELLS record (BIFF8): the addresses of merged cell ranges in the
  # current sheet.
  #   Offset  Size  Contents
  #   0       var.  Cell range address list with merged ranges (➜ 2.5.15)
  # If the record size exceeds the limit, it is not continued with a CONTINUE
  # record; another self-contained MERGEDCELLS record is started instead. The
  # limit of 8224 bytes per record gives at most 1027 merged ranges each.
  #
  # A cell range address list (BIFF2-BIFF8) is a count followed by the ranges:
  #   Offset  Size          Contents
  #   0       2             Number of following cell range addresses (nm)
  #   2       6∙nm or 8∙nm  List of nm cell range addresses (➜ 2.5.14)
  #
  # The decoded ranges are appended, so several MERGEDCELLS records for the
  # same sheet accumulate.
  merged = worksheet.merged_cells
  ranges = read_range_address_list(work, len)
  merged.push(*ranges)
end
835
+
836
def read_workbook
  # Reads the workbook-globals records chunk by chunk until either the next
  # BOF (start of a worksheet substream, ➜ 6.8) or EOF (➜ 6.36) is reached.
  # Records that are not listed below are skipped.
  last_op = nil # most recent non-CONTINUE opcode, used to route CONTINUE
  while (chunk = get_next_chunk)
    pos, op, len, work = chunk
    case op
    when @bof, :bof # ● BOF Type = worksheet (➜ 6.8)
      return
    when :eof # ● EOF ➜ 6.36
      postread_workbook
      return
    when :datemode # ○ DATEMODE ➜ 6.25
      # A flag value of 1 selects the 1904 date system.
      uses_1904 = work.unpack1("v") == 1
      @workbook.date_base = uses_1904 ? DateTime.new(1904, 1, 1) : DateTime.new(1899, 12, 31)
    when :continue # ○ CONTINUE ➜ 6.22
      # Only a continued SST (➜ 6.96) is handled; other CONTINUEs are dropped.
      continue_sst(work, pos, len) if last_op == :sst
    when :codepage, # ○ CODEPAGE ➜ 6.17
         :boundsheet, # ●● BOUNDSHEET ➜ 6.12
         :xf, # ●● XF ➜ 6.115
         :sst, # ○ Shared String Table ➜ 5.11, ● SST ➜ 6.96
         :extsst, # ● EXTSST ➜ 6.40
         :style, # ●● STYLE ➜ 6.99
         :format, # ○○ FORMAT (Number Format) ➜ 6.45
         :font # ●● FONT ➜ 6.43
      # Each of these has a reader named after its opcode: read_<op>.
      # TODO: implement memory-efficient sst handling, possibly in conjunction
      #       with EXTSST
      send(:"read_#{op}", work, pos, len)
    end
    last_op = op unless op == :continue
  end
end
881
+
882
def read_worksheet worksheet, offset
  # Scans one worksheet substream starting at stream position `offset`,
  # collecting sheet-level settings, row addresses and note data. Cell values
  # are not decoded here; cell records only mark where their row block starts.
  # Stops at EOF (➜ 6.36) after handing over to postread_worksheet.
  @pos = offset
  @detected_rows = {}
  @note_ob_list = []
  @note_list = []
  @note_object = nil
  last_op = nil
  while (chunk = get_next_chunk)
    pos, op, len, work = chunk
    # Leaving a row block: record its length as [start, length].
    open_block = @current_row_block_offset
    if open_block && !in_row_block?(op, last_op)
      @current_row_block_offset = nil
      open_block[1] = pos - open_block[0]
    end
    case op
    when :eof # ● EOF ➜ 6.36
      postread_worksheet worksheet
      return
    # Not handled yet:
    #   UNCALCED (➜ 6.104) - TODO: Formula support. Values were not calculated
    #     before saving; a warning that some fields containing formulas were
    #     saved without a computed value could be emitted here.
    #   INDEX (➜ 5.7 Row Blocks, ➜ 6.55) - TODO: if there are changes in rows,
    #     omit index when writing.
    when :guts # GUTS 5.53
      read_guts worksheet, work, pos, len
    when :colinfo # ○○ COLINFO ➜ 6.18
      read_colinfo worksheet, work, pos, len
    when :dimensions # ● DIMENSIONS ➜ 6.31
      read_dimensions worksheet, work, pos, len
    when :row # ○○ Row Blocks ➜ 5.7, ● ROW ➜ 6.83
      set_row_address worksheet, work, pos, len
    when :hlink
      read_hlink worksheet, work, pos, len
    when :window2
      read_window2 worksheet, work, pos, len
    when :mergedcells # ○○ MERGEDCELLS ➜ 5.67
      read_merged_cells worksheet, work, pos, len
    when :protect, :password
      read_sheet_protection worksheet, op, work
    when :note # a note references an :obj
      read_note worksheet, work, pos, len
    when :obj # it contains the author in the NTS structure
      _ft, _cb, object_type, object_id = work.unpack("v4")
      if object_type == 0x19 # object type 0x19 marks a note object
        @note_object = NoteObject.new
        @note_object.obj_id = object_id
      end
    when :drawing, :txo
      # :drawing can be followed by :txo in case of a note, and :txo contains
      # the length of the note text. Neither record is evaluated; they only
      # matter as the `last_op` seen by the :continue branch below.
    when :continue # this contains the actual note text
      if last_op == :txo && @note_object
        # The first byte selects the character width of the text that follows.
        case work.unpack1("C")
        when 0 # compressed charset: one byte per character
          @note_object.text = work.unpack("@1C*").pack("C*")
        when 1 # uncompressed charset: 16 bits per character
          @note_object.text = work.unpack("@1S*").pack("U*")
        end
        @note_ob_list << @note_object
      end
    when :pagesetup
      read_pagesetup(worksheet, work, pos, len)
    when :leftmargin, :rightmargin, :topmargin, :bottommargin
      # :leftmargin -> :left, :rightmargin -> :right, and so on.
      side = op.to_s.delete_suffix("margin").to_sym
      worksheet.margins[side] = work.unpack1(binfmt(:margin))
    else
      # Cell records whose row has no ROW record still need a row address.
      set_missing_row_address worksheet, work, pos, len if ROW_BLOCK_OPS.include?(op)
    end
    # CONTINUE is deliberately remembered too (unlike in read_workbook).
    last_op = op
  end
end
980
+
981
def read_pagesetup(worksheet, work, pos, len)
  # Replaces the worksheet's page setup with the values of a PAGESETUP
  # record. Only orientation and scaling are decoded; the full unpacked
  # record is kept under :orig_data.
  # TODO: add options according to specification
  settings = worksheet.pagesetup
  settings.delete_if { true } # drop everything from a previous record
  fields = work.unpack(binfmt(:pagesetup))
  settings[:orientation] = (fields[5] == 0) ? :landscape : :portrait
  settings[:adjust_to] = fields[1]

  settings[:orig_data] = fields
end
990
+
991
def read_guts worksheet, work, pos, len
  # GUTS record: size of the outline areas of the sheet.
  #   Offset  Size  Contents
  #   0       2     Width of the area to display row outlines (left of the
  #                 sheet), in pixel
  #   2       2     Height of the area to display column outlines (above the
  #                 sheet), in pixel
  #   4       2     Number of visible row outline levels (used row levels + 1;
  #                 or 0, if not used)
  #   6       2     Number of visible column outline levels (used column
  #                 levels + 1; or 0, if not used)
  # The four values are stored in worksheet.guts under the keys below.
  keys = %i[width height row_level col_level]
  keys.zip(work.unpack("v4")).each do |key, amount|
    worksheet.guts[key] = amount
  end
end
863
- case op
864
- when :eof # ● EOF 6.36
865
- postread_worksheet worksheet
866
- return
867
- #when :uncalced # ○ UNCALCED ➜ 6.104
868
- # TODO: Formula support. Values were not calculated before saving
869
- #warn <<-EOS
870
- # Some fields containig formulas were saved without a computed value.
871
- # Support Spreadsheet::Excel by implementing formula-calculations!
872
- #EOS
873
- #when :index # ○ INDEX5.7 (Row Blocks), ➜ 6.55
874
- # TODO: if there are changes in rows, omit index when writing
875
- #read_index worksheet, work, pos, len
876
- when :guts # GUTS 5.53
877
- read_guts worksheet, work, pos, len
878
- when :colinfo # ○○ COLINFO ➜ 6.18
879
- read_colinfo worksheet, work, pos, len
880
- when :dimensions # ● DIMENSIONS 6.31
881
- read_dimensions worksheet, work, pos, len
882
- when :row # ○○ Row Blocks 5.7
883
- # ● ROW 6.83
884
- set_row_address worksheet, work, pos, len
885
- when :hlink
886
- read_hlink worksheet, work, pos, len
887
- when :window2
888
- read_window2 worksheet, work, pos, len
889
- when :mergedcells # ○○ MERGEDCELLS ➜ 5.67
890
- read_merged_cells worksheet, work, pos, len
891
- when :protect, :password
892
- read_sheet_protection worksheet, op, work
893
- when :note # a note references an :obj
894
- read_note worksheet, work, pos, len
895
- when :obj # it contains the author in the NTS structure
896
- _ft, _cb, _ot, _objID = work.unpack('v4')
897
- if _ot == 0x19
898
- #puts "\nDEBUG: found Note Obj record"
899
- @noteObject = NoteObject.new
900
- @noteObject.objID = _objID
1003
+
1004
def read_style work, pos, len
  # STYLE record: gives a name to the style XF it points at.
  #
  # User-Defined Cell Styles:
  #   Offset  Size  Contents
  #   0       2     Bit    Mask    Contents
  #                 11-0   0x0fff  Index to style XF record (➜ 6.115)
  #                 15     0x8000  Always 0 for user-defined styles
  #   2       var.  BIFF2-BIFF7: Non-empty byte string,
  #                              8-bit string length (➜ 3.3)
  #                 BIFF8:       Non-empty Unicode string,
  #                              16-bit string length (➜ 3.4)
  #
  # Built-In Cell Styles:
  #   Offset  Size  Contents
  #   0       2     Bit    Mask    Contents
  #                 11-0   0x0fff  Index to style XF record (➜ 6.115)
  #                 15     0x8000  Always 1 for built-in styles
  #   2       1     Identifier of the built-in cell style:
  #                   0x00 = Normal
  #                   0x01 = RowLevel_lv (see next field)
  #                   0x02 = ColLevel_lv (see next field)
  #                   0x03 = Comma
  #                   0x04 = Currency
  #                   0x05 = Percent
  #                   0x06 = Comma [0] (BIFF4-BIFF8)
  #                   0x07 = Currency [0] (BIFF4-BIFF8)
  #                   0x08 = Hyperlink (BIFF8)
  #                   0x09 = Followed Hyperlink (BIFF8)
  #   3       1     Level for RowLevel or ColLevel style (zero-based, lv),
  #                 FFH otherwise
  #
  # Built-in identifiers without an entry in BUILTIN_STYLES leave the XF's
  # name untouched.
  flags, = work.unpack "v"
  xf = @workbook.format(flags & 0x0fff)
  if (flags & 0x8000) == 0
    xf.name = client(read_string(work[2..], 2), @workbook.encoding)
  else
    id, level = work.unpack "x2C2"
    if (name = BUILTIN_STYLES[id])
      # String#sub returns a new string and leaves `name` alone, so its
      # result has to be used; otherwise the "_lv" placeholder of the
      # RowLevel/ColLevel styles is never replaced by the actual level.
      xf.name = client(name.sub("_lv", "_#{level}"), "UTF-8")
    end
  end
end
1047
+
1048
def read_xf work, pos, len
  # XF record (➜ 6.115): builds a Format from the record and registers it
  # with the workbook.
  #
  #   Offset  Size  Contents
  #   0       2     Index to FONT record (➜ 6.43)
  #   2       2     Index to FORMAT record (➜ 6.45)
  #   4       2     Bit    Mask    Contents
  #                 2-0    0x0007  XF_TYPE_PROT – XF type, cell protection
  #                                  bit 0 (0x01): 1 = Cell is locked
  #                                  bit 1 (0x02): 1 = Formula is hidden
  #                                  bit 2 (0x04): 0 = Cell XF; 1 = Style XF
  #                 15-4   0xfff0  Index to parent style XF
  #                                (always 0xfff in style XFs)
  #   6       1     Bit    Mask    Contents
  #                 2-0    0x07    XF_HOR_ALIGN – Horizontal alignment
  #                                  0x00 General      0x01 Left
  #                                  0x02 Centred      0x03 Right
  #                                  0x04 Filled
  #                                  0x05 Justified (BIFF4-BIFF8X)
  #                                  0x06 Centred across selection
  #                                       (BIFF4-BIFF8X)
  #                                  0x07 Distributed (BIFF8X)
  #                 3      0x08    1 = Text is wrapped at right border
  #                 6-4    0x70    XF_VERT_ALIGN – Vertical alignment
  #                                  0x00 Top   0x01 Centred   0x02 Bottom
  #                                  0x03 Justified (BIFF5-BIFF8X)
  #                                  0x04 Distributed (BIFF8X)
  #   7       1     XF_ROTATION: Text rotation angle
  #                   0       Not rotated
  #                   1-90    1 to 90 degrees counterclockwise
  #                   91-180  1 to 90 degrees clockwise
  #                   255     Letters are stacked top-to-bottom,
  #                           but not rotated
  #   8       1     Bit    Mask    Contents
  #                 3-0    0x0f    Indent level
  #                 4      0x10    1 = Shrink content to fit into cell
  #                 5      0x40    1 = Merge Range (djberger)
  #                                (mask as given in the original note; it
  #                                does not match bit 5 – TODO confirm)
  #                 7-6    0xc0    Text direction (BIFF8X only)
  #                                  0 = According to context
  #                                  1 = Left-to-right
  #                                  2 = Right-to-left
  #   9       1     Bit    Mask    Contents
  #                 7-2    0xfc    XF_USED_ATTRIB – Used attributes.
  #                                Each bit describes the validity of a
  #                                specific group of attributes. In cell XFs
  #                                a cleared bit means the attributes of the
  #                                parent style XF are used (but only if the
  #                                attributes are valid there), a set bit
  #                                means the attributes of this XF are used.
  #                                In style XFs a cleared bit means the
  #                                attribute setting is valid, a set bit
  #                                means the attribute should be ignored.
  #                                  bit 0 (0x01): number format
  #                                  bit 1 (0x02): font
  #                                  bit 2 (0x04): horizontal and vertical
  #                                    alignment, text wrap, indentation,
  #                                    orientation, rotation, text direction
  #                                  bit 3 (0x08): border lines
  #                                  bit 4 (0x10): background area style
  #                                  bit 5 (0x20): cell protection (cell
  #                                    locked and formula hidden)
  #   10      4     Cell border lines and background area:
  #                 3- 0   0x0000000f  Left line style (➜ 3.10)
  #                 7- 4   0x000000f0  Right line style (➜ 3.10)
  #                 11- 8  0x00000f00  Top line style (➜ 3.10)
  #                 15-12  0x0000f000  Bottom line style (➜ 3.10)
  #                 22-16  0x007f0000  Colour index (➜ 6.70), left line
  #                 29-23  0x3f800000  Colour index (➜ 6.70), right line
  #                 30     0x40000000  1 = Diagonal line from top left to
  #                                    right bottom
  #                 31     0x80000000  1 = Diagonal line from bottom left to
  #                                    right top
  #   14      4     6- 0   0x0000007f  Colour index (➜ 6.70), top line
  #                 13- 7  0x00003f80  Colour index (➜ 6.70), bottom line
  #                 20-14  0x001fc000  Colour index (➜ 6.70), diagonal line
  #                 24-21  0x01e00000  Diagonal line style (➜ 3.10)
  #                 31-26  0xfc000000  Fill pattern (➜ 3.11)
  #   18      2     6-0    0x007f      Colour index (➜ 6.70), pattern colour
  #                 13-7   0x3f80      Colour index (➜ 6.70), pattern
  #                                    background
  #
  # NOTE(review): the vertical-alignment (0x70) and text-direction (0xc0)
  # lookups use the masked value without shifting it down – confirm that
  # NGILA_V_FX and NOITCERID_TXET_FX are keyed accordingly.

  # Looks up a colour index, substituting `fallback` for unknown indices.
  color_or = ->(index, fallback) { COLOR_CODES[index] || fallback }

  fmt = Format.new
  font_idx, numfmt, _, align_bits, rotation_byte, indent_bits, _,
    border_bits, border_color_bits, pattern_bits = work.unpack(binfmt(:xf))
  fmt.number_format = @formats[numfmt]
  ## this appears to be undocumented: the first 4 fonts seem to be accessed
  #  with a 0-based index, but all subsequent font indices are 1-based.
  resolved_font_idx = (font_idx > 3) ? font_idx - 1 : font_idx
  fmt.font = @workbook.font(resolved_font_idx)
  fmt.horizontal_align = NGILA_H_FX[align_bits & 0x07]
  fmt.text_wrap = (align_bits & 0x08).positive?
  fmt.vertical_align = NGILA_V_FX[align_bits & 0x70]
  fmt.rotation =
    if rotation_byte == 255
      :stacked
    elsif rotation_byte <= 90
      rotation_byte
    else
      # 91-180 means clockwise; expressed as a negative angle.
      90 - rotation_byte
    end
  fmt.indent_level = indent_bits & 0x0f
  fmt.shrink = (indent_bits & 0x10).positive?
  fmt.text_direction = NOITCERID_TXET_FX[indent_bits & 0xc0]
  fmt.left = XF_BORDER_LINE_STYLES[border_bits & 0x0000000f]
  fmt.right = XF_BORDER_LINE_STYLES[(border_bits & 0x000000f0) >> 4]
  fmt.top = XF_BORDER_LINE_STYLES[(border_bits & 0x00000f00) >> 8]
  fmt.bottom = XF_BORDER_LINE_STYLES[(border_bits & 0x0000f000) >> 12]
  fmt.left_color = color_or.call((border_bits & 0x007f0000) >> 16, :black)
  fmt.right_color = color_or.call((border_bits & 0x3f800000) >> 23, :black)
  fmt.cross_down = (border_bits & 0x40000000).positive?
  fmt.cross_up = (border_bits & 0x80000000).positive?
  if border_color_bits
    fmt.top_color = color_or.call(border_color_bits & 0x0000007f, :black)
    fmt.bottom_color = color_or.call((border_color_bits & 0x00003f80) >> 7, :black)
    fmt.diagonal_color = color_or.call((border_color_bits & 0x001fc000) >> 14, :black)
    # The diagonal line style (mask 0x01e00000) is not read.
    fmt.pattern = (border_color_bits & 0xfc000000) >> 26
  end
  fmt.pattern_fg_color = color_or.call(pattern_bits & 0x007f, :border)
  fmt.pattern_bg_color = color_or.call((pattern_bits & 0x3f80) >> 7, :pattern_bg)
  @workbook.add_format fmt
end
1184
+
1185
def read_note worksheet, work, pos, len
  # NOTE record: position, object id and author of a cell note. The record
  # is unpacked as five 16-bit words and one byte: row, column, an unused
  # word, the id of the related :obj record, the author name length in
  # characters, and the string format byte (0 = one byte per character,
  # 1 = 16 bits per character). The author's characters start at byte 11.
  #
  # The resulting Note is appended to @note_list; it is matched with its
  # text later (see postread_worksheet).
  row, col, _, obj_id, author_len, author_fmt = work.unpack("v5C")
  author =
    if author_len > 0
      case author_fmt
      when 0
        # Compressed charset: keep the bytes exactly as stored.
        work.unpack("@11C#{author_len}").compact.pack("C*")
      when 1
        # Uncompressed charset: 16-bit little-endian code units. They have
        # to be packed as code points ("U*"); packing them as bytes ("C*")
        # would cut every character down to its low byte. This matches how
        # read_worksheet converts uncompressed note text.
        work.unpack("@11v#{author_len}").compact.pack("U*")
      else
        # Unknown string format byte: no usable author name.
        ""
      end
    else
      ""
    end
  @note = Note.new
  @note.length = len
  @note.row = row
  @note.col = col
  @note.author = author
  @note.obj_id = obj_id
  # Pop it on the list to be sorted in postread_worksheet
  @note_list << @note
end
1210
+
1211
def read_sheet_protection worksheet, op, data
  # Applies a PROTECT or PASSWORD record to the worksheet. Both records are
  # read as a single 16-bit word; any other opcode is ignored.
  if op == :protect
    # The sheet is protected only when the word is exactly 1.
    protect_flag = data.unpack1("v")
    worksheet.protect! if protect_flag == 1
  elsif op == :password
    worksheet.password_hash = data.unpack1("v")
  end
end
944
- previous = op
945
- #previous = op unless op == :continue
946
- end
947
- end
948
1219
 
949
- def read_pagesetup(worksheet, work, pos, len)
950
- worksheet.pagesetup.delete_if { true }
951
- data = work.unpack(binfmt(:pagesetup))
952
- worksheet.pagesetup[:orientation] = data[5] == 0 ? :landscape : :portrait
953
- worksheet.pagesetup[:adjust_to] = data[1]
1220
def set_cell worksheet, row, column, xf, value = nil
  # Stores `value` at `column` of the row being assembled for
  # [worksheet, row] in @current_row_block, creating that row on first use.
  # An XF index of 0 leaves the cell without a format of its own.
  block_key = [worksheet, row]
  row_cells = (@current_row_block[block_key] ||= Row.new(nil, row))
  row_cells.formats[column] = @workbook.format(xf) if xf != 0
  row_cells[column] = value
end
954
1225
 
955
- worksheet.pagesetup[:orig_data] = data
956
- # TODO: add options acording to specification
957
- end
1226
def set_missing_row_address worksheet, work, pos, len
  # Registers a row address for a cell record whose row is not yet known to
  # the worksheet. The record starts with:
  #   Offset  Size  Contents
  #   0       2     Index of this row
  #   2       2     Index to this column
  # Rows that already have an entry in worksheet.offsets are left alone.
  row_index, = work.unpack("v2")
  return if worksheet.offsets[row_index]

  # Start a row block here unless one is already open.
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  worksheet.set_row_address(row_index, {index: row_index, row_block: block, offset: block[0]})
end
958
1241
 
959
- def read_guts worksheet, work, pos, len
960
- # Offset Size Contents
961
- # 0 2 Width of the area to display row outlines (left of the sheet), in pixel
962
- # 2 2 Height of the area to display column outlines (above the sheet), in pixel
963
- # 4 2 Number of visible row outline levels (used row levels + 1; or 0, if not used)
964
- # 6 2 Number of visible column outline levels (used column levels + 1; or 0, if not used)
965
- width, height, row_level, col_level = work.unpack 'v4'
966
- worksheet.guts[:width] = width
967
- worksheet.guts[:height] = height
968
- worksheet.guts[:row_level] = row_level
969
- worksheet.guts[:col_level] = col_level
970
- end
971
- def read_style work, pos, len
972
- # User-Defined Cell Styles:
973
- # Offset Size Contents
974
- # 0 2 Bit Mask Contents
975
- # 11-0 0x0fff Index to style XF record (➜ 6.115)
976
- # 15 0x8000 Always 0 for user-defined styles
977
- # 2 var. BIFF2-BIFF7: Non-empty byte string,
978
- # 8-bit string length (➜ 3.3)
979
- # BIFF8: Non-empty Unicode string,
980
- # 16-bit string length (➜ 3.4)
981
- #
982
- # Built-In Cell Styles
983
- # Offset Size Contents
984
- # 0 2 Bit Mask Contents
985
- # 11-0 0x0FFF Index to style XF record (➜ 6.115)
986
- # 15 0x8000 Always 1 for built-in styles
987
- # 2 1 Identifier of the built-in cell style:
988
- # 0x00 = Normal
989
- # 0x01 = RowLevel_lv (see next field)
990
- # 0x02 = ColLevel_lv (see next field)
991
- # 0x03 = Comma
992
- # 0x04 = Currency
993
- # 0x05 = Percent
994
- # 0x06 = Comma [0] (BIFF4-BIFF8)
995
- # 0x07 = Currency [0] (BIFF4-BIFF8)
996
- # 0x08 = Hyperlink (BIFF8)
997
- # 0x09 = Followed Hyperlink (BIFF8)
998
- # 3 1 Level for RowLevel or ColLevel style (zero-based, lv),
999
- # FFH otherwise
1000
- flags, = work.unpack 'v'
1001
- xf_idx = flags & 0x0fff
1002
- xf = @workbook.format xf_idx
1003
- builtin = flags & 0x8000
1004
- if builtin == 0
1005
- xf.name = client read_string(work[2..-1], 2), @workbook.encoding
1006
- else
1007
- id, level = work.unpack 'x2C2'
1008
- if name = BUILTIN_STYLES[id]
1009
- name.sub '_lv', "_#{level.to_s}"
1010
- xf.name = client name, 'UTF-8'
1242
def set_row_address worksheet, work, pos, len
  # ROW record (➜ 6.83): registers where a row's data lives, together with
  # its basic attributes.
  #
  #   Offset  Size  Contents
  #   0       2     Index of this row
  #   2       2     Index to column of the first cell which is described by
  #                 a cell record
  #   4       2     Index to column of the last cell which is described by a
  #                 cell record, increased by 1
  #   6       2     Bit    Mask    Contents
  #                 14-0   0x7fff  Height of the row, in twips = 1/20 of a
  #                                point
  #                 15     0x8000  0 = Row has custom height;
  #                                1 = Row has default height
  #   8       2     Not used
  #   10      2     In BIFF3-BIFF4 this field contains a relative offset to
  #                 calculate stream position of the first cell record for
  #                 this row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
  #                 anymore, but the DBCELL record (➜ 6.26) instead.
  #   12      4     Option flags and default row formatting:
  #                 2-0    0x00000007  Outline level of the row
  #                 4      0x00000010  1 = Outline group starts or ends here
  #                                    (depending on where the outline
  #                                    buttons are located, see WSBOOL
  #                                    record, ➜ 6.113), and is collapsed
  #                 5      0x00000020  1 = Row is hidden (manually, or by a
  #                                    filter or outline group)
  #                 6      0x00000040  1 = Row height and default font height
  #                                    do not match
  #                 7      0x00000080  1 = Row has explicit default format (fl)
  #                 8      0x00000100  Always 1
  #                 27-16  0x0fff0000  If fl = 1: Index to default XF record
  #                                    (➜ 6.115)
  #                 28     0x10000000  1 = Additional space above the row.
  #                                    Set if the upper border of at least
  #                                    one cell in this row, or the lower
  #                                    border of at least one cell in the row
  #                                    above, is formatted with a thick line
  #                                    style. Thin and medium line styles are
  #                                    not taken into account.
  #                 29     0x20000000  1 = Additional space below the row.
  #                                    Set if the lower border of at least
  #                                    one cell in this row, or the upper
  #                                    border of at least one cell in the row
  #                                    below, is formatted with a medium or
  #                                    thick line style. Thin line styles are
  #                                    not taken into account.
  # TODO: read attributes from work[13,3], read flags
  # TODO: Row spacing

  # Start a row block here unless one is already open.
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  index, first_used, first_unused, raw_height, flags = work.unpack(binfmt(:row))
  height_twips = raw_height & 0x7fff
  attrs = {
    default_format: nil,
    first_used: first_used,
    first_unused: first_unused,
    index: index,
    row_block: block,
    offset: block[0],
    outline_level: flags & 0x00000007,
    collapsed: (flags & 0x0000010) > 0,
    hidden: (flags & 0x0000020) > 0
  }
  # The height is stored only when bit 6 is set.
  attrs[:height] = height_twips / TWIPS if (flags & 0x00000040) > 0
  # An explicit default format is stored only when the fl bit is set.
  if (flags & 0x00000080) > 0
    default_xf = (flags & 0x0fff0000) >> 16
    attrs[:default_format] = @workbook.format(default_xf)
  end
  # When a ROW record claims no cells (`first_used` == `first_unused`) but
  # `set_missing_row_address` already recorded a valid offset from cell
  # records found earlier in the stream, preserve the original offset.
  # Some XLS writers emit ROW records after cell data with zeroed column
  # ranges; without this, `read_row` would seek to the wrong position.
  if first_used == first_unused
    known = worksheet.row_addresses[index]
    if known
      attrs[:offset] = known[:offset]
      attrs[:row_block] = known[:row_block]
    end
  end
  worksheet.set_row_address index, attrs
end
1324
+
1325
def setup io
  # Prepares the reader for `io`: forces binary encoding, opens the OLE
  # container, locates and loads the workbook stream, reads its BOF and
  # finally mixes in the BIFF-version specific reader and internals.
  # Raises a RuntimeError when no workbook stream can be found.

  ## Reading from StringIO fails without forced encoding
  if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.3.0")
    io.set_encoding("ASCII-8BIT")
  elsif io.respond_to?(:string) && (buffer = io.string) && buffer.respond_to?(:force_encoding)
    buffer.force_encoding("ASCII-8BIT")
  end
  io.rewind
  @ole = Ole::Storage.open io
  @workbook = Workbook.new io, {}
  # Try the known spellings of the stream name; the first one that opens
  # wins. A failed attempt leaves @book falsy.
  %w[Book Workbook BOOK WORKBOOK book workbook].each do |stream_name|
    @book =
      begin
        @ole.file.open(stream_name)
      rescue
        false
      end
    break if @book
  end
  raise "could not locate a workbook, possibly an empty file passed" unless @book
  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  biff_version = @workbook.biff_version
  extend_reader biff_version
  extend_internals biff_version
end
1352
+
1353
+ private
1354
+
1355
def extend_internals version
  # Mixes the Internals::Biff<version> module into this reader. A BIFF
  # version without a matching file is ignored (LoadError is swallowed).
  biff_name = Kernel.format("Biff%i", version)
  require Kernel.format("spreadsheet/excel/internals/biff%i", version)
  extend Internals.const_get(biff_name)

  ## spreadsheets may not include a codepage record.
  @workbook.encoding = encoding(850) if version < 8
rescue LoadError
end
1363
+
1364
def extend_reader version
  # Mixes the Reader::Biff<version> module into this reader. A BIFF version
  # without a matching file is ignored (LoadError is swallowed).
  biff_name = Kernel.format("Biff%i", version)
  require Kernel.format("spreadsheet/excel/reader/biff%i", version)
  extend Reader.const_get(biff_name)
rescue LoadError
end
1012
1369
  end
1013
1370
  end
1014
- def read_xf work, pos, len
1015
- # Offset Size Contents
1016
- # 0 2 Index to FONT record (➜ 6.43)
1017
- # 2 2 Index to FORMAT record (➜ 6.45)
1018
- # 4 2 Bit Mask Contents
1019
- # 2-0 0x0007 XF_TYPE_PROT – XF type, cell protection
1020
- # Bit Mask Contents
1021
- # 0 0x01 1 = Cell is locked
1022
- # 1 0x02 1 = Formula is hidden
1023
- # 2 0x04 0 = Cell XF; 1 = Style XF
1024
- # 15-4 0xfff0 Index to parent style XF
1025
- # (always 0xfff in style XFs)
1026
- # 6 1 Bit Mask Contents
1027
- # 2-0 0x07 XF_HOR_ALIGN – Horizontal alignment
1028
- # Value Horizontal alignment
1029
- # 0x00 General
1030
- # 0x01 Left
1031
- # 0x02 Centred
1032
- # 0x03 Right
1033
- # 0x04 Filled
1034
- # 0x05 Justified (BIFF4-BIFF8X)
1035
- # 0x06 Centred across selection
1036
- # (BIFF4-BIFF8X)
1037
- # 0x07 Distributed (BIFF8X)
1038
- # 3 0x08 1 = Text is wrapped at right border
1039
- # 6-4 0x70 XF_VERT_ALIGN – Vertical alignment
1040
- # Value Vertical alignment
1041
- # 0x00 Top
1042
- # 0x01 Centred
1043
- # 0x02 Bottom
1044
- # 0x03 Justified (BIFF5-BIFF8X)
1045
- # 0x04 Distributed (BIFF8X)
1046
- # 7 1 XF_ROTATION: Text rotation angle (see above)
1047
- # Value Text rotation
1048
- # 0 Not rotated
1049
- # 1-90 1 to 90 degrees counterclockwise
1050
- # 91-180 1 to 90 degrees clockwise
1051
- # 255 Letters are stacked top-to-bottom,
1052
- # but not rotated
1053
- # 8 1 Bit Mask Contents
1054
- # 3-0 0x0f Indent level
1055
- # 4 0x10 1 = Shrink content to fit into cell
1056
- # 5 0x40 1 = Merge Range (djberger)
1057
- # 7-6 0xc0 Text direction (BIFF8X only)
1058
- # 0 = According to context
1059
- # 1 = Left-to-right
1060
- # 2 = Right-to-left
1061
- # 9 1 Bit Mask Contents
1062
- # 7-2 0xfc XF_USED_ATTRIB – Used attributes
1063
- # Each bit describes the validity of a
1064
- # specific group of attributes. In cell XFs
1065
- # a cleared bit means the attributes of the
1066
- # parent style XF are used (but only if the
1067
- # attributes are valid there), a set bit
1068
- # means the attributes of this XF are used.
1069
- # In style XFs a cleared bit means the
1070
- # attribute setting is valid, a set bit
1071
- # means the attribute should be ignored.
1072
- # Bit Mask Contents
1073
- # 0 0x01 Flag for number format
1074
- # 1 0x02 Flag for font
1075
- # 2 0x04 Flag for horizontal and
1076
- # vertical alignment, text wrap,
1077
- # indentation, orientation,
1078
- # rotation, and text direction
1079
- # 3 0x08 Flag for border lines
1080
- # 4 0x10 Flag for background area style
1081
- # 5 0x20 Flag for cell protection (cell
1082
- # locked and formula hidden)
1083
- # 10 4 Cell border lines and background area:
1084
- # Bit Mask Contents
1085
- # 3- 0 0x0000000f Left line style (➜ 3.10)
1086
- # 7- 4 0x000000f0 Right line style (➜ 3.10)
1087
- # 11- 8 0x00000f00 Top line style (➜ 3.10)
1088
- # 15-12 0x0000f000 Bottom line style (➜ 3.10)
1089
- # 22-16 0x007f0000 Colour index (➜ 6.70)
1090
- # for left line colour
1091
- # 29-23 0x3f800000 Colour index (➜ 6.70)
1092
- # for right line colour
1093
- # 30 0x40000000 1 = Diagonal line
1094
- # from top left to right bottom
1095
- # 31 0x80000000 1 = Diagonal line
1096
- # from bottom left to right top
1097
- # 14 4 Bit Mask Contents
1098
- # 6- 0 0x0000007f Colour index (➜ 6.70)
1099
- # for top line colour
1100
- # 13- 7 0x00003f80 Colour index (➜ 6.70)
1101
- # for bottom line colour
1102
- # 20-14 0x001fc000 Colour index (➜ 6.70)
1103
- # for diagonal line colour
1104
- # 24-21 0x01e00000 Diagonal line style (➜ 3.10)
1105
- # 31-26 0xfc000000 Fill pattern (➜ 3.11)
1106
- # 18 2 Bit Mask Contents
1107
- # 6-0 0x007f Colour index (➜ 6.70)
1108
- # for pattern colour
1109
- # 13-7 0x3f80 Colour index (➜ 6.70)
1110
- # for pattern background
1111
- fmt = Format.new
1112
- font_idx, numfmt, _, xf_align, xf_rotation, xf_indent, _,
1113
- xf_borders, xf_brdcolors, xf_pattern = work.unpack binfmt(:xf)
1114
- fmt.number_format = @formats[numfmt]
1115
- ## this appears to be undocumented: the first 4 fonts seem to be accessed
1116
- # with a 0-based index, but all subsequent font indices are 1-based.
1117
- fmt.font = @workbook.font(font_idx > 3 ? font_idx - 1 : font_idx)
1118
- fmt.horizontal_align = NGILA_H_FX[xf_align & 0x07]
1119
- fmt.text_wrap = xf_align & 0x08 > 0
1120
- fmt.vertical_align = NGILA_V_FX[xf_align & 0x70]
1121
- fmt.rotation = if xf_rotation == 255
1122
- :stacked
1123
- elsif xf_rotation > 90
1124
- 90 - xf_rotation
1125
- else
1126
- xf_rotation
1127
- end
1128
- fmt.indent_level = xf_indent & 0x0f
1129
- fmt.shrink = xf_indent & 0x10 > 0
1130
- fmt.text_direction = NOITCERID_TXET_FX[xf_indent & 0xc0]
1131
- fmt.left = XF_BORDER_LINE_STYLES[xf_borders & 0x0000000f]
1132
- fmt.right = XF_BORDER_LINE_STYLES[(xf_borders & 0x000000f0) >> 4]
1133
- fmt.top = XF_BORDER_LINE_STYLES[(xf_borders & 0x00000f00) >> 8]
1134
- fmt.bottom = XF_BORDER_LINE_STYLES[(xf_borders & 0x0000f000) >> 12]
1135
- fmt.left_color = COLOR_CODES[(xf_borders & 0x007f0000) >> 16] || :black
1136
- fmt.right_color = COLOR_CODES[(xf_borders & 0x3f800000) >> 23] || :black
1137
- fmt.cross_down = xf_borders & 0x40000000 > 0
1138
- fmt.cross_up = xf_borders & 0x80000000 > 0
1139
- if xf_brdcolors
1140
- fmt.top_color = COLOR_CODES[xf_brdcolors & 0x0000007f] || :black
1141
- fmt.bottom_color = COLOR_CODES[(xf_brdcolors & 0x00003f80) >> 7] || :black
1142
- fmt.diagonal_color = COLOR_CODES[(xf_brdcolors & 0x001fc000) >> 14] || :black
1143
- #fmt.diagonal_style = COLOR_CODES[xf_brdcolors & 0x01e00000]
1144
- fmt.pattern = (xf_brdcolors & 0xfc000000) >> 26
1145
- end
1146
- fmt.pattern_fg_color = COLOR_CODES[xf_pattern & 0x007f] || :border
1147
- fmt.pattern_bg_color = COLOR_CODES[(xf_pattern & 0x3f80) >> 7] || :pattern_bg
1148
- @workbook.add_format fmt
1149
- end
1150
- def read_note worksheet, work, pos, len
1151
- #puts "\nDEBUG: found a note record in read_worksheet\n"
1152
- row, col, _, _objID, _objAuthLen, _objAuthLenFmt = work.unpack('v5C')
1153
- if (_objAuthLen > 0)
1154
- if (_objAuthLenFmt == 0)
1155
- #puts "Picking compressed charset"
1156
- #Skip to offset due to 'v5C' used above
1157
- _objAuth = work.unpack('@11C' + (_objAuthLen-1).to_s + 'C')
1158
- elsif (_objAuthLenFmt == 1)
1159
- #puts "Picking uncompressed charset"
1160
- _objAuth = work.unpack('@11S' + (_objAuthLen-1).to_s + 'S')
1161
- end
1162
- _objAuth = _objAuth.pack('C*')
1163
- else
1164
- _objAuth = ""
1165
- end
1166
- @note = Note.new
1167
- @note.length = len
1168
- @note.row = row
1169
- @note.col = col
1170
- @note.author = _objAuth
1171
- @note.objID = _objID
1172
- #Pop it on the list to be sorted in postread_worksheet
1173
- @noteList << @note
1174
- end
1175
- def read_sheet_protection worksheet, op, data
1176
- case op
1177
- when :protect
1178
- worksheet.protect! if data.unpack('v').first == 1
1179
- when :password
1180
- worksheet.password_hash = data.unpack('v').first
1181
- end
1182
- end
1183
- def set_cell worksheet, row, column, xf, value=nil
1184
- cells = @current_row_block[[worksheet, row]] ||= Row.new(nil, row)
1185
- cells.formats[column] = @workbook.format(xf) unless xf == 0
1186
- cells[column] = value
1187
- end
1188
- def set_missing_row_address worksheet, work, pos, len
1189
- # Offset Size Contents
1190
- # 0 2 Index of this row
1191
- # 2 2 Index to this column
1192
- row_index, _ = work.unpack 'v2'
1193
- unless worksheet.offsets[row_index]
1194
- @current_row_block_offset ||= [pos]
1195
- data = {
1196
- :index => row_index,
1197
- :row_block => @current_row_block_offset,
1198
- :offset => @current_row_block_offset[0],
1199
- }
1200
- worksheet.set_row_address row_index, data
1201
- end
1202
- end
1203
- def set_row_address worksheet, work, pos, len
1204
- # Offset Size Contents
1205
- # 0 2 Index of this row
1206
- # 2 2 Index to column of the first cell which
1207
- # is described by a cell record
1208
- # 4 2 Index to column of the last cell which is
1209
- # described by a cell record, increased by 1
1210
- # 6 2 Bit Mask Contents
1211
- # 14-0 0x7fff Height of the row, in twips = 1/20 of a point
1212
- # 15 0x8000 0 = Row has custom height;
1213
- # 1 = Row has default height
1214
- # 8 2 Not used
1215
- # 10 2 In BIFF3-BIFF4 this field contains a relative offset to
1216
- # calculate stream position of the first cell record for this
1217
- # row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
1218
- # anymore, but the DBCELL record (➜ 6.26) instead.
1219
- # 12 4 Option flags and default row formatting:
1220
- # Bit Mask Contents
1221
- # 2-0 0x00000007 Outline level of the row
1222
- # 4 0x00000010 1 = Outline group starts or ends here
1223
- # (depending on where the outline
1224
- # buttons are located, see WSBOOL
1225
- # record, ➜ 6.113), and is collapsed
1226
- # 5 0x00000020 1 = Row is hidden (manually, or by a
1227
- # filter or outline group)
1228
- # 6 0x00000040 1 = Row height and default font height
1229
- # do not match
1230
- # 7 0x00000080 1 = Row has explicit default format (fl)
1231
- # 8 0x00000100 Always 1
1232
- # 27-16 0x0fff0000 If fl = 1: Index to default XF record
1233
- # (➜ 6.115)
1234
- # 28 0x10000000 1 = Additional space above the row.
1235
- # This flag is set, if the upper
1236
- # border of at least one cell in this
1237
- # row or if the lower border of at
1238
- # least one cell in the row above is
1239
- # formatted with a thick line style.
1240
- # Thin and medium line styles are not
1241
- # taken into account.
1242
- # 29 0x20000000 1 = Additional space below the row.
1243
- # This flag is set, if the lower
1244
- # border of at least one cell in this
1245
- # row or if the upper border of at
1246
- # least one cell in the row below is
1247
- # formatted with a medium or thick
1248
- # line style. Thin line styles are
1249
- # not taken into account.
1250
- @current_row_block_offset ||= [pos]
1251
- index, first_used, first_unused, height, flags = work.unpack binfmt(:row)
1252
- height &= 0x7fff
1253
- format = nil
1254
- # TODO: read attributes from work[13,3], read flags
1255
- attrs = {
1256
- :default_format => format,
1257
- :first_used => first_used,
1258
- :first_unused => first_unused,
1259
- :index => index,
1260
- :row_block => @current_row_block_offset,
1261
- :offset => @current_row_block_offset[0],
1262
- :outline_level => flags & 0x00000007,
1263
- :collapsed => (flags & 0x0000010) > 0,
1264
- :hidden => (flags & 0x0000020) > 0,
1265
- }
1266
- if (flags & 0x00000040) > 0
1267
- attrs.store :height, height / TWIPS
1268
- end
1269
- if (flags & 0x00000080) > 0
1270
- xf = (flags & 0x0fff0000) >> 16
1271
- attrs.store :default_format, @workbook.format(xf)
1272
- end
1273
- # TODO: Row spacing
1274
- worksheet.set_row_address index, attrs
1275
- end
1276
- def setup io
1277
- ## Reading from StringIO fails without forced encoding
1278
- if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3.0')
1279
- io.set_encoding('ASCII-8BIT')
1280
- elsif io.respond_to?(:string) && (str = io.string) && str.respond_to?(:force_encoding)
1281
- str.force_encoding('ASCII-8BIT')
1282
- end
1283
- io.rewind
1284
- @ole = Ole::Storage.open io
1285
- @workbook = Workbook.new io, {}
1286
- %w{Book Workbook BOOK WORKBOOK book workbook}.any? do |name|
1287
- @book = @ole.file.open(name) rescue false
1288
- end
1289
- raise RuntimeError, "could not locate a workbook, possibly an empty file passed" unless @book
1290
- @data = @book.read
1291
- read_bof
1292
- @workbook.ole = @book
1293
- @workbook.bof = @bof
1294
- @workbook.version = @version
1295
- biff = @workbook.biff_version
1296
- extend_reader biff
1297
- extend_internals biff
1298
- end
1299
- private
1300
- def extend_internals version
1301
- require 'spreadsheet/excel/internals/biff%i' % version
1302
- extend Internals.const_get('Biff%i' % version)
1303
- ## spreadsheets may not include a codepage record.
1304
- @workbook.encoding = encoding 850 if version < 8
1305
- rescue LoadError
1306
- end
1307
- def extend_reader version
1308
- require 'spreadsheet/excel/reader/biff%i' % version
1309
- extend Reader.const_get('Biff%i' % version)
1310
- rescue LoadError
1311
- end
1312
- end
1313
- end
1314
1371
  end