spreadsheet 1.3.2 → 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/lib/parseexcel/parseexcel.rb +66 -58
  3. data/lib/parseexcel/parser.rb +1 -1
  4. data/lib/parseexcel.rb +1 -1
  5. data/lib/spreadsheet/column.rb +11 -9
  6. data/lib/spreadsheet/compatibility.rb +3 -1
  7. data/lib/spreadsheet/datatypes.rb +149 -147
  8. data/lib/spreadsheet/encodings.rb +20 -16
  9. data/lib/spreadsheet/errors.rb +2 -2
  10. data/lib/spreadsheet/excel/error.rb +23 -22
  11. data/lib/spreadsheet/excel/internals/biff5.rb +11 -11
  12. data/lib/spreadsheet/excel/internals/biff8.rb +13 -13
  13. data/lib/spreadsheet/excel/internals.rb +451 -451
  14. data/lib/spreadsheet/excel/offset.rb +32 -31
  15. data/lib/spreadsheet/excel/password_hash.rb +18 -18
  16. data/lib/spreadsheet/excel/reader/biff5.rb +34 -35
  17. data/lib/spreadsheet/excel/reader/biff8.rb +234 -222
  18. data/lib/spreadsheet/excel/reader.rb +1320 -1274
  19. data/lib/spreadsheet/excel/rgb.rb +91 -91
  20. data/lib/spreadsheet/excel/row.rb +99 -91
  21. data/lib/spreadsheet/excel/sst_entry.rb +40 -38
  22. data/lib/spreadsheet/excel/workbook.rb +86 -76
  23. data/lib/spreadsheet/excel/worksheet.rb +125 -107
  24. data/lib/spreadsheet/excel/writer/biff8.rb +56 -55
  25. data/lib/spreadsheet/excel/writer/format.rb +273 -256
  26. data/lib/spreadsheet/excel/writer/n_worksheet.rb +837 -798
  27. data/lib/spreadsheet/excel/writer/workbook.rb +671 -635
  28. data/lib/spreadsheet/excel/writer/worksheet.rb +898 -861
  29. data/lib/spreadsheet/excel/writer.rb +1 -1
  30. data/lib/spreadsheet/excel.rb +18 -11
  31. data/lib/spreadsheet/font.rb +30 -26
  32. data/lib/spreadsheet/format.rb +74 -59
  33. data/lib/spreadsheet/link.rb +7 -5
  34. data/lib/spreadsheet/note.rb +6 -6
  35. data/lib/spreadsheet/noteObject.rb +5 -5
  36. data/lib/spreadsheet/row.rb +33 -23
  37. data/lib/spreadsheet/version.rb +1 -1
  38. data/lib/spreadsheet/workbook.rb +27 -13
  39. data/lib/spreadsheet/worksheet.rb +102 -68
  40. data/lib/spreadsheet/writer.rb +3 -0
  41. data/lib/spreadsheet.rb +12 -15
  42. data/test/excel/reader.rb +8 -8
  43. data/test/excel/row.rb +35 -31
  44. data/test/excel/writer/workbook.rb +18 -16
  45. data/test/excel/writer/worksheet.rb +10 -8
  46. data/test/font.rb +44 -32
  47. data/test/format.rb +38 -33
  48. data/test/integration.rb +627 -598
  49. data/test/row.rb +5 -3
  50. data/test/suite.rb +7 -7
  51. data/test/workbook.rb +15 -14
  52. data/test/workbook_protection.rb +5 -5
  53. data/test/worksheet.rb +36 -34
  54. metadata +50 -8
@@ -1,1314 +1,1360 @@
1
- require 'spreadsheet/encodings'
2
- require 'spreadsheet/font'
3
- require 'spreadsheet/formula'
4
- require 'spreadsheet/link'
5
- require 'spreadsheet/note'
6
- require 'spreadsheet/noteObject'
7
- require 'spreadsheet/excel/error'
8
- require 'spreadsheet/excel/internals'
9
- require 'spreadsheet/excel/sst_entry'
10
- require 'spreadsheet/excel/worksheet'
1
+ require "spreadsheet/encodings"
2
+ require "spreadsheet/font"
3
+ require "spreadsheet/formula"
4
+ require "spreadsheet/link"
5
+ require "spreadsheet/note"
6
+ require "spreadsheet/noteObject"
7
+ require "spreadsheet/excel/error"
8
+ require "spreadsheet/excel/internals"
9
+ require "spreadsheet/excel/sst_entry"
10
+ require "spreadsheet/excel/worksheet"
11
11
 
12
12
  module Spreadsheet
13
13
  module Excel
14
- ##
15
- # Reader class for Excel Workbooks. Most read_* method correspond to an
16
- # Excel-Record/Opcode. You should not need to call any of its methods
17
- # directly. If you think you do, look at #read
18
- class Reader
19
- include Spreadsheet::Encodings
20
- include Spreadsheet::Excel::Internals
21
- ROW_BLOCK_OPS = {
22
- :blank => true, :boolerr => true, :dbcell => true, :formula => true,
23
- :label => true, :labelsst => true, :mulblank => true, :mulrk => true,
24
- :number => true, :rk => true, :rstring => true,
25
- }
26
- def initialize opts = {}
27
- @pos = 0
28
- @bigendian = opts.fetch(:bigendian) {
29
- [1].pack('l') != "\001\000\000\000"
30
- }
31
- @opts = opts
32
- @boundsheets = nil
33
- @current_row_block = {}
34
- @current_row_block_offset = nil
35
- @formats = {}
36
- BUILTIN_FORMATS.each do |key, fmt| @formats.store key, client(fmt, 'UTF-8') end
37
- end
38
- def decode_rk work
39
- # Bit Mask Contents
40
- # 0 0x00000001 0 = Value not changed 1 = Value is multiplied by 100
41
- # 1 0x00000002 0 = Floating-point value 1 = Signed integer value
42
- # 31-2 0xFFFFFFFC Encoded value
43
- #
44
- # If bit 1 is cleared, the encoded value represents the 30 most significant
45
- # bits of an IEEE 754 floating-point value (64-bit double precision). The
46
- # 34 least significant bits must be set to zero. If bit 1 is set, the
47
- # encoded value represents a signed 30-bit integer value. To get the
48
- # correct integer, the encoded value has to be shifted right arithmetically
49
- # by 2 bits. If bit 0 is set, the decoded value (both integer and
50
- # floating-point) must be divided by 100 to get the final result.
51
- flags, = work.unpack 'C'
52
- cent = flags & 1
53
- int = flags & 2
54
- value = 0
55
- if int == 0
56
- ## remove two bits
57
- integer, = work.unpack 'V'
58
- integer &= 0xfffffffc
59
- value, = ("\0\0\0\0" + [integer].pack('V')).unpack EIGHT_BYTE_DOUBLE
60
- else
61
- ## I can't find a format for unpacking a little endian signed integer.
62
- # 'V' works for packing, but not for unpacking. But the following works
63
- # fine afaics:
64
- unsigned, = (@bigendian ? work.reverse : work).unpack 'l'
65
- ## remove two bits
66
- value = unsigned >> 2
67
- end
68
- if cent == 1
69
- value /= 100.0
70
- end
71
- value
72
- end
73
- def encoding codepage_id
74
- name = CODEPAGES.fetch(codepage_id) do
75
- raise Spreadsheet::Errors::UnknownCodepage, "Unknown Codepage 0x%04x" % codepage_id
76
- end
14
+ ##
15
+ # Reader class for Excel Workbooks. Most read_* method correspond to an
16
+ # Excel-Record/Opcode. You should not need to call any of its methods
17
+ # directly. If you think you do, look at #read
18
+ class Reader
19
+ include Spreadsheet::Encodings
20
+ include Spreadsheet::Excel::Internals
21
+ ROW_BLOCK_OPS = {
22
+ blank: true, boolerr: true, dbcell: true, formula: true,
23
+ label: true, labelsst: true, mulblank: true, mulrk: true,
24
+ number: true, rk: true, rstring: true
25
+ }
26
+ def initialize opts = {}
27
+ @pos = 0
28
+ @bigendian = opts.fetch(:bigendian) {
29
+ [1].pack("l") != "\001\000\000\000"
30
+ }
31
+ @opts = opts
32
+ @boundsheets = nil
33
+ @current_row_block = {}
34
+ @current_row_block_offset = nil
35
+ @formats = {}
36
+ BUILTIN_FORMATS.each { |key, fmt| @formats.store key, client(fmt, "UTF-8") }
37
+ end
77
38
 
78
- if RUBY_VERSION >= '1.9'
79
- begin
80
- Encoding.find name
81
- rescue ArgumentError
82
- raise Spreadsheet::Errors::UnsupportedEncoding, "Unsupported encoding with name '#{name}'"
39
+ def decode_rk work
40
+ # Bit Mask Contents
41
+ # 0 0x00000001 0 = Value not changed 1 = Value is multiplied by 100
42
+ # 1 0x00000002 0 = Floating-point value 1 = Signed integer value
43
+ # 31-2 0xFFFFFFFC Encoded value
44
+ #
45
+ # If bit 1 is cleared, the encoded value represents the 30 most significant
46
+ # bits of an IEEE 754 floating-point value (64-bit double precision). The
47
+ # 34 least significant bits must be set to zero. If bit 1 is set, the
48
+ # encoded value represents a signed 30-bit integer value. To get the
49
+ # correct integer, the encoded value has to be shifted right arithmetically
50
+ # by 2 bits. If bit 0 is set, the decoded value (both integer and
51
+ # floating-point) must be divided by 100 to get the final result.
52
+ flags, = work.unpack "C"
53
+ cent = flags & 1
54
+ int = flags & 2
55
+ value = 0
56
+ if int == 0
57
+ ## remove two bits
58
+ integer, = work.unpack "V"
59
+ integer &= 0xfffffffc
60
+ value, = ("\0\0\0\0" + [integer].pack("V")).unpack EIGHT_BYTE_DOUBLE
61
+ else
62
+ ## I can't find a format for unpacking a little endian signed integer.
63
+ # 'V' works for packing, but not for unpacking. But the following works
64
+ # fine afaics:
65
+ unsigned, = (@bigendian ? work.reverse : work).unpack "l"
66
+ ## remove two bits
67
+ value = unsigned >> 2
68
+ end
69
+ if cent == 1
70
+ value /= 100.0
71
+ end
72
+ value
83
73
  end
84
- else
85
- name
86
- end
87
- end
88
- def get_next_chunk
89
- pos = @pos
90
- if pos < @data.size
91
- op, len = @data[@pos,OPCODE_SIZE].unpack('v2')
92
- @pos += OPCODE_SIZE
93
- if len
94
- work = @data[@pos,len]
95
- @pos += len
96
- code = SEDOCPO.fetch(op, op)
97
- if io = @opts[:print_opcodes]
98
- io.puts sprintf("0x%04x/%-16s %5i: %s",
99
- op, code.inspect, len, work.inspect)
74
+
75
+ def encoding codepage_id
76
+ name = CODEPAGES.fetch(codepage_id) do
77
+ raise Spreadsheet::Errors::UnknownCodepage, "Unknown Codepage 0x%04x" % codepage_id
78
+ end
79
+
80
+ if RUBY_VERSION >= "1.9"
81
+ begin
82
+ Encoding.find name
83
+ rescue ArgumentError
84
+ raise Spreadsheet::Errors::UnsupportedEncoding, "Unsupported encoding with name '#{name}'"
85
+ end
86
+ else
87
+ name
100
88
  end
101
- [ pos, code, len + OPCODE_SIZE, work]
102
89
  end
103
- end
104
- end
105
- def in_row_block? op, previous
106
- if op == :row
107
- previous == op
108
- else
109
- ROW_BLOCK_OPS.include?(op)
110
- end
111
- end
112
- def memoize?
113
- @opts[:memoization]
114
- end
115
- def postread_workbook
116
- sheets = @workbook.worksheets
117
- sheets.each_with_index do |sheet, idx|
118
- offset = sheet.offset
119
- nxt = (nxtsheet = sheets[idx + 1]) ? nxtsheet.offset : @workbook.ole.size
120
- @workbook.offsets.store sheet, [offset, nxt - offset]
121
- end
122
- end
123
- def postread_worksheet worksheet
124
- #We now have a lot of Note and NoteObjects, but they're not linked
125
- #So link the noteObject(text) to the note (with author, position)
126
- #TODO
127
- @noteList.each do |i|
128
- matching_objs = @noteObjList.select { |j| j.objID == i.objID }
129
- if matching_objs.length > 1
130
- puts "ERROR - more than one matching object ID!"
90
+
91
+ def get_next_chunk
92
+ pos = @pos
93
+ if pos < @data.size
94
+ op, len = @data[@pos, OPCODE_SIZE].unpack("v2")
95
+ @pos += OPCODE_SIZE
96
+ if len
97
+ work = @data[@pos, len]
98
+ @pos += len
99
+ code = SEDOCPO.fetch(op, op)
100
+ if (io = @opts[:print_opcodes])
101
+ io.puts sprintf("0x%04x/%-16s %5i: %s",
102
+ op, code.inspect, len, work.inspect)
103
+ end
104
+ [pos, code, len + OPCODE_SIZE, work]
105
+ end
131
106
  end
132
- matching_obj = matching_objs.first
133
- i.text = matching_obj.nil? ? '' : matching_obj.text
134
- worksheet.add_note i.row, i.col, i.text
135
- end
136
- end
137
- ##
138
- # The entry-point for reading Excel-documents. Reads the Biff-Version and
139
- # loads additional reader-methods before proceeding with parsing the document.
140
- def read io
141
- setup io
142
- read_workbook
143
- @workbook.default_format = @workbook.format 0
144
- @workbook.changes.clear
145
- @workbook
146
- end
147
- def read_blank worksheet, addr, work
148
- # Offset Size Contents
149
- # 0 2 Index to row
150
- # 2 2 Index to column
151
- # 4 2 Index to XF record (➜ 6.115)
152
- row, column, xf = work.unpack binfmt(:blank)
153
- set_cell worksheet, row, column, xf
154
- end
155
- def read_bof
156
- # Offset Size Contents
157
- # 0 2 BIFF version (always 0x0600 for BIFF8)
158
- # 2 2 Type of the following data: 0x0005 = Workbook globals
159
- # 0x0006 = Visual Basic module
160
- # 0x0010 = Worksheet
161
- # 0x0020 = Chart
162
- # 0x0040 = Macro sheet
163
- # 0x0100 = Workspace file
164
- # 4 2 Build identifier
165
- # 6 2 Build year
166
- # 8 4 File history flags
167
- # 12 4 Lowest Excel version that can read all records in this file
168
- _, @bof, _, work = get_next_chunk
169
- ## version and datatype are common to all Excel-Versions. Later versions
170
- # have additional information such as build-id and -year (from BIFF5).
171
- # These are ignored for the time being.
172
- version, datatype = work.unpack('v2')
173
- if datatype == 0x5
174
- @version = version
175
- end
176
- end
177
- def read_boolerr worksheet, addr, work
178
- # Offset Size Contents
179
- # 0 2 Index to row
180
- # 2 2 Index to column
181
- # 4 2 Index to XF record (➜ 6.115)
182
- # 6 1 Boolean or error value (type depends on the following byte)
183
- # 7 1 0 = Boolean value; 1 = Error code
184
- row, column, xf, value, error = work.unpack 'v3C2'
185
- set_cell worksheet, row, column, xf, error == 0 ? value > 0 : Error.new(value)
186
- end
187
- def read_boundsheet work, pos, len
188
- # Offset Size Contents
189
- # 0 4 Absolute stream position of the BOF record of the sheet
190
- # represented by this record. This field is never encrypted
191
- # in protected files.
192
- # 4 1 Visibility: 0x00 = Visible
193
- # 0x01 = Hidden
194
- # 0x02 = Strong hidden (see below)
195
- # 5 1 Sheet type: 0x00 = Worksheet
196
- # 0x02 = Chart
197
- # 0x06 = Visual Basic module
198
- # 6 var. Sheet name: BIFF5/BIFF7: Byte string,
199
- # 8-bit string length (➜ 3.3)
200
- # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
201
- offset, visibility, _ = work.unpack("VC2")
202
- name = client read_string(work[6..-1]), @workbook.encoding
203
- if @boundsheets
204
- @boundsheets[0] += 1
205
- @boundsheets[2] += len
206
- else
207
- @boundsheets = [1, pos, len]
208
- end
209
- @workbook.set_boundsheets(*@boundsheets)
210
- @workbook.add_worksheet Worksheet.new(:name => name,
211
- :ole => @book,
212
- :offset => offset,
213
- :reader => self,
214
- :visibility => WORKSHEET_VISIBILITIES[visibility])
215
- end
216
- def read_codepage work, pos, len
217
- codepage, _ = work.unpack 'v'
218
- @workbook.set_encoding encoding(codepage), pos, len
219
- end
220
- def read_colinfo worksheet, work, pos, len
221
- # Offset Size Contents
222
- # 0 2 Index to first column in the range
223
- # 2 2 Index to last column in the range
224
- # 4 2 Width of the columns in 1/256 of the width of the zero
225
- # character, using default font (first FONT record in the
226
- # file)
227
- # 6 2 Index to XF record (➜ 6.115) for default column formatting
228
- # 8 2 Option flags:
229
- # Bits Mask Contents
230
- # 0 0x0001 1 = Columns are hidden
231
- # 10-8 0x0700 Outline level of the columns (0 = no outline)
232
- # 12 0x1000 1 = Columns are collapsed
233
- # 10 2 Not used
234
- first, last, width, xf, opts = work.unpack binfmt(:colinfo)[0..-2]
235
- first.upto last do |col|
236
- column = Column.new col, @workbook.format(xf),
237
- :width => width.to_f / 256,
238
- :hidden => (opts & 0x0001) > 0,
239
- :collapsed => (opts & 0x1000) > 0,
240
- :outline_level => (opts & 0x0700) / 256
241
- column.worksheet = worksheet
242
- worksheet.columns[col] = column
243
- end
244
- end
245
- def read_dimensions worksheet, work, pos, len
246
- # Offset Size Contents
247
- # 0 4 Index to first used row
248
- # 4 4 Index to last used row, increased by 1
249
- # 8 2 Index to first used column
250
- # 10 2 Index to last used column, increased by 1
251
- # 12 2 Not used
252
- worksheet.set_dimensions work.unpack(binfmt(:dimensions)), pos, len
253
- end
254
- def read_font work, pos, len
255
- # Offset Size Contents
256
- # 0 2 Height of the font (in twips = 1/20 of a point)
257
- # 2 2 Option flags:
258
- # Bit Mask Contents
259
- # 0 0x0001 1 = Characters are bold (redundant, see below)
260
- # 1 0x0002 1 = Characters are italic
261
- # 2 0x0004 1 = Characters are underlined
262
- # (redundant, see below)
263
- # 3 0x0008 1 = Characters are struck out
264
- # 4 0x0010 1 = Characters are outlined (djberger)
265
- # 5 0x0020 1 = Characters are shadowed (djberger)
266
- # 4 2 Colour index (➜ 6.70)
267
- # 6 2 Font weight (100-1000). Standard values are
268
- # 0x0190 (400) for normal text and
269
- # 0x02bc (700) for bold text.
270
- # 8 2 Escapement type: 0x0000 = None
271
- # 0x0001 = Superscript
272
- # 0x0002 = Subscript
273
- # 10 1 Underline type: 0x00 = None
274
- # 0x01 = Single
275
- # 0x02 = Double
276
- # 0x21 = Single accounting
277
- # 0x22 = Double accounting
278
- # 11 1 Font family:
279
- # 0x00 = None (unknown or don't care)
280
- # 0x01 = Roman (variable width, serifed)
281
- # 0x02 = Swiss (variable width, sans-serifed)
282
- # 0x03 = Modern (fixed width, serifed or sans-serifed)
283
- # 0x04 = Script (cursive)
284
- # 0x05 = Decorative (specialised,
285
- # for example Old English, Fraktur)
286
- # 12 1 Character set: 0x00 = 0 = ANSI Latin
287
- # 0x01 = 1 = System default
288
- # 0x02 = 2 = Symbol
289
- # 0x4d = 77 = Apple Roman
290
- # 0x80 = 128 = ANSI Japanese Shift-JIS
291
- # 0x81 = 129 = ANSI Korean (Hangul)
292
- # 0x82 = 130 = ANSI Korean (Johab)
293
- # 0x86 = 134 = ANSI Chinese Simplified GBK
294
- # 0x88 = 136 = ANSI Chinese Traditional BIG5
295
- # 0xa1 = 161 = ANSI Greek
296
- # 0xa2 = 162 = ANSI Turkish
297
- # 0xa3 = 163 = ANSI Vietnamese
298
- # 0xb1 = 177 = ANSI Hebrew
299
- # 0xb2 = 178 = ANSI Arabic
300
- # 0xba = 186 = ANSI Baltic
301
- # 0xcc = 204 = ANSI Cyrillic
302
- # 0xde = 222 = ANSI Thai
303
- # 0xee = 238 = ANSI Latin II (Central European)
304
- # 0xff = 255 = OEM Latin I
305
- # 13 1 Not used
306
- # 14 var. Font name:
307
- # BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
308
- # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
309
- name = client read_string(work[14..-1]), @workbook.encoding
310
- font = Font.new name
311
- size, opts, color, font.weight, escapement, underline,
312
- family, encoding = work.unpack binfmt(:font)
313
- font.size = size / TWIPS
314
- font.italic = opts & 0x0002
315
- font.strikeout = opts & 0x0008
316
- font.outline = opts & 0x0010
317
- font.shadow = opts & 0x0020
318
- font.color = COLOR_CODES[color] || :text
319
- font.escapement = ESCAPEMENT_TYPES[escapement]
320
- font.underline = UNDERLINE_TYPES[underline]
321
- font.family = FONT_FAMILIES[family]
322
- font.encoding = FONT_ENCODINGS[encoding]
323
- @workbook.add_font font
324
- end
325
- def read_format work, pos, len
326
- # Offset Size Contents
327
- # 0 2 Format index used in other records
328
- # 2 var. Number format string
329
- # (Unicode string, 16-bit string length, ➜ 3.4)
330
- idx, = work.unpack 'v'
331
- value = read_string work[2..-1], 2
332
- @formats.store idx, client(value, @workbook.encoding)
333
- end
334
- def read_formula worksheet, addr, work
335
- # Offset Size Contents
336
- # 0 2 Index to row
337
- # 2 2 Index to column
338
- # 4 2 Index to XF record (➜ 6.115)
339
- # 6 8 Result of the formula. See below for details.
340
- # 14 2 Option flags:
341
- # Bit Mask Contents
342
- # 0 0x0001 1 = Recalculate always
343
- # 1 0x0002 1 = Calculate on open
344
- # 3 0x0008 1 = Part of a shared formula
345
- # 16 4 Not used
346
- # 20 var. Formula data (RPN token array, ➜ 4)
347
- # Offset Size Contents
348
- # 0 2 Size of the following formula data (sz)
349
- # 2 sz Formula data (RPN token array)
350
- # [2+sz] var. (optional) Additional data for specific tokens
351
- # (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
352
- #
353
- # Result of the Formula
354
- # Dependent on the type of value the formula returns, the result field has
355
- # the following format:
356
- #
357
- # Result is a numeric value:
358
- # Offset Size Contents
359
- # 0 8 IEEE 754 floating-point value (64-bit double precision)
360
- #
361
- # Result is a string (the string follows in a STRING record, ➜ 6.98):
362
- # Offset Size Contents
363
- # 0 1 0x00 (identifier for a string value)
364
- # 1 5 Not used
365
- # 6 2 0xffff
366
- # Note: In BIFF8 the string must not be empty. For empty cells there is a
367
- # special identifier defined (see below).
368
- #
369
- # Result is a Boolean value:
370
- # Offset Size Contents
371
- # 0 1 0x01 (identifier for a Boolean value)
372
- # 1 1 Not used
373
- # 2 1 0 = FALSE, 1 = TRUE
374
- # 3 3 Not used
375
- # 6 2 0xffff
376
- #
377
- # Result is an error value:
378
- # Offset Size Contents
379
- # 0 1 0x02 (identifier for an error value)
380
- # 1 1 Not used
381
- # 2 1 Error code (➜ 3.7)
382
- # 3 3 Not used
383
- # 6 2 0xffff
384
- #
385
- # Result is an empty cell (BIFF8), for example an empty string:
386
- # Offset Size Contents
387
- # 0 1 0x03 (identifier for an empty cell)
388
- # 1 5 Not used
389
- # 6 2 0xffff
390
- row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
391
- formula = Formula.new
392
- formula.shared = (opts & 0x08) > 0
393
- formula.data = work[20..-1]
394
- if rcheck != 0xffff || rtype > 3
395
- value, = work.unpack 'x6E'
396
- unless value
397
- # on architectures where sizeof(double) > 8
398
- value, = work.unpack 'x6e'
399
107
  end
400
- formula.value = value
401
- elsif rtype == 0
402
- pos, op, _len, work = get_next_chunk
403
- if op == :sharedfmla
404
- ## TODO: formula-support in 0.8.0
405
- pos, op, _len, work = get_next_chunk
108
+
109
+ def in_row_block? op, previous
110
+ if op == :row
111
+ previous == op
112
+ else
113
+ ROW_BLOCK_OPS.include?(op)
114
+ end
406
115
  end
407
- if op == :string
408
- formula.value = client read_string(work, 2), @workbook.encoding
409
- else
410
- warn "String Value expected after Formula, but got #{op}"
411
- formula.value = Error.new 0x2a
412
- @pos = pos
116
+
117
+ def memoize?
118
+ @opts[:memoization]
413
119
  end
414
- elsif rtype == 1
415
- formula.value = rval > 0
416
- elsif rtype == 2
417
- formula.value = Error.new rval
418
- else
419
- # leave the Formula value blank
420
- end
421
- set_cell worksheet, row, column, xf, formula
422
- end
423
- def read_hlink worksheet, work, pos, len
424
- # 6.53.1 Common Record Contents
425
- # Offset Size Contents
426
- # 0 8 Cell range address of all cells containing this hyperlink
427
- # (➜ 3.13.1)
428
- # 8 16 GUID of StdLink:
429
- # D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
430
- # (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
431
- # 24 4 Unknown value: 0x00000002
432
- # 28 4 Option flags (see below)
433
- # Bit Mask Contents
434
- # 0 0x00000001 0 = No link extant
435
- # 1 = File link or URL
436
- # 1 0x00000002 0 = Relative file path
437
- # 1 = Absolute path or URL
438
- # 2 and 4 0x00000014 0 = No description
439
- # 1 (both bits) = Description
440
- # 3 0x00000008 0 = No text mark
441
- # 1 = Text mark
442
- # 7 0x00000080 0 = No target frame
443
- # 1 = Target frame
444
- # 8 0x00000100 0 = File link or URL
445
- # 1 = UNC path (incl. server name)
446
- #--------------------------------------------------------------------------
447
- # [32] 4 (optional, see option flags) Character count of description
448
- # text, including trailing zero word (dl)
449
- # [36] 2∙dl (optional, see option flags) Character array of description
450
- # text, no Unicode string header, always 16-bit characters,
451
- # zero-terminated
452
- #--------------------------------------------------------------------------
453
- # [var.] 4 (optional, see option flags) Character count of target
454
- # frame, including trailing zero word (fl)
455
- # [var.] 2∙fl (optional, see option flags) Character array of target
456
- # frame, no Unicode string header, always 16-bit characters,
457
- # zero-terminated
458
- #--------------------------------------------------------------------------
459
- # var. var. Special data (➜ 6.53.2 and following)
460
- #--------------------------------------------------------------------------
461
- # [var.] 4 (optional, see option flags) Character count of the text
462
- # mark, including trailing zero word (tl)
463
- # [var.] 2∙tl (optional, see option flags) Character array of the text
464
- # mark without “#” sign, no Unicode string header, always
465
- # 16-bit characters, zero-terminated
466
- firstrow, lastrow, firstcol, lastcol, _, opts = work.unpack 'v4H32x4V'
467
- has_link = opts & 0x0001
468
- desc = opts & 0x0014
469
- textmark = opts & 0x0008
470
- target = opts & 0x0080
471
- unc = opts & 0x0100
472
- link = Link.new
473
- _, description = nil
474
- pos = 32
475
- if desc > 0
476
- description, pos = read_hlink_string work, pos
477
- link << description
478
- end
479
- if target > 0
480
- link.target_frame, pos = read_hlink_string work, pos
481
- end
482
- if unc > 0
483
- # 6.53.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
484
- # These data fields are for UNC paths containing a server name (for
485
- # instance “\\server\path\file.xls”). The lower 9 bits of the option
486
- # flags field must be 1.x00x.xx112.
487
- # Offset Size Contents
488
- # 0 4 Character count of the UNC,
489
- # including trailing zero word (fl)
490
- # 4 2∙fl Character array of the UNC, no Unicode string header,
491
- # always 16-bit characters, zeroterminated.
492
- link.url, pos = read_hlink_string work, pos
493
- elsif has_link > 0
494
- uid, = work.unpack "x#{pos}H32"
495
- pos += 16
496
- if uid == "e0c9ea79f9bace118c8200aa004ba90b"
497
- # 6.53.2 Hyperlink containing a URL (Uniform Resource Locator)
498
- # These data fields occur for links which are not local files or files
499
- # in the local network (for instance HTTP and FTP links and e-mail
500
- # addresses). The lower 9 bits of the option flags field must be
501
- # 0.x00x.xx112 (x means optional, depending on hyperlink content). The
502
- # GUID could be used to distinguish a URL from a file link.
120
+
121
+ def postread_workbook
122
+ sheets = @workbook.worksheets
123
+ sheets.each_with_index do |sheet, idx|
124
+ offset = sheet.offset
125
+ nxt = (nxtsheet = sheets[idx + 1]) ? nxtsheet.offset : @workbook.ole.size
126
+ @workbook.offsets.store sheet, [offset, nxt - offset]
127
+ end
128
+ end
129
+
130
+ def postread_worksheet worksheet
131
+ # We now have a lot of Note and NoteObjects, but they're not linked
132
+ # So link the noteObject(text) to the note (with author, position)
133
+ # TODO
134
+ @note_list.each do |i|
135
+ matching_objs = @note_ob_list.select { |j| j.obj_id == i.obj_id }
136
+ if matching_objs.length > 1
137
+ puts "ERROR - more than one matching object ID!"
138
+ end
139
+ matching_obj = matching_objs.first
140
+ i.text = matching_obj.nil? ? "" : matching_obj.text
141
+ worksheet.add_note i.row, i.col, i.text
142
+ end
143
+ end
144
+
145
+ ##
146
+ # The entry-point for reading Excel-documents. Reads the Biff-Version and
147
+ # loads additional reader-methods before proceeding with parsing the document.
148
+ def read io
149
+ setup io
150
+ read_workbook
151
+ @workbook.default_format = @workbook.format 0
152
+ @workbook.changes.clear
153
+ @workbook
154
+ end
155
+
156
+ def read_blank worksheet, addr, work
503
157
  # Offset Size Contents
504
- # 0 16 GUID of URL Moniker:
505
- # E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
506
- # (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
507
- # 16 4 Size of character array of the URL, including trailing
508
- # zero word (us). There are us/2-1 characters in the
509
- # following string.
510
- # 20 us Character array of the URL, no Unicode string header,
511
- # always 16-bit characters, zeroterminated
512
- size, = work.unpack "x#{pos}V"
513
- pos += 4
514
- data = work[pos, size].chomp "\000\000"
515
- link.url = client data
516
- pos += size
517
- else
518
- # 6.53.3 Hyperlink to a Local File
519
- # These data fields are for links to files on local drives. The path of
520
- # the file can be complete with drive letter (absolute) or relative to
521
- # the location of the workbook. The lower 9 bits of the option flags
522
- # field must be 0.x00x.xxx12. The GUID could be used to distinguish a
523
- # URL from a file link.
158
+ # 0 2 Index to row
159
+ # 2 2 Index to column
160
+ # 4 2 Index to XF record (➜ 6.115)
161
+ row, column, xf = work.unpack binfmt(:blank)
162
+ set_cell worksheet, row, column, xf
163
+ end
164
+
165
+ def read_bof
524
166
  # Offset Size Contents
525
- # 0 16 GUID of File Moniker:
526
- # 03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
527
- # (00000303-0000-0000-C000-000000000046)
528
- # 16 2 Directory up-level count. Each leading “..\” in the
529
- # file link is deleted and increases this counter.
530
- # 18 4 Character count of the shortened file path and name,
531
- # including trailing zero byte (sl)
532
- # 22 sl Character array of the shortened file path and name in
533
- # 8.3-DOS-format. This field can be filled with a long
534
- # file name too. No Unicode string header, always 8-bit
535
- # characters, zeroterminated.
536
- # 22+sl 24 Unknown byte sequence:
537
- # FF FF AD DE 00 00 00 00
538
- # 00 00 00 00 00 00 00 00
539
- # 00 00 00 00 00 00 00 00
540
- # 46+sl 4 Size of the following file link field including string
541
- # length field and additional data field (sz). If sz is
542
- # zero, nothing will follow (except a text mark).
543
- # [50+sl] 4 (optional) Size of character array of the extended file
544
- # path and name (xl). There are xl/2 characters in the
545
- # following string.
546
- # [54+sl] 2 (optional) Unknown byte sequence: 03 00
547
- # [56+sl] xl (optional) Character array of the extended file path
548
- # and name (xl), no Unicode string header, always 16-bit
549
- # characters, not zero-terminated
550
- uplevel, count = work.unpack "x#{pos}vV"
551
- pos += 6
552
- # TODO: short file path may have any of the OEM encodings. Find out which
553
- # and use the #client method to convert the encoding.
554
- prefix = internal('..\\', 'UTF-8') * uplevel
555
- link.dos = link.url = prefix << work[pos, count].chomp("\000")
556
- pos += count + 24
557
- total, size = work.unpack "x#{pos}V2"
558
- pos += 10
559
- if total > 0
560
- link.url = client work[pos, size]
561
- pos += size
167
+ # 0 2 BIFF version (always 0x0600 for BIFF8)
168
+ # 2 2 Type of the following data: 0x0005 = Workbook globals
169
+ # 0x0006 = Visual Basic module
170
+ # 0x0010 = Worksheet
171
+ # 0x0020 = Chart
172
+ # 0x0040 = Macro sheet
173
+ # 0x0100 = Workspace file
174
+ # 4 2 Build identifier
175
+ # 6 2 Build year
176
+ # 8 4 File history flags
177
+ # 12 4 Lowest Excel version that can read all records in this file
178
+ _, @bof, _, work = get_next_chunk
179
+ ## version and datatype are common to all Excel-Versions. Later versions
180
+ # have additional information such as build-id and -year (from BIFF5).
181
+ # These are ignored for the time being.
182
+ version, datatype = work.unpack("v2")
183
+ if datatype == 0x5
184
+ @version = version
562
185
  end
563
186
  end
564
- else
565
- # 6.53.5 Hyperlink to the Current Workbook
566
- # In this case only the text mark field is present (optional with
567
- # description).
568
- # Example: The URL “#Sheet2!B1:C2” refers to the given range in the
569
- # current workbook.
570
- # The lower 9 bits of the option flags field must be 0.x00x.1x002.
571
- end
572
- if textmark > 0
573
- link.fragment, _ = read_hlink_string work, pos
574
- end
575
- if link.empty?
576
- link << link.href
577
- end
578
- firstrow.upto lastrow do |row|
579
- firstcol.upto lastcol do |col|
580
- worksheet.add_link row, col, link
187
+
188
+ def read_boolerr worksheet, addr, work
189
+ # Offset Size Contents
190
+ # 0 2 Index to row
191
+ # 2 2 Index to column
192
+ # 4 2 Index to XF record (➜ 6.115)
193
+ # 6 1 Boolean or error value (type depends on the following byte)
194
+ # 7 1 0 = Boolean value; 1 = Error code
195
+ row, column, xf, value, error = work.unpack "v3C2"
196
+ set_cell worksheet, row, column, xf, (error == 0) ? value > 0 : Error.new(value)
581
197
  end
582
- end
583
- end
584
- def read_hlink_string work, pos
585
- count, = work.unpack "x#{pos}V"
586
- len = count * 2
587
- pos += 4
588
- data = work[pos, len].chomp "\000\000"
589
- pos += len
590
- [client(data, 'UTF-16LE'), pos]
591
- end
592
- def read_index worksheet, work, pos, len
593
- # Offset Size Contents
594
- # 0 4 Not used
595
- # 4 4 Index to first used row (rf, 0-based)
596
- # 8 4 Index to first row of unused tail of sheet
597
- # (rl, last used row + 1, 0-based)
598
- # 12 4 Absolute stream position of the
599
- # DEFCOLWIDTH record (➜ 6.29) of the current sheet. If this
600
- # record does not exist, the offset points to the record at
601
- # the position where the DEFCOLWIDTH record would occur.
602
- # 16 4∙nm Array of nm absolute stream positions to the
603
- # DBCELL record (➜ 6.26) of each Row Block
604
- # TODO: use the index if it exists
605
- # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
606
- end
607
- def read_label worksheet, addr, work
608
- # Offset Size Contents
609
- # 0 2 Index to row
610
- # 2 2 Index to column
611
- # 4 2 Index to XF record (➜ 6.115)
612
- # 6 var. Unicode string, 16-bit string length (➜ 3.4)
613
- row, column, xf = work.unpack 'v3'
614
- value = client read_string(work[6..-1], 2), @workbook.encoding
615
- set_cell worksheet, row, column, xf, value
616
- end
617
- def read_labelsst worksheet, addr, work
618
- # Offset Size Contents
619
- # 0 2 Index to row
620
- # 2 2 Index to column
621
- # 4 2 Index to XF record (➜ 6.115)
622
- # 6 4 Index into SST record (➜ 6.96)
623
- row, column, xf, index = work.unpack binfmt(:labelsst)
624
- set_cell worksheet, row, column, xf, worksheet.shared_string(index)
625
- end
626
- def read_mulblank worksheet, addr, work
627
- # Offset Size Contents
628
- # 0 2 Index to row
629
- # 2 2 Index to first column (fc)
630
- # 4 2∙nc List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
631
- # 4+2∙nc 2 Index to last column (lc)
632
- row, column, *xfs = work.unpack 'v*'
633
- xfs.pop #=> last_column
634
- xfs.each_with_index do |xf, idx| set_cell worksheet, row, column + idx, xf end
635
- end
636
- def read_mulrk worksheet, addr, work
637
- # Offset Size Contents
638
- # 0 2 Index to row
639
- # 2 2 Index to first column (fc)
640
- # 4 6∙nc List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
641
- # Offset Size Contents
642
- # 0 2 Index to XF record (➜ 6.115)
643
- # 2 4 RK value (➜ 3.6)
644
- # 4+6∙nc 2 Index to last column (lc)
645
- row, column = work.unpack 'v2'
646
- 4.step(work.size - 6, 6) do |idx|
647
- xf, = work.unpack "x#{idx}v"
648
- set_cell worksheet, row, column, xf, decode_rk(work[idx + 2, 4])
649
- column += 1
650
- end
651
- end
652
- def read_number worksheet, addr, work
653
- # Offset Size Contents
654
- # 0 2 Index to row
655
- # 2 2 Index to column
656
- # 4 2 Index to XF record (➜ 6.115)
657
- # 6 8 IEEE 754 floating-point value (64-bit double precision)
658
- row, column, xf, value = work.unpack binfmt(:number)
659
- set_cell worksheet, row, column, xf, value
660
- end
661
- def read_rk worksheet, addr, work
662
- # Offset Size Contents
663
- # 0 2 Index to row
664
- # 2 2 Index to column
665
- # 4 2 Index to XF record (➜ 6.115)
666
- # 6 4 RK value (➜ 3.6)
667
- row, column, xf = work.unpack 'v3'
668
- set_cell worksheet, row, column, xf, decode_rk(work[6,4])
669
- end
670
- def read_row worksheet, addr
671
- row = addr[:index]
672
- @current_row_block.fetch [worksheet, row] do
673
- @current_row_block.clear
674
- cells = @current_row_block[[worksheet, row]] = Row.new(nil, row)
675
- @pos = addr[:offset]
676
- found = false
677
- while tuple = get_next_chunk
678
- pos, op, _, work = tuple
679
- case op
680
- when :eof # ● EOF ➜ 6.36 - we should only get here if there is just
681
- # one Row-Block
682
- @pos = pos
683
- return cells
684
- when :dbcell # ○ DBCELL Stream offsets to the cell records of each row
685
- return cells
686
- when :row # ○○ Row Blocks ➜ 5.7
687
- # ● ROW ➜ 6.83
688
- # ignore, we already did these in read_worksheet
689
- return cells if found
690
- when :blank # BLANK ➜ 6.7
691
- found = true
692
- read_blank worksheet, addr, work
693
- when :boolerr # BOOLERR ➜ 6.10
694
- found = true
695
- read_boolerr worksheet, addr, work
696
- when 0x0002 # INTEGER ➜ 6.56 (BIFF2 only)
697
- found = true
698
- # TODO: implement for BIFF2 support
699
- when :formula # FORMULA ➜ 6.46
700
- found = true
701
- read_formula worksheet, addr, work
702
- when :label # LABEL ➜ 6.59 (BIFF2-BIFF7)
703
- found = true
704
- read_label worksheet, addr, work
705
- when :labelsst # LABELSST ➜ 6.61 (BIFF8 only)
706
- found = true
707
- read_labelsst worksheet, addr, work
708
- when :mulblank # MULBLANK ➜ 6.64 (BIFF5-BIFF8)
709
- found = true
710
- read_mulblank worksheet, addr, work
711
- when :mulrk # MULRK ➜ 6.65 (BIFF5-BIFF8)
712
- found = true
713
- read_mulrk worksheet, addr, work
714
- when :number # NUMBER ➜ 6.68
715
- found = true
716
- read_number worksheet, addr, work
717
- when :rk # RK ➜ 6.82 (BIFF3-BIFF8)
718
- found = true
719
- read_rk worksheet, addr, work
720
- when :rstring # RSTRING ➜ 6.84 (BIFF5/BIFF7)
721
- found = true
722
- read_rstring worksheet, addr, work
198
+
199
+ def read_boundsheet work, pos, len
200
+ # Offset Size Contents
201
+ # 0 4 Absolute stream position of the BOF record of the sheet
202
+ # represented by this record. This field is never encrypted
203
+ # in protected files.
204
+ # 4 1 Visibility: 0x00 = Visible
205
+ # 0x01 = Hidden
206
+ # 0x02 = Strong hidden (see below)
207
+ # 5 1 Sheet type: 0x00 = Worksheet
208
+ # 0x02 = Chart
209
+ # 0x06 = Visual Basic module
210
+ # 6 var. Sheet name: BIFF5/BIFF7: Byte string,
211
+ # 8-bit string length ( 3.3)
212
+ # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
213
+ offset, visibility, _ = work.unpack("VC2")
214
+ name = client read_string(work[6..]), @workbook.encoding
215
+ if @boundsheets
216
+ @boundsheets[0] += 1
217
+ @boundsheets[2] += len
218
+ else
219
+ @boundsheets = [1, pos, len]
723
220
  end
221
+ @workbook.set_boundsheets(*@boundsheets)
222
+ @workbook.add_worksheet Worksheet.new(name: name,
223
+ ole: @book,
224
+ offset: offset,
225
+ reader: self,
226
+ visibility: WORKSHEET_VISIBILITIES[visibility])
724
227
  end
725
- cells
726
- end
727
- end
728
- def read_rstring worksheet, addr, work
729
- # Offset Size Contents
730
- # 0 2 Index to row
731
- # 2 2 Index to column
732
- # 4 2 Index to XF record (➜ 6.115)
733
- # 6 sz Unformatted Unicode string, 16-bit string length (➜ 3.4)
734
- # 6+sz 2 Number of Rich-Text formatting runs (rt)
735
- # 8+sz 4·rt List of rt formatting runs (➜ 3.2)
736
- row, column, xf = work.unpack 'v3'
737
- value = client read_string(work[6..-1], 2), @workbook.encoding
738
- set_cell worksheet, row, column, xf, value
739
- end
740
- def read_window2 worksheet, work, pos, len
741
- # This record contains additional settings for the document window
742
- # (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8).
743
- # It is part of the Sheet View Settings Block (➜ 4.5).
744
- # Offset Size Contents
745
- # 0 2 Option flags:
746
- # Bits Mask Contents
747
- # 0 0x0001 0 = Show formula results
748
- # 1 = Show formulas
749
- # 1 0x0002 0 = Do not show grid lines
750
- # 1 = Show grid lines
751
- # 2 0x0004 0 = Do not show sheet headers
752
- # 1 = Show sheet headers
753
- # 3 0x0008 0 = Panes are not frozen
754
- # 1 = Panes are frozen (freeze)
755
- # 4 0x0010 0 = Show zero values as empty cells
756
- # 1 = Show zero values
757
- # 5 0x0020 0 = Manual grid line colour
758
- # 1 = Automatic grid line colour
759
- # 6 0x0040 0 = Columns from left to right
760
- # 1 = Columns from right to left
761
- # 7 0x0080 0 = Do not show outline symbols
762
- # 1 = Show outline symbols
763
- # 8 0x0100 0 = Keep splits if pane freeze is removed
764
- # 1 = Remove splits if pane freeze is removed
765
- # 9 0x0200 0 = Sheet not selected
766
- # 1 = Sheet selected (BIFF5-BIFF8)
767
- # 10 0x0400 0 = Sheet not active
768
- # 1 = Sheet active (BIFF5-BIFF8)
769
- # 11 0x0800 0 = Show in normal view
770
- # 1 = Show in page break preview (BIFF8)
771
- # 2 2 Index to first visible row
772
- # 4 2 Index to first visible column
773
- # 6 2 Colour index of grid line colour (➜ 5.74).
774
- # Note that in BIFF2-BIFF5 an RGB colour is written instead.
775
- # 8 2 Not used
776
- # 10 2 Cached magnification factor in page break preview (in percent)
777
- # 0 = Default (60%)
778
- # 12 2 Cached magnification factor in normal view (in percent)
779
- # 0 = Default (100%)
780
- # 14 4 Not used
781
- flags, _ = work.unpack 'v'
782
- worksheet.selected = flags & 0x0200 > 0
783
- end
784
228
 
785
- def read_merged_cells worksheet, work, pos, len
786
- # This record contains the addresses of merged cell ranges in the current sheet.
787
- # Record MERGEDCELLS, BIFF8:
788
- # Offset Size Contents
789
- # 0 var. Cell range address list with merged ranges (➜ 2.5.15)
790
- # If the record size exceeds the limit, it is not continued with a CONTINUE record,
791
- # but another self-contained MERGEDCELLS record is started. The limit of 8224 bytes
792
- # per record results in a maximum number of 1027 merged ranges.
229
+ def read_codepage work, pos, len
230
+ codepage, _ = work.unpack "v"
231
+ @workbook.set_encoding encoding(codepage), pos, len
232
+ end
793
233
 
794
- worksheet.merged_cells.push(*read_range_address_list(work, len))
795
- #
796
- # A cell range address list consists of a field with the number of ranges and the list
797
- # of the range addresses.
798
- # Cell range address list, BIFF2-BIFF8:
799
- # Offset Size Contents
800
- # 0 2 Number of following cell range addresses (nm)
801
- # 2 6∙nm or 8∙nm List of nm cell range addresses (➜ 2.5.14)
802
- #
803
- end
234
+ def read_colinfo worksheet, work, pos, len
235
+ # Offset Size Contents
236
+ # 0 2 Index to first column in the range
237
+ # 2 2 Index to last column in the range
238
+ # 4 2 Width of the columns in 1/256 of the width of the zero
239
+ # character, using default font (first FONT record in the
240
+ # file)
241
+ # 6 2 Index to XF record (➜ 6.115) for default column formatting
242
+ # 8 2 Option flags:
243
+ # Bits Mask Contents
244
+ # 0 0x0001 1 = Columns are hidden
245
+ # 10-8 0x0700 Outline level of the columns (0 = no outline)
246
+ # 12 0x1000 1 = Columns are collapsed
247
+ # 10 2 Not used
248
+ first, last, width, xf, opts = work.unpack binfmt(:colinfo)[0..-2]
249
+ first.upto last do |col|
250
+ column = Column.new col, @workbook.format(xf),
251
+ width: width.to_f / 256,
252
+ hidden: (opts & 0x0001) > 0,
253
+ collapsed: (opts & 0x1000) > 0,
254
+ outline_level: (opts & 0x0700) / 256
255
+ column.worksheet = worksheet
256
+ worksheet.columns[col] = column
257
+ end
258
+ end
259
+
260
+ def read_dimensions worksheet, work, pos, len
261
+ # Offset Size Contents
262
+ # 0 4 Index to first used row
263
+ # 4 4 Index to last used row, increased by 1
264
+ # 8 2 Index to first used column
265
+ # 10 2 Index to last used column, increased by 1
266
+ # 12 2 Not used
267
+ worksheet.set_dimensions work.unpack(binfmt(:dimensions)), pos, len
268
+ end
269
+
270
+ def read_font work, pos, len
271
+ # Offset Size Contents
272
+ # 0 2 Height of the font (in twips = 1/20 of a point)
273
+ # 2 2 Option flags:
274
+ # Bit Mask Contents
275
+ # 0 0x0001 1 = Characters are bold (redundant, see below)
276
+ # 1 0x0002 1 = Characters are italic
277
+ # 2 0x0004 1 = Characters are underlined
278
+ # (redundant, see below)
279
+ # 3 0x0008 1 = Characters are struck out
280
+ # 4 0x0010 1 = Characters are outlined (djberger)
281
+ # 5 0x0020 1 = Characters are shadowed (djberger)
282
+ # 4 2 Colour index (➜ 6.70)
283
+ # 6 2 Font weight (100-1000). Standard values are
284
+ # 0x0190 (400) for normal text and
285
+ # 0x02bc (700) for bold text.
286
+ # 8 2 Escapement type: 0x0000 = None
287
+ # 0x0001 = Superscript
288
+ # 0x0002 = Subscript
289
+ # 10 1 Underline type: 0x00 = None
290
+ # 0x01 = Single
291
+ # 0x02 = Double
292
+ # 0x21 = Single accounting
293
+ # 0x22 = Double accounting
294
+ # 11 1 Font family:
295
+ # 0x00 = None (unknown or don't care)
296
+ # 0x01 = Roman (variable width, serifed)
297
+ # 0x02 = Swiss (variable width, sans-serifed)
298
+ # 0x03 = Modern (fixed width, serifed or sans-serifed)
299
+ # 0x04 = Script (cursive)
300
+ # 0x05 = Decorative (specialised,
301
+ # for example Old English, Fraktur)
302
+ # 12 1 Character set: 0x00 = 0 = ANSI Latin
303
+ # 0x01 = 1 = System default
304
+ # 0x02 = 2 = Symbol
305
+ # 0x4d = 77 = Apple Roman
306
+ # 0x80 = 128 = ANSI Japanese Shift-JIS
307
+ # 0x81 = 129 = ANSI Korean (Hangul)
308
+ # 0x82 = 130 = ANSI Korean (Johab)
309
+ # 0x86 = 134 = ANSI Chinese Simplified GBK
310
+ # 0x88 = 136 = ANSI Chinese Traditional BIG5
311
+ # 0xa1 = 161 = ANSI Greek
312
+ # 0xa2 = 162 = ANSI Turkish
313
+ # 0xa3 = 163 = ANSI Vietnamese
314
+ # 0xb1 = 177 = ANSI Hebrew
315
+ # 0xb2 = 178 = ANSI Arabic
316
+ # 0xba = 186 = ANSI Baltic
317
+ # 0xcc = 204 = ANSI Cyrillic
318
+ # 0xde = 222 = ANSI Thai
319
+ # 0xee = 238 = ANSI Latin II (Central European)
320
+ # 0xff = 255 = OEM Latin I
321
+ # 13 1 Not used
322
+ # 14 var. Font name:
323
+ # BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
324
+ # BIFF8: Unicode string, 8-bit string length (➜ 3.4)
325
+ name = client read_string(work[14..]), @workbook.encoding
326
+ font = Font.new name
327
+ size, opts, color, font.weight, escapement, underline,
328
+ family, encoding = work.unpack binfmt(:font)
329
+ font.size = size / TWIPS
330
+ font.italic = opts & 0x0002
331
+ font.strikeout = opts & 0x0008
332
+ font.outline = opts & 0x0010
333
+ font.shadow = opts & 0x0020
334
+ font.color = COLOR_CODES[color] || :text
335
+ font.escapement = ESCAPEMENT_TYPES[escapement]
336
+ font.underline = UNDERLINE_TYPES[underline]
337
+ font.family = FONT_FAMILIES[family]
338
+ font.encoding = FONT_ENCODINGS[encoding]
339
+ @workbook.add_font font
340
+ end
341
+
342
+ def read_format work, pos, len
343
+ # Offset Size Contents
344
+ # 0 2 Format index used in other records
345
+ # 2 var. Number format string
346
+ # (Unicode string, 16-bit string length, ➜ 3.4)
347
+ idx, = work.unpack "v"
348
+ value = read_string work[2..], 2
349
+ @formats.store idx, client(value, @workbook.encoding)
350
+ end
351
+
352
+ def read_formula worksheet, addr, work
353
+ # Offset Size Contents
354
+ # 0 2 Index to row
355
+ # 2 2 Index to column
356
+ # 4 2 Index to XF record (➜ 6.115)
357
+ # 6 8 Result of the formula. See below for details.
358
+ # 14 2 Option flags:
359
+ # Bit Mask Contents
360
+ # 0 0x0001 1 = Recalculate always
361
+ # 1 0x0002 1 = Calculate on open
362
+ # 3 0x0008 1 = Part of a shared formula
363
+ # 16 4 Not used
364
+ # 20 var. Formula data (RPN token array, ➜ 4)
365
+ # Offset Size Contents
366
+ # 0 2 Size of the following formula data (sz)
367
+ # 2 sz Formula data (RPN token array)
368
+ # [2+sz] var. (optional) Additional data for specific tokens
369
+ # (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
370
+ #
371
+ # Result of the Formula
372
+ # Dependent on the type of value the formula returns, the result field has
373
+ # the following format:
374
+ #
375
+ # Result is a numeric value:
376
+ # Offset Size Contents
377
+ # 0 8 IEEE 754 floating-point value (64-bit double precision)
378
+ #
379
+ # Result is a string (the string follows in a STRING record, ➜ 6.98):
380
+ # Offset Size Contents
381
+ # 0 1 0x00 (identifier for a string value)
382
+ # 1 5 Not used
383
+ # 6 2 0xffff
384
+ # Note: In BIFF8 the string must not be empty. For empty cells there is a
385
+ # special identifier defined (see below).
386
+ #
387
+ # Result is a Boolean value:
388
+ # Offset Size Contents
389
+ # 0 1 0x01 (identifier for a Boolean value)
390
+ # 1 1 Not used
391
+ # 2 1 0 = FALSE, 1 = TRUE
392
+ # 3 3 Not used
393
+ # 6 2 0xffff
394
+ #
395
+ # Result is an error value:
396
+ # Offset Size Contents
397
+ # 0 1 0x02 (identifier for an error value)
398
+ # 1 1 Not used
399
+ # 2 1 Error code (➜ 3.7)
400
+ # 3 3 Not used
401
+ # 6 2 0xffff
402
+ #
403
+ # Result is an empty cell (BIFF8), for example an empty string:
404
+ # Offset Size Contents
405
+ # 0 1 0x03 (identifier for an empty cell)
406
+ # 1 5 Not used
407
+ # 6 2 0xffff
408
+ row, column, xf, rtype, rval, rcheck, opts = work.unpack "v3CxCx3v2"
409
+ formula = Formula.new
410
+ formula.shared = (opts & 0x08) > 0
411
+ formula.data = work[20..]
412
+ if rcheck != 0xffff || rtype > 3
413
+ value, = work.unpack "x6E"
414
+ unless value
415
+ # on architectures where sizeof(double) > 8
416
+ value, = work.unpack "x6e"
417
+ end
418
+ formula.value = value
419
+ elsif rtype == 0
420
+ pos, op, _len, work = get_next_chunk
421
+ if op == :sharedfmla
422
+ ## TODO: formula-support in 0.8.0
423
+ pos, op, _len, work = get_next_chunk
424
+ end
425
+ if op == :string
426
+ formula.value = client read_string(work, 2), @workbook.encoding
427
+ else
428
+ warn "String Value expected after Formula, but got #{op}"
429
+ formula.value = Error.new 0x2a
430
+ @pos = pos
431
+ end
432
+ elsif rtype == 1
433
+ formula.value = rval > 0
434
+ elsif rtype == 2
435
+ formula.value = Error.new rval
436
+ else
437
+ # leave the Formula value blank
438
+ end
439
+ set_cell worksheet, row, column, xf, formula
440
+ end
804
441
 
805
- def read_workbook
806
- previous_op = nil
807
- while tuple = get_next_chunk
808
- pos, op, len, work = tuple
809
- case op
810
- when @bof, :bof # ● BOF Type = worksheet (➜ 6.8)
811
- return
812
- when :eof # ● EOF ➜ 6.36
813
- postread_workbook
814
- return
815
- when :datemode # DATEMODE ➜ 6.25
816
- flag, _ = work.unpack 'v'
817
- if flag == 1
818
- @workbook.date_base = DateTime.new 1904, 1, 1
442
+ def read_hlink worksheet, work, pos_unused, len
443
+ # 6.53.1 Common Record Contents
444
+ # Offset Size Contents
445
+ # 0 8 Cell range address of all cells containing this hyperlink
446
+ # (➜ 3.13.1)
447
+ # 8 16 GUID of StdLink:
448
+ # D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
449
+ # (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
450
+ # 24 4 Unknown value: 0x00000002
451
+ # 28 4 Option flags (see below)
452
+ # Bit Mask Contents
453
+ # 0 0x00000001 0 = No link extant
454
+ # 1 = File link or URL
455
+ # 1 0x00000002 0 = Relative file path
456
+ # 1 = Absolute path or URL
457
+ # 2 and 4 0x00000014 0 = No description
458
+ # 1 (both bits) = Description
459
+ # 3 0x00000008 0 = No text mark
460
+ # 1 = Text mark
461
+ # 7 0x00000080 0 = No target frame
462
+ # 1 = Target frame
463
+ # 8 0x00000100 0 = File link or URL
464
+ # 1 = UNC path (incl. server name)
465
+ #--------------------------------------------------------------------------
466
+ # [32] 4 (optional, see option flags) Character count of description
467
+ # text, including trailing zero word (dl)
468
+ # [36] 2∙dl (optional, see option flags) Character array of description
469
+ # text, no Unicode string header, always 16-bit characters,
470
+ # zero-terminated
471
+ #--------------------------------------------------------------------------
472
+ # [var.] 4 (optional, see option flags) Character count of target
473
+ # frame, including trailing zero word (fl)
474
+ # [var.] 2∙fl (optional, see option flags) Character array of target
475
+ # frame, no Unicode string header, always 16-bit characters,
476
+ # zero-terminated
477
+ #--------------------------------------------------------------------------
478
+ # var. var. Special data (➜ 6.53.2 and following)
479
+ #--------------------------------------------------------------------------
480
+ # [var.] 4 (optional, see option flags) Character count of the text
481
+ # mark, including trailing zero word (tl)
482
+ # [var.] 2∙tl (optional, see option flags) Character array of the text
483
+ # mark without “#” sign, no Unicode string header, always
484
+ # 16-bit characters, zero-terminated
485
+ firstrow, lastrow, firstcol, lastcol, _, opts = work.unpack "v4H32x4V"
486
+ has_link = opts & 0x0001
487
+ desc = opts & 0x0014
488
+ textmark = opts & 0x0008
489
+ target = opts & 0x0080
490
+ unc = opts & 0x0100
491
+ link = Link.new
492
+ _, _ = nil
493
+ pos = 32
494
+ if desc > 0
495
+ description, pos = read_hlink_string work, pos
496
+ link << description
497
+ end
498
+ if target > 0
499
+ link.target_frame, pos = read_hlink_string work, pos
500
+ end
501
+ if unc > 0
502
+ # 6.53.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
503
+ # These data fields are for UNC paths containing a server name (for
504
+ # instance “\\server\path\file.xls”). The lower 9 bits of the option
505
+ # flags field must be 1.x00x.xx112.
506
+ # Offset Size Contents
507
+ # 0 4 Character count of the UNC,
508
+ # including trailing zero word (fl)
509
+ # 4 2∙fl Character array of the UNC, no Unicode string header,
510
+ # always 16-bit characters, zeroterminated.
511
+ link.url, pos = read_hlink_string work, pos
512
+ elsif has_link > 0
513
+ uid, = work.unpack "x#{pos}H32"
514
+ pos += 16
515
+ if uid == "e0c9ea79f9bace118c8200aa004ba90b"
516
+ # 6.53.2 Hyperlink containing a URL (Uniform Resource Locator)
517
+ # These data fields occur for links which are not local files or files
518
+ # in the local network (for instance HTTP and FTP links and e-mail
519
+ # addresses). The lower 9 bits of the option flags field must be
520
+ # 0.x00x.xx112 (x means optional, depending on hyperlink content). The
521
+ # GUID could be used to distinguish a URL from a file link.
522
+ # Offset Size Contents
523
+ # 0 16 GUID of URL Moniker:
524
+ # E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
525
+ # (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
526
+ # 16 4 Size of character array of the URL, including trailing
527
+ # zero word (us). There are us/2-1 characters in the
528
+ # following string.
529
+ # 20 us Character array of the URL, no Unicode string header,
530
+ # always 16-bit characters, zeroterminated
531
+ size, = work.unpack "x#{pos}V"
532
+ pos += 4
533
+ data = work[pos, size].chomp "\000\000"
534
+ link.url = client data
535
+ pos += size
536
+ else
537
+ # 6.53.3 Hyperlink to a Local File
538
+ # These data fields are for links to files on local drives. The path of
539
+ # the file can be complete with drive letter (absolute) or relative to
540
+ # the location of the workbook. The lower 9 bits of the option flags
541
+ # field must be 0.x00x.xxx12. The GUID could be used to distinguish a
542
+ # URL from a file link.
543
+ # Offset Size Contents
544
+ # 0 16 GUID of File Moniker:
545
+ # 03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
546
+ # (00000303-0000-0000-C000-000000000046)
547
+ # 16 2 Directory up-level count. Each leading “..\” in the
548
+ # file link is deleted and increases this counter.
549
+ # 18 4 Character count of the shortened file path and name,
550
+ # including trailing zero byte (sl)
551
+ # 22 sl Character array of the shortened file path and name in
552
+ # 8.3-DOS-format. This field can be filled with a long
553
+ # file name too. No Unicode string header, always 8-bit
554
+ # characters, zeroterminated.
555
+ # 22+sl 24 Unknown byte sequence:
556
+ # FF FF AD DE 00 00 00 00
557
+ # 00 00 00 00 00 00 00 00
558
+ # 00 00 00 00 00 00 00 00
559
+ # 46+sl 4 Size of the following file link field including string
560
+ # length field and additional data field (sz). If sz is
561
+ # zero, nothing will follow (except a text mark).
562
+ # [50+sl] 4 (optional) Size of character array of the extended file
563
+ # path and name (xl). There are xl/2 characters in the
564
+ # following string.
565
+ # [54+sl] 2 (optional) Unknown byte sequence: 03 00
566
+ # [56+sl] xl (optional) Character array of the extended file path
567
+ # and name (xl), no Unicode string header, always 16-bit
568
+ # characters, not zero-terminated
569
+ uplevel, count = work.unpack "x#{pos}vV"
570
+ pos += 6
571
+ # TODO: short file path may have any of the OEM encodings. Find out which
572
+ # and use the #client method to convert the encoding.
573
+ prefix = internal("..\\", "UTF-8") * uplevel
574
+ link.dos = link.url = prefix << work[pos, count].chomp("\000")
575
+ pos += count + 24
576
+ total, size = work.unpack "x#{pos}V2"
577
+ pos += 10
578
+ if total > 0
579
+ link.url = client work[pos, size]
580
+ pos += size
581
+ end
582
+ end
819
583
  else
820
- @workbook.date_base = DateTime.new 1899, 12, 31
584
+ # 6.53.5 Hyperlink to the Current Workbook
585
+ # In this case only the text mark field is present (optional with
586
+ # description).
587
+ # Example: The URL “#Sheet2!B1:C2” refers to the given range in the
588
+ # current workbook.
589
+ # The lower 9 bits of the option flags field must be 0.x00x.1x002.
590
+ end
591
+ if textmark > 0
592
+ link.fragment, _ = read_hlink_string work, pos
593
+ end
594
+ if link.empty?
595
+ link << link.href
821
596
  end
822
- when :continue # ○ CONTINUE ➜ 6.22
823
- case previous_op
824
- when :sst # ● SST ➜ 6.96
825
- continue_sst work, pos, len
597
+ firstrow.upto lastrow do |row|
598
+ firstcol.upto lastcol do |col|
599
+ worksheet.add_link row, col, link
600
+ end
826
601
  end
827
- when :codepage # ○ CODEPAGE ➜ 6.17
828
- read_codepage work, pos, len
829
- when :boundsheet # ●● BOUNDSHEET ➜ 6.12
830
- read_boundsheet work, pos, len
831
- when :xf # ●● XF ➜ 6.115
832
- read_xf work, pos, len
833
- when :sst # ○ Shared String Table ➜ 5.11
834
- # ● SST ➜ 6.96
835
- read_sst work, pos, len
836
- # TODO: implement memory-efficient sst handling, possibly in conjunction
837
- # with EXTSST
838
- when :extsst # ● EXTSST ➜ 6.40
839
- read_extsst work, pos, len
840
- when :style # ●● STYLE ➜ 6.99
841
- read_style work, pos, len
842
- when :format # ○○ FORMAT (Number Format) ➜ 6.45
843
- read_format work, pos, len
844
- when :font # ●● FONT ➜ 6.43
845
- read_font work, pos, len
846
602
  end
847
- previous_op = op unless op == :continue
848
- end
849
- end
850
- def read_worksheet worksheet, offset
851
- @pos = offset
852
- @detected_rows = {}
853
- @noteObjList = []
854
- @noteList = []
855
- @noteObject = nil
856
- previous = nil
857
- while tuple = get_next_chunk
858
- pos, op, len, work = tuple
859
- if((offset = @current_row_block_offset) && !in_row_block?(op, previous))
860
- @current_row_block_offset = nil
861
- offset[1] = pos - offset[0]
603
+
604
+ def read_hlink_string work, pos
605
+ count, = work.unpack "x#{pos}V"
606
+ len = count * 2
607
+ pos += 4
608
+ data = work[pos, len].chomp "\000\000"
609
+ pos += len
610
+ [client(data, "UTF-16LE"), pos]
862
611
  end
863
- case op
864
- when :eof # ● EOF 6.36
865
- postread_worksheet worksheet
866
- return
867
- #when :uncalced # ○ UNCALCED 6.104
868
- # TODO: Formula support. Values were not calculated before saving
869
- #warn <<-EOS
870
- # Some fields containig formulas were saved without a computed value.
871
- # Support Spreadsheet::Excel by implementing formula-calculations!
872
- #EOS
873
- #when :index # ○ INDEX 5.7 (Row Blocks), ➜ 6.55
874
- # TODO: if there are changes in rows, omit index when writing
875
- #read_index worksheet, work, pos, len
876
- when :guts # GUTS 5.53
877
- read_guts worksheet, work, pos, len
878
- when :colinfo # ○○ COLINFO ➜ 6.18
879
- read_colinfo worksheet, work, pos, len
880
- when :dimensions # ● DIMENSIONS ➜ 6.31
881
- read_dimensions worksheet, work, pos, len
882
- when :row # ○○ Row Blocks ➜ 5.7
883
- # ROW 6.83
884
- set_row_address worksheet, work, pos, len
885
- when :hlink
886
- read_hlink worksheet, work, pos, len
887
- when :window2
888
- read_window2 worksheet, work, pos, len
889
- when :mergedcells # ○○ MERGEDCELLS ➜ 5.67
890
- read_merged_cells worksheet, work, pos, len
891
- when :protect, :password
892
- read_sheet_protection worksheet, op, work
893
- when :note # a note references an :obj
894
- read_note worksheet, work, pos, len
895
- when :obj # it contains the author in the NTS structure
896
- _ft, _cb, _ot, _objID = work.unpack('v4')
897
- if _ot == 0x19
898
- #puts "\nDEBUG: found Note Obj record"
899
- @noteObject = NoteObject.new
900
- @noteObject.objID = _objID
612
+
613
+ def read_index worksheet, work, pos, len
614
+ # Offset Size Contents
615
+ # 0 4 Not used
616
+ # 4 4 Index to first used row (rf, 0-based)
617
+ # 8 4 Index to first row of unused tail of sheet
618
+ # (rl, last used row + 1, 0-based)
619
+ # 12 4 Absolute stream position of the
620
+ # DEFCOLWIDTH record (➜ 6.29) of the current sheet. If this
621
+ # record does not exist, the offset points to the record at
622
+ # the position where the DEFCOLWIDTH record would occur.
623
+ # 16 4∙nm Array of nm absolute stream positions to the
624
+ # DBCELL record (➜ 6.26) of each Row Block
625
+ # TODO: use the index if it exists
626
+ # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
627
+ end
628
+
629
+ def read_label worksheet, addr, work
630
+ # Offset Size Contents
631
+ # 0 2 Index to row
632
+ # 2 2 Index to column
633
+ # 4 2 Index to XF record (➜ 6.115)
634
+ # 6 var. Unicode string, 16-bit string length (➜ 3.4)
635
+ row, column, xf = work.unpack "v3"
636
+ value = client read_string(work[6..], 2), @workbook.encoding
637
+ set_cell worksheet, row, column, xf, value
638
+ end
639
+
640
+ def read_labelsst worksheet, addr, work
641
+ # Offset Size Contents
642
+ # 0 2 Index to row
643
+ # 2 2 Index to column
644
+ # 4 2 Index to XF record (➜ 6.115)
645
+ # 6 4 Index into SST record (➜ 6.96)
646
+ row, column, xf, index = work.unpack binfmt(:labelsst)
647
+ set_cell worksheet, row, column, xf, worksheet.shared_string(index)
648
+ end
649
+
650
+ def read_mulblank worksheet, addr, work
651
+ # Offset Size Contents
652
+ # 0 2 Index to row
653
+ # 2 2 Index to first column (fc)
654
+ # 4 2∙nc List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
655
+ # 4+2∙nc 2 Index to last column (lc)
656
+ row, column, *xfs = work.unpack "v*"
657
+ xfs.pop #=> last_column
658
+ xfs.each_with_index { |xf, idx| set_cell worksheet, row, column + idx, xf }
659
+ end
660
+
661
+ def read_mulrk worksheet, addr, work
662
+ # Offset Size Contents
663
+ # 0 2 Index to row
664
+ # 2 2 Index to first column (fc)
665
+ # 4 6∙nc List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
666
+ # Offset Size Contents
667
+ # 0 2 Index to XF record (➜ 6.115)
668
+ # 2 4 RK value (➜ 3.6)
669
+ # 4+6∙nc 2 Index to last column (lc)
670
+ row, column = work.unpack "v2"
671
+ 4.step(work.size - 6, 6) do |idx|
672
+ xf, = work.unpack "x#{idx}v"
673
+ set_cell worksheet, row, column, xf, decode_rk(work[idx + 2, 4])
674
+ column += 1
901
675
  end
902
- #p work
903
- when :drawing # this can be followed by txo in case of a note
904
- if previous == :obj
905
- #puts "\nDEBUG: found MsDrawing record"
906
- #p work
676
+ end
677
+
678
+ def read_number worksheet, addr, work
679
+ # Offset Size Contents
680
+ # 0 2 Index to row
681
+ # 2 2 Index to column
682
+ # 4 2 Index to XF record (➜ 6.115)
683
+ # 6 8 IEEE 754 floating-point value (64-bit double precision)
684
+ row, column, xf, value = work.unpack binfmt(:number)
685
+ set_cell worksheet, row, column, xf, value
686
+ end
687
+
688
+ def read_rk worksheet, addr, work
689
+ # Offset Size Contents
690
+ # 0 2 Index to row
691
+ # 2 2 Index to column
692
+ # 4 2 Index to XF record (➜ 6.115)
693
+ # 6 4 RK value (➜ 3.6)
694
+ row, column, xf = work.unpack "v3"
695
+ set_cell worksheet, row, column, xf, decode_rk(work[6, 4])
696
+ end
697
+
698
+ def read_row worksheet, addr
699
+ row = addr[:index]
700
+ @current_row_block.fetch [worksheet, row] do
701
+ @current_row_block.clear
702
+ cells = @current_row_block[[worksheet, row]] = Row.new(nil, row)
703
+ @pos = addr[:offset]
704
+ found = false
705
+ while (tuple = get_next_chunk)
706
+ pos, op, _, work = tuple
707
+ case op
708
+ when :eof # ● EOF ➜ 6.36 - we should only get here if there is just
709
+ # one Row-Block
710
+ @pos = pos
711
+ return cells
712
+ when :dbcell # ○ DBCELL Stream offsets to the cell records of each row
713
+ return cells
714
+ when :row # ○○ Row Blocks ➜ 5.7
715
+ # ● ROW ➜ 6.83
716
+ # ignore, we already did these in read_worksheet
717
+ return cells if found
718
+ when :blank # BLANK ➜ 6.7
719
+ found = true
720
+ read_blank worksheet, addr, work
721
+ when :boolerr # BOOLERR ➜ 6.10
722
+ found = true
723
+ read_boolerr worksheet, addr, work
724
+ when 0x0002 # INTEGER ➜ 6.56 (BIFF2 only)
725
+ found = true
726
+ # TODO: implement for BIFF2 support
727
+ when :formula # FORMULA ➜ 6.46
728
+ found = true
729
+ read_formula worksheet, addr, work
730
+ when :label # LABEL ➜ 6.59 (BIFF2-BIFF7)
731
+ found = true
732
+ read_label worksheet, addr, work
733
+ when :labelsst # LABELSST ➜ 6.61 (BIFF8 only)
734
+ found = true
735
+ read_labelsst worksheet, addr, work
736
+ when :mulblank # MULBLANK ➜ 6.64 (BIFF5-BIFF8)
737
+ found = true
738
+ read_mulblank worksheet, addr, work
739
+ when :mulrk # MULRK ➜ 6.65 (BIFF5-BIFF8)
740
+ found = true
741
+ read_mulrk worksheet, addr, work
742
+ when :number # NUMBER ➜ 6.68
743
+ found = true
744
+ read_number worksheet, addr, work
745
+ when :rk # RK ➜ 6.82 (BIFF3-BIFF8)
746
+ found = true
747
+ read_rk worksheet, addr, work
748
+ when :rstring # RSTRING ➜ 6.84 (BIFF5/BIFF7)
749
+ found = true
750
+ read_rstring worksheet, addr, work
751
+ end
752
+ end
753
+ cells
907
754
  end
908
- when :txo # this contains the length of the note text
909
- if previous == :drawing
910
- #puts "\nDEBUG: found TxO record"
911
- #p work
755
+ end
756
+
757
+ def read_rstring worksheet, addr, work
758
+ # Offset Size Contents
759
+ # 0 2 Index to row
760
+ # 2 2 Index to column
761
+ # 4 2 Index to XF record (➜ 6.115)
762
+ # 6 sz Unformatted Unicode string, 16-bit string length (➜ 3.4)
763
+ # 6+sz 2 Number of Rich-Text formatting runs (rt)
764
+ # 8+sz 4·rt List of rt formatting runs (➜ 3.2)
765
+ row, column, xf = work.unpack "v3"
766
+ value = client read_string(work[6..], 2), @workbook.encoding
767
+ set_cell worksheet, row, column, xf, value
768
+ end
769
+
770
+ def read_window2 worksheet, work, pos, len
771
+ # This record contains additional settings for the document window
772
+ # (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8).
773
+ # It is part of the Sheet View Settings Block (➜ 4.5).
774
+ # Offset Size Contents
775
+ # 0 2 Option flags:
776
+ # Bits Mask Contents
777
+ # 0 0x0001 0 = Show formula results
778
+ # 1 = Show formulas
779
+ # 1 0x0002 0 = Do not show grid lines
780
+ # 1 = Show grid lines
781
+ # 2 0x0004 0 = Do not show sheet headers
782
+ # 1 = Show sheet headers
783
+ # 3 0x0008 0 = Panes are not frozen
784
+ # 1 = Panes are frozen (freeze)
785
+ # 4 0x0010 0 = Show zero values as empty cells
786
+ # 1 = Show zero values
787
+ # 5 0x0020 0 = Manual grid line colour
788
+ # 1 = Automatic grid line colour
789
+ # 6 0x0040 0 = Columns from left to right
790
+ # 1 = Columns from right to left
791
+ # 7 0x0080 0 = Do not show outline symbols
792
+ # 1 = Show outline symbols
793
+ # 8 0x0100 0 = Keep splits if pane freeze is removed
794
+ # 1 = Remove splits if pane freeze is removed
795
+ # 9 0x0200 0 = Sheet not selected
796
+ # 1 = Sheet selected (BIFF5-BIFF8)
797
+ # 10 0x0400 0 = Sheet not active
798
+ # 1 = Sheet active (BIFF5-BIFF8)
799
+ # 11 0x0800 0 = Show in normal view
800
+ # 1 = Show in page break preview (BIFF8)
801
+ # 2 2 Index to first visible row
802
+ # 4 2 Index to first visible column
803
+ # 6 2 Colour index of grid line colour (➜ 5.74).
804
+ # Note that in BIFF2-BIFF5 an RGB colour is written instead.
805
+ # 8 2 Not used
806
+ # 10 2 Cached magnification factor in page break preview (in percent)
807
+ # 0 = Default (60%)
808
+ # 12 2 Cached magnification factor in normal view (in percent)
809
+ # 0 = Default (100%)
810
+ # 14 4 Not used
811
+ flags, _ = work.unpack "v"
812
+ worksheet.selected = flags & 0x0200 > 0
813
+ end
814
+
815
+ def read_merged_cells worksheet, work, pos, len
816
+ # This record contains the addresses of merged cell ranges in the current sheet.
817
+ # Record MERGEDCELLS, BIFF8:
818
+ # Offset Size Contents
819
+ # 0 var. Cell range address list with merged ranges (➜ 2.5.15)
820
+ # If the record size exceeds the limit, it is not continued with a CONTINUE record,
821
+ # but another self-contained MERGEDCELLS record is started. The limit of 8224 bytes
822
+ # per record results in a maximum number of 1027 merged ranges.
823
+
824
+ worksheet.merged_cells.push(*read_range_address_list(work, len))
825
+ #
826
+ # A cell range address list consists of a field with the number of ranges and the list
827
+ # of the range addresses.
828
+ # Cell range address list, BIFF2-BIFF8:
829
+ # Offset Size Contents
830
+ # 0 2 Number of following cell range addresses (nm)
831
+ # 2 6∙nm or 8∙nm List of nm cell range addresses (➜ 2.5.14)
832
+ #
833
+ end
834
+
835
+ def read_workbook
836
+ previous_op = nil
837
+ while (tuple = get_next_chunk)
838
+ pos, op, len, work = tuple
839
+ case op
840
+ when @bof, :bof # ● BOF Type = worksheet (➜ 6.8)
841
+ return
842
+ when :eof # ● EOF ➜ 6.36
843
+ postread_workbook
844
+ return
845
+ when :datemode # ○ DATEMODE ➜ 6.25
846
+ flag, _ = work.unpack "v"
847
+ @workbook.date_base = if flag == 1
848
+ DateTime.new 1904, 1, 1
849
+ else
850
+ DateTime.new 1899, 12, 31
851
+ end
852
+ when :continue # ○ CONTINUE ➜ 6.22
853
+ case previous_op
854
+ when :sst # ● SST ➜ 6.96
855
+ continue_sst work, pos, len
856
+ end
857
+ when :codepage # ○ CODEPAGE ➜ 6.17
858
+ read_codepage work, pos, len
859
+ when :boundsheet # ●● BOUNDSHEET ➜ 6.12
860
+ read_boundsheet work, pos, len
861
+ when :xf # ●● XF ➜ 6.115
862
+ read_xf work, pos, len
863
+ when :sst # ○ Shared String Table ➜ 5.11
864
+ # ● SST ➜ 6.96
865
+ read_sst work, pos, len
866
+ # TODO: implement memory-efficient sst handling, possibly in conjunction
867
+ # with EXTSST
868
+ when :extsst # ● EXTSST ➜ 6.40
869
+ read_extsst work, pos, len
870
+ when :style # ●● STYLE ➜ 6.99
871
+ read_style work, pos, len
872
+ when :format # ○○ FORMAT (Number Format) ➜ 6.45
873
+ read_format work, pos, len
874
+ when :font # ●● FONT ➜ 6.43
875
+ read_font work, pos, len
876
+ end
877
+ previous_op = op unless op == :continue
912
878
  end
913
- when :continue # this contains the actual note text
914
- if previous == :txo && @noteObject
915
- #puts "\nDEBUG: found Continue record"
916
- continueFmt = work.unpack('C')
917
- if (continueFmt.first == 0)
918
- #puts "Picking compressed charset"
919
- #Skip to offset due to 'v5C' used above
920
- _text = work.unpack('@1C*')
921
- @noteObject.text = _text.pack('C*')
922
- elsif (continueFmt.first == 1)
923
- #puts "Picking uncompressed charset"
924
- _text = work.unpack('@1S*')
925
- @noteObject.text = _text.pack('U*')
879
+ end
880
+
881
+ def read_worksheet worksheet, offset
882
+ @pos = offset
883
+ @detected_rows = {}
884
+ @note_ob_list = []
885
+ @note_list = []
886
+ @note_object = nil
887
+ previous = nil
888
+ while (tuple = get_next_chunk)
889
+ pos, op, len, work = tuple
890
+ if (offset = @current_row_block_offset) && !in_row_block?(op, previous)
891
+ @current_row_block_offset = nil
892
+ offset[1] = pos - offset[0]
926
893
  end
927
- @noteObjList << @noteObject
894
+ case op
895
+ when :eof # ● EOF ➜ 6.36
896
+ postread_worksheet worksheet
897
+ return
898
+ # when :uncalced # ○ UNCALCED ➜ 6.104
899
+ # TODO: Formula support. Values were not calculated before saving
900
+ # warn <<-EOS
901
+ # Some fields containig formulas were saved without a computed value.
902
+ # Support Spreadsheet::Excel by implementing formula-calculations!
903
+ # EOS
904
+ # when :index # ○ INDEX ➜ 5.7 (Row Blocks), ➜ 6.55
905
+ # TODO: if there are changes in rows, omit index when writing
906
+ # read_index worksheet, work, pos, len
907
+ when :guts # GUTS 5.53
908
+ read_guts worksheet, work, pos, len
909
+ when :colinfo # ○○ COLINFO ➜ 6.18
910
+ read_colinfo worksheet, work, pos, len
911
+ when :dimensions # ● DIMENSIONS ➜ 6.31
912
+ read_dimensions worksheet, work, pos, len
913
+ when :row # ○○ Row Blocks ➜ 5.7
914
+ # ● ROW ➜ 6.83
915
+ set_row_address worksheet, work, pos, len
916
+ when :hlink
917
+ read_hlink worksheet, work, pos, len
918
+ when :window2
919
+ read_window2 worksheet, work, pos, len
920
+ when :mergedcells # ○○ MERGEDCELLS ➜ 5.67
921
+ read_merged_cells worksheet, work, pos, len
922
+ when :protect, :password
923
+ read_sheet_protection worksheet, op, work
924
+ when :note # a note references an :obj
925
+ read_note worksheet, work, pos, len
926
+ when :obj # it contains the author in the NTS structure
927
+ _ft, _cb, ot, obj_id = work.unpack("v4")
928
+ if ot == 0x19
929
+ # puts "\nDEBUG: found Note Obj record"
930
+ @note_object = NoteObject.new
931
+ @note_object.obj_id = obj_id
932
+ end
933
+ # p work
934
+ when :drawing # this can be followed by txo in case of a note
935
+ if previous == :obj
936
+ # puts "\nDEBUG: found MsDrawing record"
937
+ # p work
938
+ end
939
+ when :txo # this contains the length of the note text
940
+ if previous == :drawing
941
+ # puts "\nDEBUG: found TxO record"
942
+ # p work
943
+ end
944
+ when :continue # this contains the actual note text
945
+ if previous == :txo && @note_object
946
+ # puts "\nDEBUG: found Continue record"
947
+ continue_fmt = work.unpack("C")
948
+ if continue_fmt.first == 0
949
+ # puts "Picking compressed charset"
950
+ # Skip to offset due to 'v5C' used above
951
+ text = work.unpack("@1C*")
952
+ @note_object.text = text.pack("C*")
953
+ elsif continue_fmt.first == 1
954
+ # puts "Picking uncompressed charset"
955
+ text = work.unpack("@1S*")
956
+ @note_object.text = text.pack("U*")
957
+ end
958
+ @note_ob_list << @note_object
959
+ end
960
+ when :pagesetup
961
+ read_pagesetup(worksheet, work, pos, len)
962
+ when :leftmargin
963
+ worksheet.margins[:left] = work.unpack1(binfmt(:margin))
964
+ when :rightmargin
965
+ worksheet.margins[:right] = work.unpack1(binfmt(:margin))
966
+ when :topmargin
967
+ worksheet.margins[:top] = work.unpack1(binfmt(:margin))
968
+ when :bottommargin
969
+ worksheet.margins[:bottom] = work.unpack1(binfmt(:margin))
970
+ else
971
+ if ROW_BLOCK_OPS.include?(op)
972
+ set_missing_row_address worksheet, work, pos, len
973
+ end
974
+ end
975
+ previous = op
976
+ # previous = op unless op == :continue
928
977
  end
929
- when :pagesetup
930
- read_pagesetup(worksheet, work, pos, len)
931
- when :leftmargin
932
- worksheet.margins[:left] = work.unpack(binfmt(:margin))[0]
933
- when :rightmargin
934
- worksheet.margins[:right] = work.unpack(binfmt(:margin))[0]
935
- when :topmargin
936
- worksheet.margins[:top] = work.unpack(binfmt(:margin))[0]
937
- when :bottommargin
938
- worksheet.margins[:bottom] = work.unpack(binfmt(:margin))[0]
939
- else
940
- if ROW_BLOCK_OPS.include?(op)
941
- set_missing_row_address worksheet, work, pos, len
978
+ end
979
+
980
+ def read_pagesetup(worksheet, work, pos, len)
981
+ worksheet.pagesetup.delete_if { true }
982
+ data = work.unpack(binfmt(:pagesetup))
983
+ worksheet.pagesetup[:orientation] = (data[5] == 0) ? :landscape : :portrait
984
+ worksheet.pagesetup[:adjust_to] = data[1]
985
+
986
+ worksheet.pagesetup[:orig_data] = data
987
+ # TODO: add options acording to specification
988
+ end
989
+
990
+ def read_guts worksheet, work, pos, len
991
+ # Offset Size Contents
992
+ # 0 2 Width of the area to display row outlines (left of the sheet), in pixel
993
+ # 2 2 Height of the area to display column outlines (above the sheet), in pixel
994
+ # 4 2 Number of visible row outline levels (used row levels + 1; or 0, if not used)
995
+ # 6 2 Number of visible column outline levels (used column levels + 1; or 0, if not used)
996
+ width, height, row_level, col_level = work.unpack "v4"
997
+ worksheet.guts[:width] = width
998
+ worksheet.guts[:height] = height
999
+ worksheet.guts[:row_level] = row_level
1000
+ worksheet.guts[:col_level] = col_level
1001
+ end
1002
+
1003
+ def read_style work, pos, len
1004
+ # User-Defined Cell Styles:
1005
+ # Offset Size Contents
1006
+ # 0 2 Bit Mask Contents
1007
+ # 11-0 0x0fff Index to style XF record (➜ 6.115)
1008
+ # 15 0x8000 Always 0 for user-defined styles
1009
+ # 2 var. BIFF2-BIFF7: Non-empty byte string,
1010
+ # 8-bit string length (➜ 3.3)
1011
+ # BIFF8: Non-empty Unicode string,
1012
+ # 16-bit string length (➜ 3.4)
1013
+ #
1014
+ # Built-In Cell Styles
1015
+ # Offset Size Contents
1016
+ # 0 2 Bit Mask Contents
1017
+ # 11-0 0x0FFF Index to style XF record (➜ 6.115)
1018
+ # 15 0x8000 Always 1 for built-in styles
1019
+ # 2 1 Identifier of the built-in cell style:
1020
+ # 0x00 = Normal
1021
+ # 0x01 = RowLevel_lv (see next field)
1022
+ # 0x02 = ColLevel_lv (see next field)
1023
+ # 0x03 = Comma
1024
+ # 0x04 = Currency
1025
+ # 0x05 = Percent
1026
+ # 0x06 = Comma [0] (BIFF4-BIFF8)
1027
+ # 0x07 = Currency [0] (BIFF4-BIFF8)
1028
+ # 0x08 = Hyperlink (BIFF8)
1029
+ # 0x09 = Followed Hyperlink (BIFF8)
1030
+ # 3 1 Level for RowLevel or ColLevel style (zero-based, lv),
1031
+ # FFH otherwise
1032
+ flags, = work.unpack "v"
1033
+ xf_idx = flags & 0x0fff
1034
+ xf = @workbook.format xf_idx
1035
+ builtin = flags & 0x8000
1036
+ if builtin == 0
1037
+ xf.name = client read_string(work[2..], 2), @workbook.encoding
1038
+ else
1039
+ id, level = work.unpack "x2C2"
1040
+ if (name = BUILTIN_STYLES[id])
1041
+ name.sub "_lv", "_#{level}"
1042
+ xf.name = client name, "UTF-8"
1043
+ end
942
1044
  end
943
1045
  end
944
- previous = op
945
- #previous = op unless op == :continue
946
- end
947
- end
948
1046
 
949
- def read_pagesetup(worksheet, work, pos, len)
950
- worksheet.pagesetup.delete_if { true }
951
- data = work.unpack(binfmt(:pagesetup))
952
- worksheet.pagesetup[:orientation] = data[5] == 0 ? :landscape : :portrait
953
- worksheet.pagesetup[:adjust_to] = data[1]
1047
+ def read_xf work, pos, len
1048
+ # Offset Size Contents
1049
+ # 0 2 Index to FONT record (➜ 6.43)
1050
+ # 2 2 Index to FORMAT record (➜ 6.45)
1051
+ # 4 2 Bit Mask Contents
1052
+ # 2-0 0x0007 XF_TYPE_PROT – XF type, cell protection
1053
+ # Bit Mask Contents
1054
+ # 0 0x01 1 = Cell is locked
1055
+ # 1 0x02 1 = Formula is hidden
1056
+ # 2 0x04 0 = Cell XF; 1 = Style XF
1057
+ # 15-4 0xfff0 Index to parent style XF
1058
+ # (always 0xfff in style XFs)
1059
+ # 6 1 Bit Mask Contents
1060
+ # 2-0 0x07 XF_HOR_ALIGN – Horizontal alignment
1061
+ # Value Horizontal alignment
1062
+ # 0x00 General
1063
+ # 0x01 Left
1064
+ # 0x02 Centred
1065
+ # 0x03 Right
1066
+ # 0x04 Filled
1067
+ # 0x05 Justified (BIFF4-BIFF8X)
1068
+ # 0x06 Centred across selection
1069
+ # (BIFF4-BIFF8X)
1070
+ # 0x07 Distributed (BIFF8X)
1071
+ # 3 0x08 1 = Text is wrapped at right border
1072
+ # 6-4 0x70 XF_VERT_ALIGN – Vertical alignment
1073
+ # Value Vertical alignment
1074
+ # 0x00 Top
1075
+ # 0x01 Centred
1076
+ # 0x02 Bottom
1077
+ # 0x03 Justified (BIFF5-BIFF8X)
1078
+ # 0x04 Distributed (BIFF8X)
1079
+ # 7 1 XF_ROTATION: Text rotation angle (see above)
1080
+ # Value Text rotation
1081
+ # 0 Not rotated
1082
+ # 1-90 1 to 90 degrees counterclockwise
1083
+ # 91-180 1 to 90 degrees clockwise
1084
+ # 255 Letters are stacked top-to-bottom,
1085
+ # but not rotated
1086
+ # 8 1 Bit Mask Contents
1087
+ # 3-0 0x0f Indent level
1088
+ # 4 0x10 1 = Shrink content to fit into cell
1089
+ # 5 0x40 1 = Merge Range (djberger)
1090
+ # 7-6 0xc0 Text direction (BIFF8X only)
1091
+ # 0 = According to context
1092
+ # 1 = Left-to-right
1093
+ # 2 = Right-to-left
1094
+ # 9 1 Bit Mask Contents
1095
+ # 7-2 0xfc XF_USED_ATTRIB – Used attributes
1096
+ # Each bit describes the validity of a
1097
+ # specific group of attributes. In cell XFs
1098
+ # a cleared bit means the attributes of the
1099
+ # parent style XF are used (but only if the
1100
+ # attributes are valid there), a set bit
1101
+ # means the attributes of this XF are used.
1102
+ # In style XFs a cleared bit means the
1103
+ # attribute setting is valid, a set bit
1104
+ # means the attribute should be ignored.
1105
+ # Bit Mask Contents
1106
+ # 0 0x01 Flag for number format
1107
+ # 1 0x02 Flag for font
1108
+ # 2 0x04 Flag for horizontal and
1109
+ # vertical alignment, text wrap,
1110
+ # indentation, orientation,
1111
+ # rotation, and text direction
1112
+ # 3 0x08 Flag for border lines
1113
+ # 4 0x10 Flag for background area style
1114
+ # 5 0x20 Flag for cell protection (cell
1115
+ # locked and formula hidden)
1116
+ # 10 4 Cell border lines and background area:
1117
+ # Bit Mask Contents
1118
+ # 3- 0 0x0000000f Left line style (➜ 3.10)
1119
+ # 7- 4 0x000000f0 Right line style (➜ 3.10)
1120
+ # 11- 8 0x00000f00 Top line style (➜ 3.10)
1121
+ # 15-12 0x0000f000 Bottom line style (➜ 3.10)
1122
+ # 22-16 0x007f0000 Colour index (➜ 6.70)
1123
+ # for left line colour
1124
+ # 29-23 0x3f800000 Colour index (➜ 6.70)
1125
+ # for right line colour
1126
+ # 30 0x40000000 1 = Diagonal line
1127
+ # from top left to right bottom
1128
+ # 31 0x80000000 1 = Diagonal line
1129
+ # from bottom left to right top
1130
+ # 14 4 Bit Mask Contents
1131
+ # 6- 0 0x0000007f Colour index (➜ 6.70)
1132
+ # for top line colour
1133
+ # 13- 7 0x00003f80 Colour index (➜ 6.70)
1134
+ # for bottom line colour
1135
+ # 20-14 0x001fc000 Colour index (➜ 6.70)
1136
+ # for diagonal line colour
1137
+ # 24-21 0x01e00000 Diagonal line style (➜ 3.10)
1138
+ # 31-26 0xfc000000 Fill pattern (➜ 3.11)
1139
+ # 18 2 Bit Mask Contents
1140
+ # 6-0 0x007f Colour index (➜ 6.70)
1141
+ # for pattern colour
1142
+ # 13-7 0x3f80 Colour index (➜ 6.70)
1143
+ # for pattern background
1144
+ fmt = Format.new
1145
+ font_idx, numfmt, _, xf_align, xf_rotation, xf_indent, _,
1146
+ xf_borders, xf_brdcolors, xf_pattern = work.unpack binfmt(:xf)
1147
+ fmt.number_format = @formats[numfmt]
1148
+ ## this appears to be undocumented: the first 4 fonts seem to be accessed
1149
+ # with a 0-based index, but all subsequent font indices are 1-based.
1150
+ fmt.font = @workbook.font((font_idx > 3) ? font_idx - 1 : font_idx)
1151
+ fmt.horizontal_align = NGILA_H_FX[xf_align & 0x07]
1152
+ fmt.text_wrap = xf_align & 0x08 > 0
1153
+ fmt.vertical_align = NGILA_V_FX[xf_align & 0x70]
1154
+ fmt.rotation = if xf_rotation == 255
1155
+ :stacked
1156
+ elsif xf_rotation > 90
1157
+ 90 - xf_rotation
1158
+ else
1159
+ xf_rotation
1160
+ end
1161
+ fmt.indent_level = xf_indent & 0x0f
1162
+ fmt.shrink = xf_indent & 0x10 > 0
1163
+ fmt.text_direction = NOITCERID_TXET_FX[xf_indent & 0xc0]
1164
+ fmt.left = XF_BORDER_LINE_STYLES[xf_borders & 0x0000000f]
1165
+ fmt.right = XF_BORDER_LINE_STYLES[(xf_borders & 0x000000f0) >> 4]
1166
+ fmt.top = XF_BORDER_LINE_STYLES[(xf_borders & 0x00000f00) >> 8]
1167
+ fmt.bottom = XF_BORDER_LINE_STYLES[(xf_borders & 0x0000f000) >> 12]
1168
+ fmt.left_color = COLOR_CODES[(xf_borders & 0x007f0000) >> 16] || :black
1169
+ fmt.right_color = COLOR_CODES[(xf_borders & 0x3f800000) >> 23] || :black
1170
+ fmt.cross_down = xf_borders & 0x40000000 > 0
1171
+ fmt.cross_up = xf_borders & 0x80000000 > 0
1172
+ if xf_brdcolors
1173
+ fmt.top_color = COLOR_CODES[xf_brdcolors & 0x0000007f] || :black
1174
+ fmt.bottom_color = COLOR_CODES[(xf_brdcolors & 0x00003f80) >> 7] || :black
1175
+ fmt.diagonal_color = COLOR_CODES[(xf_brdcolors & 0x001fc000) >> 14] || :black
1176
+ # fmt.diagonal_style = COLOR_CODES[xf_brdcolors & 0x01e00000]
1177
+ fmt.pattern = (xf_brdcolors & 0xfc000000) >> 26
1178
+ end
1179
+ fmt.pattern_fg_color = COLOR_CODES[xf_pattern & 0x007f] || :border
1180
+ fmt.pattern_bg_color = COLOR_CODES[(xf_pattern & 0x3f80) >> 7] || :pattern_bg
1181
+ @workbook.add_format fmt
1182
+ end
954
1183
 
955
- worksheet.pagesetup[:orig_data] = data
956
- # TODO: add options acording to specification
957
- end
1184
+ def read_note worksheet, work, pos, len
1185
+ # puts "\nDEBUG: found a note record in read_worksheet\n"
1186
+ row, col, _, obj_id, obj_auth_en, obj_auth_len_fmt = work.unpack("v5C")
1187
+ if obj_auth_en > 0
1188
+ if obj_auth_len_fmt == 0
1189
+ # puts "Picking compressed charset"
1190
+ # Skip to offset due to 'v5C' used above
1191
+ obj_auth = work.unpack("@11C" + (obj_auth_en - 1).to_s + "C")
1192
+ elsif obj_auth_len_fmt == 1
1193
+ # puts "Picking uncompressed charset"
1194
+ obj_auth = work.unpack("@11S" + (obj_auth_en - 1).to_s + "S")
1195
+ end
1196
+ obj_auth = obj_auth.pack("C*")
1197
+ else
1198
+ obj_auth = ""
1199
+ end
1200
+ @note = Note.new
1201
+ @note.length = len
1202
+ @note.row = row
1203
+ @note.col = col
1204
+ @note.author = obj_auth
1205
+ @note.obj_id = obj_id
1206
+ # Pop it on the list to be sorted in postread_worksheet
1207
+ @note_list << @note
1208
+ end
958
1209
 
959
- def read_guts worksheet, work, pos, len
960
- # Offset Size Contents
961
- # 0 2 Width of the area to display row outlines (left of the sheet), in pixel
962
- # 2 2 Height of the area to display column outlines (above the sheet), in pixel
963
- # 4 2 Number of visible row outline levels (used row levels + 1; or 0, if not used)
964
- # 6 2 Number of visible column outline levels (used column levels + 1; or 0, if not used)
965
- width, height, row_level, col_level = work.unpack 'v4'
966
- worksheet.guts[:width] = width
967
- worksheet.guts[:height] = height
968
- worksheet.guts[:row_level] = row_level
969
- worksheet.guts[:col_level] = col_level
970
- end
971
- def read_style work, pos, len
972
- # User-Defined Cell Styles:
973
- # Offset Size Contents
974
- # 0 2 Bit Mask Contents
975
- # 11-0 0x0fff Index to style XF record (➜ 6.115)
976
- # 15 0x8000 Always 0 for user-defined styles
977
- # 2 var. BIFF2-BIFF7: Non-empty byte string,
978
- # 8-bit string length (➜ 3.3)
979
- # BIFF8: Non-empty Unicode string,
980
- # 16-bit string length (➜ 3.4)
981
- #
982
- # Built-In Cell Styles
983
- # Offset Size Contents
984
- # 0 2 Bit Mask Contents
985
- # 11-0 0x0FFF Index to style XF record (➜ 6.115)
986
- # 15 0x8000 Always 1 for built-in styles
987
- # 2 1 Identifier of the built-in cell style:
988
- # 0x00 = Normal
989
- # 0x01 = RowLevel_lv (see next field)
990
- # 0x02 = ColLevel_lv (see next field)
991
- # 0x03 = Comma
992
- # 0x04 = Currency
993
- # 0x05 = Percent
994
- # 0x06 = Comma [0] (BIFF4-BIFF8)
995
- # 0x07 = Currency [0] (BIFF4-BIFF8)
996
- # 0x08 = Hyperlink (BIFF8)
997
- # 0x09 = Followed Hyperlink (BIFF8)
998
- # 3 1 Level for RowLevel or ColLevel style (zero-based, lv),
999
- # FFH otherwise
1000
- flags, = work.unpack 'v'
1001
- xf_idx = flags & 0x0fff
1002
- xf = @workbook.format xf_idx
1003
- builtin = flags & 0x8000
1004
- if builtin == 0
1005
- xf.name = client read_string(work[2..-1], 2), @workbook.encoding
1006
- else
1007
- id, level = work.unpack 'x2C2'
1008
- if name = BUILTIN_STYLES[id]
1009
- name.sub '_lv', "_#{level.to_s}"
1010
- xf.name = client name, 'UTF-8'
1210
+ def read_sheet_protection worksheet, op, data
1211
+ case op
1212
+ when :protect
1213
+ worksheet.protect! if data.unpack1("v") == 1
1214
+ when :password
1215
+ worksheet.password_hash = data.unpack1("v")
1216
+ end
1217
+ end
1218
+
1219
+ def set_cell worksheet, row, column, xf, value = nil
1220
+ cells = @current_row_block[[worksheet, row]] ||= Row.new(nil, row)
1221
+ cells.formats[column] = @workbook.format(xf) unless xf == 0
1222
+ cells[column] = value
1223
+ end
1224
+
1225
+ def set_missing_row_address worksheet, work, pos, len
1226
+ # Offset Size Contents
1227
+ # 0 2 Index of this row
1228
+ # 2 2 Index to this column
1229
+ row_index, _ = work.unpack "v2"
1230
+ unless worksheet.offsets[row_index]
1231
+ @current_row_block_offset ||= [pos]
1232
+ data = {
1233
+ index: row_index,
1234
+ row_block: @current_row_block_offset,
1235
+ offset: @current_row_block_offset[0]
1236
+ }
1237
+ worksheet.set_row_address row_index, data
1238
+ end
1239
+ end
1240
+
1241
+ def set_row_address worksheet, work, pos, len
1242
+ # Offset Size Contents
1243
+ # 0 2 Index of this row
1244
+ # 2 2 Index to column of the first cell which
1245
+ # is described by a cell record
1246
+ # 4 2 Index to column of the last cell which is
1247
+ # described by a cell record, increased by 1
1248
+ # 6 2 Bit Mask Contents
1249
+ # 14-0 0x7fff Height of the row, in twips = 1/20 of a point
1250
+ # 15 0x8000 0 = Row has custom height;
1251
+ # 1 = Row has default height
1252
+ # 8 2 Not used
1253
+ # 10 2 In BIFF3-BIFF4 this field contains a relative offset to
1254
+ # calculate stream position of the first cell record for this
1255
+ # row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
1256
+ # anymore, but the DBCELL record ( 6.26) instead.
1257
+ # 12 4 Option flags and default row formatting:
1258
+ # Bit Mask Contents
1259
+ # 2-0 0x00000007 Outline level of the row
1260
+ # 4 0x00000010 1 = Outline group starts or ends here
1261
+ # (depending on where the outline
1262
+ # buttons are located, see WSBOOL
1263
+ # record, ➜ 6.113), and is collapsed
1264
+ # 5 0x00000020 1 = Row is hidden (manually, or by a
1265
+ # filter or outline group)
1266
+ # 6 0x00000040 1 = Row height and default font height
1267
+ # do not match
1268
+ # 7 0x00000080 1 = Row has explicit default format (fl)
1269
+ # 8 0x00000100 Always 1
1270
+ # 27-16 0x0fff0000 If fl = 1: Index to default XF record
1271
+ # (➜ 6.115)
1272
+ # 28 0x10000000 1 = Additional space above the row.
1273
+ # This flag is set, if the upper
1274
+ # border of at least one cell in this
1275
+ # row or if the lower border of at
1276
+ # least one cell in the row above is
1277
+ # formatted with a thick line style.
1278
+ # Thin and medium line styles are not
1279
+ # taken into account.
1280
+ # 29 0x20000000 1 = Additional space below the row.
1281
+ # This flag is set, if the lower
1282
+ # border of at least one cell in this
1283
+ # row or if the upper border of at
1284
+ # least one cell in the row below is
1285
+ # formatted with a medium or thick
1286
+ # line style. Thin line styles are
1287
+ # not taken into account.
1288
+ @current_row_block_offset ||= [pos]
1289
+ index, first_used, first_unused, height, flags = work.unpack binfmt(:row)
1290
+ height &= 0x7fff
1291
+ format = nil
1292
+ # TODO: read attributes from work[13,3], read flags
1293
+ attrs = {
1294
+ default_format: format,
1295
+ first_used: first_used,
1296
+ first_unused: first_unused,
1297
+ index: index,
1298
+ row_block: @current_row_block_offset,
1299
+ offset: @current_row_block_offset[0],
1300
+ outline_level: flags & 0x00000007,
1301
+ collapsed: (flags & 0x0000010) > 0,
1302
+ hidden: (flags & 0x0000020) > 0
1303
+ }
1304
+ if (flags & 0x00000040) > 0
1305
+ attrs.store :height, height / TWIPS
1306
+ end
1307
+ if (flags & 0x00000080) > 0
1308
+ xf = (flags & 0x0fff0000) >> 16
1309
+ attrs.store :default_format, @workbook.format(xf)
1310
+ end
1311
+ # TODO: Row spacing
1312
+ worksheet.set_row_address index, attrs
1313
+ end
1314
+
1315
+ def setup io
1316
+ ## Reading from StringIO fails without forced encoding
1317
+ if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("2.3.0")
1318
+ io.set_encoding("ASCII-8BIT")
1319
+ elsif io.respond_to?(:string) && (str = io.string) && str.respond_to?(:force_encoding)
1320
+ str.force_encoding("ASCII-8BIT")
1321
+ end
1322
+ io.rewind
1323
+ @ole = Ole::Storage.open io
1324
+ @workbook = Workbook.new io, {}
1325
+ %w[Book Workbook BOOK WORKBOOK book workbook].any? do |name|
1326
+ @book = begin
1327
+ @ole.file.open(name)
1328
+ rescue
1329
+ false
1330
+ end
1331
+ end
1332
+ raise "could not locate a workbook, possibly an empty file passed" unless @book
1333
+ @data = @book.read
1334
+ read_bof
1335
+ @workbook.ole = @book
1336
+ @workbook.bof = @bof
1337
+ @workbook.version = @version
1338
+ biff = @workbook.biff_version
1339
+ extend_reader biff
1340
+ extend_internals biff
1341
+ end
1342
+
1343
+ private
1344
+
1345
+ def extend_internals version
1346
+ require "spreadsheet/excel/internals/biff%i" % version
1347
+ extend Internals.const_get("Biff%i" % version)
1348
+ ## spreadsheets may not include a codepage record.
1349
+ @workbook.encoding = encoding 850 if version < 8
1350
+ rescue LoadError
1351
+ end
1352
+
1353
+ def extend_reader version
1354
+ require "spreadsheet/excel/reader/biff%i" % version
1355
+ extend Reader.const_get("Biff%i" % version)
1356
+ rescue LoadError
1011
1357
  end
1012
1358
  end
1013
1359
  end
1014
- def read_xf work, pos, len
1015
- # Offset Size Contents
1016
- # 0 2 Index to FONT record (➜ 6.43)
1017
- # 2 2 Index to FORMAT record (➜ 6.45)
1018
- # 4 2 Bit Mask Contents
1019
- # 2-0 0x0007 XF_TYPE_PROT – XF type, cell protection
1020
- # Bit Mask Contents
1021
- # 0 0x01 1 = Cell is locked
1022
- # 1 0x02 1 = Formula is hidden
1023
- # 2 0x04 0 = Cell XF; 1 = Style XF
1024
- # 15-4 0xfff0 Index to parent style XF
1025
- # (always 0xfff in style XFs)
1026
- # 6 1 Bit Mask Contents
1027
- # 2-0 0x07 XF_HOR_ALIGN – Horizontal alignment
1028
- # Value Horizontal alignment
1029
- # 0x00 General
1030
- # 0x01 Left
1031
- # 0x02 Centred
1032
- # 0x03 Right
1033
- # 0x04 Filled
1034
- # 0x05 Justified (BIFF4-BIFF8X)
1035
- # 0x06 Centred across selection
1036
- # (BIFF4-BIFF8X)
1037
- # 0x07 Distributed (BIFF8X)
1038
- # 3 0x08 1 = Text is wrapped at right border
1039
- # 6-4 0x70 XF_VERT_ALIGN – Vertical alignment
1040
- # Value Vertical alignment
1041
- # 0x00 Top
1042
- # 0x01 Centred
1043
- # 0x02 Bottom
1044
- # 0x03 Justified (BIFF5-BIFF8X)
1045
- # 0x04 Distributed (BIFF8X)
1046
- # 7 1 XF_ROTATION: Text rotation angle (see above)
1047
- # Value Text rotation
1048
- # 0 Not rotated
1049
- # 1-90 1 to 90 degrees counterclockwise
1050
- # 91-180 1 to 90 degrees clockwise
1051
- # 255 Letters are stacked top-to-bottom,
1052
- # but not rotated
1053
- # 8 1 Bit Mask Contents
1054
- # 3-0 0x0f Indent level
1055
- # 4 0x10 1 = Shrink content to fit into cell
1056
- # 5 0x40 1 = Merge Range (djberger)
1057
- # 7-6 0xc0 Text direction (BIFF8X only)
1058
- # 0 = According to context
1059
- # 1 = Left-to-right
1060
- # 2 = Right-to-left
1061
- # 9 1 Bit Mask Contents
1062
- # 7-2 0xfc XF_USED_ATTRIB – Used attributes
1063
- # Each bit describes the validity of a
1064
- # specific group of attributes. In cell XFs
1065
- # a cleared bit means the attributes of the
1066
- # parent style XF are used (but only if the
1067
- # attributes are valid there), a set bit
1068
- # means the attributes of this XF are used.
1069
- # In style XFs a cleared bit means the
1070
- # attribute setting is valid, a set bit
1071
- # means the attribute should be ignored.
1072
- # Bit Mask Contents
1073
- # 0 0x01 Flag for number format
1074
- # 1 0x02 Flag for font
1075
- # 2 0x04 Flag for horizontal and
1076
- # vertical alignment, text wrap,
1077
- # indentation, orientation,
1078
- # rotation, and text direction
1079
- # 3 0x08 Flag for border lines
1080
- # 4 0x10 Flag for background area style
1081
- # 5 0x20 Flag for cell protection (cell
1082
- # locked and formula hidden)
1083
- # 10 4 Cell border lines and background area:
1084
- # Bit Mask Contents
1085
- # 3- 0 0x0000000f Left line style (➜ 3.10)
1086
- # 7- 4 0x000000f0 Right line style (➜ 3.10)
1087
- # 11- 8 0x00000f00 Top line style (➜ 3.10)
1088
- # 15-12 0x0000f000 Bottom line style (➜ 3.10)
1089
- # 22-16 0x007f0000 Colour index (➜ 6.70)
1090
- # for left line colour
1091
- # 29-23 0x3f800000 Colour index (➜ 6.70)
1092
- # for right line colour
1093
- # 30 0x40000000 1 = Diagonal line
1094
- # from top left to right bottom
1095
- # 31 0x80000000 1 = Diagonal line
1096
- # from bottom left to right top
1097
- # 14 4 Bit Mask Contents
1098
- # 6- 0 0x0000007f Colour index (➜ 6.70)
1099
- # for top line colour
1100
- # 13- 7 0x00003f80 Colour index (➜ 6.70)
1101
- # for bottom line colour
1102
- # 20-14 0x001fc000 Colour index (➜ 6.70)
1103
- # for diagonal line colour
1104
- # 24-21 0x01e00000 Diagonal line style (➜ 3.10)
1105
- # 31-26 0xfc000000 Fill pattern (➜ 3.11)
1106
- # 18 2 Bit Mask Contents
1107
- # 6-0 0x007f Colour index (➜ 6.70)
1108
- # for pattern colour
1109
- # 13-7 0x3f80 Colour index (➜ 6.70)
1110
- # for pattern background
1111
- fmt = Format.new
1112
- font_idx, numfmt, _, xf_align, xf_rotation, xf_indent, _,
1113
- xf_borders, xf_brdcolors, xf_pattern = work.unpack binfmt(:xf)
1114
- fmt.number_format = @formats[numfmt]
1115
- ## this appears to be undocumented: the first 4 fonts seem to be accessed
1116
- # with a 0-based index, but all subsequent font indices are 1-based.
1117
- fmt.font = @workbook.font(font_idx > 3 ? font_idx - 1 : font_idx)
1118
- fmt.horizontal_align = NGILA_H_FX[xf_align & 0x07]
1119
- fmt.text_wrap = xf_align & 0x08 > 0
1120
- fmt.vertical_align = NGILA_V_FX[xf_align & 0x70]
1121
- fmt.rotation = if xf_rotation == 255
1122
- :stacked
1123
- elsif xf_rotation > 90
1124
- 90 - xf_rotation
1125
- else
1126
- xf_rotation
1127
- end
1128
- fmt.indent_level = xf_indent & 0x0f
1129
- fmt.shrink = xf_indent & 0x10 > 0
1130
- fmt.text_direction = NOITCERID_TXET_FX[xf_indent & 0xc0]
1131
- fmt.left = XF_BORDER_LINE_STYLES[xf_borders & 0x0000000f]
1132
- fmt.right = XF_BORDER_LINE_STYLES[(xf_borders & 0x000000f0) >> 4]
1133
- fmt.top = XF_BORDER_LINE_STYLES[(xf_borders & 0x00000f00) >> 8]
1134
- fmt.bottom = XF_BORDER_LINE_STYLES[(xf_borders & 0x0000f000) >> 12]
1135
- fmt.left_color = COLOR_CODES[(xf_borders & 0x007f0000) >> 16] || :black
1136
- fmt.right_color = COLOR_CODES[(xf_borders & 0x3f800000) >> 23] || :black
1137
- fmt.cross_down = xf_borders & 0x40000000 > 0
1138
- fmt.cross_up = xf_borders & 0x80000000 > 0
1139
- if xf_brdcolors
1140
- fmt.top_color = COLOR_CODES[xf_brdcolors & 0x0000007f] || :black
1141
- fmt.bottom_color = COLOR_CODES[(xf_brdcolors & 0x00003f80) >> 7] || :black
1142
- fmt.diagonal_color = COLOR_CODES[(xf_brdcolors & 0x001fc000) >> 14] || :black
1143
- #fmt.diagonal_style = COLOR_CODES[xf_brdcolors & 0x01e00000]
1144
- fmt.pattern = (xf_brdcolors & 0xfc000000) >> 26
1145
- end
1146
- fmt.pattern_fg_color = COLOR_CODES[xf_pattern & 0x007f] || :border
1147
- fmt.pattern_bg_color = COLOR_CODES[(xf_pattern & 0x3f80) >> 7] || :pattern_bg
1148
- @workbook.add_format fmt
1149
- end
1150
- def read_note worksheet, work, pos, len
1151
- #puts "\nDEBUG: found a note record in read_worksheet\n"
1152
- row, col, _, _objID, _objAuthLen, _objAuthLenFmt = work.unpack('v5C')
1153
- if (_objAuthLen > 0)
1154
- if (_objAuthLenFmt == 0)
1155
- #puts "Picking compressed charset"
1156
- #Skip to offset due to 'v5C' used above
1157
- _objAuth = work.unpack('@11C' + (_objAuthLen-1).to_s + 'C')
1158
- elsif (_objAuthLenFmt == 1)
1159
- #puts "Picking uncompressed charset"
1160
- _objAuth = work.unpack('@11S' + (_objAuthLen-1).to_s + 'S')
1161
- end
1162
- _objAuth = _objAuth.pack('C*')
1163
- else
1164
- _objAuth = ""
1165
- end
1166
- @note = Note.new
1167
- @note.length = len
1168
- @note.row = row
1169
- @note.col = col
1170
- @note.author = _objAuth
1171
- @note.objID = _objID
1172
- #Pop it on the list to be sorted in postread_worksheet
1173
- @noteList << @note
1174
- end
1175
- def read_sheet_protection worksheet, op, data
1176
- case op
1177
- when :protect
1178
- worksheet.protect! if data.unpack('v').first == 1
1179
- when :password
1180
- worksheet.password_hash = data.unpack('v').first
1181
- end
1182
- end
1183
- def set_cell worksheet, row, column, xf, value=nil
1184
- cells = @current_row_block[[worksheet, row]] ||= Row.new(nil, row)
1185
- cells.formats[column] = @workbook.format(xf) unless xf == 0
1186
- cells[column] = value
1187
- end
1188
- def set_missing_row_address worksheet, work, pos, len
1189
- # Offset Size Contents
1190
- # 0 2 Index of this row
1191
- # 2 2 Index to this column
1192
- row_index, _ = work.unpack 'v2'
1193
- unless worksheet.offsets[row_index]
1194
- @current_row_block_offset ||= [pos]
1195
- data = {
1196
- :index => row_index,
1197
- :row_block => @current_row_block_offset,
1198
- :offset => @current_row_block_offset[0],
1199
- }
1200
- worksheet.set_row_address row_index, data
1201
- end
1202
- end
1203
- def set_row_address worksheet, work, pos, len
1204
- # Offset Size Contents
1205
- # 0 2 Index of this row
1206
- # 2 2 Index to column of the first cell which
1207
- # is described by a cell record
1208
- # 4 2 Index to column of the last cell which is
1209
- # described by a cell record, increased by 1
1210
- # 6 2 Bit Mask Contents
1211
- # 14-0 0x7fff Height of the row, in twips = 1/20 of a point
1212
- # 15 0x8000 0 = Row has custom height;
1213
- # 1 = Row has default height
1214
- # 8 2 Not used
1215
- # 10 2 In BIFF3-BIFF4 this field contains a relative offset to
1216
- # calculate stream position of the first cell record for this
1217
- # row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
1218
- # anymore, but the DBCELL record (➜ 6.26) instead.
1219
- # 12 4 Option flags and default row formatting:
1220
- # Bit Mask Contents
1221
- # 2-0 0x00000007 Outline level of the row
1222
- # 4 0x00000010 1 = Outline group starts or ends here
1223
- # (depending on where the outline
1224
- # buttons are located, see WSBOOL
1225
- # record, ➜ 6.113), and is collapsed
1226
- # 5 0x00000020 1 = Row is hidden (manually, or by a
1227
- # filter or outline group)
1228
- # 6 0x00000040 1 = Row height and default font height
1229
- # do not match
1230
- # 7 0x00000080 1 = Row has explicit default format (fl)
1231
- # 8 0x00000100 Always 1
1232
- # 27-16 0x0fff0000 If fl = 1: Index to default XF record
1233
- # (➜ 6.115)
1234
- # 28 0x10000000 1 = Additional space above the row.
1235
- # This flag is set, if the upper
1236
- # border of at least one cell in this
1237
- # row or if the lower border of at
1238
- # least one cell in the row above is
1239
- # formatted with a thick line style.
1240
- # Thin and medium line styles are not
1241
- # taken into account.
1242
- # 29 0x20000000 1 = Additional space below the row.
1243
- # This flag is set, if the lower
1244
- # border of at least one cell in this
1245
- # row or if the upper border of at
1246
- # least one cell in the row below is
1247
- # formatted with a medium or thick
1248
- # line style. Thin line styles are
1249
- # not taken into account.
1250
- @current_row_block_offset ||= [pos]
1251
- index, first_used, first_unused, height, flags = work.unpack binfmt(:row)
1252
- height &= 0x7fff
1253
- format = nil
1254
- # TODO: read attributes from work[13,3], read flags
1255
- attrs = {
1256
- :default_format => format,
1257
- :first_used => first_used,
1258
- :first_unused => first_unused,
1259
- :index => index,
1260
- :row_block => @current_row_block_offset,
1261
- :offset => @current_row_block_offset[0],
1262
- :outline_level => flags & 0x00000007,
1263
- :collapsed => (flags & 0x0000010) > 0,
1264
- :hidden => (flags & 0x0000020) > 0,
1265
- }
1266
- if (flags & 0x00000040) > 0
1267
- attrs.store :height, height / TWIPS
1268
- end
1269
- if (flags & 0x00000080) > 0
1270
- xf = (flags & 0x0fff0000) >> 16
1271
- attrs.store :default_format, @workbook.format(xf)
1272
- end
1273
- # TODO: Row spacing
1274
- worksheet.set_row_address index, attrs
1275
- end
1276
- def setup io
1277
- ## Reading from StringIO fails without forced encoding
1278
- if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.3.0')
1279
- io.set_encoding('ASCII-8BIT')
1280
- elsif io.respond_to?(:string) && (str = io.string) && str.respond_to?(:force_encoding)
1281
- str.force_encoding('ASCII-8BIT')
1282
- end
1283
- io.rewind
1284
- @ole = Ole::Storage.open io
1285
- @workbook = Workbook.new io, {}
1286
- %w{Book Workbook BOOK WORKBOOK book workbook}.any? do |name|
1287
- @book = @ole.file.open(name) rescue false
1288
- end
1289
- raise RuntimeError, "could not locate a workbook, possibly an empty file passed" unless @book
1290
- @data = @book.read
1291
- read_bof
1292
- @workbook.ole = @book
1293
- @workbook.bof = @bof
1294
- @workbook.version = @version
1295
- biff = @workbook.biff_version
1296
- extend_reader biff
1297
- extend_internals biff
1298
- end
1299
- private
1300
- def extend_internals version
1301
- require 'spreadsheet/excel/internals/biff%i' % version
1302
- extend Internals.const_get('Biff%i' % version)
1303
- ## spreadsheets may not include a codepage record.
1304
- @workbook.encoding = encoding 850 if version < 8
1305
- rescue LoadError
1306
- end
1307
- def extend_reader version
1308
- require 'spreadsheet/excel/reader/biff%i' % version
1309
- extend Reader.const_get('Biff%i' % version)
1310
- rescue LoadError
1311
- end
1312
- end
1313
- end
1314
1360
  end