ruby-spreadsheet 0.6.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.document +5 -0
  2. data/GUIDE.txt +267 -0
  3. data/Gemfile +12 -0
  4. data/Gemfile.lock +20 -0
  5. data/History.txt +307 -0
  6. data/LICENSE.txt +619 -0
  7. data/README.txt +91 -0
  8. data/Rakefile +53 -0
  9. data/VERSION +1 -0
  10. data/bin/xlsopcodes +18 -0
  11. data/lib/parseexcel.rb +27 -0
  12. data/lib/parseexcel/parseexcel.rb +75 -0
  13. data/lib/parseexcel/parser.rb +11 -0
  14. data/lib/spreadsheet.rb +79 -0
  15. data/lib/spreadsheet/column.rb +71 -0
  16. data/lib/spreadsheet/compatibility.rb +23 -0
  17. data/lib/spreadsheet/datatypes.rb +110 -0
  18. data/lib/spreadsheet/encodings.rb +46 -0
  19. data/lib/spreadsheet/excel.rb +88 -0
  20. data/lib/spreadsheet/excel/error.rb +26 -0
  21. data/lib/spreadsheet/excel/internals.rb +386 -0
  22. data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
  23. data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
  24. data/lib/spreadsheet/excel/offset.rb +41 -0
  25. data/lib/spreadsheet/excel/reader.rb +1173 -0
  26. data/lib/spreadsheet/excel/reader/biff5.rb +22 -0
  27. data/lib/spreadsheet/excel/reader/biff8.rb +193 -0
  28. data/lib/spreadsheet/excel/row.rb +92 -0
  29. data/lib/spreadsheet/excel/sst_entry.rb +46 -0
  30. data/lib/spreadsheet/excel/workbook.rb +80 -0
  31. data/lib/spreadsheet/excel/worksheet.rb +100 -0
  32. data/lib/spreadsheet/excel/writer.rb +1 -0
  33. data/lib/spreadsheet/excel/writer/biff8.rb +75 -0
  34. data/lib/spreadsheet/excel/writer/format.rb +253 -0
  35. data/lib/spreadsheet/excel/writer/workbook.rb +652 -0
  36. data/lib/spreadsheet/excel/writer/worksheet.rb +948 -0
  37. data/lib/spreadsheet/font.rb +92 -0
  38. data/lib/spreadsheet/format.rb +177 -0
  39. data/lib/spreadsheet/formula.rb +9 -0
  40. data/lib/spreadsheet/helpers.rb +11 -0
  41. data/lib/spreadsheet/link.rb +43 -0
  42. data/lib/spreadsheet/row.rb +132 -0
  43. data/lib/spreadsheet/workbook.rb +120 -0
  44. data/lib/spreadsheet/worksheet.rb +279 -0
  45. data/lib/spreadsheet/writer.rb +30 -0
  46. data/ruby-spreadsheet.gemspec +126 -0
  47. data/test/data/test_changes.xls +0 -0
  48. data/test/data/test_copy.xls +0 -0
  49. data/test/data/test_datetime.xls +0 -0
  50. data/test/data/test_empty.xls +0 -0
  51. data/test/data/test_formula.xls +0 -0
  52. data/test/data/test_missing_row.xls +0 -0
  53. data/test/data/test_version_excel5.xls +0 -0
  54. data/test/data/test_version_excel95.xls +0 -0
  55. data/test/data/test_version_excel97.xls +0 -0
  56. data/test/excel/row.rb +35 -0
  57. data/test/excel/writer/worksheet.rb +23 -0
  58. data/test/font.rb +163 -0
  59. data/test/integration.rb +1281 -0
  60. data/test/row.rb +33 -0
  61. data/test/suite.rb +14 -0
  62. data/test/workbook.rb +21 -0
  63. data/test/worksheet.rb +80 -0
  64. metadata +203 -0
@@ -0,0 +1,17 @@
1
module Spreadsheet
  module Excel
    module Internals
      ##
      # Biff5-specific binary formats and related internal configuration. This
      # module is expected to grow as support for older Excel versions improves.
      module Biff5
        BINARY_FORMATS = {
          :dimensions => 'v5',
        }
        # Returns the Biff5 pack/unpack format registered for +key+. Keys that
        # Biff5 does not override are delegated to the next #binfmt in the
        # ancestor chain.
        def binfmt(key) # :nodoc:
          return BINARY_FORMATS[key] if BINARY_FORMATS.key?(key)
          super
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Spreadsheet
  module Excel
    module Internals
      ##
      # Biff8-specific binary formats and related internal configuration. This
      # module is expected to grow as support for older Excel versions improves
      # and more binary formats are moved here for disambiguation.
      module Biff8
        BINARY_FORMATS = {
          :bof        => 'v4V2',
          :dimensions => 'V2v2x2',
        }
        # Returns the Biff8 pack/unpack format registered for +key+. Keys that
        # Biff8 does not override are delegated to the next #binfmt in the
        # ancestor chain.
        def binfmt(key) # :nodoc:
          return BINARY_FORMATS[key] if BINARY_FORMATS.key?(key)
          super
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'spreadsheet/compatibility'
2
+
3
module Spreadsheet
  module Excel
    ##
    # Mixin that keeps track of offsets in modified Excel documents.
    # Considered internal and subject to change without notice.
    module Offset
      include Compatibility
      attr_reader :changes, :offsets
      # Passes all arguments up the ancestor chain, then starts out with
      # empty change- and offset-tables.
      def initialize(*args)
        super
        @changes = {}
        @offsets = {}
      end
      # Runs when Offset is mixed into +mod+ and equips +mod+ with the
      # class-level +offset+ macro.
      def self.append_features(mod)
        super
        mod.module_eval do
          class << self
            include Compatibility
            # For every name in +keys+ this defines:
            # * a reader, unless an instance method of that name exists,
            # * a writer <name>= that flags the name in @changes,
            # * set_<name>(value, pos, len), which stores the value, records
            #   [pos, len] in @offsets and finally calls have_set_<name> if
            #   the instance responds to it.
            def offset(*keys)
              keys.each do |name|
                unless instance_methods.include?(method_name(name))
                  attr_reader name
                end
                define_method("#{name}=") do |val|
                  @changes.store(name, true)
                  instance_variable_set(ivar_name(name), val)
                end
                define_method("set_#{name}") do |val, pos, len|
                  instance_variable_set(ivar_name(name), val)
                  @offsets.store(name, [pos, len])
                  hook = "have_set_#{name}"
                  send(hook, val, pos, len) if respond_to?(hook)
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,1173 @@
1
+ require 'spreadsheet/encodings'
2
+ require 'spreadsheet/font'
3
+ require 'spreadsheet/formula'
4
+ require 'spreadsheet/link'
5
+ require 'spreadsheet/excel/error'
6
+ require 'spreadsheet/excel/internals'
7
+ require 'spreadsheet/excel/sst_entry'
8
+ require 'spreadsheet/excel/worksheet'
9
+
10
+ module Spreadsheet
11
+ module Excel
12
+ ##
13
+ # Reader class for Excel Workbooks. Most read_* methods correspond to an
14
+ # Excel-Record/Opcode. You should not need to call any of its methods
15
+ # directly. If you think you do, look at #read
16
+ class Reader
17
+ include Spreadsheet::Encodings
18
+ include Spreadsheet::Excel::Internals
19
+ ROW_BLOCK_OPS = {
20
+ :blank => true, :boolerr => true, :dbcell => true, :formula => true,
21
+ :label => true, :labelsst => true, :mulblank => true, :mulrk => true,
22
+ :number => true, :rk => true, :rstring => true,
23
+ }
24
# Sets up a fresh reader.
#
# opts:: option Hash, kept in @opts. The :bigendian entry overrides the
#        byte-order detection below.
def initialize(opts = {})
  @opts = opts
  @pos = 0
  # Unless forced by the caller, detect the byte order by checking how a
  # native long is packed.
  @bigendian = opts.fetch(:bigendian) do
    [1].pack('l') != "\001\000\000\000"
  end
  @boundsheets = nil
  @current_row_block = {}
  @current_row_block_offset = nil
  @formats = {}
  BUILTIN_FORMATS.each do |idx, fmt|
    @formats.store(idx, client(fmt, 'UTF-8'))
  end
end
36
# Decodes the 4-byte RK value in +work+ into an Integer or a Float.
#
# Bit  Mask       Contents
#   0  0x00000001 0 = Value not changed 1 = Value is multiplied by 100
#   1  0x00000002 0 = Floating-point value 1 = Signed integer value
# 31-2 0xFFFFFFFC Encoded value
#
# If bit 1 is cleared, the encoded value represents the 30 most significant
# bits of an IEEE 754 floating-point value (64-bit double precision). The
# 34 least significant bits must be set to zero. If bit 1 is set, the
# encoded value represents a signed 30-bit integer value. To get the
# correct integer, the encoded value has to be shifted right arithmetically
# by 2 bits. If bit 0 is set, the decoded value (both integer and
# floating-point) must be divided by 100 to get the final result.
def decode_rk(work)
  flag_byte, = work.unpack('C')
  percent = (flag_byte & 1) == 1
  if (flag_byte & 2) == 0
    # Floating point: clear the two flag bits and use the remaining bits as
    # the upper half of a little-endian double whose lower half is zero.
    high, = work.unpack('V')
    number, = [0, high & 0xfffffffc].pack('V2').unpack(EIGHT_BYTE_DOUBLE)
  else
    # Signed integer: 'l' unpacks in native byte order, so the bytes are
    # reversed first on big-endian hosts.
    native = @bigendian ? work.reverse : work
    signed, = native.unpack('l')
    # arithmetic shift drops the two flag bits
    number = signed >> 2
  end
  percent ? number / 100.0 : number
end
71
# Looks up the encoding registered in CODEPAGES for +codepage_id+.
# Returns an Encoding object on Ruby 1.9 and later, the bare name otherwise.
# Raises a RuntimeError for an unknown codepage.
def encoding(codepage_id)
  name = CODEPAGES.fetch(codepage_id) do
    raise "Unknown Codepage 0x%04x" % codepage_id
  end
  return name unless RUBY_VERSION >= '1.9'
  Encoding.find(name)
end
80
# Reads the record starting at @pos and advances @pos past it.
#
# Returns [start position, opcode, total record length, payload], where the
# opcode is translated through SEDOCPO when it is known there. Returns nil
# at the end of @data or when the record header is incomplete.
def get_next_chunk
  start = @pos
  return unless start < @data.size
  op, len = @data[@pos, OPCODE_SIZE].unpack('v2')
  @pos += OPCODE_SIZE
  return unless len
  work = @data[@pos, len]
  @pos += len
  code = SEDOCPO.fetch(op, op)
  io = @opts[:print_opcodes]
  if io
    # optional debugging trace of every record that is read
    io.puts sprintf("0x%04x/%-16s %5i: %s",
                    op, code.inspect, len, work.inspect)
  end
  [start, code, len + OPCODE_SIZE, work]
end
97
# Tells whether +op+ still belongs to the current row block. A :row opcode
# only counts when the +previous+ opcode was a :row as well; any other
# opcode counts when it is listed in ROW_BLOCK_OPS.
def in_row_block?(op, previous)
  return previous == op if op == :row
  ROW_BLOCK_OPS.include?(op)
end
104
# Returns the :memoization entry of the options passed to the reader.
def memoize?
  @opts[:memoization]
end
107
# Stores [offset, length] for every worksheet in the workbook's offset
# table. A sheet extends up to the offset of the following sheet; the last
# sheet extends to the end of the workbook's ole stream.
def postread_workbook
  worksheets = @workbook.worksheets
  worksheets.each_with_index do |sheet, idx|
    start = sheet.offset
    successor = worksheets[idx + 1]
    finish = successor ? successor.offset : @workbook.ole.size
    @workbook.offsets.store(sheet, [start, finish - start])
  end
end
115
# Hook with an intentionally empty body; does nothing with +worksheet+.
def postread_worksheet(worksheet)
end
117
##
# The entry point for reading Excel documents: reads the Biff version and
# loads additional reader methods before parsing the document. Afterwards
# format 0 becomes the workbook's default format and the change-tracking
# table is cleared. Returns the workbook.
def read(io)
  setup(io)
  read_workbook
  @workbook.default_format = @workbook.format(0)
  @workbook.changes.clear
  @workbook
end
127
# Handles a BLANK record: an empty cell that only carries a format.
def read_blank(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  row, column, xf = work.unpack(binfmt(:blank))
  set_cell(worksheet, row, column, xf)
end
135
# Reads the next record as a BOF record. Its opcode is remembered in @bof;
# @version is only updated when the record opens the workbook globals.
def read_bof
  # Offset  Size  Contents
  #      0     2  BIFF version (always 0x0600 for BIFF8)
  #      2     2  Type of the following data: 0x0005 = Workbook globals
  #                                           0x0006 = Visual Basic module
  #                                           0x0010 = Worksheet
  #                                           0x0020 = Chart
  #                                           0x0040 = Macro sheet
  #                                           0x0100 = Workspace file
  #      4     2  Build identifier
  #      6     2  Build year
  #      8     4  File history flags
  #     12     4  Lowest Excel version that can read all records in this file
  _pos, @bof, _len, work = get_next_chunk
  ## Version and datatype are common to all Excel versions. Later versions
  #  carry additional information such as build-id and -year (from BIFF5),
  #  which is ignored for the time being.
  version, datatype = work.unpack('v2')
  @version = version if datatype == 0x5
end
157
# Handles a BOOLERR record: stores true/false for a Boolean value and an
# Error object for an error code.
def read_boolerr(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     1  Boolean or error value (type depends on the following byte)
  #      7     1  0 = Boolean value; 1 = Error code
  row, column, xf, raw, kind = work.unpack('v3C2')
  cell = kind == 0 ? raw > 0 : Error.new(raw)
  set_cell(worksheet, row, column, xf, cell)
end
167
# Handles a BOUNDSHEET record: updates the running [count, position,
# length] summary of all BOUNDSHEET records and adds a Worksheet with the
# sheet's name and stream offset to the workbook.
def read_boundsheet(work, pos, len)
  # Offset  Size  Contents
  #      0     4  Absolute stream position of the BOF record of the sheet
  #               represented by this record. This field is never encrypted
  #               in protected files.
  #      4     1  Visibility: 0x00 = Visible
  #                           0x01 = Hidden
  #                           0x02 = Strong hidden (see below)
  #      5     1  Sheet type: 0x00 = Worksheet
  #                           0x02 = Chart
  #                           0x06 = Visual Basic module
  #      6  var.  Sheet name: BIFF5/BIFF7: Byte string,
  #                           8-bit string length (➜ 3.3)
  #                           BIFF8: Unicode string, 8-bit string length (➜ 3.4)
  # Visibility and sheet type are not evaluated here.
  offset, = work.unpack('V')
  name = client(read_string(work[6..-1]), @workbook.encoding)
  # The first record fixes the position; each record adds to count and length.
  @boundsheets ||= [0, pos, 0]
  @boundsheets[0] += 1
  @boundsheets[2] += len
  @workbook.set_boundsheets(*@boundsheets)
  sheet = Worksheet.new(:name   => name,
                        :ole    => @book,
                        :offset => offset,
                        :reader => self)
  @workbook.add_worksheet(sheet)
end
195
# Handles a CODEPAGE record: resolves the 16-bit codepage id through
# #encoding and records it on the workbook together with its position.
def read_codepage(work, pos, len)
  codepage = work.unpack('v').first
  @workbook.set_encoding(encoding(codepage), pos, len)
end
199
# Handles a COLINFO record: creates one Column per index in the covered
# range and registers it on +worksheet+.
def read_colinfo(worksheet, work, pos, len)
  # Offset  Size  Contents
  #      0     2  Index to first column in the range
  #      2     2  Index to last column in the range
  #      4     2  Width of the columns in 1/256 of the width of the zero
  #               character, using default font (first FONT record in the
  #               file)
  #      6     2  Index to XF record (➜ 6.115) for default column formatting
  #      8     2  Option flags:
  #               Bits  Mask    Contents
  #                  0  0x0001  1 = Columns are hidden
  #               10-8  0x0700  Outline level of the columns (0 = no outline)
  #                 12  0x1000  1 = Columns are collapsed
  #     10     2  Not used
  # The last directive of the :colinfo format is dropped before unpacking.
  layout = binfmt(:colinfo)[0..-2]
  first, last, width, xf, opts = work.unpack(layout)
  first.upto(last) do |idx|
    column = Column.new(idx, @workbook.format(xf),
                        :width         => width.to_f / 256,
                        :hidden        => (opts & 0x0001) > 0,
                        :collapsed     => (opts & 0x1000) > 0,
                        :outline_level => (opts & 0x0700) / 256)
    column.worksheet = worksheet
    worksheet.columns[idx] = column
  end
end
224
# Handles a DIMENSIONS record: unpacks it with the version-specific
# :dimensions format and hands the values to +worksheet+.
def read_dimensions(worksheet, work, pos, len)
  # Offset  Size  Contents
  #      0     4  Index to first used row
  #      4     4  Index to last used row, increased by 1
  #      8     2  Index to first used column
  #     10     2  Index to last used column, increased by 1
  #     12     2  Not used
  dimensions = work.unpack(binfmt(:dimensions))
  worksheet.set_dimensions(dimensions, pos, len)
end
233
# Handles a FONT record: builds a Font from it and adds it to the workbook.
def read_font(work, pos, len)
  # Offset  Size  Contents
  #      0     2  Height of the font (in twips = 1/20 of a point)
  #      2     2  Option flags:
  #               Bit  Mask    Contents
  #                 0  0x0001  1 = Characters are bold (redundant, see below)
  #                 1  0x0002  1 = Characters are italic
  #                 2  0x0004  1 = Characters are underlined
  #                                (redundant, see below)
  #                 3  0x0008  1 = Characters are struck out
  #                 4  0x0010  1 = Characters are outlined (djberger)
  #                 5  0x0020  1 = Characters are shadowed (djberger)
  #      4     2  Colour index (➜ 6.70)
  #      6     2  Font weight (100-1000). Standard values are
  #               0x0190 (400) for normal text and
  #               0x02bc (700) for bold text.
  #      8     2  Escapement type: 0x0000 = None
  #                                0x0001 = Superscript
  #                                0x0002 = Subscript
  #     10     1  Underline type:  0x00 = None
  #                                0x01 = Single
  #                                0x02 = Double
  #                                0x21 = Single accounting
  #                                0x22 = Double accounting
  #     11     1  Font family:
  #               0x00 = None (unknown or don't care)
  #               0x01 = Roman (variable width, serifed)
  #               0x02 = Swiss (variable width, sans-serifed)
  #               0x03 = Modern (fixed width, serifed or sans-serifed)
  #               0x04 = Script (cursive)
  #               0x05 = Decorative (specialised,
  #                      for example Old English, Fraktur)
  #     12     1  Character set: 0x00 =   0 = ANSI Latin
  #                              0x01 =   1 = System default
  #                              0x02 =   2 = Symbol
  #                              0x4d =  77 = Apple Roman
  #                              0x80 = 128 = ANSI Japanese Shift-JIS
  #                              0x81 = 129 = ANSI Korean (Hangul)
  #                              0x82 = 130 = ANSI Korean (Johab)
  #                              0x86 = 134 = ANSI Chinese Simplified GBK
  #                              0x88 = 136 = ANSI Chinese Traditional BIG5
  #                              0xa1 = 161 = ANSI Greek
  #                              0xa2 = 162 = ANSI Turkish
  #                              0xa3 = 163 = ANSI Vietnamese
  #                              0xb1 = 177 = ANSI Hebrew
  #                              0xb2 = 178 = ANSI Arabic
  #                              0xba = 186 = ANSI Baltic
  #                              0xcc = 204 = ANSI Cyrillic
  #                              0xde = 222 = ANSI Thai
  #                              0xee = 238 = ANSI Latin II (Central European)
  #                              0xff = 255 = OEM Latin I
  #     13     1  Not used
  #     14  var.  Font name:
  #               BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
  #               BIFF8: Unicode string, 8-bit string length (➜ 3.4)
  font = Font.new(client(read_string(work[14..-1]), @workbook.encoding))
  # The weight is assigned to the font directly during destructuring.
  size, opts, color, font.weight, escapement, underline,
    family, charset = work.unpack(binfmt(:font))
  # NOTE(review): if TWIPS is an Integer this division truncates fractional
  # point sizes — confirm against the definition of TWIPS.
  font.size       = size / TWIPS
  font.italic     = opts & 0x0002
  font.strikeout  = opts & 0x0008
  font.outline    = opts & 0x0010
  font.shadow     = opts & 0x0020
  font.color      = COLOR_CODES[color] || :text
  font.escapement = ESCAPEMENT_TYPES[escapement]
  font.underline  = UNDERLINE_TYPES[underline]
  font.family     = FONT_FAMILIES[family]
  font.encoding   = FONT_ENCODINGS[charset]
  @workbook.add_font(font)
end
304
# Handles a FORMAT record: remembers the number-format string under its
# index in @formats.
def read_format(work, pos, len)
  # Offset  Size  Contents
  #      0     2  Format index used in other records
  #      2  var.  Number format string
  #               (Unicode string, 16-bit string length, ➜ 3.4)
  idx = work.unpack('v').first
  raw = read_string(work[2..-1], 2)
  @formats[idx] = client(raw, @workbook.encoding)
end
313
# Handles a FORMULA record: builds a Formula holding the raw token data and
# the cached result, and stores it in the addressed cell.
def read_formula(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     8  Result of the formula. See below for details.
  #     14     2  Option flags:
  #               Bit  Mask    Contents
  #                 0  0x0001  1 = Recalculate always
  #                 1  0x0002  1 = Calculate on open
  #                 3  0x0008  1 = Part of a shared formula
  #     16     4  Not used
  #     20  var.  Formula data (RPN token array, ➜ 4)
  #               Offset  Size  Contents
  #                    0     2  Size of the following formula data (sz)
  #                    2    sz  Formula data (RPN token array)
  #               [2+sz]  var.  (optional) Additional data for specific tokens
  #                             (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
  #
  # Result of the Formula
  # Dependent on the type of value the formula returns, the result field has
  # the following format:
  #
  # Result is a numeric value:
  # Offset  Size  Contents
  #      0     8  IEEE 754 floating-point value (64-bit double precision)
  #
  # Result is a string (the string follows in a STRING record, ➜ 6.98):
  # Offset  Size  Contents
  #      0     1  0x00 (identifier for a string value)
  #      1     5  Not used
  #      6     2  0xffff
  # Note: In BIFF8 the string must not be empty. For empty cells there is a
  # special identifier defined (see below).
  #
  # Result is a Boolean value:
  # Offset  Size  Contents
  #      0     1  0x01 (identifier for a Boolean value)
  #      1     1  Not used
  #      2     1  0 = FALSE, 1 = TRUE
  #      3     3  Not used
  #      6     2  0xffff
  #
  # Result is an error value:
  # Offset  Size  Contents
  #      0     1  0x02 (identifier for an error value)
  #      1     1  Not used
  #      2     1  Error code (➜ 3.7)
  #      3     3  Not used
  #      6     2  0xffff
  #
  # Result is an empty cell (BIFF8), for example an empty string:
  # Offset  Size  Contents
  #      0     1  0x03 (identifier for an empty cell)
  #      1     5  Not used
  #      6     2  0xffff
  row, column, xf, rtype, rval, rcheck, opts = work.unpack('v3CxCx3v2')
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]
  if rcheck != 0xffff || rtype > 3
    # No 0xffff marker (or an unknown type): the result field is a double.
    number, = work.unpack('x6E')
    # on architectures where sizeof(double) > 8
    number ||= work.unpack('x6e').first
    formula.value = number
  else
    case rtype
    when 0
      # The string result follows in a STRING record of its own.
      chunk_pos, op, _len, payload = get_next_chunk
      if op == :sharedfmla
        ## TODO: formula-support in 0.8.0
        chunk_pos, op, _len, payload = get_next_chunk
      end
      if op == :string
        formula.value = client(read_string(payload, 2), @workbook.encoding)
      else
        warn "String Value expected after Formula, but got #{op}"
        formula.value = Error.new(0x2a)
        # rewind so that the unexpected record is read again by the caller
        @pos = chunk_pos
      end
    when 1
      formula.value = rval > 0
    when 2
      formula.value = Error.new(rval)
    end
    # rtype 3 (empty cell): leave the Formula value blank
  end
  set_cell(worksheet, row, column, xf, formula)
end
402
# Handles an HLINK record: builds a Link (description, target frame, url,
# fragment) and attaches it to every cell of the record's cell range.
#
# worksheet:: receives #add_link(row, col, link) for each covered cell
# work::      payload of the HLINK record
# pos, len::  position and length of the record; not evaluated here (+pos+
#             is reused as the read cursor inside +work+)
def read_hlink(worksheet, work, pos, len)
  # 6.53.1 Common Record Contents
  # Offset  Size  Contents
  #      0     8  Cell range address of all cells containing this hyperlink
  #               (➜ 3.13.1)
  #      8    16  GUID of StdLink:
  #               D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
  #               (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
  #     24     4  Unknown value: 0x00000002
  #     28     4  Option flags (see below)
  #               Bit      Mask        Contents
  #               0        0x00000001  0 = No link extant
  #                                    1 = File link or URL
  #               1        0x00000002  0 = Relative file path
  #                                    1 = Absolute path or URL
  #               2 and 4  0x00000014  0 = No description
  #                                    1 (both bits) = Description
  #               3        0x00000008  0 = No text mark
  #                                    1 = Text mark
  #               7        0x00000080  0 = No target frame
  #                                    1 = Target frame
  #               8        0x00000100  0 = File link or URL
  #                                    1 = UNC path (incl. server name)
  #--------------------------------------------------------------------------
  #   [32]     4  (optional, see option flags) Character count of description
  #               text, including trailing zero word (dl)
  #   [36]  2∙dl  (optional, see option flags) Character array of description
  #               text, no Unicode string header, always 16-bit characters,
  #               zero-terminated
  #--------------------------------------------------------------------------
  # [var.]     4  (optional, see option flags) Character count of target
  #               frame, including trailing zero word (fl)
  # [var.]  2∙fl  (optional, see option flags) Character array of target
  #               frame, no Unicode string header, always 16-bit characters,
  #               zero-terminated
  #--------------------------------------------------------------------------
  #   var.  var.  Special data (➜ 6.53.2 and following)
  #--------------------------------------------------------------------------
  # [var.]     4  (optional, see option flags) Character count of the text
  #               mark, including trailing zero word (tl)
  # [var.]  2∙tl  (optional, see option flags) Character array of the text
  #               mark without “#” sign, no Unicode string header, always
  #               16-bit characters, zero-terminated
  firstrow, lastrow, firstcol, lastcol, _guid, opts = work.unpack 'v4H32x4V'
  has_link = opts & 0x0001
  desc     = opts & 0x0014
  textmark = opts & 0x0008
  target   = opts & 0x0080
  unc      = opts & 0x0100
  link = Link.new
  # the optional fields start right behind the 32-byte common header
  pos = 32
  if desc > 0
    description, pos = read_hlink_string work, pos
    link << description
  end
  if target > 0
    link.target_frame, pos = read_hlink_string work, pos
  end
  if unc > 0
    # 6.53.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
    # These data fields are for UNC paths containing a server name (for
    # instance “\\server\path\file.xls”). The lower 9 bits of the option
    # flags field must be 1.x00x.xx112.
    # Offset  Size  Contents
    #      0     4  Character count of the UNC,
    #               including trailing zero word (fl)
    #      4  2∙fl  Character array of the UNC, no Unicode string header,
    #               always 16-bit characters, zero-terminated.
    link.url, pos = read_hlink_string work, pos
  elsif has_link > 0
    uid, = work.unpack "x#{pos}H32"
    pos += 16
    if uid == "e0c9ea79f9bace118c8200aa004ba90b"
      # 6.53.2 Hyperlink containing a URL (Uniform Resource Locator)
      # These data fields occur for links which are not local files or files
      # in the local network (for instance HTTP and FTP links and e-mail
      # addresses). The lower 9 bits of the option flags field must be
      # 0.x00x.xx112 (x means optional, depending on hyperlink content). The
      # GUID could be used to distinguish a URL from a file link.
      # Offset  Size  Contents
      #      0    16  GUID of URL Moniker:
      #               E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
      #               (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
      #     16     4  Size of character array of the URL, including trailing
      #               zero word (us). There are us/2-1 characters in the
      #               following string.
      #     20    us  Character array of the URL, no Unicode string header,
      #               always 16-bit characters, zero-terminated
      size, = work.unpack "x#{pos}V"
      pos += 4
      data = work[pos, size].chomp "\000\000"
      link.url = client data
      pos += size
    else
      # 6.53.3 Hyperlink to a Local File
      # These data fields are for links to files on local drives. The path of
      # the file can be complete with drive letter (absolute) or relative to
      # the location of the workbook. The lower 9 bits of the option flags
      # field must be 0.x00x.xxx12. The GUID could be used to distinguish a
      # URL from a file link.
      # Offset  Size  Contents
      #      0    16  GUID of File Moniker:
      #               03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
      #               (00000303-0000-0000-C000-000000000046)
      #     16     2  Directory up-level count. Each leading “..\” in the
      #               file link is deleted and increases this counter.
      #     18     4  Character count of the shortened file path and name,
      #               including trailing zero byte (sl)
      #     22    sl  Character array of the shortened file path and name in
      #               8.3-DOS-format. This field can be filled with a long
      #               file name too. No Unicode string header, always 8-bit
      #               characters, zero-terminated.
      #  22+sl    24  Unknown byte sequence:
      #               FF FF AD DE 00 00 00 00
      #               00 00 00 00 00 00 00 00
      #               00 00 00 00 00 00 00 00
      #  46+sl     4  Size of the following file link field including string
      #               length field and additional data field (sz). If sz is
      #               zero, nothing will follow (except a text mark).
      # [50+sl]    4  (optional) Size of character array of the extended file
      #               path and name (xl). There are xl/2 characters in the
      #               following string.
      # [54+sl]    2  (optional) Unknown byte sequence: 03 00
      # [56+sl]   xl  (optional) Character array of the extended file path
      #               and name (xl), no Unicode string header, always 16-bit
      #               characters, not zero-terminated
      uplevel, count = work.unpack "x#{pos}vV"
      pos += 6
      # TODO: short file path may have any of the OEM encodings. Find out which
      #       and use the #client method to convert the encoding.
      prefix = internal('..\\', 'UTF-8') * uplevel
      link.dos = link.url = prefix << work[pos, count].chomp("\000")
      # skip the short path and the 24 unknown bytes behind it
      pos += count + 24
      total, = work.unpack "x#{pos}V"
      pos += 4
      if total > 0
        # Only a non-zero sz is followed by the length field (4 bytes), the
        # unknown 03 00 sequence (2 bytes) and the extended path. With sz == 0
        # the cursor must stay put, so that a text mark is read from the
        # correct offset.
        size, = work.unpack "x#{pos}V"
        pos += 6
        link.url = client work[pos, size]
        pos += size
      end
    end
  else
    # 6.53.5 Hyperlink to the Current Workbook
    # In this case only the text mark field is present (optional with
    # description).
    # Example: The URL “#Sheet2!B1:C2” refers to the given range in the
    #          current workbook.
    # The lower 9 bits of the option flags field must be 0.x00x.1x002.
  end
  if textmark > 0
    link.fragment, _ = read_hlink_string work, pos
  end
  # without a description the link text falls back to the href
  if link.empty?
    link << link.href
  end
  firstrow.upto lastrow do |row|
    firstcol.upto lastcol do |col|
      worksheet.add_link row, col, link
    end
  end
end
564
# Reads a length-prefixed string of 16-bit characters from +work+ at +pos+:
# a 32-bit character count followed by that many 2-byte characters. A
# trailing zero word is stripped. Returns [converted string, position
# directly behind the string].
def read_hlink_string(work, pos)
  chars, = work.unpack("x#{pos}V")
  start = pos + 4
  bytes = chars * 2
  text = work[start, bytes].chomp("\000\000")
  [client(text, 'UTF-16LE'), start + bytes]
end
572
# Placeholder for the INDEX record; the record is currently ignored.
def read_index(worksheet, work, pos, len)
  # Offset  Size  Contents
  #      0     4  Not used
  #      4     4  Index to first used row (rf, 0-based)
  #      8     4  Index to first row of unused tail of sheet
  #               (rl, last used row + 1, 0-based)
  #     12     4  Absolute stream position of the
  #               DEFCOLWIDTH record (➜ 6.29) of the current sheet. If this
  #               record does not exist, the offset points to the record at
  #               the position where the DEFCOLWIDTH record would occur.
  #     16  4∙nm  Array of nm absolute stream positions to the
  #               DBCELL record (➜ 6.26) of each Row Block
  # TODO: use the index if it exists
  # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
end
587
# Handles a LABEL record: stores the converted string in the addressed cell.
def read_label(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6  var.  Unicode string, 16-bit string length (➜ 3.4)
  row, column, xf = work.unpack('v3')
  text = client(read_string(work[6..-1], 2), @workbook.encoding)
  set_cell(worksheet, row, column, xf, text)
end
597
# Handles a LABELSST record: stores the shared string that the record's SST
# index refers to in the addressed cell.
def read_labelsst(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     4  Index into SST record (➜ 6.96)
  row, column, xf, sst_index = work.unpack(binfmt(:labelsst))
  text = worksheet.shared_string(sst_index)
  set_cell(worksheet, row, column, xf, text)
end
606
# Handles a MULBLANK record: a run of empty, formatted cells in one row.
def read_mulblank(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to first column (fc)
  #      4  2∙nc  List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
  # 4+2∙nc     2  Index to last column (lc)
  row, first_column, *xfs = work.unpack('v*')
  # the trailing word is the index of the last column, which is not needed
  xfs.pop
  xfs.each_with_index do |xf, shift|
    set_cell(worksheet, row, first_column + shift, xf)
  end
end
616
# Handles a MULRK record: a run of RK-encoded numbers in one row.
def read_mulrk(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to first column (fc)
  #      4  6∙nc  List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
  #               Offset  Size  Contents
  #                    0     2  Index to XF record (➜ 6.115)
  #                    2     4  RK value (➜ 3.6)
  # 4+6∙nc     2  Index to last column (lc)
  row, current = work.unpack('v2')
  # walk the 6-byte XF/RK structures that follow the 4-byte header
  4.step(work.size - 6, 6) do |at|
    xf = work.unpack("x#{at}v").first
    number = decode_rk(work[at + 2, 4])
    set_cell(worksheet, row, current, xf, number)
    current += 1
  end
end
632
# Handles a NUMBER record: stores the floating-point value in the addressed
# cell.
def read_number(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     8  IEEE 754 floating-point value (64-bit double precision)
  row, column, xf, number = work.unpack(binfmt(:number))
  set_cell(worksheet, row, column, xf, number)
end
641
# Handles an RK record: decodes the RK value and stores it in the addressed
# cell.
def read_rk(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     4  RK value (➜ 3.6)
  row, column, xf = work.unpack('v3')
  number = decode_rk(work[6, 4])
  set_cell(worksheet, row, column, xf, number)
end
650
# Reads the cell records of the row addressed by +addr+ (:index and :offset)
# and returns the Row registered for it. Only one row is cached at a time:
# asking for the cached row again returns it without touching the stream.
def read_row(worksheet, addr)
  row = addr[:index]
  key = [worksheet, row]
  return @current_row_block[key] if @current_row_block.key?(key)

  @current_row_block.clear
  cells = @current_row_block[key] = Row.new(nil, row)
  @pos = addr[:offset]
  found = false
  while (tuple = get_next_chunk)
    pos, op, _len, work = tuple
    case op
    when :eof      # ● EOF ➜ 6.36 - we should only get here if there is just
                   #   one Row-Block
      @pos = pos
      return cells
    when :dbcell   # ○ DBCELL Stream offsets to the cell records of each row
      return cells
    when :row      # ○○ Row Blocks ➜ 5.7
                   # ● ROW ➜ 6.83
      # ignore, we already did these in read_worksheet
      return cells if found
    when :blank    # BLANK ➜ 6.7
      found = true
      read_blank(worksheet, addr, work)
    when :boolerr  # BOOLERR ➜ 6.10
      found = true
      read_boolerr(worksheet, addr, work)
    when 0x0002    # INTEGER ➜ 6.56 (BIFF2 only)
      found = true
      # TODO: implement for BIFF2 support
    when :formula  # FORMULA ➜ 6.46
      found = true
      read_formula(worksheet, addr, work)
    when :label    # LABEL ➜ 6.59 (BIFF2-BIFF7)
      found = true
      read_label(worksheet, addr, work)
    when :labelsst # LABELSST ➜ 6.61 (BIFF8 only)
      found = true
      read_labelsst(worksheet, addr, work)
    when :mulblank # MULBLANK ➜ 6.64 (BIFF5-BIFF8)
      found = true
      read_mulblank(worksheet, addr, work)
    when :mulrk    # MULRK ➜ 6.65 (BIFF5-BIFF8)
      found = true
      read_mulrk(worksheet, addr, work)
    when :number   # NUMBER ➜ 6.68
      found = true
      read_number(worksheet, addr, work)
    when :rk       # RK ➜ 6.82 (BIFF3-BIFF8)
      found = true
      read_rk(worksheet, addr, work)
    when :rstring  # RSTRING ➜ 6.84 (BIFF5/BIFF7)
      found = true
      read_rstring(worksheet, addr, work)
    end
  end
  cells
end
708
# Handles an RSTRING record: stores the converted string in the addressed
# cell. The rich-text formatting runs behind the string are not evaluated.
def read_rstring(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6    sz  Unformatted Unicode string, 16-bit string length (➜ 3.4)
  #   6+sz     2  Number of Rich-Text formatting runs (rt)
  #   8+sz  4·rt  List of rt formatting runs (➜ 3.2)
  row, column, xf = work.unpack('v3')
  text = client(read_string(work[6..-1], 2), @workbook.encoding)
  set_cell(worksheet, row, column, xf, text)
end
720
# Handles a WINDOW2 record. Only the "sheet selected" flag (mask 0x0200) is
# evaluated; it is stored in worksheet.selected. +pos+ and +len+ are unused.
def read_window2(worksheet, work, pos, len)
  # This record contains additional settings for the document window
  # (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8).
  # It is part of the Sheet View Settings Block (➜ 4.5).
  # Offset  Size  Contents
  #      0     2  Option flags:
  #               Bits  Mask    Contents
  #                  0  0x0001  0 = Show formula results
  #                             1 = Show formulas
  #                  1  0x0002  0 = Do not show grid lines
  #                             1 = Show grid lines
  #                  2  0x0004  0 = Do not show sheet headers
  #                             1 = Show sheet headers
  #                  3  0x0008  0 = Panes are not frozen
  #                             1 = Panes are frozen (freeze)
  #                  4  0x0010  0 = Show zero values as empty cells
  #                             1 = Show zero values
  #                  5  0x0020  0 = Manual grid line colour
  #                             1 = Automatic grid line colour
  #                  6  0x0040  0 = Columns from left to right
  #                             1 = Columns from right to left
  #                  7  0x0080  0 = Do not show outline symbols
  #                             1 = Show outline symbols
  #                  8  0x0100  0 = Keep splits if pane freeze is removed
  #                             1 = Remove splits if pane freeze is removed
  #                  9  0x0200  0 = Sheet not selected
  #                             1 = Sheet selected (BIFF5-BIFF8)
  #                 10  0x0400  0 = Sheet not active
  #                             1 = Sheet active (BIFF5-BIFF8)
  #                 11  0x0800  0 = Show in normal view
  #                             1 = Show in page break preview (BIFF8)
  #      2     2  Index to first visible row
  #      4     2  Index to first visible column
  #      6     2  Colour index of grid line colour (➜ 5.74).
  #               Note that in BIFF2-BIFF5 an RGB colour is written instead.
  #      8     2  Not used
  #     10     2  Cached magnification factor in page break preview (in percent)
  #               0 = Default (60%)
  #     12     2  Cached magnification factor in normal view (in percent)
  #               0 = Default (100%)
  #     14     4  Not used
  option_flags = work.unpack('v').first
  worksheet.selected = (option_flags & 0x0200) > 0
end
764
# Reads the workbook-globals records chunk by chunk and dispatches each known
# opcode to its reader. Returns when the next BOF record is met, or - after
# calling postread_workbook - when EOF is met.
def read_workbook
  # opcode of the last record that was not a CONTINUE record
  previous_op = nil
  while (chunk = get_next_chunk)
    pos, op, len, work = chunk
    case op
    when @bof, :bof   # ● BOF Type = worksheet (➜ 6.8)
      return
    when :eof         # ● EOF ➜ 6.36
      postread_workbook
      return
    when :datemode    # ○ DATEMODE ➜ 6.25
      flag = work.unpack('v').first
      @workbook.date_base =
        flag == 1 ? DateTime.new(1904, 1, 1) : DateTime.new(1899, 12, 31)
    when :continue    # ○ CONTINUE ➜ 6.22
      # only a continued SST record (● SST ➜ 6.96) is handled
      continue_sst(work, pos, len) if previous_op == :sst
    when :codepage    # ○ CODEPAGE ➜ 6.17
      read_codepage(work, pos, len)
    when :boundsheet  # ●● BOUNDSHEET ➜ 6.12
      read_boundsheet(work, pos, len)
    when :xf          # ●● XF ➜ 6.115
      read_xf(work, pos, len)
    when :sst         # ○ Shared String Table ➜ 5.11
                      # ● SST ➜ 6.96
      read_sst(work, pos, len)
      # TODO: implement memory-efficient sst handling, possibly in conjunction
      #       with EXTSST
    when :extsst      # ● EXTSST ➜ 6.40
      read_extsst(work, pos, len)
    when :style       # ●● STYLE ➜ 6.99
      read_style(work, pos, len)
    when :format      # ○○ FORMAT (Number Format) ➜ 6.45
      read_format(work, pos, len)
    when :font        # ●● FONT ➜ 6.43
      read_font(work, pos, len)
    end
    previous_op = op unless op == :continue
  end
end
810
# Scans the records of one worksheet, starting at stream position +offset+.
# Row addresses are registered for later reading, and the size of each row
# block is recorded once the stream leaves that block. Returns after EOF
# (having called postread_worksheet) or when no more chunks are available.
def read_worksheet(worksheet, offset)
  @pos = offset
  @detected_rows = {}
  previous = nil
  while (chunk = get_next_chunk)
    pos, op, len, work = chunk
    # Close the open row block: store its length next to its start offset.
    block = @current_row_block_offset
    if block && !in_row_block?(op, previous)
      @current_row_block_offset = nil
      block[1] = pos - block[0]
    end
    case op
    when :eof         # ● EOF ➜ 6.36
      postread_worksheet(worksheet)
      return
    # when :uncalced  # ○ UNCALCED ➜ 6.104
    #   TODO: Formula support. Values were not calculated before saving
    #   warn <<-EOS
    #     Some fields containing formulas were saved without a computed value.
    #     Support Spreadsheet::Excel by implementing formula-calculations!
    #   EOS
    # when :index     # ○ INDEX ➜ 5.7 (Row Blocks), ➜ 6.55
    #   TODO: if there are changes in rows, omit index when writing
    #   read_index worksheet, work, pos, len
    when :guts        # GUTS 5.53
      read_guts(worksheet, work, pos, len)
    when :colinfo     # ○○ COLINFO ➜ 6.18
      read_colinfo(worksheet, work, pos, len)
    when :dimensions  # ● DIMENSIONS ➜ 6.31
      read_dimensions(worksheet, work, pos, len)
    when :row         # ○○ Row Blocks ➜ 5.7
                      # ● ROW ➜ 6.83
      set_row_address(worksheet, work, pos, len)
    when :hlink
      read_hlink(worksheet, work, pos, len)
    when :window2
      read_window2(worksheet, work, pos, len)
    else
      # cell record whose row may not have been announced by a ROW record
      set_missing_row_address(worksheet, work, pos, len) if ROW_BLOCK_OPS.include?(op)
    end
    previous = op
  end
end
854
# Handles a GUTS record: copies the four outline settings into worksheet.guts
# under the keys :width, :height, :row_level and :col_level.
def read_guts(worksheet, work, pos, len)
  # Offset  Size  Contents
  #      0     2  Width of the area to display row outlines (left of the sheet), in pixel
  #      2     2  Height of the area to display column outlines (above the sheet), in pixel
  #      4     2  Number of visible row outline levels (used row levels + 1; or 0, if not used)
  #      6     2  Number of visible column outline levels (used column levels + 1; or 0, if not used)
  fields = [:width, :height, :row_level, :col_level]
  fields.zip(work.unpack('v4')) do |key, value|
    worksheet.guts[key] = value
  end
end
866
# Handles a STYLE record: assigns a name to the style XF it refers to.
# User-defined styles carry their name in the record; built-in styles are
# looked up in BUILTIN_STYLES, with the '_lv' placeholder replaced by the
# level byte read from the record. +pos+ and +len+ are unused.
def read_style(work, pos, len)
  # User-Defined Cell Styles:
  # Offset  Size  Contents
  #      0     2  Bit    Mask    Contents
  #               11-0   0x0fff  Index to style XF record (➜ 6.115)
  #                 15   0x8000  Always 0 for user-defined styles
  #      2  var.  BIFF2-BIFF7: Non-empty byte string,
  #                            8-bit string length (➜ 3.3)
  #               BIFF8:       Non-empty Unicode string,
  #                            16-bit string length (➜ 3.4)
  #
  # Built-In Cell Styles
  # Offset  Size  Contents
  #      0     2  Bit    Mask    Contents
  #               11-0   0x0FFF  Index to style XF record (➜ 6.115)
  #                 15   0x8000  Always 1 for built-in styles
  #      2     1  Identifier of the built-in cell style:
  #               0x00 = Normal
  #               0x01 = RowLevel_lv (see next field)
  #               0x02 = ColLevel_lv (see next field)
  #               0x03 = Comma
  #               0x04 = Currency
  #               0x05 = Percent
  #               0x06 = Comma [0] (BIFF4-BIFF8)
  #               0x07 = Currency [0] (BIFF4-BIFF8)
  #               0x08 = Hyperlink (BIFF8)
  #               0x09 = Followed Hyperlink (BIFF8)
  #      3     1  Level for RowLevel or ColLevel style (zero-based, lv),
  #               FFH otherwise
  flags, = work.unpack 'v'
  xf_idx = flags & 0x0fff
  xf = @workbook.format xf_idx
  builtin = flags & 0x8000
  if builtin == 0
    xf.name = client read_string(work[2..-1], 2), @workbook.encoding
  else
    id, level = work.unpack 'x2C2'
    if (name = BUILTIN_STYLES[id])
      # String#sub returns a new string; its result must be used. The
      # non-destructive form also leaves the shared constant untouched.
      name = name.sub '_lv', "_#{level}"
      xf.name = client name, 'UTF-8'
    end
  end
end
909
# Handles an XF record: builds a Format from the record's fields and appends
# it to the workbook via @workbook.add_format. +pos+ and +len+ are unused.
#
# Colour indices that do not start at bit 0 of their field are shifted down
# before being looked up in COLOR_CODES; without the shift the masked value
# can never equal a 7-bit colour index.
def read_xf(work, pos, len)
  # Offset  Size  Contents
  #      0     2  Index to FONT record (➜ 6.43)
  #      2     2  Index to FORMAT record (➜ 6.45)
  #      4     2  Bit    Mask    Contents
  #               2-0    0x0007  XF_TYPE_PROT – XF type, cell protection
  #                              Bit  Mask  Contents
  #                                0  0x01  1 = Cell is locked
  #                                1  0x02  1 = Formula is hidden
  #                                2  0x04  0 = Cell XF; 1 = Style XF
  #               15-4   0xfff0  Index to parent style XF
  #                              (always 0xfff in style XFs)
  #      6     1  Bit    Mask    Contents
  #               2-0    0x07    XF_HOR_ALIGN – Horizontal alignment
  #                              Value  Horizontal alignment
  #                              0x00   General
  #                              0x01   Left
  #                              0x02   Centred
  #                              0x03   Right
  #                              0x04   Filled
  #                              0x05   Justified (BIFF4-BIFF8X)
  #                              0x06   Centred across selection
  #                                     (BIFF4-BIFF8X)
  #                              0x07   Distributed (BIFF8X)
  #                 3    0x08    1 = Text is wrapped at right border
  #               6-4    0x70    XF_VERT_ALIGN – Vertical alignment
  #                              Value  Vertical alignment
  #                              0x00   Top
  #                              0x01   Centred
  #                              0x02   Bottom
  #                              0x03   Justified (BIFF5-BIFF8X)
  #                              0x04   Distributed (BIFF8X)
  #      7     1  XF_ROTATION: Text rotation angle (see above)
  #               Value   Text rotation
  #               0       Not rotated
  #               1-90    1 to 90 degrees counterclockwise
  #               91-180  1 to 90 degrees clockwise
  #               255     Letters are stacked top-to-bottom,
  #                       but not rotated
  #      8     1  Bit    Mask    Contents
  #               3-0    0x0f    Indent level
  #                 4    0x10    1 = Shrink content to fit into cell
  #                 5    0x40    1 = Merge Range (djberger)
  #               7-6    0xc0    Text direction (BIFF8X only)
  #                              0 = According to context
  #                              1 = Left-to-right
  #                              2 = Right-to-left
  #      9     1  Bit    Mask    Contents
  #               7-2    0xfc    XF_USED_ATTRIB – Used attributes
  #                              Each bit describes the validity of a
  #                              specific group of attributes. In cell XFs
  #                              a cleared bit means the attributes of the
  #                              parent style XF are used (but only if the
  #                              attributes are valid there), a set bit
  #                              means the attributes of this XF are used.
  #                              In style XFs a cleared bit means the
  #                              attribute setting is valid, a set bit
  #                              means the attribute should be ignored.
  #                              Bit  Mask  Contents
  #                                0  0x01  Flag for number format
  #                                1  0x02  Flag for font
  #                                2  0x04  Flag for horizontal and
  #                                         vertical alignment, text wrap,
  #                                         indentation, orientation,
  #                                         rotation, and text direction
  #                                3  0x08  Flag for border lines
  #                                4  0x10  Flag for background area style
  #                                5  0x20  Flag for cell protection (cell
  #                                         locked and formula hidden)
  #     10     4  Cell border lines and background area:
  #               Bit    Mask        Contents
  #                3- 0  0x0000000f  Left line style (➜ 3.10)
  #                7- 4  0x000000f0  Right line style (➜ 3.10)
  #               11- 8  0x00000f00  Top line style (➜ 3.10)
  #               15-12  0x0000f000  Bottom line style (➜ 3.10)
  #               22-16  0x007f0000  Colour index (➜ 6.70)
  #                                  for left line colour
  #               29-23  0x3f800000  Colour index (➜ 6.70)
  #                                  for right line colour
  #                  30  0x40000000  1 = Diagonal line
  #                                  from top left to right bottom
  #                  31  0x80000000  1 = Diagonal line
  #                                  from bottom left to right top
  #     14     4  Bit    Mask        Contents
  #                6- 0  0x0000007f  Colour index (➜ 6.70)
  #                                  for top line colour
  #               13- 7  0x00003f80  Colour index (➜ 6.70)
  #                                  for bottom line colour
  #               20-14  0x001fc000  Colour index (➜ 6.70)
  #                                  for diagonal line colour
  #               24-21  0x01e00000  Diagonal line style (➜ 3.10)
  #               31-26  0xfc000000  Fill pattern (➜ 3.11)
  #     18     2  Bit    Mask    Contents
  #                6-0   0x007f  Colour index (➜ 6.70)
  #                              for pattern colour
  #               13-7   0x3f80  Colour index (➜ 6.70)
  #                              for pattern background
  fmt = Format.new
  font_idx, numfmt, _xf_type, xf_align, xf_rotation, xf_indent, _xf_used_attr,
    xf_borders, xf_brdcolors, xf_pattern = work.unpack binfmt(:xf)
  fmt.number_format = @formats[numfmt]
  ## this appears to be undocumented: the first 4 fonts seem to be accessed
  #  with a 0-based index, but all subsequent font indices are 1-based.
  fmt.font = @workbook.font(font_idx > 3 ? font_idx - 1 : font_idx)
  fmt.horizontal_align = NGILA_H_FX[xf_align & 0x07]
  fmt.text_wrap = xf_align & 0x08 > 0
  # NOTE(review): vertical alignment and text direction are looked up with
  # the unshifted masked value; presumably the lookup tables are keyed that
  # way - confirm against their definitions before changing.
  fmt.vertical_align = NGILA_V_FX[xf_align & 0x70]
  fmt.rotation = if xf_rotation == 255
                   :stacked
                 elsif xf_rotation > 90
                   # 91..180 encode clockwise rotation as negative degrees
                   90 - xf_rotation
                 else
                   xf_rotation
                 end
  fmt.indent_level = xf_indent & 0x0f
  fmt.shrink = xf_indent & 0x10 > 0
  fmt.text_direction = NOITCERID_TXET_FX[xf_indent & 0xc0]
  # border line styles are only evaluated as "has a line" / "has no line"
  fmt.left = xf_borders & 0x0000000f > 0
  fmt.right = xf_borders & 0x000000f0 > 0
  fmt.top = xf_borders & 0x00000f00 > 0
  fmt.bottom = xf_borders & 0x0000f000 > 0
  fmt.left_color = COLOR_CODES[(xf_borders & 0x007f0000) >> 16] || :border
  fmt.right_color = COLOR_CODES[(xf_borders & 0x3f800000) >> 23] || :border
  fmt.cross_down = xf_borders & 0x40000000 > 0
  fmt.cross_up = xf_borders & 0x80000000 > 0
  fmt.top_color = COLOR_CODES[xf_brdcolors & 0x0000007f] || :border
  fmt.bottom_color = COLOR_CODES[(xf_brdcolors & 0x00003f80) >> 7] || :border
  fmt.diagonal_color = COLOR_CODES[(xf_brdcolors & 0x001fc000) >> 14] || :border
  #fmt.diagonal_style = COLOR_CODES[xf_brdcolors & 0x01e00000]
  # NOTE(review): the fill pattern is stored unshifted (bits 31-26 in place);
  # verify what the consumers of Format#pattern expect.
  fmt.pattern = xf_brdcolors & 0xfc000000
  fmt.pattern_fg_color = COLOR_CODES[xf_pattern & 0x007f] || :border
  fmt.pattern_bg_color = COLOR_CODES[(xf_pattern & 0x3f80) >> 7] || :pattern_bg
  @workbook.add_format fmt
end
1043
# Stores +value+ at +column+ of the cached row for [worksheet, row], creating
# the Row on demand. A format is recorded for the cell unless +xf+ is 0.
# Returns +value+.
def set_cell(worksheet, row, column, xf, value = nil)
  key = [worksheet, row]
  target = (@current_row_block[key] ||= Row.new(nil, row))
  target.formats[column] = @workbook.format(xf) if xf != 0
  target[column] = value
end
1048
# Registers a row address for a cell record whose row has no entry in
# worksheet.offsets yet. Does nothing when the row is already known.
def set_missing_row_address(worksheet, work, pos, len)
  # Offset  Size  Contents
  #      0     2  Index of this row
  #      2     2  Index to this column
  row_index, _column_index = work.unpack('v2')
  return if worksheet.offsets[row_index]

  # open a row block at this record unless one is already open
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  worksheet.set_row_address(row_index,
                            :index     => row_index,
                            :row_block => block,
                            :offset    => block[0])
end
1063
# Handles a ROW record: decodes the row attributes and registers them,
# together with the current row block, via worksheet.set_row_address.
# Opens a row block at +pos+ when none is open. +len+ is unused.
def set_row_address(worksheet, work, pos, len)
  # Offset  Size  Contents
  #      0     2  Index of this row
  #      2     2  Index to column of the first cell which
  #               is described by a cell record
  #      4     2  Index to column of the last cell which is
  #               described by a cell record, increased by 1
  #      6     2  Bit   Mask    Contents
  #               14-0  0x7fff  Height of the row, in twips = 1/20 of a point
  #                 15  0x8000  0 = Row has custom height;
  #                             1 = Row has default height
  #      8     2  Not used
  #     10     2  In BIFF3-BIFF4 this field contains a relative offset to
  #               calculate stream position of the first cell record for this
  #               row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
  #               anymore, but the DBCELL record (➜ 6.26) instead.
  #     12     4  Option flags and default row formatting:
  #               Bit    Mask        Contents
  #                2-0   0x00000007  Outline level of the row
  #                  4   0x00000010  1 = Outline group starts or ends here
  #                                      (depending on where the outline
  #                                      buttons are located, see WSBOOL
  #                                      record, ➜ 6.113), and is collapsed
  #                  5   0x00000020  1 = Row is hidden (manually, or by a
  #                                      filter or outline group)
  #                  6   0x00000040  1 = Row height and default font height
  #                                      do not match
  #                  7   0x00000080  1 = Row has explicit default format (fl)
  #                  8   0x00000100  Always 1
  #               27-16  0x0fff0000  If fl = 1: Index to default XF record
  #                                  (➜ 6.115)
  #                 28   0x10000000  1 = Additional space above the row.
  #                                      This flag is set, if the upper
  #                                      border of at least one cell in this
  #                                      row or if the lower border of at
  #                                      least one cell in the row above is
  #                                      formatted with a thick line style.
  #                                      Thin and medium line styles are not
  #                                      taken into account.
  #                 29   0x20000000  1 = Additional space below the row.
  #                                      This flag is set, if the lower
  #                                      border of at least one cell in this
  #                                      row or if the upper border of at
  #                                      least one cell in the row below is
  #                                      formatted with a medium or thick
  #                                      line style. Thin line styles are
  #                                      not taken into account.
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  index, first_used, first_unused, height, flags = work.unpack(binfmt(:row))
  # TODO: read attributes from work[13,3], read flags
  # The default format is only looked up when the "fl" flag is set.
  default_format = nil
  if (flags & 0x00000080) > 0
    default_format = @workbook.format((flags & 0x0fff0000) >> 16)
  end
  attrs = {
    :default_format => default_format,
    :first_used     => first_used,
    :first_unused   => first_unused,
    :index          => index,
    :row_block      => block,
    :offset         => block[0],
    :outline_level  => flags & 0x00000007,
    :collapsed      => (flags & 0x0000010) > 0,
    :hidden         => (flags & 0x0000020) > 0,
  }
  # A height is only reported when row height and default font height differ.
  attrs[:height] = (height & 0x7fff) / TWIPS if (flags & 0x00000040) > 0
  # TODO: Row spacing
  worksheet.set_row_address(index, attrs)
end
1136
# Prepares the reader for +io+: opens the OLE container, locates and reads
# the workbook stream, reads the BOF record and extends the reader with the
# version-specific reader and internals modules.
#
# Raises RuntimeError when the container holds none of the known workbook
# stream names.
def setup io
  ## Reading from StringIO fails without forced encoding
  if io.respond_to?(:string) && (str = io.string) \
    && str.respond_to?(:force_encoding)
    str.force_encoding 'ASCII-8BIT'
  end
  ##
  io.rewind
  @ole = Ole::Storage.open io
  @workbook = Workbook.new io, {}
  # Probe the known spellings of the stream name; a failed open is expected
  # for all but one of them, hence the deliberately broad inline rescue.
  %w{Book Workbook BOOK WORKBOOK book workbook}.any? do |name|
    @book = @ole.file.open(name) rescue false
  end
  # Without this check a missing stream would surface as a NoMethodError
  # on +false+ in the read call below.
  unless @book
    raise RuntimeError,
          'could not locate a workbook, possibly an empty file passed'
  end
  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  biff = @workbook.biff_version
  extend_reader biff
  extend_internals biff
end
1158
+ private
1159
# Loads the internals module for BIFF +version+ and mixes it into this
# reader. When no such file can be loaded the reader is left unchanged.
def extend_internals(version)
  path = 'spreadsheet/excel/internals/biff%i' % version
  require path
  mod = Internals.const_get('Biff%i' % version)
  extend mod
  ## spreadsheets may not include a codepage record.
  @workbook.encoding = encoding(850) if version < 8
rescue LoadError
  # no version-specific internals for this BIFF version - nothing to extend
end
1166
# Loads the reader module for BIFF +version+ and mixes it into this reader.
# When no such file can be loaded the reader is left unchanged.
def extend_reader(version)
  path = 'spreadsheet/excel/reader/biff%i' % version
  require path
  mod = Reader.const_get('Biff%i' % version)
  extend mod
rescue LoadError
  # no version-specific reader for this BIFF version - nothing to extend
end
1171
+ end
1172
+ end
1173
+ end