keeguon-spreadsheet 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +619 -0
  3. data/Manifest.txt +85 -0
  4. data/bin/xlsopcodes +18 -0
  5. data/lib/parseexcel.rb +27 -0
  6. data/lib/parseexcel/parseexcel.rb +75 -0
  7. data/lib/parseexcel/parser.rb +11 -0
  8. data/lib/spreadsheet.rb +80 -0
  9. data/lib/spreadsheet/column.rb +71 -0
  10. data/lib/spreadsheet/compatibility.rb +23 -0
  11. data/lib/spreadsheet/datatypes.rb +161 -0
  12. data/lib/spreadsheet/encodings.rb +57 -0
  13. data/lib/spreadsheet/excel.rb +88 -0
  14. data/lib/spreadsheet/excel/error.rb +26 -0
  15. data/lib/spreadsheet/excel/internals.rb +458 -0
  16. data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
  17. data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
  18. data/lib/spreadsheet/excel/offset.rb +41 -0
  19. data/lib/spreadsheet/excel/password_hash.rb +24 -0
  20. data/lib/spreadsheet/excel/reader.rb +1302 -0
  21. data/lib/spreadsheet/excel/reader/biff5.rb +42 -0
  22. data/lib/spreadsheet/excel/reader/biff8.rb +231 -0
  23. data/lib/spreadsheet/excel/rgb.rb +122 -0
  24. data/lib/spreadsheet/excel/row.rb +98 -0
  25. data/lib/spreadsheet/excel/sst_entry.rb +46 -0
  26. data/lib/spreadsheet/excel/workbook.rb +80 -0
  27. data/lib/spreadsheet/excel/worksheet.rb +115 -0
  28. data/lib/spreadsheet/excel/writer.rb +1 -0
  29. data/lib/spreadsheet/excel/writer/biff8.rb +75 -0
  30. data/lib/spreadsheet/excel/writer/format.rb +264 -0
  31. data/lib/spreadsheet/excel/writer/n_worksheet.rb +888 -0
  32. data/lib/spreadsheet/excel/writer/workbook.rb +735 -0
  33. data/lib/spreadsheet/excel/writer/worksheet.rb +940 -0
  34. data/lib/spreadsheet/font.rb +115 -0
  35. data/lib/spreadsheet/format.rb +209 -0
  36. data/lib/spreadsheet/formula.rb +9 -0
  37. data/lib/spreadsheet/helpers.rb +11 -0
  38. data/lib/spreadsheet/link.rb +43 -0
  39. data/lib/spreadsheet/note.rb +23 -0
  40. data/lib/spreadsheet/noteObject.rb +17 -0
  41. data/lib/spreadsheet/row.rb +151 -0
  42. data/lib/spreadsheet/workbook.rb +143 -0
  43. data/lib/spreadsheet/worksheet.rb +326 -0
  44. data/lib/spreadsheet/writer.rb +30 -0
  45. data/test/data/test_adding_data_to_existing_file.xls +0 -0
  46. data/test/data/test_borders.xls +0 -0
  47. data/test/data/test_changes.xls +0 -0
  48. data/test/data/test_comment.xls +0 -0
  49. data/test/data/test_copy.xls +0 -0
  50. data/test/data/test_datetime.xls +0 -0
  51. data/test/data/test_empty.xls +0 -0
  52. data/test/data/test_formula.xls +0 -0
  53. data/test/data/test_long_sst_record.xls +0 -0
  54. data/test/data/test_margin.xls +0 -0
  55. data/test/data/test_merged_and_protected.xls +0 -0
  56. data/test/data/test_merged_cells.xls +0 -0
  57. data/test/data/test_missing_row.xls +0 -0
  58. data/test/data/test_pagesetup.xls +0 -0
  59. data/test/data/test_version_excel5.xls +0 -0
  60. data/test/data/test_version_excel95.xls +0 -0
  61. data/test/data/test_version_excel97.xls +0 -0
  62. data/test/data/test_version_excel97_2010.xls +0 -0
  63. data/test/data/test_worksheet_visibility.xls +0 -0
  64. data/test/excel/reader.rb +30 -0
  65. data/test/excel/row.rb +40 -0
  66. data/test/excel/writer/workbook.rb +95 -0
  67. data/test/excel/writer/worksheet.rb +81 -0
  68. data/test/font.rb +163 -0
  69. data/test/format.rb +95 -0
  70. data/test/integration.rb +1390 -0
  71. data/test/row.rb +33 -0
  72. data/test/suite.rb +18 -0
  73. data/test/workbook.rb +55 -0
  74. data/test/workbook_protection.rb +19 -0
  75. data/test/worksheet.rb +112 -0
  76. metadata +148 -0
@@ -0,0 +1,17 @@
1
module Spreadsheet
  module Excel
    module Internals
      ##
      # Binary formats and other configuration specific to Biff5. Formats
      # that are not listed here are resolved by the next #binfmt
      # implementation up the ancestor chain.
      module Biff5
        BINARY_FORMATS = {
          :dimensions => 'v5',
        }
        ##
        # Returns the Biff5 pack/unpack format registered for +key+, or
        # defers to +super+ when this module has no entry for it.
        def binfmt key # :nodoc:
          return BINARY_FORMATS[key] if BINARY_FORMATS.key?(key)
          super
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Spreadsheet
  module Excel
    module Internals
      ##
      # Binary formats and other configuration specific to Biff8. Only the
      # formats that need disambiguation live here; every other key is
      # answered by the next #binfmt implementation up the ancestor chain.
      module Biff8
        BINARY_FORMATS = {
          :bof        => 'v4V2',
          :dimensions => 'V2v2x2',
        }
        ##
        # Returns the Biff8 pack/unpack format registered for +key+, or
        # defers to +super+ when this module has no entry for it.
        def binfmt key # :nodoc:
          BINARY_FORMATS.fetch(key) { super }
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'spreadsheet/compatibility'
2
+
3
module Spreadsheet
  module Excel
    ##
    # Keeps track of offsets in modified Excel documents.
    # Considered internal and subject to change without notice.
    #
    # Including this module gives instances a +changes+ and an +offsets+ Hash
    # and gives the including class an +offset+ macro (see ::append_features).
    module Offset
      include Compatibility
      attr_reader :changes, :offsets

      ##
      # Forwards all arguments to +super+, then starts with empty
      # change/offset registries.
      def initialize *args
        super
        @changes = {}
        @offsets = {}
      end

      ##
      # Hook run when the module is included into +mod+. Adds a class-level
      # +offset+ macro to +mod+. For every key passed to the macro it defines:
      #
      # * a reader, unless an instance method for the key already exists
      # * <tt>key=</tt>, which flags the key in +changes+ and stores the value
      # * <tt>set_key(value, pos, len)</tt>, which stores the value, records
      #   <tt>[pos, len]</tt> in +offsets+ and, if the instance responds to
      #   <tt>have_set_key</tt>, calls it with the same three arguments
      def self.append_features mod
        super
        mod.module_eval do
          class << self
            include Compatibility
            def offset *keys
              keys.each do |key|
                unless instance_methods.include? method_name(key)
                  attr_reader key
                end
                define_method "#{key}=" do |value|
                  @changes[key] = true
                  instance_variable_set ivar_name(key), value
                end
                define_method "set_#{key}" do |value, pos, len|
                  instance_variable_set ivar_name(key), value
                  @offsets[key] = [pos, len]
                  callback = "have_set_#{key}"
                  send(callback, value, pos, len) if respond_to? callback
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Spreadsheet
  module Excel
    module Password
      class << self
        ##
        # Makes an excel-compatible hash of +password+.
        #
        # The characters are folded in from last to first: each one is XORed
        # into the running value, which is then rotated left by one bit
        # (see #rol15). The result is finally XORed with the password length
        # and the constant 0xCE4B.
        def password_hash(password)
          folded = password.chars.reverse_each.inject(0) do |acc, chr|
            rol15(acc ^ chr[0].ord)
          end
          folded ^ password.size ^ 0xCE4B
        end

        private

        ##
        # Rotates +hash+ one bit to the left, using the lower 15 bits.
        def rol15(hash)
          shifted = hash << 1
          (shifted >> 15) | (shifted & 0x7FFF)
        end
      end
    end
  end
end
@@ -0,0 +1,1302 @@
1
+ require 'spreadsheet/encodings'
2
+ require 'spreadsheet/font'
3
+ require 'spreadsheet/formula'
4
+ require 'spreadsheet/link'
5
+ require 'spreadsheet/note'
6
+ require 'spreadsheet/noteObject'
7
+ require 'spreadsheet/excel/error'
8
+ require 'spreadsheet/excel/internals'
9
+ require 'spreadsheet/excel/sst_entry'
10
+ require 'spreadsheet/excel/worksheet'
11
+
12
+ module Spreadsheet
13
+ module Excel
14
+ ##
15
+ # Reader class for Excel Workbooks. Most read_* method correspond to an
16
+ # Excel-Record/Opcode. You should not need to call any of its methods
17
+ # directly. If you think you do, look at #read
18
+ class Reader
19
+ include Spreadsheet::Encodings
20
+ include Spreadsheet::Excel::Internals
21
+ ROW_BLOCK_OPS = {
22
+ :blank => true, :boolerr => true, :dbcell => true, :formula => true,
23
+ :label => true, :labelsst => true, :mulblank => true, :mulrk => true,
24
+ :number => true, :rk => true, :rstring => true,
25
+ }
26
##
# Sets up an empty reader state.
#
# +opts+ is kept as given. <tt>:bigendian</tt> may be passed explicitly;
# when it is absent the native byte order is probed by packing the
# integer 1. The format table is seeded with every entry of
# BUILTIN_FORMATS, each run through #client with 'UTF-8'.
def initialize opts = {}
  @opts = opts
  @pos = 0
  @bigendian = opts.fetch(:bigendian) do
    [1].pack('l') != "\001\000\000\000"
  end
  @boundsheets = nil
  @current_row_block = {}
  @current_row_block_offset = nil
  @formats = {}
  BUILTIN_FORMATS.each do |key, fmt|
    @formats[key] = client(fmt, 'UTF-8')
  end
end
38
##
# Decodes a 4-byte RK value into a Ruby number.
#
#   Bit   Mask        Contents
#   0     0x00000001  0 = Value not changed, 1 = Value is multiplied by 100
#   1     0x00000002  0 = Floating-point value, 1 = Signed integer value
#   31-2  0xFFFFFFFC  Encoded value
#
# With bit 1 cleared, the encoded value holds the 30 most significant bits
# of an IEEE 754 double whose remaining 34 bits are zero. With bit 1 set,
# it is a signed 30-bit integer that must be shifted right arithmetically
# by 2 bits. With bit 0 set, the decoded value (integer or float) is
# divided by 100.
def decode_rk work
  flags, = work.unpack 'C'
  percent = (flags & 1) == 1
  value = if (flags & 2) == 0
    # Float: mask out the two flag bits and use the remaining bits as the
    # upper half of a little-endian double with an all-zero lower half.
    upper, = work.unpack 'V'
    [0, upper & 0xfffffffc].pack('V2').unpack(EIGHT_BYTE_DOUBLE).first
  else
    # Integer: 'l' unpacks in native byte order, so the bytes are reversed
    # first on big-endian hosts. The shift drops the two flag bits.
    native = @bigendian ? work.reverse : work
    native.unpack('l').first >> 2
  end
  percent ? value / 100.0 : value
end
73
##
# Looks up the encoding name registered in CODEPAGES for +codepage_id+.
# On Ruby 1.9 and later the matching Encoding object is returned, on older
# Rubies the bare name. Raises a RuntimeError for an unknown codepage.
def encoding codepage_id
  name = CODEPAGES.fetch(codepage_id) do
    raise "Unknown Codepage 0x%04x" % codepage_id
  end
  return name unless RUBY_VERSION >= '1.9'
  Encoding.find name
end
82
##
# Reads the record at the current position and advances past it.
#
# Returns <tt>[pos, code, total_length, work]</tt>, where +pos+ is the
# position the record started at, +code+ is the opcode translated through
# SEDOCPO (or the raw opcode if it has no entry), +total_length+ includes
# the record header and +work+ is the record payload.
# Returns nil at the end of the data or when no length could be unpacked
# from the header.
#
# If the <tt>:print_opcodes</tt> option holds an IO, every record is also
# dumped to it.
def get_next_chunk
  start = @pos
  return unless start < @data.size
  op, len = @data[@pos, OPCODE_SIZE].unpack('v2')
  @pos += OPCODE_SIZE
  return unless len
  work = @data[@pos, len]
  @pos += len
  code = SEDOCPO.fetch(op, op)
  if io = @opts[:print_opcodes]
    io.puts sprintf("0x%04x/%-16s %5i: %s",
                    op, code.inspect, len, work.inspect)
  end
  [start, code, len + OPCODE_SIZE, work]
end
99
##
# Tells whether +op+ belongs to a row block. A :row record only counts
# when the +previous+ opcode was a :row as well; any other opcode counts
# if it is listed in ROW_BLOCK_OPS.
def in_row_block? op, previous
  return previous == op if op == :row
  ROW_BLOCK_OPS.include?(op)
end
106
##
# Returns the value of the <tt>:memoization</tt> reader option.
def memoize?
  @opts[:memoization]
end
109
##
# Registers <tt>[offset, length]</tt> for every worksheet in the workbook's
# offsets table. A sheet's length reaches up to the offset of the sheet
# that follows it; the last sheet reaches up to the size of the workbook's
# ole object.
def postread_workbook
  sheets = @workbook.worksheets
  sheets.each_with_index do |sheet, idx|
    start = sheet.offset
    follower = sheets[idx + 1]
    stop = follower ? follower.offset : @workbook.ole.size
    @workbook.offsets.store sheet, [start, stop - start]
  end
end
117
##
# Links the collected notes to their note objects and adds them to
# +worksheet+.
#
# A note carries author and position, the note object carries the text;
# both are matched on +objID+. The text of the first matching object is
# copied onto the note, which is then registered with
# <tt>worksheet.add_note(row, col, text)</tt>.
#
# A note without any matching object is skipped with a warning instead of
# failing on a nil object. Diagnostics go to stderr via +warn+ so they
# cannot end up in a program's regular output.
def postread_worksheet worksheet
  @noteList.each do |note|
    candidates = @noteObjList.select { |obj| obj.objID == note.objID }
    if candidates.length > 1
      warn "ERROR - more than one matching object ID!"
    end
    match = candidates.first
    unless match
      warn "ERROR - no matching object ID!"
      next
    end
    note.text = match.text
    worksheet.add_note note.row, note.col, note.text
  end
end
130
+ ##
131
+ # The entry-point for reading Excel-documents. Reads the Biff-Version and
132
+ # loads additional reader-methods before proceeding with parsing the document.
133
def read io
  # Prepare the reader for +io+, then parse the workbook.
  setup(io)
  read_workbook
  # Format 0 becomes the workbook's default format.
  @workbook.default_format = @workbook.format(0)
  # Forget the changes recorded so far, so the returned workbook starts
  # out with an empty change list.
  @workbook.changes.clear
  @workbook
end
140
##
# BLANK record: a cell that only carries formatting.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
def read_blank worksheet, addr, work
  row_idx, col_idx, xf_idx = work.unpack(binfmt(:blank))
  set_cell(worksheet, row_idx, col_idx, xf_idx)
end
148
##
# Reads the next record as a BOF record.
#
#   Offset  Size  Contents
#   0       2     BIFF version (always 0x0600 for BIFF8)
#   2       2     Type of the following data:
#                   0x0005 = Workbook globals
#                   0x0006 = Visual Basic module
#                   0x0010 = Worksheet
#                   0x0020 = Chart
#                   0x0040 = Macro sheet
#                   0x0100 = Workspace file
#   4       2     Build identifier
#   6       2     Build year
#   8       4     File history flags
#   12      4     Lowest Excel version that can read all records in this file
#
# Version and datatype are common to all Excel versions; the additional
# fields of later versions (build id and year, from BIFF5) are ignored for
# the time being. The opcode of the record is remembered in @bof, and the
# version is only recorded for the workbook-globals substream.
def read_bof
  _, @bof, _, payload = get_next_chunk
  version, datatype = payload.unpack('v2')
  @version = version if datatype == 0x5
end
170
##
# BOOLERR record: a cell holding either a Boolean or an error code.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       1     Boolean or error value (type depends on the following byte)
#   7       1     0 = Boolean value; 1 = Error code
def read_boolerr worksheet, addr, work
  row, column, xf, payload, kind = work.unpack 'v3C2'
  content = if kind == 0
    payload > 0
  else
    Error.new(payload)
  end
  set_cell worksheet, row, column, xf, content
end
180
##
# BOUNDSHEET record: announces one sheet of the workbook.
#
#   Offset  Size  Contents
#   0       4     Absolute stream position of the BOF record of the sheet
#                 represented by this record. This field is never encrypted
#                 in protected files.
#   4       1     Visibility: 0x00 = Visible
#                             0x01 = Hidden
#                             0x02 = Strong hidden
#   5       1     Sheet type: 0x00 = Worksheet
#                             0x02 = Chart
#                             0x06 = Visual Basic module
#   6       var.  Sheet name:
#                 BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
#                 BIFF8: Unicode string, 8-bit string length (➜ 3.4)
#
# @boundsheets accumulates <tt>[count, pos of first record, summed len]</tt>
# over all BOUNDSHEET records and is handed to the workbook each time.
def read_boundsheet work, pos, len
  offset, visibility, _ = work.unpack("VC2")
  name = client read_string(work[6..-1]), @workbook.encoding
  @boundsheets ||= [0, pos, 0]
  @boundsheets[0] += 1
  @boundsheets[2] += len
  @workbook.set_boundsheets(*@boundsheets)
  sheet = Worksheet.new(:name       => name,
                        :ole        => @book,
                        :offset     => offset,
                        :reader     => self,
                        :visibility => WORKSHEET_VISIBILITIES[visibility])
  @workbook.add_worksheet sheet
end
209
##
# CODEPAGE record: translates the 16-bit codepage id at the start of
# +work+ via #encoding and hands the result to the workbook together with
# the position and length of the record.
def read_codepage work, pos, len
  codepage_id = work.unpack('v').first
  @workbook.set_encoding(encoding(codepage_id), pos, len)
end
213
##
# COLINFO record: formatting shared by a range of columns.
#
#   Offset  Size  Contents
#   0       2     Index to first column in the range
#   2       2     Index to last column in the range
#   4       2     Width of the columns in 1/256 of the width of the zero
#                 character, using default font (first FONT record in the
#                 file)
#   6       2     Index to XF record (➜ 6.115) for default column formatting
#   8       2     Option flags:
#                 Bits  Mask    Contents
#                 0     0x0001  1 = Columns are hidden
#                 10-8  0x0700  Outline level of the columns (0 = no outline)
#                 12    0x1000  1 = Columns are collapsed
#   10      2     Not used
#
# One Column is created per index in the range and stored in
# <tt>worksheet.columns</tt>. The :colinfo format is used without its last
# character.
def read_colinfo worksheet, work, pos, len
  first, last, width, xf, opts = work.unpack binfmt(:colinfo)[0..-2]
  (first..last).each do |idx|
    column = Column.new idx, @workbook.format(xf),
                        :width         => width.to_f / 256,
                        :hidden        => (opts & 0x0001) != 0,
                        :collapsed     => (opts & 0x1000) != 0,
                        :outline_level => (opts & 0x0700) >> 8
    column.worksheet = worksheet
    worksheet.columns[idx] = column
  end
end
238
##
# DIMENSIONS record: the used area of the sheet.
#
#   Offset  Size  Contents
#   0       4     Index to first used row
#   4       4     Index to last used row, increased by 1
#   8       2     Index to first used column
#   10      2     Index to last used column, increased by 1
#   12      2     Not used
#
# The unpacked values are passed to the worksheet as one Array, together
# with the position and length of the record.
def read_dimensions worksheet, work, pos, len
  dimensions = work.unpack(binfmt(:dimensions))
  worksheet.set_dimensions dimensions, pos, len
end
247
##
# FONT record: builds a Font and adds it to the workbook.
#
#   Offset  Size  Contents
#   0       2     Height of the font (in twips = 1/20 of a point)
#   2       2     Option flags:
#                 Bit  Mask    Contents
#                 0    0x0001  1 = Characters are bold (redundant, see weight)
#                 1    0x0002  1 = Characters are italic
#                 2    0x0004  1 = Characters are underlined
#                              (redundant, see underline type)
#                 3    0x0008  1 = Characters are struck out
#                 4    0x0010  1 = Characters are outlined (djberger)
#                 5    0x0020  1 = Characters are shadowed (djberger)
#   4       2     Colour index (➜ 6.70)
#   6       2     Font weight (100-1000). Standard values are
#                 0x0190 (400) for normal text and 0x02bc (700) for bold text.
#   8       2     Escapement type: 0x0000 = None
#                                  0x0001 = Superscript
#                                  0x0002 = Subscript
#   10      1     Underline type:  0x00 = None
#                                  0x01 = Single
#                                  0x02 = Double
#                                  0x21 = Single accounting
#                                  0x22 = Double accounting
#   11      1     Font family:
#                 0x00 = None (unknown or don't care)
#                 0x01 = Roman (variable width, serifed)
#                 0x02 = Swiss (variable width, sans-serifed)
#                 0x03 = Modern (fixed width, serifed or sans-serifed)
#                 0x04 = Script (cursive)
#                 0x05 = Decorative (specialised, for example Old English,
#                        Fraktur)
#   12      1     Character set:
#                 0x00 =   0 = ANSI Latin
#                 0x01 =   1 = System default
#                 0x02 =   2 = Symbol
#                 0x4d =  77 = Apple Roman
#                 0x80 = 128 = ANSI Japanese Shift-JIS
#                 0x81 = 129 = ANSI Korean (Hangul)
#                 0x82 = 130 = ANSI Korean (Johab)
#                 0x86 = 134 = ANSI Chinese Simplified GBK
#                 0x88 = 136 = ANSI Chinese Traditional BIG5
#                 0xa1 = 161 = ANSI Greek
#                 0xa2 = 162 = ANSI Turkish
#                 0xa3 = 163 = ANSI Vietnamese
#                 0xb1 = 177 = ANSI Hebrew
#                 0xb2 = 178 = ANSI Arabic
#                 0xba = 186 = ANSI Baltic
#                 0xcc = 204 = ANSI Cyrillic
#                 0xde = 222 = ANSI Thai
#                 0xee = 238 = ANSI Latin II (Central European)
#                 0xff = 255 = OEM Latin I
#   13      1     Not used
#   14      var.  Font name:
#                 BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
#                 BIFF8: Unicode string, 8-bit string length (➜ 3.4)
#
# The bold and underlined option bits are not evaluated here; weight and
# underline type are used instead. A colour index without an entry in
# COLOR_CODES falls back to :text.
def read_font work, pos, len
  name = client read_string(work[14..-1]), @workbook.encoding
  font = Font.new name
  size, flags, color, weight, escapement, underline,
    family, charset = work.unpack binfmt(:font)
  font.weight     = weight
  font.size       = size / TWIPS
  font.italic     = flags & 0x0002
  font.strikeout  = flags & 0x0008
  font.outline    = flags & 0x0010
  font.shadow     = flags & 0x0020
  font.color      = COLOR_CODES[color] || :text
  font.escapement = ESCAPEMENT_TYPES[escapement]
  font.underline  = UNDERLINE_TYPES[underline]
  font.family     = FONT_FAMILIES[family]
  font.encoding   = FONT_ENCODINGS[charset]
  @workbook.add_font font
end
318
##
# FORMAT record: registers a number format string under its index.
#
#   Offset  Size  Contents
#   0       2     Format index used in other records
#   2       var.  Number format string
#                 (Unicode string, 16-bit string length, ➜ 3.4)
def read_format work, pos, len
  index = work.unpack('v').first
  raw = read_string(work[2..-1], 2)
  @formats[index] = client(raw, @workbook.encoding)
end
327
##
# FORMULA record: stores a Formula (token data plus cached result) in the
# cell.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       8     Result of the formula. See below for details.
#   14      2     Option flags:
#                 Bit  Mask    Contents
#                 0    0x0001  1 = Recalculate always
#                 1    0x0002  1 = Calculate on open
#                 3    0x0008  1 = Part of a shared formula
#   16      4     Not used
#   20      var.  Formula data (RPN token array, ➜ 4)
#                 Offset  Size  Contents
#                 0       2     Size of the following formula data (sz)
#                 2       sz    Formula data (RPN token array)
#                 [2+sz]  var.  (optional) Additional data for specific
#                               tokens (➜ 4.1.6, for example tArray token,
#                               ➜ 4.8.7)
#
# Result of the Formula
# Dependent on the type of value the formula returns, the result field has
# the following format:
#
# Result is a numeric value:
#   0  8  IEEE 754 floating-point value (64-bit double precision)
#
# Result is a string (the string follows in a STRING record, ➜ 6.98):
#   0  1  0x00 (identifier for a string value)
#   1  5  Not used
#   6  2  0xffff
# Note: In BIFF8 the string must not be empty. For empty cells there is a
# special identifier defined (see below).
#
# Result is a Boolean value:
#   0  1  0x01 (identifier for a Boolean value)
#   1  1  Not used
#   2  1  0 = FALSE, 1 = TRUE
#   3  3  Not used
#   6  2  0xffff
#
# Result is an error value:
#   0  1  0x02 (identifier for an error value)
#   1  1  Not used
#   2  1  Error code (➜ 3.7)
#   3  3  Not used
#   6  2  0xffff
#
# Result is an empty cell (BIFF8), for example an empty string:
#   0  1  0x03 (identifier for an empty cell)
#   1  5  Not used
#   6  2  0xffff
def read_formula worksheet, addr, work
  row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]
  if rcheck != 0xffff || rtype > 3
    value, = work.unpack 'x6E'
    unless value
      # on architectures where sizeof(double) > 8
      value, = work.unpack 'x6e'
    end
    formula.value = value
  elsif rtype == 0
    # The string result lives in the record(s) following the formula.
    next_pos, next_op, _, next_work = get_next_chunk
    if next_op == :sharedfmla
      ## TODO: formula-support in 0.8.0
      next_pos, next_op, _, next_work = get_next_chunk
    end
    if next_op == :string
      formula.value = client read_string(next_work, 2), @workbook.encoding
    else
      warn "String Value expected after Formula, but got #{next_op}"
      formula.value = Error.new 0x2a
      # Rewind so the unexpected record is processed by the caller. When
      # the data ended there is no record to rewind to; keep @pos as it is
      # instead of overwriting it with nil.
      @pos = next_pos if next_pos
    end
  elsif rtype == 1
    formula.value = rval > 0
  elsif rtype == 2
    formula.value = Error.new rval
  else
    # leave the Formula value blank
  end
  set_cell worksheet, row, column, xf, formula
end
416
##
# HLINK record: builds a Link and attaches it to every cell of the range.
#
# 6.53.1 Common Record Contents
#   Offset  Size  Contents
#   0       8     Cell range address of all cells containing this hyperlink
#                 (➜ 3.13.1)
#   8       16    GUID of StdLink:
#                 D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
#                 (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
#   24      4     Unknown value: 0x00000002
#   28      4     Option flags:
#                 Bit      Mask        Contents
#                 0        0x00000001  0 = No link extant
#                                      1 = File link or URL
#                 1        0x00000002  0 = Relative file path
#                                      1 = Absolute path or URL
#                 2 and 4  0x00000014  0 = No description
#                                      1 (both bits) = Description
#                 3        0x00000008  0 = No text mark
#                                      1 = Text mark
#                 7        0x00000080  0 = No target frame
#                                      1 = Target frame
#                 8        0x00000100  0 = File link or URL
#                                      1 = UNC path (incl. server name)
#   [32]    4     (optional, see option flags) Character count of description
#                 text, including trailing zero word (dl)
#   [36]    2∙dl  (optional, see option flags) Character array of description
#                 text, no Unicode string header, always 16-bit characters,
#                 zero-terminated
#   [var.]  4     (optional, see option flags) Character count of target
#                 frame, including trailing zero word (fl)
#   [var.]  2∙fl  (optional, see option flags) Character array of target
#                 frame, no Unicode string header, always 16-bit characters,
#                 zero-terminated
#   var.    var.  Special data (➜ 6.53.2 and following)
#   [var.]  4     (optional, see option flags) Character count of the text
#                 mark, including trailing zero word (tl)
#   [var.]  2∙tl  (optional, see option flags) Character array of the text
#                 mark without “#” sign, no Unicode string header, always
#                 16-bit characters, zero-terminated
#
# If the link carries no description, its href is used as the visible text.
def read_hlink worksheet, work, pos, len
  firstrow, lastrow, firstcol, lastcol, _, opts = work.unpack 'v4H32x4V'
  has_link = opts & 0x0001
  desc     = opts & 0x0014
  textmark = opts & 0x0008
  target   = opts & 0x0080
  unc      = opts & 0x0100
  link = Link.new
  # The optional fields start right after the 32 bytes of common header.
  pos = 32
  if desc > 0
    description, pos = read_hlink_string work, pos
    link << description
  end
  if target > 0
    link.target_frame, pos = read_hlink_string work, pos
  end
  if unc > 0
    # 6.53.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
    # These data fields are for UNC paths containing a server name (for
    # instance “\\server\path\file.xls”). The lower 9 bits of the option
    # flags field must be 1.x00x.xx112.
    #   Offset  Size  Contents
    #   0       4     Character count of the UNC,
    #                 including trailing zero word (fl)
    #   4       2∙fl  Character array of the UNC, no Unicode string header,
    #                 always 16-bit characters, zeroterminated.
    link.url, pos = read_hlink_string work, pos
  elsif has_link > 0
    uid, = work.unpack "x#{pos}H32"
    pos += 16
    if uid == "e0c9ea79f9bace118c8200aa004ba90b"
      # 6.53.2 Hyperlink containing a URL (Uniform Resource Locator)
      # These data fields occur for links which are not local files or files
      # in the local network (for instance HTTP and FTP links and e-mail
      # addresses). The lower 9 bits of the option flags field must be
      # 0.x00x.xx112 (x means optional, depending on hyperlink content). The
      # GUID could be used to distinguish a URL from a file link.
      #   Offset  Size  Contents
      #   0       16    GUID of URL Moniker:
      #                 E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
      #                 (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
      #   16      4     Size of character array of the URL, including trailing
      #                 zero word (us). There are us/2-1 characters in the
      #                 following string.
      #   20      us    Character array of the URL, no Unicode string header,
      #                 always 16-bit characters, zeroterminated
      size, = work.unpack "x#{pos}V"
      pos += 4
      data = work[pos, size].chomp "\000\000"
      link.url = client data
      pos += size
    else
      # 6.53.3 Hyperlink to a Local File
      # These data fields are for links to files on local drives. The path of
      # the file can be complete with drive letter (absolute) or relative to
      # the location of the workbook. The lower 9 bits of the option flags
      # field must be 0.x00x.xxx12. The GUID could be used to distinguish a
      # URL from a file link.
      #   Offset   Size  Contents
      #   0        16    GUID of File Moniker:
      #                  03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
      #                  (00000303-0000-0000-C000-000000000046)
      #   16       2     Directory up-level count. Each leading “..\” in the
      #                  file link is deleted and increases this counter.
      #   18       4     Character count of the shortened file path and name,
      #                  including trailing zero byte (sl)
      #   22       sl    Character array of the shortened file path and name
      #                  in 8.3-DOS-format. This field can be filled with a
      #                  long file name too. No Unicode string header, always
      #                  8-bit characters, zeroterminated.
      #   22+sl    24    Unknown byte sequence:
      #                  FF FF AD DE 00 00 00 00
      #                  00 00 00 00 00 00 00 00
      #                  00 00 00 00 00 00 00 00
      #   46+sl    4     Size of the following file link field including
      #                  string length field and additional data field (sz).
      #                  If sz is zero, nothing will follow (except a text
      #                  mark).
      #   [50+sl]  4     (optional) Size of character array of the extended
      #                  file path and name (xl). There are xl/2 characters in
      #                  the following string.
      #   [54+sl]  2     (optional) Unknown byte sequence: 03 00
      #   [56+sl]  xl    (optional) Character array of the extended file path
      #                  and name (xl), no Unicode string header, always
      #                  16-bit characters, not zero-terminated
      uplevel, count = work.unpack "x#{pos}vV"
      pos += 6
      # TODO: short file path may have any of the OEM encodings. Find out which
      # and use the #client method to convert the encoding.
      prefix = internal('..\\', 'UTF-8') * uplevel
      link.dos = link.url = prefix << work[pos, count].chomp("\000")
      pos += count + 24
      total, = work.unpack "x#{pos}V"
      pos += 4
      if total > 0
        # Only when sz is non-zero do the xl field (4 bytes) and the unknown
        # 03 00 sequence (2 bytes) exist. Skipping them unconditionally
        # would push a following text mark 6 bytes out of place.
        size, = work.unpack "x#{pos}V"
        pos += 6
        link.url = client work[pos, size]
        pos += size
      end
    end
  else
    # 6.53.5 Hyperlink to the Current Workbook
    # In this case only the text mark field is present (optional with
    # description).
    # Example: The URL “#Sheet2!B1:C2” refers to the given range in the
    # current workbook.
    # The lower 9 bits of the option flags field must be 0.x00x.1x002.
  end
  if textmark > 0
    link.fragment, _ = read_hlink_string work, pos
  end
  if link.empty?
    link << link.href
  end
  firstrow.upto lastrow do |row|
    firstcol.upto lastcol do |col|
      worksheet.add_link row, col, link
    end
  end
end
577
##
# Reads one length-prefixed 16-bit string of an HLINK record.
#
# At +pos+ a 32-bit character count is expected, followed by that many
# 2-byte characters. One trailing zero word is removed before the data is
# handed to #client as 'UTF-16LE'.
# Returns <tt>[string, position after the character array]</tt>.
def read_hlink_string work, pos
  chars = work.unpack("x#{pos}V").first
  bytes = chars * 2
  start = pos + 4
  raw = work[start, bytes].chomp("\000\000")
  [client(raw, 'UTF-16LE'), start + bytes]
end
585
##
# INDEX record. Currently a no-op: the record is not evaluated.
#
#   Offset  Size  Contents
#   0       4     Not used
#   4       4     Index to first used row (rf, 0-based)
#   8       4     Index to first row of unused tail of sheet
#                 (rl, last used row + 1, 0-based)
#   12      4     Absolute stream position of the DEFCOLWIDTH record
#                 (➜ 6.29) of the current sheet. If this record does not
#                 exist, the offset points to the record at the position
#                 where the DEFCOLWIDTH record would occur.
#   16      4∙nm  Array of nm absolute stream positions to the
#                 DBCELL record (➜ 6.26) of each Row Block
def read_index worksheet, work, pos, len
  # TODO: use the index if it exists
  # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
end
600
##
# LABEL record: a cell holding an inline string.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       var.  Unicode string, 16-bit string length (➜ 3.4)
def read_label worksheet, addr, work
  row, column, xf = work.unpack 'v3'
  raw = read_string(work[6..-1], 2)
  set_cell worksheet, row, column, xf, client(raw, @workbook.encoding)
end
610
##
# LABELSST record: a cell whose string is looked up via
# <tt>worksheet.shared_string</tt>.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       4     Index into SST record (➜ 6.96)
def read_labelsst worksheet, addr, work
  row, column, xf, sst_index = work.unpack(binfmt(:labelsst))
  text = worksheet.shared_string(sst_index)
  set_cell worksheet, row, column, xf, text
end
619
##
# MULBLANK record: a run of blank cells in one row.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to first column (fc)
#   4       2∙nc  List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
#   4+2∙nc  2     Index to last column (lc)
def read_mulblank worksheet, addr, work
  row, first_column, *xfs = work.unpack 'v*'
  # the final word is the last column index, not an XF index
  xfs.pop
  xfs.each_with_index do |xf, offset|
    set_cell worksheet, row, first_column + offset, xf
  end
end
629
##
# MULRK record: a run of RK-encoded numbers in one row.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to first column (fc)
#   4       6∙nc  List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
#                 Offset  Size  Contents
#                 0       2     Index to XF record (➜ 6.115)
#                 2       4     RK value (➜ 3.6)
#   4+6∙nc  2     Index to last column (lc)
def read_mulrk worksheet, addr, work
  row, column = work.unpack 'v2'
  # walk the 6-byte XF/RK structures, stopping short of the trailing
  # last-column word
  4.step(work.size - 6, 6) do |offset|
    xf, = work[offset, 2].unpack 'v'
    rk = work[offset + 2, 4]
    set_cell worksheet, row, column, xf, decode_rk(rk)
    column += 1
  end
end
645
##
# NUMBER record: a cell holding a floating-point value.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       8     IEEE 754 floating-point value (64-bit double precision)
def read_number worksheet, addr, work
  row, column, xf, number = work.unpack(binfmt(:number))
  set_cell(worksheet, row, column, xf, number)
end
654
##
# RK record: a cell holding a single RK-encoded number.
#
#   Offset  Size  Contents
#   0       2     Index to row
#   2       2     Index to column
#   4       2     Index to XF record (➜ 6.115)
#   6       4     RK value (➜ 3.6)
def read_rk worksheet, addr, work
  row, column, xf = work.unpack 'v3'
  number = decode_rk(work[6, 4])
  set_cell worksheet, row, column, xf, number
end
663
##
# Returns the Row for <tt>addr[:index]</tt> of +worksheet+.
#
# If the row is held in the current row block it is returned as is.
# Otherwise the block is cleared, a fresh Row is stored in it, and records
# are read starting at <tt>addr[:offset]</tt> and dispatched to the
# matching read_* method until the block ends:
#
# * EOF (➜ 6.36) - we should only get here if there is just one Row-Block;
#   the read position is put back to the start of the EOF record
# * DBCELL - stream offsets to the cell records of each row
# * ROW (➜ 6.83) - ignored while no cell record has been seen (these were
#   already handled in read_worksheet), otherwise ends the block
#
# Cell records handled:
#   BLANK ➜ 6.7, BOOLERR ➜ 6.10, FORMULA ➜ 6.46,
#   LABEL ➜ 6.59 (BIFF2-BIFF7), LABELSST ➜ 6.61 (BIFF8 only),
#   MULBLANK ➜ 6.64 (BIFF5-BIFF8), MULRK ➜ 6.65 (BIFF5-BIFF8),
#   NUMBER ➜ 6.68, RK ➜ 6.82 (BIFF3-BIFF8), RSTRING ➜ 6.84 (BIFF5/BIFF7)
def read_row worksheet, addr
  index = addr[:index]
  key = [worksheet, index]
  @current_row_block.fetch key do
    @current_row_block.clear
    cells = @current_row_block[key] = Row.new(nil, index)
    @pos = addr[:offset]
    seen_cell = false
    while chunk = get_next_chunk
      pos, op, _, work = chunk
      case op
      when :eof
        @pos = pos
        return cells
      when :dbcell
        return cells
      when :row
        return cells if seen_cell
      when 0x0002 # INTEGER ➜ 6.56 (BIFF2 only)
        seen_cell = true
        # TODO: implement for BIFF2 support
      when :blank, :boolerr, :formula, :label, :labelsst,
           :mulblank, :mulrk, :number, :rk, :rstring
        seen_cell = true
        send "read_#{op}", worksheet, addr, work
      end
    end
    cells
  end
end
721
def read_rstring worksheet, addr, work
  # RSTRING: a cell holding a rich-text string. Only the text is read;
  # the formatting runs that follow it are not used here.
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6    sz  Unformatted Unicode string, 16-bit string length (➜ 3.4)
  #   6+sz     2  Number of Rich-Text formatting runs (rt)
  #   8+sz  4·rt  List of rt formatting runs (➜ 3.2)
  row, column, xf = work.unpack('v3')
  raw_text = read_string(work[6..-1], 2)
  set_cell(worksheet, row, column, xf, client(raw_text, @workbook.encoding))
end
733
def read_window2 worksheet, work, pos, len
  # WINDOW2 contains additional settings for the document window
  # (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8).
  # It is part of the Sheet View Settings Block (➜ 4.5).
  # Only the "sheet selected" flag is read here.
  # Offset Size Contents
  #   0    2    Option flags:
  #             Bits  Mask    Contents
  #              0    0x0001  0 = Show formula results
  #                           1 = Show formulas
  #              1    0x0002  0 = Do not show grid lines
  #                           1 = Show grid lines
  #              2    0x0004  0 = Do not show sheet headers
  #                           1 = Show sheet headers
  #              3    0x0008  0 = Panes are not frozen
  #                           1 = Panes are frozen (freeze)
  #              4    0x0010  0 = Show zero values as empty cells
  #                           1 = Show zero values
  #              5    0x0020  0 = Manual grid line colour
  #                           1 = Automatic grid line colour
  #              6    0x0040  0 = Columns from left to right
  #                           1 = Columns from right to left
  #              7    0x0080  0 = Do not show outline symbols
  #                           1 = Show outline symbols
  #              8    0x0100  0 = Keep splits if pane freeze is removed
  #                           1 = Remove splits if pane freeze is removed
  #              9    0x0200  0 = Sheet not selected
  #                           1 = Sheet selected (BIFF5-BIFF8)
  #             10    0x0400  0 = Sheet not active
  #                           1 = Sheet active (BIFF5-BIFF8)
  #             11    0x0800  0 = Show in normal view
  #                           1 = Show in page break preview (BIFF8)
  #   2    2    Index to first visible row
  #   4    2    Index to first visible column
  #   6    2    Colour index of grid line colour (➜ 5.74).
  #             Note that in BIFF2-BIFF5 an RGB colour is written instead.
  #   8    2    Not used
  #  10    2    Cached magnification factor in page break preview (in percent)
  #             0 = Default (60%)
  #  12    2    Cached magnification factor in normal view (in percent)
  #             0 = Default (100%)
  #  14    4    Not used
  option_flags = work.unpack('v').first
  selected_bit = option_flags & 0x0200
  worksheet.selected = selected_bit > 0
end
777
+
778
def read_merged_cells worksheet, work, pos, len
  # MERGEDCELLS contains the addresses of merged cell ranges in the
  # current sheet.
  # Record MERGEDCELLS, BIFF8:
  # Offset Size Contents
  #   0    var. Cell range address list with merged ranges (➜ 2.5.15)
  # If the record size exceeds the limit, it is not continued with a
  # CONTINUE record, but another self-contained MERGEDCELLS record is
  # started. The limit of 8224 bytes per record results in a maximum
  # number of 1027 merged ranges.
  #
  # A cell range address list consists of a field with the number of
  # ranges and the list of the range addresses.
  # Cell range address list, BIFF2-BIFF8:
  # Offset Size           Contents
  #   0    2              Number of following cell range addresses (nm)
  #   2    6∙nm or 8∙nm   List of nm cell range addresses (➜ 2.5.14)
  #
  # Ranges are appended, so several MERGEDCELLS records accumulate.
  ranges = read_range_address_list(work, len)
  worksheet.merged_cells.push(*ranges)
end
797
+
798
def read_workbook
  # Reads the workbook-globals records chunk by chunk until the next BOF
  # (start of a worksheet) or EOF is reached.
  last_op = nil
  while chunk = get_next_chunk
    pos, op, len, work = chunk
    case op
    when @bof, :bof
      # ● BOF Type = worksheet (➜ 6.8)
      return
    when :eof
      # ● EOF ➜ 6.36
      postread_workbook
      return
    when :datemode
      # ○ DATEMODE ➜ 6.25
      flag = work.unpack('v').first
      @workbook.date_base = if flag == 1
                              DateTime.new(1904, 1, 1)
                            else
                              DateTime.new(1899, 12, 31)
                            end
    when :continue
      # ○ CONTINUE ➜ 6.22 - only a continued SST (➜ 6.96) is handled
      continue_sst(work, pos, len) if last_op == :sst
    when :codepage, :boundsheet, :xf, :sst, :extsst, :style, :format, :font
      # ○  CODEPAGE ➜ 6.17
      # ●● BOUNDSHEET ➜ 6.12
      # ●● XF ➜ 6.115
      # ○  Shared String Table ➜ 5.11 / ● SST ➜ 6.96
      #    TODO: implement memory-efficient sst handling, possibly in
      #          conjunction with EXTSST
      # ●  EXTSST ➜ 6.40
      # ●● STYLE ➜ 6.99
      # ○○ FORMAT (Number Format) ➜ 6.45
      # ●● FONT ➜ 6.43
      __send__(:"read_#{op}", work, pos, len)
    end
    # CONTINUE records never replace the remembered opcode.
    last_op = op unless op == :continue
  end
end
843
def read_worksheet worksheet, offset
  # Reads all records of one worksheet, starting at stream position
  # +offset+, until EOF. Row addresses are collected for lazy loading and
  # sheet-level settings are applied to +worksheet+. Notes are assembled
  # from the NOTE / OBJ / TXO / CONTINUE record sequence and collected in
  # @noteList and @noteObjList for postread_worksheet.
  @pos = offset
  @detected_rows = {}
  @noteObjList = []
  @noteList = []
  # Never carry a half-read note object over from a previous worksheet.
  @noteObject = nil
  previous = nil
  while tuple = get_next_chunk
    pos, op, len, work = tuple
    # Close the currently open row block as soon as a record that does not
    # belong to it shows up: store the block length next to its start.
    if (block_offset = @current_row_block_offset) && !in_row_block?(op, previous)
      @current_row_block_offset = nil
      block_offset[1] = pos - block_offset[0]
    end
    case op
    when :eof # ● EOF ➜ 6.36
      postread_worksheet worksheet
      return
    # Not handled yet:
    #   ○ UNCALCED ➜ 6.104
    #     TODO: Formula support. Values were not calculated before saving.
    #   ○ INDEX ➜ 5.7 (Row Blocks), ➜ 6.55
    #     TODO: if there are changes in rows, omit index when writing
    when :guts # GUTS 5.53
      read_guts worksheet, work, pos, len
    when :colinfo # ○○ COLINFO ➜ 6.18
      read_colinfo worksheet, work, pos, len
    when :dimensions # ● DIMENSIONS ➜ 6.31
      read_dimensions worksheet, work, pos, len
    when :row # ○○ Row Blocks ➜ 5.7
      # ● ROW ➜ 6.83
      set_row_address worksheet, work, pos, len
    when :hlink
      read_hlink worksheet, work, pos, len
    when :window2
      read_window2 worksheet, work, pos, len
    when :mergedcells # ○○ MERGEDCELLS ➜ 5.67
      read_merged_cells worksheet, work, pos, len
    when :protect, :password
      read_sheet_protection worksheet, op, work
    when :note # a note references an :obj
      read_note worksheet, work, pos, len
    when :obj # it contains the author in the NTS structure
      _ft, _cb, _ot, _objID = work.unpack('v4')
      if _ot == 0x19
        # Object type 0x19 is a note (cell comment).
        @noteObject = NoteObject.new
        @noteObject.objID = _objID
      else
        # Any other object (text box, picture, ...) may be followed by its
        # own TXO/CONTINUE records; those must not be attributed to the
        # note object read earlier.
        @noteObject = nil
      end
    when :drawing # this can be followed by txo in case of a note
      # nothing to read here
    when :txo # this contains the length of the note text
      # nothing to read here
    when :continue # this contains the actual note text
      # Only the first CONTINUE after a TXO carries the text, and only when
      # the TXO belongs to a note object.
      if previous == :txo && @noteObject
        charset, = work.unpack('C')
        if charset == 0
          # compressed charset: one byte per character, after the flag byte
          @noteObject.text = work.unpack('@1C*').pack('C*')
        elsif charset == 1
          # uncompressed charset: 16-bit little-endian characters
          # ('v', not the platform-dependent 'S'), converted to UTF-8
          @noteObject.text = work.unpack('@1v*').pack('U*')
        end
        @noteObjList << @noteObject
      end
    when :pagesetup
      read_pagesetup(worksheet, work, pos, len)
    when :leftmargin
      worksheet.margins[:left] = work.unpack(binfmt(:margin))[0]
    when :rightmargin
      worksheet.margins[:right] = work.unpack(binfmt(:margin))[0]
    when :topmargin
      worksheet.margins[:top] = work.unpack(binfmt(:margin))[0]
    when :bottommargin
      worksheet.margins[:bottom] = work.unpack(binfmt(:margin))[0]
    else
      # Cell records of a row that had no ROW record of its own.
      if ROW_BLOCK_OPS.include?(op)
        set_missing_row_address worksheet, work, pos, len
      end
    end
    # CONTINUE deliberately counts as "previous" too, so that only the
    # first CONTINUE after a TXO is treated as note text.
    previous = op
  end
end
940
+
941
def read_pagesetup(worksheet, work, pos, len)
  # Replaces the worksheet's page setup with the values decoded from a
  # PAGESETUP record. The full decoded field list is kept under :orig_data.
  settings = worksheet.pagesetup
  settings.delete_if { true }
  fields = work.unpack(binfmt(:pagesetup))
  # NOTE(review): field 5 is compared as a whole against 0; if it is the
  # option-flags word, only the orientation bit should be tested - confirm
  # against binfmt(:pagesetup) and the PAGESETUP record layout.
  orientation = if fields[5] == 0
                  :landscape
                else
                  :portrait
                end
  settings[:orientation] = orientation
  settings[:adjust_to] = fields[1]

  settings[:orig_data] = fields
  # TODO: add options according to specification
end
950
+
951
def read_guts worksheet, work, pos, len
  # GUTS: layout of the outline areas of the sheet.
  # Offset Size Contents
  #   0    2    Width of the area to display row outlines (left of the
  #             sheet), in pixel
  #   2    2    Height of the area to display column outlines (above the
  #             sheet), in pixel
  #   4    2    Number of visible row outline levels
  #             (used row levels + 1; or 0, if not used)
  #   6    2    Number of visible column outline levels
  #             (used column levels + 1; or 0, if not used)
  area_width, area_height, row_levels, column_levels = work.unpack('v4')
  worksheet.guts[:width]     = area_width
  worksheet.guts[:height]    = area_height
  worksheet.guts[:row_level] = row_levels
  worksheet.guts[:col_level] = column_levels
end
963
def read_style work, pos, len
  # STYLE: assigns a name to the style XF record it refers to.
  #
  # User-Defined Cell Styles:
  # Offset Size Contents
  #   0    2    Bit    Mask    Contents
  #             11-0   0x0fff  Index to style XF record (➜ 6.115)
  #             15     0x8000  Always 0 for user-defined styles
  #   2    var. BIFF2-BIFF7: Non-empty byte string,
  #                          8-bit string length (➜ 3.3)
  #             BIFF8:       Non-empty Unicode string,
  #                          16-bit string length (➜ 3.4)
  #
  # Built-In Cell Styles
  # Offset Size Contents
  #   0    2    Bit    Mask    Contents
  #             11-0   0x0FFF  Index to style XF record (➜ 6.115)
  #             15     0x8000  Always 1 for built-in styles
  #   2    1    Identifier of the built-in cell style:
  #             0x00 = Normal
  #             0x01 = RowLevel_lv (see next field)
  #             0x02 = ColLevel_lv (see next field)
  #             0x03 = Comma
  #             0x04 = Currency
  #             0x05 = Percent
  #             0x06 = Comma [0] (BIFF4-BIFF8)
  #             0x07 = Currency [0] (BIFF4-BIFF8)
  #             0x08 = Hyperlink (BIFF8)
  #             0x09 = Followed Hyperlink (BIFF8)
  #   3    1    Level for RowLevel or ColLevel style (zero-based, lv),
  #             FFH otherwise
  flags, = work.unpack 'v'
  xf = @workbook.format(flags & 0x0fff)
  if (flags & 0x8000) == 0
    # user-defined style: the name is stored in the record
    xf.name = client read_string(work[2..-1], 2), @workbook.encoding
  else
    id, level = work.unpack 'x2C2'
    # Unknown built-in identifiers leave the XF name untouched.
    if name = BUILTIN_STYLES[id]
      # String#sub returns a new string; the result has to be kept,
      # otherwise the "_lv" placeholder is never replaced by the level.
      name = name.sub '_lv', "_#{level}"
      xf.name = client name, 'UTF-8'
    end
  end
end
1006
def read_xf work, pos, len
  # XF: decodes one extended-format record into a Format and registers it
  # with the workbook.
  #
  # Offset Size Contents
  #   0    2    Index to FONT record (➜ 6.43)
  #   2    2    Index to FORMAT record (➜ 6.45)
  #   4    2    Bit    Mask    Contents
  #             2-0    0x0007  XF_TYPE_PROT – XF type, cell protection
  #                            Bit  Mask  Contents
  #                            0    0x01  1 = Cell is locked
  #                            1    0x02  1 = Formula is hidden
  #                            2    0x04  0 = Cell XF; 1 = Style XF
  #             15-4   0xfff0  Index to parent style XF
  #                            (always 0xfff in style XFs)
  #   6    1    Bit    Mask    Contents
  #             2-0    0x07    XF_HOR_ALIGN – Horizontal alignment
  #                            Value  Horizontal alignment
  #                            0x00   General
  #                            0x01   Left
  #                            0x02   Centred
  #                            0x03   Right
  #                            0x04   Filled
  #                            0x05   Justified (BIFF4-BIFF8X)
  #                            0x06   Centred across selection
  #                                   (BIFF4-BIFF8X)
  #                            0x07   Distributed (BIFF8X)
  #             3      0x08    1 = Text is wrapped at right border
  #             6-4    0x70    XF_VERT_ALIGN – Vertical alignment
  #                            Value  Vertical alignment
  #                            0x00   Top
  #                            0x01   Centred
  #                            0x02   Bottom
  #                            0x03   Justified (BIFF5-BIFF8X)
  #                            0x04   Distributed (BIFF8X)
  #   7    1    XF_ROTATION: Text rotation angle (see above)
  #             Value   Text rotation
  #             0       Not rotated
  #             1-90    1 to 90 degrees counterclockwise
  #             91-180  1 to 90 degrees clockwise
  #             255     Letters are stacked top-to-bottom,
  #                     but not rotated
  #   8    1    Bit    Mask    Contents
  #             3-0    0x0f    Indent level
  #             4      0x10    1 = Shrink content to fit into cell
  #             5      0x20    1 = Merge Range (djberger)
  #             7-6    0xc0    Text direction (BIFF8X only)
  #                            0 = According to context
  #                            1 = Left-to-right
  #                            2 = Right-to-left
  #   9    1    Bit    Mask    Contents
  #             7-2    0xfc    XF_USED_ATTRIB – Used attributes
  #                            Each bit describes the validity of a
  #                            specific group of attributes. In cell XFs
  #                            a cleared bit means the attributes of the
  #                            parent style XF are used (but only if the
  #                            attributes are valid there), a set bit
  #                            means the attributes of this XF are used.
  #                            In style XFs a cleared bit means the
  #                            attribute setting is valid, a set bit
  #                            means the attribute should be ignored.
  #                            Bit  Mask  Contents
  #                            0    0x01  Flag for number format
  #                            1    0x02  Flag for font
  #                            2    0x04  Flag for horizontal and
  #                                       vertical alignment, text wrap,
  #                                       indentation, orientation,
  #                                       rotation, and text direction
  #                            3    0x08  Flag for border lines
  #                            4    0x10  Flag for background area style
  #                            5    0x20  Flag for cell protection (cell
  #                                       locked and formula hidden)
  #  10    4    Cell border lines and background area:
  #             Bit    Mask        Contents
  #              3- 0  0x0000000f  Left line style (➜ 3.10)
  #              7- 4  0x000000f0  Right line style (➜ 3.10)
  #             11- 8  0x00000f00  Top line style (➜ 3.10)
  #             15-12  0x0000f000  Bottom line style (➜ 3.10)
  #             22-16  0x007f0000  Colour index (➜ 6.70)
  #                                for left line colour
  #             29-23  0x3f800000  Colour index (➜ 6.70)
  #                                for right line colour
  #             30     0x40000000  1 = Diagonal line
  #                                from top left to right bottom
  #             31     0x80000000  1 = Diagonal line
  #                                from bottom left to right top
  #  14    4    Bit    Mask        Contents
  #              6- 0  0x0000007f  Colour index (➜ 6.70)
  #                                for top line colour
  #             13- 7  0x00003f80  Colour index (➜ 6.70)
  #                                for bottom line colour
  #             20-14  0x001fc000  Colour index (➜ 6.70)
  #                                for diagonal line colour
  #             24-21  0x01e00000  Diagonal line style (➜ 3.10)
  #             31-26  0xfc000000  Fill pattern (➜ 3.11)
  #  18    2    Bit    Mask    Contents
  #              6-0   0x007f  Colour index (➜ 6.70)
  #                            for pattern colour
  #             13-7   0x3f80  Colour index (➜ 6.70)
  #                            for pattern background
  fmt = Format.new
  font_idx, numfmt, _, xf_align, xf_rotation, xf_indent, _,
    xf_borders, xf_brdcolors, xf_pattern = work.unpack binfmt(:xf)
  fmt.number_format = @formats[numfmt]
  ## this appears to be undocumented: the first 4 fonts seem to be accessed
  #  with a 0-based index, but all subsequent font indices are 1-based.
  fmt.font = @workbook.font(font_idx > 3 ? font_idx - 1 : font_idx)
  fmt.horizontal_align = NGILA_H_FX[xf_align & 0x07]
  fmt.text_wrap = (xf_align & 0x08) > 0
  # The vertical alignment sits in bits 6-4, so the masked value is still
  # shifted left by 4 (0x10 = centred, 0x20 = bottom, ...). Try the masked
  # value first, for a lookup table keyed that way, then the plain 0..4
  # value; the two key sets only overlap at 0.
  v_align = xf_align & 0x70
  fmt.vertical_align = NGILA_V_FX.fetch(v_align) { NGILA_V_FX[v_align >> 4] }
  fmt.rotation = if xf_rotation == 255
                   :stacked
                 elsif xf_rotation > 90
                   # 91-180 encode 1-90 degrees clockwise => negative angle
                   90 - xf_rotation
                 else
                   xf_rotation
                 end
  fmt.indent_level = xf_indent & 0x0f
  fmt.shrink = (xf_indent & 0x10) > 0
  # Same two-step lookup as for the vertical alignment: bits 7-6.
  direction = xf_indent & 0xc0
  fmt.text_direction = NOITCERID_TXET_FX.fetch(direction) {
    NOITCERID_TXET_FX[direction >> 6]
  }
  fmt.left = XF_BORDER_LINE_STYLES[xf_borders & 0x0000000f]
  fmt.right = XF_BORDER_LINE_STYLES[(xf_borders & 0x000000f0) >> 4]
  fmt.top = XF_BORDER_LINE_STYLES[(xf_borders & 0x00000f00) >> 8]
  fmt.bottom = XF_BORDER_LINE_STYLES[(xf_borders & 0x0000f000) >> 12]
  fmt.left_color = COLOR_CODES[(xf_borders & 0x007f0000) >> 16] || :black
  fmt.right_color = COLOR_CODES[(xf_borders & 0x3f800000) >> 23] || :black
  fmt.cross_down = (xf_borders & 0x40000000) > 0
  fmt.cross_up = (xf_borders & 0x80000000) > 0
  # Short records do not carry the trailing fields; leave the Format's own
  # values in place for whatever is missing.
  if xf_brdcolors
    fmt.top_color = COLOR_CODES[xf_brdcolors & 0x0000007f] || :black
    fmt.bottom_color = COLOR_CODES[(xf_brdcolors & 0x00003f80) >> 7] || :black
    fmt.diagonal_color = COLOR_CODES[(xf_brdcolors & 0x001fc000) >> 14] || :black
    #fmt.diagonal_style = COLOR_CODES[xf_brdcolors & 0x01e00000]
    fmt.pattern = (xf_brdcolors & 0xfc000000) >> 26
  end
  if xf_pattern
    fmt.pattern_fg_color = COLOR_CODES[xf_pattern & 0x007f] || :border
    fmt.pattern_bg_color = COLOR_CODES[(xf_pattern & 0x3f80) >> 7] || :pattern_bg
  end
  @workbook.add_format fmt
end
1142
def read_note worksheet, work, pos, len
  # NOTE: position and author of a cell comment. The record as decoded
  # here starts with five 16-bit words (row, column, one unused here,
  # object id, author length), followed by one byte that selects the
  # character width of the author name and the name itself.
  # The note is appended to @noteList; it is matched with its text object
  # via objID in postread_worksheet.
  row, col, _, obj_id, author_len, author_fmt = work.unpack('v5C')
  # Only the declared number of characters belongs to the author name;
  # anything behind it (e.g. an alignment byte) is ignored.
  author_len = author_len.to_i
  author = case author_fmt
           when 0
             # compressed charset: one byte per character
             work[11, author_len].to_s.unpack('C*').pack('C*')
           when 1
             # uncompressed charset: 16-bit little-endian characters,
             # converted to UTF-8 instead of being truncated to bytes
             work[11, author_len * 2].to_s.unpack('v*').pack('U*')
           else
             # unknown or missing format flag: no readable author
             ''
           end
  @note = Note.new
  @note.length = len
  @note.row = row
  @note.col = col
  @note.author = author
  @note.objID = obj_id
  #Pop it on the list to be sorted in postread_worksheet
  @noteList << @note
end
1163
def read_sheet_protection worksheet, op, data
  # Applies a PROTECT or PASSWORD record to the worksheet. Both records
  # carry a single 16-bit value; any other +op+ is ignored.
  if op == :protect
    # a value of 1 switches sheet protection on
    protected_flag = data.unpack('v').first
    worksheet.protect! if protected_flag == 1
  elsif op == :password
    worksheet.password_hash = data.unpack('v').first
  end
end
1171
def set_cell worksheet, row, column, xf, value=nil
  # Stores +value+ at +column+ of the cached row, creating the Row in
  # @current_row_block on first use. XF index 0 does not assign a
  # cell format.
  key = [worksheet, row]
  cells = @current_row_block[key]
  cells = @current_row_block[key] = Row.new(nil, row) unless cells
  if xf != 0
    cells.formats[column] = @workbook.format(xf)
  end
  cells[column] = value
end
1176
def set_missing_row_address worksheet, work, pos, len
  # Registers a row address for a cell record whose row is not yet known
  # to the worksheet. Rows that already have an entry in
  # worksheet.offsets are left alone.
  # Offset Size Contents
  #   0    2    Index of this row
  #   2    2    Index to this column
  row_index = work.unpack('v2').first
  return if worksheet.offsets[row_index]
  # Open a row block at this record unless one is already open.
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  worksheet.set_row_address(row_index,
                            :index     => row_index,
                            :row_block => block,
                            :offset    => block[0])
end
1191
def set_row_address worksheet, work, pos, len
  # ROW: records where a row lives in the stream, together with its basic
  # attributes, and hands them to worksheet.set_row_address.
  # Offset Size Contents
  #   0    2    Index of this row
  #   2    2    Index to column of the first cell which
  #             is described by a cell record
  #   4    2    Index to column of the last cell which is
  #             described by a cell record, increased by 1
  #   6    2    Bit    Mask    Contents
  #             14-0   0x7fff  Height of the row, in twips = 1/20 of a point
  #             15     0x8000  0 = Row has custom height;
  #                            1 = Row has default height
  #   8    2    Not used
  #  10    2    In BIFF3-BIFF4 this field contains a relative offset to
  #             calculate stream position of the first cell record for this
  #             row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
  #             anymore, but the DBCELL record (➜ 6.26) instead.
  #  12    4    Option flags and default row formatting:
  #             Bit    Mask        Contents
  #              2-0   0x00000007  Outline level of the row
  #              4     0x00000010  1 = Outline group starts or ends here
  #                                    (depending on where the outline
  #                                    buttons are located, see WSBOOL
  #                                    record, ➜ 6.113), and is collapsed
  #              5     0x00000020  1 = Row is hidden (manually, or by a
  #                                    filter or outline group)
  #              6     0x00000040  1 = Row height and default font height
  #                                    do not match
  #              7     0x00000080  1 = Row has explicit default format (fl)
  #              8     0x00000100  Always 1
  #             27-16  0x0fff0000  If fl = 1: Index to default XF record
  #                                (➜ 6.115)
  #             28     0x10000000  1 = Additional space above the row.
  #                                    This flag is set, if the upper
  #                                    border of at least one cell in this
  #                                    row or if the lower border of at
  #                                    least one cell in the row above is
  #                                    formatted with a thick line style.
  #                                    Thin and medium line styles are not
  #                                    taken into account.
  #             29     0x20000000  1 = Additional space below the row.
  #                                    This flag is set, if the lower
  #                                    border of at least one cell in this
  #                                    row or if the upper border of at
  #                                    least one cell in the row below is
  #                                    formatted with a medium or thick
  #                                    line style. Thin line styles are
  #                                    not taken into account.
  # A ROW record opens a row block unless one is already open.
  @current_row_block_offset ||= [pos]
  block = @current_row_block_offset
  index, first_used, first_unused, height, flags = work.unpack(binfmt(:row))
  # drop the "default height" bit, keep the height in twips
  height &= 0x7fff
  # TODO: read attributes from work[13,3], read flags
  attrs = {
    :default_format => nil,
    :first_used     => first_used,
    :first_unused   => first_unused,
    :index          => index,
    :row_block      => block,
    :offset         => block[0],
    :outline_level  => flags & 0x00000007,
    :collapsed      => (flags & 0x0000010) > 0,
    :hidden         => (flags & 0x0000020) > 0,
  }
  # The height is only stored when the row height and the default font
  # height do not match.
  attrs[:height] = height / TWIPS if (flags & 0x00000040) > 0
  if (flags & 0x00000080) > 0
    default_xf = (flags & 0x0fff0000) >> 16
    attrs[:default_format] = @workbook.format(default_xf)
  end
  # TODO: Row spacing
  worksheet.set_row_address index, attrs
end
1264
def setup io
  # Opens +io+ as an OLE storage, locates the workbook stream, reads its
  # BOF record and extends this reader with the BIFF-version specific
  # modules.
  ## Reading from StringIO fails without forced encoding
  if io.respond_to?(:string)
    buffer = io.string
    if buffer && buffer.respond_to?(:force_encoding)
      buffer.force_encoding 'ASCII-8BIT'
    end
  end
  ##
  io.rewind
  @ole = Ole::Storage.open io
  @workbook = Workbook.new io, {}
  # The workbook stream is known under several spellings; take the first
  # one that can be opened. @book ends up false when none of them exists.
  %w{Book Workbook BOOK WORKBOOK book workbook}.each do |name|
    @book = begin
      @ole.file.open(name)
    rescue StandardError
      false
    end
    break if @book
  end
  unless @book
    raise RuntimeError, "could not locate a workbook, possibly an empty file passed"
  end
  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  biff = @workbook.biff_version
  extend_reader biff
  extend_internals biff
end
1287
+ private
1288
def extend_internals version
  # Mixes the BIFF-version specific internals module into this reader.
  # A missing module file is not an error: the LoadError is swallowed and
  # the reader is left as it is.
  tag = '%i' % version
  require "spreadsheet/excel/internals/biff#{tag}"
  extend Internals.const_get("Biff#{tag}")
  ## spreadsheets may not include a codepage record.
  if version < 8
    @workbook.encoding = encoding(850)
  end
rescue LoadError
end
1295
def extend_reader version
  # Mixes the BIFF-version specific reader module into this reader.
  # A missing module file is not an error: the LoadError is swallowed.
  tag = '%i' % version
  require "spreadsheet/excel/reader/biff#{tag}"
  extend Reader.const_get("Biff#{tag}")
rescue LoadError
end
1300
+ end
1301
+ end
1302
+ end