keeguon-spreadsheet 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +619 -0
  3. data/Manifest.txt +85 -0
  4. data/bin/xlsopcodes +18 -0
  5. data/lib/parseexcel.rb +27 -0
  6. data/lib/parseexcel/parseexcel.rb +75 -0
  7. data/lib/parseexcel/parser.rb +11 -0
  8. data/lib/spreadsheet.rb +80 -0
  9. data/lib/spreadsheet/column.rb +71 -0
  10. data/lib/spreadsheet/compatibility.rb +23 -0
  11. data/lib/spreadsheet/datatypes.rb +161 -0
  12. data/lib/spreadsheet/encodings.rb +57 -0
  13. data/lib/spreadsheet/excel.rb +88 -0
  14. data/lib/spreadsheet/excel/error.rb +26 -0
  15. data/lib/spreadsheet/excel/internals.rb +458 -0
  16. data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
  17. data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
  18. data/lib/spreadsheet/excel/offset.rb +41 -0
  19. data/lib/spreadsheet/excel/password_hash.rb +24 -0
  20. data/lib/spreadsheet/excel/reader.rb +1302 -0
  21. data/lib/spreadsheet/excel/reader/biff5.rb +42 -0
  22. data/lib/spreadsheet/excel/reader/biff8.rb +231 -0
  23. data/lib/spreadsheet/excel/rgb.rb +122 -0
  24. data/lib/spreadsheet/excel/row.rb +98 -0
  25. data/lib/spreadsheet/excel/sst_entry.rb +46 -0
  26. data/lib/spreadsheet/excel/workbook.rb +80 -0
  27. data/lib/spreadsheet/excel/worksheet.rb +115 -0
  28. data/lib/spreadsheet/excel/writer.rb +1 -0
  29. data/lib/spreadsheet/excel/writer/biff8.rb +75 -0
  30. data/lib/spreadsheet/excel/writer/format.rb +264 -0
  31. data/lib/spreadsheet/excel/writer/n_worksheet.rb +888 -0
  32. data/lib/spreadsheet/excel/writer/workbook.rb +735 -0
  33. data/lib/spreadsheet/excel/writer/worksheet.rb +940 -0
  34. data/lib/spreadsheet/font.rb +115 -0
  35. data/lib/spreadsheet/format.rb +209 -0
  36. data/lib/spreadsheet/formula.rb +9 -0
  37. data/lib/spreadsheet/helpers.rb +11 -0
  38. data/lib/spreadsheet/link.rb +43 -0
  39. data/lib/spreadsheet/note.rb +23 -0
  40. data/lib/spreadsheet/noteObject.rb +17 -0
  41. data/lib/spreadsheet/row.rb +151 -0
  42. data/lib/spreadsheet/workbook.rb +143 -0
  43. data/lib/spreadsheet/worksheet.rb +326 -0
  44. data/lib/spreadsheet/writer.rb +30 -0
  45. data/test/data/test_adding_data_to_existing_file.xls +0 -0
  46. data/test/data/test_borders.xls +0 -0
  47. data/test/data/test_changes.xls +0 -0
  48. data/test/data/test_comment.xls +0 -0
  49. data/test/data/test_copy.xls +0 -0
  50. data/test/data/test_datetime.xls +0 -0
  51. data/test/data/test_empty.xls +0 -0
  52. data/test/data/test_formula.xls +0 -0
  53. data/test/data/test_long_sst_record.xls +0 -0
  54. data/test/data/test_margin.xls +0 -0
  55. data/test/data/test_merged_and_protected.xls +0 -0
  56. data/test/data/test_merged_cells.xls +0 -0
  57. data/test/data/test_missing_row.xls +0 -0
  58. data/test/data/test_pagesetup.xls +0 -0
  59. data/test/data/test_version_excel5.xls +0 -0
  60. data/test/data/test_version_excel95.xls +0 -0
  61. data/test/data/test_version_excel97.xls +0 -0
  62. data/test/data/test_version_excel97_2010.xls +0 -0
  63. data/test/data/test_worksheet_visibility.xls +0 -0
  64. data/test/excel/reader.rb +30 -0
  65. data/test/excel/row.rb +40 -0
  66. data/test/excel/writer/workbook.rb +95 -0
  67. data/test/excel/writer/worksheet.rb +81 -0
  68. data/test/font.rb +163 -0
  69. data/test/format.rb +95 -0
  70. data/test/integration.rb +1390 -0
  71. data/test/row.rb +33 -0
  72. data/test/suite.rb +18 -0
  73. data/test/workbook.rb +55 -0
  74. data/test/workbook_protection.rb +19 -0
  75. data/test/worksheet.rb +112 -0
  76. metadata +148 -0
@@ -0,0 +1,17 @@
1
module Spreadsheet
  module Excel
    module Internals
      ##
      # Pack/unpack templates and other settings that are specific to Biff5.
      # Keys that are not listed in BINARY_FORMATS are looked up through the
      # next +binfmt+ implementation in the ancestor chain.
      module Biff5
        BINARY_FORMATS = {
          :dimensions => 'v5',
        }
        def binfmt(key) # :nodoc:
          return BINARY_FORMATS[key] if BINARY_FORMATS.key?(key)
          super
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Spreadsheet
  module Excel
    module Internals
      ##
      # Pack/unpack templates and other settings that are specific to Biff8.
      # Keys that are not listed in BINARY_FORMATS are looked up through the
      # next +binfmt+ implementation in the ancestor chain.
      module Biff8
        BINARY_FORMATS = {
          :bof        => 'v4V2',
          :dimensions => 'V2v2x2',
        }
        def binfmt(key) # :nodoc:
          return BINARY_FORMATS[key] if BINARY_FORMATS.key?(key)
          super
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require 'spreadsheet/compatibility'
2
+
3
module Spreadsheet
  module Excel
    ##
    # This module is used to keep track of offsets in modified Excel documents.
    # Considered internal and subject to change without notice.
    #
    # Instances of an including class get two Hashes: +changes+, which records
    # every key assigned through a generated <tt>key=</tt> writer, and
    # +offsets+, which records the <tt>[pos, len]</tt> pair passed to a
    # generated <tt>set_key</tt> method.
    module Offset
      include Compatibility
      attr_reader :changes, :offsets
      # Forwards all arguments up the ancestor chain, then starts out with
      # empty change- and offset-tables.
      def initialize *args
        super
        @changes = {}
        @offsets = {}
      end
      # Runs when this module is included into +mod+. After the default
      # behaviour (super) it adds a class-level +offset+ macro to +mod+.
      def Offset.append_features mod
        super
        mod.module_eval do
          class << self
            include Compatibility
            # Declares tracked attributes. For every key this defines:
            # * a reader, unless an instance method of that name already exists
            # * <tt>key=(value)</tt>, which flags the key in @changes and
            #   stores the value
            # * <tt>set_key(value, pos, len)</tt>, which stores the value,
            #   remembers [pos, len] in @offsets and then calls
            #   <tt>have_set_key(value, pos, len)</tt> if the instance
            #   responds to it
            def offset *keys
              keys.each do |key|
                # NOTE(review): method_name and ivar_name are not defined in
                # this view - presumably they come from Compatibility; confirm.
                attr_reader key unless instance_methods.include? method_name(key)
                define_method "#{key}=" do |value|
                  @changes.store key, true
                  instance_variable_set ivar_name(key), value
                end
                define_method "set_#{key}" do |value, pos, len|
                  instance_variable_set ivar_name(key), value
                  @offsets.store key, [pos, len]
                  # optional per-key callback
                  havename = "have_set_#{key}"
                  send(havename, value, pos, len) if respond_to? havename
                end
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
module Spreadsheet
  module Excel
    module Password
      class << self
        ##
        # Makes an excel-compatible hash of +password+: the characters are
        # folded in from last to first (xor, then a 15-bit left rotation), and
        # the result is xor-ed with the password size and the constant 0xCE4B.
        def password_hash(password)
          folded = password.chars.to_a.reverse.inject(0) do |acc, chr|
            rol15(acc ^ chr[0].ord)
          end
          folded ^ password.size ^ 0xCE4B
        end

        private

        ##
        # rotates hash 1 bit left, using lower 15 bits
        def rol15(hash)
          shifted = hash << 1
          (shifted >> 15) | (shifted & 0x7FFF)
        end
      end
    end
  end
end
@@ -0,0 +1,1302 @@
1
+ require 'spreadsheet/encodings'
2
+ require 'spreadsheet/font'
3
+ require 'spreadsheet/formula'
4
+ require 'spreadsheet/link'
5
+ require 'spreadsheet/note'
6
+ require 'spreadsheet/noteObject'
7
+ require 'spreadsheet/excel/error'
8
+ require 'spreadsheet/excel/internals'
9
+ require 'spreadsheet/excel/sst_entry'
10
+ require 'spreadsheet/excel/worksheet'
11
+
12
+ module Spreadsheet
13
+ module Excel
14
+ ##
15
+ # Reader class for Excel Workbooks. Most read_* method correspond to an
16
+ # Excel-Record/Opcode. You should not need to call any of its methods
17
+ # directly. If you think you do, look at #read
18
+ class Reader
19
+ include Spreadsheet::Encodings
20
+ include Spreadsheet::Excel::Internals
21
+ ROW_BLOCK_OPS = {
22
+ :blank => true, :boolerr => true, :dbcell => true, :formula => true,
23
+ :label => true, :labelsst => true, :mulblank => true, :mulrk => true,
24
+ :number => true, :rk => true, :rstring => true,
25
+ }
26
##
# Sets up reader state. +opts+ is kept as-is in @opts; the only key read
# here is :bigendian, which defaults to a check of the host byte order.
# @formats is pre-filled with one converted entry per BUILTIN_FORMATS pair.
def initialize(opts = {})
  @pos = 0
  @bigendian = opts.fetch(:bigendian) do
    # true when a native long is not stored least significant byte first
    [1].pack('l') != "\001\000\000\000"
  end
  @opts = opts
  @boundsheets = nil
  @current_row_block = {}
  @current_row_block_offset = nil
  @formats = {}
  BUILTIN_FORMATS.each { |idx, pattern| @formats[idx] = client(pattern, 'UTF-8') }
end
38
##
# Decodes the four bytes of an RK value in +work+ into an Integer or Float.
def decode_rk(work)
  # Bit  Mask        Contents
  # 0    0x00000001  0 = Value not changed 1 = Value is multiplied by 100
  # 1    0x00000002  0 = Floating-point value 1 = Signed integer value
  # 31-2 0xFFFFFFFC  Encoded value
  #
  # If bit 1 is cleared, the encoded value represents the 30 most significant
  # bits of an IEEE 754 floating-point value (64-bit double precision). The
  # 34 least significant bits must be set to zero. If bit 1 is set, the
  # encoded value represents a signed 30-bit integer value. To get the
  # correct integer, the encoded value has to be shifted right arithmetically
  # by 2 bits. If bit 0 is set, the decoded value (both integer and
  # floating-point) must be divided by 100 to get the final result.
  flag_byte, = work.unpack 'C'
  hundredths = (flag_byte & 1) == 1
  if (flag_byte & 2) == 0
    ## floating point: mask out the two flag bits and use the rest as the
    #  upper half of a little-endian double
    raw, = work.unpack 'V'
    upper_half = [raw & 0xfffffffc].pack('V')
    value, = ("\0\0\0\0" + upper_half).unpack EIGHT_BYTE_DOUBLE
  else
    ## signed integer: 'l' unpacks in native byte order, so the bytes are
    #  reversed first on big-endian hosts
    native = @bigendian ? work.reverse : work
    signed, = native.unpack 'l'
    ## remove two bits
    value = signed >> 2
  end
  hundredths ? value / 100.0 : value
end
73
##
# Maps +codepage_id+ to an encoding via CODEPAGES. Returns an Encoding on
# Ruby 1.9 and later, the plain name otherwise. Raises a RuntimeError for
# unknown codepages.
def encoding(codepage_id)
  name = CODEPAGES.fetch(codepage_id) do
    raise "Unknown Codepage 0x%04x" % codepage_id
  end
  return name unless RUBY_VERSION >= '1.9'
  Encoding.find name
end
82
##
# Reads the record at @pos and advances @pos past it. Returns
# [start position, opcode, total record size including header, payload],
# or nil at the end of @data or when the header is incomplete. Opcodes
# without an entry in SEDOCPO are returned as their numeric value.
def get_next_chunk
  start = @pos
  return unless start < @data.size
  op, len = @data[start, OPCODE_SIZE].unpack('v2')
  @pos += OPCODE_SIZE
  return unless len
  work = @data[@pos, len]
  @pos += len
  code = SEDOCPO.fetch(op, op)
  io = @opts[:print_opcodes]
  if io
    io.puts sprintf("0x%04x/%-16s %5i: %s",
                    op, code.inspect, len, work.inspect)
  end
  [start, code, len + OPCODE_SIZE, work]
end
99
##
# A :row opcode counts as part of a row block only when the previous
# opcode was :row as well; any other opcode counts when it is a key of
# ROW_BLOCK_OPS.
def in_row_block?(op, previous)
  return ROW_BLOCK_OPS.include?(op) unless op == :row
  previous == op
end
106
##
# Returns the value of the :memoization option this reader was created
# with (nil when the option was not given).
def memoize?
  @opts[:memoization]
end
109
##
# Stores [offset, length] for every worksheet in the workbook's offsets
# table. A sheet ends where the next sheet starts; the last sheet ends at
# the size of the workbook's ole stream.
def postread_workbook
  sheets = @workbook.worksheets
  sheets.each_with_index do |sheet, idx|
    start = sheet.offset
    following = sheets[idx + 1]
    stop = following ? following.offset : @workbook.ole.size
    @workbook.offsets.store sheet, [start, stop - start]
  end
end
117
##
# Links every collected Note (position) to the NoteObject (text) that
# carries the same object id and adds the result to +worksheet+.
#
# A note without a matching NoteObject is added with an empty text instead
# of raising NoMethodError. If several objects share the id, a warning is
# written to stderr and the first one is used.
def postread_worksheet(worksheet)
  @noteList.each do |note|
    candidates = @noteObjList.select { |obj| obj.objID == note.objID }
    warn "ERROR - more than one matching object ID!" if candidates.length > 1
    match = candidates.first
    note.text = match ? match.text : ''
    worksheet.add_note note.row, note.col, note.text
  end
end
130
+ ##
131
+ # The entry-point for reading Excel-documents. Reads the Biff-Version and
132
+ # loads additional reader-methods before proceeding with parsing the document.
133
##
# Reads a whole document from +io+ and returns the resulting workbook.
# +setup+ and +read_workbook+ are defined outside of this view.
def read io
  setup io
  read_workbook
  # format 0 becomes the workbook's default format
  @workbook.default_format = @workbook.format 0
  # discard the changes recorded so far, so that a freshly read workbook
  # starts out without any
  @workbook.changes.clear
  @workbook
end
140
##
# BLANK record: an empty cell that only carries formatting.
def read_blank(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  row_idx, col_idx, xf_idx = work.unpack(binfmt(:blank))
  set_cell(worksheet, row_idx, col_idx, xf_idx)
end
148
##
# Reads the next record as a BOF record. Its opcode is remembered in @bof,
# and @version is set when the record announces workbook globals.
def read_bof
  # Offset  Size  Contents
  #      0     2  BIFF version (always 0x0600 for BIFF8)
  #      2     2  Type of the following data: 0x0005 = Workbook globals
  #                                           0x0006 = Visual Basic module
  #                                           0x0010 = Worksheet
  #                                           0x0020 = Chart
  #                                           0x0040 = Macro sheet
  #                                           0x0100 = Workspace file
  #      4     2  Build identifier
  #      6     2  Build year
  #      8     4  File history flags
  #     12     4  Lowest Excel version that can read all records in this file
  _, @bof, _, payload = get_next_chunk
  ## version and datatype are common to all Excel-Versions. Later versions
  #  have additional information such as build-id and -year (from BIFF5).
  #  These are ignored for the time being.
  version, datatype = payload.unpack('v2')
  @version = version if datatype == 0x5
end
170
##
# BOOLERR record: a cell that holds either a Boolean or an error code.
def read_boolerr(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     1  Boolean or error value (type depends on the following byte)
  #      7     1  0 = Boolean value; 1 = Error code
  row, column, xf, raw, kind = work.unpack 'v3C2'
  content = kind == 0 ? raw > 0 : Error.new(raw)
  set_cell worksheet, row, column, xf, content
end
180
##
# BOUNDSHEET record: registers one worksheet with the workbook and keeps a
# running [count, position of the first record, summed length] of all
# BOUNDSHEET records seen so far.
def read_boundsheet(work, pos, len)
  # Offset  Size  Contents
  #      0     4  Absolute stream position of the BOF record of the sheet
  #               represented by this record. This field is never encrypted
  #               in protected files.
  #      4     1  Visibility: 0x00 = Visible
  #                           0x01 = Hidden
  #                           0x02 = Strong hidden (see below)
  #      5     1  Sheet type: 0x00 = Worksheet
  #                           0x02 = Chart
  #                           0x06 = Visual Basic module
  #      6  var.  Sheet name: BIFF5/BIFF7: Byte string,
  #                           8-bit string length (➜ 3.3)
  #                           BIFF8: Unicode string, 8-bit string length (➜ 3.4)
  sheet_offset, visibility, _ = work.unpack("VC2")
  sheet_name = client read_string(work[6..-1]), @workbook.encoding
  count, first_pos, total_len = @boundsheets || [0, pos, 0]
  @boundsheets = [count + 1, first_pos, total_len + len]
  @workbook.set_boundsheets(*@boundsheets)
  sheet = Worksheet.new(:name       => sheet_name,
                        :ole        => @book,
                        :offset     => sheet_offset,
                        :reader     => self,
                        :visibility => WORKSHEET_VISIBILITIES[visibility])
  @workbook.add_worksheet sheet
end
209
##
# CODEPAGE record: the first 16-bit word is the codepage id. The matching
# encoding is stored on the workbook together with the record's position
# and length.
def read_codepage(work, pos, len)
  codepage_id = work.unpack('v').first
  @workbook.set_encoding(encoding(codepage_id), pos, len)
end
213
##
# COLINFO record: creates one Column per index in the given range and
# stores it in worksheet.columns.
def read_colinfo(worksheet, work, pos, len)
  # Offset  Size  Contents
  #      0     2  Index to first column in the range
  #      2     2  Index to last column in the range
  #      4     2  Width of the columns in 1/256 of the width of the zero
  #               character, using default font (first FONT record in the
  #               file)
  #      6     2  Index to XF record (➜ 6.115) for default column formatting
  #      8     2  Option flags:
  #               Bits  Mask    Contents
  #                  0  0x0001  1 = Columns are hidden
  #               10-8  0x0700  Outline level of the columns (0 = no outline)
  #                 12  0x1000  1 = Columns are collapsed
  #     10     2  Not used
  #
  # the last directive of the template is dropped, so the trailing
  # field is not unpacked
  template = binfmt(:colinfo)[0..-2]
  first, last, width, xf, flags = work.unpack(template)
  first.upto(last) { |idx|
    column = Column.new idx, @workbook.format(xf),
                        :width         => width.to_f / 256,
                        :hidden        => (flags & 0x0001) > 0,
                        :collapsed     => (flags & 0x1000) > 0,
                        :outline_level => (flags & 0x0700) / 256
    column.worksheet = worksheet
    worksheet.columns[idx] = column
  }
end
238
##
# DIMENSIONS record: hands the unpacked fields to the worksheet together
# with the record's position and length.
def read_dimensions(worksheet, work, pos, len)
  # Offset  Size  Contents
  #      0     4  Index to first used row
  #      4     4  Index to last used row, increased by 1
  #      8     2  Index to first used column
  #     10     2  Index to last used column, increased by 1
  #     12     2  Not used
  fields = work.unpack(binfmt(:dimensions))
  worksheet.set_dimensions(fields, pos, len)
end
247
##
# FONT record: builds a Font from the record and adds it to the workbook.
def read_font(work, pos, len)
  # Offset  Size  Contents
  #      0     2  Height of the font (in twips = 1/20 of a point)
  #      2     2  Option flags:
  #               Bit  Mask    Contents
  #                 0  0x0001  1 = Characters are bold (redundant, see below)
  #                 1  0x0002  1 = Characters are italic
  #                 2  0x0004  1 = Characters are underlined
  #                                (redundant, see below)
  #                 3  0x0008  1 = Characters are struck out
  #                 4  0x0010  1 = Characters are outlined (djberger)
  #                 5  0x0020  1 = Characters are shadowed (djberger)
  #      4     2  Colour index (➜ 6.70)
  #      6     2  Font weight (100-1000). Standard values are
  #                                       0x0190 (400) for normal text and
  #                                       0x02bc (700) for bold text.
  #      8     2  Escapement type: 0x0000 = None
  #                                0x0001 = Superscript
  #                                0x0002 = Subscript
  #     10     1  Underline type:  0x00 = None
  #                                0x01 = Single
  #                                0x02 = Double
  #                                0x21 = Single accounting
  #                                0x22 = Double accounting
  #     11     1  Font family:
  #               0x00 = None (unknown or don't care)
  #               0x01 = Roman (variable width, serifed)
  #               0x02 = Swiss (variable width, sans-serifed)
  #               0x03 = Modern (fixed width, serifed or sans-serifed)
  #               0x04 = Script (cursive)
  #               0x05 = Decorative (specialised,
  #                                  for example Old English, Fraktur)
  #     12     1  Character set: 0x00 =   0 = ANSI Latin
  #                              0x01 =   1 = System default
  #                              0x02 =   2 = Symbol
  #                              0x4d =  77 = Apple Roman
  #                              0x80 = 128 = ANSI Japanese Shift-JIS
  #                              0x81 = 129 = ANSI Korean (Hangul)
  #                              0x82 = 130 = ANSI Korean (Johab)
  #                              0x86 = 134 = ANSI Chinese Simplified GBK
  #                              0x88 = 136 = ANSI Chinese Traditional BIG5
  #                              0xa1 = 161 = ANSI Greek
  #                              0xa2 = 162 = ANSI Turkish
  #                              0xa3 = 163 = ANSI Vietnamese
  #                              0xb1 = 177 = ANSI Hebrew
  #                              0xb2 = 178 = ANSI Arabic
  #                              0xba = 186 = ANSI Baltic
  #                              0xcc = 204 = ANSI Cyrillic
  #                              0xde = 222 = ANSI Thai
  #                              0xee = 238 = ANSI Latin II (Central European)
  #                              0xff = 255 = OEM Latin I
  #     13     1  Not used
  #     14  var.  Font name:
  #               BIFF5/BIFF7: Byte string, 8-bit string length (➜ 3.3)
  #               BIFF8: Unicode string, 8-bit string length (➜ 3.4)
  font_name = client read_string(work[14..-1]), @workbook.encoding
  font = Font.new font_name
  height, flags, color_idx, weight, escapement, underline,
    family, charset = work.unpack binfmt(:font)
  font.weight     = weight
  font.size       = height / TWIPS
  # the flag values are passed on as masked integers, not as true/false
  font.italic     = flags & 0x0002
  font.strikeout  = flags & 0x0008
  font.outline    = flags & 0x0010
  font.shadow     = flags & 0x0020
  font.color      = COLOR_CODES[color_idx] || :text
  font.escapement = ESCAPEMENT_TYPES[escapement]
  font.underline  = UNDERLINE_TYPES[underline]
  font.family     = FONT_FAMILIES[family]
  font.encoding   = FONT_ENCODINGS[charset]
  @workbook.add_font font
end
318
##
# FORMAT record: stores the converted number format string in @formats
# under the record's format index.
def read_format(work, pos, len)
  # Offset  Size  Contents
  #      0     2  Format index used in other records
  #      2  var.  Number format string
  #               (Unicode string, 16-bit string length, ➜ 3.4)
  format_idx = work.unpack('v').first
  pattern = read_string(work[2..-1], 2)
  @formats[format_idx] = client(pattern, @workbook.encoding)
end
327
##
# FORMULA record: builds a Formula holding the raw token data and the
# cached result, and stores it in the cell.
#
# A string result lives in a separate STRING record that follows the
# FORMULA record (optionally after a SHAREDFMLA record), so this method may
# consume further records. If something else follows, the cell value
# becomes Error 0x2a and the read position is rewound to that record. When
# the data ends right after the FORMULA record there is nothing to rewind
# to, and @pos is left untouched instead of being set to nil.
def read_formula worksheet, addr, work
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     8  Result of the formula. See below for details.
  #     14     2  Option flags:
  #               Bit  Mask    Contents
  #                 0  0x0001  1 = Recalculate always
  #                 1  0x0002  1 = Calculate on open
  #                 3  0x0008  1 = Part of a shared formula
  #     16     4  Not used
  #     20  var.  Formula data (RPN token array, ➜ 4)
  #               Offset  Size  Contents
  #                    0     2  Size of the following formula data (sz)
  #                    2    sz  Formula data (RPN token array)
  #               [2+sz]  var.  (optional) Additional data for specific tokens
  #                             (➜ 4.1.6, for example tArray token, ➜ 4.8.7)
  #
  # Result of the Formula
  # Dependent on the type of value the formula returns, the result field has
  # the following format:
  #
  # Result is a numeric value:
  # Offset  Size  Contents
  #      0     8  IEEE 754 floating-point value (64-bit double precision)
  #
  # Result is a string (the string follows in a STRING record, ➜ 6.98):
  # Offset  Size  Contents
  #      0     1  0x00 (identifier for a string value)
  #      1     5  Not used
  #      6     2  0xffff
  # Note: In BIFF8 the string must not be empty. For empty cells there is a
  # special identifier defined (see below).
  #
  # Result is a Boolean value:
  # Offset  Size  Contents
  #      0     1  0x01 (identifier for a Boolean value)
  #      1     1  Not used
  #      2     1  0 = FALSE, 1 = TRUE
  #      3     3  Not used
  #      6     2  0xffff
  #
  # Result is an error value:
  # Offset  Size  Contents
  #      0     1  0x02 (identifier for an error value)
  #      1     1  Not used
  #      2     1  Error code (➜ 3.7)
  #      3     3  Not used
  #      6     2  0xffff
  #
  # Result is an empty cell (BIFF8), for example an empty string:
  # Offset  Size  Contents
  #      0     1  0x03 (identifier for an empty cell)
  #      1     5  Not used
  #      6     2  0xffff
  row, column, xf, rtype, rval, rcheck, opts = work.unpack 'v3CxCx3v2'
  formula = Formula.new
  formula.shared = (opts & 0x08) > 0
  formula.data = work[20..-1]
  if rcheck != 0xffff || rtype > 3
    # no 0xffff marker (or an unknown type byte): the result is a double
    value, = work.unpack 'x6E'
    unless value
      # on architectures where sizeof(double) > 8
      value, = work.unpack 'x6e'
    end
    formula.value = value
  elsif rtype == 0
    pos, op, _, work = get_next_chunk
    if op == :sharedfmla
      ## TODO: formula-support in 0.8.0
      pos, op, _, work = get_next_chunk
    end
    if op == :string
      formula.value = client read_string(work, 2), @workbook.encoding
    else
      warn "String Value expected after Formula, but got #{op}"
      formula.value = Error.new 0x2a
      # rewind to the unexpected record; pos is nil at the end of the data
      @pos = pos if pos
    end
  elsif rtype == 1
    formula.value = rval > 0
  elsif rtype == 2
    formula.value = Error.new rval
  else
    # leave the Formula value blank
  end
  set_cell worksheet, row, column, xf, formula
end
416
##
# HLINK record: builds a Link from the record and adds it to every cell of
# the record's cell range. +pos+ (the record position passed in) is reused
# below as a cursor into +work+, starting at offset 32 right after the
# fixed part of the record.
def read_hlink worksheet, work, pos, len
  # 6.53.1 Common Record Contents
  # Offset  Size  Contents
  #      0     8  Cell range address of all cells containing this hyperlink
  #               (➜ 3.13.1)
  #      8    16  GUID of StdLink:
  #               D0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
  #               (79EAC9D0-BAF9-11CE-8C82-00AA004BA90B)
  #     24     4  Unknown value: 0x00000002
  #     28     4  Option flags (see below)
  #               Bit  Mask        Contents
  #                 0  0x00000001  0 = No link extant
  #                                1 = File link or URL
  #                 1  0x00000002  0 = Relative file path
  #                                1 = Absolute path or URL
  #           2 and 4  0x00000014  0 = No description
  #                                1 (both bits) = Description
  #                 3  0x00000008  0 = No text mark
  #                                1 = Text mark
  #                 7  0x00000080  0 = No target frame
  #                                1 = Target frame
  #                 8  0x00000100  0 = File link or URL
  #                                1 = UNC path (incl. server name)
  #--------------------------------------------------------------------------
  #   [32]     4  (optional, see option flags) Character count of description
  #               text, including trailing zero word (dl)
  #   [36]  2∙dl  (optional, see option flags) Character array of description
  #               text, no Unicode string header, always 16-bit characters,
  #               zero-terminated
  #--------------------------------------------------------------------------
  # [var.]     4  (optional, see option flags) Character count of target
  #               frame, including trailing zero word (fl)
  # [var.]  2∙fl  (optional, see option flags) Character array of target
  #               frame, no Unicode string header, always 16-bit characters,
  #               zero-terminated
  #--------------------------------------------------------------------------
  #   var.  var.  Special data (➜ 6.53.2 and following)
  #--------------------------------------------------------------------------
  # [var.]     4  (optional, see option flags) Character count of the text
  #               mark, including trailing zero word (tl)
  # [var.]  2∙tl  (optional, see option flags) Character array of the text
  #               mark without “#” sign, no Unicode string header, always
  #               16-bit characters, zero-terminated
  firstrow, lastrow, firstcol, lastcol, _, opts = work.unpack 'v4H32x4V'
  # each of these is 0 when the option is absent and > 0 when it is present
  has_link = opts & 0x0001
  desc     = opts & 0x0014
  textmark = opts & 0x0008
  target   = opts & 0x0080
  unc      = opts & 0x0100
  link = Link.new
  _, description = nil
  # cursor into work: the optional parts start at offset 32
  pos = 32
  if desc > 0
    description, pos = read_hlink_string work, pos
    link << description
  end
  if target > 0
    link.target_frame, pos = read_hlink_string work, pos
  end
  if unc > 0
    # 6.53.4 Hyperlink to a File with UNC (Universal Naming Convention) Path
    # These data fields are for UNC paths containing a server name (for
    # instance “\\server\path\file.xls”). The lower 9 bits of the option
    # flags field must be 1.x00x.xx112.
    # Offset  Size  Contents
    #      0     4  Character count of the UNC,
    #               including trailing zero word (fl)
    #      4  2∙fl  Character array of the UNC, no Unicode string header,
    #               always 16-bit characters, zeroterminated.
    link.url, pos = read_hlink_string work, pos
  elsif has_link > 0
    # the moniker GUID tells URLs and local files apart
    uid, = work.unpack "x#{pos}H32"
    pos += 16
    if uid == "e0c9ea79f9bace118c8200aa004ba90b"
      # 6.53.2 Hyperlink containing a URL (Uniform Resource Locator)
      # These data fields occur for links which are not local files or files
      # in the local network (for instance HTTP and FTP links and e-mail
      # addresses). The lower 9 bits of the option flags field must be
      # 0.x00x.xx112 (x means optional, depending on hyperlink content). The
      # GUID could be used to distinguish a URL from a file link.
      # Offset  Size  Contents
      #      0    16  GUID of URL Moniker:
      #               E0 C9 EA 79 F9 BA CE 11 8C 82 00 AA 00 4B A9 0B
      #               (79EAC9E0-BAF9-11CE-8C82-00AA004BA90B)
      #     16     4  Size of character array of the URL, including trailing
      #               zero word (us). There are us/2-1 characters in the
      #               following string.
      #     20    us  Character array of the URL, no Unicode string header,
      #               always 16-bit characters, zeroterminated
      size, = work.unpack "x#{pos}V"
      pos += 4
      data = work[pos, size].chomp "\000\000"
      link.url = client data
      pos += size
    else
      # 6.53.3 Hyperlink to a Local File
      # These data fields are for links to files on local drives. The path of
      # the file can be complete with drive letter (absolute) or relative to
      # the location of the workbook. The lower 9 bits of the option flags
      # field must be 0.x00x.xxx12. The GUID could be used to distinguish a
      # URL from a file link.
      # Offset  Size  Contents
      #      0    16  GUID of File Moniker:
      #               03 03 00 00 00 00 00 00 C0 00 00 00 00 00 00 46
      #               (00000303-0000-0000-C000-000000000046)
      #     16     2  Directory up-level count. Each leading “..\” in the
      #               file link is deleted and increases this counter.
      #     18     4  Character count of the shortened file path and name,
      #               including trailing zero byte (sl)
      #     22    sl  Character array of the shortened file path and name in
      #               8.3-DOS-format. This field can be filled with a long
      #               file name too. No Unicode string header, always 8-bit
      #               characters, zeroterminated.
      #  22+sl    24  Unknown byte sequence:
      #               FF FF AD DE 00 00 00 00
      #               00 00 00 00 00 00 00 00
      #               00 00 00 00 00 00 00 00
      #  46+sl     4  Size of the following file link field including string
      #               length field and additional data field (sz). If sz is
      #               zero, nothing will follow (except a text mark).
      # [50+sl]    4  (optional) Size of character array of the extended file
      #               path and name (xl). There are xl/2 characters in the
      #               following string.
      # [54+sl]    2  (optional) Unknown byte sequence: 03 00
      # [56+sl]   xl  (optional) Character array of the extended file path
      #               and name (xl), no Unicode string header, always 16-bit
      #               characters, not zero-terminated
      uplevel, count = work.unpack "x#{pos}vV"
      pos += 6
      # TODO: short file path may have any of the OEM encodings. Find out which
      #       and use the #client method to convert the encoding.
      prefix = internal('..\\', 'UTF-8') * uplevel
      link.dos = link.url = prefix << work[pos, count].chomp("\000")
      # skip the short path and the 24 unknown bytes that follow it
      pos += count + 24
      total, size = work.unpack "x#{pos}V2"
      # skip both size fields (4 + 4) and the 2 unknown bytes
      pos += 10
      if total > 0
        # the extended path, when present, replaces the short one as url
        link.url = client work[pos, size]
        pos += size
      end
    end
  else
    # 6.53.5 Hyperlink to the Current Workbook
    # In this case only the text mark field is present (optional with
    # description).
    # Example: The URL “#Sheet2!B1:C2” refers to the given range in the
    # current workbook.
    # The lower 9 bits of the option flags field must be 0.x00x.1x002.
  end
  if textmark > 0
    link.fragment, _ = read_hlink_string work, pos
  end
  # without a description the link's href is appended to the link instead
  if link.empty?
    link << link.href
  end
  firstrow.upto lastrow do |row|
    firstcol.upto lastcol do |col|
      worksheet.add_link row, col, link
    end
  end
end
577
##
# Reads a counted 16-bit character array from +work+ at +pos+: a 32-bit
# character count followed by two bytes per character. Returns the
# converted string (without one trailing zero word) and the position right
# behind the array.
def read_hlink_string(work, pos)
  char_count = work.unpack("x#{pos}V").first
  byte_len = char_count * 2
  start = pos + 4
  raw = work[start, byte_len].chomp("\000\000")
  [client(raw, 'UTF-16LE'), start + byte_len]
end
585
##
# INDEX record: currently a no-op. The record layout is documented below
# for a future implementation; nothing is read from +work+ and nil is
# returned.
def read_index worksheet, work, pos, len
  # Offset  Size  Contents
  #      0     4  Not used
  #      4     4  Index to first used row (rf, 0-based)
  #      8     4  Index to first row of unused tail of sheet
  #               (rl, last used row + 1, 0-based)
  #     12     4  Absolute stream position of the
  #               DEFCOLWIDTH record (➜ 6.29) of the current sheet. If this
  #               record does not exist, the offset points to the record at
  #               the position where the DEFCOLWIDTH record would occur.
  #     16  4∙nm  Array of nm absolute stream positions to the
  #               DBCELL record (➜ 6.26) of each Row Block
  # TODO: use the index if it exists
  # _, first_used, first_unused, defcolwidth, *indices = work.unpack 'V*'
end
600
##
# LABEL record: a cell holding a string that is stored inside the record.
def read_label(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6  var.  Unicode string, 16-bit string length (➜ 3.4)
  row_idx, col_idx, xf_idx = work.unpack('v3')
  text = client(read_string(work[6..-1], 2), @workbook.encoding)
  set_cell(worksheet, row_idx, col_idx, xf_idx, text)
end
610
##
# LABELSST record: a cell whose string is looked up through
# worksheet.shared_string by its index.
def read_labelsst(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     4  Index into SST record (➜ 6.96)
  row_idx, col_idx, xf_idx, sst_idx = work.unpack(binfmt(:labelsst))
  text = worksheet.shared_string(sst_idx)
  set_cell(worksheet, row_idx, col_idx, xf_idx, text)
end
619
##
# MULBLANK record: a run of blank cells in one row, one XF index each.
def read_mulblank(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to first column (fc)
  #      4  2∙nc  List of nc=lc-fc+1 16-bit indexes to XF records (➜ 6.115)
  # 4+2∙nc     2  Index to last column (lc)
  row_idx, first_col, *xf_list = work.unpack('v*')
  # the trailing word is the index of the last column, not an XF index
  xf_list.pop
  xf_list.each_with_index { |xf_idx, step|
    set_cell(worksheet, row_idx, first_col + step, xf_idx)
  }
end
629
##
# MULRK record: a run of RK-encoded numbers in one row.
def read_mulrk(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to first column (fc)
  #      4  6∙nc  List of nc=lc-fc+1 XF/RK structures. Each XF/RK contains:
  #               Offset  Size  Contents
  #                    0     2  Index to XF record (➜ 6.115)
  #                    2     4  RK value (➜ 3.6)
  # 4+6∙nc     2  Index to last column (lc)
  row_idx, col_idx = work.unpack('v2')
  # one XF/RK structure every 6 bytes, starting behind the 4-byte header
  4.step(work.size - 6, 6) do |at|
    xf_idx = work[at, 2].unpack('v').first
    number = decode_rk(work[at + 2, 4])
    set_cell(worksheet, row_idx, col_idx, xf_idx, number)
    col_idx += 1
  end
end
645
##
# NUMBER record: a cell holding a floating-point value.
def read_number(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     8  IEEE 754 floating-point value (64-bit double precision)
  row_idx, col_idx, xf_idx, number = work.unpack(binfmt(:number))
  set_cell(worksheet, row_idx, col_idx, xf_idx, number)
end
654
##
# RK record: a cell holding a single RK-encoded number.
def read_rk(worksheet, addr, work)
  # Offset  Size  Contents
  #      0     2  Index to row
  #      2     2  Index to column
  #      4     2  Index to XF record (➜ 6.115)
  #      6     4  RK value (➜ 3.6)
  row_idx, col_idx, xf_idx = work.unpack('v3')
  number = decode_rk(work[6, 4])
  set_cell(worksheet, row_idx, col_idx, xf_idx, number)
end
663
# Returns the Row for +addr+, reading its row block from the stream unless
# that row is already held in @current_row_block.
#
# On a cache miss the cache is cleared, an empty Row is registered for the
# requested index, and records are consumed from addr[:offset] onwards.
# Every cell record is handed to its read_* method (which fills the cache
# through set_cell). Reading stops at EOF, at DBCELL, or at a ROW record
# that follows at least one cell record.
def read_row worksheet, addr
  row = addr[:index]
  cache_key = [worksheet, row]
  return @current_row_block[cache_key] if @current_row_block.key?(cache_key)

  @current_row_block.clear
  cells = @current_row_block[cache_key] = Row.new(nil, row)
  @pos = addr[:offset]
  seen_cell = false
  while (chunk = get_next_chunk)
    pos, op, _, work = chunk
    case op
    when :eof
      # ● EOF ➜ 6.36 - we should only get here if there is just one
      #   Row-Block
      @pos = pos
      return cells
    when :dbcell
      # ○ DBCELL: stream offsets to the cell records of each row
      return cells
    when :row
      # ROW records (➜ 6.83) were already handled in read_worksheet; one
      # that shows up after cell data marks the start of the next block.
      return cells if seen_cell
    when 0x0002
      # INTEGER ➜ 6.56 (BIFF2 only)
      # TODO: implement for BIFF2 support
      seen_cell = true
    when :blank, :boolerr, :formula, :label, :labelsst,
         :mulblank, :mulrk, :number, :rk, :rstring
      # BLANK ➜ 6.7, BOOLERR ➜ 6.10, FORMULA ➜ 6.46,
      # LABEL ➜ 6.59 (BIFF2-BIFF7), LABELSST ➜ 6.61 (BIFF8 only),
      # MULBLANK ➜ 6.64, MULRK ➜ 6.65 (BIFF5-BIFF8), NUMBER ➜ 6.68,
      # RK ➜ 6.82 (BIFF3-BIFF8), RSTRING ➜ 6.84 (BIFF5/BIFF7).
      # Each opcode name maps one-to-one onto its read_<opcode> method.
      seen_cell = true
      __send__(:"read_#{op}", worksheet, addr, work)
    end
  end
  cells
end
721
# Reads an RSTRING record (rich-text string cell) and stores its text in
# the addressed cell. The formatting runs after the string are not read.
#
# Record layout:
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Index to XF record (➜ 6.115)
#        6    sz  Unformatted Unicode string, 16-bit string length (➜ 3.4)
#     6+sz     2  Number of Rich-Text formatting runs (rt)
#     8+sz  4·rt  List of rt formatting runs (➜ 3.2)
def read_rstring worksheet, addr, work
  row, column, xf = work.unpack('v3')
  raw_text = read_string(work[6..-1], 2)
  text = client(raw_text, @workbook.encoding)
  set_cell(worksheet, row, column, xf, text)
end
733
# Reads a WINDOW2 record and records whether the worksheet is selected.
#
# The record holds additional settings for the document window
# (BIFF2-BIFF4) or for the window of a specific worksheet (BIFF5-BIFF8) and
# is part of the Sheet View Settings Block (➜ 4.5). Only the "selected"
# flag is evaluated here.
#
# Record layout:
#   Offset  Size  Contents
#        0     2  Option flags:
#                   Bit  Mask    Contents
#                     0  0x0001  0 = Show formula results; 1 = Show formulas
#                     1  0x0002  1 = Show grid lines
#                     2  0x0004  1 = Show sheet headers
#                     3  0x0008  1 = Panes are frozen (freeze)
#                     4  0x0010  0 = Show zero values as empty cells;
#                                1 = Show zero values
#                     5  0x0020  0 = Manual grid line colour;
#                                1 = Automatic grid line colour
#                     6  0x0040  0 = Columns from left to right;
#                                1 = Columns from right to left
#                     7  0x0080  1 = Show outline symbols
#                     8  0x0100  0 = Keep splits if pane freeze is removed;
#                                1 = Remove splits if pane freeze is removed
#                     9  0x0200  1 = Sheet selected (BIFF5-BIFF8)
#                    10  0x0400  1 = Sheet active (BIFF5-BIFF8)
#                    11  0x0800  0 = Show in normal view;
#                                1 = Show in page break preview (BIFF8)
#        2     2  Index to first visible row
#        4     2  Index to first visible column
#        6     2  Colour index of grid line colour (➜ 5.74).
#                 Note that in BIFF2-BIFF5 an RGB colour is written instead.
#        8     2  Not used
#       10     2  Cached magnification factor in page break preview
#                 (in percent); 0 = Default (60%)
#       12     2  Cached magnification factor in normal view (in percent);
#                 0 = Default (100%)
#       14     4  Not used
def read_window2 worksheet, work, pos, len
  option_flags = work.unpack('v').first
  worksheet.selected = (option_flags & 0x0200) > 0
end
777
+
778
# Reads a MERGEDCELLS record (BIFF8) and appends its ranges to
# worksheet.merged_cells.
#
# Record layout:
#   Offset  Size  Contents
#        0  var.  Cell range address list with merged ranges (➜ 2.5.15)
#
# If the record size exceeds the limit, it is not continued with a CONTINUE
# record; another self-contained MERGEDCELLS record is started instead. The
# limit of 8224 bytes per record results in a maximum of 1027 merged ranges
# per record.
#
# A cell range address list consists of a field with the number of ranges
# followed by the range addresses (BIFF2-BIFF8):
#   Offset  Size          Contents
#        0  2             Number of following cell range addresses (nm)
#        2  6∙nm or 8∙nm  List of nm cell range addresses (➜ 2.5.14)
def read_merged_cells worksheet, work, pos, len
  merged = worksheet.merged_cells
  ranges = read_range_address_list(work, len)
  merged.push(*ranges)
end
797
+
798
# Walks the workbook-globals records, dispatching each one to its reader.
# Returns when the next BOF (start of the first worksheet) or the EOF
# record is met; postread_workbook is invoked only in the EOF case.
def read_workbook
  last_op = nil
  while (chunk = get_next_chunk)
    pos, op, len, work = chunk
    case op
    when @bof, :bof
      # ● BOF Type = worksheet (➜ 6.8)
      return
    when :eof
      # ● EOF ➜ 6.36
      postread_workbook
      return
    when :datemode
      # ○ DATEMODE ➜ 6.25: a flag of 1 selects the 1904 date system
      base_flag = work.unpack('v').first
      @workbook.date_base = if base_flag == 1
                              DateTime.new(1904, 1, 1)
                            else
                              DateTime.new(1899, 12, 31)
                            end
    when :continue
      # ○ CONTINUE ➜ 6.22: only continuations of an SST record (➜ 6.96)
      #   are processed
      continue_sst(work, pos, len) if last_op == :sst
    when :codepage   then read_codepage(work, pos, len)   # ○  CODEPAGE ➜ 6.17
    when :boundsheet then read_boundsheet(work, pos, len) # ●● BOUNDSHEET ➜ 6.12
    when :xf         then read_xf(work, pos, len)         # ●● XF ➜ 6.115
    when :sst
      # ○ Shared String Table ➜ 5.11
      # ● SST ➜ 6.96
      # TODO: implement memory-efficient sst handling, possibly in
      #       conjunction with EXTSST
      read_sst(work, pos, len)
    when :extsst     then read_extsst(work, pos, len)     # ●  EXTSST ➜ 6.40
    when :style      then read_style(work, pos, len)      # ●● STYLE ➜ 6.99
    when :format     then read_format(work, pos, len)     # ○○ FORMAT ➜ 6.45
    when :font       then read_font(work, pos, len)       # ●● FONT ➜ 6.43
    end
    # CONTINUE records never become the "previous" record, so a chain of
    # them still refers back to the record they continue.
    last_op = op unless op == :continue
  end
end
843
# Scans one worksheet substream, starting at stream position +offset+, and
# dispatches every record to its reader until EOF, where
# postread_worksheet is called.
#
# Cell contents are not decoded here. ROW records and cell records without
# a ROW record only register row addresses, so rows can be read later by
# read_row.
#
# Notes (cell comments) arrive as a record sequence
# OBJ (object type 0x19) → MSODRAWING → TXO → CONTINUE (text). The OBJ
# record opens a NoteObject, and the first CONTINUE after the TXO supplies
# its text. The finished objects are collected in @noteObjList, and the
# NOTE records in @noteList.
#
# Other drawing objects (e.g. text boxes) use the same TXO/CONTINUE
# sequence. Their text must not be attached to a note, so the pending
# note object is dropped whenever a non-note OBJ is seen and is reset for
# each worksheet.
def read_worksheet worksheet, offset
  @pos = offset
  @detected_rows = {}
  @noteObjList = []
  @noteList = []
  # Never carry a pending note object over from a previously read sheet.
  @noteObject = nil
  previous = nil
  while (tuple = get_next_chunk)
    pos, op, len, work = tuple
    # The first record outside the current row block ends it: store the
    # block length next to its start offset.
    if (offset = @current_row_block_offset) && !in_row_block?(op, previous)
      @current_row_block_offset = nil
      offset[1] = pos - offset[0]
    end
    case op
    when :eof # ● EOF ➜ 6.36
      postread_worksheet worksheet
      return
    # TODO (:uncalced, ○ UNCALCED ➜ 6.104): formula support. Some fields
    #   containing formulas may have been saved without a computed value.
    # TODO (:index, ○ INDEX ➜ 5.7 Row Blocks, ➜ 6.55): if there are changes
    #   in rows, omit the index when writing.
    when :guts # GUTS 5.53
      read_guts worksheet, work, pos, len
    when :colinfo # ○○ COLINFO ➜ 6.18
      read_colinfo worksheet, work, pos, len
    when :dimensions # ● DIMENSIONS ➜ 6.31
      read_dimensions worksheet, work, pos, len
    when :row # ○○ Row Blocks ➜ 5.7
      # ● ROW ➜ 6.83
      set_row_address worksheet, work, pos, len
    when :hlink
      read_hlink worksheet, work, pos, len
    when :window2
      read_window2 worksheet, work, pos, len
    when :mergedcells # ○○ MERGEDCELLS ➜ 5.67
      read_merged_cells worksheet, work, pos, len
    when :protect, :password
      read_sheet_protection worksheet, op, work
    when :note # a note references an :obj
      read_note worksheet, work, pos, len
    when :obj # it contains the author in the NTS structure
      _ft, _cb, _ot, _objID = work.unpack('v4')
      if _ot == 0x19
        # Object type 0x19 is a note (cell comment).
        @noteObject = NoteObject.new
        @noteObject.objID = _objID
      else
        # Any other object: a following TXO/CONTINUE belongs to it, not to
        # a note.
        @noteObject = nil
      end
    when :drawing
      # This can be followed by TXO in case of a note; nothing to read.
    when :txo
      # This contains the length of the note text; nothing to read.
    when :continue # this contains the actual note text
      if previous == :txo && @noteObject
        # The first byte selects the character encoding of the text.
        case work.unpack('C').first
        when 0
          # Compressed: one byte per character.
          @noteObject.text = work.unpack('@1C*').pack('C*')
        when 1
          # Uncompressed: 16-bit little-endian code units, converted to
          # UTF-8.
          @noteObject.text = work.unpack('@1v*').pack('U*')
        end
        @noteObjList << @noteObject
      end
    when :pagesetup
      read_pagesetup(worksheet, work, pos, len)
    when :leftmargin
      worksheet.margins[:left] = work.unpack(binfmt(:margin))[0]
    when :rightmargin
      worksheet.margins[:right] = work.unpack(binfmt(:margin))[0]
    when :topmargin
      worksheet.margins[:top] = work.unpack(binfmt(:margin))[0]
    when :bottommargin
      worksheet.margins[:bottom] = work.unpack(binfmt(:margin))[0]
    else
      # Cell records whose row has no ROW record still need an address.
      if ROW_BLOCK_OPS.include?(op)
        set_missing_row_address worksheet, work, pos, len
      end
    end
    # CONTINUE is deliberately recorded as "previous" too, so only the
    # first CONTINUE after a TXO is taken as note text.
    previous = op
  end
end
940
+
941
# Reads a PAGESETUP record into worksheet.pagesetup, replacing whatever was
# stored there before. Besides :orientation and :adjust_to, the complete
# unpacked record is kept under :orig_data.
#
# NOTE(review): the orientation test compares the whole sixth unpacked
# field with 0; whether that field is a single bit or the full option-flags
# word depends on binfmt(:pagesetup), which is not visible here - confirm.
def read_pagesetup(worksheet, work, pos, len)
  worksheet.pagesetup.delete_if { true }
  fields = work.unpack(binfmt(:pagesetup))
  orientation = if fields[5] == 0
                  :landscape
                else
                  :portrait
                end
  worksheet.pagesetup[:orientation] = orientation
  worksheet.pagesetup[:adjust_to] = fields[1]

  # TODO: add options according to specification
  worksheet.pagesetup[:orig_data] = fields
end
950
+
951
# Reads a GUTS record (size of the outline areas) into worksheet.guts.
#
# Record layout:
#   Offset  Size  Contents
#        0     2  Width of the area to display row outlines (left of the
#                 sheet), in pixel
#        2     2  Height of the area to display column outlines (above the
#                 sheet), in pixel
#        4     2  Number of visible row outline levels
#                 (used row levels + 1; or 0, if not used)
#        6     2  Number of visible column outline levels
#                 (used column levels + 1; or 0, if not used)
def read_guts worksheet, work, pos, len
  fields = work.unpack('v4')
  # Keys in the order in which their values appear in the record.
  [:width, :height, :row_level, :col_level].each_with_index do |key, position|
    worksheet.guts[key] = fields[position]
  end
  fields[3]
end
963
# Reads a STYLE record and names the XF (format) it refers to.
#
# User-defined cell styles:
#   Offset  Size  Contents
#        0     2  Bit   Mask    Contents
#                 11-0  0x0fff  Index to style XF record (➜ 6.115)
#                   15  0x8000  Always 0 for user-defined styles
#        2  var.  BIFF2-BIFF7: Non-empty byte string,
#                              8-bit string length (➜ 3.3)
#                 BIFF8:       Non-empty Unicode string,
#                              16-bit string length (➜ 3.4)
#
# Built-in cell styles:
#   Offset  Size  Contents
#        0     2  Bit   Mask    Contents
#                 11-0  0x0fff  Index to style XF record (➜ 6.115)
#                   15  0x8000  Always 1 for built-in styles
#        2     1  Identifier of the built-in cell style:
#                   0x00 = Normal
#                   0x01 = RowLevel_lv (see next field)
#                   0x02 = ColLevel_lv (see next field)
#                   0x03 = Comma
#                   0x04 = Currency
#                   0x05 = Percent
#                   0x06 = Comma [0]           (BIFF4-BIFF8)
#                   0x07 = Currency [0]        (BIFF4-BIFF8)
#                   0x08 = Hyperlink           (BIFF8)
#                   0x09 = Followed Hyperlink  (BIFF8)
#        3     1  Level for RowLevel or ColLevel style (zero-based, lv),
#                 FFH otherwise
#
# Built-in identifiers that are missing from BUILTIN_STYLES leave the
# format's name untouched.
def read_style work, pos, len
  flags, = work.unpack 'v'
  xf_idx = flags & 0x0fff
  xf = @workbook.format xf_idx
  builtin = flags & 0x8000
  if builtin == 0
    xf.name = client read_string(work[2..-1], 2), @workbook.encoding
  else
    id, level = work.unpack 'x2C2'
    if name = BUILTIN_STYLES[id]
      # String#sub returns a new string: the result has to be kept,
      # otherwise the "_lv" placeholder would survive in the style name.
      # The non-destructive form also leaves the shared constant intact.
      name = name.sub '_lv', "_#{level}"
      xf.name = client name, 'UTF-8'
    end
  end
end
1006
# Reads an XF (extended format) record, builds a Format from it and adds
# that Format to the workbook.
#
# Record layout:
#   Offset  Size  Contents
#        0     2  Index to FONT record (➜ 6.43)
#        2     2  Index to FORMAT record (➜ 6.45)
#        4     2  2-0   0x0007  XF_TYPE_PROT – XF type, cell protection
#                                 0x01  1 = Cell is locked
#                                 0x02  1 = Formula is hidden
#                                 0x04  0 = Cell XF; 1 = Style XF
#                 15-4  0xfff0  Index to parent style XF
#                               (always 0xfff in style XFs)
#        6     1  2-0   0x07    XF_HOR_ALIGN – Horizontal alignment
#                                 0x00 General, 0x01 Left, 0x02 Centred,
#                                 0x03 Right, 0x04 Filled,
#                                 0x05 Justified (BIFF4-BIFF8X),
#                                 0x06 Centred across selection
#                                      (BIFF4-BIFF8X),
#                                 0x07 Distributed (BIFF8X)
#                 3     0x08    1 = Text is wrapped at right border
#                 6-4   0x70    XF_VERT_ALIGN – Vertical alignment
#                                 0x00 Top, 0x01 Centred, 0x02 Bottom,
#                                 0x03 Justified (BIFF5-BIFF8X),
#                                 0x04 Distributed (BIFF8X)
#        7     1  XF_ROTATION: Text rotation angle
#                   0       Not rotated
#                   1-90    1 to 90 degrees counterclockwise
#                   91-180  1 to 90 degrees clockwise
#                   255     Letters are stacked top-to-bottom, but not
#                           rotated
#        8     1  3-0   0x0f    Indent level
#                 4     0x10    1 = Shrink content to fit into cell
#                 5     0x40    1 = Merge Range (djberger)
#                 7-6   0xc0    Text direction (BIFF8X only):
#                                 0 = According to context
#                                 1 = Left-to-right
#                                 2 = Right-to-left
#        9     1  7-2   0xfc    XF_USED_ATTRIB – Used attributes. Each bit
#                               describes the validity of a specific group
#                               of attributes. In cell XFs a cleared bit
#                               means the attributes of the parent style XF
#                               are used (but only if the attributes are
#                               valid there), a set bit means the
#                               attributes of this XF are used. In style
#                               XFs a cleared bit means the attribute
#                               setting is valid, a set bit means the
#                               attribute should be ignored.
#                                 0x01  Flag for number format
#                                 0x02  Flag for font
#                                 0x04  Flag for horizontal and vertical
#                                       alignment, text wrap, indentation,
#                                       orientation, rotation, and text
#                                       direction
#                                 0x08  Flag for border lines
#                                 0x10  Flag for background area style
#                                 0x20  Flag for cell protection (cell
#                                       locked and formula hidden)
#       10     4  Cell border lines and background area:
#                  3- 0  0x0000000f  Left line style (➜ 3.10)
#                  7- 4  0x000000f0  Right line style (➜ 3.10)
#                 11- 8  0x00000f00  Top line style (➜ 3.10)
#                 15-12  0x0000f000  Bottom line style (➜ 3.10)
#                 22-16  0x007f0000  Colour index (➜ 6.70), left line
#                 29-23  0x3f800000  Colour index (➜ 6.70), right line
#                    30  0x40000000  1 = Diagonal line from top left to
#                                    right bottom
#                    31  0x80000000  1 = Diagonal line from bottom left to
#                                    right top
#       14     4   6- 0  0x0000007f  Colour index (➜ 6.70), top line
#                 13- 7  0x00003f80  Colour index (➜ 6.70), bottom line
#                 20-14  0x001fc000  Colour index (➜ 6.70), diagonal line
#                 24-21  0x01e00000  Diagonal line style (➜ 3.10)
#                 31-26  0xfc000000  Fill pattern (➜ 3.11)
#       18     2   6-0   0x007f      Colour index (➜ 6.70), pattern colour
#                 13-7   0x3f80      Colour index (➜ 6.70), pattern
#                                    background
def read_xf work, pos, len
  xf_format = Format.new
  font_index, numfmt_index, _type_prot, align, rotation, indent,
    _used_attrib, borders, border_colors, pattern =
    work.unpack(binfmt(:xf))

  xf_format.number_format = @formats[numfmt_index]
  ## this appears to be undocumented: the first 4 fonts seem to be accessed
  #  with a 0-based index, but all subsequent font indices are 1-based.
  effective_font_index = font_index > 3 ? font_index - 1 : font_index
  xf_format.font = @workbook.font(effective_font_index)

  # Alignment and text-direction lookups are keyed by the masked,
  # unshifted bit fields.
  xf_format.horizontal_align = NGILA_H_FX[align & 0x07]
  xf_format.text_wrap = (align & 0x08) != 0
  xf_format.vertical_align = NGILA_V_FX[align & 0x70]

  xf_format.rotation =
    if rotation == 255
      :stacked
    elsif rotation > 90
      # 91-180 encode clockwise rotation and become negative angles
      90 - rotation
    else
      rotation
    end

  xf_format.indent_level = indent & 0x0f
  xf_format.shrink = (indent & 0x10) != 0
  xf_format.text_direction = NOITCERID_TXET_FX[indent & 0xc0]

  # Border line styles: four consecutive 4-bit fields.
  xf_format.left = XF_BORDER_LINE_STYLES[borders & 0x0f]
  xf_format.right = XF_BORDER_LINE_STYLES[(borders >> 4) & 0x0f]
  xf_format.top = XF_BORDER_LINE_STYLES[(borders >> 8) & 0x0f]
  xf_format.bottom = XF_BORDER_LINE_STYLES[(borders >> 12) & 0x0f]
  # Border colours: 7-bit colour indexes, black when the index is unknown.
  xf_format.left_color = COLOR_CODES[(borders >> 16) & 0x7f] || :black
  xf_format.right_color = COLOR_CODES[(borders >> 23) & 0x7f] || :black
  xf_format.cross_down = (borders & 0x40000000) != 0
  xf_format.cross_up = (borders & 0x80000000) != 0

  if border_colors
    xf_format.top_color = COLOR_CODES[border_colors & 0x7f] || :black
    xf_format.bottom_color = COLOR_CODES[(border_colors >> 7) & 0x7f] || :black
    xf_format.diagonal_color = COLOR_CODES[(border_colors >> 14) & 0x7f] || :black
    # The diagonal line style (mask 0x01e00000) is not read.
    xf_format.pattern = (border_colors >> 26) & 0x3f
  end

  xf_format.pattern_fg_color = COLOR_CODES[pattern & 0x007f] || :border
  xf_format.pattern_bg_color = COLOR_CODES[(pattern >> 7) & 0x7f] || :pattern_bg
  @workbook.add_format(xf_format)
end
1142
# Reads a NOTE record (the anchor of a cell comment) and queues a Note for
# postread_worksheet, which pairs it with the note object of the same id.
#
# Fields as read here:
#   Offset  Size  Contents
#        0     2  Index to row
#        2     2  Index to column
#        4     2  Option flags (ignored)
#        6     2  Object id of the note object
#        8     2  Length of the author name, in characters
#       10     1  String flags: bit 0 = 0: 8-bit characters,
#                               bit 0 = 1: 16-bit (UTF-16LE) characters
#       11  var.  Author name
#     [var]    1  Unused byte after the author name (not part of the name)
#
# Only as many characters as the length field announces are taken, so the
# trailing unused byte never ends up in the author. A 16-bit author name is
# converted to UTF-8, the same way the note text is. Truncated records
# yield an empty author instead of raising.
def read_note worksheet, work, pos, len
  row, col, _, _objID, _objAuthLen, _objAuthLenFmt = work.unpack('v5C')
  author_chars = _objAuthLen.to_i
  if (_objAuthLenFmt.to_i & 0x01) == 1
    # Uncompressed: two bytes per character, little-endian.
    _objAuth = work[11, author_chars * 2].to_s.unpack('v*').pack('U*')
  else
    # Compressed: one byte per character.
    _objAuth = work[11, author_chars].to_s
  end
  @note = Note.new
  @note.length = len
  @note.row = row
  @note.col = col
  @note.author = _objAuth
  @note.objID = _objID
  # Queue it; the list is matched against the note objects in
  # postread_worksheet.
  @noteList << @note
end
1163
# Applies a PROTECT or PASSWORD record to the worksheet.
#
# :protect  - the sheet is protected when the 16-bit value equals 1.
# :password - the 16-bit value is stored as the sheet's password hash.
# Any other +op+ is ignored.
def read_sheet_protection worksheet, op, data
  if op == :protect
    protected_flag = data.unpack('v').first
    worksheet.protect! if protected_flag == 1
  elsif op == :password
    worksheet.password_hash = data.unpack('v').first
  end
end
1171
# Stores +value+ (and, unless +xf+ is 0, the workbook format for +xf+) at
# +column+ of the cached Row for [worksheet, row]. The Row is created on
# demand.
def set_cell worksheet, row, column, xf, value=nil
  cache_key = [worksheet, row]
  cells = (@current_row_block[cache_key] ||= Row.new(nil, row))
  # XF index 0 gets no explicit per-cell format entry.
  cells.formats[column] = @workbook.format(xf) if xf != 0
  cells[column] = value
end
1176
# Registers a row address for a cell record whose row has no address yet
# (called for row-block records when the worksheet has no offset stored
# for that row).
#
# Leading fields of the cell record:
#   Offset  Size  Contents
#        0     2  Index of this row
#        2     2  Index to this column
def set_missing_row_address worksheet, work, pos, len
  row_index = work.unpack('v2').first
  return if worksheet.offsets[row_index]

  # Open a row block at this record unless one is already being tracked.
  @current_row_block_offset ||= [pos]
  address = {
    :index     => row_index,
    :row_block => @current_row_block_offset,
    :offset    => @current_row_block_offset[0],
  }
  worksheet.set_row_address(row_index, address)
end
1191
# Reads a ROW record and registers the row's attributes and stream address
# with the worksheet.
#
# Record layout:
#   Offset  Size  Contents
#        0     2  Index of this row
#        2     2  Index to column of the first cell which is described by a
#                 cell record
#        4     2  Index to column of the last cell which is described by a
#                 cell record, increased by 1
#        6     2  14-0  0x7fff  Height of the row, in twips = 1/20 of a
#                               point
#                   15  0x8000  0 = Row has custom height;
#                               1 = Row has default height
#        8     2  Not used
#       10     2  In BIFF3-BIFF4 this field contains a relative offset to
#                 calculate stream position of the first cell record for
#                 this row (➜ 5.7.1). In BIFF5-BIFF8 this field is not used
#                 anymore, but the DBCELL record (➜ 6.26) instead.
#       12     4  Option flags and default row formatting:
#                   2-0  0x00000007  Outline level of the row
#                     4  0x00000010  1 = Outline group starts or ends here
#                                    (depending on where the outline
#                                    buttons are located, see WSBOOL
#                                    record, ➜ 6.113), and is collapsed
#                     5  0x00000020  1 = Row is hidden (manually, or by a
#                                    filter or outline group)
#                     6  0x00000040  1 = Row height and default font height
#                                    do not match
#                     7  0x00000080  1 = Row has explicit default format
#                                    (fl)
#                     8  0x00000100  Always 1
#                 27-16  0x0fff0000  If fl = 1: Index to default XF record
#                                    (➜ 6.115)
#                    28  0x10000000  1 = Additional space above the row.
#                                    Set if the upper border of at least
#                                    one cell in this row, or the lower
#                                    border of at least one cell in the row
#                                    above, is formatted with a thick line
#                                    style. Thin and medium line styles are
#                                    not taken into account.
#                    29  0x20000000  1 = Additional space below the row.
#                                    Set if the lower border of at least
#                                    one cell in this row, or the upper
#                                    border of at least one cell in the row
#                                    below, is formatted with a medium or
#                                    thick line style. Thin line styles are
#                                    not taken into account.
def set_row_address worksheet, work, pos, len
  # A ROW record opens a row block unless one is already being tracked.
  @current_row_block_offset ||= [pos]
  index, first_used, first_unused, raw_height, flags =
    work.unpack(binfmt(:row))
  row_height = raw_height & 0x7fff

  # TODO: read attributes from work[13,3], read flags
  default_format = nil
  if (flags & 0x00000080) != 0
    default_format = @workbook.format((flags >> 16) & 0x0fff)
  end

  attrs = {
    :default_format => default_format,
    :first_used     => first_used,
    :first_unused   => first_unused,
    :index          => index,
    :row_block      => @current_row_block_offset,
    :offset         => @current_row_block_offset[0],
    :outline_level  => flags & 0x00000007,
    :collapsed      => (flags & 0x00000010) != 0,
    :hidden         => (flags & 0x00000020) != 0,
  }
  # The height is only recorded when it differs from the default font
  # height.
  attrs[:height] = row_height / TWIPS if (flags & 0x00000040) != 0

  # TODO: Row spacing
  worksheet.set_row_address(index, attrs)
end
1264
# Prepares the reader for +io+: opens it as an OLE storage, locates the
# workbook stream, reads it into @data, reads the BOF record and extends
# the reader with the modules matching the detected BIFF version.
#
# Raises RuntimeError when no workbook stream can be opened.
def setup io
  ## Reading from StringIO fails without forced encoding
  if io.respond_to?(:string)
    buffer = io.string
    if buffer && buffer.respond_to?(:force_encoding)
      buffer.force_encoding('ASCII-8BIT')
    end
  end
  ##
  io.rewind
  @ole = Ole::Storage.open(io)
  @workbook = Workbook.new(io, {})

  # Try the known spellings of the workbook stream name in turn; the
  # first one that opens wins.
  %w{Book Workbook BOOK WORKBOOK book workbook}.each do |stream_name|
    @book = begin
              @ole.file.open(stream_name)
            rescue StandardError
              false
            end
    break if @book
  end
  unless @book
    raise RuntimeError, "could not locate a workbook, possibly an empty file passed"
  end

  @data = @book.read
  read_bof
  @workbook.ole = @book
  @workbook.bof = @bof
  @workbook.version = @version
  biff = @workbook.biff_version
  extend_reader(biff)
  extend_internals(biff)
end
1287
+ private
1288
# Mixes the BIFF-version specific Internals module into this reader.
# A LoadError (no file for that version) is ignored on purpose.
def extend_internals version
  begin
    require format('spreadsheet/excel/internals/biff%i', version)
    extend Internals.const_get(format('Biff%i', version))
    ## spreadsheets may not include a codepage record.
    @workbook.encoding = encoding(850) if version < 8
  rescue LoadError
    nil
  end
end
1295
# Mixes the BIFF-version specific Reader module into this reader.
# A LoadError (no file for that version) is ignored on purpose.
def extend_reader version
  begin
    require format('spreadsheet/excel/reader/biff%i', version)
    extend Reader.const_get(format('Biff%i', version))
  rescue LoadError
    nil
  end
end
1300
+ end
1301
+ end
1302
+ end