ruby-spreadsheet 0.6.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.document +5 -0
  2. data/GUIDE.txt +267 -0
  3. data/Gemfile +12 -0
  4. data/Gemfile.lock +20 -0
  5. data/History.txt +307 -0
  6. data/LICENSE.txt +619 -0
  7. data/README.txt +91 -0
  8. data/Rakefile +53 -0
  9. data/VERSION +1 -0
  10. data/bin/xlsopcodes +18 -0
  11. data/lib/parseexcel.rb +27 -0
  12. data/lib/parseexcel/parseexcel.rb +75 -0
  13. data/lib/parseexcel/parser.rb +11 -0
  14. data/lib/spreadsheet.rb +79 -0
  15. data/lib/spreadsheet/column.rb +71 -0
  16. data/lib/spreadsheet/compatibility.rb +23 -0
  17. data/lib/spreadsheet/datatypes.rb +110 -0
  18. data/lib/spreadsheet/encodings.rb +46 -0
  19. data/lib/spreadsheet/excel.rb +88 -0
  20. data/lib/spreadsheet/excel/error.rb +26 -0
  21. data/lib/spreadsheet/excel/internals.rb +386 -0
  22. data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
  23. data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
  24. data/lib/spreadsheet/excel/offset.rb +41 -0
  25. data/lib/spreadsheet/excel/reader.rb +1173 -0
  26. data/lib/spreadsheet/excel/reader/biff5.rb +22 -0
  27. data/lib/spreadsheet/excel/reader/biff8.rb +193 -0
  28. data/lib/spreadsheet/excel/row.rb +92 -0
  29. data/lib/spreadsheet/excel/sst_entry.rb +46 -0
  30. data/lib/spreadsheet/excel/workbook.rb +80 -0
  31. data/lib/spreadsheet/excel/worksheet.rb +100 -0
  32. data/lib/spreadsheet/excel/writer.rb +1 -0
  33. data/lib/spreadsheet/excel/writer/biff8.rb +75 -0
  34. data/lib/spreadsheet/excel/writer/format.rb +253 -0
  35. data/lib/spreadsheet/excel/writer/workbook.rb +652 -0
  36. data/lib/spreadsheet/excel/writer/worksheet.rb +948 -0
  37. data/lib/spreadsheet/font.rb +92 -0
  38. data/lib/spreadsheet/format.rb +177 -0
  39. data/lib/spreadsheet/formula.rb +9 -0
  40. data/lib/spreadsheet/helpers.rb +11 -0
  41. data/lib/spreadsheet/link.rb +43 -0
  42. data/lib/spreadsheet/row.rb +132 -0
  43. data/lib/spreadsheet/workbook.rb +120 -0
  44. data/lib/spreadsheet/worksheet.rb +279 -0
  45. data/lib/spreadsheet/writer.rb +30 -0
  46. data/ruby-spreadsheet.gemspec +126 -0
  47. data/test/data/test_changes.xls +0 -0
  48. data/test/data/test_copy.xls +0 -0
  49. data/test/data/test_datetime.xls +0 -0
  50. data/test/data/test_empty.xls +0 -0
  51. data/test/data/test_formula.xls +0 -0
  52. data/test/data/test_missing_row.xls +0 -0
  53. data/test/data/test_version_excel5.xls +0 -0
  54. data/test/data/test_version_excel95.xls +0 -0
  55. data/test/data/test_version_excel97.xls +0 -0
  56. data/test/excel/row.rb +35 -0
  57. data/test/excel/writer/worksheet.rb +23 -0
  58. data/test/font.rb +163 -0
  59. data/test/integration.rb +1281 -0
  60. data/test/row.rb +33 -0
  61. data/test/suite.rb +14 -0
  62. data/test/workbook.rb +21 -0
  63. data/test/worksheet.rb +80 -0
  64. metadata +203 -0
@@ -0,0 +1,22 @@
1
+ module Spreadsheet
2
+ module Excel
3
+ class Reader
4
+ ##
5
+ # This Module collects reader methods such as read_string that are specific to
6
+ # Biff5. This Module is likely to be expanded as Support for older Versions
7
+ # of Excel grows.
8
+ module Biff5
9
##
# Extract a length-prefixed String of 8-bit Characters from _work_.
# _count_length_ is the size in bytes (1 or 2) of the length prefix.
def read_string(work, count_length = 1)
  # Offset  Size    Contents
  #      0  1 or 2  Length of the string (character count, ln)
  # 1 or 2  ln      Character array (8-bit characters)
  template = if count_length == 1
               'C'
             else
               'v'
             end
  char_count = work.unpack(template).first
  work[count_length, char_count]
end
19
+ end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,193 @@
1
+ module Spreadsheet
2
+ module Excel
3
+ class Reader
4
+ ##
5
+ # This Module collects reader methods such as read_string that are specific to
6
+ # Biff8. This Module is likely to be expanded as Support for older Versions
7
+ # of Excel grows and methods get moved here for disambiguation.
8
+ module Biff8
9
+ include Spreadsheet::Excel::Internals
10
##
# When a String is too long for one Opcode, it is continued in a Continue
# Opcode. Excel may reconsider compressing the remainder of the string.
# This method appends the available remainder (decompressed if necessary) to
# the incomplete string.
#
# _work_ is the payload of the Continue Opcode; its first byte holds the
# option flags. _incomplete_string_ is a two-element Array: the String read
# so far and the total number of characters expected.
# Returns the number of bytes accounted for (flag byte included).
def continue_string(work, incomplete_string = @incomplete_string)
  opts, = work.unpack('C')
  # Named is_wide so that the #wide helper below is not shadowed by a local.
  is_wide = opts & 1
  head, chars = incomplete_string
  # head holds two bytes per character that was already read.
  owing = chars - head.size / 2
  size = owing * (is_wide + 1)
  string = work[1, size]
  string = wide(string) if is_wide == 0
  head << string
  @incomplete_string = nil if head.size >= chars * 2
  size + 1
end
31
##
# When a String is too long for one Opcode, it is continued in a Continue
# Opcode. Excel may reconsider compressing the remainder of the string.
# This method only evaluates the header and registers the address of the
# continuation with the previous SstEntry.
# Returns the number of bytes of _work_ to skip.
def continue_string_header(work, oppos)
  flags = work.unpack('C').first
  bytes_per_char = 1 + (flags & 1)
  pending = @incomplete_sst
  wanted = pending.continued_chars * bytes_per_char + 1
  size = [work.size, wanted].min
  chars = (size - 1) / bytes_per_char
  pending.continue(oppos + OPCODE_SIZE, size, chars)
  # Still incomplete: more Continue Opcodes are expected for this entry.
  return size if pending.continued?

  @workbook.add_shared_string(pending)
  skip = size + @incomplete_skip
  @incomplete_sst = nil
  @incomplete_skip = nil
  skip
end
52
##
# Read more data into the Shared String Table. (see also: #read_sst)
# This method only evaluates the header, the actual work is done in #_read_sst
def continue_sst(work, oppos, len)
  # A pending entry consumes the start of this chunk before new entries begin.
  start = @incomplete_sst ? continue_string_header(work, oppos) : 0
  @sst_offset[1] += len
  _read_sst(work, oppos, start)
end
63
##
# Clears the state kept while reading strings and the Shared String Table.
def postread_workbook # :nodoc:
  super
  # Every variable is reset explicitly; the former one-line multiple
  # assignment only cleared all of them through a nested assignment.
  @incomplete_string = @sst_size = @sst_offset = nil
  @incomplete_sst = @incomplete_skip = nil
end
67
##
# Remember where extsst is located (position and length), so a new extsst
# can be written when the sst has changed. _work_ is not evaluated.
def read_extsst(work, pos, len)
  location = [pos, len]
  @workbook.offsets.store(:extsst, location)
end
73
##
# Read the Shared String Table present in all Biff8 Files.
# This method only evaluates the header, the actual work is done in #_read_sst
def read_sst(work, pos, len)
  # Offset  Size  Contents
  #      0     4  Total number of strings in the workbook (see below)
  #      4     4  Number of following strings (nm)
  #      8  var.  List of nm Unicode strings, 16-bit string length (➜ 3.4)
  header = work.unpack('V2')
  # Only the second field (nm) is kept; the total count is not used here.
  @sst_size = header.at(1)
  @sst_offset = [pos, len]
  @workbook.offsets.store(:sst, @sst_offset)
  _read_sst(work, pos, 8)
end
86
##
# Read a string from the Spreadsheet, such as a Worksheet- or Font-Name, or a
# Number-Format. See also #read_string_header and #read_string_body
#
# _count_length_ is the size in bytes (1 or 2) of the character count.
# Returns the String. If characters are still owing, the String and its
# expected character count are kept in @incomplete_string.
def read_string(work, count_length = 1)
  # Offset    Size    Contents
  #      0    1 or 2  Length of the string (character count, ln)
  # 1 or 2    1       Option flags:
  #                   Bit  Mask  Contents
  #                     0  0x01  Character compression (ccompr):
  #                              0 = Compressed (8-bit characters)
  #                              1 = Uncompressed (16-bit characters)
  #                     2  0x04  Asian phonetic settings (phonetic):
  #                              0 = Does not contain Asian phonetic settings
  #                              1 = Contains Asian phonetic settings
  #                     3  0x08  Rich-Text settings (richtext):
  #                              0 = Does not contain Rich-Text settings
  #                              1 = Contains Rich-Text settings
  # [2 or 3]  2       (optional, only if richtext=1)
  #                   Number of Rich-Text formatting runs (rt)
  # [var.]    4       (optional, only if phonetic=1)
  #                   Size of Asian phonetic settings block (in bytes, sz)
  # var.      ln      Character array (8-bit characters
  #           or 2∙ln or 16-bit characters, dependent on ccompr)
  # [var.]    4∙rt    (optional, only if richtext=1)
  #                   List of rt formatting runs (➜ 3.2)
  # [var.]    sz      (optional, only if phonetic=1)
  #                   Asian Phonetic Settings Block (➜ 3.4.2)
  chars, offset, wide, _phonetic, _richtext, available, owing, _skip =
    read_string_header(work, count_length)
  # The header reports _available_ including the header bytes themselves
  # (_offset_ of them), so only available - offset bytes hold characters.
  # Passing _available_ unchanged would read past the end of the string
  # whenever other data follows it in _work_.
  string, = read_string_body(work, offset, available - offset, wide > 0)
  @incomplete_string = [string, chars] if owing > 0
  string
end
121
##
# Read the body of a string: _available_ bytes of _work_ starting at _offset_.
# Returns a pair of the String (expanded through #wide unless _wide_ is set)
# and the data exactly as it was sliced from _work_.
def read_string_body(work, offset, available, wide)
  raw = work[offset, available]
  text = if wide
           raw
         else
           # the parentheses make this a call to #wide, not the parameter
           wide(raw)
         end
  [text, raw]
end
129
##
# Read the header of a string. Returns the following information in an Array:
# * The total number of characters in the string
# * The offset of the actual string data (= the length of this header in bytes)
# * Whether or not the string was compressed (0/1)
# * Whether or not the string contains asian phonetic settings (0/1)
# * Whether or not the string contains richtext formatting (0/1)
# * The number of bytes available in this chunk of data (header included)
# * The number of characters missing from this chunk of data and expected to
#   follow in a Continue Opcode
# * The number of bytes of formatting runs and phonetic settings that follow
#   the characters
def read_string_header(work, count_length = 1, offset = 0)
  prefix = 1 + count_length
  template = count_length == 1 ? 'C2' : 'vC'
  chars, opts = work[offset, prefix].unpack(template)
  wide = opts & 1
  phonetic = (opts >> 2) & 1
  richtext = (opts >> 3) & 1
  skip = 0
  unless richtext.zero?
    runs = work[offset + prefix, 2].unpack('v').first
    skip = 4 * runs
  end
  unless phonetic.zero?
    psize = work[offset + prefix + richtext * 2, 4].unpack('V').first
    skip += psize
  end
  flagsize = prefix + richtext * 2 + phonetic * 4
  char_width = wide + 1
  remaining = work.size - offset
  needed = flagsize + chars * char_width
  avbl = remaining < needed ? remaining : needed
  owing = chars - (avbl - flagsize) / char_width
  [chars, flagsize, wide, phonetic, richtext, avbl, owing, skip]
end
161
##
# Insert null-characters into a compressed UTF-16 string: every byte of
# _string_ is followed by a zero byte. Returns a new String.
def wide(string)
  # Start from a copy of the literal: appending to the literal itself fails
  # once string literals are frozen.
  data = ''.dup
  string.each_byte { |byte| data << byte.chr << 0.chr }
  data
end
168
+ private
169
##
# Read the Shared String Table present in all Biff8 Files.
# Registers one SstEntry per string header found in _work_, starting at
# _pos_, until the workbook holds @sst_size entries or _work_ is exhausted.
def _read_sst(work, oppos, pos)
  limit = work.size
  while @workbook.sst_size < @sst_size && pos < limit
    entry = SstEntry.new(:offset => oppos + OPCODE_SIZE + pos,
                         :ole    => @data,
                         :reader => self)
    header = read_string_header(work, 2, pos)
    entry.chars, entry.flags, wide, entry.phonetic, entry.richtext,
      entry.available, entry.continued_chars, skip = header
    entry.wide = wide > 0
    advance = entry.available
    if entry.continued?
      # The entry is completed later, from a following Continue Opcode.
      @incomplete_sst = entry
      @incomplete_skip = skip
    else
      @workbook.add_shared_string(entry)
      advance += skip
    end
    pos += advance
  end
end
190
+ end
191
+ end
192
+ end
193
+ end
@@ -0,0 +1,92 @@
1
+ require 'date'
2
+ require 'spreadsheet/row'
3
+
4
+ module Spreadsheet
5
+ module Excel
6
+ ##
7
+ # Excel-specific Row methods
8
+ class Row < Spreadsheet::Row
9
##
# Excel's date calculation wrongly treats 1900 as a leap-year, so all Dates
# after 28.2.1900 are off by one.
LEAP_ERROR = Date.new(1900, 2, 28)
13
##
# Convert the cell at _idx_ to a Date, whatever its format.
def date(idx)
  cell = at(idx)
  _date(cell)
end
18
##
# Convert the cell at _idx_ to a DateTime, whatever its format.
def datetime(idx)
  cell = at(idx)
  _datetime(cell)
end
23
##
# Yields every cell of this Row as returned by #[].
# Without a block an Enumerator over the same values is returned, instead of
# failing on the missing block.
def each(&block)
  return to_enum(:each) unless block

  size.times do |idx|
    block.call(self[idx])
  end
end
28
##
# Access data in this Row like you would in an Array. If a cell is formatted
# as a Date or DateTime, the decoded Date or DateTime value is returned.
# With _len_, or with a Range as _idx_, an Array of values is returned.
def [](idx, len = nil)
  idx = idx...(idx + len) if len
  return enriched_data(idx, at(idx)) unless idx.is_a?(Range)

  idx.map { |i| enriched_data(i, at(i)) }
end
45
+ private
46
# Returns _data_ unchanged if it already is a Date; otherwise decodes it with
# #_datetime and keeps only year, month and day.
def _date(data) # :nodoc:
  if data.is_a?(Date)
    data
  else
    moment = _datetime(data)
    Date.new(moment.year, moment.month, moment.day)
  end
end
51
# Decodes a numeric cell value into a DateTime: the whole part counts days
# from the worksheet's date base, the fractional part is the time of day.
# A DateTime is returned unchanged.
def _datetime(data) # :nodoc:
  return data if data.is_a?(DateTime)

  base = @worksheet.date_base
  date = base + data.to_f
  # Split the fraction of a day into whole hours, whole minutes and seconds.
  hour, rest = ((data % 1) * 24).divmod(1)
  min, rest = (rest * 60).divmod(1)
  sec = (rest * 60).round
  # Rounding the seconds can overflow; carry upwards as far as needed.
  sec, min = 0, min + 1 if sec > 59
  min, hour = 0, hour + 1 if min > 59
  if hour > 23
    hour = 0
    date += 1
  end
  # A base before LEAP_ERROR gets one day taken off again.
  date -= 1 if LEAP_ERROR > base
  DateTime.new(date.year, date.month, date.day, hour, min, sec)
end
77
# Returns what should be exposed for the cell at _idx_: a registered link if
# there is one, a decoded Date/DateTime for numeric data under a matching
# format, and otherwise _data_ itself.
def enriched_data(idx, data) # :nodoc:
  link = @worksheet.links[[@idx, idx]]
  return link if link
  return data unless data.is_a?(Numeric)

  fmt = format(idx)
  return data unless fmt

  converted = if fmt.datetime? || fmt.time?
                _datetime(data)
              elsif fmt.date?
                _date(data)
              end
  converted || data
end
90
+ end
91
+ end
92
+ end
@@ -0,0 +1,46 @@
1
+ require 'spreadsheet/encodings'
2
+
3
+ module Spreadsheet
4
+ module Excel
5
+ ##
6
+ # Shared String Table Entry
7
+ class SstEntry
8
+ include Spreadsheet::Encodings
9
+ attr_accessor :chars, :phonetic, :richtext, :flags, :available,
10
+ :continued_chars, :wide
11
# _opts_ may carry :offset, :ole and :reader. The content starts out unread
# and no continuations are registered.
def initialize(opts = {})
  @content = nil
  @offset, @ole, @reader = opts.values_at(:offset, :ole, :reader)
  @continuations = []
end
18
##
# Access the contents of this Shared String. The String is read from the
# ole data, completed with all registered continuations and converted for
# the client. It is only cached when the reader asks for memoization.
def content
  return @content if @content

  raw = @ole[@offset, @available]
  text, = @reader.read_string_body(raw, @flags, @available, @wide)
  @continuations.each do |position, length|
    @reader.continue_string(@ole[position, length], [text, @chars])
  end
  text = client(text, 'UTF-16LE')
  @content = text if @reader.memoize?
  text
end
35
##
# Register a String continuation of _size_ bytes at _offset_ which supplies
# _chars_ of the characters that were still missing.
def continue(offset, size, chars)
  @continued_chars = @continued_chars - chars
  @continuations << [offset, size]
end
41
# True while characters of this String are still missing.
def continued? # :nodoc:
  remaining = @continued_chars
  remaining > 0
end
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,80 @@
1
+ require 'spreadsheet/workbook'
2
+ require 'spreadsheet/excel/offset'
3
+ require 'spreadsheet/excel/writer'
4
+ require 'ole/storage'
5
+
6
+ module Spreadsheet
7
+ module Excel
8
+ ##
9
+ # Excel-specific Workbook methods. These are mostly pertinent to the Excel
10
+ # reader. You should have no reason to use any of these.
11
+ class Workbook < Spreadsheet::Workbook
12
+ include Spreadsheet::Encodings
13
+ include Spreadsheet::Excel::Offset
14
# Lookup from the version number of a workbook to its BIFF version.
# Frozen, so the shared table cannot be modified by accident.
BIFF_VERSIONS = {
  0x000 => 2,
  0x007 => 2,
  0x200 => 2,
  0x300 => 3,
  0x400 => 4,
  0x500 => 5,
  0x600 => 8,
}.freeze
# Human-readable names for the version numbers that have one.
VERSION_STRINGS = {
  0x600 => 'Microsoft Excel 97/2000/XP',
  0x500 => 'Microsoft Excel 95',
}.freeze
27
+ offset :encoding, :boundsheets, :sst
28
+ attr_accessor :bof, :ole
29
+ attr_writer :date_base
30
# Creates a Reader configured with _opts_ and returns whatever it reads
# from _io_.
def Workbook.open(io, opts = {})
  # A local keeps the reader out of class-level state, where it would be
  # shared between callers and held on to after the call returns.
  reader = Reader.new(opts)
  reader.read(io)
end
34
# Starts out as an empty workbook of version 0x600 with UTF-16LE as its
# encoding (an Encoding object from Ruby 1.9 on, the plain name before).
def initialize(*args)
  super
  @encoding = if RUBY_VERSION >= '1.9'
                Encoding.find('UTF-16LE')
              else
                'UTF-16LE'
              end
  @version = 0x600
  @sst = []
end
44
# Appends _str_ to the Shared String Table.
def add_shared_string(str)
  @sst << str
end
47
# Records that the boundsheets have changed before the worksheet is added
# by the superclass.
def add_worksheet(worksheet)
  @changes.store(:boundsheets, true)
  super
end
51
# The BIFF version of this workbook: decided by @bof for the three values
# listed below, otherwise looked up by @version in BIFF_VERSIONS.
# Raises a RuntimeError for a version that is not listed there.
def biff_version
  case @bof
  when 0x009 then 2
  when 0x209 then 3
  when 0x409 then 4
  else
    BIFF_VERSIONS.fetch(@version) do
      raise "Unknown BIFF_VERSION '#{@version}'"
    end
  end
end
63
# The DateTime that dates are counted from; 31.12.1899 unless another base
# has been assigned.
def date_base
  @date_base = DateTime.new(1899, 12, 31) unless @date_base
  @date_base
end
66
# The content of the Shared String Table entry at _idx_ (converted with to_i).
def shared_string(idx)
  entry = @sst[idx.to_i]
  entry.content
end
69
# Number of entries currently held in the Shared String Table.
def sst_size
  @sst.size
end
72
# Extends the list inherited from the superclass with '@sst', '@offsets' and
# '@changes'.
def uninspect_variables
  hidden = super
  hidden.push('@sst', '@offsets', '@changes')
end
75
# The name listed for @version in VERSION_STRINGS, or "Unknown", passed
# through #client as UTF-8.
def version_string
  label = VERSION_STRINGS.fetch(@version, "Unknown")
  client(label, 'UTF-8')
end
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,100 @@
1
+ require 'spreadsheet/excel/offset'
2
+ require 'spreadsheet/excel/row'
3
+ require 'spreadsheet/worksheet'
4
+
5
+ module Spreadsheet
6
+ module Excel
7
+ ##
8
+ # Excel-specific Worksheet methods. These are mostly pertinent to the Excel
9
+ # reader, and to recording changes to the Worksheet. You should have no reason
10
+ # to use any of these.
11
+ class Worksheet < Spreadsheet::Worksheet
12
+ include Spreadsheet::Excel::Offset
13
+ offset :dimensions
14
+ attr_reader :offset, :ole, :links, :guts
15
+ def initialize opts = {}
16
+ @row_addresses = nil
17
+ super
18
+ @offset, @ole, @reader = opts[:offset], opts[:ole], opts[:reader]
19
+ @dimensions = nil
20
+ @links = {}
21
+ @guts = {}
22
+ end
23
# Registers _link_ for the cell at [row, column].
def add_link(row, column, link)
  position = [row, column]
  @links.store(position, link)
end
26
# Ensures the rows have been read, then defers to the superclass.
def column(idx)
  ensure_rows_read
  super
end
30
# The date base of the workbook this worksheet belongs to.
def date_base
  book = @workbook
  book.date_base
end
33
# Ensures the rows have been read, then defers to the superclass.
def each(*args)
  ensure_rows_read
  super
end
37
# On the first call, resets the dimensions and the row addresses and lets the
# reader (if there is one) read this worksheet. Later calls do nothing.
def ensure_rows_read
  unless @row_addresses
    @dimensions = nil
    @row_addresses = []
    @reader.read_worksheet(self, @offset) if @reader
  end
end
43
# The Row at _idx_: the one already held in @rows, else one read through the
# reader from its registered address, else a new empty Row.
def row(idx)
  cached = @rows[idx]
  return cached if cached

  ensure_rows_read
  addr = @row_addresses[idx]
  return Row.new(self, idx) unless addr

  fresh = @reader.read_row(self, addr)
  # Hand the row attributes over through the unupdated_* writers.
  [:default_format, :height, :outline_level, :hidden].each do |key|
    fresh.send("unupdated_#{key}=", addr[key])
  end
  fresh.worksheet = self
  fresh
end
58
# After the superclass has handled the update, marks this worksheet and the
# boundsheets as changed in the workbook, and the row and the dimensions as
# changed in this worksheet. Returns the result of the superclass.
def row_updated(idx, row)
  result = super
  @workbook.changes.store(self, true)
  @workbook.changes.store(:boundsheets, true)
  [idx, :dimensions].each { |key| @changes.store(key, true) }
  result
end
66
# Registers _opts_ as the address of the row at _idx_ and keeps its
# :row_block among the offsets.
def set_row_address(idx, opts)
  block = opts[:row_block]
  @offsets.store(idx, block)
  @row_addresses[idx] = opts
end
70
# The shared string at _idx_, as provided by the workbook.
def shared_string(idx)
  book = @workbook
  book.shared_string(idx)
end
73
+ private
74
## premature optimization?
# Pads @row_addresses with nil up to row_count entries. None of the
# arguments is evaluated.
def have_set_dimensions(value, pos, len)
  known = @row_addresses.size
  wanted = row_count
  @row_addresses.concat(Array.new(wanted - known)) if known < wanted
end
80
# Rebuilds @dimensions from the rows held in memory and the registered row
# addresses: [first row, end of rows, first used column, first unused
# column]. Each entry falls back to 0 when there is nothing to go by.
def recalculate_dimensions
  ensure_rows_read
  shorten(@rows)
  dims = @dimensions = []
  first_indices = [index_of_first(@rows), index_of_first(@row_addresses)]
  dims[0] = first_indices.compact.min || 0
  dims[1] = [@rows.size, @row_addresses.size].compact.max || 0
  rows = @rows.compact
  first_in_rows = rows.map { |row| row.first_used }.compact.min
  first_in_addrs = @row_addresses.compact.map { |addr| addr[:first_used] }.min
  dims[2] = [first_in_rows, first_in_addrs].compact.min || 0
  last_in_rows = rows.map { |row| row.first_unused }.max
  last_in_addrs = @row_addresses.compact.map { |addr| addr[:first_unused] }.max
  dims[3] = [last_in_rows, last_in_addrs].compact.max || 0
  @dimensions
end
98
+ end
99
+ end
100
+ end