spreadsheet 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. data/GUIDE.txt +209 -0
  2. data/History.txt +8 -0
  3. data/LICENSE.txt +619 -0
  4. data/Manifest.txt +46 -0
  5. data/README.txt +54 -0
  6. data/Rakefile +15 -0
  7. data/lib/parseexcel.rb +27 -0
  8. data/lib/parseexcel/parseexcel.rb +75 -0
  9. data/lib/parseexcel/parser.rb +11 -0
  10. data/lib/spreadsheet.rb +79 -0
  11. data/lib/spreadsheet/datatypes.rb +99 -0
  12. data/lib/spreadsheet/encodings.rb +49 -0
  13. data/lib/spreadsheet/excel.rb +75 -0
  14. data/lib/spreadsheet/excel/error.rb +26 -0
  15. data/lib/spreadsheet/excel/internals.rb +322 -0
  16. data/lib/spreadsheet/excel/internals/biff5.rb +17 -0
  17. data/lib/spreadsheet/excel/internals/biff8.rb +19 -0
  18. data/lib/spreadsheet/excel/offset.rb +37 -0
  19. data/lib/spreadsheet/excel/reader.rb +798 -0
  20. data/lib/spreadsheet/excel/reader/biff5.rb +22 -0
  21. data/lib/spreadsheet/excel/reader/biff8.rb +168 -0
  22. data/lib/spreadsheet/excel/row.rb +67 -0
  23. data/lib/spreadsheet/excel/sst_entry.rb +45 -0
  24. data/lib/spreadsheet/excel/workbook.rb +76 -0
  25. data/lib/spreadsheet/excel/worksheet.rb +85 -0
  26. data/lib/spreadsheet/excel/writer.rb +1 -0
  27. data/lib/spreadsheet/excel/writer/biff8.rb +66 -0
  28. data/lib/spreadsheet/excel/writer/format.rb +270 -0
  29. data/lib/spreadsheet/excel/writer/workbook.rb +586 -0
  30. data/lib/spreadsheet/excel/writer/worksheet.rb +556 -0
  31. data/lib/spreadsheet/font.rb +86 -0
  32. data/lib/spreadsheet/format.rb +172 -0
  33. data/lib/spreadsheet/formula.rb +9 -0
  34. data/lib/spreadsheet/row.rb +87 -0
  35. data/lib/spreadsheet/workbook.rb +120 -0
  36. data/lib/spreadsheet/worksheet.rb +215 -0
  37. data/lib/spreadsheet/writer.rb +29 -0
  38. data/test/data/test_copy.xls +0 -0
  39. data/test/data/test_version_excel5.xls +0 -0
  40. data/test/data/test_version_excel95.xls +0 -0
  41. data/test/data/test_version_excel97.xls +0 -0
  42. data/test/excel/row.rb +29 -0
  43. data/test/font.rb +163 -0
  44. data/test/integration.rb +1021 -0
  45. data/test/workbook.rb +21 -0
  46. data/test/worksheet.rb +62 -0
  47. metadata +113 -0
@@ -0,0 +1,22 @@
1
module Spreadsheet
  module Excel
    class Reader
      ##
      # Reader methods whose implementation is specific to Biff5 files, such as
      # read_string. More methods are expected to move here as support for older
      # versions of Excel grows.
      module Biff5
        ##
        # Read a String of 8-bit Characters.
        #
        # _work_ is the raw data, starting at the length field. _count_length_ is
        # the width of that length field in bytes (1 or 2). Returns the
        # characters that follow the length field.
        def read_string work, count_length=1
          # Offset    Size    Contents
          # 0         1 or 2  Length of the string (character count, ln)
          # 1 or 2    ln      Character array (8-bit characters)
          template = 'v'
          template = 'C' if count_length == 1
          length = work.unpack(template).first
          work[count_length, length]
        end
      end
    end
  end
end
@@ -0,0 +1,168 @@
1
module Spreadsheet
  module Excel
    class Reader
      ##
      # This Module collects reader methods such as read_string that are specific to
      # Biff8. This Module is likely to be expanded as Support for older Versions
      # of Excel grows and methods get moved here for disambiguation.
      module Biff8
        ##
        # When a String is too long for one Opcode, it is continued in a Continue
        # Opcode. Excel may reconsider compressing the remainder of the string.
        # This method appends the available remainder (decompressed if necessary) to
        # the incomplete string.
        #
        # _incomplete_string_ is a pair of [string read so far, total number of
        # characters]; the string is extended in place. Returns the number of
        # bytes accounted for in _work_, including the leading option byte.
        def continue_string work, incomplete_string=@incomplete_string
          opts, _ = work.unpack 'C'
          wide = opts & 1
          head, chars = incomplete_string
          # head is kept in 16-bit form (see #read_string_body), so head.size / 2
          # is the number of characters already read.
          owing = chars - head.size / 2
          size = owing * (wide + 1)
          string = work[1, size]
          if wide == 0
            # The local variable _wide_ holds the flag; the parentheses make this
            # an unambiguous call to the #wide method.
            string = wide(string)
          end
          head << string
          if head.size >= chars * 2
            @incomplete_string = nil
          end
          size + 1
        end
        ##
        # When a String is too long for one Opcode, it is continued in a Continue
        # Opcode. Excel may reconsider compressing the remainder of the string.
        # This method only evaluates the header and registers the address of the
        # continuation with the previous SstEntry.
        #
        # Returns the number of bytes of _work_ that belong to the continued
        # string, including the leading option byte.
        def continue_string_header work, oppos
          opts, _ = work.unpack 'C'
          wide = opts & 1
          owing = @incomplete_sst.continued_chars
          size = [work.size, owing * (1 + wide) + 1].min
          # One option byte, followed by one (compressed) or two (uncompressed)
          # bytes per character. The subtraction must happen before the division,
          # otherwise wide continuations report the byte count as character count.
          chars = (size - 1) / (1 + wide)
          @incomplete_sst.continue oppos + OPCODE_SIZE, size, chars
          unless @incomplete_sst.continued?
            @incomplete_sst = nil
          end
          size
        end
        ##
        # Read more data into the Shared String Table. (see also: #read_sst)
        # This method only evaluates the header, the actual work is done in #_read_sst
        def continue_sst work, oppos, len
          pos = 0
          if @incomplete_sst
            # the record starts with the rest of a string begun in an earlier record
            pos = continue_string_header work, oppos
          end
          # grow the recorded length of the SST by the length of this record
          @sst_offset[1] += len
          _read_sst work, oppos, pos
        end
        ##
        # Runs the inherited hook, then clears the string and SST bookkeeping
        # kept in instance variables while reading.
        def postread_workbook # :nodoc:
          super
          @incomplete_string, @sst_size, @sst_offset, @incomplete_sst = nil
        end
        ##
        # Read the Shared String Table present in all Biff8 Files.
        # This method only evaluates the header, the actual work is done in #_read_sst
        def read_sst work, pos, len
          # Offset  Size  Contents
          # 0       4     Total number of strings in the workbook (see below)
          # 4       4     Number of following strings (nm)
          # 8       var.  List of nm Unicode strings, 16-bit string length (➜ 3.4)
          _total, @sst_size = work.unpack 'V2'
          @sst_offset = [pos, len]
          @workbook.offsets.store :sst, @sst_offset
          _read_sst work, pos, 8
        end
        ##
        # Read a string from the Spreadsheet, such as a Worksheet- or Font-Name, or a
        # Number-Format. See also #read_string_header and #read_string_body
        #
        # Returns the string in 16-bit form. If characters are still owing, the
        # pair [string, total characters] is remembered in @incomplete_string so
        # that #continue_string can complete it.
        def read_string work, count_length=1
          # Offset    Size    Contents
          # 0         1 or 2  Length of the string (character count, ln)
          # 1 or 2    1       Option flags:
          #                   Bit  Mask  Contents
          #                   0    0x01  Character compression (ccompr):
          #                              0 = Compressed (8-bit characters)
          #                              1 = Uncompressed (16-bit characters)
          #                   2    0x04  Asian phonetic settings (phonetic):
          #                              0 = Does not contain Asian phonetic settings
          #                              1 = Contains Asian phonetic settings
          #                   3    0x08  Rich-Text settings (richtext):
          #                              0 = Does not contain Rich-Text settings
          #                              1 = Contains Rich-Text settings
          # [2 or 3]  2       (optional, only if richtext=1)
          #                   Number of Rich-Text formatting runs (rt)
          # [var.]    4       (optional, only if phonetic=1)
          #                   Size of Asian phonetic settings block (in bytes, sz)
          # var.      ln      Character array (8-bit characters
          #           or 2∙ln  or 16-bit characters, dependent on ccompr)
          # [var.]    4∙rt    (optional, only if richtext=1)
          #                   List of rt formatting runs (➜ 3.2)
          # [var.]    sz      (optional, only if phonetic=1)
          #                   Asian Phonetic Settings Block (➜ 3.4.2)
          chars, offset, wide, _phonetic, _richtext, available, owing \
            = read_string_header work, count_length
          string, _data = read_string_body work, offset, available, wide > 0
          if owing > 0
            @incomplete_string = [string, chars]
          end
          string
        end
        ##
        # Read the body of a string. Returns the String (decompressed if necessary) and
        # the available data (unchanged).
        #
        # _wide_ is a boolean here: true if the data already consists of 16-bit
        # characters.
        def read_string_body work, offset, available, wide
          data = work[offset, available]
          string = wide ? data : wide(data)
          [string, data]
        end
        ##
        # Read the header of a string. Returns the following information in an Array:
        # * The total number of characters in the string
        # * The offset of the actual string data (= the length of this header in bytes)
        # * Whether or not the string was compressed (0/1)
        # * Whether or not the string contains asian phonetic settings (0/1)
        # * Whether or not the string contains richtext formatting (0/1)
        # * The number of bytes containing characters in this chunk of data
        # * The number of characters missing from this chunk of data and expected to
        #   follow in a Continue Opcode
        def read_string_header work, count_length=1, offset=0
          fmt = count_length == 1 ? 'C2' : 'vC'
          chars, opts = work[offset, 1 + count_length].unpack fmt
          wide = opts & 1
          phonetic = (opts >> 2) & 1
          richtext = (opts >> 3) & 1
          size = chars * (wide + 1)
          flagsize = 1 + count_length + richtext * 2 + phonetic * 4
          # never claim more bytes than this chunk of data actually holds
          avbl = [work.size - offset, flagsize + size].min
          have_chrs = (avbl - flagsize) / (1 + wide)
          owing = chars - have_chrs
          [chars, flagsize, wide, phonetic, richtext, avbl, owing]
        end
        ##
        # Insert null-characters into a compressed UTF-16 string
        def wide string
          string.split('').zip(Array.new(string.size, 0.chr)).join
        end
        private
        ##
        # Read the Shared String Table present in all Biff8 Files.
        #
        # Registers one SstEntry with the workbook per string header found in
        # _work_, starting at _pos_, until the announced number of strings has
        # been reached or the data is used up. The string contents are not
        # decoded here; each entry only receives its offset and header values.
        def _read_sst work, oppos, pos
          worksize = work.size
          while @workbook.sst_size < @sst_size && pos < worksize do
            sst = SstEntry.new :offset => oppos + OPCODE_SIZE + pos,
                               :ole    => @data,
                               :reader => self
            sst.chars, sst.flags, wide, sst.phonetic, sst.richtext, sst.available,
              sst.continued_chars = read_string_header work, 2, pos
            sst.wide = wide > 0
            if sst.continued?
              # the remainder of this string is expected in a Continue Opcode
              @incomplete_sst = sst
            end
            @workbook.add_shared_string sst
            pos += sst.available
          end
        end
      end
    end
  end
end
@@ -0,0 +1,67 @@
1
+ require 'date'
2
+ require 'spreadsheet/row'
3
+
4
module Spreadsheet
  module Excel
    ##
    # Row methods that only apply to Excel files. Cells whose format is a date
    # or time format are converted to Date or DateTime objects on access.
    class Row < Row
      ##
      # Excel's date calculation wrongly treats 1900 as a leap-year, which puts
      # every Date after 28.2.1900 off by one.
      LEAP_ERROR = Date.new 1900, 2, 28
      ##
      # Return the cell at _idx_ as a Date, regardless of its format.
      def date idx
        _date(at(idx))
      end
      ##
      # Return the cell at _idx_ as a DateTime, regardless of its format.
      def datetime idx
        _datetime(at(idx))
      end
      ##
      # Array-style access to the data in this Row. Accepts an index, an index
      # plus a length, or a Range. Cells formatted as Date or DateTime are
      # returned as decoded Date or DateTime values.
      def [] idx, len=nil
        idx = idx...(idx+len) if len
        return enriched_data(idx, at(idx)) unless idx.is_a?(Range)
        idx.map do |i|
          enriched_data i, at(i)
        end
      end
      private
      # Adds the integer part of _data_ to the worksheet's date base and
      # compensates for the leap-year error. Date instances pass through.
      def _date data # :nodoc:
        return data if data.is_a?(Date)
        day = @worksheet.date_base + data.to_i
        day > LEAP_ERROR ? day - 1 : day
      end
      # Builds a DateTime at midnight of the decoded date and adds the
      # fractional part of _data_ as the time of day. DateTime instances pass
      # through.
      def _datetime data # :nodoc:
        return data if data.is_a?(DateTime)
        day = _date data
        midnight = DateTime.new(day.year, day.month, day.day)
        midnight + (data.to_f % 1)
      end
      # Converts _data_ according to the format of the cell at _idx_. Falls
      # back to _data_ itself when there is no format, the format is neither
      # date nor time, or the conversion yields nothing.
      def enriched_data idx, data # :nodoc:
        fmt = format(idx)
        converted = nil
        if fmt
          if fmt.datetime? || fmt.time?
            converted = _datetime data
          elsif fmt.date?
            converted = _date data
          end
        end
        converted || data
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require 'spreadsheet/encodings'
2
+
3
module Spreadsheet
  module Excel
    ##
    # One entry of the Shared String Table. It remembers where its data lives
    # and only decodes the string when #content is called.
    class SstEntry
      include Encodings
      attr_accessor :chars, :phonetic, :richtext, :flags, :available,
                    :continued_chars, :wide
      ##
      # Recognized options are :offset, :ole and :reader. The list of
      # continuations starts out empty.
      def initialize opts = {}
        @offset        = opts[:offset]
        @ole           = opts[:ole]
        @reader        = opts[:reader]
        @continuations = []
      end
      ##
      # Access the contents of this Shared String
      #
      # The string body is read through the reader, completed with every
      # registered continuation and then passed through #client. The result is
      # only cached if the reader says so via memoize?.
      def content
        return @content if @content
        raw = @ole[@offset, @available]
        text, _ = @reader.read_string_body raw, @flags, @available, @wide
        @continuations.each do |(start, length)|
          @reader.continue_string @ole[start, length], [text, @chars]
        end
        text = client text, 'UTF-16LE'
        @content = text if @reader.memoize?
        text
      end
      ##
      # Register the offset of a String continuation
      #
      # _chars_ is deducted from the number of characters still expected.
      def continue offset, size, chars
        @continued_chars -= chars
        @continuations << [offset, size]
      end
      # True while characters of this string are still expected to follow.
      def continued? # :nodoc:
        @continued_chars > 0
      end
    end
  end
end
@@ -0,0 +1,76 @@
1
+ require 'spreadsheet/workbook'
2
+ require 'spreadsheet/excel/offset'
3
+ require 'spreadsheet/excel/writer'
4
+ require 'ole/file_system'
5
+
6
module Spreadsheet
  module Excel
    ##
    # Workbook methods that only apply to Excel files. Most of them serve the
    # Excel reader; there should be no reason to call them directly.
    class Workbook < Spreadsheet::Workbook
      include Encodings
      include Offset
      # Maps the value stored in @version to a BIFF version number.
      BIFF_VERSIONS = {
        0x000 => 2,
        0x007 => 2,
        0x200 => 2,
        0x300 => 3,
        0x400 => 4,
        0x500 => 5,
        0x600 => 8
      }
      # Human-readable names for the values stored in @version.
      VERSION_STRINGS = {
        0x600 => 'Microsoft Excel 97/2000/XP',
        0x500 => 'Microsoft Excel 95'
      }
      offset :encoding, :boundsheets, :sst
      attr_accessor :bof, :ole
      attr_writer :date_base
      ##
      # Creates a Reader with _opts_ and returns whatever it reads from _io_.
      def self.open io, opts = {}
        @reader = Reader.new opts
        @reader.read io
      end
      ##
      # Starts out with UTF-16LE encoding, version 0x600 and an empty Shared
      # String Table. From Ruby 1.9 on the encoding is an Encoding object,
      # before that its name.
      def initialize *args
        super
        @encoding = if RUBY_VERSION >= '1.9'
                      Encoding.find 'UTF-16LE'
                    else
                      'UTF-16LE'
                    end
        @version = 0x600
        @sst = []
      end
      ##
      # Append _str_ to the Shared String Table.
      def add_shared_string str
        @sst.push str
      end
      ##
      # The BIFF version of this workbook. Three known values of @bof decide it
      # directly; otherwise @version is looked up in BIFF_VERSIONS, and an
      # unknown @version raises.
      def biff_version
        case @bof
        when 0x009 then 2
        when 0x209 then 3
        when 0x409 then 4
        else
          BIFF_VERSIONS.fetch(@version) do
            raise "Unkown BIFF_VERSION '#@version'"
          end
        end
      end
      ##
      # The Date that serial dates are added to. Defaults to 31.12.1899 unless
      # another base has been assigned.
      def date_base
        @date_base = Date.new(1899, 12, 31) unless @date_base
        @date_base
      end
      ##
      # The content of the Shared String Table entry at _idx_.
      def shared_string idx
        entry = @sst[idx.to_i]
        entry.content
      end
      ##
      # The number of entries currently in the Shared String Table.
      def sst_size
        @sst.size
      end
      ##
      # Extends the inherited list with '@sst', '@offsets' and '@changes'.
      def uninspect_variables
        hidden = super
        hidden.push '@sst', '@offsets', '@changes'
      end
      ##
      # The name of the Excel version that wrote this workbook, passed through
      # #client; "Unknown" if @version is not listed in VERSION_STRINGS.
      def version_string
        name = VERSION_STRINGS.fetch(@version, "Unknown")
        client name, 'UTF8'
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
+ require 'spreadsheet/excel/offset'
2
+ require 'spreadsheet/excel/row'
3
+ require 'spreadsheet/worksheet'
4
+
5
module Spreadsheet
  module Excel
    ##
    # Excel-specific Worksheet methods. These are mostly pertinent to the Excel
    # reader, and to recording changes to the Worksheet. You should have no reason
    # to use any of these.
    class Worksheet < Spreadsheet::Worksheet
      include Offset
      offset :dimensions
      attr_reader :offset, :ole
      ##
      # Besides the options handled by the superclass, :offset, :ole and
      # :reader are recognized.
      def initialize opts = {}
        super
        @offset, @ole, @reader = opts[:offset], opts[:ole], opts[:reader]
        @dimensions = nil
      end
      ##
      # The date base of the workbook this worksheet belongs to.
      def date_base
        @workbook.date_base
      end
      ##
      # Makes sure the rows have been read, then iterates as the superclass does.
      def each *args
        ensure_rows_read
        super
      end
      ##
      # Has the reader read this worksheet the first time it is called. Later
      # calls return immediately, because @row_addresses is set by then.
      def ensure_rows_read
        return if @row_addresses
        @dimensions = nil
        @row_addresses = []
        @reader.read_worksheet self, @offset
      end
      ##
      # The row at _idx_. A row missing from @rows is read through the reader if
      # an address is known for it; otherwise a new Row is returned.
      def row idx
        ensure_rows_read
        @rows.fetch idx do
          if addr = @row_addresses[idx]
            row = @reader.read_row self, addr
            row.worksheet = self
            row
          else
            Row.new self, idx
          end
        end
      end
      ##
      # Records that the row at _idx_ was modified, both in the changes of the
      # workbook and in those of this worksheet. Returns the result of the
      # superclass implementation.
      def row_updated idx, row
        res = super
        @workbook.changes.store self, true
        @workbook.changes.store :boundsheets, true
        @changes.store idx, true
        @changes.store :dimensions, true
        res
      end
      ##
      # Remembers _opts_ as the address of the row at _idx_ and stores its
      # :row_block in the offsets.
      def set_row_address idx, opts
        @offsets.store idx, opts[:row_block]
        @row_addresses[idx] = opts
      end
      ##
      # The shared string at _idx_, looked up in the workbook.
      def shared_string idx
        @workbook.shared_string idx
      end
      private
      ## premature optimization?
      # Pads @row_addresses with nil up to row_count entries. The arguments are
      # not used.
      # NOTE(review): presumably called by the Offset mixin once the dimensions
      # have been set - confirm against spreadsheet/excel/offset.
      def have_set_dimensions value, pos, len
        if @row_addresses.size < row_count
          @row_addresses.concat Array.new(row_count - @row_addresses.size)
        end
      end
      ##
      # Recomputes @dimensions from the rows held in memory and the known row
      # addresses and returns it as
      # [first row, row count, first used column, first unused column].
      def recalculate_dimensions
        ensure_rows_read
        shorten @rows
        @dimensions = []
        @dimensions[0] = [ index_of_first(@rows),
                           index_of_first(@row_addresses) ].compact.min
        @dimensions[1] = [ @rows.size, @row_addresses.size ].compact.max
        compact = @rows.compact
        # @row_addresses is sparse: it is padded with nil and assigned by index,
        # so nil entries have to be dropped before asking them for their bounds.
        addresses = @row_addresses.compact
        first_rows = compact.collect { |row| index_of_first row }.compact.min
        first_addrs = addresses.collect { |addr| addr[:first_used] }.compact.min
        @dimensions[2] = [ first_rows, first_addrs ].compact.min
        last_rows = compact.collect { |row| row.size }.max
        last_addrs = addresses.collect { |addr| addr[:first_unused] }.compact.max
        @dimensions[3] = [last_rows, last_addrs].compact.max
        @dimensions
      end
    end
  end
end