roo 2.1.1 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/Guardfile +1 -2
- data/README.md +22 -2
- data/lib/roo.rb +1 -0
- data/lib/roo/base.rb +36 -36
- data/lib/roo/errors.rb +9 -0
- data/lib/roo/excelx.rb +19 -71
- data/lib/roo/excelx/cell.rb +32 -3
- data/lib/roo/excelx/cell/base.rb +94 -0
- data/lib/roo/excelx/cell/boolean.rb +27 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +101 -0
- data/lib/roo/excelx/cell/empty.rb +19 -0
- data/lib/roo/excelx/cell/number.rb +80 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +43 -0
- data/lib/roo/excelx/comments.rb +33 -0
- data/lib/roo/excelx/coordinate.rb +12 -0
- data/lib/roo/excelx/format.rb +64 -0
- data/lib/roo/excelx/shared.rb +32 -0
- data/lib/roo/excelx/sheet.rb +12 -7
- data/lib/roo/excelx/sheet_doc.rb +95 -91
- data/lib/roo/link.rb +21 -2
- data/lib/roo/open_office.rb +4 -1
- data/lib/roo/version.rb +1 -1
- data/roo.gemspec +1 -1
- data/spec/lib/roo/base_spec.rb +19 -2
- data/spec/lib/roo/excelx_spec.rb +13 -10
- data/spec/lib/roo/openoffice_spec.rb +18 -1
- data/test/excelx/cell/test_base.rb +64 -0
- data/test/excelx/cell/test_boolean.rb +38 -0
- data/test/excelx/cell/test_date.rb +43 -0
- data/test/excelx/cell/test_datetime.rb +48 -0
- data/test/excelx/cell/test_empty.rb +8 -0
- data/test/excelx/cell/test_number.rb +58 -0
- data/test/excelx/cell/test_string.rb +30 -0
- data/test/excelx/cell/test_time.rb +33 -0
- data/test/test_roo.rb +14 -8
- metadata +23 -2
@@ -0,0 +1,64 @@
|
|
1
|
+
module Roo
|
2
|
+
class Excelx
|
3
|
+
module Format
|
4
|
+
EXCEPTIONAL_FORMATS = {
|
5
|
+
'h:mm am/pm' => :date,
|
6
|
+
'h:mm:ss am/pm' => :date
|
7
|
+
}
|
8
|
+
|
9
|
+
STANDARD_FORMATS = {
|
10
|
+
0 => 'General'.freeze,
|
11
|
+
1 => '0'.freeze,
|
12
|
+
2 => '0.00'.freeze,
|
13
|
+
3 => '#,##0'.freeze,
|
14
|
+
4 => '#,##0.00'.freeze,
|
15
|
+
9 => '0%'.freeze,
|
16
|
+
10 => '0.00%'.freeze,
|
17
|
+
11 => '0.00E+00'.freeze,
|
18
|
+
12 => '# ?/?'.freeze,
|
19
|
+
13 => '# ??/??'.freeze,
|
20
|
+
14 => 'mm-dd-yy'.freeze,
|
21
|
+
15 => 'd-mmm-yy'.freeze,
|
22
|
+
16 => 'd-mmm'.freeze,
|
23
|
+
17 => 'mmm-yy'.freeze,
|
24
|
+
18 => 'h:mm AM/PM'.freeze,
|
25
|
+
19 => 'h:mm:ss AM/PM'.freeze,
|
26
|
+
20 => 'h:mm'.freeze,
|
27
|
+
21 => 'h:mm:ss'.freeze,
|
28
|
+
22 => 'm/d/yy h:mm'.freeze,
|
29
|
+
37 => '#,##0 ;(#,##0)'.freeze,
|
30
|
+
38 => '#,##0 ;[Red](#,##0)'.freeze,
|
31
|
+
39 => '#,##0.00;(#,##0.00)'.freeze,
|
32
|
+
40 => '#,##0.00;[Red](#,##0.00)'.freeze,
|
33
|
+
45 => 'mm:ss'.freeze,
|
34
|
+
46 => '[h]:mm:ss'.freeze,
|
35
|
+
47 => 'mmss.0'.freeze,
|
36
|
+
48 => '##0.0E+0'.freeze,
|
37
|
+
49 => '@'.freeze
|
38
|
+
}
|
39
|
+
|
40
|
+
def to_type(format)
|
41
|
+
format = format.to_s.downcase
|
42
|
+
if (type = EXCEPTIONAL_FORMATS[format])
|
43
|
+
type
|
44
|
+
elsif format.include?('#')
|
45
|
+
:float
|
46
|
+
elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
|
47
|
+
if format.include?('h') || format.include?('s')
|
48
|
+
:datetime
|
49
|
+
else
|
50
|
+
:date
|
51
|
+
end
|
52
|
+
elsif format.include?('h') || format.include?('s')
|
53
|
+
:time
|
54
|
+
elsif format.include?('%')
|
55
|
+
:percentage
|
56
|
+
else
|
57
|
+
:float
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
module_function :to_type
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Roo
|
2
|
+
class Excelx
|
3
|
+
# Public: Shared class for allowing sheets to share data. This should
|
4
|
+
# reduce memory usage and reduce the number of objects being passed
|
5
|
+
# to various initializers.
|
6
|
+
class Shared
|
7
|
+
attr_accessor :comments_files, :sheet_files, :rels_files
|
8
|
+
def initialize(dir)
|
9
|
+
@dir = dir
|
10
|
+
@comments_files = []
|
11
|
+
@sheet_files = []
|
12
|
+
@rels_files = []
|
13
|
+
end
|
14
|
+
|
15
|
+
def styles
|
16
|
+
@styles ||= Styles.new(File.join(@dir, 'roo_styles.xml'))
|
17
|
+
end
|
18
|
+
|
19
|
+
def shared_strings
|
20
|
+
@shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
|
21
|
+
end
|
22
|
+
|
23
|
+
def workbook
|
24
|
+
@workbook ||= Workbook.new(File.join(@dir, 'roo_workbook.xml'))
|
25
|
+
end
|
26
|
+
|
27
|
+
def base_date
|
28
|
+
workbook.base_date
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/roo/excelx/sheet.rb
CHANGED
@@ -1,12 +1,17 @@
|
|
1
|
+
require 'forwardable'
|
1
2
|
module Roo
|
2
3
|
class Excelx
|
3
4
|
class Sheet
|
4
|
-
|
5
|
+
extend Forwardable
|
6
|
+
|
7
|
+
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
|
8
|
+
|
9
|
+
def initialize(name, shared, sheet_index, options = {})
|
5
10
|
@name = name
|
6
|
-
@
|
7
|
-
@
|
8
|
-
@
|
9
|
-
@sheet = SheetDoc.new(
|
11
|
+
@shared = shared
|
12
|
+
@rels = Relationships.new(rels_files[sheet_index])
|
13
|
+
@comments = Comments.new(comments_files[sheet_index])
|
14
|
+
@sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
|
10
15
|
end
|
11
16
|
|
12
17
|
def cells
|
@@ -14,7 +19,7 @@ module Roo
|
|
14
19
|
end
|
15
20
|
|
16
21
|
def present_cells
|
17
|
-
@present_cells ||= cells.select { |_, cell| cell && cell.
|
22
|
+
@present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
|
18
23
|
end
|
19
24
|
|
20
25
|
# Yield each row as array of Excelx::Cell objects
|
@@ -65,7 +70,7 @@ module Roo
|
|
65
70
|
|
66
71
|
def excelx_format(key)
|
67
72
|
cell = cells[key]
|
68
|
-
|
73
|
+
styles.style_format(cell.style).to_s if cell
|
69
74
|
end
|
70
75
|
|
71
76
|
def hyperlinks
|
data/lib/roo/excelx/sheet_doc.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
|
+
require 'forwardable'
|
1
2
|
require 'roo/excelx/extractor'
|
2
3
|
|
3
4
|
module Roo
|
4
5
|
class Excelx
|
5
6
|
class SheetDoc < Excelx::Extractor
|
6
|
-
|
7
|
+
extend Forwardable
|
8
|
+
delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
|
9
|
+
|
10
|
+
def initialize(path, relationships, shared, options = {})
|
7
11
|
super(path)
|
12
|
+
@shared = shared
|
8
13
|
@options = options
|
9
14
|
@relationships = relationships
|
10
|
-
@styles = styles
|
11
|
-
@shared_strings = shared_strings
|
12
|
-
@workbook = workbook
|
13
15
|
end
|
14
16
|
|
15
17
|
def cells(relationships)
|
@@ -44,75 +46,118 @@ module Roo
|
|
44
46
|
|
45
47
|
private
|
46
48
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
style = cell_xml['s'].to_i # should be here
|
51
|
-
# c: <c r="A5" s="2">
|
52
|
-
# <v>22606</v>
|
53
|
-
# </c>, format: , tmp_type: float
|
54
|
-
value_type =
|
55
|
-
case cell_xml['t']
|
56
|
-
when 's'
|
49
|
+
def cell_value_type(type, format)
|
50
|
+
case type
|
51
|
+
when 's'.freeze
|
57
52
|
:shared
|
58
|
-
when 'b'
|
53
|
+
when 'b'.freeze
|
59
54
|
:boolean
|
60
|
-
when 'str'
|
55
|
+
when 'str'.freeze
|
61
56
|
:string
|
62
|
-
when 'inlineStr'
|
57
|
+
when 'inlineStr'.freeze
|
63
58
|
:inlinestr
|
64
59
|
else
|
65
|
-
format = @styles.style_format(style)
|
66
60
|
Excelx::Format.to_type(format)
|
67
61
|
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# Internal: Creates a cell based on an XML cell.
|
65
|
+
#
|
66
|
+
# cell_xml - a Nokogiri::XML::Element. e.g.
|
67
|
+
# <c r="A5" s="2">
|
68
|
+
# <v>22606</v>
|
69
|
+
# </c>
|
70
|
+
# hyperlink - a String for the hyperlink for the cell or nil when no
|
71
|
+
# hyperlink is present.
|
72
|
+
#
|
73
|
+
# Examples
|
74
|
+
#
|
75
|
+
# cell_from_xml(<Nokogiri::XML::Element>, nil)
|
76
|
+
# # => <Excelx::Cell::String>
|
77
|
+
#
|
78
|
+
# Returns a type of <Excelx::Cell>.
|
79
|
+
def cell_from_xml(cell_xml, hyperlink)
|
80
|
+
coordinate = extract_coordinate(cell_xml['r'])
|
81
|
+
return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
|
82
|
+
|
83
|
+
# NOTE: This is error prone, to_i will silently turn a nil into a 0.
|
84
|
+
# This works by coincidence because Format[0] is General.
|
85
|
+
style = cell_xml['s'].to_i
|
86
|
+
format = styles.style_format(style)
|
87
|
+
value_type = cell_value_type(cell_xml['t'], format)
|
68
88
|
formula = nil
|
69
|
-
|
89
|
+
|
70
90
|
cell_xml.children.each do |cell|
|
71
91
|
case cell.name
|
72
92
|
when 'is'
|
73
93
|
cell.children.each do |inline_str|
|
74
94
|
if inline_str.name == 't'
|
75
|
-
return Excelx::Cell.
|
95
|
+
return Excelx::Cell.create_cell(:string, inline_str.content, formula, style, hyperlink, coordinate)
|
76
96
|
end
|
77
97
|
end
|
78
98
|
when 'f'
|
79
99
|
formula = cell.content
|
80
100
|
when 'v'
|
81
|
-
|
82
|
-
value_type =
|
83
|
-
if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
|
84
|
-
:datetime
|
85
|
-
else
|
86
|
-
:date
|
87
|
-
end
|
88
|
-
end
|
89
|
-
excelx_type = [:numeric_or_formula, format.to_s]
|
90
|
-
value =
|
91
|
-
case value_type
|
92
|
-
when :shared
|
93
|
-
value_type = :string
|
94
|
-
excelx_type = :string
|
95
|
-
@shared_strings[cell.content.to_i]
|
96
|
-
when :boolean
|
97
|
-
(cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
|
98
|
-
when :date, :time, :datetime
|
99
|
-
cell.content
|
100
|
-
when :formula
|
101
|
-
cell.content.to_f
|
102
|
-
when :string
|
103
|
-
excelx_type = :string
|
104
|
-
cell.content
|
105
|
-
else
|
106
|
-
value_type = :float
|
107
|
-
cell.content
|
108
|
-
end
|
109
|
-
return Excelx::Cell.new(value, value_type, formula, excelx_type, cell.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
|
101
|
+
return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
|
110
102
|
end
|
111
103
|
end
|
112
|
-
|
104
|
+
end
|
105
|
+
|
106
|
+
def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
|
107
|
+
# NOTE: format.to_s can replace excelx_type as an argument for
|
108
|
+
# Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
|
109
|
+
# it will break some brittle tests.
|
110
|
+
excelx_type = [:numeric_or_formula, format.to_s]
|
111
|
+
|
112
|
+
# NOTE: There are only a few situations where value != cell.content
|
113
|
+
# 1. when a sharedString is used. value = sharedString;
|
114
|
+
# cell.content = id of sharedString
|
115
|
+
# 2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1';
|
116
|
+
# But a boolean cell should use TRUE|FALSE as the formatted value
|
117
|
+
# and use a Boolean for its value. Using a Boolean value breaks
|
118
|
+
# Roo::Base#to_csv.
|
119
|
+
# 3. formula
|
120
|
+
case value_type
|
121
|
+
when :shared
|
122
|
+
value = shared_strings[cell.content.to_i]
|
123
|
+
Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
|
124
|
+
when :boolean, :string
|
125
|
+
value = cell.content
|
126
|
+
Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
|
127
|
+
when :time, :datetime
|
128
|
+
cell_content = cell.content.to_f
|
129
|
+
# NOTE: A date will be a whole number. A time will be < 1. And
|
130
|
+
# in general, a datetime will have decimals. But if the cell is
|
131
|
+
# using a custom format, it's possible to be interpreted incorrectly.
|
132
|
+
# cell_content.to_i == cell_content && standard_style?=> :date
|
133
|
+
#
|
134
|
+
# Should check to see if the format is standard or not. If it's a
|
135
|
+
# standard format, then it's a date; otherwise, it is a datetime.
|
136
|
+
# @styles.standard_style?(style_id)
|
137
|
+
# STANDARD_STYLES.keys.include?(style_id.to_i)
|
138
|
+
cell_type = if cell_content < 1.0
|
139
|
+
:time
|
140
|
+
elsif (cell_content - cell_content.floor).abs > 0.000001
|
141
|
+
:datetime
|
142
|
+
else
|
143
|
+
:date
|
144
|
+
end
|
145
|
+
Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
|
146
|
+
when :date
|
147
|
+
Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
|
148
|
+
else
|
149
|
+
Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
def extract_coordinate(coordinate)
|
154
|
+
row, column = ::Roo::Utils.split_coordinate(coordinate)
|
155
|
+
|
156
|
+
Excelx::Coordinate.new(row, column)
|
113
157
|
end
|
114
158
|
|
115
159
|
def extract_hyperlinks(relationships)
|
160
|
+
# FIXME: select the valid hyperlinks and then map those.
|
116
161
|
Hash[doc.xpath('/worksheet/hyperlinks/hyperlink').map do |hyperlink|
|
117
162
|
if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
|
118
163
|
[::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
|
@@ -154,47 +199,6 @@ module Roo
|
|
154
199
|
return dimension.attributes['ref'].value
|
155
200
|
end
|
156
201
|
end
|
157
|
-
|
158
|
-
=begin
|
159
|
-
Datei xl/comments1.xml
|
160
|
-
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
|
161
|
-
<comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
162
|
-
<authors>
|
163
|
-
<author />
|
164
|
-
</authors>
|
165
|
-
<commentList>
|
166
|
-
<comment ref="B4" authorId="0">
|
167
|
-
<text>
|
168
|
-
<r>
|
169
|
-
<rPr>
|
170
|
-
<sz val="10" />
|
171
|
-
<rFont val="Arial" />
|
172
|
-
<family val="2" />
|
173
|
-
</rPr>
|
174
|
-
<t>Kommentar fuer B4</t>
|
175
|
-
</r>
|
176
|
-
</text>
|
177
|
-
</comment>
|
178
|
-
<comment ref="B5" authorId="0">
|
179
|
-
<text>
|
180
|
-
<r>
|
181
|
-
<rPr>
|
182
|
-
<sz val="10" />
|
183
|
-
<rFont val="Arial" />
|
184
|
-
<family val="2" />
|
185
|
-
</rPr>
|
186
|
-
<t>Kommentar fuer B5</t>
|
187
|
-
</r>
|
188
|
-
</text>
|
189
|
-
</comment>
|
190
|
-
</commentList>
|
191
|
-
</comments>
|
192
|
-
=end
|
193
|
-
=begin
|
194
|
-
if @comments_doc[self.sheets.index(sheet)]
|
195
|
-
read_comments(sheet)
|
196
|
-
end
|
197
|
-
=end
|
198
202
|
end
|
199
203
|
end
|
200
204
|
end
|
data/lib/roo/link.rb
CHANGED
@@ -1,9 +1,28 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
1
3
|
module Roo
|
2
4
|
class Link < String
|
5
|
+
# FIXME: Roo::Link inherits from String. A link cell is_a?(Roo::Link). **It is
|
6
|
+
# the only situation where a cell's `value` is always a String**. Link
|
7
|
+
# cells have a nifty `to_uri` method, but this method isn't easily
|
8
|
+
# reached. (e.g. `sheet.sheet_for(nil).cells[[row,column]].value.to_uri`;
|
9
|
+
# `sheet.hyperlink(row, column)` doesn't use `to_uri`).
|
10
|
+
#
|
11
|
+
# 1. Add different types of links (String, Numeric, Date, DateTime, etc.)
|
12
|
+
# 2. Remove Roo::Link.
|
13
|
+
# 3. Don't inherit from String and pass the cell's value.
|
14
|
+
#
|
15
|
+
# I don't know the historical reasons for the Roo::Link, but right now
|
16
|
+
# it seems unnecessary. I'm in favor of keeping it just in case.
|
17
|
+
#
|
18
|
+
# I'm also in favor of passing the cell's value to Roo::Link. The
|
19
|
+
# cell.value's class would still be Roo::Link, but the value itself
|
20
|
+
# would depend on what type of cell it is (Numeric, Date, etc.).
|
21
|
+
#
|
3
22
|
attr_reader :href
|
4
|
-
|
23
|
+
alias_method :url, :href
|
5
24
|
|
6
|
-
def initialize(href='', text=href)
|
25
|
+
def initialize(href = '', text = href)
|
7
26
|
super(text)
|
8
27
|
@href = href
|
9
28
|
end
|
data/lib/roo/open_office.rb
CHANGED
@@ -3,6 +3,7 @@ require 'nokogiri'
|
|
3
3
|
require 'cgi'
|
4
4
|
require 'zip/filesystem'
|
5
5
|
require 'roo/font'
|
6
|
+
require 'base64'
|
6
7
|
|
7
8
|
module Roo
|
8
9
|
class OpenOffice < Roo::Base
|
@@ -410,7 +411,9 @@ module Roo
|
|
410
411
|
@style[sheet] ||= {}
|
411
412
|
@style[sheet][key] = style_name
|
412
413
|
case @cell_type[sheet][key]
|
413
|
-
when :float
|
414
|
+
when :float
|
415
|
+
@cell[sheet][key] = (table_cell.attributes['value'].to_s.include?(".") || table_cell.children.first.text.include?(".")) ? v.to_f : v.to_i
|
416
|
+
when :percentage
|
414
417
|
@cell[sheet][key] = v.to_f
|
415
418
|
when :string
|
416
419
|
@cell[sheet][key] = str_v
|
data/lib/roo/version.rb
CHANGED
data/roo.gemspec
CHANGED
@@ -6,7 +6,7 @@ require 'roo/version'
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = 'roo'
|
8
8
|
spec.version = Roo::VERSION
|
9
|
-
spec.authors = ['Thomas Preymesser', 'Hugh McGowan', 'Ben Woosley', 'Oleksandr Simonov']
|
9
|
+
spec.authors = ['Thomas Preymesser', 'Hugh McGowan', 'Ben Woosley', 'Oleksandr Simonov', 'Steven Daniels']
|
10
10
|
spec.email = ['ruby.ruby.ruby.roo@gmail.com', 'oleksandr@simonov.me']
|
11
11
|
spec.summary = 'Roo can access the contents of various spreadsheet files.'
|
12
12
|
spec.description = "Roo can access the contents of various spreadsheet files. It can handle\n* OpenOffice\n* Excelx\n* LibreOffice\n* CSV"
|