roo 2.1.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/Guardfile +1 -2
- data/README.md +22 -2
- data/lib/roo.rb +1 -0
- data/lib/roo/base.rb +36 -36
- data/lib/roo/errors.rb +9 -0
- data/lib/roo/excelx.rb +19 -71
- data/lib/roo/excelx/cell.rb +32 -3
- data/lib/roo/excelx/cell/base.rb +94 -0
- data/lib/roo/excelx/cell/boolean.rb +27 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +101 -0
- data/lib/roo/excelx/cell/empty.rb +19 -0
- data/lib/roo/excelx/cell/number.rb +80 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +43 -0
- data/lib/roo/excelx/comments.rb +33 -0
- data/lib/roo/excelx/coordinate.rb +12 -0
- data/lib/roo/excelx/format.rb +64 -0
- data/lib/roo/excelx/shared.rb +32 -0
- data/lib/roo/excelx/sheet.rb +12 -7
- data/lib/roo/excelx/sheet_doc.rb +95 -91
- data/lib/roo/link.rb +21 -2
- data/lib/roo/open_office.rb +4 -1
- data/lib/roo/version.rb +1 -1
- data/roo.gemspec +1 -1
- data/spec/lib/roo/base_spec.rb +19 -2
- data/spec/lib/roo/excelx_spec.rb +13 -10
- data/spec/lib/roo/openoffice_spec.rb +18 -1
- data/test/excelx/cell/test_base.rb +64 -0
- data/test/excelx/cell/test_boolean.rb +38 -0
- data/test/excelx/cell/test_date.rb +43 -0
- data/test/excelx/cell/test_datetime.rb +48 -0
- data/test/excelx/cell/test_empty.rb +8 -0
- data/test/excelx/cell/test_number.rb +58 -0
- data/test/excelx/cell/test_string.rb +30 -0
- data/test/excelx/cell/test_time.rb +33 -0
- data/test/test_roo.rb +14 -8
- metadata +23 -2
@@ -0,0 +1,64 @@
|
|
1
|
+
module Roo
|
2
|
+
class Excelx
|
3
|
+
module Format
|
4
|
+
EXCEPTIONAL_FORMATS = {
|
5
|
+
'h:mm am/pm' => :date,
|
6
|
+
'h:mm:ss am/pm' => :date
|
7
|
+
}
|
8
|
+
|
9
|
+
STANDARD_FORMATS = {
|
10
|
+
0 => 'General'.freeze,
|
11
|
+
1 => '0'.freeze,
|
12
|
+
2 => '0.00'.freeze,
|
13
|
+
3 => '#,##0'.freeze,
|
14
|
+
4 => '#,##0.00'.freeze,
|
15
|
+
9 => '0%'.freeze,
|
16
|
+
10 => '0.00%'.freeze,
|
17
|
+
11 => '0.00E+00'.freeze,
|
18
|
+
12 => '# ?/?'.freeze,
|
19
|
+
13 => '# ??/??'.freeze,
|
20
|
+
14 => 'mm-dd-yy'.freeze,
|
21
|
+
15 => 'd-mmm-yy'.freeze,
|
22
|
+
16 => 'd-mmm'.freeze,
|
23
|
+
17 => 'mmm-yy'.freeze,
|
24
|
+
18 => 'h:mm AM/PM'.freeze,
|
25
|
+
19 => 'h:mm:ss AM/PM'.freeze,
|
26
|
+
20 => 'h:mm'.freeze,
|
27
|
+
21 => 'h:mm:ss'.freeze,
|
28
|
+
22 => 'm/d/yy h:mm'.freeze,
|
29
|
+
37 => '#,##0 ;(#,##0)'.freeze,
|
30
|
+
38 => '#,##0 ;[Red](#,##0)'.freeze,
|
31
|
+
39 => '#,##0.00;(#,##0.00)'.freeze,
|
32
|
+
40 => '#,##0.00;[Red](#,##0.00)'.freeze,
|
33
|
+
45 => 'mm:ss'.freeze,
|
34
|
+
46 => '[h]:mm:ss'.freeze,
|
35
|
+
47 => 'mmss.0'.freeze,
|
36
|
+
48 => '##0.0E+0'.freeze,
|
37
|
+
49 => '@'.freeze
|
38
|
+
}
|
39
|
+
|
40
|
+
def to_type(format)
|
41
|
+
format = format.to_s.downcase
|
42
|
+
if (type = EXCEPTIONAL_FORMATS[format])
|
43
|
+
type
|
44
|
+
elsif format.include?('#')
|
45
|
+
:float
|
46
|
+
elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
|
47
|
+
if format.include?('h') || format.include?('s')
|
48
|
+
:datetime
|
49
|
+
else
|
50
|
+
:date
|
51
|
+
end
|
52
|
+
elsif format.include?('h') || format.include?('s')
|
53
|
+
:time
|
54
|
+
elsif format.include?('%')
|
55
|
+
:percentage
|
56
|
+
else
|
57
|
+
:float
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
module_function :to_type
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Roo
|
2
|
+
class Excelx
|
3
|
+
# Public: Shared class for allowing sheets to share data. This should
|
4
|
+
# reduce memory usage and reduce the number of objects being passed
|
5
|
+
# to various initializers.
|
6
|
+
class Shared
|
7
|
+
attr_accessor :comments_files, :sheet_files, :rels_files
|
8
|
+
def initialize(dir)
|
9
|
+
@dir = dir
|
10
|
+
@comments_files = []
|
11
|
+
@sheet_files = []
|
12
|
+
@rels_files = []
|
13
|
+
end
|
14
|
+
|
15
|
+
def styles
|
16
|
+
@styles ||= Styles.new(File.join(@dir, 'roo_styles.xml'))
|
17
|
+
end
|
18
|
+
|
19
|
+
def shared_strings
|
20
|
+
@shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
|
21
|
+
end
|
22
|
+
|
23
|
+
def workbook
|
24
|
+
@workbook ||= Workbook.new(File.join(@dir, 'roo_workbook.xml'))
|
25
|
+
end
|
26
|
+
|
27
|
+
def base_date
|
28
|
+
workbook.base_date
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/roo/excelx/sheet.rb
CHANGED
@@ -1,12 +1,17 @@
|
|
1
|
+
require 'forwardable'
|
1
2
|
module Roo
|
2
3
|
class Excelx
|
3
4
|
class Sheet
|
4
|
-
|
5
|
+
extend Forwardable
|
6
|
+
|
7
|
+
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
|
8
|
+
|
9
|
+
def initialize(name, shared, sheet_index, options = {})
|
5
10
|
@name = name
|
6
|
-
@
|
7
|
-
@
|
8
|
-
@
|
9
|
-
@sheet = SheetDoc.new(
|
11
|
+
@shared = shared
|
12
|
+
@rels = Relationships.new(rels_files[sheet_index])
|
13
|
+
@comments = Comments.new(comments_files[sheet_index])
|
14
|
+
@sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
|
10
15
|
end
|
11
16
|
|
12
17
|
def cells
|
@@ -14,7 +19,7 @@ module Roo
|
|
14
19
|
end
|
15
20
|
|
16
21
|
def present_cells
|
17
|
-
@present_cells ||= cells.select { |_, cell| cell && cell.
|
22
|
+
@present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
|
18
23
|
end
|
19
24
|
|
20
25
|
# Yield each row as array of Excelx::Cell objects
|
@@ -65,7 +70,7 @@ module Roo
|
|
65
70
|
|
66
71
|
def excelx_format(key)
|
67
72
|
cell = cells[key]
|
68
|
-
|
73
|
+
styles.style_format(cell.style).to_s if cell
|
69
74
|
end
|
70
75
|
|
71
76
|
def hyperlinks
|
data/lib/roo/excelx/sheet_doc.rb
CHANGED
@@ -1,15 +1,17 @@
|
|
1
|
+
require 'forwardable'
|
1
2
|
require 'roo/excelx/extractor'
|
2
3
|
|
3
4
|
module Roo
|
4
5
|
class Excelx
|
5
6
|
class SheetDoc < Excelx::Extractor
|
6
|
-
|
7
|
+
extend Forwardable
|
8
|
+
delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
|
9
|
+
|
10
|
+
def initialize(path, relationships, shared, options = {})
|
7
11
|
super(path)
|
12
|
+
@shared = shared
|
8
13
|
@options = options
|
9
14
|
@relationships = relationships
|
10
|
-
@styles = styles
|
11
|
-
@shared_strings = shared_strings
|
12
|
-
@workbook = workbook
|
13
15
|
end
|
14
16
|
|
15
17
|
def cells(relationships)
|
@@ -44,75 +46,118 @@ module Roo
|
|
44
46
|
|
45
47
|
private
|
46
48
|
|
47
|
-
def
|
48
|
-
|
49
|
-
|
50
|
-
style = cell_xml['s'].to_i # should be here
|
51
|
-
# c: <c r="A5" s="2">
|
52
|
-
# <v>22606</v>
|
53
|
-
# </c>, format: , tmp_type: float
|
54
|
-
value_type =
|
55
|
-
case cell_xml['t']
|
56
|
-
when 's'
|
49
|
+
def cell_value_type(type, format)
|
50
|
+
case type
|
51
|
+
when 's'.freeze
|
57
52
|
:shared
|
58
|
-
when 'b'
|
53
|
+
when 'b'.freeze
|
59
54
|
:boolean
|
60
|
-
when 'str'
|
55
|
+
when 'str'.freeze
|
61
56
|
:string
|
62
|
-
when 'inlineStr'
|
57
|
+
when 'inlineStr'.freeze
|
63
58
|
:inlinestr
|
64
59
|
else
|
65
|
-
format = @styles.style_format(style)
|
66
60
|
Excelx::Format.to_type(format)
|
67
61
|
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# Internal: Creates a cell based on an XML cell.
|
65
|
+
#
|
66
|
+
# cell_xml - a Nokogiri::XML::Element. e.g.
|
67
|
+
# <c r="A5" s="2">
|
68
|
+
# <v>22606</v>
|
69
|
+
# </c>
|
70
|
+
# hyperlink - a String for the hyperlink for the cell or nil when no
|
71
|
+
# hyperlink is present.
|
72
|
+
#
|
73
|
+
# Examples
|
74
|
+
#
|
75
|
+
# cell_from_xml(<Nokogiri::XML::Element>, nil)
|
76
|
+
# # => <Excelx::Cell::String>
|
77
|
+
#
|
78
|
+
# Returns a type of <Excelx::Cell>.
|
79
|
+
def cell_from_xml(cell_xml, hyperlink)
|
80
|
+
coordinate = extract_coordinate(cell_xml['r'])
|
81
|
+
return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
|
82
|
+
|
83
|
+
# NOTE: This is error prone, to_i will silently turn a nil into a 0.
|
84
|
+
# This works by coincidence because Format[0] is General.
|
85
|
+
style = cell_xml['s'].to_i
|
86
|
+
format = styles.style_format(style)
|
87
|
+
value_type = cell_value_type(cell_xml['t'], format)
|
68
88
|
formula = nil
|
69
|
-
|
89
|
+
|
70
90
|
cell_xml.children.each do |cell|
|
71
91
|
case cell.name
|
72
92
|
when 'is'
|
73
93
|
cell.children.each do |inline_str|
|
74
94
|
if inline_str.name == 't'
|
75
|
-
return Excelx::Cell.
|
95
|
+
return Excelx::Cell.create_cell(:string, inline_str.content, formula, style, hyperlink, coordinate)
|
76
96
|
end
|
77
97
|
end
|
78
98
|
when 'f'
|
79
99
|
formula = cell.content
|
80
100
|
when 'v'
|
81
|
-
|
82
|
-
value_type =
|
83
|
-
if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
|
84
|
-
:datetime
|
85
|
-
else
|
86
|
-
:date
|
87
|
-
end
|
88
|
-
end
|
89
|
-
excelx_type = [:numeric_or_formula, format.to_s]
|
90
|
-
value =
|
91
|
-
case value_type
|
92
|
-
when :shared
|
93
|
-
value_type = :string
|
94
|
-
excelx_type = :string
|
95
|
-
@shared_strings[cell.content.to_i]
|
96
|
-
when :boolean
|
97
|
-
(cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
|
98
|
-
when :date, :time, :datetime
|
99
|
-
cell.content
|
100
|
-
when :formula
|
101
|
-
cell.content.to_f
|
102
|
-
when :string
|
103
|
-
excelx_type = :string
|
104
|
-
cell.content
|
105
|
-
else
|
106
|
-
value_type = :float
|
107
|
-
cell.content
|
108
|
-
end
|
109
|
-
return Excelx::Cell.new(value, value_type, formula, excelx_type, cell.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
|
101
|
+
return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
|
110
102
|
end
|
111
103
|
end
|
112
|
-
|
104
|
+
end
|
105
|
+
|
106
|
+
def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
|
107
|
+
# NOTE: format.to_s can replace excelx_type as an argument for
|
108
|
+
# Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
|
109
|
+
# it will break some brittle tests.
|
110
|
+
excelx_type = [:numeric_or_formula, format.to_s]
|
111
|
+
|
112
|
+
# NOTE: There are only a few situations where value != cell.content
|
113
|
+
# 1. when a sharedString is used. value = sharedString;
|
114
|
+
# cell.content = id of sharedString
|
115
|
+
# 2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1';
|
116
|
+
# But a boolean cell should use TRUE|FALSE as the formatted value
|
117
|
+
# and use a Boolean for its value. Using a Boolean value breaks
|
118
|
+
# Roo::Base#to_csv.
|
119
|
+
# 3. formula
|
120
|
+
case value_type
|
121
|
+
when :shared
|
122
|
+
value = shared_strings[cell.content.to_i]
|
123
|
+
Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
|
124
|
+
when :boolean, :string
|
125
|
+
value = cell.content
|
126
|
+
Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
|
127
|
+
when :time, :datetime
|
128
|
+
cell_content = cell.content.to_f
|
129
|
+
# NOTE: A date will be a whole number. A time will be < 1. And
|
130
|
+
# in general, a datetime will have decimals. But if the cell is
|
131
|
+
# using a custom format, it's possible to be interpreted incorrectly.
|
132
|
+
# cell_content.to_i == cell_content && standard_style?=> :date
|
133
|
+
#
|
134
|
+
# Should check to see if the format is standard or not. If it's a
|
135
|
+
# standard format, then it's a date, otherwise, it is a datetime.
|
136
|
+
# @styles.standard_style?(style_id)
|
137
|
+
# STANDARD_STYLES.keys.include?(style_id.to_i)
|
138
|
+
cell_type = if cell_content < 1.0
|
139
|
+
:time
|
140
|
+
elsif (cell_content - cell_content.floor).abs > 0.000001
|
141
|
+
:datetime
|
142
|
+
else
|
143
|
+
:date
|
144
|
+
end
|
145
|
+
Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
|
146
|
+
when :date
|
147
|
+
Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
|
148
|
+
else
|
149
|
+
Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
def extract_coordinate(coordinate)
|
154
|
+
row, column = ::Roo::Utils.split_coordinate(coordinate)
|
155
|
+
|
156
|
+
Excelx::Coordinate.new(row, column)
|
113
157
|
end
|
114
158
|
|
115
159
|
def extract_hyperlinks(relationships)
|
160
|
+
# FIXME: select the valid hyperlinks and then map those.
|
116
161
|
Hash[doc.xpath('/worksheet/hyperlinks/hyperlink').map do |hyperlink|
|
117
162
|
if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
|
118
163
|
[::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
|
@@ -154,47 +199,6 @@ module Roo
|
|
154
199
|
return dimension.attributes['ref'].value
|
155
200
|
end
|
156
201
|
end
|
157
|
-
|
158
|
-
=begin
|
159
|
-
Datei xl/comments1.xml
|
160
|
-
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
|
161
|
-
<comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
162
|
-
<authors>
|
163
|
-
<author />
|
164
|
-
</authors>
|
165
|
-
<commentList>
|
166
|
-
<comment ref="B4" authorId="0">
|
167
|
-
<text>
|
168
|
-
<r>
|
169
|
-
<rPr>
|
170
|
-
<sz val="10" />
|
171
|
-
<rFont val="Arial" />
|
172
|
-
<family val="2" />
|
173
|
-
</rPr>
|
174
|
-
<t>Kommentar fuer B4</t>
|
175
|
-
</r>
|
176
|
-
</text>
|
177
|
-
</comment>
|
178
|
-
<comment ref="B5" authorId="0">
|
179
|
-
<text>
|
180
|
-
<r>
|
181
|
-
<rPr>
|
182
|
-
<sz val="10" />
|
183
|
-
<rFont val="Arial" />
|
184
|
-
<family val="2" />
|
185
|
-
</rPr>
|
186
|
-
<t>Kommentar fuer B5</t>
|
187
|
-
</r>
|
188
|
-
</text>
|
189
|
-
</comment>
|
190
|
-
</commentList>
|
191
|
-
</comments>
|
192
|
-
=end
|
193
|
-
=begin
|
194
|
-
if @comments_doc[self.sheets.index(sheet)]
|
195
|
-
read_comments(sheet)
|
196
|
-
end
|
197
|
-
=end
|
198
202
|
end
|
199
203
|
end
|
200
204
|
end
|
data/lib/roo/link.rb
CHANGED
@@ -1,9 +1,28 @@
|
|
1
|
+
require 'uri'
|
2
|
+
|
1
3
|
module Roo
|
2
4
|
class Link < String
|
5
|
+
# FIXME: Roo::Link inherits from String. A link cell is_a?(Roo::Link). **It is
|
6
|
+
# the only situation where a cell's `value` is always a String**. Link
|
7
|
+
# cells have a nifty `to_uri` method, but this method isn't easily
|
8
|
+
# reached. (e.g. `sheet.sheet_for(nil).cells[[row, column]].value.to_uri`;
|
9
|
+
# `sheet.hyperlink(row, column)` doesn't use `to_uri`).
|
10
|
+
#
|
11
|
+
# 1. Add different types of links (String, Numeric, Date, DateTime, etc.)
|
12
|
+
# 2. Remove Roo::Link.
|
13
|
+
# 3. Don't inherit the string and pass the cell's value.
|
14
|
+
#
|
15
|
+
# I don't know the historical reasons for the Roo::Link, but right now
|
16
|
+
# it seems unnecessary. I'm in favor of keeping it just in case.
|
17
|
+
#
|
18
|
+
# I'm also in favor of passing the cell's value to Roo::Link. The
|
19
|
+
# cell.value's class would still be Roo::Link, but the value itself
|
20
|
+
# would depend on what type of cell it is (Numeric, Date, etc.).
|
21
|
+
#
|
3
22
|
attr_reader :href
|
4
|
-
|
23
|
+
alias_method :url, :href
|
5
24
|
|
6
|
-
def initialize(href='', text=href)
|
25
|
+
def initialize(href = '', text = href)
|
7
26
|
super(text)
|
8
27
|
@href = href
|
9
28
|
end
|
data/lib/roo/open_office.rb
CHANGED
@@ -3,6 +3,7 @@ require 'nokogiri'
|
|
3
3
|
require 'cgi'
|
4
4
|
require 'zip/filesystem'
|
5
5
|
require 'roo/font'
|
6
|
+
require 'base64'
|
6
7
|
|
7
8
|
module Roo
|
8
9
|
class OpenOffice < Roo::Base
|
@@ -410,7 +411,9 @@ module Roo
|
|
410
411
|
@style[sheet] ||= {}
|
411
412
|
@style[sheet][key] = style_name
|
412
413
|
case @cell_type[sheet][key]
|
413
|
-
when :float
|
414
|
+
when :float
|
415
|
+
@cell[sheet][key] = (table_cell.attributes['value'].to_s.include?(".") || table_cell.children.first.text.include?(".")) ? v.to_f : v.to_i
|
416
|
+
when :percentage
|
414
417
|
@cell[sheet][key] = v.to_f
|
415
418
|
when :string
|
416
419
|
@cell[sheet][key] = str_v
|
data/lib/roo/version.rb
CHANGED
data/roo.gemspec
CHANGED
@@ -6,7 +6,7 @@ require 'roo/version'
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = 'roo'
|
8
8
|
spec.version = Roo::VERSION
|
9
|
-
spec.authors = ['Thomas Preymesser', 'Hugh McGowan', 'Ben Woosley', 'Oleksandr Simonov']
|
9
|
+
spec.authors = ['Thomas Preymesser', 'Hugh McGowan', 'Ben Woosley', 'Oleksandr Simonov', 'Steven Daniels']
|
10
10
|
spec.email = ['ruby.ruby.ruby.roo@gmail.com', 'oleksandr@simonov.me']
|
11
11
|
spec.summary = 'Roo can access the contents of various spreadsheet files.'
|
12
12
|
spec.description = "Roo can access the contents of various spreadsheet files. It can handle\n* OpenOffice\n* Excelx\n* LibreOffice\n* CSV"
|