roo 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/lib/roo/base.rb +43 -19
- data/lib/roo/excelx.rb +390 -542
- data/lib/roo/excelx/cell.rb +77 -0
- data/lib/roo/excelx/comments.rb +9 -11
- data/lib/roo/excelx/extractor.rb +12 -10
- data/lib/roo/excelx/relationships.rb +13 -14
- data/lib/roo/excelx/shared_strings.rb +19 -22
- data/lib/roo/excelx/sheet.rb +107 -0
- data/lib/roo/excelx/sheet_doc.rb +98 -100
- data/lib/roo/excelx/styles.rb +42 -40
- data/lib/roo/excelx/workbook.rb +36 -36
- data/lib/roo/open_office.rb +4 -1
- data/lib/roo/version.rb +1 -1
- data/spec/lib/roo/excelx_spec.rb +36 -0
- data/spec/lib/roo/openoffice_spec.rb +11 -0
- data/test/test_generic_spreadsheet.rb +104 -78
- data/test/test_roo.rb +22 -1
- metadata +5 -4
- data/scripts/txt2html +0 -67
@@ -0,0 +1,77 @@
|
|
1
|
+
require 'date'
|
2
|
+
|
3
|
+
module Roo
|
4
|
+
class Excelx
|
5
|
+
# A single cell read from an xlsx worksheet.
#
# Keeps the raw Excel-side data (excelx_type, excelx_value, style) next to
# the Ruby value produced by casting the raw value according to the cell type.
class Cell
  # `type` is defined explicitly below, so it is not listed here (listing it
  # would only produce a "method redefined" warning).
  attr_reader :formula, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
  attr_accessor :value

  # value        - raw cell content (cast according to +type+)
  # type         - symbol such as :float, :percentage, :date, :datetime, :time
  # formula      - formula text, or nil
  # excelx_type  - Excel-side type information, stored untouched
  # excelx_value - Excel-side raw value, stored untouched
  # style        - style reference, stored untouched
  # hyperlink    - link target; when present the value is wrapped in a Roo::Link
  # base_date    - date that serial number 0 refers to (only kept for date types)
  # coordinate   - Cell::Coordinate of this cell
  def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
    @type = type
    @formula = formula
    @base_date = base_date if [:date, :datetime].include?(@type)
    @excelx_type = excelx_type
    @excelx_value = excelx_value
    @style = style
    # The reader for :hyperlink is public, so back it with the actual target
    # instead of leaving it permanently nil.
    @hyperlink = hyperlink
    @value = type_cast_value(value)
    @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
    @coordinate = coordinate
  end

  # Effective type of the cell: :formula wins over :link, which wins over the
  # type the cell was created with.
  def type
    if @formula
      :formula
    elsif @value.is_a?(Roo::Link)
      :link
    else
      @type
    end
  end

  # Row/column position of a cell.
  class Coordinate
    attr_accessor :row, :column

    def initialize(row, column)
      @row = row
      @column = column
    end
  end

  private

  # Converts the raw value into a Ruby object according to @type.
  # Unknown types are passed through unchanged.
  def type_cast_value(value)
    case @type
    when :float, :percentage
      value.to_f
    when :date
      create_date(@base_date + value.to_i)
    when :datetime
      create_datetime(@base_date + value.to_f.round(6))
    when :time
      # fraction of a day -> seconds
      value.to_f * 86_400
    else
      value
    end
  end

  # Builds a plain Date carrying only the calendar day of +date+.
  def create_date(date)
    Date.new(date.year, date.month, date.day)
  end

  # Builds a DateTime from +date+, rounded to the nearest whole second.
  def create_datetime(date)
    t = round_datetime(date.strftime('%Y-%m-%d %H:%M:%S.%N'))

    DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
  end

  # Parses a 'YYYY-MM-DD HH:MM:SS.NNNNNNNNN' string and rounds it to whole
  # seconds. The Time is built in UTC: a local Time would be shifted when the
  # wall-clock value falls into a daylight-saving gap of the host time zone.
  def round_datetime(datetime_string)
    parts = /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+\.\d+)/.match(datetime_string)

    Time.utc(parts[:yyyy].to_i, parts[:mm].to_i, parts[:dd].to_i,
             parts[:hh].to_i, parts[:mi].to_i, parts[:ss].to_r).round(0)
  end
end
|
76
|
+
end
|
77
|
+
end
|
data/lib/roo/excelx/comments.rb
CHANGED
@@ -1,24 +1,22 @@
|
|
1
1
|
require 'roo/excelx/extractor'
|
2
2
|
|
3
3
|
module Roo
|
4
|
-
class Excelx
|
4
|
+
class Excelx
|
5
|
+
# Reads the cell comments of one worksheet from its comments xml part.
class Comments < Excelx::Extractor
  # Hash of cell key => comment text, extracted on first access.
  def comments
    return @comments if @comments

    @comments = extract_comments
  end

  private

  # Builds the key => text hash; empty when the comments part is absent.
  def extract_comments
    return {} unless doc_exists?

    doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, by_key|
      # prefer the first rich-text run, fall back to plain text
      text_node = comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')
      value = text_node.text
      by_key[::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s)] = value
    end
  end
end
|
22
|
-
|
23
21
|
end
|
24
22
|
end
|
data/lib/roo/excelx/extractor.rb
CHANGED
@@ -1,20 +1,22 @@
|
|
1
1
|
module Roo
|
2
|
-
class Excelx
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
class Excelx
|
3
|
+
# Base class for readers of a single xml part identified by a file path.
class Extractor
  # path - location of the xml part; may be nil when the part is absent
  def initialize(path)
    @path = path
  end

  private

  # Parsed document with namespaces removed, loaded on first use.
  # Returns nil when there is no file to load.
  def doc
    return @doc if @doc

    @doc = ::Roo::Utils.load_xml(@path).remove_namespaces! if doc_exists?
  end

  # Truthy only when a path was given and a file exists there.
  def doc_exists?
    @path && File.exist?(@path)
  end
end
|
19
21
|
end
|
20
22
|
end
|
@@ -1,26 +1,25 @@
|
|
1
1
|
require 'roo/excelx/extractor'
|
2
2
|
|
3
3
|
module Roo
|
4
|
-
class Excelx
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
class Excelx
|
5
|
+
# Reads a .rels part and exposes its Relationship elements by their Id.
class Relationships < Excelx::Extractor
  # Looks up the Relationship element with the given Id (nil when unknown).
  def [](index)
    to_a[index]
  end

  # Hash of relationship Id => Relationship element, extracted on first use.
  # (The name is historical; the collection is keyed by Id, not by position.)
  def to_a
    @relationships ||= extract_relationships
  end

  private

  # Builds the Id => element hash. When the .rels part does not exist an
  # empty Hash is returned, so that #[] with a String Id answers nil just as
  # it does for an unknown Id (an empty Array would raise TypeError there).
  def extract_relationships
    return {} unless doc_exists?

    doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, by_id|
      by_id[rel.attribute('Id').text] = rel
    end
  end
end
|
24
|
-
|
25
24
|
end
|
26
25
|
end
|
@@ -1,40 +1,37 @@
|
|
1
1
|
require 'roo/excelx/extractor'
|
2
2
|
|
3
3
|
module Roo
|
4
|
-
class Excelx
|
5
|
-
|
6
|
-
|
7
|
-
|
4
|
+
class Excelx
|
5
|
+
# Reads the shared strings part of a workbook.
class SharedStrings < Excelx::Extractor
  # Shared string at the given position.
  def [](index)
    to_a[index]
  end

  # All shared strings in document order, extracted on first use.
  def to_a
    return @array if @array

    @array = extract_shared_strings
  end

  private

  # One string per <si> element; empty list when the part is absent.
  def extract_shared_strings
    return [] unless doc_exists?

    # read the shared strings xml document
    doc.xpath('/sst/si').map do |si|
      text = ''
      si.children.each do |node|
        if node.name == 'r'
          # rich text: append the text of every <t> inside the run
          node.children.each { |run| text << run.content if run.name == 't' }
        elsif node.name == 't'
          # plain text replaces whatever was collected so far
          text = node.content
        end
      end
      text
    end
  end
end
|
38
|
-
|
39
36
|
end
|
40
37
|
end
|
@@ -0,0 +1,107 @@
|
|
1
|
+
module Roo
|
2
|
+
class Excelx
|
3
|
+
# One worksheet of an xlsx workbook: combines the sheet xml (SheetDoc) with
# its relationships, comments and the workbook styles.
class Sheet
  def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
    @name = name
    @rels = Relationships.new(rels_path)
    @comments = Comments.new(comments_path)
    @styles = styles
    @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
  end

  # All cells keyed by [row, column]; read from the sheet on first use.
  def cells
    @cells ||= @sheet.cells(@rels)
  end

  # The subset of #cells that exist and have a truthy value.
  def present_cells
    @present_cells ||= cells.select { |_, cell| cell && cell.value }
  end

  # Yields each row as an array of Excelx::Cell objects.
  #
  # options:
  #   :offset    (int)     - number of leading rows to skip, default 0
  #   :max_rows  (int)     - stop once offset + max_rows + 1 rows were read
  #                          (the extra row leaves room for a header)
  #   :pad_cells (boolean) - fill gaps between cells with nil
  #
  # The options hash is only read, never modified, so callers may pass a
  # shared or frozen hash.
  def each_row(options = {}, &block)
    offset = options[:offset] || 0
    max_rows = options[:max_rows]
    stop_at = max_rows && (max_rows + offset + 1)
    row_count = 0

    @sheet.each_row_streaming do |row|
      break if stop_at && row_count == stop_at
      block.call(cells_for_row_element(row, options)) if block && row_count >= offset
      row_count += 1
    end
  end

  # Values of the given row from the first to the last non-empty column.
  def row(row_number)
    first_column.upto(last_column).map do |col|
      cell = cells[[row_number, col]]
      cell && cell.value
    end
  end

  # Values of the given column from the first to the last non-empty row.
  def column(col_number)
    first_row.upto(last_row).map do |row|
      cell = cells[[row, col_number]]
      cell && cell.value
    end
  end

  # returns the number of the first non-empty row
  def first_row
    @first_row ||= present_cells.keys.map(&:first).min
  end

  # returns the number of the last non-empty row
  def last_row
    @last_row ||= present_cells.keys.map(&:first).max
  end

  # returns the number of the first non-empty column
  def first_column
    @first_column ||= present_cells.keys.map(&:last).min
  end

  # returns the number of the last non-empty column
  def last_column
    @last_column ||= present_cells.keys.map(&:last).max
  end

  # Format string of the cell at +key+ ([row, column]); nil when no such cell.
  def excelx_format(key)
    cell = cells[key]
    @styles.style_format(cell.style).to_s if cell
  end

  def hyperlinks
    @hyperlinks ||= @sheet.hyperlinks(@rels)
  end

  def comments
    @comments.comments
  end

  def dimensions
    @sheet.dimensions
  end

  private

  # Takes an xml row and returns an array of Excelx::Cell objects.
  # With option :pad_cells the gaps before each cell are filled with nil,
  # based on the column of the previous cell in the row.
  def cells_for_row_element(row_element, options = {})
    return [] unless row_element

    previous_column = 0
    row_cells = []
    @sheet.each_cell(row_element) do |cell|
      row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
      row_cells << cell
      previous_column = cell.coordinate.column
    end
    row_cells
  end

  # Array of nils covering the columns between last_column and the cell.
  # A non-positive gap yields an empty array.
  def pad_cells(cell, last_column)
    gap = cell.coordinate.column - 1 - last_column
    Array.new([gap, 0].max)
  end
end
|
106
|
+
end
|
107
|
+
end
|
data/lib/roo/excelx/sheet_doc.rb
CHANGED
@@ -1,96 +1,93 @@
|
|
1
1
|
require 'roo/excelx/extractor'
|
2
2
|
|
3
3
|
module Roo
|
4
|
-
class Excelx
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
4
|
+
class Excelx
|
5
|
+
class SheetDoc < Excelx::Extractor
|
6
|
+
# Stores the collaborators needed to turn the sheet xml at +path+ into cells.
# The path itself is handed to the Extractor superclass.
def initialize(path, relationships, styles, shared_strings, workbook, options = {})
  super(path)
  @relationships = relationships
  @styles = styles
  @shared_strings = shared_strings
  @workbook = workbook
  @options = options
end
|
13
14
|
|
14
|
-
|
15
|
-
|
16
|
-
|
15
|
+
# Extracted cells of the sheet. The result of the first call is cached, so
# +relationships+ is only used by that first call.
def cells(relationships)
  return @cells if @cells

  @cells = extract_cells(relationships)
end
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
19
|
+
# Extracted hyperlinks of the sheet. The result of the first call is cached,
# so +relationships+ is only used by that first call.
def hyperlinks(relationships)
  return @hyperlinks if @hyperlinks

  @hyperlinks = extract_hyperlinks(relationships)
end
|
21
22
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
23
|
+
# Dimensions declared for the sheet, cached after the first lookup.
# They are only an upper bound for the cells that might be parsed: the
# document may be sparse.
def dimensions
  return @dimensions if @dimensions

  @dimensions = extract_dimensions
end
|
28
29
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
30
|
+
# Hands every 'row' element found in the sheet file to the caller's block,
# delegating the iteration to Roo::Utils.each_element.
def each_row_streaming(&block)
  Roo::Utils.each_element(@path, 'row', &block)
end
|
33
34
|
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
35
|
+
# Builds a cell for every child of the given row xml and yields it to the
# caller, attaching the hyperlink registered for the cell's position (if any).
# Returns an empty array when no row xml is given.
def each_cell(row_xml)
  return [] unless row_xml

  row_xml.children.each do |node|
    position = ::Roo::Utils.ref_to_key(node['r'])
    link = hyperlinks(@relationships)[position]
    yield cell_from_xml(node, link)
  end
end
|
42
|
-
end
|
43
44
|
|
44
|
-
|
45
|
+
private
|
45
46
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
47
|
+
def cell_from_xml(cell_xml, hyperlink)
|
48
|
+
# This is error prone, to_i will silently turn a nil into a 0
|
49
|
+
# and it works by coincidence that Format[0] is general
|
50
|
+
style = cell_xml['s'].to_i # should be here
|
51
|
+
# c: <c r="A5" s="2">
|
52
|
+
# <v>22606</v>
|
53
|
+
# </c>, format: , tmp_type: float
|
54
|
+
value_type =
|
54
55
|
case cell_xml['t']
|
55
56
|
when 's'
|
56
57
|
:shared
|
57
58
|
when 'b'
|
58
59
|
:boolean
|
59
|
-
# 2011-02-25 BEGIN
|
60
60
|
when 'str'
|
61
61
|
:string
|
62
|
-
# 2011-02-25 END
|
63
|
-
# 2011-09-15 BEGIN
|
64
62
|
when 'inlineStr'
|
65
63
|
:inlinestr
|
66
|
-
# 2011-09-15 END
|
67
64
|
else
|
68
65
|
format = @styles.style_format(style)
|
69
66
|
Excelx::Format.to_type(format)
|
70
67
|
end
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
68
|
+
formula = nil
|
69
|
+
row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
|
70
|
+
cell_xml.children.each do |cell|
|
71
|
+
case cell.name
|
72
|
+
when 'is'
|
73
|
+
cell.children.each do |inline_str|
|
74
|
+
if inline_str.name == 't'
|
75
|
+
return Excelx::Cell.new(inline_str.content, :string, formula, :string, inline_str.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
|
76
|
+
end
|
79
77
|
end
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
value_type =
|
78
|
+
when 'f'
|
79
|
+
formula = cell.content
|
80
|
+
when 'v'
|
81
|
+
if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
|
82
|
+
value_type =
|
86
83
|
if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
|
87
84
|
:datetime
|
88
85
|
else
|
89
86
|
:date
|
90
87
|
end
|
91
|
-
|
92
|
-
|
93
|
-
|
88
|
+
end
|
89
|
+
excelx_type = [:numeric_or_formula, format.to_s]
|
90
|
+
value =
|
94
91
|
case value_type
|
95
92
|
when :shared
|
96
93
|
value_type = :string
|
@@ -109,54 +106,54 @@ module Roo
|
|
109
106
|
value_type = :float
|
110
107
|
cell.content
|
111
108
|
end
|
112
|
-
|
109
|
+
return Excelx::Cell.new(value, value_type, formula, excelx_type, cell.content, style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
|
110
|
+
end
|
113
111
|
end
|
112
|
+
Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
|
114
113
|
end
|
115
|
-
Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
|
116
|
-
end
|
117
114
|
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
115
|
+
# Maps the position of every hyperlink in the sheet to its target.
# Hyperlinks without an 'id' attribute, or whose id has no entry in
# +relationships+, are left out.
def extract_hyperlinks(relationships)
  doc.xpath('/worksheet/hyperlinks/hyperlink').each_with_object({}) do |node, links|
    id_attribute = node.attribute('id')
    next unless id_attribute

    relationship = relationships[id_attribute.text]
    next unless relationship

    links[::Roo::Utils.ref_to_key(node.attributes['ref'].to_s)] = relationship.attribute('Target').text
  end
end
|
125
122
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
123
|
+
# Copies the top-left cell of every merged range into all other positions of
# that range. +cells+ (a hash keyed by [row, column]) is modified in place.
def expand_merged_ranges(cells)
  # Extract merged ranges from xml: position => top-left position of its range
  merges = {}
  doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
    tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
    # a ref without ':' names a single cell; treat it as a 1x1 range
    br ||= tl
    (tl[0]..br[0]).each do |row|
      (tl[1]..br[1]).each do |col|
        next if row == tl[0] && col == tl[1]
        merges[[row, col]] = tl
      end
    end
  end
  # Duplicate value into all cells in merged range
  merges.each do |dst, src|
    cells[dst] = cells[src]
  end
end
|
138
|
-
# Duplicate value into all cells in merged range
|
139
|
-
merges.each do |dst, src|
|
140
|
-
cells[dst] = cells[src]
|
141
|
-
end
|
142
|
-
end
|
143
140
|
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
expand_merged_ranges(extracted_cells)
|
141
|
+
# Builds the hash of [row, column] => cell for every <c> element of the
# sheet. Merged ranges are expanded only when the :expand_merged_ranges
# option is set.
def extract_cells(relationships)
  by_position = {}
  doc.xpath('/worksheet/sheetData/row/c').each do |node|
    position = ::Roo::Utils.ref_to_key(node['r'])
    by_position[position] = cell_from_xml(node, hyperlinks(relationships)[position])
  end

  expand_merged_ranges(by_position) if @options[:expand_merged_ranges]

  by_position
end
|
152
|
-
extracted_cells
|
153
|
-
end
|
154
151
|
|
155
|
-
|
156
|
-
|
157
|
-
|
152
|
+
# Returns the value of the 'ref' attribute of the first 'dimension' element
# yielded for the sheet file; the search stops at that element.
def extract_dimensions
  Roo::Utils.each_element(@path, 'dimension') do |node|
    return node.attributes['ref'].value
  end
end
|
159
|
-
end
|
160
157
|
|
161
158
|
=begin
|
162
159
|
Datei xl/comments1.xml
|
@@ -198,5 +195,6 @@ Datei xl/comments1.xml
|
|
198
195
|
read_comments(sheet)
|
199
196
|
end
|
200
197
|
=end
|
198
|
+
end
|
201
199
|
end
|
202
200
|
end
|