roo 2.3.0 → 2.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.codeclimate.yml +17 -0
- data/.github/issue_template.md +16 -0
- data/.github/pull_request_template.md +14 -0
- data/.github/workflows/pull-request.yml +15 -0
- data/.github/workflows/ruby.yml +34 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +186 -0
- data/CHANGELOG.md +148 -0
- data/Gemfile +4 -4
- data/LICENSE +2 -0
- data/README.md +84 -27
- data/Rakefile +1 -1
- data/lib/roo/base.rb +111 -237
- data/lib/roo/constants.rb +5 -3
- data/lib/roo/csv.rb +106 -85
- data/lib/roo/errors.rb +2 -0
- data/lib/roo/excelx/cell/base.rb +26 -12
- data/lib/roo/excelx/cell/boolean.rb +9 -6
- data/lib/roo/excelx/cell/date.rb +7 -7
- data/lib/roo/excelx/cell/datetime.rb +50 -44
- data/lib/roo/excelx/cell/empty.rb +3 -2
- data/lib/roo/excelx/cell/number.rb +60 -47
- data/lib/roo/excelx/cell/string.rb +3 -3
- data/lib/roo/excelx/cell/time.rb +17 -16
- data/lib/roo/excelx/cell.rb +11 -7
- data/lib/roo/excelx/comments.rb +3 -3
- data/lib/roo/excelx/coordinate.rb +11 -4
- data/lib/roo/excelx/extractor.rb +20 -3
- data/lib/roo/excelx/format.rb +38 -31
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +12 -4
- data/lib/roo/excelx/shared.rb +10 -3
- data/lib/roo/excelx/shared_strings.rb +113 -9
- data/lib/roo/excelx/sheet.rb +49 -10
- data/lib/roo/excelx/sheet_doc.rb +101 -48
- data/lib/roo/excelx/styles.rb +4 -4
- data/lib/roo/excelx/workbook.rb +8 -3
- data/lib/roo/excelx.rb +85 -42
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/open_office.rb +41 -27
- data/lib/roo/spreadsheet.rb +8 -2
- data/lib/roo/tempdir.rb +24 -0
- data/lib/roo/utils.rb +76 -26
- data/lib/roo/version.rb +1 -1
- data/lib/roo.rb +5 -0
- data/roo.gemspec +22 -12
- data/spec/lib/roo/base_spec.rb +65 -3
- data/spec/lib/roo/csv_spec.rb +19 -0
- data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
- data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
- data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
- data/spec/lib/roo/excelx_spec.rb +237 -5
- data/spec/lib/roo/openoffice_spec.rb +2 -2
- data/spec/lib/roo/spreadsheet_spec.rb +1 -1
- data/spec/lib/roo/strict_spec.rb +43 -0
- data/spec/lib/roo/utils_spec.rb +22 -9
- data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
- data/spec/lib/roo_spec.rb +0 -0
- data/spec/spec_helper.rb +2 -7
- data/test/excelx/cell/test_attr_reader_default.rb +72 -0
- data/test/excelx/cell/test_base.rb +6 -2
- data/test/excelx/cell/test_boolean.rb +1 -3
- data/test/excelx/cell/test_date.rb +1 -6
- data/test/excelx/cell/test_datetime.rb +7 -10
- data/test/excelx/cell/test_empty.rb +12 -2
- data/test/excelx/cell/test_number.rb +28 -4
- data/test/excelx/cell/test_string.rb +21 -3
- data/test/excelx/cell/test_time.rb +7 -10
- data/test/excelx/test_coordinate.rb +51 -0
- data/test/formatters/test_csv.rb +136 -0
- data/test/formatters/test_matrix.rb +76 -0
- data/test/formatters/test_xml.rb +78 -0
- data/test/formatters/test_yaml.rb +20 -0
- data/test/helpers/test_accessing_files.rb +81 -0
- data/test/helpers/test_comments.rb +43 -0
- data/test/helpers/test_formulas.rb +9 -0
- data/test/helpers/test_labels.rb +103 -0
- data/test/helpers/test_sheets.rb +55 -0
- data/test/helpers/test_styles.rb +62 -0
- data/test/roo/test_base.rb +182 -0
- data/test/roo/test_csv.rb +88 -0
- data/test/roo/test_excelx.rb +360 -0
- data/test/roo/test_libre_office.rb +9 -0
- data/test/roo/test_open_office.rb +289 -0
- data/test/test_helper.rb +129 -14
- data/test/test_roo.rb +60 -1765
- metadata +91 -21
- data/.travis.yml +0 -14
data/lib/roo/excelx/sheet.rb
CHANGED
@@ -4,11 +4,15 @@ module Roo
|
|
4
4
|
class Sheet
|
5
5
|
extend Forwardable
|
6
6
|
|
7
|
-
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
|
7
|
+
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared
|
8
|
+
|
9
|
+
attr_reader :images
|
8
10
|
|
9
11
|
def initialize(name, shared, sheet_index, options = {})
|
10
12
|
@name = name
|
11
13
|
@shared = shared
|
14
|
+
@sheet_index = sheet_index
|
15
|
+
@images = Images.new(image_rels[sheet_index]).list
|
12
16
|
@rels = Relationships.new(rels_files[sheet_index])
|
13
17
|
@comments = Comments.new(comments_files[sheet_index])
|
14
18
|
@sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
|
@@ -19,7 +23,14 @@ module Roo
|
|
19
23
|
end
|
20
24
|
|
21
25
|
def present_cells
|
22
|
-
@present_cells ||=
|
26
|
+
@present_cells ||= begin
|
27
|
+
warn %{
|
28
|
+
[DEPRECATION] present_cells is deprecated. Alternate:
|
29
|
+
with activesupport => cells[key].presence
|
30
|
+
without activesupport => cells[key]&.presence
|
31
|
+
}
|
32
|
+
cells.select { |_, cell| cell&.presence }
|
33
|
+
end
|
23
34
|
end
|
24
35
|
|
25
36
|
# Yield each row as array of Excelx::Cell objects
|
@@ -39,33 +50,33 @@ module Roo
|
|
39
50
|
|
40
51
|
def row(row_number)
|
41
52
|
first_column.upto(last_column).map do |col|
|
42
|
-
cells[[row_number, col]]
|
43
|
-
end
|
53
|
+
cells[[row_number, col]]&.value
|
54
|
+
end
|
44
55
|
end
|
45
56
|
|
46
57
|
def column(col_number)
|
47
58
|
first_row.upto(last_row).map do |row|
|
48
|
-
cells[[row, col_number]]
|
49
|
-
end
|
59
|
+
cells[[row, col_number]]&.value
|
60
|
+
end
|
50
61
|
end
|
51
62
|
|
52
63
|
# returns the number of the first non-empty row
|
53
64
|
def first_row
|
54
|
-
@first_row ||=
|
65
|
+
@first_row ||= first_last_row_col[:first_row]
|
55
66
|
end
|
56
67
|
|
57
68
|
def last_row
|
58
|
-
@last_row ||=
|
69
|
+
@last_row ||= first_last_row_col[:last_row]
|
59
70
|
end
|
60
71
|
|
61
72
|
# returns the number of the first non-empty column
|
62
73
|
def first_column
|
63
|
-
@first_column ||=
|
74
|
+
@first_column ||= first_last_row_col[:first_column]
|
64
75
|
end
|
65
76
|
|
66
77
|
# returns the number of the last non-empty column
|
67
78
|
def last_column
|
68
|
-
@last_column ||=
|
79
|
+
@last_column ||= first_last_row_col[:last_column]
|
69
80
|
end
|
70
81
|
|
71
82
|
def excelx_format(key)
|
@@ -107,6 +118,34 @@ module Roo
|
|
107
118
|
(cell.coordinate.column - 1 - last_column).times { pad << nil }
|
108
119
|
pad
|
109
120
|
end
|
121
|
+
|
122
|
+
def first_last_row_col
|
123
|
+
@first_last_row_col ||= begin
|
124
|
+
first_row = last_row = first_col = last_col = nil
|
125
|
+
|
126
|
+
cells.each do |(row, col), cell|
|
127
|
+
next unless cell&.presence
|
128
|
+
first_row ||= row
|
129
|
+
last_row ||= row
|
130
|
+
first_col ||= col
|
131
|
+
last_col ||= col
|
132
|
+
|
133
|
+
if row > last_row
|
134
|
+
last_row = row
|
135
|
+
elsif row < first_row
|
136
|
+
first_row = row
|
137
|
+
end
|
138
|
+
|
139
|
+
if col > last_col
|
140
|
+
last_col = col
|
141
|
+
elsif col < first_col
|
142
|
+
first_col = col
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
{first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col}
|
147
|
+
end
|
148
|
+
end
|
110
149
|
end
|
111
150
|
end
|
112
151
|
end
|
data/lib/roo/excelx/sheet_doc.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'forwardable'
|
2
4
|
require 'roo/excelx/extractor'
|
3
5
|
|
@@ -5,7 +7,7 @@ module Roo
|
|
5
7
|
class Excelx
|
6
8
|
class SheetDoc < Excelx::Extractor
|
7
9
|
extend Forwardable
|
8
|
-
delegate [:
|
10
|
+
delegate [:workbook] => :@shared
|
9
11
|
|
10
12
|
def initialize(path, relationships, shared, options = {})
|
11
13
|
super(path)
|
@@ -19,7 +21,12 @@ module Roo
|
|
19
21
|
end
|
20
22
|
|
21
23
|
def hyperlinks(relationships)
|
22
|
-
|
24
|
+
# If you're sure you're not going to need this hyperlinks you can discard it
|
25
|
+
@hyperlinks ||= if @options[:no_hyperlinks] || !relationships.include_type?("hyperlink")
|
26
|
+
{}
|
27
|
+
else
|
28
|
+
extract_hyperlinks(relationships)
|
29
|
+
end
|
23
30
|
end
|
24
31
|
|
25
32
|
# Get the dimensions for the sheet.
|
@@ -39,8 +46,10 @@ module Roo
|
|
39
46
|
def each_cell(row_xml)
|
40
47
|
return [] unless row_xml
|
41
48
|
row_xml.children.each do |cell_element|
|
42
|
-
|
43
|
-
|
49
|
+
coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
|
50
|
+
hyperlinks = hyperlinks(@relationships)[coordinate]
|
51
|
+
|
52
|
+
yield cell_from_xml(cell_element, hyperlinks, coordinate)
|
44
53
|
end
|
45
54
|
end
|
46
55
|
|
@@ -48,13 +57,13 @@ module Roo
|
|
48
57
|
|
49
58
|
def cell_value_type(type, format)
|
50
59
|
case type
|
51
|
-
when 's'
|
60
|
+
when 's'
|
52
61
|
:shared
|
53
|
-
when 'b'
|
62
|
+
when 'b'
|
54
63
|
:boolean
|
55
|
-
when 'str'
|
64
|
+
when 'str'
|
56
65
|
:string
|
57
|
-
when 'inlineStr'
|
66
|
+
when 'inlineStr'
|
58
67
|
:inlinestr
|
59
68
|
else
|
60
69
|
Excelx::Format.to_type(format)
|
@@ -69,41 +78,53 @@ module Roo
|
|
69
78
|
# </c>
|
70
79
|
# hyperlink - a String for the hyperlink for the cell or nil when no
|
71
80
|
# hyperlink is present.
|
81
|
+
# coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
|
82
|
+
# or nil to extract coordinate from cell_xml.
|
83
|
+
# empty_cell - an Optional Boolean value.
|
72
84
|
#
|
73
85
|
# Examples
|
74
86
|
#
|
75
|
-
# cells_from_xml(<Nokogiri::XML::Element>, nil)
|
87
|
+
# cells_from_xml(<Nokogiri::XML::Element>, nil, nil)
|
76
88
|
# # => <Excelx::Cell::String>
|
77
89
|
#
|
78
90
|
# Returns a type of <Excelx::Cell>.
|
79
|
-
def cell_from_xml(cell_xml, hyperlink)
|
80
|
-
coordinate
|
81
|
-
|
91
|
+
def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true)
|
92
|
+
coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
|
93
|
+
cell_xml_children = cell_xml.children
|
94
|
+
return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?
|
82
95
|
|
83
96
|
# NOTE: This is error prone, to_i will silently turn a nil into a 0.
|
84
97
|
# This works by coincidence because Format[0] is General.
|
85
|
-
style = cell_xml[
|
86
|
-
format = styles.style_format(style)
|
87
|
-
value_type = cell_value_type(cell_xml['t'], format)
|
98
|
+
style = cell_xml["s"].to_i
|
88
99
|
formula = nil
|
89
100
|
|
90
|
-
|
101
|
+
cell_xml_children.each do |cell|
|
91
102
|
case cell.name
|
92
103
|
when 'is'
|
93
|
-
cell.
|
94
|
-
|
95
|
-
|
96
|
-
end
|
104
|
+
content = cell.search('t').map(&:content).join
|
105
|
+
unless content.empty?
|
106
|
+
return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
|
97
107
|
end
|
98
108
|
when 'f'
|
99
109
|
formula = cell.content
|
100
110
|
when 'v'
|
101
|
-
|
111
|
+
format = style_format(style)
|
112
|
+
value_type = cell_value_type(cell_xml["t"], format)
|
113
|
+
|
114
|
+
return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
|
102
115
|
end
|
103
116
|
end
|
117
|
+
|
118
|
+
create_empty_cell(coordinate, empty_cell)
|
119
|
+
end
|
120
|
+
|
121
|
+
def create_empty_cell(coordinate, empty_cell)
|
122
|
+
if empty_cell
|
123
|
+
Excelx::Cell::Empty.new(coordinate)
|
124
|
+
end
|
104
125
|
end
|
105
126
|
|
106
|
-
def create_cell_from_value(value_type, cell, formula, format, style, hyperlink,
|
127
|
+
def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
|
107
128
|
# NOTE: format.to_s can replace excelx_type as an argument for
|
108
129
|
# Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
|
109
130
|
# it will break some brittle tests.
|
@@ -119,11 +140,12 @@ module Roo
|
|
119
140
|
# 3. formula
|
120
141
|
case value_type
|
121
142
|
when :shared
|
122
|
-
|
123
|
-
|
143
|
+
cell_content = cell.content.to_i
|
144
|
+
value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
|
145
|
+
Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
|
124
146
|
when :boolean, :string
|
125
147
|
value = cell.content
|
126
|
-
Excelx::Cell.
|
148
|
+
Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
|
127
149
|
when :time, :datetime
|
128
150
|
cell_content = cell.content.to_f
|
129
151
|
# NOTE: A date will be a whole number. A time will have be > 1. And
|
@@ -142,38 +164,40 @@ module Roo
|
|
142
164
|
else
|
143
165
|
:date
|
144
166
|
end
|
145
|
-
|
167
|
+
base_value = cell_type == :date ? base_date : base_timestamp
|
168
|
+
Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
|
146
169
|
when :date
|
147
|
-
Excelx::Cell.
|
170
|
+
Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
|
148
171
|
else
|
149
|
-
Excelx::Cell.
|
172
|
+
Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
|
150
173
|
end
|
151
174
|
end
|
152
175
|
|
153
|
-
def
|
154
|
-
|
176
|
+
def extract_hyperlinks(relationships)
|
177
|
+
return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
|
155
178
|
|
156
|
-
|
157
|
-
|
179
|
+
hyperlinks.each_with_object({}) do |hyperlink, hash|
|
180
|
+
if relationship = relationships[hyperlink['id']]
|
181
|
+
target_link = relationship['Target']
|
182
|
+
target_link += "##{hyperlink['location']}" if hyperlink['location']
|
158
183
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
|
163
|
-
[::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
|
184
|
+
Roo::Utils.coordinates_in_range(hyperlink["ref"].to_s) do |coord|
|
185
|
+
hash[coord] = target_link
|
186
|
+
end
|
164
187
|
end
|
165
|
-
end
|
188
|
+
end
|
166
189
|
end
|
167
190
|
|
168
191
|
def expand_merged_ranges(cells)
|
169
192
|
# Extract merged ranges from xml
|
170
193
|
merges = {}
|
171
194
|
doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
195
|
+
src, dst = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
|
196
|
+
next unless cells[src]
|
197
|
+
for row in src[0]..dst[0] do
|
198
|
+
for col in src[1]..dst[1] do
|
199
|
+
next if row == src[0] && col == src[1]
|
200
|
+
merges[[row, col]] = src
|
177
201
|
end
|
178
202
|
end
|
179
203
|
end
|
@@ -184,10 +208,23 @@ module Roo
|
|
184
208
|
end
|
185
209
|
|
186
210
|
def extract_cells(relationships)
|
187
|
-
extracted_cells =
|
188
|
-
|
189
|
-
|
190
|
-
|
211
|
+
extracted_cells = {}
|
212
|
+
empty_cell = @options[:empty_cell]
|
213
|
+
|
214
|
+
doc.xpath('/worksheet/sheetData/row').each.with_index(1) do |row_xml, ycoord|
|
215
|
+
row_xml.xpath('c').each.with_index(1) do |cell_xml, xcoord|
|
216
|
+
r = cell_xml['r']
|
217
|
+
coordinate =
|
218
|
+
if r.nil?
|
219
|
+
::Roo::Excelx::Coordinate.new(ycoord, xcoord)
|
220
|
+
else
|
221
|
+
::Roo::Utils.extract_coordinate(r)
|
222
|
+
end
|
223
|
+
|
224
|
+
cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
|
225
|
+
extracted_cells[coordinate] = cell if cell
|
226
|
+
end
|
227
|
+
end
|
191
228
|
|
192
229
|
expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
|
193
230
|
|
@@ -196,9 +233,25 @@ module Roo
|
|
196
233
|
|
197
234
|
def extract_dimensions
|
198
235
|
Roo::Utils.each_element(@path, 'dimension') do |dimension|
|
199
|
-
return dimension
|
236
|
+
return dimension["ref"]
|
200
237
|
end
|
201
238
|
end
|
239
|
+
|
240
|
+
def style_format(style)
|
241
|
+
@shared.styles.style_format(style)
|
242
|
+
end
|
243
|
+
|
244
|
+
def base_date
|
245
|
+
@shared.base_date
|
246
|
+
end
|
247
|
+
|
248
|
+
def base_timestamp
|
249
|
+
@shared.base_timestamp
|
250
|
+
end
|
251
|
+
|
252
|
+
def shared_strings
|
253
|
+
@shared.shared_strings
|
254
|
+
end
|
202
255
|
end
|
203
256
|
end
|
204
257
|
end
|
data/lib/roo/excelx/styles.rb
CHANGED
@@ -51,13 +51,13 @@ module Roo
|
|
51
51
|
xfs.children.map do |xf|
|
52
52
|
xf['numFmtId']
|
53
53
|
end
|
54
|
-
end
|
54
|
+
end.compact
|
55
55
|
end
|
56
56
|
|
57
57
|
def extract_num_fmts
|
58
|
-
|
59
|
-
[num_fmt['numFmtId']
|
60
|
-
end
|
58
|
+
doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash|
|
59
|
+
hash[num_fmt['numFmtId']] = num_fmt['formatCode']
|
60
|
+
end
|
61
61
|
end
|
62
62
|
end
|
63
63
|
end
|
data/lib/roo/excelx/workbook.rb
CHANGED
@@ -29,13 +29,18 @@ module Roo
|
|
29
29
|
|
30
30
|
# aka labels
|
31
31
|
def defined_names
|
32
|
-
|
32
|
+
doc.xpath('//definedName').each_with_object({}) do |defined_name, hash|
|
33
33
|
# "Sheet1!$C$5"
|
34
34
|
sheet, coordinates = defined_name.text.split('!$', 2)
|
35
|
+
next unless coordinates
|
35
36
|
col, row = coordinates.split('$')
|
36
37
|
name = defined_name['name']
|
37
|
-
[name
|
38
|
-
end
|
38
|
+
hash[name] = Label.new(name, sheet, row, col)
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def base_timestamp
|
43
|
+
@base_timestamp ||= base_date.to_datetime.to_time.to_i
|
39
44
|
end
|
40
45
|
|
41
46
|
def base_date
|
data/lib/roo/excelx.rb
CHANGED
@@ -1,13 +1,18 @@
|
|
1
1
|
require 'nokogiri'
|
2
2
|
require 'zip/filesystem'
|
3
3
|
require 'roo/link'
|
4
|
+
require 'roo/tempdir'
|
4
5
|
require 'roo/utils'
|
5
6
|
require 'forwardable'
|
7
|
+
require 'set'
|
6
8
|
|
7
9
|
module Roo
|
8
10
|
class Excelx < Roo::Base
|
11
|
+
extend Roo::Tempdir
|
9
12
|
extend Forwardable
|
10
13
|
|
14
|
+
ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
|
15
|
+
|
11
16
|
require 'roo/excelx/shared'
|
12
17
|
require 'roo/excelx/workbook'
|
13
18
|
require 'roo/excelx/shared_strings'
|
@@ -19,8 +24,9 @@ module Roo
|
|
19
24
|
require 'roo/excelx/sheet_doc'
|
20
25
|
require 'roo/excelx/coordinate'
|
21
26
|
require 'roo/excelx/format'
|
27
|
+
require 'roo/excelx/images'
|
22
28
|
|
23
|
-
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
|
29
|
+
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
|
24
30
|
ExceedsMaxError = Class.new(StandardError)
|
25
31
|
|
26
32
|
# initialization and opening of a spreadsheet file
|
@@ -33,27 +39,38 @@ module Roo
|
|
33
39
|
cell_max = options.delete(:cell_max)
|
34
40
|
sheet_options = {}
|
35
41
|
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
|
42
|
+
sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
|
43
|
+
sheet_options[:empty_cell] = (options[:empty_cell] || false)
|
44
|
+
shared_options = {}
|
36
45
|
|
46
|
+
shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
|
37
47
|
unless is_stream?(filename_or_stream)
|
38
48
|
file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
|
39
|
-
basename =
|
49
|
+
basename = find_basename(filename_or_stream)
|
40
50
|
end
|
41
51
|
|
42
|
-
|
43
|
-
|
52
|
+
# NOTE: Create temp directory and allow Ruby to cleanup the temp directory
|
53
|
+
# when the object is garbage collected. Initially, the finalizer was
|
54
|
+
# created in the Roo::Tempdir module, but that led to a segfault
|
55
|
+
# when testing in Ruby 2.4.0.
|
56
|
+
@tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
|
57
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
|
58
|
+
|
59
|
+
@shared = Shared.new(@tmpdir, shared_options)
|
44
60
|
@filename = local_filename(filename_or_stream, @tmpdir, packed)
|
45
61
|
process_zipfile(@filename || filename_or_stream)
|
46
62
|
|
47
|
-
@sheet_names =
|
48
|
-
unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
|
49
|
-
sheet['name']
|
50
|
-
end
|
51
|
-
end.compact
|
63
|
+
@sheet_names = []
|
52
64
|
@sheets = []
|
53
|
-
@sheets_by_name =
|
54
|
-
|
55
|
-
|
56
|
-
|
65
|
+
@sheets_by_name = {}
|
66
|
+
|
67
|
+
workbook.sheets.each_with_index do |sheet, index|
|
68
|
+
next if options[:only_visible_sheets] && sheet['state'] == 'hidden'
|
69
|
+
|
70
|
+
sheet_name = sheet['name']
|
71
|
+
@sheet_names << sheet_name
|
72
|
+
@sheets_by_name[sheet_name] = @sheets[index] = Sheet.new(sheet_name, @shared, index, sheet_options)
|
73
|
+
end
|
57
74
|
|
58
75
|
if cell_max
|
59
76
|
cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
|
@@ -61,9 +78,9 @@ module Roo
|
|
61
78
|
end
|
62
79
|
|
63
80
|
super
|
64
|
-
rescue
|
65
|
-
|
66
|
-
raise
|
81
|
+
rescue
|
82
|
+
self.class.finalize_tempdirs(object_id)
|
83
|
+
raise
|
67
84
|
end
|
68
85
|
|
69
86
|
def method_missing(method, *args)
|
@@ -82,7 +99,12 @@ module Roo
|
|
82
99
|
def sheet_for(sheet)
|
83
100
|
sheet ||= default_sheet
|
84
101
|
validate_sheet!(sheet)
|
85
|
-
@sheets_by_name[sheet]
|
102
|
+
@sheets_by_name[sheet] || @sheets[sheet]
|
103
|
+
end
|
104
|
+
|
105
|
+
def images(sheet = nil)
|
106
|
+
images_names = sheet_for(sheet).images.map(&:last)
|
107
|
+
images_names.map { |iname| image_files.find { |ifile| ifile[iname] } }
|
86
108
|
end
|
87
109
|
|
88
110
|
# Returns the content of a spreadsheet-cell.
|
@@ -194,6 +216,13 @@ module Roo
|
|
194
216
|
safe_send(sheet_for(sheet).cells[key], :cell_value)
|
195
217
|
end
|
196
218
|
|
219
|
+
# returns the internal value of an excelx cell
|
220
|
+
# Note: this is only available within the Excelx class
|
221
|
+
def formatted_value(row, col, sheet = nil)
|
222
|
+
key = normalize(row, col)
|
223
|
+
safe_send(sheet_for(sheet).cells[key], :formatted_value)
|
224
|
+
end
|
225
|
+
|
197
226
|
# returns the internal format of an excel cell
|
198
227
|
def excelx_format(row, col, sheet = nil)
|
199
228
|
key = normalize(row, col)
|
@@ -204,7 +233,7 @@ module Roo
|
|
204
233
|
sheet = sheet_for(sheet)
|
205
234
|
key = normalize(row, col)
|
206
235
|
cell = sheet.cells[key]
|
207
|
-
!cell || cell.empty? ||
|
236
|
+
!cell || cell.empty? ||
|
208
237
|
(row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
|
209
238
|
end
|
210
239
|
|
@@ -306,7 +335,7 @@ module Roo
|
|
306
335
|
|
307
336
|
wb.extract(path)
|
308
337
|
workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
|
309
|
-
workbook_doc.xpath('//sheet').map { |s| s
|
338
|
+
workbook_doc.xpath('//sheet').map { |s| s['id'] }
|
310
339
|
end
|
311
340
|
|
312
341
|
# Internal
|
@@ -330,24 +359,22 @@ module Roo
|
|
330
359
|
|
331
360
|
wb_rels.extract(path)
|
332
361
|
rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
|
333
|
-
worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
|
334
362
|
|
335
363
|
relationships = rels_doc.xpath('//Relationship').select do |relationship|
|
336
|
-
relationship
|
364
|
+
worksheet_types.include? relationship['Type']
|
337
365
|
end
|
338
366
|
|
339
|
-
relationships.
|
340
|
-
|
341
|
-
id = attributes['Id']
|
342
|
-
hash[id.value] = attributes['Target'].value
|
343
|
-
hash
|
367
|
+
relationships.each_with_object({}) do |relationship, hash|
|
368
|
+
hash[relationship['Id']] = relationship['Target']
|
344
369
|
end
|
345
370
|
end
|
346
371
|
|
372
|
+
# Extracts the sheets in order, but it will ignore sheets that are not
|
373
|
+
# worksheets.
|
347
374
|
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
|
348
|
-
sheet_ids.each_with_index do |id, i|
|
375
|
+
(sheet_ids & sheets.keys).each_with_index do |id, i|
|
349
376
|
name = sheets[id]
|
350
|
-
entry = entries.find { |e| e.name =~ /#{name}$/ }
|
377
|
+
entry = entries.find { |e| "/#{e.name}" =~ /#{name}$/ }
|
351
378
|
path = "#{tmpdir}/roo_sheet#{i + 1}"
|
352
379
|
sheet_files << path
|
353
380
|
@sheet_files << path
|
@@ -355,6 +382,15 @@ module Roo
|
|
355
382
|
end
|
356
383
|
end
|
357
384
|
|
385
|
+
def extract_images(entries, tmpdir)
|
386
|
+
img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
|
387
|
+
img_entries.each do |entry|
|
388
|
+
path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
|
389
|
+
image_files << path
|
390
|
+
entry.extract(path)
|
391
|
+
end
|
392
|
+
end
|
393
|
+
|
358
394
|
# Extracts all needed files from the zip file
|
359
395
|
def process_zipfile(zipfilename_or_stream)
|
360
396
|
@sheet_files = []
|
@@ -388,10 +424,16 @@ module Roo
|
|
388
424
|
sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
|
389
425
|
sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
|
390
426
|
extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
|
427
|
+
extract_images(entries, @tmpdir)
|
391
428
|
|
392
429
|
entries.each do |entry|
|
393
430
|
path =
|
394
431
|
case entry.name.downcase
|
432
|
+
when /richdata/
|
433
|
+
# FIXME: Ignore richData as parsing is not implemented yet and can cause
|
434
|
+
# Zip::DestinationFileExistsError when including a second "styles.xml" entry
|
435
|
+
# see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2
|
436
|
+
nil
|
395
437
|
when /sharedstrings.xml$/
|
396
438
|
"#{@tmpdir}/roo_sharedStrings.xml"
|
397
439
|
when /styles.xml$/
|
@@ -404,34 +446,35 @@ module Roo
|
|
404
446
|
# ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
|
405
447
|
nr = Regexp.last_match[1].to_i
|
406
448
|
comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
|
449
|
+
when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
|
450
|
+
# NOTE: Chart sheet relationship files were interfering with
|
451
|
+
# worksheets.
|
452
|
+
nil
|
407
453
|
when /sheet([0-9]+).xml.rels$/
|
408
454
|
# FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
|
409
455
|
# it also stores the location for sharedStrings, comments,
|
410
456
|
# drawings, etc.
|
411
457
|
nr = Regexp.last_match[1].to_i
|
412
458
|
rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
|
459
|
+
when /drawing([0-9]+).xml.rels$/
|
460
|
+
# Extracting drawing relationships to make images lists for each sheet
|
461
|
+
nr = Regexp.last_match[1].to_i
|
462
|
+
image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}"
|
413
463
|
end
|
414
464
|
|
415
465
|
entry.extract(path) if path
|
416
466
|
end
|
417
467
|
end
|
418
468
|
|
419
|
-
|
420
|
-
|
421
|
-
def styles
|
422
|
-
@styles ||= Styles.new(File.join(@tmpdir, 'roo_styles.xml'))
|
423
|
-
end
|
424
|
-
|
425
|
-
def shared_strings
|
426
|
-
@shared_strings ||= SharedStrings.new(File.join(@tmpdir, 'roo_sharedStrings.xml'))
|
427
|
-
end
|
428
|
-
|
429
|
-
def workbook
|
430
|
-
@workbook ||= Workbook.new(File.join(@tmpdir, 'roo_workbook.xml'))
|
469
|
+
def safe_send(object, method, *args)
|
470
|
+
object.send(method, *args) if object&.respond_to?(method)
|
431
471
|
end
|
432
472
|
|
433
|
-
def
|
434
|
-
|
473
|
+
def worksheet_types
|
474
|
+
[
|
475
|
+
'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional
|
476
|
+
'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict
|
477
|
+
]
|
435
478
|
end
|
436
479
|
end
|
437
480
|
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Roo
|
2
|
+
module Formatters
|
3
|
+
module Base
|
4
|
+
# converts an integer value to a time string like '02:05:06'
|
5
|
+
def integer_to_timestring(content)
|
6
|
+
h = (content / 3600.0).floor
|
7
|
+
content -= h * 3600
|
8
|
+
m = (content / 60.0).floor
|
9
|
+
content -= m * 60
|
10
|
+
s = content
|
11
|
+
Kernel.format("%02d:%02d:%02d", h, m, s)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|