roo 2.3.0 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +17 -0
  3. data/.github/issue_template.md +16 -0
  4. data/.github/pull_request_template.md +14 -0
  5. data/.github/workflows/pull-request.yml +15 -0
  6. data/.github/workflows/ruby.yml +34 -0
  7. data/.gitignore +4 -0
  8. data/.rubocop.yml +186 -0
  9. data/CHANGELOG.md +148 -0
  10. data/Gemfile +4 -4
  11. data/LICENSE +2 -0
  12. data/README.md +84 -27
  13. data/Rakefile +1 -1
  14. data/lib/roo/base.rb +111 -237
  15. data/lib/roo/constants.rb +5 -3
  16. data/lib/roo/csv.rb +106 -85
  17. data/lib/roo/errors.rb +2 -0
  18. data/lib/roo/excelx/cell/base.rb +26 -12
  19. data/lib/roo/excelx/cell/boolean.rb +9 -6
  20. data/lib/roo/excelx/cell/date.rb +7 -7
  21. data/lib/roo/excelx/cell/datetime.rb +50 -44
  22. data/lib/roo/excelx/cell/empty.rb +3 -2
  23. data/lib/roo/excelx/cell/number.rb +60 -47
  24. data/lib/roo/excelx/cell/string.rb +3 -3
  25. data/lib/roo/excelx/cell/time.rb +17 -16
  26. data/lib/roo/excelx/cell.rb +11 -7
  27. data/lib/roo/excelx/comments.rb +3 -3
  28. data/lib/roo/excelx/coordinate.rb +11 -4
  29. data/lib/roo/excelx/extractor.rb +20 -3
  30. data/lib/roo/excelx/format.rb +38 -31
  31. data/lib/roo/excelx/images.rb +26 -0
  32. data/lib/roo/excelx/relationships.rb +12 -4
  33. data/lib/roo/excelx/shared.rb +10 -3
  34. data/lib/roo/excelx/shared_strings.rb +113 -9
  35. data/lib/roo/excelx/sheet.rb +49 -10
  36. data/lib/roo/excelx/sheet_doc.rb +101 -48
  37. data/lib/roo/excelx/styles.rb +4 -4
  38. data/lib/roo/excelx/workbook.rb +8 -3
  39. data/lib/roo/excelx.rb +85 -42
  40. data/lib/roo/formatters/base.rb +15 -0
  41. data/lib/roo/formatters/csv.rb +84 -0
  42. data/lib/roo/formatters/matrix.rb +23 -0
  43. data/lib/roo/formatters/xml.rb +31 -0
  44. data/lib/roo/formatters/yaml.rb +40 -0
  45. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  46. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  47. data/lib/roo/open_office.rb +41 -27
  48. data/lib/roo/spreadsheet.rb +8 -2
  49. data/lib/roo/tempdir.rb +24 -0
  50. data/lib/roo/utils.rb +76 -26
  51. data/lib/roo/version.rb +1 -1
  52. data/lib/roo.rb +5 -0
  53. data/roo.gemspec +22 -12
  54. data/spec/lib/roo/base_spec.rb +65 -3
  55. data/spec/lib/roo/csv_spec.rb +19 -0
  56. data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
  57. data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
  58. data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
  59. data/spec/lib/roo/excelx_spec.rb +237 -5
  60. data/spec/lib/roo/openoffice_spec.rb +2 -2
  61. data/spec/lib/roo/spreadsheet_spec.rb +1 -1
  62. data/spec/lib/roo/strict_spec.rb +43 -0
  63. data/spec/lib/roo/utils_spec.rb +22 -9
  64. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  65. data/spec/lib/roo_spec.rb +0 -0
  66. data/spec/spec_helper.rb +2 -7
  67. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  68. data/test/excelx/cell/test_base.rb +6 -2
  69. data/test/excelx/cell/test_boolean.rb +1 -3
  70. data/test/excelx/cell/test_date.rb +1 -6
  71. data/test/excelx/cell/test_datetime.rb +7 -10
  72. data/test/excelx/cell/test_empty.rb +12 -2
  73. data/test/excelx/cell/test_number.rb +28 -4
  74. data/test/excelx/cell/test_string.rb +21 -3
  75. data/test/excelx/cell/test_time.rb +7 -10
  76. data/test/excelx/test_coordinate.rb +51 -0
  77. data/test/formatters/test_csv.rb +136 -0
  78. data/test/formatters/test_matrix.rb +76 -0
  79. data/test/formatters/test_xml.rb +78 -0
  80. data/test/formatters/test_yaml.rb +20 -0
  81. data/test/helpers/test_accessing_files.rb +81 -0
  82. data/test/helpers/test_comments.rb +43 -0
  83. data/test/helpers/test_formulas.rb +9 -0
  84. data/test/helpers/test_labels.rb +103 -0
  85. data/test/helpers/test_sheets.rb +55 -0
  86. data/test/helpers/test_styles.rb +62 -0
  87. data/test/roo/test_base.rb +182 -0
  88. data/test/roo/test_csv.rb +88 -0
  89. data/test/roo/test_excelx.rb +360 -0
  90. data/test/roo/test_libre_office.rb +9 -0
  91. data/test/roo/test_open_office.rb +289 -0
  92. data/test/test_helper.rb +129 -14
  93. data/test/test_roo.rb +60 -1765
  94. metadata +91 -21
  95. data/.travis.yml +0 -14
@@ -4,11 +4,15 @@ module Roo
4
4
  class Sheet
5
5
  extend Forwardable
6
6
 
7
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
7
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared
8
+
9
+ attr_reader :images
8
10
 
9
11
  def initialize(name, shared, sheet_index, options = {})
10
12
  @name = name
11
13
  @shared = shared
14
+ @sheet_index = sheet_index
15
+ @images = Images.new(image_rels[sheet_index]).list
12
16
  @rels = Relationships.new(rels_files[sheet_index])
13
17
  @comments = Comments.new(comments_files[sheet_index])
14
18
  @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
@@ -19,7 +23,14 @@ module Roo
19
23
  end
20
24
 
21
25
  def present_cells
22
- @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
26
+ @present_cells ||= begin
27
+ warn %{
28
+ [DEPRECATION] present_cells is deprecated. Alternate:
29
+ with activesupport => cells[key].presence
30
+ without activesupport => cells[key]&.presence
31
+ }
32
+ cells.select { |_, cell| cell&.presence }
33
+ end
23
34
  end
24
35
 
25
36
  # Yield each row as array of Excelx::Cell objects
@@ -39,33 +50,33 @@ module Roo
39
50
 
40
51
  def row(row_number)
41
52
  first_column.upto(last_column).map do |col|
42
- cells[[row_number, col]]
43
- end.map { |cell| cell && cell.value }
53
+ cells[[row_number, col]]&.value
54
+ end
44
55
  end
45
56
 
46
57
  def column(col_number)
47
58
  first_row.upto(last_row).map do |row|
48
- cells[[row, col_number]]
49
- end.map { |cell| cell && cell.value }
59
+ cells[[row, col_number]]&.value
60
+ end
50
61
  end
51
62
 
52
63
  # returns the number of the first non-empty row
53
64
  def first_row
54
- @first_row ||= present_cells.keys.map { |row, _| row }.min
65
+ @first_row ||= first_last_row_col[:first_row]
55
66
  end
56
67
 
57
68
  def last_row
58
- @last_row ||= present_cells.keys.map { |row, _| row }.max
69
+ @last_row ||= first_last_row_col[:last_row]
59
70
  end
60
71
 
61
72
  # returns the number of the first non-empty column
62
73
  def first_column
63
- @first_column ||= present_cells.keys.map { |_, col| col }.min
74
+ @first_column ||= first_last_row_col[:first_column]
64
75
  end
65
76
 
66
77
  # returns the number of the last non-empty column
67
78
  def last_column
68
- @last_column ||= present_cells.keys.map { |_, col| col }.max
79
+ @last_column ||= first_last_row_col[:last_column]
69
80
  end
70
81
 
71
82
  def excelx_format(key)
@@ -107,6 +118,34 @@ module Roo
107
118
  (cell.coordinate.column - 1 - last_column).times { pad << nil }
108
119
  pad
109
120
  end
121
+
122
+ def first_last_row_col
123
+ @first_last_row_col ||= begin
124
+ first_row = last_row = first_col = last_col = nil
125
+
126
+ cells.each do |(row, col), cell|
127
+ next unless cell&.presence
128
+ first_row ||= row
129
+ last_row ||= row
130
+ first_col ||= col
131
+ last_col ||= col
132
+
133
+ if row > last_row
134
+ last_row = row
135
+ elsif row < first_row
136
+ first_row = row
137
+ end
138
+
139
+ if col > last_col
140
+ last_col = col
141
+ elsif col < first_col
142
+ first_col = col
143
+ end
144
+ end
145
+
146
+ {first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col}
147
+ end
148
+ end
110
149
  end
111
150
  end
112
151
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'forwardable'
2
4
  require 'roo/excelx/extractor'
3
5
 
@@ -5,7 +7,7 @@ module Roo
5
7
  class Excelx
6
8
  class SheetDoc < Excelx::Extractor
7
9
  extend Forwardable
8
- delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
10
+ delegate [:workbook] => :@shared
9
11
 
10
12
  def initialize(path, relationships, shared, options = {})
11
13
  super(path)
@@ -19,7 +21,12 @@ module Roo
19
21
  end
20
22
 
21
23
  def hyperlinks(relationships)
22
- @hyperlinks ||= extract_hyperlinks(relationships)
24
+ # If you're sure you're not going to need this hyperlinks you can discard it
25
+ @hyperlinks ||= if @options[:no_hyperlinks] || !relationships.include_type?("hyperlink")
26
+ {}
27
+ else
28
+ extract_hyperlinks(relationships)
29
+ end
23
30
  end
24
31
 
25
32
  # Get the dimensions for the sheet.
@@ -39,8 +46,10 @@ module Roo
39
46
  def each_cell(row_xml)
40
47
  return [] unless row_xml
41
48
  row_xml.children.each do |cell_element|
42
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
43
- yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
49
+ coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
50
+ hyperlinks = hyperlinks(@relationships)[coordinate]
51
+
52
+ yield cell_from_xml(cell_element, hyperlinks, coordinate)
44
53
  end
45
54
  end
46
55
 
@@ -48,13 +57,13 @@ module Roo
48
57
 
49
58
  def cell_value_type(type, format)
50
59
  case type
51
- when 's'.freeze
60
+ when 's'
52
61
  :shared
53
- when 'b'.freeze
62
+ when 'b'
54
63
  :boolean
55
- when 'str'.freeze
64
+ when 'str'
56
65
  :string
57
- when 'inlineStr'.freeze
66
+ when 'inlineStr'
58
67
  :inlinestr
59
68
  else
60
69
  Excelx::Format.to_type(format)
@@ -69,41 +78,53 @@ module Roo
69
78
  # </c>
70
79
  # hyperlink - a String for the hyperlink for the cell or nil when no
71
80
  # hyperlink is present.
81
+ # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
82
+ # or nil to extract coordinate from cell_xml.
83
+ # empty_cell - an Optional Boolean value.
72
84
  #
73
85
  # Examples
74
86
  #
75
- # cells_from_xml(<Nokogiri::XML::Element>, nil)
87
+ # cells_from_xml(<Nokogiri::XML::Element>, nil, nil)
76
88
  # # => <Excelx::Cell::String>
77
89
  #
78
90
  # Returns a type of <Excelx::Cell>.
79
- def cell_from_xml(cell_xml, hyperlink)
80
- coordinate = extract_coordinate(cell_xml['r'])
81
- return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
91
+ def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true)
92
+ coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
93
+ cell_xml_children = cell_xml.children
94
+ return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?
82
95
 
83
96
  # NOTE: This is error prone, to_i will silently turn a nil into a 0.
84
97
  # This works by coincidence because Format[0] is General.
85
- style = cell_xml['s'].to_i
86
- format = styles.style_format(style)
87
- value_type = cell_value_type(cell_xml['t'], format)
98
+ style = cell_xml["s"].to_i
88
99
  formula = nil
89
100
 
90
- cell_xml.children.each do |cell|
101
+ cell_xml_children.each do |cell|
91
102
  case cell.name
92
103
  when 'is'
93
- cell.children.each do |inline_str|
94
- if inline_str.name == 't'
95
- return Excelx::Cell.create_cell(:string, inline_str.content, formula, style, hyperlink, coordinate)
96
- end
104
+ content = cell.search('t').map(&:content).join
105
+ unless content.empty?
106
+ return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
97
107
  end
98
108
  when 'f'
99
109
  formula = cell.content
100
110
  when 'v'
101
- return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
111
+ format = style_format(style)
112
+ value_type = cell_value_type(cell_xml["t"], format)
113
+
114
+ return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
102
115
  end
103
116
  end
117
+
118
+ create_empty_cell(coordinate, empty_cell)
119
+ end
120
+
121
+ def create_empty_cell(coordinate, empty_cell)
122
+ if empty_cell
123
+ Excelx::Cell::Empty.new(coordinate)
124
+ end
104
125
  end
105
126
 
106
- def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
127
+ def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
107
128
  # NOTE: format.to_s can replace excelx_type as an argument for
108
129
  # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
109
130
  # it will break some brittle tests.
@@ -119,11 +140,12 @@ module Roo
119
140
  # 3. formula
120
141
  case value_type
121
142
  when :shared
122
- value = shared_strings[cell.content.to_i]
123
- Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
143
+ cell_content = cell.content.to_i
144
+ value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
145
+ Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
124
146
  when :boolean, :string
125
147
  value = cell.content
126
- Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
148
+ Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
127
149
  when :time, :datetime
128
150
  cell_content = cell.content.to_f
129
151
  # NOTE: A date will be a whole number. A time will have be > 1. And
@@ -142,38 +164,40 @@ module Roo
142
164
  else
143
165
  :date
144
166
  end
145
- Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
167
+ base_value = cell_type == :date ? base_date : base_timestamp
168
+ Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
146
169
  when :date
147
- Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
170
+ Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
148
171
  else
149
- Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
172
+ Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
150
173
  end
151
174
  end
152
175
 
153
- def extract_coordinate(coordinate)
154
- row, column = ::Roo::Utils.split_coordinate(coordinate)
176
+ def extract_hyperlinks(relationships)
177
+ return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
155
178
 
156
- Excelx::Coordinate.new(row, column)
157
- end
179
+ hyperlinks.each_with_object({}) do |hyperlink, hash|
180
+ if relationship = relationships[hyperlink['id']]
181
+ target_link = relationship['Target']
182
+ target_link += "##{hyperlink['location']}" if hyperlink['location']
158
183
 
159
- def extract_hyperlinks(relationships)
160
- # FIXME: select the valid hyperlinks and then map those.
161
- Hash[doc.xpath('/worksheet/hyperlinks/hyperlink').map do |hyperlink|
162
- if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
163
- [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
184
+ Roo::Utils.coordinates_in_range(hyperlink["ref"].to_s) do |coord|
185
+ hash[coord] = target_link
186
+ end
164
187
  end
165
- end.compact]
188
+ end
166
189
  end
167
190
 
168
191
  def expand_merged_ranges(cells)
169
192
  # Extract merged ranges from xml
170
193
  merges = {}
171
194
  doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
172
- tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
173
- for row in tl[0]..br[0] do
174
- for col in tl[1]..br[1] do
175
- next if row == tl[0] && col == tl[1]
176
- merges[[row, col]] = tl
195
+ src, dst = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
196
+ next unless cells[src]
197
+ for row in src[0]..dst[0] do
198
+ for col in src[1]..dst[1] do
199
+ next if row == src[0] && col == src[1]
200
+ merges[[row, col]] = src
177
201
  end
178
202
  end
179
203
  end
@@ -184,10 +208,23 @@ module Roo
184
208
  end
185
209
 
186
210
  def extract_cells(relationships)
187
- extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
188
- key = ::Roo::Utils.ref_to_key(cell_xml['r'])
189
- [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
190
- end]
211
+ extracted_cells = {}
212
+ empty_cell = @options[:empty_cell]
213
+
214
+ doc.xpath('/worksheet/sheetData/row').each.with_index(1) do |row_xml, ycoord|
215
+ row_xml.xpath('c').each.with_index(1) do |cell_xml, xcoord|
216
+ r = cell_xml['r']
217
+ coordinate =
218
+ if r.nil?
219
+ ::Roo::Excelx::Coordinate.new(ycoord, xcoord)
220
+ else
221
+ ::Roo::Utils.extract_coordinate(r)
222
+ end
223
+
224
+ cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
225
+ extracted_cells[coordinate] = cell if cell
226
+ end
227
+ end
191
228
 
192
229
  expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
193
230
 
@@ -196,9 +233,25 @@ module Roo
196
233
 
197
234
  def extract_dimensions
198
235
  Roo::Utils.each_element(@path, 'dimension') do |dimension|
199
- return dimension.attributes['ref'].value
236
+ return dimension["ref"]
200
237
  end
201
238
  end
239
+
240
+ def style_format(style)
241
+ @shared.styles.style_format(style)
242
+ end
243
+
244
+ def base_date
245
+ @shared.base_date
246
+ end
247
+
248
+ def base_timestamp
249
+ @shared.base_timestamp
250
+ end
251
+
252
+ def shared_strings
253
+ @shared.shared_strings
254
+ end
202
255
  end
203
256
  end
204
257
  end
@@ -51,13 +51,13 @@ module Roo
51
51
  xfs.children.map do |xf|
52
52
  xf['numFmtId']
53
53
  end
54
- end
54
+ end.compact
55
55
  end
56
56
 
57
57
  def extract_num_fmts
58
- Hash[doc.xpath('//numFmt').map do |num_fmt|
59
- [num_fmt['numFmtId'], num_fmt['formatCode']]
60
- end]
58
+ doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash|
59
+ hash[num_fmt['numFmtId']] = num_fmt['formatCode']
60
+ end
61
61
  end
62
62
  end
63
63
  end
@@ -29,13 +29,18 @@ module Roo
29
29
 
30
30
  # aka labels
31
31
  def defined_names
32
- Hash[doc.xpath('//definedName').map do |defined_name|
32
+ doc.xpath('//definedName').each_with_object({}) do |defined_name, hash|
33
33
  # "Sheet1!$C$5"
34
34
  sheet, coordinates = defined_name.text.split('!$', 2)
35
+ next unless coordinates
35
36
  col, row = coordinates.split('$')
36
37
  name = defined_name['name']
37
- [name, Label.new(name, sheet, row, col)]
38
- end]
38
+ hash[name] = Label.new(name, sheet, row, col)
39
+ end
40
+ end
41
+
42
+ def base_timestamp
43
+ @base_timestamp ||= base_date.to_datetime.to_time.to_i
39
44
  end
40
45
 
41
46
  def base_date
data/lib/roo/excelx.rb CHANGED
@@ -1,13 +1,18 @@
1
1
  require 'nokogiri'
2
2
  require 'zip/filesystem'
3
3
  require 'roo/link'
4
+ require 'roo/tempdir'
4
5
  require 'roo/utils'
5
6
  require 'forwardable'
7
+ require 'set'
6
8
 
7
9
  module Roo
8
10
  class Excelx < Roo::Base
11
+ extend Roo::Tempdir
9
12
  extend Forwardable
10
13
 
14
+ ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
15
+
11
16
  require 'roo/excelx/shared'
12
17
  require 'roo/excelx/workbook'
13
18
  require 'roo/excelx/shared_strings'
@@ -19,8 +24,9 @@ module Roo
19
24
  require 'roo/excelx/sheet_doc'
20
25
  require 'roo/excelx/coordinate'
21
26
  require 'roo/excelx/format'
27
+ require 'roo/excelx/images'
22
28
 
23
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
29
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
24
30
  ExceedsMaxError = Class.new(StandardError)
25
31
 
26
32
  # initialization and opening of a spreadsheet file
@@ -33,27 +39,38 @@ module Roo
33
39
  cell_max = options.delete(:cell_max)
34
40
  sheet_options = {}
35
41
  sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
42
+ sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
43
+ sheet_options[:empty_cell] = (options[:empty_cell] || false)
44
+ shared_options = {}
36
45
 
46
+ shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
37
47
  unless is_stream?(filename_or_stream)
38
48
  file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
39
- basename = File.basename(filename_or_stream)
49
+ basename = find_basename(filename_or_stream)
40
50
  end
41
51
 
42
- @tmpdir = make_tmpdir(basename, options[:tmpdir_root])
43
- @shared = Shared.new(@tmpdir)
52
+ # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
53
+ # when the object is garbage collected. Initially, the finalizer was
54
+ # created in the Roo::Tempdir module, but that led to a segfault
55
+ # when testing in Ruby 2.4.0.
56
+ @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
57
+ ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
58
+
59
+ @shared = Shared.new(@tmpdir, shared_options)
44
60
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
45
61
  process_zipfile(@filename || filename_or_stream)
46
62
 
47
- @sheet_names = workbook.sheets.map do |sheet|
48
- unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
49
- sheet['name']
50
- end
51
- end.compact
63
+ @sheet_names = []
52
64
  @sheets = []
53
- @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
54
- @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
55
- [sheet_name, @sheets[n]]
56
- end]
65
+ @sheets_by_name = {}
66
+
67
+ workbook.sheets.each_with_index do |sheet, index|
68
+ next if options[:only_visible_sheets] && sheet['state'] == 'hidden'
69
+
70
+ sheet_name = sheet['name']
71
+ @sheet_names << sheet_name
72
+ @sheets_by_name[sheet_name] = @sheets[index] = Sheet.new(sheet_name, @shared, index, sheet_options)
73
+ end
57
74
 
58
75
  if cell_max
59
76
  cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
@@ -61,9 +78,9 @@ module Roo
61
78
  end
62
79
 
63
80
  super
64
- rescue => e # clean up any temp files, but only if an error was raised
65
- close
66
- raise e
81
+ rescue
82
+ self.class.finalize_tempdirs(object_id)
83
+ raise
67
84
  end
68
85
 
69
86
  def method_missing(method, *args)
@@ -82,7 +99,12 @@ module Roo
82
99
  def sheet_for(sheet)
83
100
  sheet ||= default_sheet
84
101
  validate_sheet!(sheet)
85
- @sheets_by_name[sheet]
102
+ @sheets_by_name[sheet] || @sheets[sheet]
103
+ end
104
+
105
+ def images(sheet = nil)
106
+ images_names = sheet_for(sheet).images.map(&:last)
107
+ images_names.map { |iname| image_files.find { |ifile| ifile[iname] } }
86
108
  end
87
109
 
88
110
  # Returns the content of a spreadsheet-cell.
@@ -194,6 +216,13 @@ module Roo
194
216
  safe_send(sheet_for(sheet).cells[key], :cell_value)
195
217
  end
196
218
 
219
+ # returns the internal value of an excelx cell
220
+ # Note: this is only available within the Excelx class
221
+ def formatted_value(row, col, sheet = nil)
222
+ key = normalize(row, col)
223
+ safe_send(sheet_for(sheet).cells[key], :formatted_value)
224
+ end
225
+
197
226
  # returns the internal format of an excel cell
198
227
  def excelx_format(row, col, sheet = nil)
199
228
  key = normalize(row, col)
@@ -204,7 +233,7 @@ module Roo
204
233
  sheet = sheet_for(sheet)
205
234
  key = normalize(row, col)
206
235
  cell = sheet.cells[key]
207
- !cell || cell.empty? || (cell.type == :string && cell.value.empty?) ||
236
+ !cell || cell.empty? ||
208
237
  (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
209
238
  end
210
239
 
@@ -306,7 +335,7 @@ module Roo
306
335
 
307
336
  wb.extract(path)
308
337
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
309
- workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
338
+ workbook_doc.xpath('//sheet').map { |s| s['id'] }
310
339
  end
311
340
 
312
341
  # Internal
@@ -330,24 +359,22 @@ module Roo
330
359
 
331
360
  wb_rels.extract(path)
332
361
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
333
- worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
334
362
 
335
363
  relationships = rels_doc.xpath('//Relationship').select do |relationship|
336
- relationship.attributes['Type'].value == worksheet_type
364
+ worksheet_types.include? relationship['Type']
337
365
  end
338
366
 
339
- relationships.inject({}) do |hash, relationship|
340
- attributes = relationship.attributes
341
- id = attributes['Id']
342
- hash[id.value] = attributes['Target'].value
343
- hash
367
+ relationships.each_with_object({}) do |relationship, hash|
368
+ hash[relationship['Id']] = relationship['Target']
344
369
  end
345
370
  end
346
371
 
372
+ # Extracts the sheets in order, but it will ignore sheets that are not
373
+ # worksheets.
347
374
  def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
348
- sheet_ids.each_with_index do |id, i|
375
+ (sheet_ids & sheets.keys).each_with_index do |id, i|
349
376
  name = sheets[id]
350
- entry = entries.find { |e| e.name =~ /#{name}$/ }
377
+ entry = entries.find { |e| "/#{e.name}" =~ /#{name}$/ }
351
378
  path = "#{tmpdir}/roo_sheet#{i + 1}"
352
379
  sheet_files << path
353
380
  @sheet_files << path
@@ -355,6 +382,15 @@ module Roo
355
382
  end
356
383
  end
357
384
 
385
+ def extract_images(entries, tmpdir)
386
+ img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
387
+ img_entries.each do |entry|
388
+ path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
389
+ image_files << path
390
+ entry.extract(path)
391
+ end
392
+ end
393
+
358
394
  # Extracts all needed files from the zip file
359
395
  def process_zipfile(zipfilename_or_stream)
360
396
  @sheet_files = []
@@ -388,10 +424,16 @@ module Roo
388
424
  sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
389
425
  sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
390
426
  extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
427
+ extract_images(entries, @tmpdir)
391
428
 
392
429
  entries.each do |entry|
393
430
  path =
394
431
  case entry.name.downcase
432
+ when /richdata/
433
+ # FIXME: Ignore richData as parsing is not implemented yet and can cause
434
+ # Zip::DestinationFileExistsError when including a second "styles.xml" entry
435
+ # see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2
436
+ nil
395
437
  when /sharedstrings.xml$/
396
438
  "#{@tmpdir}/roo_sharedStrings.xml"
397
439
  when /styles.xml$/
@@ -404,34 +446,35 @@ module Roo
404
446
  # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
405
447
  nr = Regexp.last_match[1].to_i
406
448
  comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
449
+ when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
450
+ # NOTE: Chart sheet relationship files were interfering with
451
+ # worksheets.
452
+ nil
407
453
  when /sheet([0-9]+).xml.rels$/
408
454
  # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
409
455
  # it also stores the location for sharedStrings, comments,
410
456
  # drawings, etc.
411
457
  nr = Regexp.last_match[1].to_i
412
458
  rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
459
+ when /drawing([0-9]+).xml.rels$/
460
+ # Extracting drawing relationships to make images lists for each sheet
461
+ nr = Regexp.last_match[1].to_i
462
+ image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}"
413
463
  end
414
464
 
415
465
  entry.extract(path) if path
416
466
  end
417
467
  end
418
468
 
419
- # NOTE: To reduce memory, styles, shared_strings, workbook can be class
420
- # variables in a Shared module.
421
- def styles
422
- @styles ||= Styles.new(File.join(@tmpdir, 'roo_styles.xml'))
423
- end
424
-
425
- def shared_strings
426
- @shared_strings ||= SharedStrings.new(File.join(@tmpdir, 'roo_sharedStrings.xml'))
427
- end
428
-
429
- def workbook
430
- @workbook ||= Workbook.new(File.join(@tmpdir, 'roo_workbook.xml'))
469
+ def safe_send(object, method, *args)
470
+ object.send(method, *args) if object&.respond_to?(method)
431
471
  end
432
472
 
433
- def safe_send(object, method, *args)
434
- object.send(method, *args) if object && object.respond_to?(method)
473
+ def worksheet_types
474
+ [
475
+ 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional
476
+ 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict
477
+ ]
435
478
  end
436
479
  end
437
480
  end
@@ -0,0 +1,15 @@
1
+ module Roo
2
+ module Formatters
3
+ module Base
4
+ # converts an integer value to a time string like '02:05:06'
5
+ def integer_to_timestring(content)
6
+ h = (content / 3600.0).floor
7
+ content -= h * 3600
8
+ m = (content / 60.0).floor
9
+ content -= m * 60
10
+ s = content
11
+ Kernel.format("%02d:%02d:%02d", h, m, s)
12
+ end
13
+ end
14
+ end
15
+ end