roo 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +5 -5
  2. data/.github/issue_template.md +16 -0
  3. data/.github/pull_request_template.md +14 -0
  4. data/.rubocop.yml +186 -0
  5. data/.travis.yml +12 -7
  6. data/CHANGELOG.md +31 -2
  7. data/LICENSE +2 -0
  8. data/README.md +25 -12
  9. data/lib/roo.rb +4 -1
  10. data/lib/roo/base.rb +65 -56
  11. data/lib/roo/constants.rb +5 -3
  12. data/lib/roo/csv.rb +20 -12
  13. data/lib/roo/excelx.rb +42 -16
  14. data/lib/roo/excelx/cell.rb +10 -6
  15. data/lib/roo/excelx/cell/base.rb +26 -12
  16. data/lib/roo/excelx/cell/boolean.rb +9 -6
  17. data/lib/roo/excelx/cell/date.rb +7 -7
  18. data/lib/roo/excelx/cell/datetime.rb +14 -18
  19. data/lib/roo/excelx/cell/empty.rb +3 -2
  20. data/lib/roo/excelx/cell/number.rb +35 -34
  21. data/lib/roo/excelx/cell/string.rb +3 -3
  22. data/lib/roo/excelx/cell/time.rb +4 -3
  23. data/lib/roo/excelx/comments.rb +3 -3
  24. data/lib/roo/excelx/coordinate.rb +11 -4
  25. data/lib/roo/excelx/extractor.rb +21 -3
  26. data/lib/roo/excelx/format.rb +38 -31
  27. data/lib/roo/excelx/images.rb +26 -0
  28. data/lib/roo/excelx/relationships.rb +3 -3
  29. data/lib/roo/excelx/shared.rb +10 -3
  30. data/lib/roo/excelx/shared_strings.rb +9 -15
  31. data/lib/roo/excelx/sheet.rb +49 -10
  32. data/lib/roo/excelx/sheet_doc.rb +86 -48
  33. data/lib/roo/excelx/styles.rb +3 -3
  34. data/lib/roo/excelx/workbook.rb +7 -3
  35. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  36. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  37. data/lib/roo/open_office.rb +8 -6
  38. data/lib/roo/spreadsheet.rb +1 -1
  39. data/lib/roo/utils.rb +48 -19
  40. data/lib/roo/version.rb +1 -1
  41. data/roo.gemspec +13 -11
  42. data/spec/lib/roo/base_spec.rb +45 -3
  43. data/spec/lib/roo/excelx_spec.rb +125 -31
  44. data/spec/lib/roo/strict_spec.rb +43 -0
  45. data/spec/lib/roo/utils_spec.rb +12 -3
  46. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  47. data/spec/lib/roo_spec.rb +0 -0
  48. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  49. data/test/excelx/cell/test_base.rb +5 -0
  50. data/test/excelx/cell/test_datetime.rb +6 -6
  51. data/test/excelx/cell/test_empty.rb +11 -0
  52. data/test/excelx/cell/test_number.rb +9 -0
  53. data/test/excelx/cell/test_string.rb +20 -0
  54. data/test/excelx/cell/test_time.rb +4 -4
  55. data/test/excelx/test_coordinate.rb +51 -0
  56. data/test/formatters/test_csv.rb +17 -0
  57. data/test/formatters/test_xml.rb +4 -4
  58. data/test/roo/test_base.rb +2 -2
  59. data/test/roo/test_csv.rb +28 -0
  60. data/test/test_helper.rb +13 -0
  61. data/test/test_roo.rb +7 -7
  62. metadata +21 -11
  63. data/.github/ISSUE_TEMPLATE +0 -10
  64. data/Gemfile_ruby2 +0 -30
@@ -4,11 +4,15 @@ module Roo
4
4
  class Sheet
5
5
  extend Forwardable
6
6
 
7
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
7
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared
8
+
9
+ attr_reader :images
8
10
 
9
11
  def initialize(name, shared, sheet_index, options = {})
10
12
  @name = name
11
13
  @shared = shared
14
+ @sheet_index = sheet_index
15
+ @images = Images.new(image_rels[sheet_index]).list
12
16
  @rels = Relationships.new(rels_files[sheet_index])
13
17
  @comments = Comments.new(comments_files[sheet_index])
14
18
  @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
@@ -19,7 +23,14 @@ module Roo
19
23
  end
20
24
 
21
25
  def present_cells
22
- @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
26
+ @present_cells ||= begin
27
+ warn %{
28
+ [DEPRECATION] present_cells is deprecated. Alternate:
29
+ with activesupport => cells[key].presence
30
+ without activesupport => cells[key]&.presence
31
+ }
32
+ cells.select { |_, cell| cell&.presence }
33
+ end
23
34
  end
24
35
 
25
36
  # Yield each row as array of Excelx::Cell objects
@@ -39,33 +50,33 @@ module Roo
39
50
 
40
51
  def row(row_number)
41
52
  first_column.upto(last_column).map do |col|
42
- cells[[row_number, col]]
43
- end.map { |cell| cell && cell.value }
53
+ cells[[row_number, col]]&.value
54
+ end
44
55
  end
45
56
 
46
57
  def column(col_number)
47
58
  first_row.upto(last_row).map do |row|
48
- cells[[row, col_number]]
49
- end.map { |cell| cell && cell.value }
59
+ cells[[row, col_number]]&.value
60
+ end
50
61
  end
51
62
 
52
63
  # returns the number of the first non-empty row
53
64
  def first_row
54
- @first_row ||= present_cells.keys.map { |row, _| row }.min
65
+ @first_row ||= first_last_row_col[:first_row]
55
66
  end
56
67
 
57
68
  def last_row
58
- @last_row ||= present_cells.keys.map { |row, _| row }.max
69
+ @last_row ||= first_last_row_col[:last_row]
59
70
  end
60
71
 
61
72
  # returns the number of the first non-empty column
62
73
  def first_column
63
- @first_column ||= present_cells.keys.map { |_, col| col }.min
74
+ @first_column ||= first_last_row_col[:first_column]
64
75
  end
65
76
 
66
77
  # returns the number of the last non-empty column
67
78
  def last_column
68
- @last_column ||= present_cells.keys.map { |_, col| col }.max
79
+ @last_column ||= first_last_row_col[:last_column]
69
80
  end
70
81
 
71
82
  def excelx_format(key)
@@ -107,6 +118,34 @@ module Roo
107
118
  (cell.coordinate.column - 1 - last_column).times { pad << nil }
108
119
  pad
109
120
  end
121
+
122
# Walks `cells` once and returns a Hash holding the smallest and largest row
# and column numbers among the cells whose `presence` is truthy. The keys are
# :first_row, :last_row, :first_column and :last_column; every value is nil
# when no such cell exists. The Hash is memoized in @first_last_row_col.
def first_last_row_col
  @first_last_row_col ||= begin
    min_row = max_row = min_col = max_col = nil

    cells.each do |(row, col), cell|
      next unless cell&.presence

      min_row = row if min_row.nil? || row < min_row
      max_row = row if max_row.nil? || row > max_row
      min_col = col if min_col.nil? || col < min_col
      max_col = col if max_col.nil? || col > max_col
    end

    {first_row: min_row, last_row: max_row, first_column: min_col, last_column: max_col}
  end
end
110
149
  end
111
150
  end
112
151
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'forwardable'
2
4
  require 'roo/excelx/extractor'
3
5
 
@@ -5,7 +7,7 @@ module Roo
5
7
  class Excelx
6
8
  class SheetDoc < Excelx::Extractor
7
9
  extend Forwardable
8
- delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
10
+ delegate [:workbook] => :@shared
9
11
 
10
12
  def initialize(path, relationships, shared, options = {})
11
13
  super(path)
@@ -19,7 +21,12 @@ module Roo
19
21
  end
20
22
 
21
23
  def hyperlinks(relationships)
22
- @hyperlinks ||= extract_hyperlinks(relationships)
24
+ # If you're sure you're not going to need this hyperlinks you can discard it
25
+ @hyperlinks ||= if @options[:no_hyperlinks]
26
+ {}
27
+ else
28
+ extract_hyperlinks(relationships)
29
+ end
23
30
  end
24
31
 
25
32
  # Get the dimensions for the sheet.
@@ -39,13 +46,10 @@ module Roo
39
46
  def each_cell(row_xml)
40
47
  return [] unless row_xml
41
48
  row_xml.children.each do |cell_element|
42
- # If you're sure you're not going to need this hyperlinks you can discard it
43
- hyperlinks = unless @options[:no_hyperlinks]
44
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
45
- hyperlinks(@relationships)[key]
46
- end
49
+ coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
50
+ hyperlinks = hyperlinks(@relationships)[coordinate]
47
51
 
48
- yield cell_from_xml(cell_element, hyperlinks)
52
+ yield cell_from_xml(cell_element, hyperlinks, coordinate)
49
53
  end
50
54
  end
51
55
 
@@ -53,13 +57,13 @@ module Roo
53
57
 
54
58
  def cell_value_type(type, format)
55
59
  case type
56
- when 's'.freeze
60
+ when 's'
57
61
  :shared
58
- when 'b'.freeze
62
+ when 'b'
59
63
  :boolean
60
- when 'str'.freeze
64
+ when 'str'
61
65
  :string
62
- when 'inlineStr'.freeze
66
+ when 'inlineStr'
63
67
  :inlinestr
64
68
  else
65
69
  Excelx::Format.to_type(format)
@@ -74,42 +78,58 @@ module Roo
74
78
  # </c>
75
79
  # hyperlink - a String for the hyperlink for the cell or nil when no
76
80
  # hyperlink is present.
81
+ # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
82
+ # or nil to extract coordinate from cell_xml.
83
+ # empty_cell - an Optional Boolean value.
77
84
  #
78
85
  # Examples
79
86
  #
80
- # cells_from_xml(<Nokogiri::XML::Element>, nil)
87
+ # cells_from_xml(<Nokogiri::XML::Element>, nil, nil)
81
88
  # # => <Excelx::Cell::String>
82
89
  #
83
90
  # Returns a type of <Excelx::Cell>.
84
- def cell_from_xml(cell_xml, hyperlink)
85
- coordinate = extract_coordinate(cell_xml['r'])
86
- return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
91
# Builds a cell object from a <c> element.
#
# cell_xml   - the cell element; its "r", "s" and "t" attributes and its
#              children are read.
# hyperlink  - the hyperlink for the cell, or nil.
# coordinate - the cell's coordinate, or nil to extract it from cell_xml["r"].
# empty_cell - when falsy, nil is returned instead of an empty cell object.
#
# Returns the cell built from the first usable "is" or "v" child. When the
# element has no children, or none of them produces a value, the result of
# create_empty_cell is returned.
def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true)
  coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
  cell_xml_children = cell_xml.children
  return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?

  # NOTE: This is error prone, to_i will silently turn a nil into a 0.
  #       This works by coincidence because Format[0] is General.
  style = cell_xml["s"].to_i
  formula = nil

  cell_xml_children.each do |cell|
    case cell.name
    when 'is'
      # Inline string: concatenate the content of every direct <t> child.
      content = +""
      cell.children.each do |inline_str|
        content << inline_str.content if inline_str.name == 't'
      end
      unless content.empty?
        return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
      end
    when 'f'
      formula = cell.content
    when 'v'
      # The format lookup is only needed once a value element is found.
      format = style_format(style)
      value_type = cell_value_type(cell_xml["t"], format)

      return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
    end
  end

  # No value-bearing child was found (e.g. a formula without a cached value).
  # create_empty_cell takes two arguments; empty_cell must be forwarded so
  # this path neither raises ArgumentError nor ignores the caller's choice.
  create_empty_cell(coordinate, empty_cell)
end
111
125
 
112
- def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
126
# Returns a new Excelx::Cell::Empty for coordinate when empty_cell is truthy,
# and nil otherwise.
def create_empty_cell(coordinate, empty_cell)
  return unless empty_cell

  Excelx::Cell::Empty.new(coordinate)
end
131
+
132
+ def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
113
133
  # NOTE: format.to_s can replace excelx_type as an argument for
114
134
  # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
115
135
  # it will break some brittle tests.
@@ -125,11 +145,12 @@ module Roo
125
145
  # 3. formula
126
146
  case value_type
127
147
  when :shared
128
- value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i]
129
- Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
148
+ cell_content = cell.content.to_i
149
+ value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
150
+ Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
130
151
  when :boolean, :string
131
152
  value = cell.content
132
- Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
153
+ Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
133
154
  when :time, :datetime
134
155
  cell_content = cell.content.to_f
135
156
  # NOTE: A date will be a whole number. A time will have be > 1. And
@@ -148,35 +169,32 @@ module Roo
148
169
  else
149
170
  :date
150
171
  end
151
- Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
172
+ base_value = cell_type == :date ? base_date : base_timestamp
173
+ Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
152
174
  when :date
153
- Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
175
+ Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
154
176
  else
155
- Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
177
+ Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
156
178
  end
157
179
  end
158
180
 
159
- def extract_coordinate(coordinate)
160
- row, column = ::Roo::Utils.split_coordinate(coordinate)
161
-
162
- Excelx::Coordinate.new(row, column)
163
- end
164
-
165
181
  def extract_hyperlinks(relationships)
166
182
  return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
167
183
 
168
- Hash[hyperlinks.map do |hyperlink|
169
- if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
170
- [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
184
+ hyperlinks.each_with_object({}) do |hyperlink, hash|
185
+ if relationship = relationships[hyperlink['id']]
186
+ target_link = relationship['Target']
187
+ target_link += "##{hyperlink['location']}" if hyperlink['location']
188
+ hash[::Roo::Utils.ref_to_key(hyperlink["ref"].to_s)] = target_link
171
189
  end
172
- end.compact]
190
+ end
173
191
  end
174
192
 
175
193
  def expand_merged_ranges(cells)
176
194
  # Extract merged ranges from xml
177
195
  merges = {}
178
196
  doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
179
- tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
197
+ tl, br = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
180
198
  for row in tl[0]..br[0] do
181
199
  for col in tl[1]..br[1] do
182
200
  next if row == tl[0] && col == tl[1]
@@ -191,10 +209,14 @@ module Roo
191
209
  end
192
210
 
193
211
  def extract_cells(relationships)
194
- extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
195
- key = ::Roo::Utils.ref_to_key(cell_xml['r'])
196
- [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
197
- end]
212
+ extracted_cells = {}
213
+ empty_cell = @options[:empty_cell]
214
+
215
+ doc.xpath('/worksheet/sheetData/row/c').each do |cell_xml|
216
+ coordinate = ::Roo::Utils.extract_coordinate(cell_xml["r"])
217
+ cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
218
+ extracted_cells[coordinate] = cell if cell
219
+ end
198
220
 
199
221
  expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
200
222
 
@@ -203,9 +225,25 @@ module Roo
203
225
 
204
226
  def extract_dimensions
205
227
  Roo::Utils.each_element(@path, 'dimension') do |dimension|
206
- return dimension.attributes['ref'].value
228
+ return dimension["ref"]
207
229
  end
208
230
  end
231
+
232
# Returns the format for the given style index, as reported by the styles
# object of @shared.
def style_format(style)
  styles = @shared.styles
  styles.style_format(style)
end
235
+
236
# Returns the base_date of @shared.
def base_date
  @shared.base_date
end
239
+
240
# Returns the base_timestamp of @shared.
def base_timestamp
  @shared.base_timestamp
end
243
+
244
# Returns the shared_strings of @shared.
def shared_strings
  @shared.shared_strings
end
209
247
  end
210
248
  end
211
249
  end
@@ -55,9 +55,9 @@ module Roo
55
55
  end
56
56
 
57
57
  def extract_num_fmts
58
- Hash[doc.xpath('//numFmt').map do |num_fmt|
59
- [num_fmt['numFmtId'], num_fmt['formatCode']]
60
- end]
58
+ doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash|
59
+ hash[num_fmt['numFmtId']] = num_fmt['formatCode']
60
+ end
61
61
  end
62
62
  end
63
63
  end
@@ -29,13 +29,17 @@ module Roo
29
29
 
30
30
  # aka labels
31
31
  def defined_names
32
- Hash[doc.xpath('//definedName').map do |defined_name|
32
+ doc.xpath('//definedName').each_with_object({}) do |defined_name, hash|
33
33
  # "Sheet1!$C$5"
34
34
  sheet, coordinates = defined_name.text.split('!$', 2)
35
35
  col, row = coordinates.split('$')
36
36
  name = defined_name['name']
37
- [name, Label.new(name, sheet, row, col)]
38
- end]
37
+ hash[name] = Label.new(name, sheet, row, col)
38
+ end
39
+ end
40
+
41
# Returns base_date converted to an Integer count of seconds relative to the
# Unix epoch (via to_datetime and to_time). Memoized in @base_timestamp.
def base_timestamp
  @base_timestamp ||= begin
    as_datetime = base_date.to_datetime
    as_datetime.to_time.to_i
  end
end
40
44
 
41
45
  def base_date
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
module Roo
  module Helpers
    # Mixin providing a reader macro with per-attribute default values.
    module DefaultAttrReader
      # Defines one reader method per entry of attr_hash (name => default).
      # A generated reader returns the instance variable of the same name
      # when that variable is defined (even if it holds nil) and the given
      # default value otherwise.
      def attr_reader_with_default(attr_hash)
        attr_hash.each do |attr_name, default_value|
          ivar = :"@#{attr_name}"
          define_method(attr_name) do
            instance_variable_defined?(ivar) ? instance_variable_get(ivar) : default_value
          end
        end
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "weakref"
4
+
5
module Roo
  module Helpers
    # Mixin that caches an object in an instance variable through a WeakRef.
    module WeakInstanceCache
      private

      # Returns the object cached under the instance variable named by key
      # (e.g. :@doc). When nothing is cached, or the reference is no longer
      # alive, the block is called, its result is stored as a WeakRef under
      # key with a finalizer attached, and that result is returned.
      def instance_cache(key)
        object = nil
        ref = instance_variable_get(key) if instance_variable_defined?(key)

        if ref && ref.weakref_alive?
          begin
            object = ref.__getobj__
          rescue => e
            # A dead reference is treated as a cache miss; anything else is
            # re-raised.
            ref_error =
              (defined?(::WeakRef::RefError) && e.is_a?(::WeakRef::RefError)) ||
              (defined?(RefError) && e.is_a?(RefError))
            raise e unless ref_error
          end
        end

        return object if object

        object = yield
        ObjectSpace.define_finalizer(object, instance_cache_finalizer(key))
        instance_variable_set(key, WeakRef.new(object))
        object
      end

      # Returns the finalizer proc for the object cached under key. It
      # removes the instance variable when its reference is dead or still
      # points at the object being finalized.
      def instance_cache_finalizer(key)
        proc do |object_id|
          next unless instance_variable_defined?(key)

          ref = instance_variable_get(key)
          next unless ref

          if !ref.weakref_alive? || ref.__getobj__.object_id == object_id
            remove_instance_variable(key)
          end
        end
      end
    end
  end
end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'date'
2
4
  require 'nokogiri'
3
5
  require 'cgi'
@@ -11,9 +13,9 @@ module Roo
11
13
  class OpenOffice < Roo::Base
12
14
  extend Roo::Tempdir
13
15
 
14
- ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
15
- XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
16
- XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
16
+ ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'
17
+ XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']"
18
+ XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']"
17
19
 
18
20
  # initialization and opening of a spreadsheet file
19
21
  # values for packed: :zip
@@ -561,7 +563,7 @@ module Roo
561
563
  end
562
564
 
563
565
  def read_labels
564
- @label ||= Hash[doc.xpath('//table:named-range').map do |ne|
566
+ @label ||= doc.xpath('//table:named-range').each_with_object({}) do |ne, hash|
565
567
  #-
566
568
  # $Sheet1.$C$5
567
569
  #+
@@ -569,8 +571,8 @@ module Roo
569
571
  sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$')
570
572
  col, row = coords.split('$')
571
573
  sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
572
- [name, [sheetname, row, col]]
573
- end]
574
+ hash[name] = [sheetname, row, col]
575
+ end
574
576
  end
575
577
 
576
578
  def read_styles(style_elements)