roo 2.7.1 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +5 -5
  2. data/.github/issue_template.md +16 -0
  3. data/.github/pull_request_template.md +14 -0
  4. data/.rubocop.yml +186 -0
  5. data/.travis.yml +12 -7
  6. data/CHANGELOG.md +31 -2
  7. data/LICENSE +2 -0
  8. data/README.md +25 -12
  9. data/lib/roo.rb +4 -1
  10. data/lib/roo/base.rb +65 -56
  11. data/lib/roo/constants.rb +5 -3
  12. data/lib/roo/csv.rb +20 -12
  13. data/lib/roo/excelx.rb +42 -16
  14. data/lib/roo/excelx/cell.rb +10 -6
  15. data/lib/roo/excelx/cell/base.rb +26 -12
  16. data/lib/roo/excelx/cell/boolean.rb +9 -6
  17. data/lib/roo/excelx/cell/date.rb +7 -7
  18. data/lib/roo/excelx/cell/datetime.rb +14 -18
  19. data/lib/roo/excelx/cell/empty.rb +3 -2
  20. data/lib/roo/excelx/cell/number.rb +35 -34
  21. data/lib/roo/excelx/cell/string.rb +3 -3
  22. data/lib/roo/excelx/cell/time.rb +4 -3
  23. data/lib/roo/excelx/comments.rb +3 -3
  24. data/lib/roo/excelx/coordinate.rb +11 -4
  25. data/lib/roo/excelx/extractor.rb +21 -3
  26. data/lib/roo/excelx/format.rb +38 -31
  27. data/lib/roo/excelx/images.rb +26 -0
  28. data/lib/roo/excelx/relationships.rb +3 -3
  29. data/lib/roo/excelx/shared.rb +10 -3
  30. data/lib/roo/excelx/shared_strings.rb +9 -15
  31. data/lib/roo/excelx/sheet.rb +49 -10
  32. data/lib/roo/excelx/sheet_doc.rb +86 -48
  33. data/lib/roo/excelx/styles.rb +3 -3
  34. data/lib/roo/excelx/workbook.rb +7 -3
  35. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  36. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  37. data/lib/roo/open_office.rb +8 -6
  38. data/lib/roo/spreadsheet.rb +1 -1
  39. data/lib/roo/utils.rb +48 -19
  40. data/lib/roo/version.rb +1 -1
  41. data/roo.gemspec +13 -11
  42. data/spec/lib/roo/base_spec.rb +45 -3
  43. data/spec/lib/roo/excelx_spec.rb +125 -31
  44. data/spec/lib/roo/strict_spec.rb +43 -0
  45. data/spec/lib/roo/utils_spec.rb +12 -3
  46. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  47. data/spec/lib/roo_spec.rb +0 -0
  48. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  49. data/test/excelx/cell/test_base.rb +5 -0
  50. data/test/excelx/cell/test_datetime.rb +6 -6
  51. data/test/excelx/cell/test_empty.rb +11 -0
  52. data/test/excelx/cell/test_number.rb +9 -0
  53. data/test/excelx/cell/test_string.rb +20 -0
  54. data/test/excelx/cell/test_time.rb +4 -4
  55. data/test/excelx/test_coordinate.rb +51 -0
  56. data/test/formatters/test_csv.rb +17 -0
  57. data/test/formatters/test_xml.rb +4 -4
  58. data/test/roo/test_base.rb +2 -2
  59. data/test/roo/test_csv.rb +28 -0
  60. data/test/test_helper.rb +13 -0
  61. data/test/test_roo.rb +7 -7
  62. metadata +21 -11
  63. data/.github/ISSUE_TEMPLATE +0 -10
  64. data/Gemfile_ruby2 +0 -30
@@ -4,11 +4,15 @@ module Roo
4
4
  class Sheet
5
5
  extend Forwardable
6
6
 
7
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
7
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared
8
+
9
+ attr_reader :images
8
10
 
9
11
  def initialize(name, shared, sheet_index, options = {})
10
12
  @name = name
11
13
  @shared = shared
14
+ @sheet_index = sheet_index
15
+ @images = Images.new(image_rels[sheet_index]).list
12
16
  @rels = Relationships.new(rels_files[sheet_index])
13
17
  @comments = Comments.new(comments_files[sheet_index])
14
18
  @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
@@ -19,7 +23,14 @@ module Roo
19
23
  end
20
24
 
21
25
  def present_cells
22
- @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
26
+ @present_cells ||= begin
27
+ warn %{
28
+ [DEPRECATION] present_cells is deprecated. Alternate:
29
+ with activesupport => cells[key].presence
30
+ without activesupport => cells[key]&.presence
31
+ }
32
+ cells.select { |_, cell| cell&.presence }
33
+ end
23
34
  end
24
35
 
25
36
  # Yield each row as array of Excelx::Cell objects
@@ -39,33 +50,33 @@ module Roo
39
50
 
40
51
  def row(row_number)
41
52
  first_column.upto(last_column).map do |col|
42
- cells[[row_number, col]]
43
- end.map { |cell| cell && cell.value }
53
+ cells[[row_number, col]]&.value
54
+ end
44
55
  end
45
56
 
46
57
  def column(col_number)
47
58
  first_row.upto(last_row).map do |row|
48
- cells[[row, col_number]]
49
- end.map { |cell| cell && cell.value }
59
+ cells[[row, col_number]]&.value
60
+ end
50
61
  end
51
62
 
52
63
  # returns the number of the first non-empty row
53
64
  def first_row
54
- @first_row ||= present_cells.keys.map { |row, _| row }.min
65
+ @first_row ||= first_last_row_col[:first_row]
55
66
  end
56
67
 
57
68
  def last_row
58
- @last_row ||= present_cells.keys.map { |row, _| row }.max
69
+ @last_row ||= first_last_row_col[:last_row]
59
70
  end
60
71
 
61
72
  # returns the number of the first non-empty column
62
73
  def first_column
63
- @first_column ||= present_cells.keys.map { |_, col| col }.min
74
+ @first_column ||= first_last_row_col[:first_column]
64
75
  end
65
76
 
66
77
  # returns the number of the last non-empty column
67
78
  def last_column
68
- @last_column ||= present_cells.keys.map { |_, col| col }.max
79
+ @last_column ||= first_last_row_col[:last_column]
69
80
  end
70
81
 
71
82
  def excelx_format(key)
@@ -107,6 +118,34 @@ module Roo
107
118
  (cell.coordinate.column - 1 - last_column).times { pad << nil }
108
119
  pad
109
120
  end
121
+
122
+ def first_last_row_col
123
+ @first_last_row_col ||= begin
124
+ first_row = last_row = first_col = last_col = nil
125
+
126
+ cells.each do |(row, col), cell|
127
+ next unless cell&.presence
128
+ first_row ||= row
129
+ last_row ||= row
130
+ first_col ||= col
131
+ last_col ||= col
132
+
133
+ if row > last_row
134
+ last_row = row
135
+ elsif row < first_row
136
+ first_row = row
137
+ end
138
+
139
+ if col > last_col
140
+ last_col = col
141
+ elsif col < first_col
142
+ first_col = col
143
+ end
144
+ end
145
+
146
+ {first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col}
147
+ end
148
+ end
110
149
  end
111
150
  end
112
151
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'forwardable'
2
4
  require 'roo/excelx/extractor'
3
5
 
@@ -5,7 +7,7 @@ module Roo
5
7
  class Excelx
6
8
  class SheetDoc < Excelx::Extractor
7
9
  extend Forwardable
8
- delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
10
+ delegate [:workbook] => :@shared
9
11
 
10
12
  def initialize(path, relationships, shared, options = {})
11
13
  super(path)
@@ -19,7 +21,12 @@ module Roo
19
21
  end
20
22
 
21
23
  def hyperlinks(relationships)
22
- @hyperlinks ||= extract_hyperlinks(relationships)
24
+ # If you're sure you're not going to need these hyperlinks you can discard them
25
+ @hyperlinks ||= if @options[:no_hyperlinks]
26
+ {}
27
+ else
28
+ extract_hyperlinks(relationships)
29
+ end
23
30
  end
24
31
 
25
32
  # Get the dimensions for the sheet.
@@ -39,13 +46,10 @@ module Roo
39
46
  def each_cell(row_xml)
40
47
  return [] unless row_xml
41
48
  row_xml.children.each do |cell_element|
42
- # If you're sure you're not going to need this hyperlinks you can discard it
43
- hyperlinks = unless @options[:no_hyperlinks]
44
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
45
- hyperlinks(@relationships)[key]
46
- end
49
+ coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
50
+ hyperlinks = hyperlinks(@relationships)[coordinate]
47
51
 
48
- yield cell_from_xml(cell_element, hyperlinks)
52
+ yield cell_from_xml(cell_element, hyperlinks, coordinate)
49
53
  end
50
54
  end
51
55
 
@@ -53,13 +57,13 @@ module Roo
53
57
 
54
58
  def cell_value_type(type, format)
55
59
  case type
56
- when 's'.freeze
60
+ when 's'
57
61
  :shared
58
- when 'b'.freeze
62
+ when 'b'
59
63
  :boolean
60
- when 'str'.freeze
64
+ when 'str'
61
65
  :string
62
- when 'inlineStr'.freeze
66
+ when 'inlineStr'
63
67
  :inlinestr
64
68
  else
65
69
  Excelx::Format.to_type(format)
@@ -74,42 +78,58 @@ module Roo
74
78
  # </c>
75
79
  # hyperlink - a String for the hyperlink for the cell or nil when no
76
80
  # hyperlink is present.
81
+ # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
82
+ # or nil to extract coordinate from cell_xml.
83
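+ # empty_cell - an optional Boolean (default: true); when false, nil is returned instead of an Excelx::Cell::Empty for a cell_xml with no children.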
77
84
  #
78
85
  # Examples
79
86
  #
80
- # cells_from_xml(<Nokogiri::XML::Element>, nil)
87
+ # cell_from_xml(<Nokogiri::XML::Element>, nil, nil)
81
88
  # # => <Excelx::Cell::String>
82
89
  #
83
90
  # Returns a type of <Excelx::Cell>.
84
- def cell_from_xml(cell_xml, hyperlink)
85
- coordinate = extract_coordinate(cell_xml['r'])
86
- return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
91
+ def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true)
92
+ coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
93
+ cell_xml_children = cell_xml.children
94
+ return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?
87
95
 
88
96
  # NOTE: This is error prone, to_i will silently turn a nil into a 0.
89
97
  # This works by coincidence because Format[0] is General.
90
- style = cell_xml['s'].to_i
91
- format = styles.style_format(style)
92
- value_type = cell_value_type(cell_xml['t'], format)
98
+ style = cell_xml["s"].to_i
93
99
  formula = nil
94
100
 
95
- cell_xml.children.each do |cell|
101
+ cell_xml_children.each do |cell|
96
102
  case cell.name
97
103
  when 'is'
98
- content_arr = cell.search('t').map(&:content)
99
- unless content_arr.empty?
100
- return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
104
+ content = +""
105
+ cell.children.each do |inline_str|
106
+ if inline_str.name == 't'
107
+ content << inline_str.content
108
+ end
109
+ end
110
+ unless content.empty?
111
+ return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
101
112
  end
102
113
  when 'f'
103
114
  formula = cell.content
104
115
  when 'v'
105
- return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
116
+ format = style_format(style)
117
+ value_type = cell_value_type(cell_xml["t"], format)
118
+
119
+ return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
106
120
  end
107
121
  end
108
122
 
109
- Excelx::Cell::Empty.new(coordinate)
123
+ create_empty_cell(coordinate)
110
124
  end
111
125
 
112
- def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
126
+ def create_empty_cell(coordinate, empty_cell)
127
+ if empty_cell
128
+ Excelx::Cell::Empty.new(coordinate)
129
+ end
130
+ end
131
+
132
+ def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
113
133
  # NOTE: format.to_s can replace excelx_type as an argument for
114
134
  # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
115
135
  # it will break some brittle tests.
@@ -125,11 +145,12 @@ module Roo
125
145
  # 3. formula
126
146
  case value_type
127
147
  when :shared
128
- value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i]
129
- Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
148
+ cell_content = cell.content.to_i
149
+ value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
150
+ Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
130
151
  when :boolean, :string
131
152
  value = cell.content
132
- Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
153
+ Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
133
154
  when :time, :datetime
134
155
  cell_content = cell.content.to_f
135
156
  # NOTE: A date will be a whole number. A time will have be > 1. And
@@ -148,35 +169,32 @@ module Roo
148
169
  else
149
170
  :date
150
171
  end
151
- Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
172
+ base_value = cell_type == :date ? base_date : base_timestamp
173
+ Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
152
174
  when :date
153
- Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
175
+ Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
154
176
  else
155
- Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
177
+ Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
156
178
  end
157
179
  end
158
180
 
159
- def extract_coordinate(coordinate)
160
- row, column = ::Roo::Utils.split_coordinate(coordinate)
161
-
162
- Excelx::Coordinate.new(row, column)
163
- end
164
-
165
181
  def extract_hyperlinks(relationships)
166
182
  return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
167
183
 
168
- Hash[hyperlinks.map do |hyperlink|
169
- if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
170
- [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
184
+ hyperlinks.each_with_object({}) do |hyperlink, hash|
185
+ if relationship = relationships[hyperlink['id']]
186
+ target_link = relationship['Target']
187
+ target_link += "##{hyperlink['location']}" if hyperlink['location']
188
+ hash[::Roo::Utils.ref_to_key(hyperlink["ref"].to_s)] = target_link
171
189
  end
172
- end.compact]
190
+ end
173
191
  end
174
192
 
175
193
  def expand_merged_ranges(cells)
176
194
  # Extract merged ranges from xml
177
195
  merges = {}
178
196
  doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
179
- tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
197
+ tl, br = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
180
198
  for row in tl[0]..br[0] do
181
199
  for col in tl[1]..br[1] do
182
200
  next if row == tl[0] && col == tl[1]
@@ -191,10 +209,14 @@ module Roo
191
209
  end
192
210
 
193
211
  def extract_cells(relationships)
194
- extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
195
- key = ::Roo::Utils.ref_to_key(cell_xml['r'])
196
- [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
197
- end]
212
+ extracted_cells = {}
213
+ empty_cell = @options[:empty_cell]
214
+
215
+ doc.xpath('/worksheet/sheetData/row/c').each do |cell_xml|
216
+ coordinate = ::Roo::Utils.extract_coordinate(cell_xml["r"])
217
+ cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
218
+ extracted_cells[coordinate] = cell if cell
219
+ end
198
220
 
199
221
  expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
200
222
 
@@ -203,9 +225,25 @@ module Roo
203
225
 
204
226
  def extract_dimensions
205
227
  Roo::Utils.each_element(@path, 'dimension') do |dimension|
206
- return dimension.attributes['ref'].value
228
+ return dimension["ref"]
207
229
  end
208
230
  end
231
+
232
+ def style_format(style)
233
+ @shared.styles.style_format(style)
234
+ end
235
+
236
+ def base_date
237
+ @shared.base_date
238
+ end
239
+
240
+ def base_timestamp
241
+ @shared.base_timestamp
242
+ end
243
+
244
+ def shared_strings
245
+ @shared.shared_strings
246
+ end
209
247
  end
210
248
  end
211
249
  end
@@ -55,9 +55,9 @@ module Roo
55
55
  end
56
56
 
57
57
  def extract_num_fmts
58
- Hash[doc.xpath('//numFmt').map do |num_fmt|
59
- [num_fmt['numFmtId'], num_fmt['formatCode']]
60
- end]
58
+ doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash|
59
+ hash[num_fmt['numFmtId']] = num_fmt['formatCode']
60
+ end
61
61
  end
62
62
  end
63
63
  end
@@ -29,13 +29,17 @@ module Roo
29
29
 
30
30
  # aka labels
31
31
  def defined_names
32
- Hash[doc.xpath('//definedName').map do |defined_name|
32
+ doc.xpath('//definedName').each_with_object({}) do |defined_name, hash|
33
33
  # "Sheet1!$C$5"
34
34
  sheet, coordinates = defined_name.text.split('!$', 2)
35
35
  col, row = coordinates.split('$')
36
36
  name = defined_name['name']
37
- [name, Label.new(name, sheet, row, col)]
38
- end]
37
+ hash[name] = Label.new(name, sheet, row, col)
38
+ end
39
+ end
40
+
41
+ def base_timestamp
42
+ @base_timestamp ||= base_date.to_datetime.to_time.to_i
39
43
  end
40
44
 
41
45
  def base_date
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Roo
4
+ module Helpers
5
+ module DefaultAttrReader
6
+ def attr_reader_with_default(attr_hash)
7
+ attr_hash.each do |attr_name, default_value|
8
+ instance_variable = :"@#{attr_name}"
9
+ define_method attr_name do
10
+ if instance_variable_defined? instance_variable
11
+ instance_variable_get instance_variable
12
+ else
13
+ default_value
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "weakref"
4
+
5
+ module Roo
6
+ module Helpers
7
+ module WeakInstanceCache
8
+ private
9
+
10
+ def instance_cache(key)
11
+ object = nil
12
+
13
+ if instance_variable_defined?(key) && (ref = instance_variable_get(key)) && ref.weakref_alive?
14
+ begin
15
+ object = ref.__getobj__
16
+ rescue => e
17
+ unless (defined?(::WeakRef::RefError) && e.is_a?(::WeakRef::RefError)) || (defined?(RefError) && e.is_a?(RefError))
18
+ raise e
19
+ end
20
+ end
21
+ end
22
+
23
+ unless object
24
+ object = yield
25
+ ObjectSpace.define_finalizer(object, instance_cache_finalizer(key))
26
+ instance_variable_set(key, WeakRef.new(object))
27
+ end
28
+
29
+ object
30
+ end
31
+
32
+ def instance_cache_finalizer(key)
33
+ proc do |object_id|
34
+ if instance_variable_defined?(key) && (ref = instance_variable_get(key)) && (!ref.weakref_alive? || ref.__getobj__.object_id == object_id)
35
+ remove_instance_variable(key)
36
+ end
37
+ end
38
+ end
39
+ end
40
+ end
41
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'date'
2
4
  require 'nokogiri'
3
5
  require 'cgi'
@@ -11,9 +13,9 @@ module Roo
11
13
  class OpenOffice < Roo::Base
12
14
  extend Roo::Tempdir
13
15
 
14
- ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
15
- XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
16
- XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
16
+ ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'
17
+ XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']"
18
+ XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']"
17
19
 
18
20
  # initialization and opening of a spreadsheet file
19
21
  # values for packed: :zip
@@ -561,7 +563,7 @@ module Roo
561
563
  end
562
564
 
563
565
  def read_labels
564
- @label ||= Hash[doc.xpath('//table:named-range').map do |ne|
566
+ @label ||= doc.xpath('//table:named-range').each_with_object({}) do |ne, hash|
565
567
  #-
566
568
  # $Sheet1.$C$5
567
569
  #+
@@ -569,8 +571,8 @@ module Roo
569
571
  sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$')
570
572
  col, row = coords.split('$')
571
573
  sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
572
- [name, [sheetname, row, col]]
573
- end]
574
+ hash[name] = [sheetname, row, col]
575
+ end
574
576
  end
575
577
 
576
578
  def read_styles(style_elements)