roo 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +5 -5
  2. data/.github/issue_template.md +16 -0
  3. data/.github/pull_request_template.md +14 -0
  4. data/.rubocop.yml +186 -0
  5. data/.travis.yml +12 -7
  6. data/CHANGELOG.md +31 -2
  7. data/LICENSE +2 -0
  8. data/README.md +25 -12
  9. data/lib/roo.rb +4 -1
  10. data/lib/roo/base.rb +65 -56
  11. data/lib/roo/constants.rb +5 -3
  12. data/lib/roo/csv.rb +20 -12
  13. data/lib/roo/excelx.rb +42 -16
  14. data/lib/roo/excelx/cell.rb +10 -6
  15. data/lib/roo/excelx/cell/base.rb +26 -12
  16. data/lib/roo/excelx/cell/boolean.rb +9 -6
  17. data/lib/roo/excelx/cell/date.rb +7 -7
  18. data/lib/roo/excelx/cell/datetime.rb +14 -18
  19. data/lib/roo/excelx/cell/empty.rb +3 -2
  20. data/lib/roo/excelx/cell/number.rb +35 -34
  21. data/lib/roo/excelx/cell/string.rb +3 -3
  22. data/lib/roo/excelx/cell/time.rb +4 -3
  23. data/lib/roo/excelx/comments.rb +3 -3
  24. data/lib/roo/excelx/coordinate.rb +11 -4
  25. data/lib/roo/excelx/extractor.rb +21 -3
  26. data/lib/roo/excelx/format.rb +38 -31
  27. data/lib/roo/excelx/images.rb +26 -0
  28. data/lib/roo/excelx/relationships.rb +3 -3
  29. data/lib/roo/excelx/shared.rb +10 -3
  30. data/lib/roo/excelx/shared_strings.rb +9 -15
  31. data/lib/roo/excelx/sheet.rb +49 -10
  32. data/lib/roo/excelx/sheet_doc.rb +86 -48
  33. data/lib/roo/excelx/styles.rb +3 -3
  34. data/lib/roo/excelx/workbook.rb +7 -3
  35. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  36. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  37. data/lib/roo/open_office.rb +8 -6
  38. data/lib/roo/spreadsheet.rb +1 -1
  39. data/lib/roo/utils.rb +48 -19
  40. data/lib/roo/version.rb +1 -1
  41. data/roo.gemspec +13 -11
  42. data/spec/lib/roo/base_spec.rb +45 -3
  43. data/spec/lib/roo/excelx_spec.rb +125 -31
  44. data/spec/lib/roo/strict_spec.rb +43 -0
  45. data/spec/lib/roo/utils_spec.rb +12 -3
  46. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  47. data/spec/lib/roo_spec.rb +0 -0
  48. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  49. data/test/excelx/cell/test_base.rb +5 -0
  50. data/test/excelx/cell/test_datetime.rb +6 -6
  51. data/test/excelx/cell/test_empty.rb +11 -0
  52. data/test/excelx/cell/test_number.rb +9 -0
  53. data/test/excelx/cell/test_string.rb +20 -0
  54. data/test/excelx/cell/test_time.rb +4 -4
  55. data/test/excelx/test_coordinate.rb +51 -0
  56. data/test/formatters/test_csv.rb +17 -0
  57. data/test/formatters/test_xml.rb +4 -4
  58. data/test/roo/test_base.rb +2 -2
  59. data/test/roo/test_csv.rb +28 -0
  60. data/test/test_helper.rb +13 -0
  61. data/test/test_roo.rb +7 -7
  62. metadata +21 -11
  63. data/.github/ISSUE_TEMPLATE +0 -10
  64. data/Gemfile_ruby2 +0 -30
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
- ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel.".freeze
3
- ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML.".freeze
4
- ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google.".freeze
4
+ ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel."
5
+ ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML."
6
+ ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google."
5
7
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "csv"
2
4
  require "time"
3
5
 
@@ -63,25 +65,31 @@ module Roo
63
65
  def read_cells(sheet = default_sheet)
64
66
  sheet ||= default_sheet
65
67
  return if @cells_read[sheet]
66
- set_row_count(sheet)
67
- set_column_count(sheet)
68
- row_num = 1
68
+ row_num = 0
69
+ max_col_num = 0
69
70
 
70
71
  each_row csv_options do |row|
71
- row.each_with_index do |elem, col_num|
72
- coordinate = [row_num, col_num + 1]
72
+ row_num += 1
73
+ col_num = 0
74
+
75
+ row.each do |elem|
76
+ col_num += 1
77
+ coordinate = [row_num, col_num]
73
78
  @cell[coordinate] = elem
74
79
  @cell_type[coordinate] = celltype_class(elem)
75
80
  end
76
- row_num += 1
81
+
82
+ max_col_num = col_num if col_num > max_col_num
77
83
  end
78
84
 
85
+ set_row_count(sheet, row_num)
86
+ set_column_count(sheet, max_col_num)
79
87
  @cells_read[sheet] = true
80
88
  end
81
89
 
82
90
  def each_row(options, &block)
83
91
  if uri?(filename)
84
- each_row_using_temp_dir(filename)
92
+ each_row_using_tempdir(options, &block)
85
93
  elsif is_stream?(filename_or_stream)
86
94
  ::CSV.new(filename_or_stream, options).each(&block)
87
95
  else
@@ -89,24 +97,24 @@ module Roo
89
97
  end
90
98
  end
91
99
 
92
- def each_row_using_tempdir
100
+ def each_row_using_tempdir(options, &block)
93
101
  ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV["ROO_TMP"]) do |tmpdir|
94
102
  tmp_filename = download_uri(filename, tmpdir)
95
103
  ::CSV.foreach(tmp_filename, options, &block)
96
104
  end
97
105
  end
98
106
 
99
- def set_row_count(sheet)
107
+ def set_row_count(sheet, last_row)
100
108
  @first_row[sheet] = 1
101
- @last_row[sheet] = ::CSV.readlines(@filename, csv_options).size
109
+ @last_row[sheet] = last_row
102
110
  @last_row[sheet] = @first_row[sheet] if @last_row[sheet].zero?
103
111
 
104
112
  nil
105
113
  end
106
114
 
107
- def set_column_count(sheet)
115
+ def set_column_count(sheet, last_col)
108
116
  @first_column[sheet] = 1
109
- @last_column[sheet] = (::CSV.readlines(@filename, csv_options).first || []).size
117
+ @last_column[sheet] = last_col
110
118
  @last_column[sheet] = @first_column[sheet] if @last_column[sheet].zero?
111
119
 
112
120
  nil
@@ -24,8 +24,9 @@ module Roo
24
24
  require 'roo/excelx/sheet_doc'
25
25
  require 'roo/excelx/coordinate'
26
26
  require 'roo/excelx/format'
27
+ require 'roo/excelx/images'
27
28
 
28
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
29
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
29
30
  ExceedsMaxError = Class.new(StandardError)
30
31
 
31
32
  # initialization and opening of a spreadsheet file
@@ -39,7 +40,10 @@ module Roo
39
40
  sheet_options = {}
40
41
  sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
41
42
  sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
43
+ sheet_options[:empty_cell] = (options[:empty_cell] || false)
44
+ shared_options = {}
42
45
 
46
+ shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
43
47
  unless is_stream?(filename_or_stream)
44
48
  file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
45
49
  basename = find_basename(filename_or_stream)
@@ -52,7 +56,7 @@ module Roo
52
56
  @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
53
57
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
54
58
 
55
- @shared = Shared.new(@tmpdir)
59
+ @shared = Shared.new(@tmpdir, shared_options)
56
60
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
57
61
  process_zipfile(@filename || filename_or_stream)
58
62
 
@@ -62,10 +66,10 @@ module Roo
62
66
  end
63
67
  end.compact
64
68
  @sheets = []
65
- @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
66
- @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
67
- [sheet_name, @sheets[n]]
68
- end]
69
+ @sheets_by_name = {}
70
+ @sheet_names.each_with_index do |sheet_name, n|
71
+ @sheets_by_name[sheet_name] = @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
72
+ end
69
73
 
70
74
  if cell_max
71
75
  cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
@@ -94,7 +98,12 @@ module Roo
94
98
  def sheet_for(sheet)
95
99
  sheet ||= default_sheet
96
100
  validate_sheet!(sheet)
97
- @sheets_by_name[sheet]
101
+ @sheets_by_name[sheet] || @sheets[sheet]
102
+ end
103
+
104
+ def images(sheet = nil)
105
+ images_names = sheet_for(sheet).images.map(&:last)
106
+ images_names.map { |iname| image_files.find { |ifile| ifile[iname] } }
98
107
  end
99
108
 
100
109
  # Returns the content of a spreadsheet-cell.
@@ -325,7 +334,7 @@ module Roo
325
334
 
326
335
  wb.extract(path)
327
336
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
328
- workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
337
+ workbook_doc.xpath('//sheet').map { |s| s['id'] }
329
338
  end
330
339
 
331
340
  # Internal
@@ -349,17 +358,13 @@ module Roo
349
358
 
350
359
  wb_rels.extract(path)
351
360
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
352
- worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
353
361
 
354
362
  relationships = rels_doc.xpath('//Relationship').select do |relationship|
355
- relationship.attributes['Type'].value == worksheet_type
363
+ worksheet_types.include? relationship['Type']
356
364
  end
357
365
 
358
- relationships.inject({}) do |hash, relationship|
359
- attributes = relationship.attributes
360
- id = attributes['Id']
361
- hash[id.value] = attributes['Target'].value
362
- hash
366
+ relationships.each_with_object({}) do |relationship, hash|
367
+ hash[relationship['Id']] = relationship['Target']
363
368
  end
364
369
  end
365
370
 
@@ -376,6 +381,15 @@ module Roo
376
381
  end
377
382
  end
378
383
 
384
+ def extract_images(entries, tmpdir)
385
+ img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
386
+ img_entries.each do |entry|
387
+ path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
388
+ image_files << path
389
+ entry.extract(path)
390
+ end
391
+ end
392
+
379
393
  # Extracts all needed files from the zip file
380
394
  def process_zipfile(zipfilename_or_stream)
381
395
  @sheet_files = []
@@ -409,6 +423,7 @@ module Roo
409
423
  sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
410
424
  sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
411
425
  extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
426
+ extract_images(entries, @tmpdir)
412
427
 
413
428
  entries.each do |entry|
414
429
  path =
@@ -435,6 +450,10 @@ module Roo
435
450
  # drawings, etc.
436
451
  nr = Regexp.last_match[1].to_i
437
452
  rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
453
+ when /drawing([0-9]+).xml.rels$/
454
+ # Extracting drawing relationships to make images lists for each sheet
455
+ nr = Regexp.last_match[1].to_i
456
+ image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}"
438
457
  end
439
458
 
440
459
  entry.extract(path) if path
@@ -442,7 +461,14 @@ module Roo
442
461
  end
443
462
 
444
463
  def safe_send(object, method, *args)
445
- object.send(method, *args) if object && object.respond_to?(method)
464
+ object.send(method, *args) if object&.respond_to?(method)
465
+ end
466
+
467
+ def worksheet_types
468
+ [
469
+ 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional
470
+ 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict
471
+ ]
446
472
  end
447
473
  end
448
474
  end
@@ -40,19 +40,23 @@ module Roo
40
40
  end
41
41
 
42
42
  def self.create_cell(type, *values)
43
+ cell_class(type)&.new(*values)
44
+ end
45
+
46
+ def self.cell_class(type)
43
47
  case type
44
48
  when :string
45
- Cell::String.new(*values)
49
+ Cell::String
46
50
  when :boolean
47
- Cell::Boolean.new(*values)
51
+ Cell::Boolean
48
52
  when :number
49
- Cell::Number.new(*values)
53
+ Cell::Number
50
54
  when :date
51
- Cell::Date.new(*values)
55
+ Cell::Date
52
56
  when :datetime
53
- Cell::DateTime.new(*values)
57
+ Cell::DateTime
54
58
  when :time
55
- Cell::Time.new(*values)
59
+ Cell::Time
56
60
  end
57
61
  end
58
62
 
@@ -1,13 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "roo/helpers/default_attr_reader"
4
+
1
5
  module Roo
2
6
  class Excelx
3
7
  class Cell
4
8
  class Base
9
+ extend Roo::Helpers::DefaultAttrReader
5
10
  attr_reader :cell_type, :cell_value, :value
6
11
 
7
12
  # FIXME: I think style should be deprecated. Having a style attribute
8
13
  # for a cell doesn't really accomplish much. It seems to be used
9
14
  # when you want to export to excelx.
10
- attr_reader :style
15
+ attr_reader_with_default default_type: :base, style: 1
11
16
 
12
17
 
13
18
  # FIXME: Updating a cell's value should be able tochange the cell's type,
@@ -34,14 +39,12 @@ module Roo
34
39
  attr_writer :value
35
40
 
36
41
  def initialize(value, formula, excelx_type, style, link, coordinate)
37
- @link = !!link
38
42
  @cell_value = value
39
- @cell_type = excelx_type
40
- @formula = formula
41
- @style = style
43
+ @cell_type = excelx_type if excelx_type
44
+ @formula = formula if formula
45
+ @style = style unless style == 1
42
46
  @coordinate = coordinate
43
- @type = :base
44
- @value = link? ? Roo::Link.new(link, value) : value
47
+ @value = link ? Roo::Link.new(link, value) : value
45
48
  end
46
49
 
47
50
  def type
@@ -50,16 +53,16 @@ module Roo
50
53
  elsif link?
51
54
  :link
52
55
  else
53
- @type
56
+ default_type
54
57
  end
55
58
  end
56
59
 
57
60
  def formula?
58
- !!@formula
61
+ !!(defined?(@formula) && @formula)
59
62
  end
60
63
 
61
64
  def link?
62
- !!@link
65
+ Roo::Link === @value
63
66
  end
64
67
 
65
68
  alias_method :formatted_value, :value
@@ -68,9 +71,16 @@ module Roo
68
71
  formatted_value
69
72
  end
70
73
 
71
- # DEPRECATED: Please use link instead.
74
+ # DEPRECATED: Please use link? instead.
72
75
  def hyperlink
73
- warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link` instead.'
76
+ warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link?` instead.'
77
+ link?
78
+ end
79
+
80
+ # DEPRECATED: Please use link? instead.
81
+ def link
82
+ warn '[DEPRECATION] `link` is deprecated. Please use `link?` instead.'
83
+ link?
74
84
  end
75
85
 
76
86
  # DEPRECATED: Please use cell_value instead.
@@ -88,6 +98,10 @@ module Roo
88
98
  def empty?
89
99
  false
90
100
  end
101
+
102
+ def presence
103
+ empty? ? nil : self
104
+ end
91
105
  end
92
106
  end
93
107
  end
@@ -1,17 +1,20 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
4
  class Excelx
3
5
  class Cell
4
6
  class Boolean < Cell::Base
5
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
+
9
+ attr_reader_with_default default_type: :boolean, cell_type: :boolean
6
10
 
7
11
  def initialize(value, formula, style, link, coordinate)
8
- super(value, formula, nil, style, link, coordinate)
9
- @type = @cell_type = :boolean
10
- @value = link? ? Roo::Link.new(link, value) : create_boolean(value)
12
+ super(value, formula, nil, style, nil, coordinate)
13
+ @value = link ? Roo::Link.new(link, value) : create_boolean(value)
11
14
  end
12
15
 
13
16
  def formatted_value
14
- value ? 'TRUE'.freeze : 'FALSE'.freeze
17
+ value ? 'TRUE' : 'FALSE'
15
18
  end
16
19
 
17
20
  private
@@ -19,7 +22,7 @@ module Roo
19
22
  def create_boolean(value)
20
23
  # FIXME: Using a boolean will cause methods like Base#to_csv to fail.
21
24
  # Roo is using some method to ignore false/nil values.
22
- value.to_i == 1 ? true : false
25
+ value.to_i == 1
23
26
  end
24
27
  end
25
28
  end
@@ -4,23 +4,23 @@ module Roo
4
4
  class Excelx
5
5
  class Cell
6
6
  class Date < Roo::Excelx::Cell::DateTime
7
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate
8
+
9
+ attr_reader_with_default default_type: :date
8
10
 
9
11
  def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
10
12
  # NOTE: Pass all arguments to the parent class, DateTime.
11
13
  super
12
- @type = :date
13
14
  @format = excelx_type.last
14
- @value = link? ? Roo::Link.new(link, value) : create_date(base_date, value)
15
+ @value = link ? Roo::Link.new(link, value) : create_date(base_date, value)
15
16
  end
16
17
 
17
18
  private
18
19
 
19
- def create_date(base_date, value)
20
- date = base_date + value.to_i
21
- yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
20
+ def create_datetime(_,_); end
22
21
 
23
- ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
22
+ def create_date(base_date, value)
23
+ base_date + value.to_i
24
24
  end
25
25
  end
26
26
  end
@@ -1,16 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'date'
2
4
 
3
5
  module Roo
4
6
  class Excelx
5
7
  class Cell
6
8
  class DateTime < Cell::Base
7
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
9
+ SECONDS_IN_DAY = 60 * 60 * 24
10
+
11
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
12
 
9
- def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
10
- super(value, formula, excelx_type, style, link, coordinate)
11
- @type = :datetime
13
+ attr_reader_with_default default_type: :datetime
14
+
15
+ def initialize(value, formula, excelx_type, style, link, base_timestamp, coordinate)
16
+ super(value, formula, excelx_type, style, nil, coordinate)
12
17
  @format = excelx_type.last
13
- @value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value)
18
+ @value = link ? Roo::Link.new(link, value) : create_datetime(base_timestamp, value)
14
19
  end
15
20
 
16
21
  # Public: Returns formatted value for a datetime. Format's can be an
@@ -78,7 +83,7 @@ module Roo
78
83
 
79
84
  TIME_FORMATS = {
80
85
  'hh' => '%H', # Hour (24): 01
81
- 'h' => '%-k'.freeze, # Hour (24): 1
86
+ 'h' => '%-k', # Hour (24): 1
82
87
  # 'hh'.freeze => '%I'.freeze, # Hour (12): 08
83
88
  # 'h'.freeze => '%-l'.freeze, # Hour (12): 8
84
89
  'mm' => '%M', # Minute: 01
@@ -92,18 +97,9 @@ module Roo
92
97
  '0' => '%1N' # Fractional Seconds: tenths.
93
98
  }
94
99
 
95
- def create_datetime(base_date, value)
96
- date = base_date + value.to_f.round(6)
97
- datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
98
- t = round_datetime(datetime_string)
99
-
100
- ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
101
- end
102
-
103
- def round_datetime(datetime_string)
104
- /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
105
-
106
- ::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0)
100
+ def create_datetime(base_timestamp, value)
101
+ timestamp = (base_timestamp + (value.to_f.round(6) * SECONDS_IN_DAY)).round(0)
102
+ ::Time.at(timestamp).utc.to_datetime
107
103
  end
108
104
  end
109
105
  end