roo 2.7.1 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +5 -5
  2. data/.github/issue_template.md +16 -0
  3. data/.github/pull_request_template.md +14 -0
  4. data/.rubocop.yml +186 -0
  5. data/.travis.yml +12 -7
  6. data/CHANGELOG.md +31 -2
  7. data/LICENSE +2 -0
  8. data/README.md +25 -12
  9. data/lib/roo.rb +4 -1
  10. data/lib/roo/base.rb +65 -56
  11. data/lib/roo/constants.rb +5 -3
  12. data/lib/roo/csv.rb +20 -12
  13. data/lib/roo/excelx.rb +42 -16
  14. data/lib/roo/excelx/cell.rb +10 -6
  15. data/lib/roo/excelx/cell/base.rb +26 -12
  16. data/lib/roo/excelx/cell/boolean.rb +9 -6
  17. data/lib/roo/excelx/cell/date.rb +7 -7
  18. data/lib/roo/excelx/cell/datetime.rb +14 -18
  19. data/lib/roo/excelx/cell/empty.rb +3 -2
  20. data/lib/roo/excelx/cell/number.rb +35 -34
  21. data/lib/roo/excelx/cell/string.rb +3 -3
  22. data/lib/roo/excelx/cell/time.rb +4 -3
  23. data/lib/roo/excelx/comments.rb +3 -3
  24. data/lib/roo/excelx/coordinate.rb +11 -4
  25. data/lib/roo/excelx/extractor.rb +21 -3
  26. data/lib/roo/excelx/format.rb +38 -31
  27. data/lib/roo/excelx/images.rb +26 -0
  28. data/lib/roo/excelx/relationships.rb +3 -3
  29. data/lib/roo/excelx/shared.rb +10 -3
  30. data/lib/roo/excelx/shared_strings.rb +9 -15
  31. data/lib/roo/excelx/sheet.rb +49 -10
  32. data/lib/roo/excelx/sheet_doc.rb +86 -48
  33. data/lib/roo/excelx/styles.rb +3 -3
  34. data/lib/roo/excelx/workbook.rb +7 -3
  35. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  36. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  37. data/lib/roo/open_office.rb +8 -6
  38. data/lib/roo/spreadsheet.rb +1 -1
  39. data/lib/roo/utils.rb +48 -19
  40. data/lib/roo/version.rb +1 -1
  41. data/roo.gemspec +13 -11
  42. data/spec/lib/roo/base_spec.rb +45 -3
  43. data/spec/lib/roo/excelx_spec.rb +125 -31
  44. data/spec/lib/roo/strict_spec.rb +43 -0
  45. data/spec/lib/roo/utils_spec.rb +12 -3
  46. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  47. data/spec/lib/roo_spec.rb +0 -0
  48. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  49. data/test/excelx/cell/test_base.rb +5 -0
  50. data/test/excelx/cell/test_datetime.rb +6 -6
  51. data/test/excelx/cell/test_empty.rb +11 -0
  52. data/test/excelx/cell/test_number.rb +9 -0
  53. data/test/excelx/cell/test_string.rb +20 -0
  54. data/test/excelx/cell/test_time.rb +4 -4
  55. data/test/excelx/test_coordinate.rb +51 -0
  56. data/test/formatters/test_csv.rb +17 -0
  57. data/test/formatters/test_xml.rb +4 -4
  58. data/test/roo/test_base.rb +2 -2
  59. data/test/roo/test_csv.rb +28 -0
  60. data/test/test_helper.rb +13 -0
  61. data/test/test_roo.rb +7 -7
  62. metadata +21 -11
  63. data/.github/ISSUE_TEMPLATE +0 -10
  64. data/Gemfile_ruby2 +0 -30
@@ -1,5 +1,7 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
- ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel.".freeze
3
- ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML.".freeze
4
- ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google.".freeze
4
+ ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel."
5
+ ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML."
6
+ ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google."
5
7
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require "csv"
2
4
  require "time"
3
5
 
@@ -63,25 +65,31 @@ module Roo
63
65
  def read_cells(sheet = default_sheet)
64
66
  sheet ||= default_sheet
65
67
  return if @cells_read[sheet]
66
- set_row_count(sheet)
67
- set_column_count(sheet)
68
- row_num = 1
68
+ row_num = 0
69
+ max_col_num = 0
69
70
 
70
71
  each_row csv_options do |row|
71
- row.each_with_index do |elem, col_num|
72
- coordinate = [row_num, col_num + 1]
72
+ row_num += 1
73
+ col_num = 0
74
+
75
+ row.each do |elem|
76
+ col_num += 1
77
+ coordinate = [row_num, col_num]
73
78
  @cell[coordinate] = elem
74
79
  @cell_type[coordinate] = celltype_class(elem)
75
80
  end
76
- row_num += 1
81
+
82
+ max_col_num = col_num if col_num > max_col_num
77
83
  end
78
84
 
85
+ set_row_count(sheet, row_num)
86
+ set_column_count(sheet, max_col_num)
79
87
  @cells_read[sheet] = true
80
88
  end
81
89
 
82
90
  def each_row(options, &block)
83
91
  if uri?(filename)
84
- each_row_using_temp_dir(filename)
92
+ each_row_using_tempdir(options, &block)
85
93
  elsif is_stream?(filename_or_stream)
86
94
  ::CSV.new(filename_or_stream, options).each(&block)
87
95
  else
@@ -89,24 +97,24 @@ module Roo
89
97
  end
90
98
  end
91
99
 
92
- def each_row_using_tempdir
100
+ def each_row_using_tempdir(options, &block)
93
101
  ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV["ROO_TMP"]) do |tmpdir|
94
102
  tmp_filename = download_uri(filename, tmpdir)
95
103
  ::CSV.foreach(tmp_filename, options, &block)
96
104
  end
97
105
  end
98
106
 
99
- def set_row_count(sheet)
107
+ def set_row_count(sheet, last_row)
100
108
  @first_row[sheet] = 1
101
- @last_row[sheet] = ::CSV.readlines(@filename, csv_options).size
109
+ @last_row[sheet] = last_row
102
110
  @last_row[sheet] = @first_row[sheet] if @last_row[sheet].zero?
103
111
 
104
112
  nil
105
113
  end
106
114
 
107
- def set_column_count(sheet)
115
+ def set_column_count(sheet, last_col)
108
116
  @first_column[sheet] = 1
109
- @last_column[sheet] = (::CSV.readlines(@filename, csv_options).first || []).size
117
+ @last_column[sheet] = last_col
110
118
  @last_column[sheet] = @first_column[sheet] if @last_column[sheet].zero?
111
119
 
112
120
  nil
@@ -24,8 +24,9 @@ module Roo
24
24
  require 'roo/excelx/sheet_doc'
25
25
  require 'roo/excelx/coordinate'
26
26
  require 'roo/excelx/format'
27
+ require 'roo/excelx/images'
27
28
 
28
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
29
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
29
30
  ExceedsMaxError = Class.new(StandardError)
30
31
 
31
32
  # initialization and opening of a spreadsheet file
@@ -39,7 +40,10 @@ module Roo
39
40
  sheet_options = {}
40
41
  sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
41
42
  sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
43
+ sheet_options[:empty_cell] = (options[:empty_cell] || false)
44
+ shared_options = {}
42
45
 
46
+ shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
43
47
  unless is_stream?(filename_or_stream)
44
48
  file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
45
49
  basename = find_basename(filename_or_stream)
@@ -52,7 +56,7 @@ module Roo
52
56
  @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
53
57
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
54
58
 
55
- @shared = Shared.new(@tmpdir)
59
+ @shared = Shared.new(@tmpdir, shared_options)
56
60
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
57
61
  process_zipfile(@filename || filename_or_stream)
58
62
 
@@ -62,10 +66,10 @@ module Roo
62
66
  end
63
67
  end.compact
64
68
  @sheets = []
65
- @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
66
- @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
67
- [sheet_name, @sheets[n]]
68
- end]
69
+ @sheets_by_name = {}
70
+ @sheet_names.each_with_index do |sheet_name, n|
71
+ @sheets_by_name[sheet_name] = @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
72
+ end
69
73
 
70
74
  if cell_max
71
75
  cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
@@ -94,7 +98,12 @@ module Roo
94
98
  def sheet_for(sheet)
95
99
  sheet ||= default_sheet
96
100
  validate_sheet!(sheet)
97
- @sheets_by_name[sheet]
101
+ @sheets_by_name[sheet] || @sheets[sheet]
102
+ end
103
+
104
+ def images(sheet = nil)
105
+ images_names = sheet_for(sheet).images.map(&:last)
106
+ images_names.map { |iname| image_files.find { |ifile| ifile[iname] } }
98
107
  end
99
108
 
100
109
  # Returns the content of a spreadsheet-cell.
@@ -325,7 +334,7 @@ module Roo
325
334
 
326
335
  wb.extract(path)
327
336
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
328
- workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
337
+ workbook_doc.xpath('//sheet').map { |s| s['id'] }
329
338
  end
330
339
 
331
340
  # Internal
@@ -349,17 +358,13 @@ module Roo
349
358
 
350
359
  wb_rels.extract(path)
351
360
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
352
- worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
353
361
 
354
362
  relationships = rels_doc.xpath('//Relationship').select do |relationship|
355
- relationship.attributes['Type'].value == worksheet_type
363
+ worksheet_types.include? relationship['Type']
356
364
  end
357
365
 
358
- relationships.inject({}) do |hash, relationship|
359
- attributes = relationship.attributes
360
- id = attributes['Id']
361
- hash[id.value] = attributes['Target'].value
362
- hash
366
+ relationships.each_with_object({}) do |relationship, hash|
367
+ hash[relationship['Id']] = relationship['Target']
363
368
  end
364
369
  end
365
370
 
@@ -376,6 +381,15 @@ module Roo
376
381
  end
377
382
  end
378
383
 
384
+ def extract_images(entries, tmpdir)
385
+ img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
386
+ img_entries.each do |entry|
387
+ path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
388
+ image_files << path
389
+ entry.extract(path)
390
+ end
391
+ end
392
+
379
393
  # Extracts all needed files from the zip file
380
394
  def process_zipfile(zipfilename_or_stream)
381
395
  @sheet_files = []
@@ -409,6 +423,7 @@ module Roo
409
423
  sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
410
424
  sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
411
425
  extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
426
+ extract_images(entries, @tmpdir)
412
427
 
413
428
  entries.each do |entry|
414
429
  path =
@@ -435,6 +450,10 @@ module Roo
435
450
  # drawings, etc.
436
451
  nr = Regexp.last_match[1].to_i
437
452
  rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
453
+ when /drawing([0-9]+).xml.rels$/
454
+ # Extracting drawing relationships to make image lists for each sheet
455
+ nr = Regexp.last_match[1].to_i
456
+ image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}"
438
457
  end
439
458
 
440
459
  entry.extract(path) if path
@@ -442,7 +461,14 @@ module Roo
442
461
  end
443
462
 
444
463
  def safe_send(object, method, *args)
445
- object.send(method, *args) if object && object.respond_to?(method)
464
+ object.send(method, *args) if object&.respond_to?(method)
465
+ end
466
+
467
+ def worksheet_types
468
+ [
469
+ 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional
470
+ 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict
471
+ ]
446
472
  end
447
473
  end
448
474
  end
@@ -40,19 +40,23 @@ module Roo
40
40
  end
41
41
 
42
42
  def self.create_cell(type, *values)
43
+ cell_class(type)&.new(*values)
44
+ end
45
+
46
+ def self.cell_class(type)
43
47
  case type
44
48
  when :string
45
- Cell::String.new(*values)
49
+ Cell::String
46
50
  when :boolean
47
- Cell::Boolean.new(*values)
51
+ Cell::Boolean
48
52
  when :number
49
- Cell::Number.new(*values)
53
+ Cell::Number
50
54
  when :date
51
- Cell::Date.new(*values)
55
+ Cell::Date
52
56
  when :datetime
53
- Cell::DateTime.new(*values)
57
+ Cell::DateTime
54
58
  when :time
55
- Cell::Time.new(*values)
59
+ Cell::Time
56
60
  end
57
61
  end
58
62
 
@@ -1,13 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "roo/helpers/default_attr_reader"
4
+
1
5
  module Roo
2
6
  class Excelx
3
7
  class Cell
4
8
  class Base
9
+ extend Roo::Helpers::DefaultAttrReader
5
10
  attr_reader :cell_type, :cell_value, :value
6
11
 
7
12
  # FIXME: I think style should be deprecated. Having a style attribute
8
13
  # for a cell doesn't really accomplish much. It seems to be used
9
14
  # when you want to export to excelx.
10
- attr_reader :style
15
+ attr_reader_with_default default_type: :base, style: 1
11
16
 
12
17
 
13
18
  # FIXME: Updating a cell's value should be able to change the cell's type,
@@ -34,14 +39,12 @@ module Roo
34
39
  attr_writer :value
35
40
 
36
41
  def initialize(value, formula, excelx_type, style, link, coordinate)
37
- @link = !!link
38
42
  @cell_value = value
39
- @cell_type = excelx_type
40
- @formula = formula
41
- @style = style
43
+ @cell_type = excelx_type if excelx_type
44
+ @formula = formula if formula
45
+ @style = style unless style == 1
42
46
  @coordinate = coordinate
43
- @type = :base
44
- @value = link? ? Roo::Link.new(link, value) : value
47
+ @value = link ? Roo::Link.new(link, value) : value
45
48
  end
46
49
 
47
50
  def type
@@ -50,16 +53,16 @@ module Roo
50
53
  elsif link?
51
54
  :link
52
55
  else
53
- @type
56
+ default_type
54
57
  end
55
58
  end
56
59
 
57
60
  def formula?
58
- !!@formula
61
+ !!(defined?(@formula) && @formula)
59
62
  end
60
63
 
61
64
  def link?
62
- !!@link
65
+ Roo::Link === @value
63
66
  end
64
67
 
65
68
  alias_method :formatted_value, :value
@@ -68,9 +71,16 @@ module Roo
68
71
  formatted_value
69
72
  end
70
73
 
71
- # DEPRECATED: Please use link instead.
74
+ # DEPRECATED: Please use link? instead.
72
75
  def hyperlink
73
- warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link` instead.'
76
+ warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link?` instead.'
77
+ link?
78
+ end
79
+
80
+ # DEPRECATED: Please use link? instead.
81
+ def link
82
+ warn '[DEPRECATION] `link` is deprecated. Please use `link?` instead.'
83
+ link?
74
84
  end
75
85
 
76
86
  # DEPRECATED: Please use cell_value instead.
@@ -88,6 +98,10 @@ module Roo
88
98
  def empty?
89
99
  false
90
100
  end
101
+
102
+ def presence
103
+ empty? ? nil : self
104
+ end
91
105
  end
92
106
  end
93
107
  end
@@ -1,17 +1,20 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
4
  class Excelx
3
5
  class Cell
4
6
  class Boolean < Cell::Base
5
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
+
9
+ attr_reader_with_default default_type: :boolean, cell_type: :boolean
6
10
 
7
11
  def initialize(value, formula, style, link, coordinate)
8
- super(value, formula, nil, style, link, coordinate)
9
- @type = @cell_type = :boolean
10
- @value = link? ? Roo::Link.new(link, value) : create_boolean(value)
12
+ super(value, formula, nil, style, nil, coordinate)
13
+ @value = link ? Roo::Link.new(link, value) : create_boolean(value)
11
14
  end
12
15
 
13
16
  def formatted_value
14
- value ? 'TRUE'.freeze : 'FALSE'.freeze
17
+ value ? 'TRUE' : 'FALSE'
15
18
  end
16
19
 
17
20
  private
@@ -19,7 +22,7 @@ module Roo
19
22
  def create_boolean(value)
20
23
  # FIXME: Using a boolean will cause methods like Base#to_csv to fail.
21
24
  # Roo is using some method to ignore false/nil values.
22
- value.to_i == 1 ? true : false
25
+ value.to_i == 1
23
26
  end
24
27
  end
25
28
  end
@@ -4,23 +4,23 @@ module Roo
4
4
  class Excelx
5
5
  class Cell
6
6
  class Date < Roo::Excelx::Cell::DateTime
7
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate
8
+
9
+ attr_reader_with_default default_type: :date
8
10
 
9
11
  def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
10
12
  # NOTE: Pass all arguments to the parent class, DateTime.
11
13
  super
12
- @type = :date
13
14
  @format = excelx_type.last
14
- @value = link? ? Roo::Link.new(link, value) : create_date(base_date, value)
15
+ @value = link ? Roo::Link.new(link, value) : create_date(base_date, value)
15
16
  end
16
17
 
17
18
  private
18
19
 
19
- def create_date(base_date, value)
20
- date = base_date + value.to_i
21
- yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
20
+ def create_datetime(_,_); end
22
21
 
23
- ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
22
+ def create_date(base_date, value)
23
+ base_date + value.to_i
24
24
  end
25
25
  end
26
26
  end
@@ -1,16 +1,21 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'date'
2
4
 
3
5
  module Roo
4
6
  class Excelx
5
7
  class Cell
6
8
  class DateTime < Cell::Base
7
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
9
+ SECONDS_IN_DAY = 60 * 60 * 24
10
+
11
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
12
 
9
- def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
10
- super(value, formula, excelx_type, style, link, coordinate)
11
- @type = :datetime
13
+ attr_reader_with_default default_type: :datetime
14
+
15
+ def initialize(value, formula, excelx_type, style, link, base_timestamp, coordinate)
16
+ super(value, formula, excelx_type, style, nil, coordinate)
12
17
  @format = excelx_type.last
13
- @value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value)
18
+ @value = link ? Roo::Link.new(link, value) : create_datetime(base_timestamp, value)
14
19
  end
15
20
 
16
21
  # Public: Returns formatted value for a datetime. Formats can be an
@@ -78,7 +83,7 @@ module Roo
78
83
 
79
84
  TIME_FORMATS = {
80
85
  'hh' => '%H', # Hour (24): 01
81
- 'h' => '%-k'.freeze, # Hour (24): 1
86
+ 'h' => '%-k', # Hour (24): 1
82
87
  # 'hh'.freeze => '%I'.freeze, # Hour (12): 08
83
88
  # 'h'.freeze => '%-l'.freeze, # Hour (12): 8
84
89
  'mm' => '%M', # Minute: 01
@@ -92,18 +97,9 @@ module Roo
92
97
  '0' => '%1N' # Fractional Seconds: tenths.
93
98
  }
94
99
 
95
- def create_datetime(base_date, value)
96
- date = base_date + value.to_f.round(6)
97
- datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
98
- t = round_datetime(datetime_string)
99
-
100
- ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
101
- end
102
-
103
- def round_datetime(datetime_string)
104
- /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
105
-
106
- ::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0)
100
+ def create_datetime(base_timestamp, value)
101
+ timestamp = (base_timestamp + (value.to_f.round(6) * SECONDS_IN_DAY)).round(0)
102
+ ::Time.at(timestamp).utc.to_datetime
107
103
  end
108
104
  end
109
105
  end