roo 2.7.1 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +5 -5
  2. data/.github/issue_template.md +16 -0
  3. data/.github/pull_request_template.md +14 -0
  4. data/.rubocop.yml +186 -0
  5. data/.travis.yml +12 -7
  6. data/CHANGELOG.md +31 -2
  7. data/LICENSE +2 -0
  8. data/README.md +25 -12
  9. data/lib/roo.rb +4 -1
  10. data/lib/roo/base.rb +65 -56
  11. data/lib/roo/constants.rb +5 -3
  12. data/lib/roo/csv.rb +20 -12
  13. data/lib/roo/excelx.rb +42 -16
  14. data/lib/roo/excelx/cell.rb +10 -6
  15. data/lib/roo/excelx/cell/base.rb +26 -12
  16. data/lib/roo/excelx/cell/boolean.rb +9 -6
  17. data/lib/roo/excelx/cell/date.rb +7 -7
  18. data/lib/roo/excelx/cell/datetime.rb +14 -18
  19. data/lib/roo/excelx/cell/empty.rb +3 -2
  20. data/lib/roo/excelx/cell/number.rb +35 -34
  21. data/lib/roo/excelx/cell/string.rb +3 -3
  22. data/lib/roo/excelx/cell/time.rb +4 -3
  23. data/lib/roo/excelx/comments.rb +3 -3
  24. data/lib/roo/excelx/coordinate.rb +11 -4
  25. data/lib/roo/excelx/extractor.rb +21 -3
  26. data/lib/roo/excelx/format.rb +38 -31
  27. data/lib/roo/excelx/images.rb +26 -0
  28. data/lib/roo/excelx/relationships.rb +3 -3
  29. data/lib/roo/excelx/shared.rb +10 -3
  30. data/lib/roo/excelx/shared_strings.rb +9 -15
  31. data/lib/roo/excelx/sheet.rb +49 -10
  32. data/lib/roo/excelx/sheet_doc.rb +86 -48
  33. data/lib/roo/excelx/styles.rb +3 -3
  34. data/lib/roo/excelx/workbook.rb +7 -3
  35. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  36. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  37. data/lib/roo/open_office.rb +8 -6
  38. data/lib/roo/spreadsheet.rb +1 -1
  39. data/lib/roo/utils.rb +48 -19
  40. data/lib/roo/version.rb +1 -1
  41. data/roo.gemspec +13 -11
  42. data/spec/lib/roo/base_spec.rb +45 -3
  43. data/spec/lib/roo/excelx_spec.rb +125 -31
  44. data/spec/lib/roo/strict_spec.rb +43 -0
  45. data/spec/lib/roo/utils_spec.rb +12 -3
  46. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  47. data/spec/lib/roo_spec.rb +0 -0
  48. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  49. data/test/excelx/cell/test_base.rb +5 -0
  50. data/test/excelx/cell/test_datetime.rb +6 -6
  51. data/test/excelx/cell/test_empty.rb +11 -0
  52. data/test/excelx/cell/test_number.rb +9 -0
  53. data/test/excelx/cell/test_string.rb +20 -0
  54. data/test/excelx/cell/test_time.rb +4 -4
  55. data/test/excelx/test_coordinate.rb +51 -0
  56. data/test/formatters/test_csv.rb +17 -0
  57. data/test/formatters/test_xml.rb +4 -4
  58. data/test/roo/test_base.rb +2 -2
  59. data/test/roo/test_csv.rb +28 -0
  60. data/test/test_helper.rb +13 -0
  61. data/test/test_roo.rb +7 -7
  62. metadata +21 -11
  63. data/.github/ISSUE_TEMPLATE +0 -10
  64. data/Gemfile_ruby2 +0 -30
@@ -3,10 +3,11 @@ module Roo
3
3
  class Excelx
4
4
  class Cell
5
5
  class Empty < Cell::Base
6
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :hyperlink, :coordinate
6
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate
7
+
8
+ attr_reader_with_default default_type: nil, style: nil
7
9
 
8
10
  def initialize(coordinate)
9
- @value = @formula = @format = @cell_type = @cell_value = @hyperlink = nil
10
11
  @coordinate = coordinate
11
12
  end
12
13
 
@@ -1,16 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
4
  class Excelx
3
5
  class Cell
4
6
  class Number < Cell::Base
5
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
+
9
+ # FIXME: change default_type to number. This will break brittle tests.
10
+ attr_reader_with_default default_type: :float
6
11
 
7
12
  def initialize(value, formula, excelx_type, style, link, coordinate)
8
13
  super
9
- # FIXME: change @type to number. This will break brittle tests.
10
14
  # FIXME: Excelx_type is an array, but the first value isn't used.
11
- @type = :float
12
15
  @format = excelx_type.last
13
- @value = link? ? Roo::Link.new(link, value) : create_numeric(value)
16
+ @value = link ? Roo::Link.new(link, value) : create_numeric(value)
14
17
  end
15
18
 
16
19
  def create_numeric(number)
@@ -21,48 +24,50 @@ module Roo
21
24
  when /\.0/
22
25
  Float(number)
23
26
  else
24
- (number.include?('.') || (/\A[-+]?\d+E[-+]\d+\z/i =~ number)) ? Float(number) : Integer(number)
27
+ (number.include?('.') || (/\A[-+]?\d+E[-+]?\d+\z/i =~ number)) ? Float(number) : Integer(number, 10)
25
28
  end
26
29
  end
27
30
 
28
31
  def formatted_value
29
32
  return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value)
30
33
 
31
- formatter = formats[@format]
34
+ formatter = generate_formatter(@format)
32
35
  if formatter.is_a? Proc
33
36
  formatter.call(@cell_value)
34
- elsif zero_padded_number?
35
- "%0#{@format.size}d" % @cell_value
36
37
  else
37
38
  Kernel.format(formatter, @cell_value)
38
39
  end
39
40
  end
40
41
 
41
- def formats
42
+ def generate_formatter(format)
42
43
  # FIXME: numbers can be other colors besides red:
43
44
  # [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n]
44
- {
45
- 'General' => '%.0f',
46
- '0' => '%.0f',
47
- '0.00' => '%.2f',
48
- '0.000000' => '%.6f',
49
- '#,##0' => number_format('%.0f'),
50
- '#,##0.00' => number_format('%.2f'),
51
- '0%' => proc do |number|
52
- Kernel.format('%d%', number.to_f * 100)
53
- end,
54
- '0.00%' => proc do |number|
55
- Kernel.format('%.2f%', number.to_f * 100)
56
- end,
57
- '0.00E+00' => '%.2E',
58
- '#,##0 ;(#,##0)' => number_format('%.0f', '(%.0f)'),
59
- '#,##0 ;[Red](#,##0)' => number_format('%.0f', '[Red](%.0f)'),
60
- '#,##0.00;(#,##0.00)' => number_format('%.2f', '(%.2f)'),
61
- '#,##0.00;[Red](#,##0.00)' => number_format('%.2f', '[Red](%.2f)'),
45
+ case format
46
+ when /^General$/i then '%.0f'
47
+ when '0' then '%.0f'
48
+ when /^(0+)$/ then "%0#{$1.size}d"
49
+ when /^0\.(0+)$/ then "%.#{$1.size}f"
50
+ when '#,##0' then number_format('%.0f')
51
+ when '#,##0.00' then number_format('%.2f')
52
+ when '0%'
53
+ proc do |number|
54
+ Kernel.format('%d%%', number.to_f * 100)
55
+ end
56
+ when '0.00%'
57
+ proc do |number|
58
+ Kernel.format('%.2f%%', number.to_f * 100)
59
+ end
60
+ when '0.00E+00' then '%.2E'
61
+ when '#,##0 ;(#,##0)' then number_format('%.0f', '(%.0f)')
62
+ when '#,##0 ;[Red](#,##0)' then number_format('%.0f', '[Red](%.0f)')
63
+ when '#,##0.00;(#,##0.00)' then number_format('%.2f', '(%.2f)')
64
+ when '#,##0.00;[Red](#,##0.00)' then number_format('%.2f', '[Red](%.2f)')
62
65
  # FIXME: not quite sure what the format should look like in this case.
63
- '##0.0E+0' => '%.1E',
64
- '@' => proc { |number| number }
65
- }
66
+ when '##0.0E+0' then '%.1E'
67
+ when '@' then proc { |number| number }
68
+ else
69
+ raise "Unknown format: #{format.inspect}"
70
+ end
66
71
  end
67
72
 
68
73
  private
@@ -77,10 +82,6 @@ module Roo
77
82
  Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
78
83
  end
79
84
  end
80
-
81
- def zero_padded_number?
82
- @format[/0+/] == @format
83
- end
84
85
  end
85
86
  end
86
87
  end
@@ -2,12 +2,12 @@ module Roo
2
2
  class Excelx
3
3
  class Cell
4
4
  class String < Cell::Base
5
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
5
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
6
+
7
+ attr_reader_with_default default_type: :string, cell_type: :string
6
8
 
7
9
  def initialize(value, formula, style, link, coordinate)
8
10
  super(value, formula, nil, style, link, coordinate)
9
- @type = @cell_type = :string
10
- @value = link? ? Roo::Link.new(link, value) : value
11
11
  end
12
12
 
13
13
  def empty?
@@ -4,15 +4,16 @@ module Roo
4
4
  class Excelx
5
5
  class Cell
6
6
  class Time < Roo::Excelx::Cell::DateTime
7
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
+
9
+ attr_reader_with_default default_type: :time
8
10
 
9
11
  def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
10
12
  # NOTE: Pass all arguments to DateTime super class.
11
13
  super
12
- @type = :time
13
14
  @format = excelx_type.last
14
15
  @datetime = create_datetime(base_date, value)
15
- @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
16
+ @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
16
17
  end
17
18
 
18
19
  def formatted_value
@@ -12,10 +12,10 @@ module Roo
12
12
  def extract_comments
13
13
  return {} unless doc_exists?
14
14
 
15
- Hash[doc.xpath('//comments/commentList/comment').map do |comment|
15
+ doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, hash|
16
16
  value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
17
- [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
18
- end]
17
+ hash[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value
18
+ end
19
19
  end
20
20
  end
21
21
  end
@@ -1,11 +1,18 @@
1
1
  module Roo
2
2
  class Excelx
3
- class Coordinate
4
- attr_accessor :row, :column
3
+ class Coordinate < ::Array
5
4
 
6
5
  def initialize(row, column)
7
- @row = row
8
- @column = column
6
+ super() << row << column
7
+ freeze
8
+ end
9
+
10
+ def row
11
+ self[0]
12
+ end
13
+
14
+ def column
15
+ self[1]
9
16
  end
10
17
  end
11
18
  end
@@ -1,16 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "roo/helpers/weak_instance_cache"
4
+
1
5
  module Roo
2
6
  class Excelx
3
7
  class Extractor
4
- def initialize(path)
8
+ include Roo::Helpers::WeakInstanceCache
9
+
10
+ COMMON_STRINGS = {
11
+ t: "t",
12
+ r: "r",
13
+ s: "s",
14
+ ref: "ref",
15
+ html_tag_open: "<html>",
16
+ html_tag_closed: "</html>"
17
+ }
18
+
19
+ def initialize(path, options = {})
5
20
  @path = path
21
+ @options = options
6
22
  end
7
23
 
8
24
  private
9
25
 
10
26
  def doc
11
- raise FileNotFound, "#{@path} file not found" unless doc_exists?
27
+ instance_cache(:@doc) do
28
+ raise FileNotFound, "#{@path} file not found" unless doc_exists?
12
29
 
13
- ::Roo::Utils.load_xml(@path).remove_namespaces!
30
+ ::Roo::Utils.load_xml(@path).remove_namespaces!
31
+ end
14
32
  end
15
33
 
16
34
  def doc_exists?
@@ -1,49 +1,57 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
4
  class Excelx
3
5
  module Format
6
+ extend self
4
7
  EXCEPTIONAL_FORMATS = {
5
8
  'h:mm am/pm' => :date,
6
9
  'h:mm:ss am/pm' => :date
7
10
  }
8
11
 
9
12
  STANDARD_FORMATS = {
10
- 0 => 'General'.freeze,
11
- 1 => '0'.freeze,
12
- 2 => '0.00'.freeze,
13
- 3 => '#,##0'.freeze,
14
- 4 => '#,##0.00'.freeze,
15
- 9 => '0%'.freeze,
16
- 10 => '0.00%'.freeze,
17
- 11 => '0.00E+00'.freeze,
18
- 12 => '# ?/?'.freeze,
19
- 13 => '# ??/??'.freeze,
20
- 14 => 'mm-dd-yy'.freeze,
21
- 15 => 'd-mmm-yy'.freeze,
22
- 16 => 'd-mmm'.freeze,
23
- 17 => 'mmm-yy'.freeze,
24
- 18 => 'h:mm AM/PM'.freeze,
25
- 19 => 'h:mm:ss AM/PM'.freeze,
26
- 20 => 'h:mm'.freeze,
27
- 21 => 'h:mm:ss'.freeze,
28
- 22 => 'm/d/yy h:mm'.freeze,
29
- 37 => '#,##0 ;(#,##0)'.freeze,
30
- 38 => '#,##0 ;[Red](#,##0)'.freeze,
31
- 39 => '#,##0.00;(#,##0.00)'.freeze,
32
- 40 => '#,##0.00;[Red](#,##0.00)'.freeze,
33
- 45 => 'mm:ss'.freeze,
34
- 46 => '[h]:mm:ss'.freeze,
35
- 47 => 'mmss.0'.freeze,
36
- 48 => '##0.0E+0'.freeze,
37
- 49 => '@'.freeze
13
+ 0 => 'General',
14
+ 1 => '0',
15
+ 2 => '0.00',
16
+ 3 => '#,##0',
17
+ 4 => '#,##0.00',
18
+ 9 => '0%',
19
+ 10 => '0.00%',
20
+ 11 => '0.00E+00',
21
+ 12 => '# ?/?',
22
+ 13 => '# ??/??',
23
+ 14 => 'mm-dd-yy',
24
+ 15 => 'd-mmm-yy',
25
+ 16 => 'd-mmm',
26
+ 17 => 'mmm-yy',
27
+ 18 => 'h:mm AM/PM',
28
+ 19 => 'h:mm:ss AM/PM',
29
+ 20 => 'h:mm',
30
+ 21 => 'h:mm:ss',
31
+ 22 => 'm/d/yy h:mm',
32
+ 37 => '#,##0 ;(#,##0)',
33
+ 38 => '#,##0 ;[Red](#,##0)',
34
+ 39 => '#,##0.00;(#,##0.00)',
35
+ 40 => '#,##0.00;[Red](#,##0.00)',
36
+ 45 => 'mm:ss',
37
+ 46 => '[h]:mm:ss',
38
+ 47 => 'mmss.0',
39
+ 48 => '##0.0E+0',
40
+ 49 => '@'
38
41
  }
39
42
 
40
43
  def to_type(format)
44
+ @to_type ||= {}
45
+ @to_type[format] ||= _to_type(format)
46
+ end
47
+
48
+ def _to_type(format)
41
49
  format = format.to_s.downcase
42
50
  if (type = EXCEPTIONAL_FORMATS[format])
43
51
  type
44
52
  elsif format.include?('#')
45
53
  :float
46
- elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
54
+ elsif format.include?('y') || !format.match(/d+(?![\]])/).nil?
47
55
  if format.include?('h') || format.include?('s')
48
56
  :datetime
49
57
  else
@@ -58,7 +66,6 @@ module Roo
58
66
  end
59
67
  end
60
68
 
61
- module_function :to_type
62
69
  end
63
- end
70
+ end
64
71
  end
@@ -0,0 +1,26 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx
5
+ class Images < Excelx::Extractor
6
+
7
+ # Returns: Hash { id1: extracted_file_name1 },
8
+ # Example: { "rId1"=>"roo_media_image1.png",
9
+ # "rId2"=>"roo_media_image2.png",
10
+ # "rId3"=>"roo_media_image3.png" }
11
+ def list
12
+ @images ||= extract_images_names
13
+ end
14
+
15
+ private
16
+
17
+ def extract_images_names
18
+ return {} unless doc_exists?
19
+
20
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
21
+ hash[rel['Id']] = "roo" + rel['Target'].gsub(/\.\.\/|\//, '_')
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -16,9 +16,9 @@ module Roo
16
16
  def extract_relationships
17
17
  return [] unless doc_exists?
18
18
 
19
- Hash[doc.xpath('/Relationships/Relationship').map do |rel|
20
- [rel.attribute('Id').text, rel]
21
- end]
19
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
20
+ hash[rel['Id']] = rel
21
+ end
22
22
  end
23
23
  end
24
24
  end
@@ -4,12 +4,15 @@ module Roo
4
4
  # reduce memory usage and reduce the number of objects being passed
5
5
  # to various initializers.
6
6
  class Shared
7
- attr_accessor :comments_files, :sheet_files, :rels_files
8
- def initialize(dir)
7
+ attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files
8
+ def initialize(dir, options = {})
9
9
  @dir = dir
10
10
  @comments_files = []
11
11
  @sheet_files = []
12
12
  @rels_files = []
13
+ @options = options
14
+ @image_rels = []
15
+ @image_files = []
13
16
  end
14
17
 
15
18
  def styles
@@ -17,7 +20,7 @@ module Roo
17
20
  end
18
21
 
19
22
  def shared_strings
20
- @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
23
+ @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'), @options)
21
24
  end
22
25
 
23
26
  def workbook
@@ -27,6 +30,10 @@ module Roo
27
30
  def base_date
28
31
  workbook.base_date
29
32
  end
33
+
34
+ def base_timestamp
35
+ workbook.base_timestamp
36
+ end
30
37
  end
31
38
  end
32
39
  end
@@ -1,16 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'roo/excelx/extractor'
2
4
 
3
5
  module Roo
4
6
  class Excelx
5
7
  class SharedStrings < Excelx::Extractor
6
-
7
- COMMON_STRINGS = {
8
- t: "t",
9
- r: "r",
10
- html_tag_open: "<html>",
11
- html_tag_closed: "</html>"
12
- }
13
-
14
8
  def [](index)
15
9
  to_a[index]
16
10
  end
@@ -26,6 +20,7 @@ module Roo
26
20
  # Use to_html or to_a for html returns
27
21
  # See what is happening with commit???
28
22
  def use_html?(index)
23
+ return false if @options[:disable_html_wrapper]
29
24
  to_html[index][/<([biu]|sup|sub)>/]
30
25
  end
31
26
 
@@ -45,7 +40,7 @@ module Roo
45
40
  document = fix_invalid_shared_strings(doc)
46
41
  # read the shared strings xml document
47
42
  document.xpath('/sst/si').map do |si|
48
- shared_string = ''
43
+ shared_string = +""
49
44
  si.children.each do |elem|
50
45
  case elem.name
51
46
  when 'r'
@@ -65,7 +60,7 @@ module Roo
65
60
  fix_invalid_shared_strings(doc)
66
61
  # read the shared strings xml document
67
62
  doc.xpath('/sst/si').map do |si|
68
- html_string = '<html>'
63
+ html_string = '<html>'.dup
69
64
  si.children.each do |elem|
70
65
  case elem.name
71
66
  when 'r'
@@ -95,7 +90,7 @@ module Roo
95
90
  #
96
91
  # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
97
92
  def extract_html_r(r_elem)
98
- str = ''
93
+ str = +""
99
94
  xml_elems = {
100
95
  sub: false,
101
96
  sup: false,
@@ -103,7 +98,6 @@ module Roo
103
98
  i: false,
104
99
  u: false
105
100
  }
106
- b, i, u, sub, sup = false, false, false, false, false
107
101
  r_elem.children.each do |elem|
108
102
  case elem.name
109
103
  when 'rPr'
@@ -141,13 +135,13 @@ module Roo
141
135
 
142
136
  # This will return an html string
143
137
  def create_html(text, formatting)
144
- tmp_str = ''
138
+ tmp_str = +""
145
139
  formatting.each do |elem, val|
146
140
  tmp_str << "<#{elem}>" if val
147
141
  end
148
142
  tmp_str << text
149
- reverse_format = Hash[formatting.to_a.reverse]
150
- reverse_format.each do |elem, val|
143
+
144
+ formatting.reverse_each do |elem, val|
151
145
  tmp_str << "</#{elem}>" if val
152
146
  end
153
147
  tmp_str