roo 2.7.1 → 2.8.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +5 -5
  2. data/.github/issue_template.md +16 -0
  3. data/.github/pull_request_template.md +14 -0
  4. data/.rubocop.yml +186 -0
  5. data/.travis.yml +12 -7
  6. data/CHANGELOG.md +31 -2
  7. data/LICENSE +2 -0
  8. data/README.md +25 -12
  9. data/lib/roo.rb +4 -1
  10. data/lib/roo/base.rb +65 -56
  11. data/lib/roo/constants.rb +5 -3
  12. data/lib/roo/csv.rb +20 -12
  13. data/lib/roo/excelx.rb +42 -16
  14. data/lib/roo/excelx/cell.rb +10 -6
  15. data/lib/roo/excelx/cell/base.rb +26 -12
  16. data/lib/roo/excelx/cell/boolean.rb +9 -6
  17. data/lib/roo/excelx/cell/date.rb +7 -7
  18. data/lib/roo/excelx/cell/datetime.rb +14 -18
  19. data/lib/roo/excelx/cell/empty.rb +3 -2
  20. data/lib/roo/excelx/cell/number.rb +35 -34
  21. data/lib/roo/excelx/cell/string.rb +3 -3
  22. data/lib/roo/excelx/cell/time.rb +4 -3
  23. data/lib/roo/excelx/comments.rb +3 -3
  24. data/lib/roo/excelx/coordinate.rb +11 -4
  25. data/lib/roo/excelx/extractor.rb +21 -3
  26. data/lib/roo/excelx/format.rb +38 -31
  27. data/lib/roo/excelx/images.rb +26 -0
  28. data/lib/roo/excelx/relationships.rb +3 -3
  29. data/lib/roo/excelx/shared.rb +10 -3
  30. data/lib/roo/excelx/shared_strings.rb +9 -15
  31. data/lib/roo/excelx/sheet.rb +49 -10
  32. data/lib/roo/excelx/sheet_doc.rb +86 -48
  33. data/lib/roo/excelx/styles.rb +3 -3
  34. data/lib/roo/excelx/workbook.rb +7 -3
  35. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  36. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  37. data/lib/roo/open_office.rb +8 -6
  38. data/lib/roo/spreadsheet.rb +1 -1
  39. data/lib/roo/utils.rb +48 -19
  40. data/lib/roo/version.rb +1 -1
  41. data/roo.gemspec +13 -11
  42. data/spec/lib/roo/base_spec.rb +45 -3
  43. data/spec/lib/roo/excelx_spec.rb +125 -31
  44. data/spec/lib/roo/strict_spec.rb +43 -0
  45. data/spec/lib/roo/utils_spec.rb +12 -3
  46. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  47. data/spec/lib/roo_spec.rb +0 -0
  48. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  49. data/test/excelx/cell/test_base.rb +5 -0
  50. data/test/excelx/cell/test_datetime.rb +6 -6
  51. data/test/excelx/cell/test_empty.rb +11 -0
  52. data/test/excelx/cell/test_number.rb +9 -0
  53. data/test/excelx/cell/test_string.rb +20 -0
  54. data/test/excelx/cell/test_time.rb +4 -4
  55. data/test/excelx/test_coordinate.rb +51 -0
  56. data/test/formatters/test_csv.rb +17 -0
  57. data/test/formatters/test_xml.rb +4 -4
  58. data/test/roo/test_base.rb +2 -2
  59. data/test/roo/test_csv.rb +28 -0
  60. data/test/test_helper.rb +13 -0
  61. data/test/test_roo.rb +7 -7
  62. metadata +21 -11
  63. data/.github/ISSUE_TEMPLATE +0 -10
  64. data/Gemfile_ruby2 +0 -30
@@ -3,10 +3,11 @@ module Roo
3
3
  class Excelx
4
4
  class Cell
5
5
  class Empty < Cell::Base
6
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :hyperlink, :coordinate
6
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate
7
+
8
+ attr_reader_with_default default_type: nil, style: nil
7
9
 
8
10
  def initialize(coordinate)
9
- @value = @formula = @format = @cell_type = @cell_value = @hyperlink = nil
10
11
  @coordinate = coordinate
11
12
  end
12
13
 
@@ -1,16 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
4
  class Excelx
3
5
  class Cell
4
6
  class Number < Cell::Base
5
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
+
9
+ # FIXME: change default_type to number. This will break brittle tests.
10
+ attr_reader_with_default default_type: :float
6
11
 
7
12
  def initialize(value, formula, excelx_type, style, link, coordinate)
8
13
  super
9
- # FIXME: change @type to number. This will break brittle tests.
10
14
  # FIXME: Excelx_type is an array, but the first value isn't used.
11
- @type = :float
12
15
  @format = excelx_type.last
13
- @value = link? ? Roo::Link.new(link, value) : create_numeric(value)
16
+ @value = link ? Roo::Link.new(link, value) : create_numeric(value)
14
17
  end
15
18
 
16
19
  def create_numeric(number)
@@ -21,48 +24,50 @@ module Roo
21
24
  when /\.0/
22
25
  Float(number)
23
26
  else
24
- (number.include?('.') || (/\A[-+]?\d+E[-+]\d+\z/i =~ number)) ? Float(number) : Integer(number)
27
+ (number.include?('.') || (/\A[-+]?\d+E[-+]?\d+\z/i =~ number)) ? Float(number) : Integer(number, 10)
25
28
  end
26
29
  end
27
30
 
28
31
  def formatted_value
29
32
  return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value)
30
33
 
31
- formatter = formats[@format]
34
+ formatter = generate_formatter(@format)
32
35
  if formatter.is_a? Proc
33
36
  formatter.call(@cell_value)
34
- elsif zero_padded_number?
35
- "%0#{@format.size}d" % @cell_value
36
37
  else
37
38
  Kernel.format(formatter, @cell_value)
38
39
  end
39
40
  end
40
41
 
41
- def formats
42
+ def generate_formatter(format)
42
43
  # FIXME: numbers can be other colors besides red:
43
44
  # [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n]
44
- {
45
- 'General' => '%.0f',
46
- '0' => '%.0f',
47
- '0.00' => '%.2f',
48
- '0.000000' => '%.6f',
49
- '#,##0' => number_format('%.0f'),
50
- '#,##0.00' => number_format('%.2f'),
51
- '0%' => proc do |number|
52
- Kernel.format('%d%', number.to_f * 100)
53
- end,
54
- '0.00%' => proc do |number|
55
- Kernel.format('%.2f%', number.to_f * 100)
56
- end,
57
- '0.00E+00' => '%.2E',
58
- '#,##0 ;(#,##0)' => number_format('%.0f', '(%.0f)'),
59
- '#,##0 ;[Red](#,##0)' => number_format('%.0f', '[Red](%.0f)'),
60
- '#,##0.00;(#,##0.00)' => number_format('%.2f', '(%.2f)'),
61
- '#,##0.00;[Red](#,##0.00)' => number_format('%.2f', '[Red](%.2f)'),
45
+ case format
46
+ when /^General$/i then '%.0f'
47
+ when '0' then '%.0f'
48
+ when /^(0+)$/ then "%0#{$1.size}d"
49
+ when /^0\.(0+)$/ then "%.#{$1.size}f"
50
+ when '#,##0' then number_format('%.0f')
51
+ when '#,##0.00' then number_format('%.2f')
52
+ when '0%'
53
+ proc do |number|
54
+ Kernel.format('%d%%', number.to_f * 100)
55
+ end
56
+ when '0.00%'
57
+ proc do |number|
58
+ Kernel.format('%.2f%%', number.to_f * 100)
59
+ end
60
+ when '0.00E+00' then '%.2E'
61
+ when '#,##0 ;(#,##0)' then number_format('%.0f', '(%.0f)')
62
+ when '#,##0 ;[Red](#,##0)' then number_format('%.0f', '[Red](%.0f)')
63
+ when '#,##0.00;(#,##0.00)' then number_format('%.2f', '(%.2f)')
64
+ when '#,##0.00;[Red](#,##0.00)' then number_format('%.2f', '[Red](%.2f)')
62
65
  # FIXME: not quite sure what the format should look like in this case.
63
- '##0.0E+0' => '%.1E',
64
- '@' => proc { |number| number }
65
- }
66
+ when '##0.0E+0' then '%.1E'
67
+ when '@' then proc { |number| number }
68
+ else
69
+ raise "Unknown format: #{format.inspect}"
70
+ end
66
71
  end
67
72
 
68
73
  private
@@ -77,10 +82,6 @@ module Roo
77
82
  Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
78
83
  end
79
84
  end
80
-
81
- def zero_padded_number?
82
- @format[/0+/] == @format
83
- end
84
85
  end
85
86
  end
86
87
  end
@@ -2,12 +2,12 @@ module Roo
2
2
  class Excelx
3
3
  class Cell
4
4
  class String < Cell::Base
5
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
5
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
6
+
7
+ attr_reader_with_default default_type: :string, cell_type: :string
6
8
 
7
9
  def initialize(value, formula, style, link, coordinate)
8
10
  super(value, formula, nil, style, link, coordinate)
9
- @type = @cell_type = :string
10
- @value = link? ? Roo::Link.new(link, value) : value
11
11
  end
12
12
 
13
13
  def empty?
@@ -4,15 +4,16 @@ module Roo
4
4
  class Excelx
5
5
  class Cell
6
6
  class Time < Roo::Excelx::Cell::DateTime
7
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
+
9
+ attr_reader_with_default default_type: :time
8
10
 
9
11
  def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
10
12
  # NOTE: Pass all arguments to DateTime super class.
11
13
  super
12
- @type = :time
13
14
  @format = excelx_type.last
14
15
  @datetime = create_datetime(base_date, value)
15
- @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
16
+ @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
16
17
  end
17
18
 
18
19
  def formatted_value
@@ -12,10 +12,10 @@ module Roo
12
12
  def extract_comments
13
13
  return {} unless doc_exists?
14
14
 
15
- Hash[doc.xpath('//comments/commentList/comment').map do |comment|
15
+ doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, hash|
16
16
  value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
17
- [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
18
- end]
17
+ hash[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value
18
+ end
19
19
  end
20
20
  end
21
21
  end
@@ -1,11 +1,18 @@
1
1
  module Roo
2
2
  class Excelx
3
- class Coordinate
4
- attr_accessor :row, :column
3
+ class Coordinate < ::Array
5
4
 
6
5
  def initialize(row, column)
7
- @row = row
8
- @column = column
6
+ super() << row << column
7
+ freeze
8
+ end
9
+
10
+ def row
11
+ self[0]
12
+ end
13
+
14
+ def column
15
+ self[1]
9
16
  end
10
17
  end
11
18
  end
@@ -1,16 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "roo/helpers/weak_instance_cache"
4
+
1
5
  module Roo
2
6
  class Excelx
3
7
  class Extractor
4
- def initialize(path)
8
+ include Roo::Helpers::WeakInstanceCache
9
+
10
+ COMMON_STRINGS = {
11
+ t: "t",
12
+ r: "r",
13
+ s: "s",
14
+ ref: "ref",
15
+ html_tag_open: "<html>",
16
+ html_tag_closed: "</html>"
17
+ }
18
+
19
+ def initialize(path, options = {})
5
20
  @path = path
21
+ @options = options
6
22
  end
7
23
 
8
24
  private
9
25
 
10
26
  def doc
11
- raise FileNotFound, "#{@path} file not found" unless doc_exists?
27
+ instance_cache(:@doc) do
28
+ raise FileNotFound, "#{@path} file not found" unless doc_exists?
12
29
 
13
- ::Roo::Utils.load_xml(@path).remove_namespaces!
30
+ ::Roo::Utils.load_xml(@path).remove_namespaces!
31
+ end
14
32
  end
15
33
 
16
34
  def doc_exists?
@@ -1,49 +1,57 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
4
  class Excelx
3
5
  module Format
6
+ extend self
4
7
  EXCEPTIONAL_FORMATS = {
5
8
  'h:mm am/pm' => :date,
6
9
  'h:mm:ss am/pm' => :date
7
10
  }
8
11
 
9
12
  STANDARD_FORMATS = {
10
- 0 => 'General'.freeze,
11
- 1 => '0'.freeze,
12
- 2 => '0.00'.freeze,
13
- 3 => '#,##0'.freeze,
14
- 4 => '#,##0.00'.freeze,
15
- 9 => '0%'.freeze,
16
- 10 => '0.00%'.freeze,
17
- 11 => '0.00E+00'.freeze,
18
- 12 => '# ?/?'.freeze,
19
- 13 => '# ??/??'.freeze,
20
- 14 => 'mm-dd-yy'.freeze,
21
- 15 => 'd-mmm-yy'.freeze,
22
- 16 => 'd-mmm'.freeze,
23
- 17 => 'mmm-yy'.freeze,
24
- 18 => 'h:mm AM/PM'.freeze,
25
- 19 => 'h:mm:ss AM/PM'.freeze,
26
- 20 => 'h:mm'.freeze,
27
- 21 => 'h:mm:ss'.freeze,
28
- 22 => 'm/d/yy h:mm'.freeze,
29
- 37 => '#,##0 ;(#,##0)'.freeze,
30
- 38 => '#,##0 ;[Red](#,##0)'.freeze,
31
- 39 => '#,##0.00;(#,##0.00)'.freeze,
32
- 40 => '#,##0.00;[Red](#,##0.00)'.freeze,
33
- 45 => 'mm:ss'.freeze,
34
- 46 => '[h]:mm:ss'.freeze,
35
- 47 => 'mmss.0'.freeze,
36
- 48 => '##0.0E+0'.freeze,
37
- 49 => '@'.freeze
13
+ 0 => 'General',
14
+ 1 => '0',
15
+ 2 => '0.00',
16
+ 3 => '#,##0',
17
+ 4 => '#,##0.00',
18
+ 9 => '0%',
19
+ 10 => '0.00%',
20
+ 11 => '0.00E+00',
21
+ 12 => '# ?/?',
22
+ 13 => '# ??/??',
23
+ 14 => 'mm-dd-yy',
24
+ 15 => 'd-mmm-yy',
25
+ 16 => 'd-mmm',
26
+ 17 => 'mmm-yy',
27
+ 18 => 'h:mm AM/PM',
28
+ 19 => 'h:mm:ss AM/PM',
29
+ 20 => 'h:mm',
30
+ 21 => 'h:mm:ss',
31
+ 22 => 'm/d/yy h:mm',
32
+ 37 => '#,##0 ;(#,##0)',
33
+ 38 => '#,##0 ;[Red](#,##0)',
34
+ 39 => '#,##0.00;(#,##0.00)',
35
+ 40 => '#,##0.00;[Red](#,##0.00)',
36
+ 45 => 'mm:ss',
37
+ 46 => '[h]:mm:ss',
38
+ 47 => 'mmss.0',
39
+ 48 => '##0.0E+0',
40
+ 49 => '@'
38
41
  }
39
42
 
40
43
  def to_type(format)
44
+ @to_type ||= {}
45
+ @to_type[format] ||= _to_type(format)
46
+ end
47
+
48
+ def _to_type(format)
41
49
  format = format.to_s.downcase
42
50
  if (type = EXCEPTIONAL_FORMATS[format])
43
51
  type
44
52
  elsif format.include?('#')
45
53
  :float
46
- elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
54
+ elsif format.include?('y') || !format.match(/d+(?![\]])/).nil?
47
55
  if format.include?('h') || format.include?('s')
48
56
  :datetime
49
57
  else
@@ -58,7 +66,6 @@ module Roo
58
66
  end
59
67
  end
60
68
 
61
- module_function :to_type
62
69
  end
63
- end
70
+ end
64
71
  end
@@ -0,0 +1,26 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx
5
+ class Images < Excelx::Extractor
6
+
7
+ # Returns: Hash { id1: extracted_file_name1 },
8
+ # Example: { "rId1"=>"roo_media_image1.png",
9
+ # "rId2"=>"roo_media_image2.png",
10
+ # "rId3"=>"roo_media_image3.png" }
11
+ def list
12
+ @images ||= extract_images_names
13
+ end
14
+
15
+ private
16
+
17
+ def extract_images_names
18
+ return {} unless doc_exists?
19
+
20
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
21
+ hash[rel['Id']] = "roo" + rel['Target'].gsub(/\.\.\/|\//, '_')
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -16,9 +16,9 @@ module Roo
16
16
  def extract_relationships
17
17
  return [] unless doc_exists?
18
18
 
19
- Hash[doc.xpath('/Relationships/Relationship').map do |rel|
20
- [rel.attribute('Id').text, rel]
21
- end]
19
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
20
+ hash[rel['Id']] = rel
21
+ end
22
22
  end
23
23
  end
24
24
  end
@@ -4,12 +4,15 @@ module Roo
4
4
  # reduce memory usage and reduce the number of objects being passed
5
5
  # to various inititializers.
6
6
  class Shared
7
- attr_accessor :comments_files, :sheet_files, :rels_files
8
- def initialize(dir)
7
+ attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files
8
+ def initialize(dir, options = {})
9
9
  @dir = dir
10
10
  @comments_files = []
11
11
  @sheet_files = []
12
12
  @rels_files = []
13
+ @options = options
14
+ @image_rels = []
15
+ @image_files = []
13
16
  end
14
17
 
15
18
  def styles
@@ -17,7 +20,7 @@ module Roo
17
20
  end
18
21
 
19
22
  def shared_strings
20
- @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
23
+ @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'), @options)
21
24
  end
22
25
 
23
26
  def workbook
@@ -27,6 +30,10 @@ module Roo
27
30
  def base_date
28
31
  workbook.base_date
29
32
  end
33
+
34
+ def base_timestamp
35
+ workbook.base_timestamp
36
+ end
30
37
  end
31
38
  end
32
39
  end
@@ -1,16 +1,10 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'roo/excelx/extractor'
2
4
 
3
5
  module Roo
4
6
  class Excelx
5
7
  class SharedStrings < Excelx::Extractor
6
-
7
- COMMON_STRINGS = {
8
- t: "t",
9
- r: "r",
10
- html_tag_open: "<html>",
11
- html_tag_closed: "</html>"
12
- }
13
-
14
8
  def [](index)
15
9
  to_a[index]
16
10
  end
@@ -26,6 +20,7 @@ module Roo
26
20
  # Use to_html or to_a for html returns
27
21
  # See what is happening with commit???
28
22
  def use_html?(index)
23
+ return false if @options[:disable_html_wrapper]
29
24
  to_html[index][/<([biu]|sup|sub)>/]
30
25
  end
31
26
 
@@ -45,7 +40,7 @@ module Roo
45
40
  document = fix_invalid_shared_strings(doc)
46
41
  # read the shared strings xml document
47
42
  document.xpath('/sst/si').map do |si|
48
- shared_string = ''
43
+ shared_string = +""
49
44
  si.children.each do |elem|
50
45
  case elem.name
51
46
  when 'r'
@@ -65,7 +60,7 @@ module Roo
65
60
  fix_invalid_shared_strings(doc)
66
61
  # read the shared strings xml document
67
62
  doc.xpath('/sst/si').map do |si|
68
- html_string = '<html>'
63
+ html_string = '<html>'.dup
69
64
  si.children.each do |elem|
70
65
  case elem.name
71
66
  when 'r'
@@ -95,7 +90,7 @@ module Roo
95
90
  #
96
91
  # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
97
92
  def extract_html_r(r_elem)
98
- str = ''
93
+ str = +""
99
94
  xml_elems = {
100
95
  sub: false,
101
96
  sup: false,
@@ -103,7 +98,6 @@ module Roo
103
98
  i: false,
104
99
  u: false
105
100
  }
106
- b, i, u, sub, sup = false, false, false, false, false
107
101
  r_elem.children.each do |elem|
108
102
  case elem.name
109
103
  when 'rPr'
@@ -141,13 +135,13 @@ module Roo
141
135
 
142
136
  # This will return an html string
143
137
  def create_html(text, formatting)
144
- tmp_str = ''
138
+ tmp_str = +""
145
139
  formatting.each do |elem, val|
146
140
  tmp_str << "<#{elem}>" if val
147
141
  end
148
142
  tmp_str << text
149
- reverse_format = Hash[formatting.to_a.reverse]
150
- reverse_format.each do |elem, val|
143
+
144
+ formatting.reverse_each do |elem, val|
151
145
  tmp_str << "</#{elem}>" if val
152
146
  end
153
147
  tmp_str