roo 2.3.0 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95)
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +17 -0
  3. data/.github/issue_template.md +16 -0
  4. data/.github/pull_request_template.md +14 -0
  5. data/.github/workflows/pull-request.yml +15 -0
  6. data/.github/workflows/ruby.yml +34 -0
  7. data/.gitignore +4 -0
  8. data/.rubocop.yml +186 -0
  9. data/CHANGELOG.md +148 -0
  10. data/Gemfile +4 -4
  11. data/LICENSE +2 -0
  12. data/README.md +84 -27
  13. data/Rakefile +1 -1
  14. data/lib/roo/base.rb +111 -237
  15. data/lib/roo/constants.rb +5 -3
  16. data/lib/roo/csv.rb +106 -85
  17. data/lib/roo/errors.rb +2 -0
  18. data/lib/roo/excelx/cell/base.rb +26 -12
  19. data/lib/roo/excelx/cell/boolean.rb +9 -6
  20. data/lib/roo/excelx/cell/date.rb +7 -7
  21. data/lib/roo/excelx/cell/datetime.rb +50 -44
  22. data/lib/roo/excelx/cell/empty.rb +3 -2
  23. data/lib/roo/excelx/cell/number.rb +60 -47
  24. data/lib/roo/excelx/cell/string.rb +3 -3
  25. data/lib/roo/excelx/cell/time.rb +17 -16
  26. data/lib/roo/excelx/cell.rb +11 -7
  27. data/lib/roo/excelx/comments.rb +3 -3
  28. data/lib/roo/excelx/coordinate.rb +11 -4
  29. data/lib/roo/excelx/extractor.rb +20 -3
  30. data/lib/roo/excelx/format.rb +38 -31
  31. data/lib/roo/excelx/images.rb +26 -0
  32. data/lib/roo/excelx/relationships.rb +12 -4
  33. data/lib/roo/excelx/shared.rb +10 -3
  34. data/lib/roo/excelx/shared_strings.rb +113 -9
  35. data/lib/roo/excelx/sheet.rb +49 -10
  36. data/lib/roo/excelx/sheet_doc.rb +101 -48
  37. data/lib/roo/excelx/styles.rb +4 -4
  38. data/lib/roo/excelx/workbook.rb +8 -3
  39. data/lib/roo/excelx.rb +85 -42
  40. data/lib/roo/formatters/base.rb +15 -0
  41. data/lib/roo/formatters/csv.rb +84 -0
  42. data/lib/roo/formatters/matrix.rb +23 -0
  43. data/lib/roo/formatters/xml.rb +31 -0
  44. data/lib/roo/formatters/yaml.rb +40 -0
  45. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  46. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  47. data/lib/roo/open_office.rb +41 -27
  48. data/lib/roo/spreadsheet.rb +8 -2
  49. data/lib/roo/tempdir.rb +24 -0
  50. data/lib/roo/utils.rb +76 -26
  51. data/lib/roo/version.rb +1 -1
  52. data/lib/roo.rb +5 -0
  53. data/roo.gemspec +22 -12
  54. data/spec/lib/roo/base_spec.rb +65 -3
  55. data/spec/lib/roo/csv_spec.rb +19 -0
  56. data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
  57. data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
  58. data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
  59. data/spec/lib/roo/excelx_spec.rb +237 -5
  60. data/spec/lib/roo/openoffice_spec.rb +2 -2
  61. data/spec/lib/roo/spreadsheet_spec.rb +1 -1
  62. data/spec/lib/roo/strict_spec.rb +43 -0
  63. data/spec/lib/roo/utils_spec.rb +22 -9
  64. data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
  65. data/spec/lib/roo_spec.rb +0 -0
  66. data/spec/spec_helper.rb +2 -7
  67. data/test/excelx/cell/test_attr_reader_default.rb +72 -0
  68. data/test/excelx/cell/test_base.rb +6 -2
  69. data/test/excelx/cell/test_boolean.rb +1 -3
  70. data/test/excelx/cell/test_date.rb +1 -6
  71. data/test/excelx/cell/test_datetime.rb +7 -10
  72. data/test/excelx/cell/test_empty.rb +12 -2
  73. data/test/excelx/cell/test_number.rb +28 -4
  74. data/test/excelx/cell/test_string.rb +21 -3
  75. data/test/excelx/cell/test_time.rb +7 -10
  76. data/test/excelx/test_coordinate.rb +51 -0
  77. data/test/formatters/test_csv.rb +136 -0
  78. data/test/formatters/test_matrix.rb +76 -0
  79. data/test/formatters/test_xml.rb +78 -0
  80. data/test/formatters/test_yaml.rb +20 -0
  81. data/test/helpers/test_accessing_files.rb +81 -0
  82. data/test/helpers/test_comments.rb +43 -0
  83. data/test/helpers/test_formulas.rb +9 -0
  84. data/test/helpers/test_labels.rb +103 -0
  85. data/test/helpers/test_sheets.rb +55 -0
  86. data/test/helpers/test_styles.rb +62 -0
  87. data/test/roo/test_base.rb +182 -0
  88. data/test/roo/test_csv.rb +88 -0
  89. data/test/roo/test_excelx.rb +360 -0
  90. data/test/roo/test_libre_office.rb +9 -0
  91. data/test/roo/test_open_office.rb +289 -0
  92. data/test/test_helper.rb +129 -14
  93. data/test/test_roo.rb +60 -1765
  94. metadata +91 -21
  95. data/.travis.yml +0 -14
@@ -1,37 +1,37 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
4
  class Excelx
3
5
  class Cell
4
6
  class Number < Cell::Base
5
- ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!)
7
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
6
8
 
7
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
9
+ # FIXME: change default_type to number. This will break brittle tests.
10
+ attr_reader_with_default default_type: :float
8
11
 
9
12
  def initialize(value, formula, excelx_type, style, link, coordinate)
10
13
  super
11
- # FIXME: change @type to number. This will break brittle tests.
12
14
  # FIXME: Excelx_type is an array, but the first value isn't used.
13
- @type = :float
14
15
  @format = excelx_type.last
15
- @value = link? ? Roo::Link.new(link, value) : create_numeric(value)
16
+ @value = link ? Roo::Link.new(link, value) : create_numeric(value)
16
17
  end
17
18
 
18
19
  def create_numeric(number)
19
- return number if ERROR_VALUES.include?(number)
20
-
20
+ return number if Excelx::ERROR_VALUES.include?(number)
21
21
  case @format
22
22
  when /%/
23
23
  Float(number)
24
24
  when /\.0/
25
25
  Float(number)
26
26
  else
27
- number.include?('.') ? Float(number) : Integer(number)
27
+ (number.include?('.') || (/\A[-+]?\d+E[-+]?\d+\z/i =~ number)) ? Float(number) : Integer(number, 10)
28
28
  end
29
29
  end
30
30
 
31
31
  def formatted_value
32
- return @cell_value if ERROR_VALUES.include?(@cell_value)
32
+ return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value)
33
33
 
34
- formatter = formats[@format]
34
+ formatter = generate_formatter(@format)
35
35
  if formatter.is_a? Proc
36
36
  formatter.call(@cell_value)
37
37
  else
@@ -39,46 +39,59 @@ module Roo
39
39
  end
40
40
  end
41
41
 
42
- def formats
42
+ def generate_formatter(format)
43
43
  # FIXME: numbers can be other colors besides red:
44
44
  # [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n]
45
- {
46
- 'General' => '%.0f',
47
- '0' => '%.0f',
48
- '0.00' => '%.2f',
49
- '#,##0' => proc do |number|
50
- Kernel.format('%.0f', number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
51
- end,
52
- '#,##0.00' => proc do |number|
53
- Kernel.format('%.2f', number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
54
- end,
55
- '0%' => proc do |number|
56
- Kernel.format('%d%', number.to_f * 100)
57
- end,
58
- '0.00%' => proc do |number|
59
- Kernel.format('%.2f%', number.to_f * 100)
60
- end,
61
- '0.00E+00' => '%.2E',
62
- '#,##0 ;(#,##0)' => proc do |number|
63
- formatter = number.to_i > 0 ? '%.0f' : '(%.0f)'
64
- Kernel.format(formatter, number.to_f.abs).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
65
- end,
66
- '#,##0 ;[Red](#,##0)' => proc do |number|
67
- formatter = number.to_i > 0 ? '%.0f' : '[Red](%.0f)'
68
- Kernel.format(formatter, number.to_f.abs).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
69
- end,
70
- '#,##0.00;(#,##0.00)' => proc do |number|
71
- formatter = number.to_i > 0 ? '%.2f' : '(%.2f)'
72
- Kernel.format(formatter, number.to_f.abs).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
73
- end,
74
- '#,##0.00;[Red](#,##0.00)' => proc do |number|
75
- formatter = number.to_i > 0 ? '%.2f' : '[Red](%.2f)'
76
- Kernel.format(formatter, number.to_f.abs).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
77
- end,
45
+ case format
46
+ when /^General$/i then '%.0f'
47
+ when '0' then '%.0f'
48
+ when /^(0+)$/ then "%0#{$1.size}d"
49
+ when /^0\.(0+)$/ then "%.#{$1.size}f"
50
+ when '#,##0' then number_format('%.0f')
51
+ when /^#,##0.(0+)$/ then number_format("%.#{$1.size}f")
52
+ when '0%'
53
+ proc do |number|
54
+ Kernel.format('%.0f%%', number.to_f * 100)
55
+ end
56
+ when '0.00%'
57
+ proc do |number|
58
+ Kernel.format('%.2f%%', number.to_f * 100)
59
+ end
60
+ when '0.00E+00' then '%.2E'
61
+ when '#,##0 ;(#,##0)' then number_format('%.0f', '(%.0f)')
62
+ when '#,##0 ;[Red](#,##0)' then number_format('%.0f', '[Red](%.0f)')
63
+ when '#,##0.00;(#,##0.00)' then number_format('%.2f', '(%.2f)')
64
+ when '#,##0.00;[Red](#,##0.00)' then number_format('%.2f', '[Red](%.2f)')
78
65
  # FIXME: not quite sure what the format should look like in this case.
79
- '##0.0E+0' => '%.1E',
80
- '@' => proc { |number| number }
81
- }
66
+ when '##0.0E+0' then '%.1E'
67
+ when "_-* #,##0.00\\ _€_-;\\-* #,##0.00\\ _€_-;_-* \"-\"??\\ _€_-;_-@_-" then number_format('%.2f', '-%.2f')
68
+ when '@' then proc { |number| number }
69
+ when /^(?:_\()?"([^"]*)"(?:\* )?([^_]+)/
70
+ proc do |number|
71
+ formatted_number = generate_formatter($2).call(number)
72
+ "#{$1}#{formatted_number}"
73
+ end
74
+ when /^_[- \(]\[\$([^-]*)[^#@]+([^_]+)/
75
+ proc do |number|
76
+ formatted_number = generate_formatter($2).call(number)
77
+ "#{$1}#{formatted_number}"
78
+ end
79
+ else
80
+ raise "Unknown format: #{format.inspect}"
81
+ end
82
+ end
83
+
84
+ private
85
+
86
+ def number_format(formatter, negative_formatter = nil)
87
+ proc do |number|
88
+ if negative_formatter
89
+ formatter = number.to_i > 0 ? formatter : negative_formatter
90
+ number = number.to_f.abs
91
+ end
92
+
93
+ Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
94
+ end
82
95
  end
83
96
  end
84
97
  end
@@ -2,12 +2,12 @@ module Roo
2
2
  class Excelx
3
3
  class Cell
4
4
  class String < Cell::Base
5
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
5
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
6
+
7
+ attr_reader_with_default default_type: :string, cell_type: :string
6
8
 
7
9
  def initialize(value, formula, style, link, coordinate)
8
10
  super(value, formula, nil, style, link, coordinate)
9
- @type = @cell_type = :string
10
- @value = link? ? Roo::Link.new(link, value) : value
11
11
  end
12
12
 
13
13
  def empty?
@@ -4,15 +4,16 @@ module Roo
4
4
  class Excelx
5
5
  class Cell
6
6
  class Time < Roo::Excelx::Cell::DateTime
7
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
7
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
8
+
9
+ attr_reader_with_default default_type: :time
8
10
 
9
11
  def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
10
12
  # NOTE: Pass all arguments to DateTime super class.
11
13
  super
12
- @type = :time
13
14
  @format = excelx_type.last
14
15
  @datetime = create_datetime(base_date, value)
15
- @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
16
+ @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).round.to_i
16
17
  end
17
18
 
18
19
  def formatted_value
@@ -24,19 +25,19 @@ module Roo
24
25
 
25
26
  private
26
27
 
27
- def create_datetime(base_date, value)
28
- date = base_date + value.to_f.round(6)
29
- datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
30
- t = round_datetime(datetime_string)
31
-
32
- ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
33
- end
34
-
35
- def round_datetime(datetime_string)
36
- /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
37
-
38
- ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
39
- end
28
+ # def create_datetime(base_date, value)
29
+ # date = base_date + value.to_f.round(6)
30
+ # datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
31
+ # t = round_datetime(datetime_string)
32
+ #
33
+ # ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
34
+ # end
35
+
36
+ # def round_datetime(datetime_string)
37
+ # /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
38
+ #
39
+ # ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
40
+ # end
40
41
  end
41
42
  end
42
43
  end
@@ -11,7 +11,7 @@ require 'roo/excelx/cell/time'
11
11
  module Roo
12
12
  class Excelx
13
13
  class Cell
14
- attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
14
+ attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
15
15
  attr_writer :value
16
16
 
17
17
  # DEPRECATED: Please use Cell.create_cell instead.
@@ -40,19 +40,23 @@ module Roo
40
40
  end
41
41
 
42
42
  def self.create_cell(type, *values)
43
+ cell_class(type)&.new(*values)
44
+ end
45
+
46
+ def self.cell_class(type)
43
47
  case type
44
48
  when :string
45
- Cell::String.new(*values)
49
+ Cell::String
46
50
  when :boolean
47
- Cell::Boolean.new(*values)
51
+ Cell::Boolean
48
52
  when :number
49
- Cell::Number.new(*values)
53
+ Cell::Number
50
54
  when :date
51
- Cell::Date.new(*values)
55
+ Cell::Date
52
56
  when :datetime
53
- Cell::DateTime.new(*values)
57
+ Cell::DateTime
54
58
  when :time
55
- Cell::Time.new(*values)
59
+ Cell::Time
56
60
  end
57
61
  end
58
62
 
@@ -12,10 +12,10 @@ module Roo
12
12
  def extract_comments
13
13
  return {} unless doc_exists?
14
14
 
15
- Hash[doc.xpath('//comments/commentList/comment').map do |comment|
15
+ doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, hash|
16
16
  value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
17
- [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
18
- end]
17
+ hash[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value
18
+ end
19
19
  end
20
20
  end
21
21
  end
@@ -1,11 +1,18 @@
1
1
  module Roo
2
2
  class Excelx
3
- class Coordinate
4
- attr_accessor :row, :column
3
+ class Coordinate < ::Array
5
4
 
6
5
  def initialize(row, column)
7
- @row = row
8
- @column = column
6
+ super() << row << column
7
+ freeze
8
+ end
9
+
10
+ def row
11
+ self[0]
12
+ end
13
+
14
+ def column
15
+ self[1]
9
16
  end
10
17
  end
11
18
  end
@@ -1,15 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "roo/helpers/weak_instance_cache"
4
+
1
5
  module Roo
2
6
  class Excelx
3
7
  class Extractor
4
- def initialize(path)
8
+ include Roo::Helpers::WeakInstanceCache
9
+
10
+ COMMON_STRINGS = {
11
+ t: "t",
12
+ r: "r",
13
+ s: "s",
14
+ ref: "ref",
15
+ html_tag_open: "<html>",
16
+ html_tag_closed: "</html>"
17
+ }
18
+
19
+ def initialize(path, options = {})
5
20
  @path = path
21
+ @options = options
6
22
  end
7
23
 
8
24
  private
9
25
 
10
26
  def doc
11
- @doc ||=
12
- if doc_exists?
27
+ instance_cache(:@doc) do
28
+ raise FileNotFound, "#{@path} file not found" unless doc_exists?
29
+
13
30
  ::Roo::Utils.load_xml(@path).remove_namespaces!
14
31
  end
15
32
  end
@@ -1,49 +1,57 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Roo
2
4
  class Excelx
3
5
  module Format
6
+ extend self
4
7
  EXCEPTIONAL_FORMATS = {
5
8
  'h:mm am/pm' => :date,
6
9
  'h:mm:ss am/pm' => :date
7
10
  }
8
11
 
9
12
  STANDARD_FORMATS = {
10
- 0 => 'General'.freeze,
11
- 1 => '0'.freeze,
12
- 2 => '0.00'.freeze,
13
- 3 => '#,##0'.freeze,
14
- 4 => '#,##0.00'.freeze,
15
- 9 => '0%'.freeze,
16
- 10 => '0.00%'.freeze,
17
- 11 => '0.00E+00'.freeze,
18
- 12 => '# ?/?'.freeze,
19
- 13 => '# ??/??'.freeze,
20
- 14 => 'mm-dd-yy'.freeze,
21
- 15 => 'd-mmm-yy'.freeze,
22
- 16 => 'd-mmm'.freeze,
23
- 17 => 'mmm-yy'.freeze,
24
- 18 => 'h:mm AM/PM'.freeze,
25
- 19 => 'h:mm:ss AM/PM'.freeze,
26
- 20 => 'h:mm'.freeze,
27
- 21 => 'h:mm:ss'.freeze,
28
- 22 => 'm/d/yy h:mm'.freeze,
29
- 37 => '#,##0 ;(#,##0)'.freeze,
30
- 38 => '#,##0 ;[Red](#,##0)'.freeze,
31
- 39 => '#,##0.00;(#,##0.00)'.freeze,
32
- 40 => '#,##0.00;[Red](#,##0.00)'.freeze,
33
- 45 => 'mm:ss'.freeze,
34
- 46 => '[h]:mm:ss'.freeze,
35
- 47 => 'mmss.0'.freeze,
36
- 48 => '##0.0E+0'.freeze,
37
- 49 => '@'.freeze
13
+ 0 => 'General',
14
+ 1 => '0',
15
+ 2 => '0.00',
16
+ 3 => '#,##0',
17
+ 4 => '#,##0.00',
18
+ 9 => '0%',
19
+ 10 => '0.00%',
20
+ 11 => '0.00E+00',
21
+ 12 => '# ?/?',
22
+ 13 => '# ??/??',
23
+ 14 => 'mm-dd-yy',
24
+ 15 => 'd-mmm-yy',
25
+ 16 => 'd-mmm',
26
+ 17 => 'mmm-yy',
27
+ 18 => 'h:mm AM/PM',
28
+ 19 => 'h:mm:ss AM/PM',
29
+ 20 => 'h:mm',
30
+ 21 => 'h:mm:ss',
31
+ 22 => 'm/d/yy h:mm',
32
+ 37 => '#,##0 ;(#,##0)',
33
+ 38 => '#,##0 ;[Red](#,##0)',
34
+ 39 => '#,##0.00;(#,##0.00)',
35
+ 40 => '#,##0.00;[Red](#,##0.00)',
36
+ 45 => 'mm:ss',
37
+ 46 => '[h]:mm:ss',
38
+ 47 => 'mmss.0',
39
+ 48 => '##0.0E+0',
40
+ 49 => '@'
38
41
  }
39
42
 
40
43
  def to_type(format)
44
+ @to_type ||= {}
45
+ @to_type[format] ||= _to_type(format)
46
+ end
47
+
48
+ def _to_type(format)
41
49
  format = format.to_s.downcase
42
50
  if (type = EXCEPTIONAL_FORMATS[format])
43
51
  type
44
52
  elsif format.include?('#')
45
53
  :float
46
- elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
54
+ elsif format.include?('y') || !format.match(/d+(?![\]])/).nil?
47
55
  if format.include?('h') || format.include?('s')
48
56
  :datetime
49
57
  else
@@ -58,7 +66,6 @@ module Roo
58
66
  end
59
67
  end
60
68
 
61
- module_function :to_type
62
69
  end
63
- end
70
+ end
64
71
  end
@@ -0,0 +1,26 @@
1
+ require 'roo/excelx/extractor'
2
+
3
+ module Roo
4
+ class Excelx
5
+ class Images < Excelx::Extractor
6
+
7
+ # Returns: Hash { id1: extracted_file_name1 },
8
+ # Example: { "rId1"=>"roo_media_image1.png",
9
+ # "rId2"=>"roo_media_image2.png",
10
+ # "rId3"=>"roo_media_image3.png" }
11
+ def list
12
+ @images ||= extract_images_names
13
+ end
14
+
15
+ private
16
+
17
+ def extract_images_names
18
+ return {} unless doc_exists?
19
+
20
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
21
+ hash[rel['Id']] = "roo" + rel['Target'].gsub(/\.\.\/|\//, '_')
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'roo/excelx/extractor'
2
4
 
3
5
  module Roo
@@ -11,14 +13,20 @@ module Roo
11
13
  @relationships ||= extract_relationships
12
14
  end
13
15
 
16
+ def include_type?(type)
17
+ to_a.any? do |_, rel|
18
+ rel["Type"]&.include? type
19
+ end
20
+ end
21
+
14
22
  private
15
23
 
16
24
  def extract_relationships
17
- return [] unless doc_exists?
25
+ return {} unless doc_exists?
18
26
 
19
- Hash[doc.xpath('/Relationships/Relationship').map do |rel|
20
- [rel.attribute('Id').text, rel]
21
- end]
27
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
28
+ hash[rel['Id']] = rel
29
+ end
22
30
  end
23
31
  end
24
32
  end
@@ -4,12 +4,15 @@ module Roo
4
4
  # reduce memory usage and reduce the number of objects being passed
5
5
  # to various initializers.
6
6
  class Shared
7
- attr_accessor :comments_files, :sheet_files, :rels_files
8
- def initialize(dir)
7
+ attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files
8
+ def initialize(dir, options = {})
9
9
  @dir = dir
10
10
  @comments_files = []
11
11
  @sheet_files = []
12
12
  @rels_files = []
13
+ @options = options
14
+ @image_rels = []
15
+ @image_files = []
13
16
  end
14
17
 
15
18
  def styles
@@ -17,7 +20,7 @@ module Roo
17
20
  end
18
21
 
19
22
  def shared_strings
20
- @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
23
+ @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'), @options)
21
24
  end
22
25
 
23
26
  def workbook
@@ -27,6 +30,10 @@ module Roo
27
30
  def base_date
28
31
  workbook.base_date
29
32
  end
33
+
34
+ def base_timestamp
35
+ workbook.base_timestamp
36
+ end
30
37
  end
31
38
  end
32
39
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'roo/excelx/extractor'
2
4
 
3
5
  module Roo
@@ -11,24 +13,34 @@ module Roo
11
13
  @array ||= extract_shared_strings
12
14
  end
13
15
 
16
+ def to_html
17
+ @html ||= extract_html
18
+ end
19
+
20
+ # Use to_html or to_a for html returns
21
+ # See what is happening with commit???
22
+ def use_html?(index)
23
+ return false if @options[:disable_html_wrapper]
24
+ to_html[index][/<([biu]|sup|sub)>/]
25
+ end
26
+
14
27
  private
15
28
 
16
29
  def fix_invalid_shared_strings(doc)
17
30
  invalid = { '_x000D_' => "\n" }
18
31
  xml = doc.to_s
32
+ return doc unless xml[/#{invalid.keys.join('|')}/]
19
33
 
20
- if xml[/#{invalid.keys.join('|')}/]
21
- @doc = ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
22
- end
34
+ ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
23
35
  end
24
36
 
25
37
  def extract_shared_strings
26
38
  return [] unless doc_exists?
27
39
 
28
- fix_invalid_shared_strings(doc)
40
+ document = fix_invalid_shared_strings(doc)
29
41
  # read the shared strings xml document
30
- doc.xpath('/sst/si').map do |si|
31
- shared_string = ''
42
+ document.xpath('/sst/si').map do |si|
43
+ shared_string = +""
32
44
  si.children.each do |elem|
33
45
  case elem.name
34
46
  when 'r'
@@ -42,6 +54,98 @@ module Roo
42
54
  shared_string
43
55
  end
44
56
  end
45
- end
46
- end
47
- end
57
+
58
+ def extract_html
59
+ return [] unless doc_exists?
60
+ fix_invalid_shared_strings(doc)
61
+ # read the shared strings xml document
62
+ doc.xpath('/sst/si').map do |si|
63
+ html_string = '<html>'.dup
64
+ si.children.each do |elem|
65
+ case elem.name
66
+ when 'r'
67
+ html_string << extract_html_r(elem)
68
+ when 't'
69
+ html_string << elem.content
70
+ end # case elem.name
71
+ end # si.children.each do |elem|
72
+ html_string << '</html>'
73
+ end # doc.xpath('/sst/si').map do |si|
74
+ end # def extract_html
75
+
76
+ # The goal of this function is to take the following XML code snippet and create a html tag
77
+ # r_elem ::: XML Element that is in sharedStrings.xml of excel_book.xlsx
78
+ # {code:xml}
79
+ # <r>
80
+ # <rPr>
81
+ # <i/>
82
+ # <b/>
83
+ # <u/>
84
+ # <vertAlign val="subscript"/>
85
+ # <vertAlign val="superscript"/>
86
+ # </rPr>
87
+ # <t>TEXT</t>
88
+ # </r>
89
+ # {code}
90
+ #
91
+ # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
92
+ def extract_html_r(r_elem)
93
+ str = +""
94
+ xml_elems = {
95
+ sub: false,
96
+ sup: false,
97
+ b: false,
98
+ i: false,
99
+ u: false
100
+ }
101
+ r_elem.children.each do |elem|
102
+ case elem.name
103
+ when 'rPr'
104
+ elem.children.each do |rPr_elem|
105
+ case rPr_elem.name
106
+ when 'b'
107
+ # set formatting for Bold to true
108
+ xml_elems[:b] = true
109
+ when 'i'
110
+ # set formatting for Italics to true
111
+ xml_elems[:i] = true
112
+ when 'u'
113
+ # set formatting for Underline to true
114
+ xml_elems[:u] = true
115
+ when 'vertAlign'
116
+ # See if the Vertical Alignment is subscript or superscript
117
+ case rPr_elem.xpath('@val').first.value
118
+ when 'subscript'
119
+ # set formatting for Subscript to true and Superscript to false ... Can't have both
120
+ xml_elems[:sub] = true
121
+ xml_elems[:sup] = false
122
+ when 'superscript'
123
+ # set formatting for Superscript to true and Subscript to false ... Can't have both
124
+ xml_elems[:sup] = true
125
+ xml_elems[:sub] = false
126
+ end
127
+ end
128
+ end
129
+ when 't'
130
+ str << create_html(elem.content, xml_elems)
131
+ end
132
+ end
133
+ str
134
+ end # extract_html_r
135
+
136
+ # This will return an html string
137
+ def create_html(text, formatting)
138
+ tmp_str = +""
139
+ formatting.each do |elem, val|
140
+ tmp_str << "<#{elem}>" if val
141
+ end
142
+ tmp_str << text
143
+
144
+ formatting.reverse_each do |elem, val|
145
+ tmp_str << "</#{elem}>" if val
146
+ end
147
+ tmp_str
148
+ end
149
+ end # class SharedStrings < Excelx::Extractor
150
+ end # class Excelx
151
+ end # module Roo