ruh-roo 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +677 -0
- data/Gemfile +24 -0
- data/LICENSE +24 -0
- data/README.md +315 -0
- data/lib/roo/base.rb +607 -0
- data/lib/roo/constants.rb +7 -0
- data/lib/roo/csv.rb +141 -0
- data/lib/roo/errors.rb +11 -0
- data/lib/roo/excelx/cell/base.rb +108 -0
- data/lib/roo/excelx/cell/boolean.rb +30 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +107 -0
- data/lib/roo/excelx/cell/empty.rb +20 -0
- data/lib/roo/excelx/cell/number.rb +89 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +44 -0
- data/lib/roo/excelx/cell.rb +110 -0
- data/lib/roo/excelx/comments.rb +55 -0
- data/lib/roo/excelx/coordinate.rb +19 -0
- data/lib/roo/excelx/extractor.rb +39 -0
- data/lib/roo/excelx/format.rb +71 -0
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +33 -0
- data/lib/roo/excelx/shared.rb +39 -0
- data/lib/roo/excelx/shared_strings.rb +151 -0
- data/lib/roo/excelx/sheet.rb +151 -0
- data/lib/roo/excelx/sheet_doc.rb +248 -0
- data/lib/roo/excelx/styles.rb +64 -0
- data/lib/roo/excelx/workbook.rb +63 -0
- data/lib/roo/excelx.rb +480 -0
- data/lib/roo/font.rb +17 -0
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/libre_office.rb +4 -0
- data/lib/roo/link.rb +34 -0
- data/lib/roo/open_office.rb +628 -0
- data/lib/roo/spreadsheet.rb +39 -0
- data/lib/roo/tempdir.rb +21 -0
- data/lib/roo/utils.rb +128 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +36 -0
- data/roo.gemspec +28 -0
- metadata +189 -0
@@ -0,0 +1,110 @@
|
|
1
|
+
require 'date'
|
2
|
+
require 'roo/excelx/cell/base'
|
3
|
+
require 'roo/excelx/cell/boolean'
|
4
|
+
require 'roo/excelx/cell/datetime'
|
5
|
+
require 'roo/excelx/cell/date'
|
6
|
+
require 'roo/excelx/cell/empty'
|
7
|
+
require 'roo/excelx/cell/number'
|
8
|
+
require 'roo/excelx/cell/string'
|
9
|
+
require 'roo/excelx/cell/time'
|
10
|
+
|
11
|
+
module Roo
|
12
|
+
class Excelx
|
13
|
+
# Legacy, untyped xlsx cell. Building one through `Cell.new` emits a
# deprecation warning; `Cell.create_cell` instead dispatches to the typed
# cell classes (Cell::String, Cell::Number, ...).
class Cell
  # NOTE(review): `@hyperlink` is never assigned in this class, so the
  # `hyperlink` reader always returns nil here - confirm whether intended.
  attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
  attr_writer :value

  # DEPRECATED: Please use Cell.create_cell instead.
  #
  # Stores the given attributes, casts `value` according to `type` and, when
  # `hyperlink` is truthy, replaces the value with a Roo::Link built from the
  # hyperlink and the stringified value. `base_date` is only kept for the
  # :date and :datetime types.
  def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
    warn '[DEPRECATION] `Cell.new` is deprecated. Please use `Cell.create_cell` instead.'
    @type = type
    @formula = formula
    @base_date = base_date if %i[date datetime].include?(@type)
    @excelx_type = excelx_type
    @excelx_value = excelx_value
    @style = style
    @value = type_cast_value(value)
    @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
    @coordinate = coordinate
  end

  # Returns :formula when a formula is present, :link when the value is a
  # Roo::Link, and otherwise the type given to the constructor.
  def type
    return :formula if @formula
    return :link if @value.is_a?(Roo::Link)

    @type
  end

  # Instantiates the cell class registered for `type` with `values`.
  # Returns nil when `type` has no matching class.
  def self.create_cell(type, *values)
    cell_class(type)&.new(*values)
  end

  # Maps a type symbol to its typed cell class; nil for unknown types.
  def self.cell_class(type)
    case type
    when :string   then Cell::String
    when :boolean  then Cell::Boolean
    when :number   then Cell::Number
    when :date     then Cell::Date
    when :datetime then Cell::DateTime
    when :time     then Cell::Time
    end
  end

  # Deprecated: use Roo::Excelx::Coordinate instead.
  class Coordinate
    attr_accessor :row, :column

    def initialize(row, column)
      # The warning names this class by its real namespace (Roo::Excelx).
      warn '[DEPRECATION] `Roo::Excelx::Cell::Coordinate` is deprecated. Please use `Roo::Excelx::Coordinate` instead.'
      @row = row
      @column = column
    end
  end

  private

  # Converts the raw value according to @type:
  #   :float / :percentage -> Float
  #   :date                -> ::Date at @base_date plus the integer value
  #   :datetime            -> ::DateTime at @base_date plus the value rounded to 6 places
  #   :time                -> the float value multiplied by 86_400
  # Any other type returns the value untouched.
  def type_cast_value(value)
    case @type
    when :float, :percentage
      value.to_f
    when :date
      create_date(@base_date + value.to_i)
    when :datetime
      create_datetime(@base_date + value.to_f.round(6))
    when :time
      value.to_f * 86_400
    else
      value
    end
  end

  # Rebuilds a plain ::Date from the year, month and day of `date`.
  def create_date(date)
    yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')

    ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
  end

  # Rebuilds a ::DateTime from `date`, with seconds rounded to a whole number.
  def create_datetime(date)
    datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
    t = round_datetime(datetime_string)

    ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
  end

  # Parses a "YYYY-MM-DD HH:MM:SS.fraction" string and returns a ::Time
  # rounded to the nearest second.
  def round_datetime(datetime_string)
    # The decimal point is escaped so only a literal "." separates the whole
    # and fractional seconds (a bare "." would accept any character).
    /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+\.\d+)/ =~ datetime_string

    ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
  end
end
|
109
|
+
end
|
110
|
+
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'roo/excelx/extractor'
|
2
|
+
|
3
|
+
module Roo
|
4
|
+
class Excelx
|
5
|
+
# Extractor for a worksheet comments part (see the sample xl/comments1.xml
# at the bottom of this file).
class Comments < Excelx::Extractor
  # Returns the comments hash, building it on first access.
  def comments
    @comments ||= extract_comments
  end

  private

  # Collects every //comments/commentList/comment node into a hash whose key
  # is Roo::Utils.ref_to_key applied to the node's "ref" attribute and whose
  # value is the text of ./text/r/t, falling back to ./text/t.
  # Returns an empty hash when the backing file does not exist.
  def extract_comments
    return {} unless doc_exists?

    collected = {}
    doc.xpath('//comments/commentList/comment').each do |comment|
      text_node = comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')
      value = text_node.text
      collected[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value
    end
    collected
  end
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
# xl/comments1.xml
|
24
|
+
# <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
|
25
|
+
# <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
26
|
+
# <authors>
|
27
|
+
# <author />
|
28
|
+
# </authors>
|
29
|
+
# <commentList>
|
30
|
+
# <comment ref="B4" authorId="0">
|
31
|
+
# <text>
|
32
|
+
# <r>
|
33
|
+
# <rPr>
|
34
|
+
# <sz val="10" />
|
35
|
+
# <rFont val="Arial" />
|
36
|
+
# <family val="2" />
|
37
|
+
# </rPr>
|
38
|
+
# <t>Comment for B4</t>
|
39
|
+
# </r>
|
40
|
+
# </text>
|
41
|
+
# </comment>
|
42
|
+
# <comment ref="B5" authorId="0">
|
43
|
+
# <text>
|
44
|
+
# <r>
|
45
|
+
# <rPr>
|
46
|
+
# <sz val="10" />
|
47
|
+
# <rFont val="Arial" />
|
48
|
+
# <family val="2" />
|
49
|
+
# </rPr>
|
50
|
+
# <t>Comment for B5</t>
|
51
|
+
# </r>
|
52
|
+
# </text>
|
53
|
+
# </comment>
|
54
|
+
# </commentList>
|
55
|
+
# </comments>
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "roo/helpers/weak_instance_cache"
|
4
|
+
|
5
|
+
module Roo
|
6
|
+
class Excelx
|
7
|
+
# Common base for the xlsx part readers: holds the path of one extracted XML
# file plus an options hash, and gives subclasses access to the parsed
# document through the private `doc` helper.
class Extractor
  include Roo::Helpers::WeakInstanceCache

  COMMON_STRINGS = {
    t: "t",
    r: "r",
    s: "s",
    ref: "ref",
    html_tag_open: "<html>",
    html_tag_closed: "</html>"
  }

  # path    - location of the XML file to read (may be nil).
  # options - hash kept for subclasses.
  def initialize(path, options = {})
    @options = options
    @path = path
  end

  private

  # Loads the XML at @path with namespaces removed, going through
  # instance_cache(:@doc). Raises FileNotFound when the file is missing.
  def doc
    instance_cache(:@doc) do
      if doc_exists?
        ::Roo::Utils.load_xml(@path).remove_namespaces!
      else
        raise FileNotFound, "#{@path} file not found"
      end
    end
  end

  # Truthy only when a path was given and a file exists there.
  def doc_exists?
    @path && File.exist?(@path)
  end
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Roo
|
4
|
+
class Excelx
|
5
|
+
# Maps Excel number-format strings to roo cell types.
module Format
  extend self

  # Formats whose type is looked up directly instead of being inferred.
  EXCEPTIONAL_FORMATS = {
    'h:mm am/pm' => :date,
    'h:mm:ss am/pm' => :date
  }

  # Built-in number format ids and their format strings.
  STANDARD_FORMATS = {
    0 => 'General',
    1 => '0',
    2 => '0.00',
    3 => '#,##0',
    4 => '#,##0.00',
    9 => '0%',
    10 => '0.00%',
    11 => '0.00E+00',
    12 => '# ?/?',
    13 => '# ??/??',
    14 => 'mm-dd-yy',
    15 => 'd-mmm-yy',
    16 => 'd-mmm',
    17 => 'mmm-yy',
    18 => 'h:mm AM/PM',
    19 => 'h:mm:ss AM/PM',
    20 => 'h:mm',
    21 => 'h:mm:ss',
    22 => 'm/d/yy h:mm',
    37 => '#,##0 ;(#,##0)',
    38 => '#,##0 ;[Red](#,##0)',
    39 => '#,##0.00;(#,##0.00)',
    40 => '#,##0.00;[Red](#,##0.00)',
    45 => 'mm:ss',
    46 => '[h]:mm:ss',
    47 => 'mmss.0',
    48 => '##0.0E+0',
    49 => '@'
  }

  # Returns the type symbol for `format`, caching the answer per format.
  def to_type(format)
    @to_type ||= {}
    @to_type[format] ||= _to_type(format)
  end

  # Infers the type from the lower-cased format string. Checks run in this
  # order: exceptional formats, '#' (float), date markers ('y', or a run of
  # 'd' not followed by ']'), time markers ('h' or 's'), '%', else float.
  def _to_type(format)
    format = format.to_s.downcase

    exceptional = EXCEPTIONAL_FORMATS[format]
    return exceptional if exceptional
    return :float if format.include?('#')

    has_time = format.include?('h') || format.include?('s')
    has_date = format.include?('y') || format.match?(/d+(?![\]])/)

    if has_date
      has_time ? :datetime : :date
    elsif has_time
      :time
    elsif format.include?('%')
      :percentage
    else
      :float
    end
  end
end
|
70
|
+
end
|
71
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'roo/excelx/extractor'
|
2
|
+
|
3
|
+
module Roo
|
4
|
+
class Excelx
|
5
|
+
# Extractor for a relationships document that lists a sheet's images.
class Images < Excelx::Extractor
  # Returns: Hash { id1: extracted_file_name1 },
  # Example: { "rId1"=>"roo_media_image1.png",
  #            "rId2"=>"roo_media_image2.png",
  #            "rId3"=>"roo_media_image3.png" }
  # The hash is built once and reused on later calls.
  def list
    @images ||= extract_images_names
  end

  private

  # Maps each Relationship's "Id" to "roo" followed by its "Target" with
  # every "../" and "/" replaced by "_". Returns {} when the file is absent.
  def extract_images_names
    return {} unless doc_exists?

    names = {}
    doc.xpath('/Relationships/Relationship').each do |rel|
      names[rel['Id']] = "roo" + rel['Target'].gsub(/\.\.\/|\//, '_')
    end
    names
  end
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'roo/excelx/extractor'
|
4
|
+
|
5
|
+
module Roo
|
6
|
+
class Excelx
|
7
|
+
# Extractor for a relationships document; relationships are looked up by id.
class Relationships < Excelx::Extractor
  # Looks up the entry stored under `index` in the relationships collection.
  def [](index)
    to_a[index]
  end

  # Returns the memoized collection built by extract_relationships
  # (a Hash keyed by relationship "Id", despite the method name).
  def to_a
    @relationships ||= extract_relationships
  end

  # True when any relationship has a "Type" attribute that includes `type`.
  def include_type?(type)
    to_a.any? { |_id, rel| rel["Type"]&.include?(type) }
  end

  private

  # Builds { "Id" attribute => Relationship node } for every
  # /Relationships/Relationship node; {} when the file does not exist.
  def extract_relationships
    return {} unless doc_exists?

    relationships = {}
    doc.xpath('/Relationships/Relationship').each do |rel|
      relationships[rel['Id']] = rel
    end
    relationships
  end
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Roo
|
2
|
+
class Excelx
|
3
|
+
# Public: Shared class for allowing sheets to share data. This should
#         reduce memory usage and reduce the number of objects being passed
#         to various initializers.
class Shared
  attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files

  # dir     - directory that holds the extracted roo_*.xml files.
  # options - hash forwarded to SharedStrings.
  # Every file list starts out as its own empty array.
  def initialize(dir, options = {})
    @dir = dir
    @options = options
    @comments_files = []
    @sheet_files = []
    @rels_files = []
    @image_rels = []
    @image_files = []
  end

  # Lazily built Styles reader for roo_styles.xml inside the directory.
  def styles
    @styles ||= begin
      path = File.join(@dir, 'roo_styles.xml')
      Styles.new(path)
    end
  end

  # Lazily built SharedStrings reader for roo_sharedStrings.xml.
  def shared_strings
    @shared_strings ||= begin
      path = File.join(@dir, 'roo_sharedStrings.xml')
      SharedStrings.new(path, @options)
    end
  end

  # Lazily built Workbook reader for roo_workbook.xml.
  def workbook
    @workbook ||= begin
      path = File.join(@dir, 'roo_workbook.xml')
      Workbook.new(path)
    end
  end

  # Delegates to the workbook.
  def base_date
    workbook.base_date
  end

  # Delegates to the workbook.
  def base_timestamp
    workbook.base_timestamp
  end
end
|
38
|
+
end
|
39
|
+
end
|
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'roo/excelx/extractor'
|
4
|
+
|
5
|
+
module Roo
|
6
|
+
class Excelx
|
7
|
+
class SharedStrings < Excelx::Extractor
|
8
|
+
# Returns the shared string stored at `index` (delegates to to_a).
def [](index)
  to_a[index]
end
|
11
|
+
|
12
|
+
# Returns the plain-text shared strings, extracting them on first use and
# reusing the cached array afterwards.
def to_a
  @array ||= extract_shared_strings
end
|
15
|
+
|
16
|
+
# Returns the HTML-wrapped shared strings, extracting them on first use and
# reusing the cached array afterwards.
def to_html
  @html ||= extract_html
end
|
19
|
+
|
20
|
+
# Tells callers whether the HTML form (to_html) rather than the plain form
# (to_a) should be used for the shared string at `index`.
#
# Returns false when the :disable_html_wrapper option is set; otherwise the
# first <b>, <i>, <u>, <sup> or <sub> tag found in the HTML string (truthy),
# or nil when it contains none.
def use_html?(index)
  if @options[:disable_html_wrapper]
    false
  else
    to_html[index][/<([biu]|sup|sub)>/]
  end
end
|
26
|
+
|
27
|
+
private
|
28
|
+
|
29
|
+
# Replaces "_x000D_" sequences in the serialized document with newlines.
#
# Returns `doc` itself when its string form contains no such sequence;
# otherwise returns a new Nokogiri XML document parsed from the fixed text.
def fix_invalid_shared_strings(doc)
  invalid = { '_x000D_' => "\n" }
  pattern = /#{invalid.keys.join('|')}/
  xml = doc.to_s

  if xml[pattern]
    ::Nokogiri::XML(xml.gsub(pattern, invalid))
  else
    doc
  end
end
|
36
|
+
|
37
|
+
# Reads the shared strings document and returns one plain string per
# /sst/si node. Returns [] when the backing file does not exist.
#
# Within an si node, each <r> child appends the content of its <t> children
# to the running string, while a direct <t> child replaces the running
# string with its own content.
def extract_shared_strings
  return [] unless doc_exists?

  document = fix_invalid_shared_strings(doc)
  document.xpath('/sst/si').map do |si|
    si.children.reduce(+"") do |text, elem|
      case elem.name
      when 'r'
        elem.children.each do |run_child|
          text << run_child.content if run_child.name == 't'
        end
        text
      when 't'
        elem.content
      else
        text
      end
    end
  end
end
|
57
|
+
|
58
|
+
# Reads the shared strings document and returns one HTML string per /sst/si
# node, each wrapped in <html>...</html>. Returns [] when the backing file
# does not exist.
#
# <r> children are rendered through extract_html_r; direct <t> children
# contribute their content as is.
def extract_html
  return [] unless doc_exists?

  # Iterate the repaired document returned by fix_invalid_shared_strings, so
  # "_x000D_" sequences are handled the same way as in extract_shared_strings.
  document = fix_invalid_shared_strings(doc)
  document.xpath('/sst/si').map do |si|
    html_string = '<html>'.dup
    si.children.each do |elem|
      case elem.name
      when 'r'
        html_string << extract_html_r(elem)
      when 't'
        html_string << elem.content
      end
    end
    html_string << '</html>'
  end
end
|
75
|
+
|
76
|
+
# Converts one rich-text run from sharedStrings.xml into an HTML fragment.
#
# r_elem ::: XML element shaped like
#   <r>
#     <rPr>
#       <i/>
#       <b/>
#       <u/>
#       <vertAlign val="subscript"/>
#       <vertAlign val="superscript"/>
#     </rPr>
#     <t>TEXT</t>
#   </r>
#
# Expected Output ::: "<sub|sup><b><i><u>TEXT</u></i></b></sub|/sup>"
# (extract_html adds the surrounding <html> tags).
def extract_html_r(r_elem)
  # Key order matters: create_html opens tags in this order.
  flags = { sub: false, sup: false, b: false, i: false, u: false }
  html = +""

  r_elem.children.each do |child|
    if child.name == 'rPr'
      child.children.each do |property|
        case property.name
        when 'b', 'i', 'u'
          # bold / italic / underline switch on the tag of the same name
          flags[property.name.to_sym] = true
        when 'vertAlign'
          # subscript and superscript are mutually exclusive
          alignment = property.xpath('@val').first.value
          if alignment == 'subscript'
            flags[:sub] = true
            flags[:sup] = false
          elsif alignment == 'superscript'
            flags[:sup] = true
            flags[:sub] = false
          end
        end
      end
    elsif child.name == 't'
      html << create_html(child.content, flags)
    end
  end

  html
end
|
135
|
+
|
136
|
+
# Wraps `text` in an HTML tag for every entry of `formatting` whose value is
# truthy. Tags open in the iteration order of `formatting` and close in the
# reverse order. Returns the resulting string.
def create_html(text, formatting)
  active_tags = formatting.each_with_object([]) do |(tag, enabled), tags|
    tags << tag if enabled
  end

  opening = active_tags.map { |tag| "<#{tag}>" }.join
  closing = active_tags.reverse.map { |tag| "</#{tag}>" }.join
  opening << text << closing
end
|
149
|
+
end # class SharedStrings < Excelx::Extractor
|
150
|
+
end # class Excelx
|
151
|
+
end # module Roo
|
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
module Roo
|
3
|
+
class Excelx
|
4
|
+
# One worksheet of an xlsx workbook. Combines the sheet document with its
# relationships, comments and images, and exposes cell access helpers.
class Sheet
  extend Forwardable

  delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared

  attr_reader :images

  # name        - sheet name.
  # shared      - object holding workbook-wide data and the per-sheet file
  #               lists (the delegated methods above are called on it).
  # sheet_index - position used to pick this sheet's files from those lists.
  # options     - hash passed on to SheetDoc.
  def initialize(name, shared, sheet_index, options = {})
    @name = name
    @shared = shared
    @sheet_index = sheet_index
    @images = Images.new(image_rels[sheet_index]).list
    @rels = Relationships.new(rels_files[sheet_index])
    @comments = Comments.new(comments_files[sheet_index])
    @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
  end

  # Memoized result of the sheet document's `cells`; used below as a hash
  # keyed by [row, column].
  def cells
    @cells ||= @sheet.cells(@rels)
  end

  # Deprecated: returns only the cells whose `presence` is truthy and prints
  # a deprecation warning the first time it is called.
  def present_cells
    @present_cells ||= begin
      warn %{
[DEPRECATION] present_cells is deprecated. Alternate:
with activesupport => cells[key].presence
without activesupport => cells[key]&.presence
}
      cells.select { |_, cell| cell&.presence }
    end
  end

  # Yield each row as array of Excelx::Cell objects
  # accepts options max_rows (int) (offset by 1 for header),
  # pad_cells (boolean) and offset (int)
  #
  # The caller's options hash is only read, never modified.
  def each_row(options = {}, &block)
    offset = options[:offset] || 0
    max_rows = options[:max_rows]
    row_count = 0

    @sheet.each_row_streaming do |row|
      # Stop after max_rows rows past the offset, plus one for the header.
      break if max_rows && row_count == max_rows + offset + 1

      block.call(cells_for_row_element(row, options)) if block && row_count >= offset
      row_count += 1
    end
  end

  # Values of the given row, from the first to the last non-empty column.
  # Missing cells yield nil.
  def row(row_number)
    first_column.upto(last_column).map do |col|
      cells[[row_number, col]]&.value
    end
  end

  # Values of the given column, from the first to the last non-empty row.
  # Missing cells yield nil.
  def column(col_number)
    first_row.upto(last_row).map do |row|
      cells[[row, col_number]]&.value
    end
  end

  # returns the number of the first non-empty row
  def first_row
    @first_row ||= first_last_row_col[:first_row]
  end

  # returns the number of the last non-empty row
  def last_row
    @last_row ||= first_last_row_col[:last_row]
  end

  # returns the number of the first non-empty column
  def first_column
    @first_column ||= first_last_row_col[:first_column]
  end

  # returns the number of the last non-empty column
  def last_column
    @last_column ||= first_last_row_col[:last_column]
  end

  # Format string of the cell stored under `key`, or nil when no cell exists.
  def excelx_format(key)
    cell = cells[key]
    styles.style_format(cell.style).to_s if cell
  end

  # Memoized hyperlinks reported by the sheet document.
  def hyperlinks
    @hyperlinks ||= @sheet.hyperlinks(@rels)
  end

  # Comments of this sheet, as returned by the Comments extractor.
  def comments
    @comments.comments
  end

  # Dimensions as reported by the sheet document.
  def dimensions
    @sheet.dimensions
  end

  private

  # Take an xml row and return an array of Excelx::Cell objects
  # optionally pad array to header width(assumed 1st row).
  # takes option pad_cells (boolean) defaults false
  def cells_for_row_element(row_element, options = {})
    return [] unless row_element

    previous_column = 0
    row_cells = []
    @sheet.each_cell(row_element) do |cell|
      row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
      row_cells << cell
      previous_column = cell.coordinate.column
    end
    row_cells
  end

  # Array of nils filling the gap between `last_column` and the column of
  # `cell`; empty when the cell directly follows (or precedes) that column.
  def pad_cells(cell, last_column)
    gap = cell.coordinate.column - 1 - last_column
    Array.new([gap, 0].max)
  end

  # Scans every cell whose `presence` is truthy once and memoizes the
  # smallest and largest row and column seen. All four values are nil when
  # no such cell exists.
  def first_last_row_col
    @first_last_row_col ||= begin
      bounds = { first_row: nil, last_row: nil, first_column: nil, last_column: nil }

      cells.each do |(row, col), cell|
        next unless cell&.presence

        bounds[:first_row] = row if bounds[:first_row].nil? || row < bounds[:first_row]
        bounds[:last_row] = row if bounds[:last_row].nil? || row > bounds[:last_row]
        bounds[:first_column] = col if bounds[:first_column].nil? || col < bounds[:first_column]
        bounds[:last_column] = col if bounds[:last_column].nil? || col > bounds[:last_column]
      end

      bounds
    end
  end
end
|
150
|
+
end
|
151
|
+
end
|