ruh-roo 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +677 -0
- data/Gemfile +24 -0
- data/LICENSE +24 -0
- data/README.md +315 -0
- data/lib/roo/base.rb +607 -0
- data/lib/roo/constants.rb +7 -0
- data/lib/roo/csv.rb +141 -0
- data/lib/roo/errors.rb +11 -0
- data/lib/roo/excelx/cell/base.rb +108 -0
- data/lib/roo/excelx/cell/boolean.rb +30 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +107 -0
- data/lib/roo/excelx/cell/empty.rb +20 -0
- data/lib/roo/excelx/cell/number.rb +89 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +44 -0
- data/lib/roo/excelx/cell.rb +110 -0
- data/lib/roo/excelx/comments.rb +55 -0
- data/lib/roo/excelx/coordinate.rb +19 -0
- data/lib/roo/excelx/extractor.rb +39 -0
- data/lib/roo/excelx/format.rb +71 -0
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +33 -0
- data/lib/roo/excelx/shared.rb +39 -0
- data/lib/roo/excelx/shared_strings.rb +151 -0
- data/lib/roo/excelx/sheet.rb +151 -0
- data/lib/roo/excelx/sheet_doc.rb +248 -0
- data/lib/roo/excelx/styles.rb +64 -0
- data/lib/roo/excelx/workbook.rb +63 -0
- data/lib/roo/excelx.rb +480 -0
- data/lib/roo/font.rb +17 -0
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/libre_office.rb +4 -0
- data/lib/roo/link.rb +34 -0
- data/lib/roo/open_office.rb +628 -0
- data/lib/roo/spreadsheet.rb +39 -0
- data/lib/roo/tempdir.rb +21 -0
- data/lib/roo/utils.rb +128 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +36 -0
- data/roo.gemspec +28 -0
- metadata +189 -0
@@ -0,0 +1,248 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'forwardable'
|
4
|
+
require 'roo/excelx/extractor'
|
5
|
+
|
6
|
+
module Roo
|
7
|
+
class Excelx
|
8
|
+
class SheetDoc < Excelx::Extractor
|
9
|
+
extend Forwardable
|
10
|
+
delegate [:workbook] => :@shared
|
11
|
+
|
12
|
+
# Sets up a reader for one worksheet XML file.
#
# path          - passed unchanged to the parent extractor's initializer.
# relationships - kept so that #each_cell can look up hyperlinks.
# shared        - object the style, date and shared-string helpers delegate to.
# options       - Hash of flags; this class reads :no_hyperlinks, :empty_cell
#                 and :expand_merged_ranges from it.
def initialize(path, relationships, shared, options = {})
  super(path)
  @relationships = relationships
  @options = options
  @shared = shared
end
|
18
|
+
|
19
|
+
# Returns the extracted cells of the sheet.
# Extraction happens on the first call only; later calls return the cached
# result, whatever argument they are given.
def cells(relationships)
  return @cells if @cells

  @cells = extract_cells(relationships)
end
|
22
|
+
|
23
|
+
# Returns the hyperlinks of the sheet, computed once and cached.
#
# The result is an empty Hash when the :no_hyperlinks option is set or when
# the relationships contain no "hyperlink" type; otherwise it is whatever
# #extract_hyperlinks returns.
def hyperlinks(relationships)
  return @hyperlinks if @hyperlinks

  # Callers that never need hyperlinks can opt out with :no_hyperlinks.
  skip_extraction = @options[:no_hyperlinks] || !relationships.include_type?("hyperlink")
  @hyperlinks = skip_extraction ? {} : extract_hyperlinks(relationships)
end
|
31
|
+
|
32
|
+
# Get the dimensions for the sheet.
|
33
|
+
# This is the upper bound of cells that might
|
34
|
+
# be parsed. (the document may be sparse so cell count is only upper bound)
|
35
|
+
def dimensions
  # Cached after the first successful lookup.
  return @dimensions if @dimensions

  @dimensions = extract_dimensions
end
|
38
|
+
|
39
|
+
# Yield each row xml element to caller
|
40
|
+
def each_row_streaming(&block)
  # The caller's block is handed to Roo::Utils.each_element for every
  # 'row' element of the file at @path.
  element_name = 'row'
  Roo::Utils.each_element(@path, element_name, &block)
end
|
43
|
+
|
44
|
+
# Yield each cell as Excelx::Cell to caller for given
|
45
|
+
# row xml
|
46
|
+
def each_cell(row_xml)
  # Nothing to iterate over when no row was given.
  return [] unless row_xml

  row_xml.children.each do |node|
    position = ::Roo::Utils.extract_coordinate(node["r"])
    # The hyperlink lookup uses the relationships stored at construction time.
    link = hyperlinks(@relationships)[position]

    yield cell_from_xml(node, link, position)
  end
end
|
55
|
+
|
56
|
+
private
|
57
|
+
|
58
|
+
# Chooses the value type symbol for a cell.
#
# type   - the cell's type marker; 's', 'b', 'str' and 'inlineStr' map to
#          fixed symbols.
# format - used only for any other type, where Excelx::Format.to_type decides.
def cell_value_type(type, format)
  case type
  when 's' then :shared
  when 'b' then :boolean
  when 'str' then :string
  when 'inlineStr' then :inlinestr
  else Excelx::Format.to_type(format)
  end
end
|
72
|
+
|
73
|
+
# Internal: Creates a cell based on an XML cell.
|
74
|
+
#
|
75
|
+
# cell_xml - a Nokogiri::XML::Element. e.g.
|
76
|
+
# <c r="A5" s="2">
|
77
|
+
# <v>22606</v>
|
78
|
+
# </c>
|
79
|
+
# hyperlink - a String for the hyperlink for the cell or nil when no
|
80
|
+
# hyperlink is present.
|
81
|
+
# coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
|
82
|
+
# or nil to extract coordinate from cell_xml.
|
83
|
+
# empty_cell - an Optional Boolean value.
|
84
|
+
#
|
85
|
+
# Examples
|
86
|
+
#
|
87
|
+
# cell_from_xml(<Nokogiri::XML::Element>, nil, nil)
|
88
|
+
# # => <Excelx::Cell::String>
|
89
|
+
#
|
90
|
+
# Returns a type of <Excelx::Cell>.
|
91
|
+
def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell = true)
  coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
  child_nodes = cell_xml.children
  return create_empty_cell(coordinate, empty_cell) if child_nodes.empty?

  # NOTE: a missing "s" attribute silently becomes style 0 through nil.to_i.
  # The original author notes this only works because format 0 is General.
  style = cell_xml["s"].to_i
  formula = nil

  child_nodes.each do |node|
    case node.name
    when 'is'
      # Inline string: join the content of every 't' node found below it.
      text = node.search('t').map(&:content).join
      next if text.empty?

      return Excelx::Cell.cell_class(:string).new(text, formula, style, hyperlink, coordinate)
    when 'f'
      # Remember the formula for the cell built from a later node.
      formula = node.content
    when 'v'
      format = style_format(style)
      kind = cell_value_type(cell_xml["t"], format)

      return create_cell_from_value(kind, node, formula, format, style, hyperlink, coordinate)
    end
  end

  # No child produced a value.
  create_empty_cell(coordinate, empty_cell)
end
|
120
|
+
|
121
|
+
# Builds an Excelx::Cell::Empty for the coordinate, or returns nil when
# empty_cell is falsy.
def create_empty_cell(coordinate, empty_cell)
  return unless empty_cell

  Excelx::Cell::Empty.new(coordinate)
end
|
126
|
+
|
127
|
+
# Builds the cell object for a value node.
#
# value_type - Symbol selecting the branch below (:shared, :boolean, :string,
#              :time, :datetime, :date; anything else becomes a number cell).
# cell       - the value node; only its #content is read.
# formula, format, style, hyperlink, coordinate - passed on to the cell class.
#
# Returns whatever the selected Excelx::Cell class constructor returns.
def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
  # NOTE (from the original author): format.to_s alone could replace this
  # pair for the time, datetime, date and number cells, but doing so breaks
  # some brittle tests.
  excelx_type = [:numeric_or_formula, format.to_s]

  # The value handed to the cell differs from cell.content only for shared
  # strings (content is the string's index) and for the temporal branch
  # (content is converted to a Float).
  case value_type
  when :shared
    index = cell.content.to_i
    text = if shared_strings.use_html?(index)
             shared_strings.to_html[index]
           else
             shared_strings[index]
           end
    Excelx::Cell.cell_class(:string).new(text, formula, style, hyperlink, coordinate)
  when :boolean, :string
    raw = cell.content
    Excelx::Cell.cell_class(value_type).new(raw, formula, style, hyperlink, coordinate)
  when :time, :datetime
    serial = cell.content.to_f
    # Values below 1.0 are treated as a time, values with a fractional part
    # as a datetime, and whole numbers as a date. A custom format can make
    # this guess wrong; checking whether the style is a standard one would
    # be more reliable.
    temporal_type =
      if serial < 1.0
        :time
      elsif (serial - serial.floor).abs > 0.000001
        :datetime
      else
        :date
      end
    base_value = temporal_type == :date ? base_date : base_timestamp
    Excelx::Cell.cell_class(temporal_type).new(serial, formula, excelx_type, style, hyperlink, base_value, coordinate)
  when :date
    Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
  else
    Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
  end
end
|
175
|
+
|
176
|
+
# Builds a Hash mapping each coordinate covered by a hyperlink element to
# its target.
#
# relationships - looked up with each hyperlink's 'id'; hyperlinks without a
#                 matching relationship are ignored.
#
# The target is the relationship's 'Target', followed by "#<location>" when
# the hyperlink element has a 'location'.
def extract_hyperlinks(relationships)
  link_nodes = doc.xpath('/worksheet/hyperlinks/hyperlink')
  return {} unless link_nodes

  link_nodes.each_with_object({}) do |link_node, by_coordinate|
    relationship = relationships[link_node['id']]
    next unless relationship

    target = relationship['Target']
    location = link_node['location']
    target = target + "##{location}" if location

    # One hyperlink element may cover a whole range of cells.
    Roo::Utils.coordinates_in_range(link_node["ref"].to_s) do |covered|
      by_coordinate[covered] = target
    end
  end
end
|
190
|
+
|
191
|
+
# Copies the first cell of every merged range into all other coordinates of
# that range.
#
# cells - Hash of extracted cells, keyed by the values that
#         Roo::Utils.ref_to_key returns (indexed here as [row, col]).
#         It is modified in place.
#
# A mergeCell whose "ref" is missing is skipped, and a "ref" that names a
# single cell (no ':') is treated as a one-cell range, so neither raises.
# Ranges whose first cell was not extracted are skipped as well.
#
# Returns the Hash of copied coordinates, each mapped to its source key.
def expand_merged_ranges(cells)
  merges = {}

  doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
    src, dst = mergecell_xml["ref"].to_s.split(':').map { |ref| ::Roo::Utils.ref_to_key(ref) }
    # A ref without ':' yields only one key; use it as both corners.
    dst ||= src
    next unless src && cells[src]

    (src[0]..dst[0]).each do |row|
      (src[1]..dst[1]).each do |col|
        # The source cell itself needs no copy.
        next if row == src[0] && col == src[1]

        merges[[row, col]] = src
      end
    end
  end

  # Duplicate the source value into every other cell of its range.
  merges.each { |target, source| cells[target] = cells[source] }
end
|
209
|
+
|
210
|
+
# Reads every 'c' element under /worksheet/sheetData/row and returns a Hash
# of coordinate => cell.
#
# relationships - passed on to #hyperlinks for the per-cell hyperlink lookup.
#
# Cells for which #cell_from_xml returns nil are left out. With the
# :expand_merged_ranges option the result is additionally passed through
# #expand_merged_ranges.
def extract_cells(relationships)
  keep_empty = @options[:empty_cell]

  found = doc.xpath('/worksheet/sheetData/row/c').each_with_object({}) do |node, by_coordinate|
    position = ::Roo::Utils.extract_coordinate(node["r"])
    built = cell_from_xml(node, hyperlinks(relationships)[position], position, keep_empty)
    by_coordinate[position] = built if built
  end

  expand_merged_ranges(found) if @options[:expand_merged_ranges]

  found
end
|
224
|
+
|
225
|
+
# Returns the "ref" attribute of the first 'dimension' element that
# Roo::Utils.each_element yields for the file at @path.
#
# NOTE(review): when no 'dimension' element is yielded, the method's value is
# whatever Roo::Utils.each_element itself returns; confirm that callers cope.
def extract_dimensions
  Roo::Utils.each_element(@path, 'dimension') { |dimension| return dimension["ref"] }
end
|
230
|
+
|
231
|
+
# Asks the styles object of @shared for the format of the given style.
def style_format(style)
  styles = @shared.styles
  styles.style_format(style)
end
|
234
|
+
|
235
|
+
# Base date used for date cells, as reported by @shared.
def base_date
  shared = @shared
  shared.base_date
end
|
238
|
+
|
239
|
+
# Base timestamp used for time and datetime cells, as reported by @shared.
def base_timestamp
  shared = @shared
  shared.base_timestamp
end
|
242
|
+
|
243
|
+
# The shared strings collection held by @shared.
def shared_strings
  shared = @shared
  shared.shared_strings
end
|
246
|
+
end
|
247
|
+
end
|
248
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'roo/font'
|
2
|
+
require 'roo/excelx/extractor'
|
3
|
+
|
4
|
+
module Roo
|
5
|
+
class Excelx
|
6
|
+
class Styles < Excelx::Extractor
|
7
|
+
# convert internal excelx attribute to a format
|
8
|
+
def style_format(style)
  fmt_id = num_fmt_ids[style.to_i]

  # A format declared in the document wins; otherwise fall back to the
  # standard formats table (a nil id is looked up as 0 through nil.to_i).
  declared = num_fmts[fmt_id]
  return declared if declared

  Excelx::Format::STANDARD_FORMATS[fmt_id.to_i]
end
|
12
|
+
|
13
|
+
# Returns the result of #extract_definitions, computed once and cached.
def definitions
  return @definitions if @definitions

  @definitions = extract_definitions
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
# Returns the result of #extract_num_fmt_ids, computed once and cached.
def num_fmt_ids
  return @num_fmt_ids if @num_fmt_ids

  @num_fmt_ids = extract_num_fmt_ids
end
|
22
|
+
|
23
|
+
# Returns the result of #extract_num_fmts, computed once and cached.
def num_fmts
  return @num_fmts if @num_fmts

  @num_fmts = extract_num_fmts
end
|
26
|
+
|
27
|
+
# Returns the result of #extract_fonts, computed once and cached.
def fonts
  return @fonts if @fonts

  @fonts = extract_fonts
end
|
30
|
+
|
31
|
+
# Returns one font per <xf> element under cellXfs, in document order, picked
# from #fonts by the element's 'fontId' (a missing 'fontId' reads as 0).
#
# Only <xf> elements are selected. Mapping over every child node of cellXfs
# would also visit non-element nodes such as whitespace text, each adding a
# spurious fonts[0] entry and shifting the positions of all later entries.
# NOTE(review): presumably the list is indexed by a cell's style id; confirm
# against the callers of #definitions.
def extract_definitions
  doc.xpath('//cellXfs/xf').map { |xf| fonts[xf['fontId'].to_i] }
end
|
38
|
+
|
39
|
+
# Builds one Font per //fonts/font element. Each flag is true when the
# element has at least one matching child ('b', 'i' or 'u').
def extract_fonts
  doc.xpath('//fonts/font').map do |font_el|
    font = Font.new
    font.bold = !font_el.xpath('./b').empty?
    font.italic = !font_el.xpath('./i').empty?
    font.underline = !font_el.xpath('./u').empty?
    font
  end
end
|
48
|
+
|
49
|
+
# Returns the 'numFmtId' attribute of every <xf> element under cellXfs, in
# document order.
#
# An <xf> without the attribute contributes nil rather than being dropped, so
# the position of every later entry still matches its <xf> position;
# #style_format already resolves a nil id through nil.to_i. Only <xf> elements
# are selected, so other child nodes (e.g. whitespace text) never appear.
def extract_num_fmt_ids
  doc.xpath('//cellXfs/xf').map { |xf| xf['numFmtId'] }
end
|
56
|
+
|
57
|
+
# Collects every numFmt element of the document into a Hash of
# 'numFmtId' => 'formatCode'. A repeated id keeps the last format code seen.
def extract_num_fmts
  formats = {}
  doc.xpath('//numFmt').each do |node|
    formats[node['numFmtId']] = node['formatCode']
  end
  formats
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'roo/excelx/extractor'
|
2
|
+
|
3
|
+
module Roo
|
4
|
+
class Excelx
|
5
|
+
class Workbook < Excelx::Extractor
|
6
|
+
# A defined name pointing at a single cell of a sheet.
class Label
  attr_reader :sheet, :row, :col, :name

  # name  - the defined name.
  # sheet - the sheet part of the reference.
  # row   - row part of the reference; stored as an Integer via #to_i.
  # col   - column letters; stored as converted by
  #         Roo::Utils.letter_to_number.
  def initialize(name, sheet, row, col)
    @col = ::Roo::Utils.letter_to_number(col)
    @row = row.to_i
    @sheet = sheet
    @name = name
  end

  # Returns the [row, col] pair of the label.
  def key
    [row, col]
  end
end
|
20
|
+
|
21
|
+
# Initializes the parent extractor with path, then insists that the
# workbook document exists.
#
# Raises ArgumentError when doc_exists? is false.
def initialize(path)
  super
  return if doc_exists?

  raise ArgumentError, 'missing required workbook file'
end
|
25
|
+
|
26
|
+
# Returns every 'sheet' node found anywhere in the workbook document.
def sheets
  sheet_nodes = doc.xpath('//sheet')
  sheet_nodes
end
|
29
|
+
|
30
|
+
# aka labels
|
31
|
+
def defined_names
  doc.xpath('//definedName').each_with_object({}) do |defined_name, hash|
    # Expected text shape: "Sheet1!$C$5"
    sheet, coordinates = defined_name.text.split('!$', 2)
    # A defined name whose text has no "!$" part (for example a constant, a
    # formula or a broken "#REF!" target) does not point at a cell. Skip it
    # instead of failing on nil.
    next unless coordinates

    col, row = coordinates.split('$')
    name = defined_name['name']
    hash[name] = Label.new(name, sheet, row, col)
  end
end
|
40
|
+
|
41
|
+
# The base date expressed as an Integer number of seconds, computed once
# from #base_date and cached.
def base_timestamp
  return @base_timestamp if @base_timestamp

  @base_timestamp = base_date.to_datetime.to_time.to_i
end
|
44
|
+
|
45
|
+
# Returns the date that serial date values of this workbook count from,
# cached after the first call.
#
# The default is 1899-12-30 (1900 minus one day, due to an Excel quirk).
# 1904-01-01 is used when any workbookPr element carries a date1904
# attribute matching /true|1/i.
# See http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
def base_date
  @base_date ||=
    if doc.css('workbookPr[date1904]').any? { |workbook_pr| workbook_pr['date1904'] =~ /true|1/i }
      Date.new(1904, 1, 1)
    else
      Date.new(1899, 12, 30)
    end
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|