roo 1.13.2 → 2.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.codeclimate.yml +17 -0
- data/.github/issue_template.md +16 -0
- data/.github/pull_request_template.md +14 -0
- data/.github/workflows/pull-request.yml +15 -0
- data/.github/workflows/ruby.yml +34 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +186 -0
- data/.simplecov +4 -0
- data/CHANGELOG.md +702 -0
- data/Gemfile +18 -12
- data/Guardfile +23 -0
- data/LICENSE +5 -1
- data/README.md +328 -0
- data/Rakefile +23 -23
- data/examples/roo_soap_client.rb +28 -31
- data/examples/roo_soap_server.rb +4 -6
- data/examples/write_me.rb +9 -10
- data/lib/roo/base.rb +317 -504
- data/lib/roo/constants.rb +7 -0
- data/lib/roo/csv.rb +141 -113
- data/lib/roo/errors.rb +11 -0
- data/lib/roo/excelx/cell/base.rb +108 -0
- data/lib/roo/excelx/cell/boolean.rb +30 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +107 -0
- data/lib/roo/excelx/cell/empty.rb +20 -0
- data/lib/roo/excelx/cell/number.rb +99 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +44 -0
- data/lib/roo/excelx/cell.rb +110 -0
- data/lib/roo/excelx/comments.rb +55 -0
- data/lib/roo/excelx/coordinate.rb +19 -0
- data/lib/roo/excelx/extractor.rb +39 -0
- data/lib/roo/excelx/format.rb +71 -0
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +33 -0
- data/lib/roo/excelx/shared.rb +39 -0
- data/lib/roo/excelx/shared_strings.rb +151 -0
- data/lib/roo/excelx/sheet.rb +151 -0
- data/lib/roo/excelx/sheet_doc.rb +257 -0
- data/lib/roo/excelx/styles.rb +64 -0
- data/lib/roo/excelx/workbook.rb +64 -0
- data/lib/roo/excelx.rb +407 -601
- data/lib/roo/font.rb +17 -0
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/libre_office.rb +4 -0
- data/lib/roo/link.rb +34 -0
- data/lib/roo/open_office.rb +631 -0
- data/lib/roo/spreadsheet.rb +28 -23
- data/lib/roo/tempdir.rb +24 -0
- data/lib/roo/utils.rb +128 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +26 -24
- data/roo.gemspec +29 -203
- data/spec/helpers.rb +5 -0
- data/spec/lib/roo/base_spec.rb +291 -3
- data/spec/lib/roo/csv_spec.rb +38 -11
- data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
- data/spec/lib/roo/excelx/format_spec.rb +7 -6
- data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
- data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
- data/spec/lib/roo/excelx_spec.rb +672 -11
- data/spec/lib/roo/libreoffice_spec.rb +16 -6
- data/spec/lib/roo/openoffice_spec.rb +30 -8
- data/spec/lib/roo/spreadsheet_spec.rb +60 -12
- data/spec/lib/roo/strict_spec.rb +43 -0
- data/spec/lib/roo/utils_spec.rb +119 -0
- data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
- data/spec/lib/roo_spec.rb +0 -0
- data/spec/spec_helper.rb +7 -6
- data/test/all_ss.rb +12 -11
- data/test/excelx/cell/test_attr_reader_default.rb +72 -0
- data/test/excelx/cell/test_base.rb +68 -0
- data/test/excelx/cell/test_boolean.rb +36 -0
- data/test/excelx/cell/test_date.rb +38 -0
- data/test/excelx/cell/test_datetime.rb +45 -0
- data/test/excelx/cell/test_empty.rb +18 -0
- data/test/excelx/cell/test_number.rb +90 -0
- data/test/excelx/cell/test_string.rb +48 -0
- data/test/excelx/cell/test_time.rb +30 -0
- data/test/excelx/test_coordinate.rb +51 -0
- data/test/formatters/test_csv.rb +136 -0
- data/test/formatters/test_matrix.rb +76 -0
- data/test/formatters/test_xml.rb +78 -0
- data/test/formatters/test_yaml.rb +20 -0
- data/test/helpers/test_accessing_files.rb +81 -0
- data/test/helpers/test_comments.rb +43 -0
- data/test/helpers/test_formulas.rb +9 -0
- data/test/helpers/test_labels.rb +103 -0
- data/test/helpers/test_sheets.rb +55 -0
- data/test/helpers/test_styles.rb +62 -0
- data/test/roo/test_base.rb +182 -0
- data/test/roo/test_csv.rb +88 -0
- data/test/roo/test_excelx.rb +360 -0
- data/test/roo/test_libre_office.rb +9 -0
- data/test/roo/test_open_office.rb +289 -0
- data/test/test_helper.rb +123 -59
- data/test/test_roo.rb +392 -2292
- metadata +153 -298
- data/CHANGELOG +0 -417
- data/Gemfile.lock +0 -78
- data/README.markdown +0 -126
- data/VERSION +0 -1
- data/lib/roo/excel.rb +0 -355
- data/lib/roo/excel2003xml.rb +0 -300
- data/lib/roo/google.rb +0 -292
- data/lib/roo/openoffice.rb +0 -496
- data/lib/roo/roo_rails_helper.rb +0 -83
- data/lib/roo/worksheet.rb +0 -18
- data/scripts/txt2html +0 -67
- data/spec/lib/roo/excel2003xml_spec.rb +0 -15
- data/spec/lib/roo/excel_spec.rb +0 -17
- data/spec/lib/roo/google_spec.rb +0 -64
- data/test/files/1900_base.xls +0 -0
- data/test/files/1900_base.xlsx +0 -0
- data/test/files/1904_base.xls +0 -0
- data/test/files/1904_base.xlsx +0 -0
- data/test/files/Bibelbund.csv +0 -3741
- data/test/files/Bibelbund.ods +0 -0
- data/test/files/Bibelbund.xls +0 -0
- data/test/files/Bibelbund.xlsx +0 -0
- data/test/files/Bibelbund.xml +0 -62518
- data/test/files/Bibelbund1.ods +0 -0
- data/test/files/Pfand_from_windows_phone.xlsx +0 -0
- data/test/files/bad_excel_date.xls +0 -0
- data/test/files/bbu.ods +0 -0
- data/test/files/bbu.xls +0 -0
- data/test/files/bbu.xlsx +0 -0
- data/test/files/bbu.xml +0 -152
- data/test/files/bode-v1.ods.zip +0 -0
- data/test/files/bode-v1.xls.zip +0 -0
- data/test/files/boolean.csv +0 -2
- data/test/files/boolean.ods +0 -0
- data/test/files/boolean.xls +0 -0
- data/test/files/boolean.xlsx +0 -0
- data/test/files/boolean.xml +0 -112
- data/test/files/borders.ods +0 -0
- data/test/files/borders.xls +0 -0
- data/test/files/borders.xlsx +0 -0
- data/test/files/borders.xml +0 -144
- data/test/files/bug-numbered-sheet-names.xlsx +0 -0
- data/test/files/bug-row-column-fixnum-float.xls +0 -0
- data/test/files/bug-row-column-fixnum-float.xml +0 -127
- data/test/files/comments.ods +0 -0
- data/test/files/comments.xls +0 -0
- data/test/files/comments.xlsx +0 -0
- data/test/files/csvtypes.csv +0 -1
- data/test/files/datetime.ods +0 -0
- data/test/files/datetime.xls +0 -0
- data/test/files/datetime.xlsx +0 -0
- data/test/files/datetime.xml +0 -142
- data/test/files/datetime_floatconv.xls +0 -0
- data/test/files/datetime_floatconv.xml +0 -148
- data/test/files/dreimalvier.ods +0 -0
- data/test/files/emptysheets.ods +0 -0
- data/test/files/emptysheets.xls +0 -0
- data/test/files/emptysheets.xlsx +0 -0
- data/test/files/emptysheets.xml +0 -105
- data/test/files/excel2003.xml +0 -21140
- data/test/files/false_encoding.xls +0 -0
- data/test/files/false_encoding.xml +0 -132
- data/test/files/file_item_error.xlsx +0 -0
- data/test/files/formula.ods +0 -0
- data/test/files/formula.xls +0 -0
- data/test/files/formula.xlsx +0 -0
- data/test/files/formula.xml +0 -134
- data/test/files/formula_parse_error.xls +0 -0
- data/test/files/formula_parse_error.xml +0 -1833
- data/test/files/formula_string_error.xlsx +0 -0
- data/test/files/html-escape.ods +0 -0
- data/test/files/link.xls +0 -0
- data/test/files/link.xlsx +0 -0
- data/test/files/matrix.ods +0 -0
- data/test/files/matrix.xls +0 -0
- data/test/files/named_cells.ods +0 -0
- data/test/files/named_cells.xls +0 -0
- data/test/files/named_cells.xlsx +0 -0
- data/test/files/no_spreadsheet_file.txt +0 -1
- data/test/files/numbers1.csv +0 -18
- data/test/files/numbers1.ods +0 -0
- data/test/files/numbers1.xls +0 -0
- data/test/files/numbers1.xlsx +0 -0
- data/test/files/numbers1.xml +0 -312
- data/test/files/numeric-link.xlsx +0 -0
- data/test/files/only_one_sheet.ods +0 -0
- data/test/files/only_one_sheet.xls +0 -0
- data/test/files/only_one_sheet.xlsx +0 -0
- data/test/files/only_one_sheet.xml +0 -67
- data/test/files/paragraph.ods +0 -0
- data/test/files/paragraph.xls +0 -0
- data/test/files/paragraph.xlsx +0 -0
- data/test/files/paragraph.xml +0 -127
- data/test/files/prova.xls +0 -0
- data/test/files/ric.ods +0 -0
- data/test/files/simple_spreadsheet.ods +0 -0
- data/test/files/simple_spreadsheet.xls +0 -0
- data/test/files/simple_spreadsheet.xlsx +0 -0
- data/test/files/simple_spreadsheet.xml +0 -225
- data/test/files/simple_spreadsheet_from_italo.ods +0 -0
- data/test/files/simple_spreadsheet_from_italo.xls +0 -0
- data/test/files/simple_spreadsheet_from_italo.xml +0 -242
- data/test/files/so_datetime.csv +0 -7
- data/test/files/style.ods +0 -0
- data/test/files/style.xls +0 -0
- data/test/files/style.xlsx +0 -0
- data/test/files/style.xml +0 -154
- data/test/files/time-test.csv +0 -2
- data/test/files/time-test.ods +0 -0
- data/test/files/time-test.xls +0 -0
- data/test/files/time-test.xlsx +0 -0
- data/test/files/time-test.xml +0 -131
- data/test/files/type_excel.ods +0 -0
- data/test/files/type_excel.xlsx +0 -0
- data/test/files/type_excelx.ods +0 -0
- data/test/files/type_excelx.xls +0 -0
- data/test/files/type_openoffice.xls +0 -0
- data/test/files/type_openoffice.xlsx +0 -0
- data/test/files/whitespace.ods +0 -0
- data/test/files/whitespace.xls +0 -0
- data/test/files/whitespace.xlsx +0 -0
- data/test/files/whitespace.xml +0 -184
- data/test/rm_sub_test.rb +0 -12
- data/test/rm_test.rb +0 -7
- data/test/test_generic_spreadsheet.rb +0 -259
- data/website/index.html +0 -385
- data/website/index.txt +0 -423
- data/website/javascripts/rounded_corners_lite.inc.js +0 -285
- data/website/stylesheets/screen.css +0 -130
- data/website/template.rhtml +0 -48
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
require 'forwardable'
|
|
2
|
+
module Roo
  class Excelx
    # A single worksheet of an xlsx workbook.
    #
    # Wraps the sheet's SheetDoc together with its relationships, comments and
    # images, and exposes cell lookup, row/column access, streaming row
    # iteration and the bounds of the populated area.
    class Sheet
      extend Forwardable

      delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared

      attr_reader :images

      # name        - the sheet name.
      # shared      - the object holding workbook-wide resources; the
      #               delegated readers above are forwarded to it.
      # sheet_index - index of this sheet in the per-sheet file lists.
      # options     - Hash passed through to SheetDoc.
      def initialize(name, shared, sheet_index, options = {})
        @name = name
        @shared = shared
        @sheet_index = sheet_index
        @images = Images.new(image_rels[sheet_index]).list
        @rels = Relationships.new(rels_files[sheet_index])
        @comments = Comments.new(comments_files[sheet_index])
        @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
      end

      # Returns the Hash of extracted cells, keyed by coordinate (memoized).
      def cells
        @cells ||= @sheet.cells(@rels)
      end

      # Deprecated: returns only the cells whose #presence is truthy.
      # Emits a deprecation warning the first time it is computed.
      def present_cells
        @present_cells ||= begin
          warn %{
[DEPRECATION] present_cells is deprecated. Alternate:
with activesupport => cells[key].presence
without activesupport => cells[key]&.presence
}
          cells.select { |_, cell| cell&.presence }
        end
      end

      # Yield each row as array of Excelx::Cell objects
      # accepts options max_rows (int) (offset by 1 for header),
      # pad_cells (boolean) and offset (int)
      #
      # The options Hash is only read, never modified, so callers may pass a
      # frozen or shared Hash.
      def each_row(options = {}, &block)
        offset = options[:offset] || 0
        max_rows = options[:max_rows]
        # The "+ 1" accounts for the header row, which is not counted
        # against max_rows.
        stop_at = max_rows && (max_rows + offset + 1)
        row_count = 0

        @sheet.each_row_streaming do |row|
          break if stop_at && row_count == stop_at

          block.call(cells_for_row_element(row, options)) if block && row_count >= offset
          row_count += 1
        end
      end

      # Returns the values of the given row, from the first to the last
      # non-empty column; missing cells yield nil.
      def row(row_number)
        first_column.upto(last_column).map do |col|
          cells[[row_number, col]]&.value
        end
      end

      # Returns the values of the given column, from the first to the last
      # non-empty row; missing cells yield nil.
      def column(col_number)
        first_row.upto(last_row).map do |row|
          cells[[row, col_number]]&.value
        end
      end

      # returns the number of the first non-empty row
      def first_row
        @first_row ||= first_last_row_col[:first_row]
      end

      # returns the number of the last non-empty row
      def last_row
        @last_row ||= first_last_row_col[:last_row]
      end

      # returns the number of the first non-empty column
      def first_column
        @first_column ||= first_last_row_col[:first_column]
      end

      # returns the number of the last non-empty column
      def last_column
        @last_column ||= first_last_row_col[:last_column]
      end

      # Returns the number format of the cell at key as a String, or nil when
      # there is no cell at that key.
      def excelx_format(key)
        cell = cells[key]
        styles.style_format(cell.style).to_s if cell
      end

      # Returns the sheet's hyperlinks as extracted by SheetDoc (memoized).
      def hyperlinks
        @hyperlinks ||= @sheet.hyperlinks(@rels)
      end

      # Returns the comments of this sheet.
      def comments
        @comments.comments
      end

      # Returns the dimension reference reported by the sheet document.
      def dimensions
        @sheet.dimensions
      end

      private

      # Take an xml row and return an array of Excelx::Cell objects
      # optionally pad array to header width(assumed 1st row).
      # takes option pad_cells (boolean) defaults false
      def cells_for_row_element(row_element, options = {})
        return [] unless row_element

        previous_column = 0
        row_cells = []
        @sheet.each_cell(row_element) do |cell|
          row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
          row_cells << cell
          previous_column = cell.coordinate.column
        end
        row_cells
      end

      # Returns an Array of nils, one for every column skipped between
      # last_column and the column of cell (empty when nothing was skipped).
      def pad_cells(cell, last_column)
        gap = cell.coordinate.column - 1 - last_column
        Array.new([gap, 0].max)
      end

      # Computes (once) the bounds of all cells whose #presence is truthy.
      # Every value in the returned Hash is nil when no such cell exists.
      def first_last_row_col
        @first_last_row_col ||= begin
          first_row = last_row = first_col = last_col = nil

          cells.each do |(row, col), cell|
            next unless cell&.presence

            first_row = row if first_row.nil? || row < first_row
            last_row = row if last_row.nil? || row > last_row
            first_col = col if first_col.nil? || col < first_col
            last_col = col if last_col.nil? || col > last_col
          end

          {first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col}
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'forwardable'
|
|
4
|
+
require 'roo/excelx/extractor'
|
|
5
|
+
|
|
6
|
+
module Roo
  class Excelx
    # Extractor for one worksheet XML part: builds Excelx::Cell objects,
    # resolves hyperlinks through the sheet's relationships, optionally
    # expands merged ranges, and supports streaming row access.
    class SheetDoc < Excelx::Extractor
      extend Forwardable
      delegate [:workbook] => :@shared

      # path          - path of the worksheet XML file (handed to Extractor).
      # relationships - the sheet's relationships, used to resolve hyperlinks.
      # shared        - object providing styles, shared strings and base dates.
      # options       - Hash; the keys read in this class are :no_hyperlinks,
      #                 :empty_cell and :expand_merged_ranges.
      def initialize(path, relationships, shared, options = {})
        super(path)
        @shared = shared
        @options = options
        @relationships = relationships
      end

      # Returns the Hash of coordinate => cell for the whole sheet (memoized).
      def cells(relationships)
        @cells ||= extract_cells(relationships)
      end

      # Returns a Hash of coordinate => link target (memoized).
      def hyperlinks(relationships)
        # If you're sure you're not going to need the hyperlinks you can
        # discard them with the :no_hyperlinks option.
        @hyperlinks ||= if @options[:no_hyperlinks] || !relationships.include_type?("hyperlink")
          {}
        else
          extract_hyperlinks(relationships)
        end
      end

      # Get the dimensions for the sheet.
      # This is the upper bound of cells that might
      # be parsed. (the document may be sparse so cell count is only upper bound)
      def dimensions
        @dimensions ||= extract_dimensions
      end

      # Yield each row xml element to caller
      def each_row_streaming(&block)
        Roo::Utils.each_element(@path, 'row', &block)
      end

      # Yield each cell as Excelx::Cell to caller for given
      # row xml
      def each_cell(row_xml)
        return [] unless row_xml

        row_xml.children.each do |cell_element|
          coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
          hyperlinks = hyperlinks(@relationships)[coordinate]

          yield cell_from_xml(cell_element, hyperlinks, coordinate)
        end
      end

      private

      # Maps the cell's "t" attribute to a value type symbol; any other type
      # is derived from the number format.
      def cell_value_type(type, format)
        case type
        when 's'
          :shared
        when 'b'
          :boolean
        when 'str'
          :string
        when 'inlineStr'
          :inlinestr
        else
          Excelx::Format.to_type(format)
        end
      end

      # Internal: Creates a cell based on an XML cell.
      #
      # cell_xml   - a Nokogiri::XML::Element. e.g.
      #                <c r="A5" s="2">
      #                  <v>22606</v>
      #                </c>
      # hyperlink  - a String for the hyperlink for the cell or nil when no
      #              hyperlink is present.
      # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
      #              or nil to extract coordinate from cell_xml.
      # empty_cell - an Optional Boolean value. When falsy, cells without
      #              content produce nil instead of an Excelx::Cell::Empty.
      #
      # Examples
      #
      #   cell_from_xml(<Nokogiri::XML::Element>, nil, nil)
      #   # => <Excelx::Cell::String>
      #
      # Returns a type of <Excelx::Cell>.
      def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell = true)
        coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
        cell_xml_children = cell_xml.children
        return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?

        # NOTE: This is error prone, to_i will silently turn a nil into a 0.
        #       This works by coincidence because Format[0] is General.
        style = cell_xml["s"].to_i
        formula = nil

        cell_xml_children.each do |cell|
          case cell.name
          when 'is'
            content = cell.search('t').map(&:content).join
            unless content.empty?
              return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
            end
          when 'f'
            # Remember the formula; it is attached to the value that follows.
            formula = cell.content
          when 'v'
            format = style_format(style)
            value_type = cell_value_type(cell_xml["t"], format)

            return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
          end
        end

        create_empty_cell(coordinate, empty_cell)
      end

      # Returns an Excelx::Cell::Empty for the coordinate, or nil when
      # empty_cell is falsy.
      def create_empty_cell(coordinate, empty_cell)
        if empty_cell
          Excelx::Cell::Empty.new(coordinate)
        end
      end

      # Builds the concrete Excelx::Cell for a <v> element according to
      # value_type.
      def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
        # NOTE: format.to_s can replace excelx_type as an argument for
        #       Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
        #       it will break some brittle tests.
        excelx_type = [:numeric_or_formula, format.to_s]

        # NOTE: There are only a few situations where value != cell.content
        #       1. when a sharedString is used. value = sharedString;
        #          cell.content = id of sharedString
        #       2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1';
        #          But a boolean cell should use TRUE|FALSE as the formatted value
        #          and use a Boolean for its value. Using a Boolean value breaks
        #          Roo::Base#to_csv.
        #       3. formula
        case value_type
        when :shared
          cell_content = cell.content.to_i
          value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
          Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
        when :boolean, :string
          value = cell.content
          Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
        when :time, :datetime
          cell_content = cell.content.to_f
          # NOTE: A date will be a whole number. A time will be < 1. And
          #      in general, a datetime will have decimals. But if the cell is
          #      using a custom format, it's possible to be interpreted incorrectly.
          #      cell_content.to_i == cell_content && standard_style?=> :date
          #
          #      Should check to see if the format is standard or not. If it's a
          #      standard format, then it's a date, otherwise, it is a datetime.
          #      @styles.standard_style?(style_id)
          #      STANDARD_STYLES.keys.include?(style_id.to_i)
          cell_type = if cell_content < 1.0
                        :time
                      elsif (cell_content - cell_content.floor).abs > 0.000001
                        :datetime
                      else
                        :date
                      end
          base_value = cell_type == :date ? base_date : base_timestamp
          Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
        when :date
          Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
        else
          Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
        end
      end

      # Builds the coordinate => target Hash from the sheet's <hyperlink>
      # elements. Hyperlinks whose id has no relationship are skipped.
      def extract_hyperlinks(relationships)
        return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))

        hyperlinks.each_with_object({}) do |hyperlink, hash|
          if relationship = relationships[hyperlink['id']]
            target_link = relationship['Target']
            # Append the in-document location as a URL fragment.
            target_link += "##{hyperlink['location']}" if hyperlink['location']

            # A hyperlink ref may cover a range; register every cell in it.
            Roo::Utils.coordinates_in_range(hyperlink["ref"].to_s) do |coord|
              hash[coord] = target_link
            end
          end
        end
      end

      # Copies the top-left cell of every merged range into all other
      # coordinates of that range. Mutates and relies on the given cells Hash.
      def expand_merged_ranges(cells)
        # Extract merged ranges from xml
        merges = {}
        doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
          src, dst = mergecell_xml["ref"].to_s.split(':').map { |ref| ::Roo::Utils.ref_to_key(ref) }
          next unless cells[src]

          # A ref without ":" names a single cell; treat it as a 1x1 range
          # instead of failing on a missing end coordinate.
          dst ||= src

          (src[0]..dst[0]).each do |row|
            (src[1]..dst[1]).each do |col|
              next if row == src[0] && col == src[1]

              merges[[row, col]] = src
            end
          end
        end
        # Duplicate value into all cells in merged range
        merges.each do |dst, src|
          cells[dst] = cells[src]
        end
      end

      # Parses every <c> element of the sheet into a cell. Cells without an
      # "r" attribute are positioned by their 1-based row/cell order.
      def extract_cells(relationships)
        extracted_cells = {}
        empty_cell = @options[:empty_cell]

        doc.xpath('/worksheet/sheetData/row').each.with_index(1) do |row_xml, ycoord|
          row_xml.xpath('c').each.with_index(1) do |cell_xml, xcoord|
            r = cell_xml['r']
            coordinate =
              if r.nil?
                ::Roo::Excelx::Coordinate.new(ycoord, xcoord)
              else
                ::Roo::Utils.extract_coordinate(r)
              end

            cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
            extracted_cells[coordinate] = cell if cell
          end
        end

        expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]

        extracted_cells
      end

      # Returns the "ref" attribute of the first <dimension> element.
      # NOTE(review): when the sheet has no <dimension> element this returns
      # whatever Roo::Utils.each_element itself returns - confirm that callers
      # handle that value.
      def extract_dimensions
        Roo::Utils.each_element(@path, 'dimension') do |dimension|
          return dimension["ref"]
        end
      end

      def style_format(style)
        @shared.styles.style_format(style)
      end

      def base_date
        @shared.base_date
      end

      def base_timestamp
        @shared.base_timestamp
      end

      def shared_strings
        @shared.shared_strings
      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require 'roo/font'
|
|
2
|
+
require 'roo/excelx/extractor'
|
|
3
|
+
|
|
4
|
+
module Roo
  class Excelx
    # Extractor for the workbook's styles part: number formats and the font
    # used by each cell style.
    class Styles < Excelx::Extractor
      # Translate a cell's style index into its number format.
      # Custom formats declared in the file win over the standard table.
      def style_format(style)
        fmt_id = num_fmt_ids[style.to_i]
        custom_format = num_fmts[fmt_id]
        return custom_format if custom_format

        Excelx::Format::STANDARD_FORMATS[fmt_id.to_i]
      end

      # Returns the font for every cell style, in style-index order (memoized).
      def definitions
        @definitions ||= extract_definitions
      end

      private

      # numFmtId of each cell style (memoized).
      def num_fmt_ids
        @num_fmt_ids ||= extract_num_fmt_ids
      end

      # Hash of numFmtId => formatCode for custom formats (memoized).
      def num_fmts
        @num_fmts ||= extract_num_fmts
      end

      # Fonts declared in the styles part, in document order (memoized).
      def fonts
        @fonts ||= extract_fonts
      end

      # Looks up the font referenced by every child of each <cellXfs>.
      def extract_definitions
        doc.xpath('//cellXfs').flat_map do |cell_xfs|
          cell_xfs.children.map { |xf_node| fonts[xf_node['fontId'].to_i] }
        end
      end

      # Builds a Font per <font> element, flagging bold/italic/underline by
      # the presence of the <b>, <i> and <u> child elements.
      def extract_fonts
        doc.xpath('//fonts/font').map do |font_node|
          parsed = Font.new
          parsed.bold = font_node.xpath('./b').any?
          parsed.italic = font_node.xpath('./i').any?
          parsed.underline = font_node.xpath('./u').any?
          parsed
        end
      end

      # Collects the numFmtId attribute of every child of each <cellXfs>,
      # dropping children that have none.
      def extract_num_fmt_ids
        ids = doc.xpath('//cellXfs').flat_map do |cell_xfs|
          cell_xfs.children.map { |xf_node| xf_node['numFmtId'] }
        end
        ids.compact
      end

      # Maps each <numFmt>'s numFmtId to its formatCode.
      def extract_num_fmts
        formats = {}
        doc.xpath('//numFmt').each do |fmt_node|
          formats[fmt_node['numFmtId']] = fmt_node['formatCode']
        end
        formats
      end
    end
  end
end
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
require 'roo/excelx/extractor'
|
|
2
|
+
|
|
3
|
+
module Roo
  class Excelx
    # Extractor for the workbook part: sheet list, defined names and the
    # date system used by the file.
    class Workbook < Excelx::Extractor
      # A defined name pointing at a single cell of a sheet.
      class Label
        attr_reader :sheet, :row, :col, :name

        # row is converted with to_i; col is a column letter converted to
        # its number.
        def initialize(name, sheet, row, col)
          @name = name
          @sheet = sheet
          @row = row.to_i
          @col = ::Roo::Utils.letter_to_number(col)
        end

        # Returns the [row, col] pair of the label.
        def key
          [row, col]
        end
      end

      # Raises ArgumentError when the workbook file does not exist.
      def initialize(path)
        super
        raise ArgumentError, 'missing required workbook file' unless doc_exists?
      end

      # Returns all <sheet> elements of the workbook.
      def sheets
        doc.xpath('//sheet')
      end

      # aka labels
      #
      # Returns a Hash of name => Label. Defined names whose text does not
      # contain "!$" are ignored.
      def defined_names
        labels = {}
        doc.xpath('//definedName').each do |node|
          # "Sheet1!$C$5"
          sheet_name, cell_ref = node.text.split('!$', 2)
          next unless cell_ref

          column_letter, row_number = cell_ref.split('$')
          label_name = node['name']
          labels[label_name] = Label.new(label_name, sheet_name, row_number, column_letter)
        end
        labels
      end

      # Returns base_date as an Integer timestamp (memoized).
      def base_timestamp
        @base_timestamp ||= base_date.to_datetime.to_time.to_i
      end

      # Returns the Date that serial date values are counted from (memoized).
      def base_date
        @base_date ||=
          begin
            # Default to 1900 (minus one day due to excel quirk) but use 1904 if
            # it's set in the Workbook's workbookPr
            # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
            uses_1904 = doc.css('workbookPr[date1904]').any? do |workbook_pr|
              workbook_pr['date1904'] =~ /true|1/i
            end
            uses_1904 ? Date.new(1904, 1, 1) : Date.new(1899, 12, 30)
          end
      end
    end
  end
end
|