roo 1.13.1 → 2.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.codeclimate.yml +17 -0
- data/.github/issue_template.md +16 -0
- data/.github/pull_request_template.md +14 -0
- data/.github/workflows/pull-request.yml +15 -0
- data/.github/workflows/ruby.yml +34 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +186 -0
- data/.simplecov +4 -0
- data/CHANGELOG.md +702 -0
- data/Gemfile +18 -12
- data/Guardfile +23 -0
- data/LICENSE +5 -1
- data/README.md +328 -0
- data/Rakefile +23 -23
- data/examples/roo_soap_client.rb +28 -31
- data/examples/roo_soap_server.rb +4 -6
- data/examples/write_me.rb +9 -10
- data/lib/roo/base.rb +317 -504
- data/lib/roo/constants.rb +7 -0
- data/lib/roo/csv.rb +141 -113
- data/lib/roo/errors.rb +11 -0
- data/lib/roo/excelx/cell/base.rb +108 -0
- data/lib/roo/excelx/cell/boolean.rb +30 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +107 -0
- data/lib/roo/excelx/cell/empty.rb +20 -0
- data/lib/roo/excelx/cell/number.rb +99 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +44 -0
- data/lib/roo/excelx/cell.rb +110 -0
- data/lib/roo/excelx/comments.rb +55 -0
- data/lib/roo/excelx/coordinate.rb +19 -0
- data/lib/roo/excelx/extractor.rb +39 -0
- data/lib/roo/excelx/format.rb +71 -0
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +33 -0
- data/lib/roo/excelx/shared.rb +39 -0
- data/lib/roo/excelx/shared_strings.rb +151 -0
- data/lib/roo/excelx/sheet.rb +151 -0
- data/lib/roo/excelx/sheet_doc.rb +257 -0
- data/lib/roo/excelx/styles.rb +64 -0
- data/lib/roo/excelx/workbook.rb +64 -0
- data/lib/roo/excelx.rb +407 -601
- data/lib/roo/font.rb +17 -0
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/libre_office.rb +4 -0
- data/lib/roo/link.rb +34 -0
- data/lib/roo/open_office.rb +631 -0
- data/lib/roo/spreadsheet.rb +28 -23
- data/lib/roo/tempdir.rb +24 -0
- data/lib/roo/utils.rb +128 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +26 -24
- data/roo.gemspec +29 -202
- data/spec/helpers.rb +5 -0
- data/spec/lib/roo/base_spec.rb +291 -3
- data/spec/lib/roo/csv_spec.rb +38 -11
- data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
- data/spec/lib/roo/excelx/format_spec.rb +7 -6
- data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
- data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
- data/spec/lib/roo/excelx_spec.rb +682 -6
- data/spec/lib/roo/libreoffice_spec.rb +16 -6
- data/spec/lib/roo/openoffice_spec.rb +30 -8
- data/spec/lib/roo/spreadsheet_spec.rb +60 -12
- data/spec/lib/roo/strict_spec.rb +43 -0
- data/spec/lib/roo/utils_spec.rb +119 -0
- data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
- data/spec/lib/roo_spec.rb +0 -0
- data/spec/spec_helper.rb +7 -6
- data/test/all_ss.rb +12 -11
- data/test/excelx/cell/test_attr_reader_default.rb +72 -0
- data/test/excelx/cell/test_base.rb +68 -0
- data/test/excelx/cell/test_boolean.rb +36 -0
- data/test/excelx/cell/test_date.rb +38 -0
- data/test/excelx/cell/test_datetime.rb +45 -0
- data/test/excelx/cell/test_empty.rb +18 -0
- data/test/excelx/cell/test_number.rb +90 -0
- data/test/excelx/cell/test_string.rb +48 -0
- data/test/excelx/cell/test_time.rb +30 -0
- data/test/excelx/test_coordinate.rb +51 -0
- data/test/formatters/test_csv.rb +136 -0
- data/test/formatters/test_matrix.rb +76 -0
- data/test/formatters/test_xml.rb +78 -0
- data/test/formatters/test_yaml.rb +20 -0
- data/test/helpers/test_accessing_files.rb +81 -0
- data/test/helpers/test_comments.rb +43 -0
- data/test/helpers/test_formulas.rb +9 -0
- data/test/helpers/test_labels.rb +103 -0
- data/test/helpers/test_sheets.rb +55 -0
- data/test/helpers/test_styles.rb +62 -0
- data/test/roo/test_base.rb +182 -0
- data/test/roo/test_csv.rb +88 -0
- data/test/roo/test_excelx.rb +360 -0
- data/test/roo/test_libre_office.rb +9 -0
- data/test/roo/test_open_office.rb +289 -0
- data/test/test_helper.rb +123 -59
- data/test/test_roo.rb +392 -2292
- metadata +153 -296
- data/CHANGELOG +0 -412
- data/Gemfile.lock +0 -78
- data/README.markdown +0 -126
- data/VERSION +0 -1
- data/lib/roo/excel.rb +0 -355
- data/lib/roo/excel2003xml.rb +0 -300
- data/lib/roo/google.rb +0 -292
- data/lib/roo/openoffice.rb +0 -496
- data/lib/roo/roo_rails_helper.rb +0 -83
- data/lib/roo/worksheet.rb +0 -18
- data/scripts/txt2html +0 -67
- data/spec/lib/roo/excel2003xml_spec.rb +0 -15
- data/spec/lib/roo/excel_spec.rb +0 -17
- data/spec/lib/roo/google_spec.rb +0 -64
- data/test/files/1900_base.xls +0 -0
- data/test/files/1900_base.xlsx +0 -0
- data/test/files/1904_base.xls +0 -0
- data/test/files/1904_base.xlsx +0 -0
- data/test/files/Bibelbund.csv +0 -3741
- data/test/files/Bibelbund.ods +0 -0
- data/test/files/Bibelbund.xls +0 -0
- data/test/files/Bibelbund.xlsx +0 -0
- data/test/files/Bibelbund.xml +0 -62518
- data/test/files/Bibelbund1.ods +0 -0
- data/test/files/Pfand_from_windows_phone.xlsx +0 -0
- data/test/files/bad_excel_date.xls +0 -0
- data/test/files/bbu.ods +0 -0
- data/test/files/bbu.xls +0 -0
- data/test/files/bbu.xlsx +0 -0
- data/test/files/bbu.xml +0 -152
- data/test/files/bode-v1.ods.zip +0 -0
- data/test/files/bode-v1.xls.zip +0 -0
- data/test/files/boolean.csv +0 -2
- data/test/files/boolean.ods +0 -0
- data/test/files/boolean.xls +0 -0
- data/test/files/boolean.xlsx +0 -0
- data/test/files/boolean.xml +0 -112
- data/test/files/borders.ods +0 -0
- data/test/files/borders.xls +0 -0
- data/test/files/borders.xlsx +0 -0
- data/test/files/borders.xml +0 -144
- data/test/files/bug-numbered-sheet-names.xlsx +0 -0
- data/test/files/bug-row-column-fixnum-float.xls +0 -0
- data/test/files/bug-row-column-fixnum-float.xml +0 -127
- data/test/files/comments.ods +0 -0
- data/test/files/comments.xls +0 -0
- data/test/files/comments.xlsx +0 -0
- data/test/files/csvtypes.csv +0 -1
- data/test/files/datetime.ods +0 -0
- data/test/files/datetime.xls +0 -0
- data/test/files/datetime.xlsx +0 -0
- data/test/files/datetime.xml +0 -142
- data/test/files/datetime_floatconv.xls +0 -0
- data/test/files/datetime_floatconv.xml +0 -148
- data/test/files/dreimalvier.ods +0 -0
- data/test/files/emptysheets.ods +0 -0
- data/test/files/emptysheets.xls +0 -0
- data/test/files/emptysheets.xlsx +0 -0
- data/test/files/emptysheets.xml +0 -105
- data/test/files/excel2003.xml +0 -21140
- data/test/files/false_encoding.xls +0 -0
- data/test/files/false_encoding.xml +0 -132
- data/test/files/file_item_error.xlsx +0 -0
- data/test/files/formula.ods +0 -0
- data/test/files/formula.xls +0 -0
- data/test/files/formula.xlsx +0 -0
- data/test/files/formula.xml +0 -134
- data/test/files/formula_parse_error.xls +0 -0
- data/test/files/formula_parse_error.xml +0 -1833
- data/test/files/formula_string_error.xlsx +0 -0
- data/test/files/html-escape.ods +0 -0
- data/test/files/link.xls +0 -0
- data/test/files/link.xlsx +0 -0
- data/test/files/matrix.ods +0 -0
- data/test/files/matrix.xls +0 -0
- data/test/files/named_cells.ods +0 -0
- data/test/files/named_cells.xls +0 -0
- data/test/files/named_cells.xlsx +0 -0
- data/test/files/no_spreadsheet_file.txt +0 -1
- data/test/files/numbers1.csv +0 -18
- data/test/files/numbers1.ods +0 -0
- data/test/files/numbers1.xls +0 -0
- data/test/files/numbers1.xlsx +0 -0
- data/test/files/numbers1.xml +0 -312
- data/test/files/only_one_sheet.ods +0 -0
- data/test/files/only_one_sheet.xls +0 -0
- data/test/files/only_one_sheet.xlsx +0 -0
- data/test/files/only_one_sheet.xml +0 -67
- data/test/files/paragraph.ods +0 -0
- data/test/files/paragraph.xls +0 -0
- data/test/files/paragraph.xlsx +0 -0
- data/test/files/paragraph.xml +0 -127
- data/test/files/prova.xls +0 -0
- data/test/files/ric.ods +0 -0
- data/test/files/simple_spreadsheet.ods +0 -0
- data/test/files/simple_spreadsheet.xls +0 -0
- data/test/files/simple_spreadsheet.xlsx +0 -0
- data/test/files/simple_spreadsheet.xml +0 -225
- data/test/files/simple_spreadsheet_from_italo.ods +0 -0
- data/test/files/simple_spreadsheet_from_italo.xls +0 -0
- data/test/files/simple_spreadsheet_from_italo.xml +0 -242
- data/test/files/so_datetime.csv +0 -7
- data/test/files/style.ods +0 -0
- data/test/files/style.xls +0 -0
- data/test/files/style.xlsx +0 -0
- data/test/files/style.xml +0 -154
- data/test/files/time-test.csv +0 -2
- data/test/files/time-test.ods +0 -0
- data/test/files/time-test.xls +0 -0
- data/test/files/time-test.xlsx +0 -0
- data/test/files/time-test.xml +0 -131
- data/test/files/type_excel.ods +0 -0
- data/test/files/type_excel.xlsx +0 -0
- data/test/files/type_excelx.ods +0 -0
- data/test/files/type_excelx.xls +0 -0
- data/test/files/type_openoffice.xls +0 -0
- data/test/files/type_openoffice.xlsx +0 -0
- data/test/files/whitespace.ods +0 -0
- data/test/files/whitespace.xls +0 -0
- data/test/files/whitespace.xlsx +0 -0
- data/test/files/whitespace.xml +0 -184
- data/test/rm_sub_test.rb +0 -12
- data/test/rm_test.rb +0 -7
- data/test/test_generic_spreadsheet.rb +0 -259
- data/website/index.html +0 -385
- data/website/index.txt +0 -423
- data/website/javascripts/rounded_corners_lite.inc.js +0 -285
- data/website/stylesheets/screen.css +0 -130
- data/website/template.rhtml +0 -48
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'forwardable'
|
2
|
+
module Roo
|
3
|
+
class Excelx
|
4
|
+
# A single worksheet of an Excelx workbook.
#
# Ties together the per-sheet collaborators (SheetDoc, Relationships,
# Comments, Images), each built from the entry at +sheet_index+ in the
# corresponding list exposed by +shared+, and offers cell lookup, row
# streaming and extent (first/last row/column) queries on top of them.
class Sheet
  extend Forwardable

  delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared

  attr_reader :images

  # name        - the sheet's name (stored only).
  # shared      - object providing the delegated readers listed above.
  # sheet_index - index used to pick this sheet's entry out of
  #               image_rels, rels_files, comments_files and sheet_files.
  # options     - Hash passed through to SheetDoc.
  def initialize(name, shared, sheet_index, options = {})
    @name = name
    @shared = shared
    @sheet_index = sheet_index
    @images = Images.new(image_rels[sheet_index]).list
    @rels = Relationships.new(rels_files[sheet_index])
    @comments = Comments.new(comments_files[sheet_index])
    @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
  end

  # All cells of the sheet as returned by SheetDoc#cells (memoized).
  def cells
    @cells ||= @sheet.cells(@rels)
  end

  # Deprecated: the subset of #cells whose cell reports a presence.
  # Emits a deprecation warning the first time it is computed.
  def present_cells
    @present_cells ||= begin
      warn %{
[DEPRECATION] present_cells is deprecated. Alternate:
with activesupport => cells[key].presence
without activesupport => cells[key]&.presence
}
      cells.select { |_, cell| cell&.presence }
    end
  end

  # Yield each row as array of Excelx::Cell objects
  # accepts options max_rows (int) (offset by 1 for header),
  # pad_cells (boolean) and offset (int)
  #
  # The options hash is only read, never modified, so callers may pass a
  # frozen or shared hash.
  def each_row(options = {}, &block)
    offset = options[:offset] || 0
    # Rows visited before stopping: the skipped offset rows, one header
    # row, then max_rows further rows. nil means "no limit".
    row_limit = options[:max_rows] && options[:max_rows] + offset + 1
    row_count = 0

    @sheet.each_row_streaming do |row|
      break if row_limit && row_count == row_limit

      block.call(cells_for_row_element(row, options)) if block && row_count >= offset
      row_count += 1
    end
  end

  # Values of the given row, from first_column to last_column; positions
  # without a cell are nil. Returns [] when the sheet has no non-empty
  # cell (and therefore no column extent).
  def row(row_number)
    return [] unless first_column

    first_column.upto(last_column).map do |col|
      cells[[row_number, col]]&.value
    end
  end

  # Values of the given column, from first_row to last_row; positions
  # without a cell are nil. Returns [] when the sheet has no non-empty
  # cell (and therefore no row extent).
  def column(col_number)
    return [] unless first_row

    first_row.upto(last_row).map do |row|
      cells[[row, col_number]]&.value
    end
  end

  # returns the number of the first non-empty row
  def first_row
    @first_row ||= first_last_row_col[:first_row]
  end

  # returns the number of the last non-empty row
  def last_row
    @last_row ||= first_last_row_col[:last_row]
  end

  # returns the number of the first non-empty column
  def first_column
    @first_column ||= first_last_row_col[:first_column]
  end

  # returns the number of the last non-empty column
  def last_column
    @last_column ||= first_last_row_col[:last_column]
  end

  # String form of the style format of the cell stored under +key+,
  # or nil when there is no such cell.
  def excelx_format(key)
    cell = cells[key]
    styles.style_format(cell.style).to_s if cell
  end

  # Hyperlinks of the sheet as returned by SheetDoc#hyperlinks (memoized).
  def hyperlinks
    @hyperlinks ||= @sheet.hyperlinks(@rels)
  end

  # Comments of the sheet, taken from the Comments collaborator.
  def comments
    @comments.comments
  end

  # Dimensions of the sheet as reported by SheetDoc#dimensions.
  def dimensions
    @sheet.dimensions
  end

  private

  # Take an xml row and return an array of Excelx::Cell objects
  # optionally pad array to header width(assumed 1st row).
  # takes option pad_cells (boolean) defaults false
  def cells_for_row_element(row_element, options = {})
    return [] unless row_element

    previous_column = 0
    row_cells = []
    @sheet.each_cell(row_element) do |cell|
      row_cells.concat(pad_cells(cell, previous_column)) if options[:pad_cells]
      row_cells << cell
      previous_column = cell.coordinate.column
    end
    row_cells
  end

  # Array of nils filling the gap between +last_column+ and the column of
  # +cell+; empty when the cell directly follows (or does not lie after)
  # +last_column+.
  def pad_cells(cell, last_column)
    gap = cell.coordinate.column - 1 - last_column
    Array.new(gap > 0 ? gap : 0)
  end

  # Hash with :first_row, :last_row, :first_column and :last_column,
  # computed in one pass over the cells that report a presence. Every
  # value is nil when no such cell exists. Memoized.
  def first_last_row_col
    @first_last_row_col ||= begin
      first_row = last_row = first_col = last_col = nil

      cells.each do |(row, col), cell|
        next unless cell&.presence

        first_row = row if first_row.nil? || row < first_row
        last_row = row if last_row.nil? || row > last_row
        first_col = col if first_col.nil? || col < first_col
        last_col = col if last_col.nil? || col > last_col
      end

      { first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col }
    end
  end
end
|
150
|
+
end
|
151
|
+
end
|
@@ -0,0 +1,257 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'forwardable'
|
4
|
+
require 'roo/excelx/extractor'
|
5
|
+
|
6
|
+
module Roo
|
7
|
+
class Excelx
|
8
|
+
# Reader for one worksheet XML document.
#
# Turns the `<c>` elements found under /worksheet/sheetData/row into
# Excelx::Cell objects, resolves hyperlinks through the sheet's
# relationships and optionally copies values across merged ranges.
# Workbook-wide data (styles, shared strings, base date) comes from the
# +shared+ object handed to the constructor.
class SheetDoc < Excelx::Extractor
  extend Forwardable
  delegate [:workbook] => :@shared

  # path          - passed to Excelx::Extractor; also used for streaming.
  # relationships - relationships object used to resolve hyperlinks.
  # shared        - provider of styles, shared_strings, base_date and
  #                 base_timestamp.
  # options       - Hash; the keys read in this class are :no_hyperlinks,
  #                 :empty_cell and :expand_merged_ranges.
  def initialize(path, relationships, shared, options = {})
    super(path)
    @shared = shared
    @options = options
    @relationships = relationships
  end

  # Hash of coordinate => cell for the whole sheet (memoized).
  def cells(relationships)
    @cells ||= extract_cells(relationships)
  end

  # Hash of coordinate => link target (memoized). Empty when the
  # :no_hyperlinks option is set or the relationships contain no
  # "hyperlink" type, so callers that never need hyperlinks can skip the
  # extraction entirely.
  def hyperlinks(relationships)
    @hyperlinks ||= if @options[:no_hyperlinks] || !relationships.include_type?("hyperlink")
      {}
    else
      extract_hyperlinks(relationships)
    end
  end

  # Get the dimensions for the sheet.
  # This is the upper bound of cells that might
  # be parsed. (the document may be sparse so cell count is only upper bound)
  def dimensions
    @dimensions ||= extract_dimensions
  end

  # Yield each row xml element to caller
  def each_row_streaming(&block)
    Roo::Utils.each_element(@path, 'row', &block)
  end

  # Yield each cell as Excelx::Cell to caller for given
  # row xml
  def each_cell(row_xml)
    return [] unless row_xml

    row_xml.children.each do |cell_element|
      coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
      hyperlink = hyperlinks(@relationships)[coordinate]

      yield cell_from_xml(cell_element, hyperlink, coordinate)
    end
  end

  private

  # Maps the cell's `t` attribute to a value type symbol; any other
  # (or missing) type is derived from the number format.
  def cell_value_type(type, format)
    case type
    when 's'
      :shared
    when 'b'
      :boolean
    when 'str'
      :string
    when 'inlineStr'
      :inlinestr
    else
      Excelx::Format.to_type(format)
    end
  end

  # Internal: Creates a cell based on an XML cell.
  #
  # cell_xml   - a Nokogiri::XML::Element. e.g.
  #                <c r="A5" s="2">
  #                  <v>22606</v>
  #                </c>
  # hyperlink  - a String for the hyperlink for the cell or nil when no
  #              hyperlink is present.
  # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
  #              or nil to extract coordinate from cell_xml.
  # empty_cell - an Optional Boolean value. When false, nil is returned
  #              instead of an Excelx::Cell::Empty for a cell without a
  #              usable value.
  #
  # Examples
  #
  #   cell_from_xml(<Nokogiri::XML::Element>, nil, nil)
  #   # => <Excelx::Cell::String>
  #
  # Returns a type of <Excelx::Cell>.
  def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell = true)
    coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
    cell_xml_children = cell_xml.children
    return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?

    # NOTE: This is error prone, to_i will silently turn a nil into a 0.
    #       This works by coincidence because Format[0] is General.
    style = cell_xml["s"].to_i
    formula = nil

    cell_xml_children.each do |cell|
      case cell.name
      when 'is'
        # Inline string: concatenate every <t> run.
        content = cell.search('t').map(&:content).join
        unless content.empty?
          return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
        end
      when 'f'
        # Remember the formula; it is attached to the value that follows.
        formula = cell.content
      when 'v'
        format = style_format(style)
        value_type = cell_value_type(cell_xml["t"], format)

        return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
      end
    end

    create_empty_cell(coordinate, empty_cell)
  end

  # Returns an Excelx::Cell::Empty for +coordinate+ when +empty_cell+ is
  # truthy, otherwise nil.
  def create_empty_cell(coordinate, empty_cell)
    if empty_cell
      Excelx::Cell::Empty.new(coordinate)
    end
  end

  # Builds the concrete cell object for a `<v>` element according to
  # +value_type+.
  def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
    # NOTE: format.to_s can replace excelx_type as an argument for
    #       Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
    #       it will break some brittle tests.
    excelx_type = [:numeric_or_formula, format.to_s]

    # NOTE: There are only a few situations where value != cell.content
    #       1. when a sharedString is used. value = sharedString;
    #          cell.content = id of sharedString
    #       2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1';
    #          But a boolean cell should use TRUE|FALSE as the formatted value
    #          and use a Boolean for its value. Using a Boolean value breaks
    #          Roo::Base#to_csv.
    #       3. formula
    case value_type
    when :shared
      cell_content = cell.content.to_i
      value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
      Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
    when :boolean, :string
      value = cell.content
      Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
    when :time, :datetime
      cell_content = cell.content.to_f
      # NOTE: A value below 1 is treated as a time, a whole number as a
      #       date, and anything else (a number with decimals) as a
      #       datetime. If the cell is using a custom format, it's
      #       possible to be interpreted incorrectly.
      #       cell_content.to_i == cell_content && standard_style?=> :date
      #
      #       Should check to see if the format is standard or not. If it's a
      #       standard format, then it's a date, otherwise, it is a datetime.
      #       @styles.standard_style?(style_id)
      #       STANDARD_STYLES.keys.include?(style_id.to_i)
      cell_type = if cell_content < 1.0
        :time
      elsif (cell_content - cell_content.floor).abs > 0.000001
        :datetime
      else
        :date
      end
      base_value = cell_type == :date ? base_date : base_timestamp
      Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
    when :date
      Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
    else
      Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
    end
  end

  # Builds the coordinate => target Hash from the sheet's <hyperlink>
  # elements. Elements whose id has no entry in +relationships+ are
  # skipped; a `location` attribute is appended to the target after "#".
  def extract_hyperlinks(relationships)
    return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))

    hyperlinks.each_with_object({}) do |hyperlink, hash|
      relationship = relationships[hyperlink['id']]
      next unless relationship

      target_link = relationship['Target']
      target_link += "##{hyperlink['location']}" if hyperlink['location']

      Roo::Utils.coordinates_in_range(hyperlink["ref"].to_s) do |coord|
        hash[coord] = target_link
      end
    end
  end

  # Copies the top-left cell of every merged range into the other
  # positions of that range. +cells+ is modified in place. Ranges whose
  # top-left cell is absent from +cells+ are ignored.
  def expand_merged_ranges(cells)
    # Extract merged ranges from xml
    merges = {}
    doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
      src, dst = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
      next unless cells[src]

      # A ref without ":" names a single cell; there is nothing to expand.
      dst ||= src

      (src[0]..dst[0]).each do |row|
        (src[1]..dst[1]).each do |col|
          next if row == src[0] && col == src[1]

          merges[[row, col]] = src
        end
      end
    end
    # Duplicate value into all cells in merged range
    merges.each do |dst, src|
      cells[dst] = cells[src]
    end
  end

  # Walks every row/cell element of the sheet and returns a Hash of
  # coordinate => cell. A cell without an `r` attribute gets a coordinate
  # from its 1-based position among the rows and among the row's cells.
  def extract_cells(relationships)
    extracted_cells = {}
    empty_cell = @options[:empty_cell]

    doc.xpath('/worksheet/sheetData/row').each.with_index(1) do |row_xml, ycoord|
      row_xml.xpath('c').each.with_index(1) do |cell_xml, xcoord|
        r = cell_xml['r']
        coordinate =
          if r.nil?
            ::Roo::Excelx::Coordinate.new(ycoord, xcoord)
          else
            ::Roo::Utils.extract_coordinate(r)
          end

        cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
        extracted_cells[coordinate] = cell if cell
      end
    end

    expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]

    extracted_cells
  end

  # Returns the `ref` attribute of the first `dimension` element yielded
  # for the sheet, or nil when none is yielded.
  def extract_dimensions
    Roo::Utils.each_element(@path, 'dimension') do |dimension|
      return dimension["ref"]
    end

    # Without this the method would return whatever each_element itself
    # returns, which is not a dimension reference.
    nil
  end

  # Format registered for the given style id in the shared styles.
  def style_format(style)
    @shared.styles.style_format(style)
  end

  # Workbook base date, as provided by the shared object.
  def base_date
    @shared.base_date
  end

  # Workbook base timestamp, as provided by the shared object.
  def base_timestamp
    @shared.base_timestamp
  end

  # Shared strings table, as provided by the shared object.
  def shared_strings
    @shared.shared_strings
  end
end
|
256
|
+
end
|
257
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'roo/font'
|
2
|
+
require 'roo/excelx/extractor'
|
3
|
+
|
4
|
+
module Roo
|
5
|
+
class Excelx
|
6
|
+
# Reader for the workbook's styles document: number formats and fonts,
# both addressed by a cell's style id (its position among the <xf>
# elements under <cellXfs>).
class Styles < Excelx::Extractor
  # convert internal excelx attribute to a format
  #
  # Looks up the numFmtId of the style, then resolves it against the
  # document's own <numFmt> definitions, falling back to
  # Excelx::Format::STANDARD_FORMATS.
  def style_format(style)
    id = num_fmt_ids[style.to_i]
    num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
  end

  # Array holding, for every <xf> under <cellXfs>, the font selected by
  # its fontId (memoized).
  def definitions
    @definitions ||= extract_definitions
  end

  private

  def num_fmt_ids
    @num_fmt_ids ||= extract_num_fmt_ids
  end

  def num_fmts
    @num_fmts ||= extract_num_fmts
  end

  def fonts
    @fonts ||= extract_fonts
  end

  # The <xf> elements under <cellXfs>, in document order. Selecting the
  # elements explicitly keeps any other child nodes out of the
  # style-id -> entry mapping.
  def cell_xfs
    doc.xpath('//cellXfs/xf')
  end

  def extract_definitions
    cell_xfs.map { |xf| fonts[xf['fontId'].to_i] }
  end

  # One Font per <font> element, with bold/italic/underline set from the
  # presence of a <b>/<i>/<u> child.
  def extract_fonts
    doc.xpath('//fonts/font').map do |font_el|
      Font.new.tap do |font|
        font.bold = !font_el.xpath('./b').empty?
        font.italic = !font_el.xpath('./i').empty?
        font.underline = !font_el.xpath('./u').empty?
      end
    end
  end

  # numFmtId of every <xf>, positionally aligned with the style id. An
  # <xf> without the attribute contributes nil (which style_format
  # resolves via `nil.to_i`, i.e. id 0) instead of being dropped, so
  # later style ids are not shifted.
  def extract_num_fmt_ids
    cell_xfs.map { |xf| xf['numFmtId'] }
  end

  # Hash of numFmtId => formatCode for the document's <numFmt> elements.
  def extract_num_fmts
    doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash|
      hash[num_fmt['numFmtId']] = num_fmt['formatCode']
    end
  end
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'roo/excelx/extractor'
|
2
|
+
|
3
|
+
module Roo
|
4
|
+
class Excelx
|
5
|
+
# Reader for the workbook document: sheet entries, defined names
# ("labels") and the date system used for serial dates.
class Workbook < Excelx::Extractor
  # A defined name pointing at a single cell of a sheet.
  class Label
    attr_reader :sheet, :row, :col, :name

    # row is coerced with to_i; col is a column letter converted through
    # Roo::Utils.letter_to_number.
    def initialize(name, sheet, row, col)
      @name = name
      @sheet = sheet
      @row = row.to_i
      @col = ::Roo::Utils.letter_to_number(col)
    end

    # [row, col] pair of the labelled cell.
    def key
      [@row, @col]
    end
  end

  # Raises ArgumentError when the workbook document does not exist.
  def initialize(path)
    super
    raise ArgumentError, 'missing required workbook file' unless doc_exists?
  end

  # Every <sheet> element of the document.
  def sheets
    doc.xpath('//sheet')
  end

  # aka labels
  #
  # Hash of name => Label built from the <definedName> elements whose
  # text contains "!$" (e.g. "Sheet1!$C$5"); other elements are skipped.
  def defined_names
    labels = {}
    doc.xpath('//definedName').each do |node|
      sheet_name, cell_ref = node.text.split('!$', 2)
      next unless cell_ref

      column_letter, row_number = cell_ref.split('$')
      label_name = node['name']
      labels[label_name] = Label.new(label_name, sheet_name, row_number, column_letter)
    end
    labels
  end

  # base_date expressed as an Integer timestamp (memoized).
  def base_timestamp
    @base_timestamp ||= base_date.to_datetime.to_time.to_i
  end

  # Date that serial number 0 stands for (memoized).
  #
  # Default to 1900 (minus one day due to excel quirk) but use 1904 if
  # it's set in the Workbook's workbookPr
  # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
  def base_date
    @base_date ||=
      begin
        uses_1904 = doc.css('workbookPr[date1904]').any? do |workbook_pr|
          workbook_pr['date1904'] =~ /true|1/i
        end

        uses_1904 ? Date.new(1904, 1, 1) : Date.new(1899, 12, 30)
      end
  end
end
|
63
|
+
end
|
64
|
+
end
|