roo 1.13.2 → 2.10.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.codeclimate.yml +17 -0
- data/.github/issue_template.md +16 -0
- data/.github/pull_request_template.md +14 -0
- data/.github/workflows/pull-request.yml +15 -0
- data/.github/workflows/ruby.yml +34 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +186 -0
- data/.simplecov +4 -0
- data/CHANGELOG.md +702 -0
- data/Gemfile +18 -12
- data/Guardfile +23 -0
- data/LICENSE +5 -1
- data/README.md +328 -0
- data/Rakefile +23 -23
- data/examples/roo_soap_client.rb +28 -31
- data/examples/roo_soap_server.rb +4 -6
- data/examples/write_me.rb +9 -10
- data/lib/roo/base.rb +317 -504
- data/lib/roo/constants.rb +7 -0
- data/lib/roo/csv.rb +141 -113
- data/lib/roo/errors.rb +11 -0
- data/lib/roo/excelx/cell/base.rb +108 -0
- data/lib/roo/excelx/cell/boolean.rb +30 -0
- data/lib/roo/excelx/cell/date.rb +28 -0
- data/lib/roo/excelx/cell/datetime.rb +107 -0
- data/lib/roo/excelx/cell/empty.rb +20 -0
- data/lib/roo/excelx/cell/number.rb +99 -0
- data/lib/roo/excelx/cell/string.rb +19 -0
- data/lib/roo/excelx/cell/time.rb +44 -0
- data/lib/roo/excelx/cell.rb +110 -0
- data/lib/roo/excelx/comments.rb +55 -0
- data/lib/roo/excelx/coordinate.rb +19 -0
- data/lib/roo/excelx/extractor.rb +39 -0
- data/lib/roo/excelx/format.rb +71 -0
- data/lib/roo/excelx/images.rb +26 -0
- data/lib/roo/excelx/relationships.rb +33 -0
- data/lib/roo/excelx/shared.rb +39 -0
- data/lib/roo/excelx/shared_strings.rb +151 -0
- data/lib/roo/excelx/sheet.rb +151 -0
- data/lib/roo/excelx/sheet_doc.rb +257 -0
- data/lib/roo/excelx/styles.rb +64 -0
- data/lib/roo/excelx/workbook.rb +64 -0
- data/lib/roo/excelx.rb +407 -601
- data/lib/roo/font.rb +17 -0
- data/lib/roo/formatters/base.rb +15 -0
- data/lib/roo/formatters/csv.rb +84 -0
- data/lib/roo/formatters/matrix.rb +23 -0
- data/lib/roo/formatters/xml.rb +31 -0
- data/lib/roo/formatters/yaml.rb +40 -0
- data/lib/roo/helpers/default_attr_reader.rb +20 -0
- data/lib/roo/helpers/weak_instance_cache.rb +41 -0
- data/lib/roo/libre_office.rb +4 -0
- data/lib/roo/link.rb +34 -0
- data/lib/roo/open_office.rb +631 -0
- data/lib/roo/spreadsheet.rb +28 -23
- data/lib/roo/tempdir.rb +24 -0
- data/lib/roo/utils.rb +128 -0
- data/lib/roo/version.rb +3 -0
- data/lib/roo.rb +26 -24
- data/roo.gemspec +29 -203
- data/spec/helpers.rb +5 -0
- data/spec/lib/roo/base_spec.rb +291 -3
- data/spec/lib/roo/csv_spec.rb +38 -11
- data/spec/lib/roo/excelx/cell/time_spec.rb +15 -0
- data/spec/lib/roo/excelx/format_spec.rb +7 -6
- data/spec/lib/roo/excelx/relationships_spec.rb +43 -0
- data/spec/lib/roo/excelx/sheet_doc_spec.rb +11 -0
- data/spec/lib/roo/excelx_spec.rb +672 -11
- data/spec/lib/roo/libreoffice_spec.rb +16 -6
- data/spec/lib/roo/openoffice_spec.rb +30 -8
- data/spec/lib/roo/spreadsheet_spec.rb +60 -12
- data/spec/lib/roo/strict_spec.rb +43 -0
- data/spec/lib/roo/utils_spec.rb +119 -0
- data/spec/lib/roo/weak_instance_cache_spec.rb +92 -0
- data/spec/lib/roo_spec.rb +0 -0
- data/spec/spec_helper.rb +7 -6
- data/test/all_ss.rb +12 -11
- data/test/excelx/cell/test_attr_reader_default.rb +72 -0
- data/test/excelx/cell/test_base.rb +68 -0
- data/test/excelx/cell/test_boolean.rb +36 -0
- data/test/excelx/cell/test_date.rb +38 -0
- data/test/excelx/cell/test_datetime.rb +45 -0
- data/test/excelx/cell/test_empty.rb +18 -0
- data/test/excelx/cell/test_number.rb +90 -0
- data/test/excelx/cell/test_string.rb +48 -0
- data/test/excelx/cell/test_time.rb +30 -0
- data/test/excelx/test_coordinate.rb +51 -0
- data/test/formatters/test_csv.rb +136 -0
- data/test/formatters/test_matrix.rb +76 -0
- data/test/formatters/test_xml.rb +78 -0
- data/test/formatters/test_yaml.rb +20 -0
- data/test/helpers/test_accessing_files.rb +81 -0
- data/test/helpers/test_comments.rb +43 -0
- data/test/helpers/test_formulas.rb +9 -0
- data/test/helpers/test_labels.rb +103 -0
- data/test/helpers/test_sheets.rb +55 -0
- data/test/helpers/test_styles.rb +62 -0
- data/test/roo/test_base.rb +182 -0
- data/test/roo/test_csv.rb +88 -0
- data/test/roo/test_excelx.rb +360 -0
- data/test/roo/test_libre_office.rb +9 -0
- data/test/roo/test_open_office.rb +289 -0
- data/test/test_helper.rb +123 -59
- data/test/test_roo.rb +392 -2292
- metadata +153 -298
- data/CHANGELOG +0 -417
- data/Gemfile.lock +0 -78
- data/README.markdown +0 -126
- data/VERSION +0 -1
- data/lib/roo/excel.rb +0 -355
- data/lib/roo/excel2003xml.rb +0 -300
- data/lib/roo/google.rb +0 -292
- data/lib/roo/openoffice.rb +0 -496
- data/lib/roo/roo_rails_helper.rb +0 -83
- data/lib/roo/worksheet.rb +0 -18
- data/scripts/txt2html +0 -67
- data/spec/lib/roo/excel2003xml_spec.rb +0 -15
- data/spec/lib/roo/excel_spec.rb +0 -17
- data/spec/lib/roo/google_spec.rb +0 -64
- data/test/files/1900_base.xls +0 -0
- data/test/files/1900_base.xlsx +0 -0
- data/test/files/1904_base.xls +0 -0
- data/test/files/1904_base.xlsx +0 -0
- data/test/files/Bibelbund.csv +0 -3741
- data/test/files/Bibelbund.ods +0 -0
- data/test/files/Bibelbund.xls +0 -0
- data/test/files/Bibelbund.xlsx +0 -0
- data/test/files/Bibelbund.xml +0 -62518
- data/test/files/Bibelbund1.ods +0 -0
- data/test/files/Pfand_from_windows_phone.xlsx +0 -0
- data/test/files/bad_excel_date.xls +0 -0
- data/test/files/bbu.ods +0 -0
- data/test/files/bbu.xls +0 -0
- data/test/files/bbu.xlsx +0 -0
- data/test/files/bbu.xml +0 -152
- data/test/files/bode-v1.ods.zip +0 -0
- data/test/files/bode-v1.xls.zip +0 -0
- data/test/files/boolean.csv +0 -2
- data/test/files/boolean.ods +0 -0
- data/test/files/boolean.xls +0 -0
- data/test/files/boolean.xlsx +0 -0
- data/test/files/boolean.xml +0 -112
- data/test/files/borders.ods +0 -0
- data/test/files/borders.xls +0 -0
- data/test/files/borders.xlsx +0 -0
- data/test/files/borders.xml +0 -144
- data/test/files/bug-numbered-sheet-names.xlsx +0 -0
- data/test/files/bug-row-column-fixnum-float.xls +0 -0
- data/test/files/bug-row-column-fixnum-float.xml +0 -127
- data/test/files/comments.ods +0 -0
- data/test/files/comments.xls +0 -0
- data/test/files/comments.xlsx +0 -0
- data/test/files/csvtypes.csv +0 -1
- data/test/files/datetime.ods +0 -0
- data/test/files/datetime.xls +0 -0
- data/test/files/datetime.xlsx +0 -0
- data/test/files/datetime.xml +0 -142
- data/test/files/datetime_floatconv.xls +0 -0
- data/test/files/datetime_floatconv.xml +0 -148
- data/test/files/dreimalvier.ods +0 -0
- data/test/files/emptysheets.ods +0 -0
- data/test/files/emptysheets.xls +0 -0
- data/test/files/emptysheets.xlsx +0 -0
- data/test/files/emptysheets.xml +0 -105
- data/test/files/excel2003.xml +0 -21140
- data/test/files/false_encoding.xls +0 -0
- data/test/files/false_encoding.xml +0 -132
- data/test/files/file_item_error.xlsx +0 -0
- data/test/files/formula.ods +0 -0
- data/test/files/formula.xls +0 -0
- data/test/files/formula.xlsx +0 -0
- data/test/files/formula.xml +0 -134
- data/test/files/formula_parse_error.xls +0 -0
- data/test/files/formula_parse_error.xml +0 -1833
- data/test/files/formula_string_error.xlsx +0 -0
- data/test/files/html-escape.ods +0 -0
- data/test/files/link.xls +0 -0
- data/test/files/link.xlsx +0 -0
- data/test/files/matrix.ods +0 -0
- data/test/files/matrix.xls +0 -0
- data/test/files/named_cells.ods +0 -0
- data/test/files/named_cells.xls +0 -0
- data/test/files/named_cells.xlsx +0 -0
- data/test/files/no_spreadsheet_file.txt +0 -1
- data/test/files/numbers1.csv +0 -18
- data/test/files/numbers1.ods +0 -0
- data/test/files/numbers1.xls +0 -0
- data/test/files/numbers1.xlsx +0 -0
- data/test/files/numbers1.xml +0 -312
- data/test/files/numeric-link.xlsx +0 -0
- data/test/files/only_one_sheet.ods +0 -0
- data/test/files/only_one_sheet.xls +0 -0
- data/test/files/only_one_sheet.xlsx +0 -0
- data/test/files/only_one_sheet.xml +0 -67
- data/test/files/paragraph.ods +0 -0
- data/test/files/paragraph.xls +0 -0
- data/test/files/paragraph.xlsx +0 -0
- data/test/files/paragraph.xml +0 -127
- data/test/files/prova.xls +0 -0
- data/test/files/ric.ods +0 -0
- data/test/files/simple_spreadsheet.ods +0 -0
- data/test/files/simple_spreadsheet.xls +0 -0
- data/test/files/simple_spreadsheet.xlsx +0 -0
- data/test/files/simple_spreadsheet.xml +0 -225
- data/test/files/simple_spreadsheet_from_italo.ods +0 -0
- data/test/files/simple_spreadsheet_from_italo.xls +0 -0
- data/test/files/simple_spreadsheet_from_italo.xml +0 -242
- data/test/files/so_datetime.csv +0 -7
- data/test/files/style.ods +0 -0
- data/test/files/style.xls +0 -0
- data/test/files/style.xlsx +0 -0
- data/test/files/style.xml +0 -154
- data/test/files/time-test.csv +0 -2
- data/test/files/time-test.ods +0 -0
- data/test/files/time-test.xls +0 -0
- data/test/files/time-test.xlsx +0 -0
- data/test/files/time-test.xml +0 -131
- data/test/files/type_excel.ods +0 -0
- data/test/files/type_excel.xlsx +0 -0
- data/test/files/type_excelx.ods +0 -0
- data/test/files/type_excelx.xls +0 -0
- data/test/files/type_openoffice.xls +0 -0
- data/test/files/type_openoffice.xlsx +0 -0
- data/test/files/whitespace.ods +0 -0
- data/test/files/whitespace.xls +0 -0
- data/test/files/whitespace.xlsx +0 -0
- data/test/files/whitespace.xml +0 -184
- data/test/rm_sub_test.rb +0 -12
- data/test/rm_test.rb +0 -7
- data/test/test_generic_spreadsheet.rb +0 -259
- data/website/index.html +0 -385
- data/website/index.txt +0 -423
- data/website/javascripts/rounded_corners_lite.inc.js +0 -285
- data/website/stylesheets/screen.css +0 -130
- data/website/template.rhtml +0 -48
data/lib/roo/excelx.rb
CHANGED
@@ -1,674 +1,480 @@
|
|
1
|
-
require 'date'
|
2
1
|
require 'nokogiri'
|
3
|
-
require '
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
:datetime
|
52
|
-
else
|
53
|
-
:date
|
54
|
-
end
|
55
|
-
elsif format.include?('h') || format.include?('s')
|
56
|
-
:time
|
57
|
-
elsif format.include?('%')
|
58
|
-
:percentage
|
59
|
-
else
|
60
|
-
:float
|
2
|
+
require 'zip/filesystem'
|
3
|
+
require 'roo/link'
|
4
|
+
require 'roo/tempdir'
|
5
|
+
require 'roo/utils'
|
6
|
+
require 'forwardable'
|
7
|
+
require 'set'
|
8
|
+
|
9
|
+
module Roo
|
10
|
+
class Excelx < Roo::Base
|
11
|
+
extend Roo::Tempdir
|
12
|
+
extend Forwardable
|
13
|
+
|
14
|
+
ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
|
15
|
+
|
16
|
+
require 'roo/excelx/shared'
|
17
|
+
require 'roo/excelx/workbook'
|
18
|
+
require 'roo/excelx/shared_strings'
|
19
|
+
require 'roo/excelx/styles'
|
20
|
+
require 'roo/excelx/cell'
|
21
|
+
require 'roo/excelx/sheet'
|
22
|
+
require 'roo/excelx/relationships'
|
23
|
+
require 'roo/excelx/comments'
|
24
|
+
require 'roo/excelx/sheet_doc'
|
25
|
+
require 'roo/excelx/coordinate'
|
26
|
+
require 'roo/excelx/format'
|
27
|
+
require 'roo/excelx/images'
|
28
|
+
|
29
|
+
delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
|
30
|
+
ExceedsMaxError = Class.new(StandardError)
|
31
|
+
|
32
|
+
# initialization and opening of a spreadsheet file
|
33
|
+
# values for packed: :zip
|
34
|
+
# optional cell_max (int) parameter for early aborting attempts to parse
|
35
|
+
# enormous documents.
|
36
|
+
def initialize(filename_or_stream, options = {})
|
37
|
+
packed = options[:packed]
|
38
|
+
file_warning = options.fetch(:file_warning, :error)
|
39
|
+
cell_max = options.delete(:cell_max)
|
40
|
+
sheet_options = {}
|
41
|
+
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
|
42
|
+
sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
|
43
|
+
sheet_options[:empty_cell] = (options[:empty_cell] || false)
|
44
|
+
shared_options = {}
|
45
|
+
|
46
|
+
shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
|
47
|
+
unless is_stream?(filename_or_stream)
|
48
|
+
file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
|
49
|
+
basename = find_basename(filename_or_stream)
|
61
50
|
end
|
62
|
-
end
|
63
51
|
|
64
|
-
|
65
|
-
|
52
|
+
# NOTE: Create temp directory and allow Ruby to cleanup the temp directory
|
53
|
+
# when the object is garbage collected. Initially, the finalizer was
|
54
|
+
# created in the Roo::Tempdir module, but that led to a segfault
|
55
|
+
# when testing in Ruby 2.4.0.
|
56
|
+
@tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
|
57
|
+
ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
|
66
58
|
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
filename = download_uri(filename, tmpdir) if uri?(filename)
|
82
|
-
filename = unzip(filename, tmpdir) if packed == :zip
|
83
|
-
@filename = filename
|
84
|
-
unless File.file?(@filename)
|
85
|
-
raise IOError, "file #{@filename} does not exist"
|
86
|
-
end
|
87
|
-
@comments_files = Array.new
|
88
|
-
@rels_files = Array.new
|
89
|
-
extract_content(tmpdir, @filename)
|
90
|
-
@workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml"))
|
91
|
-
@shared_table = []
|
92
|
-
if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml'))
|
93
|
-
@sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml'))
|
94
|
-
read_shared_strings(@sharedstring_doc)
|
59
|
+
@shared = Shared.new(@tmpdir, shared_options)
|
60
|
+
@filename = local_filename(filename_or_stream, @tmpdir, packed)
|
61
|
+
process_zipfile(@filename || filename_or_stream)
|
62
|
+
|
63
|
+
@sheet_names = []
|
64
|
+
@sheets = []
|
65
|
+
@sheets_by_name = {}
|
66
|
+
|
67
|
+
workbook.sheets.each_with_index do |sheet, index|
|
68
|
+
next if options[:only_visible_sheets] && sheet['state'] == 'hidden'
|
69
|
+
|
70
|
+
sheet_name = sheet['name']
|
71
|
+
@sheet_names << sheet_name
|
72
|
+
@sheets_by_name[sheet_name] = @sheets[index] = Sheet.new(sheet_name, @shared, index, sheet_options)
|
95
73
|
end
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
read_styles(@styles_doc)
|
74
|
+
|
75
|
+
if cell_max
|
76
|
+
cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
|
77
|
+
raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
|
101
78
|
end
|
102
|
-
@sheet_doc = load_xmls(@sheet_files)
|
103
|
-
@comments_doc = load_xmls(@comments_files)
|
104
|
-
@rels_doc = load_xmls(@rels_files)
|
105
|
-
end
|
106
|
-
super(filename, options)
|
107
|
-
@formula = Hash.new
|
108
|
-
@excelx_type = Hash.new
|
109
|
-
@excelx_value = Hash.new
|
110
|
-
@s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
|
111
|
-
@comment = Hash.new
|
112
|
-
@comments_read = Hash.new
|
113
|
-
@hyperlink = Hash.new
|
114
|
-
@hyperlinks_read = Hash.new
|
115
|
-
end
|
116
79
|
|
117
|
-
def method_missing(m,*args)
|
118
|
-
# is method name a label name
|
119
|
-
read_labels
|
120
|
-
if @label.has_key?(m.to_s)
|
121
|
-
sheet ||= @default_sheet
|
122
|
-
read_cells(sheet)
|
123
|
-
row,col = label(m.to_s)
|
124
|
-
cell(row,col)
|
125
|
-
else
|
126
|
-
# call super for methods like #a1
|
127
80
|
super
|
81
|
+
rescue
|
82
|
+
self.class.finalize_tempdirs(object_id)
|
83
|
+
raise
|
128
84
|
end
|
129
|
-
end
|
130
85
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
read_cells(sheet)
|
138
|
-
row,col = normalize(row,col)
|
139
|
-
if celltype(row,col,sheet) == :date
|
140
|
-
yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
|
141
|
-
return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
|
142
|
-
elsif celltype(row,col,sheet) == :datetime
|
143
|
-
date_part,time_part = @cell[sheet][[row,col]].split(' ')
|
144
|
-
yyyy,mm,dd = date_part.split('-')
|
145
|
-
hh,mi,ss = time_part.split(':')
|
146
|
-
return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
|
147
|
-
end
|
148
|
-
@cell[sheet][[row,col]]
|
149
|
-
end
|
150
|
-
|
151
|
-
# Returns the formula at (row,col).
|
152
|
-
# Returns nil if there is no formula.
|
153
|
-
# The method #formula? checks if there is a formula.
|
154
|
-
def formula(row,col,sheet=nil)
|
155
|
-
sheet ||= @default_sheet
|
156
|
-
read_cells(sheet)
|
157
|
-
row,col = normalize(row,col)
|
158
|
-
@formula[sheet][[row,col]] && @formula[sheet][[row,col]]
|
159
|
-
end
|
160
|
-
alias_method :formula?, :formula
|
161
|
-
|
162
|
-
# returns each formula in the selected sheet as an array of elements
|
163
|
-
# [row, col, formula]
|
164
|
-
def formulas(sheet=nil)
|
165
|
-
sheet ||= @default_sheet
|
166
|
-
read_cells(sheet)
|
167
|
-
if @formula[sheet]
|
168
|
-
@formula[sheet].each.collect do |elem|
|
169
|
-
[elem[0][0], elem[0][1], elem[1]]
|
86
|
+
def method_missing(method, *args)
|
87
|
+
if (label = workbook.defined_names[method.to_s])
|
88
|
+
safe_send(sheet_for(label.sheet).cells[label.key], :value)
|
89
|
+
else
|
90
|
+
# call super for methods like #a1
|
91
|
+
super
|
170
92
|
end
|
171
|
-
else
|
172
|
-
[]
|
173
93
|
end
|
174
|
-
end
|
175
94
|
|
176
|
-
|
177
|
-
|
95
|
+
def sheets
|
96
|
+
@sheet_names
|
97
|
+
end
|
178
98
|
|
179
|
-
def
|
180
|
-
|
99
|
+
def sheet_for(sheet)
|
100
|
+
sheet ||= default_sheet
|
101
|
+
validate_sheet!(sheet)
|
102
|
+
@sheets_by_name[sheet] || @sheets[sheet]
|
181
103
|
end
|
182
104
|
|
183
|
-
def
|
184
|
-
|
105
|
+
def images(sheet = nil)
|
106
|
+
images_names = sheet_for(sheet).images.map(&:last)
|
107
|
+
images_names.map { |iname| image_files.find { |ifile| ifile[iname] } }
|
185
108
|
end
|
186
109
|
|
187
|
-
|
188
|
-
|
110
|
+
# Returns the content of a spreadsheet-cell.
|
111
|
+
# (1,1) is the upper left corner.
|
112
|
+
# (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
|
113
|
+
# cell at the first line and first row.
|
114
|
+
def cell(row, col, sheet = nil)
|
115
|
+
key = normalize(row, col)
|
116
|
+
safe_send(sheet_for(sheet).cells[key], :value)
|
189
117
|
end
|
190
|
-
end
|
191
118
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
read_cells(sheet)
|
196
|
-
row,col = normalize(row,col)
|
197
|
-
s_attribute = @s_attribute[sheet][[row,col]]
|
198
|
-
s_attribute ||= 0
|
199
|
-
s_attribute = s_attribute.to_i
|
200
|
-
@style_definitions[s_attribute]
|
201
|
-
end
|
119
|
+
def row(rownumber, sheet = nil)
|
120
|
+
sheet_for(sheet).row(rownumber)
|
121
|
+
end
|
202
122
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
# * :datetime
|
211
|
-
def celltype(row,col,sheet=nil)
|
212
|
-
sheet ||= @default_sheet
|
213
|
-
read_cells(sheet)
|
214
|
-
row,col = normalize(row,col)
|
215
|
-
if @formula[sheet][[row,col]]
|
216
|
-
return :formula
|
217
|
-
else
|
218
|
-
@cell_type[sheet][[row,col]]
|
123
|
+
# returns all values in this column as an array
|
124
|
+
# column numbers are 1,2,3,... like in the spreadsheet
|
125
|
+
def column(column_number, sheet = nil)
|
126
|
+
if column_number.is_a?(::String)
|
127
|
+
column_number = ::Roo::Utils.letter_to_number(column_number)
|
128
|
+
end
|
129
|
+
sheet_for(sheet).column(column_number)
|
219
130
|
end
|
220
|
-
end
|
221
131
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
def excelx_type(row,col,sheet=nil)
|
227
|
-
sheet ||= @default_sheet
|
228
|
-
read_cells(sheet)
|
229
|
-
row,col = normalize(row,col)
|
230
|
-
return @excelx_type[sheet][[row,col]]
|
231
|
-
end
|
132
|
+
# returns the number of the first non-empty row
|
133
|
+
def first_row(sheet = nil)
|
134
|
+
sheet_for(sheet).first_row
|
135
|
+
end
|
232
136
|
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
read_cells(sheet)
|
238
|
-
row,col = normalize(row,col)
|
239
|
-
return @excelx_value[sheet][[row,col]]
|
240
|
-
end
|
137
|
+
# returns the number of the last non-empty row
|
138
|
+
def last_row(sheet = nil)
|
139
|
+
sheet_for(sheet).last_row
|
140
|
+
end
|
241
141
|
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
row,col = normalize(row,col)
|
247
|
-
s = @s_attribute[sheet][[row,col]]
|
248
|
-
attribute2format(s).to_s
|
249
|
-
end
|
142
|
+
# returns the number of the first non-empty column
|
143
|
+
def first_column(sheet = nil)
|
144
|
+
sheet_for(sheet).first_column
|
145
|
+
end
|
250
146
|
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
sheet['name']
|
147
|
+
# returns the number of the last non-empty column
|
148
|
+
def last_column(sheet = nil)
|
149
|
+
sheet_for(sheet).last_column
|
255
150
|
end
|
256
|
-
end
|
257
151
|
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
152
|
+
# set a cell to a certain value
|
153
|
+
# (this will not be saved back to the spreadsheet file!)
|
154
|
+
def set(row, col, value, sheet = nil) #:nodoc:
|
155
|
+
key = normalize(row, col)
|
156
|
+
cell_type = cell_type_by_value(value)
|
157
|
+
sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Coordinate.new(row, col))
|
158
|
+
end
|
265
159
|
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
else
|
273
|
-
return @label[labelname][1].to_i,
|
274
|
-
Roo::Base.letter_to_number(@label[labelname][2]),
|
275
|
-
@label[labelname][0]
|
160
|
+
# Returns the formula at (row,col).
|
161
|
+
# Returns nil if there is no formula.
|
162
|
+
# The method #formula? checks if there is a formula.
|
163
|
+
def formula(row, col, sheet = nil)
|
164
|
+
key = normalize(row, col)
|
165
|
+
safe_send(sheet_for(sheet).cells[key], :formula)
|
276
166
|
end
|
277
|
-
end
|
278
167
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
read_labels
|
285
|
-
@label.map do |label|
|
286
|
-
[ label[0], # name
|
287
|
-
[ label[1][1].to_i, # row
|
288
|
-
Roo::Base.letter_to_number(label[1][2]), # column
|
289
|
-
label[1][0], # sheet
|
290
|
-
] ]
|
168
|
+
# Predicate methods really should return a boolean
|
169
|
+
# value. Hopefully no one was relying on the fact that this
|
170
|
+
# previously returned either nil/formula
|
171
|
+
def formula?(*args)
|
172
|
+
!!formula(*args)
|
291
173
|
end
|
292
|
-
end
|
293
174
|
|
294
|
-
|
295
|
-
|
296
|
-
|
175
|
+
# returns each formula in the selected sheet as an array of tuples in following format
|
176
|
+
# [[row, col, formula], [row, col, formula],...]
|
177
|
+
def formulas(sheet = nil)
|
178
|
+
sheet_for(sheet).cells.select { |_, cell| cell.formula }.map do |(x, y), cell|
|
179
|
+
[x, y, cell.formula]
|
180
|
+
end
|
181
|
+
end
|
297
182
|
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
return nil unless @hyperlink[sheet]
|
305
|
-
@hyperlink[sheet][[row,col]]
|
306
|
-
end
|
183
|
+
# Given a cell, return the cell's style
|
184
|
+
def font(row, col, sheet = nil)
|
185
|
+
key = normalize(row, col)
|
186
|
+
definition_index = safe_send(sheet_for(sheet).cells[key], :style)
|
187
|
+
styles.definitions[definition_index] if definition_index
|
188
|
+
end
|
307
189
|
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
#
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
190
|
+
# returns the type of a cell:
|
191
|
+
# * :float
|
192
|
+
# * :string,
|
193
|
+
# * :date
|
194
|
+
# * :percentage
|
195
|
+
# * :formula
|
196
|
+
# * :time
|
197
|
+
# * :datetime
|
198
|
+
def celltype(row, col, sheet = nil)
|
199
|
+
key = normalize(row, col)
|
200
|
+
safe_send(sheet_for(sheet).cells[key], :type)
|
201
|
+
end
|
318
202
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
#
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
203
|
+
# returns the internal type of an excel cell
|
204
|
+
# * :numeric_or_formula
|
205
|
+
# * :string
|
206
|
+
# Note: this is only available within the Excelx class
|
207
|
+
def excelx_type(row, col, sheet = nil)
|
208
|
+
key = normalize(row, col)
|
209
|
+
safe_send(sheet_for(sheet).cells[key], :cell_type)
|
210
|
+
end
|
327
211
|
|
328
|
-
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
if @comment[sheet]
|
334
|
-
@comment[sheet].each.collect do |elem|
|
335
|
-
[elem[0][0],elem[0][1],elem[1]]
|
336
|
-
end
|
337
|
-
else
|
338
|
-
[]
|
212
|
+
# returns the internal value of an excelx cell
|
213
|
+
# Note: this is only available within the Excelx class
|
214
|
+
def excelx_value(row, col, sheet = nil)
|
215
|
+
key = normalize(row, col)
|
216
|
+
safe_send(sheet_for(sheet).cells[key], :cell_value)
|
339
217
|
end
|
340
|
-
end
|
341
218
|
|
342
|
-
|
219
|
+
# returns the internal value of an excelx cell
|
220
|
+
# Note: this is only available within the Excelx class
|
221
|
+
def formatted_value(row, col, sheet = nil)
|
222
|
+
key = normalize(row, col)
|
223
|
+
safe_send(sheet_for(sheet).cells[key], :formatted_value)
|
224
|
+
end
|
343
225
|
|
344
|
-
|
345
|
-
|
346
|
-
|
226
|
+
# returns the internal format of an excel cell
|
227
|
+
def excelx_format(row, col, sheet = nil)
|
228
|
+
key = normalize(row, col)
|
229
|
+
sheet_for(sheet).excelx_format(key)
|
347
230
|
end
|
348
|
-
end
|
349
231
|
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
@cell_type[sheet][key] = value_type
|
358
|
-
@formula[sheet] ||= {}
|
359
|
-
@formula[sheet][key] = formula if formula
|
360
|
-
@cell[sheet] ||= {}
|
361
|
-
@cell[sheet][key] =
|
362
|
-
case @cell_type[sheet][key]
|
363
|
-
when :float
|
364
|
-
v.to_f
|
365
|
-
when :string
|
366
|
-
v
|
367
|
-
when :date
|
368
|
-
(base_date+v.to_i).strftime("%Y-%m-%d")
|
369
|
-
when :datetime
|
370
|
-
(base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
|
371
|
-
when :percentage
|
372
|
-
v.to_f
|
373
|
-
when :time
|
374
|
-
v.to_f*(24*60*60)
|
375
|
-
else
|
376
|
-
v
|
377
|
-
end
|
232
|
+
def empty?(row, col, sheet = nil)
|
233
|
+
sheet = sheet_for(sheet)
|
234
|
+
key = normalize(row, col)
|
235
|
+
cell = sheet.cells[key]
|
236
|
+
!cell || cell.empty? ||
|
237
|
+
(row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
|
238
|
+
end
|
378
239
|
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
@s_attribute[sheet] ||= {}
|
385
|
-
@s_attribute[sheet][key] = s_attribute
|
386
|
-
end
|
240
|
+
# shows the internal representation of all cells
|
241
|
+
# for debugging purposes
|
242
|
+
def to_s(sheet = nil)
|
243
|
+
sheet_for(sheet).cells.inspect
|
244
|
+
end
|
387
245
|
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
# 2011-02-25 BEGIN
|
406
|
-
when 'str'
|
407
|
-
:string
|
408
|
-
# 2011-02-25 END
|
409
|
-
# 2011-09-15 BEGIN
|
410
|
-
when 'inlineStr'
|
411
|
-
:inlinestr
|
412
|
-
# 2011-09-15 END
|
413
|
-
else
|
414
|
-
format = attribute2format(s_attribute)
|
415
|
-
Format.to_type(format)
|
416
|
-
end
|
417
|
-
formula = nil
|
418
|
-
c.children.each do |cell|
|
419
|
-
case cell.name
|
420
|
-
when 'is'
|
421
|
-
cell.children.each do |is|
|
422
|
-
if is.name == 't'
|
423
|
-
inlinestr_content = is.content
|
424
|
-
value_type = :string
|
425
|
-
v = inlinestr_content
|
426
|
-
excelx_type = :string
|
427
|
-
y, x = Roo::Base.split_coordinate(c['r'])
|
428
|
-
excelx_value = inlinestr_content #cell.content
|
429
|
-
set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
|
430
|
-
end
|
431
|
-
end
|
432
|
-
when 'f'
|
433
|
-
formula = cell.content
|
434
|
-
when 'v'
|
435
|
-
if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
|
436
|
-
value_type =
|
437
|
-
if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
|
438
|
-
:datetime
|
439
|
-
else
|
440
|
-
:date
|
441
|
-
end
|
442
|
-
end
|
443
|
-
excelx_type = [:numeric_or_formula,format.to_s]
|
444
|
-
excelx_value = cell.content
|
445
|
-
v =
|
446
|
-
case value_type
|
447
|
-
when :shared
|
448
|
-
value_type = :string
|
449
|
-
excelx_type = :string
|
450
|
-
@shared_table[cell.content.to_i]
|
451
|
-
when :boolean
|
452
|
-
(cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
|
453
|
-
when :date
|
454
|
-
cell.content
|
455
|
-
when :time
|
456
|
-
cell.content
|
457
|
-
when :datetime
|
458
|
-
cell.content
|
459
|
-
when :formula
|
460
|
-
cell.content.to_f #TODO: !!!!
|
461
|
-
when :string
|
462
|
-
excelx_type = :string
|
463
|
-
cell.content
|
464
|
-
else
|
465
|
-
value_type = :float
|
466
|
-
cell.content
|
467
|
-
end
|
468
|
-
y, x = Roo::Base.split_coordinate(c['r'])
|
469
|
-
set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
|
470
|
-
end
|
246
|
+
# returns the row,col values of the labelled cell
|
247
|
+
# (nil,nil) if label is not defined
|
248
|
+
def label(name)
|
249
|
+
labels = workbook.defined_names
|
250
|
+
return [nil, nil, nil] if labels.empty? || !labels.key?(name)
|
251
|
+
|
252
|
+
[labels[name].row, labels[name].col, labels[name].sheet]
|
253
|
+
end
|
254
|
+
|
255
|
+
# Returns an array which all labels. Each element is an array with
|
256
|
+
# [labelname, [row,col,sheetname]]
|
257
|
+
def labels
|
258
|
+
@labels ||= workbook.defined_names.map do |name, label|
|
259
|
+
[
|
260
|
+
name,
|
261
|
+
[label.row, label.col, label.sheet]
|
262
|
+
]
|
471
263
|
end
|
472
264
|
end
|
473
|
-
@cells_read[sheet] = true
|
474
|
-
# begin comments
|
475
|
-
=begin
|
476
|
-
Datei xl/comments1.xml
|
477
|
-
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
|
478
|
-
<comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
|
479
|
-
<authors>
|
480
|
-
<author />
|
481
|
-
</authors>
|
482
|
-
<commentList>
|
483
|
-
<comment ref="B4" authorId="0">
|
484
|
-
<text>
|
485
|
-
<r>
|
486
|
-
<rPr>
|
487
|
-
<sz val="10" />
|
488
|
-
<rFont val="Arial" />
|
489
|
-
<family val="2" />
|
490
|
-
</rPr>
|
491
|
-
<t>Kommentar fuer B4</t>
|
492
|
-
</r>
|
493
|
-
</text>
|
494
|
-
</comment>
|
495
|
-
<comment ref="B5" authorId="0">
|
496
|
-
<text>
|
497
|
-
<r>
|
498
|
-
<rPr>
|
499
|
-
<sz val="10" />
|
500
|
-
<rFont val="Arial" />
|
501
|
-
<family val="2" />
|
502
|
-
</rPr>
|
503
|
-
<t>Kommentar fuer B5</t>
|
504
|
-
</r>
|
505
|
-
</text>
|
506
|
-
</comment>
|
507
|
-
</commentList>
|
508
|
-
</comments>
|
509
|
-
=end
|
510
|
-
=begin
|
511
|
-
if @comments_doc[self.sheets.index(sheet)]
|
512
|
-
read_comments(sheet)
|
513
|
-
end
|
514
|
-
=end
|
515
|
-
#end comments
|
516
|
-
end
|
517
265
|
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
266
|
+
def hyperlink?(row, col, sheet = nil)
|
267
|
+
!!hyperlink(row, col, sheet)
|
268
|
+
end
|
269
|
+
|
270
|
+
# returns the hyperlink at (row/col)
|
271
|
+
# nil if there is no hyperlink
|
272
|
+
def hyperlink(row, col, sheet = nil)
|
273
|
+
key = normalize(row, col)
|
274
|
+
sheet_for(sheet).hyperlinks[key]
|
275
|
+
end
|
276
|
+
|
277
|
+
# returns the comment at (row/col)
|
278
|
+
# nil if there is no comment
|
279
|
+
def comment(row, col, sheet = nil)
|
280
|
+
key = normalize(row, col)
|
281
|
+
sheet_for(sheet).comments[key]
|
282
|
+
end
|
283
|
+
|
284
|
+
# true, if there is a comment
|
285
|
+
def comment?(row, col, sheet = nil)
|
286
|
+
!!comment(row, col, sheet)
|
287
|
+
end
|
288
|
+
|
289
|
+
def comments(sheet = nil)
|
290
|
+
sheet_for(sheet).comments.map do |(x, y), comment|
|
291
|
+
[x, y, comment]
|
530
292
|
end
|
531
293
|
end
|
532
|
-
@comments_read[sheet] = true
|
533
|
-
end
|
534
294
|
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
end]
|
544
|
-
@sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h|
|
545
|
-
if rel_element = rels[h.attribute('id').text]
|
546
|
-
row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s)
|
547
|
-
@hyperlink[sheet] ||= {}
|
548
|
-
@hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text
|
549
|
-
end
|
295
|
+
# Yield an array of Excelx::Cell
|
296
|
+
# Takes options for sheet, pad_cells, and max_rows
|
297
|
+
def each_row_streaming(options = {})
|
298
|
+
sheet = sheet_for(options.delete(:sheet))
|
299
|
+
if block_given?
|
300
|
+
sheet.each_row(options) { |row| yield row }
|
301
|
+
else
|
302
|
+
sheet.to_enum(:each_row, options)
|
550
303
|
end
|
551
304
|
end
|
552
|
-
@hyperlinks_read[sheet] = true
|
553
|
-
end
|
554
305
|
|
555
|
-
|
556
|
-
@label ||= Hash[@workbook_doc.xpath("//xmlns:definedName").map do |defined_name|
|
557
|
-
# "Sheet1!$C$5"
|
558
|
-
sheet, coordinates = defined_name.text.split('!$', 2)
|
559
|
-
col,row = coordinates.split('$')
|
560
|
-
[defined_name['name'], [sheet,row,col]]
|
561
|
-
end]
|
562
|
-
end
|
306
|
+
private
|
563
307
|
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
Roo::ZipFile.open(zipfilename) {|zf|
|
568
|
-
zf.entries.each {|entry|
|
569
|
-
entry_name = entry.to_s.downcase
|
308
|
+
def clean_sheet(sheet)
|
309
|
+
@sheets_by_name[sheet].cells.each_pair do |coord, value|
|
310
|
+
next unless value.value.is_a?(::String)
|
570
311
|
|
571
|
-
|
572
|
-
|
573
|
-
"#{tmpdir}/roo_workbook.xml"
|
574
|
-
elsif entry_name.end_with?('sharedstrings.xml')
|
575
|
-
"#{tmpdir}/roo_sharedStrings.xml"
|
576
|
-
elsif entry_name.end_with?('styles.xml')
|
577
|
-
"#{tmpdir}/roo_styles.xml"
|
578
|
-
elsif entry_name =~ /sheet([0-9]+).xml$/
|
579
|
-
nr = $1
|
580
|
-
@sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}"
|
581
|
-
elsif entry_name =~ /comments([0-9]+).xml$/
|
582
|
-
nr = $1
|
583
|
-
@comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
|
584
|
-
elsif entry_name =~ /sheet([0-9]+).xml.rels$/
|
585
|
-
nr = $1
|
586
|
-
@rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
|
587
|
-
end
|
588
|
-
if path
|
589
|
-
extract_file(zip, entry, path)
|
590
|
-
end
|
591
|
-
}
|
592
|
-
}
|
593
|
-
end
|
312
|
+
@sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value)
|
313
|
+
end
|
594
314
|
|
595
|
-
|
596
|
-
|
597
|
-
f << source_zip.read(entry)
|
598
|
-
}
|
599
|
-
end
|
315
|
+
@cleaned[sheet] = true
|
316
|
+
end
|
600
317
|
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
318
|
+
# Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
|
319
|
+
# documents require a workbook.xml file, so a if the file is missing
|
320
|
+
# it is not a valid xlsx file. In these cases, an ArgumentError is
|
321
|
+
# raised.
|
322
|
+
#
|
323
|
+
# wb - a Zip::Entry for the workbook.xml file.
|
324
|
+
# path - A String for Zip::Entry's destination path.
|
325
|
+
#
|
326
|
+
# Examples
|
327
|
+
#
|
328
|
+
# extract_worksheet_ids(<Zip::Entry>, 'tmpdir/roo_workbook.xml')
|
329
|
+
# # => ["rId1", "rId2", "rId3"]
|
330
|
+
#
|
331
|
+
# Returns an Array of Strings.
|
332
|
+
def extract_worksheet_ids(entries, path)
|
333
|
+
wb = entries.find { |e| e.name[/workbook.xml$/] }
|
334
|
+
fail ArgumentError 'missing required workbook file' if wb.nil?
|
335
|
+
|
336
|
+
wb.extract(path)
|
337
|
+
workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
|
338
|
+
workbook_doc.xpath('//sheet').map { |s| s['id'] }
|
605
339
|
end
|
606
|
-
end
|
607
340
|
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
341
|
+
# Internal
|
342
|
+
#
|
343
|
+
# wb_rels - A Zip::Entry for the workbook.xml.rels file.
|
344
|
+
# path - A String for the Zip::Entry's destination path.
|
345
|
+
#
|
346
|
+
# Examples
|
347
|
+
#
|
348
|
+
# extract_worksheets(<Zip::Entry>, 'tmpdir/roo_workbook.xml.rels')
|
349
|
+
# # => {
|
350
|
+
# "rId1"=>"worksheets/sheet1.xml",
|
351
|
+
# "rId2"=>"worksheets/sheet2.xml",
|
352
|
+
# "rId3"=>"worksheets/sheet3.xml"
|
353
|
+
# }
|
354
|
+
#
|
355
|
+
# Returns a Hash.
|
356
|
+
def extract_worksheet_rels(entries, path)
|
357
|
+
wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
|
358
|
+
fail ArgumentError 'missing required workbook file' if wb_rels.nil?
|
359
|
+
|
360
|
+
wb_rels.extract(path)
|
361
|
+
rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
|
362
|
+
|
363
|
+
relationships = rels_doc.xpath('//Relationship').select do |relationship|
|
364
|
+
worksheet_types.include? relationship['Type']
|
365
|
+
end
|
366
|
+
|
367
|
+
relationships.each_with_object({}) do |relationship, hash|
|
368
|
+
hash[relationship['Id']] = relationship['Target']
|
623
369
|
end
|
624
|
-
@shared_table << shared_table_entry
|
625
370
|
end
|
626
|
-
end
|
627
371
|
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
font.italic = !font_el.xpath('./xmlns:i').empty?
|
639
|
-
font.underline = !font_el.xpath('./xmlns:u').empty?
|
372
|
+
# Extracts the sheets in order, but it will ignore sheets that are not
|
373
|
+
# worksheets.
|
374
|
+
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
|
375
|
+
(sheet_ids & sheets.keys).each_with_index do |id, i|
|
376
|
+
name = sheets[id]
|
377
|
+
entry = entries.find { |e| "/#{e.name}" =~ /#{name}$/ }
|
378
|
+
path = "#{tmpdir}/roo_sheet#{i + 1}"
|
379
|
+
sheet_files << path
|
380
|
+
@sheet_files << path
|
381
|
+
entry.extract(path)
|
640
382
|
end
|
641
383
|
end
|
642
384
|
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
385
|
+
def extract_images(entries, tmpdir)
|
386
|
+
img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
|
387
|
+
img_entries.each do |entry|
|
388
|
+
path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
|
389
|
+
image_files << path
|
390
|
+
entry.extract(path)
|
647
391
|
end
|
648
392
|
end
|
649
|
-
end
|
650
393
|
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
@numFmts[id] || Format::STANDARD_FORMATS[id.to_i]
|
655
|
-
end
|
394
|
+
# Extracts all needed files from the zip file
|
395
|
+
def process_zipfile(zipfilename_or_stream)
|
396
|
+
@sheet_files = []
|
656
397
|
|
657
|
-
|
658
|
-
|
659
|
-
|
398
|
+
unless is_stream?(zipfilename_or_stream)
|
399
|
+
zip_file = Zip::File.open(zipfilename_or_stream)
|
400
|
+
else
|
401
|
+
zip_file = Zip::CentralDirectory.new
|
402
|
+
zip_file.read_from_stream zipfilename_or_stream
|
403
|
+
end
|
404
|
+
|
405
|
+
process_zipfile_entries zip_file.to_a.sort_by(&:name)
|
406
|
+
end
|
407
|
+
|
408
|
+
def process_zipfile_entries(entries)
|
409
|
+
# NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
|
410
|
+
# are not in order. With Numbers 3.1, the first sheet is always
|
411
|
+
# sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
|
412
|
+
# independent of a worksheet's filename (i.e. sheet6.xml can be the
|
413
|
+
# first worksheet).
|
414
|
+
#
|
415
|
+
# workbook.xml lists the correct order of worksheets and
|
416
|
+
# workbook.xml.rels lists the filenames for those worksheets.
|
417
|
+
#
|
418
|
+
# workbook.xml:
|
419
|
+
# <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
|
420
|
+
# <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
|
421
|
+
# workbook.xml.rel:
|
422
|
+
# <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
|
423
|
+
# <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
|
424
|
+
sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
|
425
|
+
sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
|
426
|
+
extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
|
427
|
+
extract_images(entries, @tmpdir)
|
428
|
+
|
429
|
+
entries.each do |entry|
|
430
|
+
path =
|
431
|
+
case entry.name.downcase
|
432
|
+
when /richdata/
|
433
|
+
# FIXME: Ignore richData as parsing is not implemented yet and can cause
|
434
|
+
# Zip::DestinationFileExistsError when including a second "styles.xml" entry
|
435
|
+
# see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2
|
436
|
+
nil
|
437
|
+
when /sharedstrings.xml$/
|
438
|
+
"#{@tmpdir}/roo_sharedStrings.xml"
|
439
|
+
when /styles.xml$/
|
440
|
+
"#{@tmpdir}/roo_styles.xml"
|
441
|
+
when /comments([0-9]+).xml$/
|
442
|
+
# FIXME: Most of the time, The order of the comment files are the same
|
443
|
+
# the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
|
444
|
+
# In some situations, this isn't true. The true location of a
|
445
|
+
# sheet's comment file is in the sheet1.xml.rels file. SEE
|
446
|
+
# ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
|
447
|
+
nr = Regexp.last_match[1].to_i
|
448
|
+
comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
|
449
|
+
when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
|
450
|
+
# NOTE: Chart sheet relationship files were interfering with
|
451
|
+
# worksheets.
|
452
|
+
nil
|
453
|
+
when /sheet([0-9]+).xml.rels$/
|
454
|
+
# FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
|
455
|
+
# it also stores the location for sharedStrings, comments,
|
456
|
+
# drawings, etc.
|
457
|
+
nr = Regexp.last_match[1].to_i
|
458
|
+
rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
|
459
|
+
when /drawing([0-9]+).xml.rels$/
|
460
|
+
# Extracting drawing relationships to make images lists for each sheet
|
461
|
+
nr = Regexp.last_match[1].to_i
|
462
|
+
image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}"
|
463
|
+
end
|
660
464
|
|
661
|
-
|
662
|
-
# it's set in the Workbook's workbookPr
|
663
|
-
# http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
|
664
|
-
def read_base_date
|
665
|
-
base_date = Date.new(1899,12,30)
|
666
|
-
@workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr|
|
667
|
-
if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i
|
668
|
-
base_date = Date.new(1904,01,01)
|
465
|
+
entry.extract(path) if path
|
669
466
|
end
|
670
467
|
end
|
671
|
-
base_date
|
672
|
-
end
|
673
468
|
|
674
|
-
|
469
|
+
def safe_send(object, method, *args)
|
470
|
+
object.send(method, *args) if object&.respond_to?(method)
|
471
|
+
end
|
472
|
+
|
473
|
+
def worksheet_types
|
474
|
+
[
|
475
|
+
'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional
|
476
|
+
'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict
|
477
|
+
]
|
478
|
+
end
|
479
|
+
end
|
480
|
+
end
|