roo 1.13.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. checksums.yaml +4 -4
  2. data/.gitignore +7 -0
  3. data/.simplecov +4 -0
  4. data/.travis.yml +13 -0
  5. data/CHANGELOG.md +500 -0
  6. data/Gemfile +16 -10
  7. data/Guardfile +24 -0
  8. data/LICENSE +3 -1
  9. data/README.md +254 -0
  10. data/Rakefile +23 -23
  11. data/examples/roo_soap_client.rb +28 -31
  12. data/examples/roo_soap_server.rb +4 -6
  13. data/examples/write_me.rb +9 -10
  14. data/lib/roo/base.rb +303 -388
  15. data/lib/roo/csv.rb +120 -113
  16. data/lib/roo/excelx/comments.rb +24 -0
  17. data/lib/roo/excelx/extractor.rb +20 -0
  18. data/lib/roo/excelx/relationships.rb +26 -0
  19. data/lib/roo/excelx/shared_strings.rb +40 -0
  20. data/lib/roo/excelx/sheet_doc.rb +202 -0
  21. data/lib/roo/excelx/styles.rb +62 -0
  22. data/lib/roo/excelx/workbook.rb +59 -0
  23. data/lib/roo/excelx.rb +452 -484
  24. data/lib/roo/font.rb +17 -0
  25. data/lib/roo/libre_office.rb +5 -0
  26. data/lib/roo/link.rb +15 -0
  27. data/lib/roo/{openoffice.rb → open_office.rb} +678 -496
  28. data/lib/roo/spreadsheet.rb +20 -23
  29. data/lib/roo/utils.rb +78 -0
  30. data/lib/roo/version.rb +3 -0
  31. data/lib/roo.rb +18 -24
  32. data/roo.gemspec +20 -204
  33. data/spec/lib/roo/base_spec.rb +1 -4
  34. data/spec/lib/roo/csv_spec.rb +21 -13
  35. data/spec/lib/roo/excelx/format_spec.rb +7 -6
  36. data/spec/lib/roo/excelx_spec.rb +388 -11
  37. data/spec/lib/roo/libreoffice_spec.rb +16 -6
  38. data/spec/lib/roo/openoffice_spec.rb +2 -8
  39. data/spec/lib/roo/spreadsheet_spec.rb +40 -12
  40. data/spec/lib/roo/utils_spec.rb +106 -0
  41. data/spec/spec_helper.rb +2 -1
  42. data/test/test_generic_spreadsheet.rb +19 -67
  43. data/test/test_helper.rb +9 -56
  44. data/test/test_roo.rb +252 -477
  45. metadata +63 -302
  46. data/CHANGELOG +0 -417
  47. data/Gemfile.lock +0 -78
  48. data/README.markdown +0 -126
  49. data/VERSION +0 -1
  50. data/lib/roo/excel.rb +0 -355
  51. data/lib/roo/excel2003xml.rb +0 -300
  52. data/lib/roo/google.rb +0 -292
  53. data/lib/roo/roo_rails_helper.rb +0 -83
  54. data/lib/roo/worksheet.rb +0 -18
  55. data/spec/lib/roo/excel2003xml_spec.rb +0 -15
  56. data/spec/lib/roo/excel_spec.rb +0 -17
  57. data/spec/lib/roo/google_spec.rb +0 -64
  58. data/test/files/1900_base.xls +0 -0
  59. data/test/files/1900_base.xlsx +0 -0
  60. data/test/files/1904_base.xls +0 -0
  61. data/test/files/1904_base.xlsx +0 -0
  62. data/test/files/Bibelbund.csv +0 -3741
  63. data/test/files/Bibelbund.ods +0 -0
  64. data/test/files/Bibelbund.xls +0 -0
  65. data/test/files/Bibelbund.xlsx +0 -0
  66. data/test/files/Bibelbund.xml +0 -62518
  67. data/test/files/Bibelbund1.ods +0 -0
  68. data/test/files/Pfand_from_windows_phone.xlsx +0 -0
  69. data/test/files/bad_excel_date.xls +0 -0
  70. data/test/files/bbu.ods +0 -0
  71. data/test/files/bbu.xls +0 -0
  72. data/test/files/bbu.xlsx +0 -0
  73. data/test/files/bbu.xml +0 -152
  74. data/test/files/bode-v1.ods.zip +0 -0
  75. data/test/files/bode-v1.xls.zip +0 -0
  76. data/test/files/boolean.csv +0 -2
  77. data/test/files/boolean.ods +0 -0
  78. data/test/files/boolean.xls +0 -0
  79. data/test/files/boolean.xlsx +0 -0
  80. data/test/files/boolean.xml +0 -112
  81. data/test/files/borders.ods +0 -0
  82. data/test/files/borders.xls +0 -0
  83. data/test/files/borders.xlsx +0 -0
  84. data/test/files/borders.xml +0 -144
  85. data/test/files/bug-numbered-sheet-names.xlsx +0 -0
  86. data/test/files/bug-row-column-fixnum-float.xls +0 -0
  87. data/test/files/bug-row-column-fixnum-float.xml +0 -127
  88. data/test/files/comments.ods +0 -0
  89. data/test/files/comments.xls +0 -0
  90. data/test/files/comments.xlsx +0 -0
  91. data/test/files/csvtypes.csv +0 -1
  92. data/test/files/datetime.ods +0 -0
  93. data/test/files/datetime.xls +0 -0
  94. data/test/files/datetime.xlsx +0 -0
  95. data/test/files/datetime.xml +0 -142
  96. data/test/files/datetime_floatconv.xls +0 -0
  97. data/test/files/datetime_floatconv.xml +0 -148
  98. data/test/files/dreimalvier.ods +0 -0
  99. data/test/files/emptysheets.ods +0 -0
  100. data/test/files/emptysheets.xls +0 -0
  101. data/test/files/emptysheets.xlsx +0 -0
  102. data/test/files/emptysheets.xml +0 -105
  103. data/test/files/excel2003.xml +0 -21140
  104. data/test/files/false_encoding.xls +0 -0
  105. data/test/files/false_encoding.xml +0 -132
  106. data/test/files/file_item_error.xlsx +0 -0
  107. data/test/files/formula.ods +0 -0
  108. data/test/files/formula.xls +0 -0
  109. data/test/files/formula.xlsx +0 -0
  110. data/test/files/formula.xml +0 -134
  111. data/test/files/formula_parse_error.xls +0 -0
  112. data/test/files/formula_parse_error.xml +0 -1833
  113. data/test/files/formula_string_error.xlsx +0 -0
  114. data/test/files/html-escape.ods +0 -0
  115. data/test/files/link.xls +0 -0
  116. data/test/files/link.xlsx +0 -0
  117. data/test/files/matrix.ods +0 -0
  118. data/test/files/matrix.xls +0 -0
  119. data/test/files/named_cells.ods +0 -0
  120. data/test/files/named_cells.xls +0 -0
  121. data/test/files/named_cells.xlsx +0 -0
  122. data/test/files/no_spreadsheet_file.txt +0 -1
  123. data/test/files/numbers1.csv +0 -18
  124. data/test/files/numbers1.ods +0 -0
  125. data/test/files/numbers1.xls +0 -0
  126. data/test/files/numbers1.xlsx +0 -0
  127. data/test/files/numbers1.xml +0 -312
  128. data/test/files/numeric-link.xlsx +0 -0
  129. data/test/files/only_one_sheet.ods +0 -0
  130. data/test/files/only_one_sheet.xls +0 -0
  131. data/test/files/only_one_sheet.xlsx +0 -0
  132. data/test/files/only_one_sheet.xml +0 -67
  133. data/test/files/paragraph.ods +0 -0
  134. data/test/files/paragraph.xls +0 -0
  135. data/test/files/paragraph.xlsx +0 -0
  136. data/test/files/paragraph.xml +0 -127
  137. data/test/files/prova.xls +0 -0
  138. data/test/files/ric.ods +0 -0
  139. data/test/files/simple_spreadsheet.ods +0 -0
  140. data/test/files/simple_spreadsheet.xls +0 -0
  141. data/test/files/simple_spreadsheet.xlsx +0 -0
  142. data/test/files/simple_spreadsheet.xml +0 -225
  143. data/test/files/simple_spreadsheet_from_italo.ods +0 -0
  144. data/test/files/simple_spreadsheet_from_italo.xls +0 -0
  145. data/test/files/simple_spreadsheet_from_italo.xml +0 -242
  146. data/test/files/so_datetime.csv +0 -7
  147. data/test/files/style.ods +0 -0
  148. data/test/files/style.xls +0 -0
  149. data/test/files/style.xlsx +0 -0
  150. data/test/files/style.xml +0 -154
  151. data/test/files/time-test.csv +0 -2
  152. data/test/files/time-test.ods +0 -0
  153. data/test/files/time-test.xls +0 -0
  154. data/test/files/time-test.xlsx +0 -0
  155. data/test/files/time-test.xml +0 -131
  156. data/test/files/type_excel.ods +0 -0
  157. data/test/files/type_excel.xlsx +0 -0
  158. data/test/files/type_excelx.ods +0 -0
  159. data/test/files/type_excelx.xls +0 -0
  160. data/test/files/type_openoffice.xls +0 -0
  161. data/test/files/type_openoffice.xlsx +0 -0
  162. data/test/files/whitespace.ods +0 -0
  163. data/test/files/whitespace.xls +0 -0
  164. data/test/files/whitespace.xlsx +0 -0
  165. data/test/files/whitespace.xml +0 -184
  166. data/test/rm_sub_test.rb +0 -12
  167. data/test/rm_test.rb +0 -7
  168. data/website/index.html +0 -385
  169. data/website/index.txt +0 -423
  170. data/website/javascripts/rounded_corners_lite.inc.js +0 -285
  171. data/website/stylesheets/screen.css +0 -130
  172. data/website/template.rhtml +0 -48
data/lib/roo/excelx.rb CHANGED
@@ -1,8 +1,18 @@
1
1
  require 'date'
2
2
  require 'nokogiri'
3
- require 'spreadsheet'
3
+ require 'roo/link'
4
+ require 'roo/utils'
5
+ require 'zip/filesystem'
4
6
 
5
7
  class Roo::Excelx < Roo::Base
8
+ autoload :Workbook, 'roo/excelx/workbook'
9
+ autoload :SharedStrings, 'roo/excelx/shared_strings'
10
+ autoload :Styles, 'roo/excelx/styles'
11
+
12
+ autoload :Relationships, 'roo/excelx/relationships'
13
+ autoload :Comments, 'roo/excelx/comments'
14
+ autoload :SheetDoc, 'roo/excelx/sheet_doc'
15
+
6
16
  module Format
7
17
  EXCEPTIONAL_FORMATS = {
8
18
  'h:mm am/pm' => :date,
@@ -46,7 +56,7 @@ class Roo::Excelx < Roo::Base
46
56
  type
47
57
  elsif format.include?('#')
48
58
  :float
49
- elsif format.include?('d') || format.include?('y')
59
+ elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
50
60
  if format.include?('h') || format.include?('s')
51
61
  :datetime
52
62
  else
@@ -64,140 +74,314 @@ class Roo::Excelx < Roo::Base
64
74
  module_function :to_type
65
75
  end
66
76
 
67
- # initialization and opening of a spreadsheet file
68
- # values for packed: :zip
69
- def initialize(filename, options = {}, deprecated_file_warning = :error)
70
- if Hash === options
71
- packed = options[:packed]
72
- file_warning = options[:file_warning] || :error
73
- else
74
- warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprecated. Use an options hash instead.'
75
- packed = options
76
- file_warning = deprecated_file_warning
77
+ class Cell
78
+ attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
79
+ attr_writer :value
80
+
81
+ def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
82
+ @type = type
83
+ @formula = formula
84
+ @base_date = base_date if [:date, :datetime].include?(@type)
85
+ @excelx_type = excelx_type
86
+ @excelx_value = excelx_value
87
+ @style = style
88
+ @value = type_cast_value(value)
89
+ @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
90
+ @coordinate = coordinate
77
91
  end
78
92
 
79
- file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
80
- make_tmpdir do |tmpdir|
81
- filename = download_uri(filename, tmpdir) if uri?(filename)
82
- filename = unzip(filename, tmpdir) if packed == :zip
83
- @filename = filename
84
- unless File.file?(@filename)
85
- raise IOError, "file #{@filename} does not exist"
93
+ def type
94
+ if @formula
95
+ :formula
96
+ elsif @value.is_a?(Roo::Link)
97
+ :link
98
+ else
99
+ @type
100
+ end
101
+ end
102
+
103
+ class Coordinate
104
+ attr_accessor :row, :column
105
+
106
+ def initialize(row, column)
107
+ @row, @column = row, column
86
108
  end
87
- @comments_files = Array.new
88
- @rels_files = Array.new
89
- extract_content(tmpdir, @filename)
90
- @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml"))
91
- @shared_table = []
92
- if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml'))
93
- @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml'))
94
- read_shared_strings(@sharedstring_doc)
109
+ end
110
+
111
+ private
112
+
113
+ def type_cast_value(value)
114
+ case @type
115
+ when :float, :percentage
116
+ value.to_f
117
+ when :date
118
+ yyyy,mm,dd = (@base_date+value.to_i).strftime("%Y-%m-%d").split('-')
119
+ Date.new(yyyy.to_i,mm.to_i,dd.to_i)
120
+ when :datetime
121
+ create_datetime_from((@base_date+value.to_f.round(6)).strftime("%Y-%m-%d %H:%M:%S.%N"))
122
+ when :time
123
+ value.to_f*(24*60*60)
124
+ when :string
125
+ value
126
+ else
127
+ value
128
+ end
129
+ end
130
+
131
+ def create_datetime_from(datetime_string)
132
+ date_part,time_part = round_time_from(datetime_string).split(' ')
133
+ yyyy,mm,dd = date_part.split('-')
134
+ hh,mi,ss = time_part.split(':')
135
+ DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
136
+ end
137
+
138
+ def round_time_from(datetime_string)
139
+ date_part,time_part = datetime_string.split(' ')
140
+ yyyy,mm,dd = date_part.split('-')
141
+ hh,mi,ss = time_part.split(':')
142
+ Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0).strftime("%Y-%m-%d %H:%M:%S")
143
+ end
144
+ end
145
+
146
+ class Sheet
147
+ def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
148
+ @name = name
149
+ @rels = Relationships.new(rels_path)
150
+ @comments = Comments.new(comments_path)
151
+ @styles = styles
152
+ @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
153
+ end
154
+
155
+ def cells
156
+ @cells ||= @sheet.cells(@rels)
157
+ end
158
+
159
+ def present_cells
160
+ @present_cells ||= cells.select {|key, cell| cell && cell.value }
161
+ end
162
+
163
+ # Yield each row as array of Excelx::Cell objects
164
+ # accepts options max_rows (int) (offset by 1 for header)
165
+ # and pad_cells (boolean)
166
+ def each_row(options = {}, &block)
167
+ row_count = 0
168
+ @sheet.each_row_streaming do |row|
169
+ break if options[:max_rows] && row_count == options[:max_rows] + 1
170
+ block.call(cells_for_row_element(row, options)) if block_given?
171
+ row_count += 1
172
+ end
173
+ end
174
+
175
+ def row(row_number)
176
+ first_column.upto(last_column).map do |col|
177
+ cells[[row_number,col]]
178
+ end.map {|cell| cell && cell.value }
179
+ end
180
+
181
+ def column(col_number)
182
+ first_row.upto(last_row).map do |row|
183
+ cells[[row,col_number]]
184
+ end.map {|cell| cell && cell.value }
185
+ end
186
+
187
+ # returns the number of the first non-empty row
188
+ def first_row
189
+ @first_row ||= present_cells.keys.map {|row, _| row }.min
190
+ end
191
+
192
+ def last_row
193
+ @last_row ||= present_cells.keys.map {|row, _| row }.max
194
+ end
195
+
196
+ # returns the number of the first non-empty column
197
+ def first_column
198
+ @first_column ||= present_cells.keys.map {|_, col| col }.min
199
+ end
200
+
201
+ # returns the number of the last non-empty column
202
+ def last_column
203
+ @last_column ||= present_cells.keys.map {|_, col| col }.max
204
+ end
205
+
206
+ def excelx_format(key)
207
+ cell = cells[key]
208
+ @styles.style_format(cell.style).to_s if cell
209
+ end
210
+
211
+ def hyperlinks
212
+ @hyperlinks ||= @sheet.hyperlinks(@rels)
213
+ end
214
+
215
+ def comments
216
+ @comments.comments
217
+ end
218
+
219
+ def dimensions
220
+ @sheet.dimensions
221
+ end
222
+
223
+ private
224
+
225
+ # Take an xml row and return an array of Excelx::Cell objects
226
+ # optionally pad array to header width(assumed 1st row).
227
+ # takes option pad_cells (boolean) defaults false
228
+ def cells_for_row_element(row_element, options = {})
229
+ return [] unless row_element
230
+ cell_col = 0
231
+ cells = []
232
+ @sheet.each_cell(row_element) do |cell|
233
+ cells.concat(pad_cells(cell, cell_col)) if options[:pad_cells]
234
+ cells << cell
235
+ cell_col = cell.coordinate.column
95
236
  end
96
- @styles_table = []
97
- @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
98
- if File.exist?(File.join(tmpdir, 'roo_styles.xml'))
99
- @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml'))
100
- read_styles(@styles_doc)
237
+ cells
238
+ end
239
+
240
+ def pad_cells(cell, last_column)
241
+ pad = []
242
+ (cell.coordinate.column - 1 - last_column).times { pad << nil }
243
+ pad
244
+ end
245
+ end
246
+
247
+ ExceedsMaxError = Class.new(StandardError)
248
+
249
+ # initialization and opening of a spreadsheet file
250
+ # values for packed: :zip
251
+ # optional cell_max (int) parameter for early aborting attempts to parse
252
+ # enormous documents.
253
+ def initialize(filename, options = {})
254
+ packed = options[:packed]
255
+ file_warning = options.fetch(:file_warning, :error)
256
+ cell_max = options.delete(:cell_max)
257
+ sheet_options = {}
258
+ sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
259
+
260
+ file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
261
+
262
+ @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
263
+ @filename = local_filename(filename, @tmpdir, packed)
264
+ @comments_files = []
265
+ @rels_files = []
266
+ process_zipfile(@tmpdir, @filename)
267
+
268
+ @sheet_names = workbook.sheets.map do |sheet|
269
+ unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
270
+ sheet['name']
101
271
  end
102
- @sheet_doc = load_xmls(@sheet_files)
103
- @comments_doc = load_xmls(@comments_files)
104
- @rels_doc = load_xmls(@rels_files)
105
- end
106
- super(filename, options)
107
- @formula = Hash.new
108
- @excelx_type = Hash.new
109
- @excelx_value = Hash.new
110
- @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
111
- @comment = Hash.new
112
- @comments_read = Hash.new
113
- @hyperlink = Hash.new
114
- @hyperlinks_read = Hash.new
115
- end
116
-
117
- def method_missing(m,*args)
118
- # is method name a label name
119
- read_labels
120
- if @label.has_key?(m.to_s)
121
- sheet ||= @default_sheet
122
- read_cells(sheet)
123
- row,col = label(m.to_s)
124
- cell(row,col)
272
+ end.compact
273
+ @sheets = []
274
+ @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
275
+ @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options)
276
+ [sheet_name, @sheets[n]]
277
+ end]
278
+
279
+ if cell_max
280
+ cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
281
+ raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
282
+ end
283
+
284
+ super
285
+ end
286
+
287
+ def method_missing(method,*args)
288
+ if label = workbook.defined_names[method.to_s]
289
+ safe_send(sheet_for(label.sheet).cells[label.key], :value)
125
290
  else
126
291
  # call super for methods like #a1
127
292
  super
128
293
  end
129
294
  end
130
295
 
296
+ def sheets
297
+ @sheet_names
298
+ end
299
+
300
+ def sheet_for(sheet)
301
+ sheet ||= default_sheet
302
+ validate_sheet!(sheet)
303
+ @sheets_by_name[sheet]
304
+ end
305
+
131
306
  # Returns the content of a spreadsheet-cell.
132
307
  # (1,1) is the upper left corner.
133
308
  # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
134
309
  # cell at the first line and first row.
135
310
  def cell(row, col, sheet=nil)
136
- sheet ||= @default_sheet
137
- read_cells(sheet)
138
- row,col = normalize(row,col)
139
- if celltype(row,col,sheet) == :date
140
- yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
141
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
142
- elsif celltype(row,col,sheet) == :datetime
143
- date_part,time_part = @cell[sheet][[row,col]].split(' ')
144
- yyyy,mm,dd = date_part.split('-')
145
- hh,mi,ss = time_part.split(':')
146
- return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
311
+ key = normalize(row,col)
312
+ safe_send(sheet_for(sheet).cells[key], :value)
313
+ end
314
+
315
+ def row(rownumber,sheet=nil)
316
+ sheet_for(sheet).row(rownumber)
317
+ end
318
+
319
+ # returns all values in this column as an array
320
+ # column numbers are 1,2,3,... like in the spreadsheet
321
+ def column(column_number,sheet=nil)
322
+ if column_number.is_a?(::String)
323
+ column_number = ::Roo::Utils.letter_to_number(column_number)
147
324
  end
148
- @cell[sheet][[row,col]]
325
+ sheet_for(sheet).column(column_number)
149
326
  end
150
327
 
328
+ # returns the number of the first non-empty row
329
+ def first_row(sheet=nil)
330
+ sheet_for(sheet).first_row
331
+ end
332
+
333
+ # returns the number of the last non-empty row
334
+ def last_row(sheet=nil)
335
+ sheet_for(sheet).last_row
336
+ end
337
+
338
+ # returns the number of the first non-empty column
339
+ def first_column(sheet=nil)
340
+ sheet_for(sheet).first_column
341
+ end
342
+
343
+ # returns the number of the last non-empty column
344
+ def last_column(sheet=nil)
345
+ sheet_for(sheet).last_column
346
+ end
347
+
348
+ # set a cell to a certain value
349
+ # (this will not be saved back to the spreadsheet file!)
350
+ def set(row,col,value, sheet = nil) #:nodoc:
351
+ key = normalize(row,col)
352
+ cell_type = cell_type_by_value(value)
353
+ sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Cell::Coordinate.new(row, col))
354
+ end
355
+
356
+
151
357
  # Returns the formula at (row,col).
152
358
  # Returns nil if there is no formula.
153
359
  # The method #formula? checks if there is a formula.
154
360
  def formula(row,col,sheet=nil)
155
- sheet ||= @default_sheet
156
- read_cells(sheet)
157
- row,col = normalize(row,col)
158
- @formula[sheet][[row,col]] && @formula[sheet][[row,col]]
361
+ key = normalize(row,col)
362
+ safe_send(sheet_for(sheet).cells[key], :formula)
159
363
  end
160
- alias_method :formula?, :formula
161
364
 
162
- # returns each formula in the selected sheet as an array of elements
163
- # [row, col, formula]
164
- def formulas(sheet=nil)
165
- sheet ||= @default_sheet
166
- read_cells(sheet)
167
- if @formula[sheet]
168
- @formula[sheet].each.collect do |elem|
169
- [elem[0][0], elem[0][1], elem[1]]
170
- end
171
- else
172
- []
173
- end
365
+ # Predicate methods really should return a boolean
366
+ # value. Hopefully no one was relying on the fact that this
367
+ # previously returned either nil/formula
368
+ def formula?(*args)
369
+ !!formula(*args)
174
370
  end
175
371
 
176
- class Font
177
- attr_accessor :bold, :italic, :underline
178
-
179
- def bold?
180
- @bold == true
181
- end
182
-
183
- def italic?
184
- @italic == true
185
- end
186
-
187
- def underline?
188
- @underline == true
372
+ # returns each formula in the selected sheet as an array of tuples in following format
373
+ # [[row, col, formula], [row, col, formula],...]
374
+ def formulas(sheet=nil)
375
+ sheet_for(sheet).cells.select {|_, cell| cell.formula }.map do |(x, y), cell|
376
+ [x, y, cell.formula]
189
377
  end
190
378
  end
191
379
 
192
380
  # Given a cell, return the cell's style
193
381
  def font(row, col, sheet=nil)
194
- sheet ||= @default_sheet
195
- read_cells(sheet)
196
- row,col = normalize(row,col)
197
- s_attribute = @s_attribute[sheet][[row,col]]
198
- s_attribute ||= 0
199
- s_attribute = s_attribute.to_i
200
- @style_definitions[s_attribute]
382
+ key = normalize(row,col)
383
+ definition_index = safe_send(sheet_for(sheet).cells[key], :style)
384
+ styles.definitions[definition_index] if definition_index
201
385
  end
202
386
 
203
387
  # returns the type of a cell:
@@ -209,14 +393,8 @@ class Roo::Excelx < Roo::Base
209
393
  # * :time
210
394
  # * :datetime
211
395
  def celltype(row,col,sheet=nil)
212
- sheet ||= @default_sheet
213
- read_cells(sheet)
214
- row,col = normalize(row,col)
215
- if @formula[sheet][[row,col]]
216
- return :formula
217
- else
218
- @cell_type[sheet][[row,col]]
219
- end
396
+ key = normalize(row, col)
397
+ safe_send(sheet_for(sheet).cells[key], :type)
220
398
  end
221
399
 
222
400
  # returns the internal type of an excel cell
@@ -224,451 +402,241 @@ class Roo::Excelx < Roo::Base
224
402
  # * :string
225
403
  # Note: this is only available within the Excelx class
226
404
  def excelx_type(row,col,sheet=nil)
227
- sheet ||= @default_sheet
228
- read_cells(sheet)
229
- row,col = normalize(row,col)
230
- return @excelx_type[sheet][[row,col]]
405
+ key = normalize(row,col)
406
+ safe_send(sheet_for(sheet).cells[key], :excelx_type)
231
407
  end
232
408
 
233
409
  # returns the internal value of an excelx cell
234
410
  # Note: this is only available within the Excelx class
235
411
  def excelx_value(row,col,sheet=nil)
236
- sheet ||= @default_sheet
237
- read_cells(sheet)
238
- row,col = normalize(row,col)
239
- return @excelx_value[sheet][[row,col]]
412
+ key = normalize(row,col)
413
+ safe_send(sheet_for(sheet).cells[key], :excelx_value)
240
414
  end
241
415
 
242
416
  # returns the internal format of an excel cell
243
417
  def excelx_format(row,col,sheet=nil)
244
- sheet ||= @default_sheet
245
- read_cells(sheet)
246
- row,col = normalize(row,col)
247
- s = @s_attribute[sheet][[row,col]]
248
- attribute2format(s).to_s
418
+ key = normalize(row,col)
419
+ sheet_for(sheet).excelx_format(key)
249
420
  end
250
421
 
251
- # returns an array of sheet names in the spreadsheet
252
- def sheets
253
- @workbook_doc.xpath("//xmlns:sheet").map do |sheet|
254
- sheet['name']
255
- end
422
+ def empty?(row,col,sheet=nil)
423
+ sheet = sheet_for(sheet)
424
+ key = normalize(row,col)
425
+ cell = sheet.cells[key]
426
+ !cell || !cell.value || (cell.type == :string && cell.value.empty?) \
427
+ || (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
256
428
  end
257
429
 
258
430
  # shows the internal representation of all cells
259
431
  # for debugging purposes
260
432
  def to_s(sheet=nil)
261
- sheet ||= @default_sheet
262
- read_cells(sheet)
263
- @cell[sheet].inspect
433
+ sheet_for(sheet).cells.inspect
264
434
  end
265
435
 
266
436
  # returns the row,col values of the labelled cell
267
437
  # (nil,nil) if label is not defined
268
- def label(labelname)
269
- read_labels
270
- if @label.empty? || !@label.has_key?(labelname)
271
- return nil,nil,nil
438
+ def label(name)
439
+ labels = workbook.defined_names
440
+ if labels.empty? || !labels.key?(name)
441
+ [nil,nil,nil]
272
442
  else
273
- return @label[labelname][1].to_i,
274
- Roo::Base.letter_to_number(@label[labelname][2]),
275
- @label[labelname][0]
443
+ [labels[name].row,
444
+ labels[name].col,
445
+ labels[name].sheet]
276
446
  end
277
447
  end
278
448
 
279
449
  # Returns an array which all labels. Each element is an array with
280
450
  # [labelname, [row,col,sheetname]]
281
451
  def labels
282
- # sheet ||= @default_sheet
283
- # read_cells(sheet)
284
- read_labels
285
- @label.map do |label|
286
- [ label[0], # name
287
- [ label[1][1].to_i, # row
288
- Roo::Base.letter_to_number(label[1][2]), # column
289
- label[1][0], # sheet
452
+ @labels ||= workbook.defined_names.map do |name, label|
453
+ [ name,
454
+ [ label.row,
455
+ label.col,
456
+ label.sheet,
290
457
  ] ]
291
458
  end
292
459
  end
293
460
 
294
461
  def hyperlink?(row,col,sheet=nil)
295
- hyperlink(row, col, sheet) != nil
462
+ !!hyperlink(row, col, sheet)
296
463
  end
297
464
 
298
465
  # returns the hyperlink at (row/col)
299
466
  # nil if there is no hyperlink
300
467
  def hyperlink(row,col,sheet=nil)
301
- sheet ||= @default_sheet
302
- read_hyperlinks(sheet) unless @hyperlinks_read[sheet]
303
- row,col = normalize(row,col)
304
- return nil unless @hyperlink[sheet]
305
- @hyperlink[sheet][[row,col]]
468
+ key = normalize(row,col)
469
+ sheet_for(sheet).hyperlinks[key]
306
470
  end
307
471
 
308
472
  # returns the comment at (row/col)
309
473
  # nil if there is no comment
310
474
  def comment(row,col,sheet=nil)
311
- sheet ||= @default_sheet
312
- #read_cells(sheet)
313
- read_comments(sheet) unless @comments_read[sheet]
314
- row,col = normalize(row,col)
315
- return nil unless @comment[sheet]
316
- @comment[sheet][[row,col]]
475
+ key = normalize(row,col)
476
+ sheet_for(sheet).comments[key]
317
477
  end
318
478
 
319
479
  # true, if there is a comment
320
480
  def comment?(row,col,sheet=nil)
321
- sheet ||= @default_sheet
322
- # read_cells(sheet)
323
- read_comments(sheet) unless @comments_read[sheet]
324
- row,col = normalize(row,col)
325
- comment(row,col) != nil
481
+ !!comment(row,col,sheet)
326
482
  end
327
483
 
328
- # returns each comment in the selected sheet as an array of elements
329
- # [row, col, comment]
330
484
  def comments(sheet=nil)
331
- sheet ||= @default_sheet
332
- read_comments(sheet) unless @comments_read[sheet]
333
- if @comment[sheet]
334
- @comment[sheet].each.collect do |elem|
335
- [elem[0][0],elem[0][1],elem[1]]
336
- end
337
- else
338
- []
485
+ sheet_for(sheet).comments.map do |(x, y), comment|
486
+ [x, y, comment]
339
487
  end
340
488
  end
341
489
 
342
- private
490
+ # Yield an array of Excelx::Cell
491
+ # Takes options for sheet, pad_cells, and max_rows
492
+ def each_row_streaming(options={})
493
+ sheet_for(options.delete(:sheet)).each_row(options) { |row| yield row }
494
+ end
343
495
 
344
- def load_xmls(paths)
345
- paths.compact.map do |item|
346
- load_xml(item)
347
- end
348
- end
349
-
350
- # helper function to set the internal representation of cells
351
- def set_cell_values(sheet,x,y,i,v,value_type,formula,
352
- excelx_type=nil,
353
- excelx_value=nil,
354
- s_attribute=nil)
355
- key = [y,x+i]
356
- @cell_type[sheet] ||= {}
357
- @cell_type[sheet][key] = value_type
358
- @formula[sheet] ||= {}
359
- @formula[sheet][key] = formula if formula
360
- @cell[sheet] ||= {}
361
- @cell[sheet][key] =
362
- case @cell_type[sheet][key]
363
- when :float
364
- v.to_f
365
- when :string
366
- v
367
- when :date
368
- (base_date+v.to_i).strftime("%Y-%m-%d")
369
- when :datetime
370
- (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
371
- when :percentage
372
- v.to_f
373
- when :time
374
- v.to_f*(24*60*60)
375
- else
376
- v
377
- end
496
+ private
378
497
 
379
- @cell[sheet][key] = Spreadsheet::Link.new(@hyperlink[sheet][key], @cell[sheet][key].to_s) if hyperlink?(y,x+i)
380
- @excelx_type[sheet] ||= {}
381
- @excelx_type[sheet][key] = excelx_type
382
- @excelx_value[sheet] ||= {}
383
- @excelx_value[sheet][key] = excelx_value
384
- @s_attribute[sheet] ||= {}
385
- @s_attribute[sheet][key] = s_attribute
386
- end
498
+ def clean_sheet(sheet)
499
+ @sheets_by_name[sheet].cells.each_pair do |coord, value|
500
+ next unless value.value.is_a?(::String)
387
501
 
388
- # read all cells in the selected sheet
389
- def read_cells(sheet=nil)
390
- sheet ||= @default_sheet
391
- validate_sheet!(sheet)
392
- return if @cells_read[sheet]
393
-
394
- @sheet_doc[sheets.index(sheet)].xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c").each do |c|
395
- s_attribute = c['s'].to_i # should be here
396
- # c: <c r="A5" s="2">
397
- # <v>22606</v>
398
- # </c>, format: , tmp_type: float
399
- value_type =
400
- case c['t']
401
- when 's'
402
- :shared
403
- when 'b'
404
- :boolean
405
- # 2011-02-25 BEGIN
406
- when 'str'
407
- :string
408
- # 2011-02-25 END
409
- # 2011-09-15 BEGIN
410
- when 'inlineStr'
411
- :inlinestr
412
- # 2011-09-15 END
413
- else
414
- format = attribute2format(s_attribute)
415
- Format.to_type(format)
416
- end
417
- formula = nil
418
- c.children.each do |cell|
419
- case cell.name
420
- when 'is'
421
- cell.children.each do |is|
422
- if is.name == 't'
423
- inlinestr_content = is.content
424
- value_type = :string
425
- v = inlinestr_content
426
- excelx_type = :string
427
- y, x = Roo::Base.split_coordinate(c['r'])
428
- excelx_value = inlinestr_content #cell.content
429
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
430
- end
431
- end
432
- when 'f'
433
- formula = cell.content
434
- when 'v'
435
- if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
436
- value_type =
437
- if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
438
- :datetime
439
- else
440
- :date
441
- end
442
- end
443
- excelx_type = [:numeric_or_formula,format.to_s]
444
- excelx_value = cell.content
445
- v =
446
- case value_type
447
- when :shared
448
- value_type = :string
449
- excelx_type = :string
450
- @shared_table[cell.content.to_i]
451
- when :boolean
452
- (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
453
- when :date
454
- cell.content
455
- when :time
456
- cell.content
457
- when :datetime
458
- cell.content
459
- when :formula
460
- cell.content.to_f #TODO: !!!!
461
- when :string
462
- excelx_type = :string
463
- cell.content
464
- else
465
- value_type = :float
466
- cell.content
467
- end
468
- y, x = Roo::Base.split_coordinate(c['r'])
469
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
470
- end
471
- end
502
+ @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value)
472
503
  end
473
- @cells_read[sheet] = true
474
- # begin comments
475
- =begin
476
- Datei xl/comments1.xml
477
- <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
478
- <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
479
- <authors>
480
- <author />
481
- </authors>
482
- <commentList>
483
- <comment ref="B4" authorId="0">
484
- <text>
485
- <r>
486
- <rPr>
487
- <sz val="10" />
488
- <rFont val="Arial" />
489
- <family val="2" />
490
- </rPr>
491
- <t>Kommentar fuer B4</t>
492
- </r>
493
- </text>
494
- </comment>
495
- <comment ref="B5" authorId="0">
496
- <text>
497
- <r>
498
- <rPr>
499
- <sz val="10" />
500
- <rFont val="Arial" />
501
- <family val="2" />
502
- </rPr>
503
- <t>Kommentar fuer B5</t>
504
- </r>
505
- </text>
506
- </comment>
507
- </commentList>
508
- </comments>
509
- =end
510
- =begin
511
- if @comments_doc[self.sheets.index(sheet)]
512
- read_comments(sheet)
513
- end
514
- =end
515
- #end comments
516
- end
517
-
518
- # Reads all comments from a sheet
519
- def read_comments(sheet=nil)
520
- sheet ||= @default_sheet
521
- validate_sheet!(sheet)
522
- n = self.sheets.index(sheet)
523
- return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
524
- @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment|
525
- ref = comment.attributes['ref'].to_s
526
- row,col = Roo::Base.split_coordinate(ref)
527
- comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text|
528
- @comment[sheet] ||= {}
529
- @comment[sheet][[row,col]] = text.text
530
- end
531
- end
532
- @comments_read[sheet] = true
533
- end
534
504
 
535
- # Reads all hyperlinks from a sheet
536
- def read_hyperlinks(sheet=nil)
537
- sheet ||= @default_sheet
538
- validate_sheet!(sheet)
539
- n = self.sheets.index(sheet)
540
- if rels_doc = @rels_doc[n]
541
- rels = Hash[rels_doc.xpath("/xmlns:Relationships/xmlns:Relationship").map do |r|
542
- [r.attribute('Id').text, r]
543
- end]
544
- @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h|
545
- if rel_element = rels[h.attribute('id').text]
546
- row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s)
547
- @hyperlink[sheet] ||= {}
548
- @hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text
549
- end
550
- end
505
+ @cleaned[sheet] = true
506
+ end
507
+
508
+ # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
509
+ # documents require a workbook.xml file, so a if the file is missing
510
+ # it is not a valid xlsx file. In these cases, an ArgumentError is
511
+ # raised.
512
+ #
513
+ # wb - a Zip::Entry for the workbook.xml file.
514
+ # path - A String for Zip::Entry's destination path.
515
+ #
516
+ # Examples
517
+ #
518
+ # extract_worksheet_ids(<Zip::Entry>, 'tmpdir/roo_workbook.xml')
519
+ # # => ["rId1", "rId2", "rId3"]
520
+ #
521
+ # Returns an Array of Strings.
522
+ def extract_worksheet_ids(entries, path)
523
+ wb = entries.find { |e| e.name[/workbook.xml$/] }
524
+ fail ArgumentError 'missing required workbook file' if wb.nil?
525
+
526
+ wb.extract(path)
527
+ workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
528
+ workbook_doc.xpath('//sheet').map{ |s| s.attributes['id'].value }
529
+ end
530
+
531
+ # Internal
532
+ #
533
+ # wb_rels - A Zip::Entry for the workbook.xml.rels file.
534
+ # path - A String for the Zip::Entry's destination path.
535
+ #
536
+ # Examples
537
+ #
538
+ # extract_worksheets(<Zip::Entry>, 'tmpdir/roo_workbook.xml.rels')
539
+ # # => {
540
+ # "rId1"=>"worksheets/sheet1.xml",
541
+ # "rId2"=>"worksheets/sheet2.xml",
542
+ # "rId3"=>"worksheets/sheet3.xml"
543
+ # }
544
+ #
545
+ # Returns a Hash.
546
+ def extract_worksheet_rels(entries, path)
547
+ wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
548
+ fail ArgumentError 'missing required workbook file' if wb_rels.nil?
549
+
550
+ wb_rels.extract(path)
551
+ rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
552
+ worksheet_type ='http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
553
+
554
+ relationships = rels_doc.xpath('//Relationship').select do |relationship|
555
+ relationship.attributes['Type'].value == worksheet_type
551
556
  end
552
- @hyperlinks_read[sheet] = true
553
- end
554
557
 
555
- def read_labels
556
- @label ||= Hash[@workbook_doc.xpath("//xmlns:definedName").map do |defined_name|
557
- # "Sheet1!$C$5"
558
- sheet, coordinates = defined_name.text.split('!$', 2)
559
- col,row = coordinates.split('$')
560
- [defined_name['name'], [sheet,row,col]]
561
- end]
562
- end
563
-
564
- # Extracts all needed files from the zip file
565
- def process_zipfile(tmpdir, zipfilename, zip, path='')
566
- @sheet_files = []
567
- Roo::ZipFile.open(zipfilename) {|zf|
568
- zf.entries.each {|entry|
569
- entry_name = entry.to_s.downcase
570
-
571
- path =
572
- if entry_name.end_with?('workbook.xml')
573
- "#{tmpdir}/roo_workbook.xml"
574
- elsif entry_name.end_with?('sharedstrings.xml')
575
- "#{tmpdir}/roo_sharedStrings.xml"
576
- elsif entry_name.end_with?('styles.xml')
577
- "#{tmpdir}/roo_styles.xml"
578
- elsif entry_name =~ /sheet([0-9]+).xml$/
579
- nr = $1
580
- @sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}"
581
- elsif entry_name =~ /comments([0-9]+).xml$/
582
- nr = $1
583
- @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
584
- elsif entry_name =~ /sheet([0-9]+).xml.rels$/
585
- nr = $1
586
- @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
587
- end
588
- if path
589
- extract_file(zip, entry, path)
590
- end
591
- }
592
- }
593
- end
594
-
595
- def extract_file(source_zip, entry, destination_path)
596
- open(destination_path,'wb') {|f|
597
- f << source_zip.read(entry)
598
- }
599
- end
600
-
601
- # extract files from the zip file
602
- def extract_content(tmpdir, zipfilename)
603
- Roo::ZipFile.open(@filename) do |zip|
604
- process_zipfile(tmpdir, zipfilename,zip)
558
+ relationships.inject({}) do |hash, relationship|
559
+ attributes = relationship.attributes
560
+ id = attributes['Id'];
561
+ hash[id.value] = attributes['Target'].value
562
+ hash
605
563
  end
606
564
  end
607
565
 
608
- # read the shared strings xml document
609
- def read_shared_strings(doc)
610
- doc.xpath("/xmlns:sst/xmlns:si").each do |si|
611
- shared_table_entry = ''
612
- si.children.each do |elem|
613
- if elem.name == 'r' and elem.children
614
- elem.children.each do |r_elem|
615
- if r_elem.name == 't'
616
- shared_table_entry << r_elem.content
617
- end
618
- end
619
- end
620
- if elem.name == 't'
621
- shared_table_entry = elem.content
622
- end
623
- end
624
- @shared_table << shared_table_entry
566
+ def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
567
+ sheet_ids.each_with_index do |id, i| # position in sheet_ids decides the roo_sheetN number
568
+ name = sheets[id] # Target path recorded for this relationship id
569
+ entry = entries.find { |e| e.name =~ /#{Regexp.escape(name.to_s)}$/ } # literal suffix match; '.' is not a wildcard
570
+ path = "#{tmpdir}/roo_sheet#{i + 1}"
571
+ @sheet_files << path
572
+ entry.extract(path)
625
573
  end
626
574
  end
627
575
 
628
- # read the styles elements of an excelx document
629
- def read_styles(doc)
630
- @cellXfs = []
631
-
632
- @numFmts = Hash[doc.xpath("//xmlns:numFmt").map do |numFmt|
633
- [numFmt['numFmtId'], numFmt['formatCode']]
634
- end]
635
- fonts = doc.xpath("//xmlns:fonts/xmlns:font").map do |font_el|
636
- Font.new.tap do |font|
637
- font.bold = !font_el.xpath('./xmlns:b').empty?
638
- font.italic = !font_el.xpath('./xmlns:i').empty?
639
- font.underline = !font_el.xpath('./xmlns:u').empty?
576
+ # Extracts all needed files from the zip file at zipfilename into tmpdir.
577
+ def process_zipfile(tmpdir, zipfilename)
578
+ @sheet_files = []
579
+ entries = Zip::File.open(zipfilename).to_a.sort_by(&:name)
580
+
581
+ # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
582
+ # are not in order. With Numbers 3.1, the first sheet is always
583
+ # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
584
+ # independent of a worksheet's filename (e.g. sheet6.xml can be the
585
+ # first worksheet).
586
+ #
587
+ # workbook.xml lists the correct order of worksheets and
588
+ # workbook.xml.rels lists the filenames for those worksheets.
589
+ #
590
+ # workbook.xml:
591
+ # <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
592
+ # <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
593
+ # workbook.xml.rels:
594
+ # <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
595
+ # <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
596
+ sheet_ids = extract_worksheet_ids(entries, "#{tmpdir}/roo_workbook.xml")
597
+ sheets = extract_worksheet_rels(entries, "#{tmpdir}/roo_workbook.xml.rels")
598
+ extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
599
+
600
+ entries.each do |entry|
601
+ path =
602
+ case entry.name.downcase
603
+ when /sharedstrings.xml$/
604
+ "#{tmpdir}/roo_sharedStrings.xml"
605
+ when /styles.xml$/
606
+ "#{tmpdir}/roo_styles.xml"
607
+ when /comments([0-9]+).xml$/
608
+ # FIXME: Most of the time, the order of the comment files is the same as
609
+ # the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
610
+ # In some situations, this isn't true. The true location of a
611
+ # sheet's comment file is in the sheet1.xml.rels file. See
612
+ # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
613
+ nr = Regexp.last_match[1].to_i
614
+ @comments_files[nr - 1] = "#{tmpdir}/roo_comments#{nr}"
615
+ when /sheet([0-9]+).xml.rels$/
616
+ # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
617
+ # the file also stores the location for sharedStrings, comments,
618
+ # drawings, etc.
619
+ nr = Regexp.last_match[1].to_i
620
+ @rels_files[nr - 1] = "#{tmpdir}/roo_rels#{nr}"
640
621
  end
641
- end
642
622
 
643
- doc.xpath("//xmlns:cellXfs").each do |xfs|
644
- xfs.children.each do |xf|
645
- @cellXfs << xf['numFmtId']
646
- @style_definitions << fonts[xf['fontId'].to_i]
647
- end
623
+ entry.extract(path) if path # entries that matched no pattern leave path nil and are skipped
648
624
  end
649
625
  end
650
626
 
651
- # convert internal excelx attribute to a format
652
- def attribute2format(s)
653
- id = @cellXfs[s.to_i]
654
- @numFmts[id] || Format::STANDARD_FORMATS[id.to_i]
627
+ def styles # memoized in @styles; built from roo_styles.xml under @tmpdir
628
+ @styles ||= Styles.new(File.join(@tmpdir, 'roo_styles.xml'))
655
629
  end
656
630
 
657
- def base_date
658
- @base_date ||= read_base_date
631
+ def shared_strings # memoized in @shared_strings; built from roo_sharedStrings.xml under @tmpdir
632
+ @shared_strings ||= SharedStrings.new(File.join(@tmpdir, 'roo_sharedStrings.xml'))
659
633
  end
660
634
 
661
- # Default to 1900 (minus one day due to excel quirk) but use 1904 if
662
- # it's set in the Workbook's workbookPr
663
- # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
664
- def read_base_date
665
- base_date = Date.new(1899,12,30)
666
- @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr|
667
- if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i
668
- base_date = Date.new(1904,01,01)
669
- end
670
- end
671
- base_date
635
+ def workbook # memoized in @workbook; built from roo_workbook.xml under @tmpdir
636
+ @workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml"))
672
637
  end
673
638
 
674
- end # class
639
+ def safe_send(object, method, *args) # sends method only when object is truthy and responds to it; otherwise returns nil
640
+ object.send(method, *args) if object && object.respond_to?(method)
641
+ end
642
+ end