ruh-roo 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +677 -0
  3. data/Gemfile +24 -0
  4. data/LICENSE +24 -0
  5. data/README.md +315 -0
  6. data/lib/roo/base.rb +607 -0
  7. data/lib/roo/constants.rb +7 -0
  8. data/lib/roo/csv.rb +141 -0
  9. data/lib/roo/errors.rb +11 -0
  10. data/lib/roo/excelx/cell/base.rb +108 -0
  11. data/lib/roo/excelx/cell/boolean.rb +30 -0
  12. data/lib/roo/excelx/cell/date.rb +28 -0
  13. data/lib/roo/excelx/cell/datetime.rb +107 -0
  14. data/lib/roo/excelx/cell/empty.rb +20 -0
  15. data/lib/roo/excelx/cell/number.rb +89 -0
  16. data/lib/roo/excelx/cell/string.rb +19 -0
  17. data/lib/roo/excelx/cell/time.rb +44 -0
  18. data/lib/roo/excelx/cell.rb +110 -0
  19. data/lib/roo/excelx/comments.rb +55 -0
  20. data/lib/roo/excelx/coordinate.rb +19 -0
  21. data/lib/roo/excelx/extractor.rb +39 -0
  22. data/lib/roo/excelx/format.rb +71 -0
  23. data/lib/roo/excelx/images.rb +26 -0
  24. data/lib/roo/excelx/relationships.rb +33 -0
  25. data/lib/roo/excelx/shared.rb +39 -0
  26. data/lib/roo/excelx/shared_strings.rb +151 -0
  27. data/lib/roo/excelx/sheet.rb +151 -0
  28. data/lib/roo/excelx/sheet_doc.rb +248 -0
  29. data/lib/roo/excelx/styles.rb +64 -0
  30. data/lib/roo/excelx/workbook.rb +63 -0
  31. data/lib/roo/excelx.rb +480 -0
  32. data/lib/roo/font.rb +17 -0
  33. data/lib/roo/formatters/base.rb +15 -0
  34. data/lib/roo/formatters/csv.rb +84 -0
  35. data/lib/roo/formatters/matrix.rb +23 -0
  36. data/lib/roo/formatters/xml.rb +31 -0
  37. data/lib/roo/formatters/yaml.rb +40 -0
  38. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  39. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  40. data/lib/roo/libre_office.rb +4 -0
  41. data/lib/roo/link.rb +34 -0
  42. data/lib/roo/open_office.rb +628 -0
  43. data/lib/roo/spreadsheet.rb +39 -0
  44. data/lib/roo/tempdir.rb +21 -0
  45. data/lib/roo/utils.rb +128 -0
  46. data/lib/roo/version.rb +3 -0
  47. data/lib/roo.rb +36 -0
  48. data/roo.gemspec +28 -0
  49. metadata +189 -0
data/lib/roo/excelx.rb ADDED
@@ -0,0 +1,480 @@
1
+ require 'nokogiri'
2
+ require 'zip/filesystem'
3
+ require 'roo/link'
4
+ require 'roo/tempdir'
5
+ require 'roo/utils'
6
+ require 'forwardable'
7
+ require 'set'
8
+
9
+ module Roo
10
+ class Excelx < Roo::Base
11
+ extend Roo::Tempdir
12
+ extend Forwardable
13
+
14
+ ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
15
+
16
+ require 'roo/excelx/shared'
17
+ require 'roo/excelx/workbook'
18
+ require 'roo/excelx/shared_strings'
19
+ require 'roo/excelx/styles'
20
+ require 'roo/excelx/cell'
21
+ require 'roo/excelx/sheet'
22
+ require 'roo/excelx/relationships'
23
+ require 'roo/excelx/comments'
24
+ require 'roo/excelx/sheet_doc'
25
+ require 'roo/excelx/coordinate'
26
+ require 'roo/excelx/format'
27
+ require 'roo/excelx/images'
28
+
29
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
30
+ ExceedsMaxError = Class.new(StandardError)
31
+
32
# Opens an Excel 2007+ workbook (.xlsx/.xlsm) given as a file name or stream.
#
# options (all optional):
#   :packed               - values for packed: :zip
#   :file_warning         - forwarded to file_type_check (default :error)
#   :cell_max             - Integer limit used to abort early when trying to
#                           parse enormous documents
#   :sheet                - sheet whose dimensions are compared to :cell_max
#                           (sheet_for falls back to the default sheet)
#   :expand_merged_ranges,
#   :no_hyperlinks,
#   :empty_cell           - forwarded to every Sheet (default false)
#   :disable_html_wrapper - forwarded to Shared (default false)
#   :only_visible_sheets  - skip sheets whose state is 'hidden'
#   :tmpdir_root          - forwarded to make_tempdir
#
# Raises ExceedsMaxError if :cell_max is given and the checked sheet reports
# more cells than that.
def initialize(filename_or_stream, options = {})
  # Work on a private copy: :cell_max and :sheet are deleted below, and
  # deleting them from the caller's hash would silently drop the limit when
  # the same hash is reused for another workbook. The implicit `super` at the
  # end receives this stripped copy, exactly as it received the stripped
  # original before.
  options = options.dup

  packed = options[:packed]
  file_warning = options.fetch(:file_warning, :error)
  cell_max = options.delete(:cell_max)
  sheet_options = {}
  sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
  sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
  sheet_options[:empty_cell] = (options[:empty_cell] || false)
  shared_options = {}

  shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
  unless is_stream?(filename_or_stream)
    file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
    basename = find_basename(filename_or_stream)
  end

  # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
  #       when the object is garbage collected. Initially, the finalizer was
  #       created in the Roo::Tempdir module, but that led to a segfault
  #       when testing in Ruby 2.4.0.
  @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))

  @shared = Shared.new(@tmpdir, shared_options)
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
  process_zipfile(@filename || filename_or_stream)

  @sheet_names = []
  @sheets = []
  @sheets_by_name = {}

  workbook.sheets.each_with_index do |sheet, index|
    next if options[:only_visible_sheets] && sheet['state'] == 'hidden'

    sheet_name = sheet['name']
    @sheet_names << sheet_name
    @sheets_by_name[sheet_name] = @sheets[index] = Sheet.new(sheet_name, @shared, index, sheet_options)
  end

  if cell_max
    cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
    raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
  end

  super
rescue
  # Anything that fails during setup must not leave the temp directory behind.
  self.class.finalize_tempdirs(object_id)
  raise
end
85
+
86
# Resolves a call whose name matches a defined name (label) of the workbook
# to the value of the labelled cell; every other name goes to super.
def method_missing(method, *args)
  label = workbook.defined_names[method.to_s]
  # call super for methods like #a1
  return super unless label

  labelled_cell = sheet_for(label.sheet).cells[label.key]
  safe_send(labelled_cell, :value)
end
94
+
95
# Returns the sheet names collected while the workbook was opened.
def sheets
  @sheet_names
end
98
+
99
# Returns the sheet object for a sheet given by name or by index.
# A nil argument selects the default sheet; the choice is validated first.
def sheet_for(sheet)
  name_or_index = sheet || default_sheet
  validate_sheet!(name_or_index)

  by_name = @sheets_by_name[name_or_index]
  return by_name if by_name

  @sheets[name_or_index]
end
104
+
105
# Returns, for every image registered on the sheet, the first extracted image
# file whose path contains that image's name (nil when none does).
def images(sheet = nil)
  names = sheet_for(sheet).images.map { |entry| entry.last }
  names.map do |image_name|
    image_files.find { |file_path| file_path[image_name] }
  end
end
109
+
110
# Returns the content of a spreadsheet cell, or nil when there is none.
# (1,1) is the upper left corner.
# (1,1), (1,'A'), ('A',1), ('a',1) all refer to the
# cell at the first line and first row.
def cell(row, col, sheet = nil)
  coordinate = normalize(row, col)
  found = sheet_for(sheet).cells[coordinate]
  safe_send(found, :value)
end
118
+
119
# Delegates to the sheet's #row for the given row number.
def row(rownumber, sheet = nil)
  target = sheet_for(sheet)
  target.row(rownumber)
end
122
+
123
# Returns all values in this column as an array.
# Column numbers are 1,2,3,... like in the spreadsheet; a String is treated
# as a column letter and converted to its number first.
def column(column_number, sheet = nil)
  index =
    if column_number.is_a?(::String)
      ::Roo::Utils.letter_to_number(column_number)
    else
      column_number
    end
  sheet_for(sheet).column(index)
end
131
+
132
# Number of the first non-empty row, as reported by the sheet.
def first_row(sheet = nil)
  target = sheet_for(sheet)
  target.first_row
end
136
+
137
# Number of the last non-empty row, as reported by the sheet.
def last_row(sheet = nil)
  target = sheet_for(sheet)
  target.last_row
end
141
+
142
# Number of the first non-empty column, as reported by the sheet.
def first_column(sheet = nil)
  target = sheet_for(sheet)
  target.first_column
end
146
+
147
# Number of the last non-empty column, as reported by the sheet.
def last_column(sheet = nil)
  target = sheet_for(sheet)
  target.last_column
end
151
+
152
# Stores a new cell holding +value+ at (row, col) of the sheet.
# (this will not be saved back to the spreadsheet file!)
# The type derived from the value is used both as cell type and excelx type.
def set(row, col, value, sheet = nil) #:nodoc:
  position = normalize(row, col)
  type = cell_type_by_value(value)
  cells = sheet_for(sheet).cells
  cells[position] = Cell.new(value, type, nil, type, value, nil, nil, nil, Coordinate.new(row, col))
end
159
+
160
# Returns the formula at (row,col), or nil if there is no formula.
# Use #formula? to merely check for a formula.
def formula(row, col, sheet = nil)
  coordinate = normalize(row, col)
  found = sheet_for(sheet).cells[coordinate]
  safe_send(found, :formula)
end
167
+
168
# True if #formula finds a formula for the given arguments, false otherwise.
# Always returns a strict boolean (it used to return nil or the formula).
def formula?(*args)
  formula(*args) ? true : false
end
174
+
175
# Returns every formula of the selected sheet as an array of tuples:
# [[row, col, formula], [row, col, formula], ...]
def formulas(sheet = nil)
  sheet_for(sheet).cells.each_with_object([]) do |((row, col), cell), found|
    expression = cell.formula
    found << [row, col, expression] if expression
  end
end
182
+
183
# Given a cell, returns the style definition its style index points to,
# or nil when the cell has no style index.
def font(row, col, sheet = nil)
  coordinate = normalize(row, col)
  definition_index = safe_send(sheet_for(sheet).cells[coordinate], :style)
  return unless definition_index

  styles.definitions[definition_index]
end
189
+
190
# Returns the type of a cell (nil when there is no cell), e.g.
# * :float
# * :string,
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
def celltype(row, col, sheet = nil)
  coordinate = normalize(row, col)
  found = sheet_for(sheet).cells[coordinate]
  safe_send(found, :type)
end
202
+
203
# Returns the internal type of an excel cell, e.g.
# * :numeric_or_formula
# * :string
# Note: this is only available within the Excelx class
def excelx_type(row, col, sheet = nil)
  coordinate = normalize(row, col)
  found = sheet_for(sheet).cells[coordinate]
  safe_send(found, :cell_type)
end
211
+
212
# Returns the internal value of an excelx cell (nil when there is no cell).
# Note: this is only available within the Excelx class
def excelx_value(row, col, sheet = nil)
  coordinate = normalize(row, col)
  found = sheet_for(sheet).cells[coordinate]
  safe_send(found, :cell_value)
end
218
+
219
# Returns the formatted value of an excelx cell (nil when there is no cell).
# Note: this is only available within the Excelx class
def formatted_value(row, col, sheet = nil)
  coordinate = normalize(row, col)
  found = sheet_for(sheet).cells[coordinate]
  safe_send(found, :formatted_value)
end
225
+
226
# Returns the internal format of an excel cell, as reported by the sheet
# for the normalized coordinate.
def excelx_format(row, col, sheet = nil)
  coordinate = normalize(row, col)
  target = sheet_for(sheet)
  target.excelx_format(coordinate)
end
231
+
232
# True when there is no cell at (row, col), the cell reports itself empty,
# or the coordinate lies outside the sheet's first/last row and column.
#
# The bounds check uses the normalized coordinate rather than the raw
# arguments, so letter columns such as (1, 'A') no longer fail when they are
# compared with the sheet's numeric bounds.
# NOTE(review): assumes normalize returns a numeric [row, col] pair, as the
# destructured cell keys elsewhere in this class suggest — confirm.
def empty?(row, col, sheet = nil)
  sheet = sheet_for(sheet)
  key = normalize(row, col)
  cell = sheet.cells[key]
  row, col = key
  !cell || cell.empty? ||
    (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
end
239
+
240
# Shows the internal representation of all cells of the sheet
# (the inspect output of its cells); for debugging purposes.
def to_s(sheet = nil)
  all_cells = sheet_for(sheet).cells
  all_cells.inspect
end
245
+
246
# Returns [row, col, sheet] of the labelled cell,
# or [nil, nil, nil] if the label is not defined.
def label(name)
  defined = workbook.defined_names
  if !defined.empty? && defined.key?(name)
    entry = defined[name]
    [entry.row, entry.col, entry.sheet]
  else
    [nil, nil, nil]
  end
end
254
+
255
# Returns an array with all labels. Each element is an array with
# [labelname, [row,col,sheetname]]. The result is memoized in @labels.
def labels
  @labels ||= workbook.defined_names.map { |name, label| [name, [label.row, label.col, label.sheet]] }
end
265
+
266
# True if there is a hyperlink at (row/col), false otherwise.
def hyperlink?(row, col, sheet = nil)
  hyperlink(row, col, sheet) ? true : false
end
269
+
270
# Returns the hyperlink at (row/col),
# nil if there is no hyperlink.
def hyperlink(row, col, sheet = nil)
  coordinate = normalize(row, col)
  links = sheet_for(sheet).hyperlinks
  links[coordinate]
end
276
+
277
# Returns the comment at (row/col),
# nil if there is no comment.
def comment(row, col, sheet = nil)
  coordinate = normalize(row, col)
  notes = sheet_for(sheet).comments
  notes[coordinate]
end
283
+
284
# True if there is a comment at (row/col), false otherwise.
def comment?(row, col, sheet = nil)
  comment(row, col, sheet) ? true : false
end
288
+
289
# Returns all comments of the sheet as [row, col, comment] triples.
def comments(sheet = nil)
  sheet_for(sheet).comments.map do |coordinate, text|
    row, col = coordinate
    [row, col, text]
  end
end
294
+
295
# Yields each row of a sheet as an array of Excelx::Cell.
# Takes options for sheet, pad_cells, and max_rows; :sheet selects the sheet
# and every other option is forwarded to the sheet's #each_row.
#
# The options hash is copied before :sheet is removed, so the caller's hash is
# left untouched and can be reused for further calls.
#
# Returns the result of the sheet's #each_row when a block is given,
# otherwise an Enumerator over the rows.
def each_row_streaming(options = {})
  options = options.dup
  sheet = sheet_for(options.delete(:sheet))
  return sheet.to_enum(:each_row, options) unless block_given?

  sheet.each_row(options) { |row| yield row }
end
305
+
306
+ private
307
+
308
# Runs sanitize_value over every String cell value of the named sheet and
# then marks the sheet as cleaned. Non-String values are left untouched.
def clean_sheet(sheet)
  target = @sheets_by_name[sheet]
  target.cells.each_pair do |coord, cell|
    current = cell.value
    next unless current.is_a?(::String)

    target.cells[coord].value = sanitize_value(cell.value)
  end

  @cleaned[sheet] = true
end
317
+
318
# Internal: extracts the worksheet ids from the workbook.xml file. xlsx
# documents require a workbook.xml file, so if the file is missing
# it is not a valid xlsx file. In these cases, an ArgumentError is
# raised.
#
# entries - the zip entries of the document; the one whose name ends in
#           workbook.xml is used.
# path    - A String for the entry's destination path.
#
# Examples
#
#   extract_worksheet_ids(entries, 'tmpdir/roo_workbook.xml')
#   # => ["rId1", "rId2", "rId3"]
#
# Returns an Array of Strings.
# Raises ArgumentError if no workbook.xml entry exists.
def extract_worksheet_ids(entries, path)
  wb = entries.find { |e| e.name[/workbook.xml$/] }
  # The comma matters: without it Ruby calls a method named ArgumentError
  # and raises NoMethodError instead of the documented ArgumentError.
  raise ArgumentError, 'missing required workbook file' if wb.nil?

  wb.extract(path)
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
  workbook_doc.xpath('//sheet').map { |s| s['id'] }
end
340
+
341
# Internal: extracts the worksheet relationships from workbook.xml.rels.
#
# entries - the zip entries of the document; the one whose name ends in
#           workbook.xml.rels is used.
# path    - A String for the entry's destination path.
#
# Examples
#
#   extract_worksheet_rels(entries, 'tmpdir/roo_workbook.xml.rels')
#   # => {
#         "rId1"=>"worksheets/sheet1.xml",
#         "rId2"=>"worksheets/sheet2.xml",
#         "rId3"=>"worksheets/sheet3.xml"
#        }
#
# Returns a Hash mapping relationship ids to targets; only relationships
# whose Type is one of worksheet_types are included.
# Raises ArgumentError if no workbook.xml.rels entry exists.
def extract_worksheet_rels(entries, path)
  wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
  # The comma matters: without it Ruby calls a method named ArgumentError
  # and raises NoMethodError instead of the documented ArgumentError.
  raise ArgumentError, 'missing required workbook file' if wb_rels.nil?

  wb_rels.extract(path)
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!

  relationships = rels_doc.xpath('//Relationship').select do |relationship|
    worksheet_types.include? relationship['Type']
  end

  relationships.each_with_object({}) do |relationship, hash|
    hash[relationship['Id']] = relationship['Target']
  end
end
371
+
372
# Extracts the sheets in order, but it will ignore sheets that are not
# worksheets.
#
# entries   - the zip entries of the document.
# sheet_ids - relationship ids in workbook order.
# sheets    - Hash of relationship id => worksheet target.
# tmpdir    - directory the sheets are extracted into as roo_sheet1,
#             roo_sheet2, ...
#
# The entry for a target is found by a literal suffix comparison. The target
# is no longer interpolated into a regular expression, so names containing
# regex metacharacters (parentheses, '+', ...) still match, and '.' no longer
# matches arbitrary characters.
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
  (sheet_ids & sheets.keys).each_with_index do |id, i|
    name = sheets[id]
    # The leading slash lets absolute targets ("/xl/worksheets/...") match too.
    entry = entries.find { |e| "/#{e.name}".end_with?(name) }
    path = "#{tmpdir}/roo_sheet#{i + 1}"
    sheet_files << path
    @sheet_files << path
    entry.extract(path)
  end
end
384
+
385
# Extracts every media/imageN entry into tmpdir and records the resulting
# paths in image_files.
#
# entries - the zip entries of the document.
# tmpdir  - destination directory. It is now honoured instead of silently
#           using @tmpdir, matching extract_sheets_in_order.
#
# Returns the Array of image entries that were extracted.
def extract_images(entries, tmpdir)
  img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
  img_entries.each do |entry|
    # "xl/media/image1.png" becomes "<tmpdir>/roo_media_image1.png"
    path = "#{tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
    image_files << path
    entry.extract(path)
  end
end
393
+
394
# Extracts all needed files from the zip file, which may be given as a file
# name or as a stream. Entries are processed sorted by name.
def process_zipfile(zipfilename_or_stream)
  @sheet_files = []

  zip_file =
    if is_stream?(zipfilename_or_stream)
      directory = Zip::CentralDirectory.new
      directory.read_from_stream zipfilename_or_stream
      directory
    else
      Zip::File.open(zipfilename_or_stream)
    end

  sorted_entries = zip_file.to_a.sort_by(&:name)
  process_zipfile_entries sorted_entries
end
407
+
408
# Extracts the workbook, worksheets, images and the auxiliary parts (shared
# strings, styles, comments, sheet relationships, drawing relationships) of
# the document into @tmpdir.
def process_zipfile_entries(entries)
  # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
  #       are not in order. With Numbers 3.1, the first sheet is always
  #       sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
  #       independent of a worksheet's filename (i.e. sheet6.xml can be the
  #       first worksheet).
  #
  #       workbook.xml lists the correct order of worksheets and
  #       workbook.xml.rels lists the filenames for those worksheets.
  #
  #       workbook.xml:
  #         <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
  #         <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
  #       workbook.xml.rel:
  #         <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  #         <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
  sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
  extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
  extract_images(entries, @tmpdir)

  entries.each do |entry|
    # Destination for this entry, or nil when the entry is not extracted here.
    destination =
      case entry.name.downcase
      when /richdata/
        # FIXME: Ignore richData as parsing is not implemented yet and can cause
        # Zip::DestinationFileExistsError when including a second "styles.xml" entry
        # see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2
        nil
      when /sharedstrings.xml$/
        "#{@tmpdir}/roo_sharedStrings.xml"
      when /styles.xml$/
        "#{@tmpdir}/roo_styles.xml"
      when /comments([0-9]+).xml$/
        # FIXME: Most of the time, The order of the comment files are the same
        #        the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
        #        In some situations, this isn't true. The true location of a
        #        sheet's comment file is in the sheet1.xml.rels file. SEE
        #        ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
        number = Regexp.last_match(1).to_i
        comments_files[number - 1] = "#{@tmpdir}/roo_comments#{number}"
      when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
        # NOTE: Chart sheet relationship files were interfering with
        #       worksheets.
        nil
      when /sheet([0-9]+).xml.rels$/
        # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
        #        it also stores the location for sharedStrings, comments,
        #        drawings, etc.
        number = Regexp.last_match(1).to_i
        rels_files[number - 1] = "#{@tmpdir}/roo_rels#{number}"
      when /drawing([0-9]+).xml.rels$/
        # Extracting drawing relationships to make images lists for each sheet
        number = Regexp.last_match(1).to_i
        image_rels[number - 1] = "#{@tmpdir}/roo_image_rels#{number}"
      end

    entry.extract(destination) if destination
  end
end
468
+
469
# Sends +method+ with +args+ to +object+ if the object is not nil and
# responds to the method; returns nil otherwise.
def safe_send(object, method, *args)
  return if object.nil?
  return unless object.respond_to?(method)

  object.send(method, *args)
end
472
+
473
# Relationship types that identify a worksheet: first the OOXML Transitional
# type, then the OOXML Strict type.
def worksheet_types
  %w[
    http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet
    http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet
  ]
end
479
+ end
480
+ end
data/lib/roo/font.rb ADDED
@@ -0,0 +1,17 @@
1
module Roo
  # Holds the bold, italic and underline flags of a font.
  class Font
    attr_accessor :bold, :italic, :underline

    # bold?, italic? and underline? return the stored flag as is
    # (nil while the flag has never been assigned).
    %i[bold italic underline].each do |flag|
      define_method("#{flag}?") { instance_variable_get("@#{flag}") }
    end
  end
end
@@ -0,0 +1,15 @@
1
module Roo
  module Formatters
    module Base
      # Converts a number of seconds into a time string like '02:05:06'
      # (hours, minutes, seconds, each zero-padded to two digits).
      def integer_to_timestring(content)
        hours, remainder = content.divmod(3600)
        minutes, seconds = remainder.divmod(60)
        Kernel.format("%02d:%02d:%02d", hours, minutes, seconds)
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
module Roo
  module Formatters
    module CSV
      # Writes the sheet as CSV. With a filename the output goes to that file
      # and true is returned; without one the CSV text is returned as a String.
      def to_csv(filename = nil, separator = ",", sheet = default_sheet)
        unless filename
          buffer = ::StringIO.new
          write_csv_content(buffer, sheet, separator)
          buffer.rewind
          return buffer.read
        end

        File.open(filename, "w") { |file| write_csv_content(file, sheet, separator) }
        true
      end

      private

      # Write all cells to the given IO-like object. If the file argument is
      # nil the output goes to STDOUT. Nothing is written for an empty sheet.
      def write_csv_content(file = nil, sheet = nil, separator = ",")
        file ||= STDOUT
        return unless first_row(sheet) # The sheet is empty

        1.upto(last_row(sheet)) do |row|
          1.upto(last_column(sheet)) do |col|
            # TODO: use CSV.generate_line
            file.print(separator) unless col == 1
            file.print cell_to_csv(row, col, sheet)
          end
          file.print("\n")
        end
      end

      # The content of a cell in the csv output. Strings are wrapped in double
      # quotes with embedded quotes doubled; whole-number floats are printed
      # without a fractional part.
      def cell_to_csv(row, col, sheet)
        return "" if empty?(row, col, sheet)

        content = cell(row, col, sheet)
        type = celltype(row, col, sheet)

        quote = ->(text) { %("#{text.gsub('"', '""')}") }
        number = ->(value) { value == value.to_i ? value.to_i.to_s : value.to_s }

        rendered =
          case type
          when :string
            quote.call(content) unless content.empty?
          when :boolean
            # TODO: this only works for excelx
            quote.call(sheet_for(sheet).cells[[row, col]].formatted_value).downcase
          when :float, :percentage
            number.call(content)
          when :formula
            case content
            when String
              quote.call(content) unless content.empty?
            when Integer
              content.to_s
            when Float
              number.call(content)
            when Date, DateTime, TrueClass, FalseClass
              content.to_s
            else
              fail "unhandled onecell-class #{content.class}"
            end
          when :date, :datetime
            content.to_s
          when :time
            integer_to_timestring(content)
          when :link
            quote.call(content.url)
          else
            fail "unhandled celltype #{type}"
          end

        rendered || ""
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Roo
  module Formatters
    module Matrix
      # Returns a matrix object from the whole sheet or a rectangular area of
      # a sheet. Bounds that are not given default to the sheet's first/last
      # row and column.
      #
      # Returns an empty Matrix when the requested sheet has no first row.
      def to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
        require 'matrix'

        # Check the requested sheet; checking only the default sheet returned
        # an empty matrix (or failed on nil bounds) for other sheets.
        return ::Matrix.empty unless first_row(sheet)

        from_row ||= first_row(sheet)
        to_row ||= last_row(sheet)
        from_column ||= first_column(sheet)
        to_column ||= last_column(sheet)

        ::Matrix.rows(from_row.upto(to_row).map do |row|
          from_column.upto(to_column).map do |col|
            cell(row, col, sheet)
          end
        end)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
# Returns an XML representation of all sheets of a spreadsheet file.
module Roo
  module Formatters
    module XML
      # Builds a <spreadsheet> document with one <sheet name=...> element per
      # sheet and one <cell row=... column=... type=...> element per
      # non-empty cell.
      #
      # Each sheet is passed explicitly to the accessors, so default_sheet is
      # no longer reassigned and left pointing at the last sheet afterwards.
      def to_xml
        Nokogiri::XML::Builder.new do |xml|
          xml.spreadsheet do
            sheets.each do |sheet|
              xml.sheet(name: sheet) do |x|
                # Empty sheets have no bounds; iterating them would raise.
                if first_row(sheet) && last_row(sheet) && first_column(sheet) && last_column(sheet)
                  first_row(sheet).upto(last_row(sheet)) do |row|
                    first_column(sheet).upto(last_column(sheet)) do |col|
                      next if empty?(row, col, sheet)

                      x.cell(cell(row, col, sheet),
                             row: row,
                             column: col,
                             type: celltype(row, col, sheet))
                    end
                  end
                end
              end
            end
          end
        end.to_xml
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Roo
  module Formatters
    module YAML
      # Returns a rectangular area (default: all cells) as yaml-output.
      # You can add additional attributes with the prefix parameter like:
      # oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"})
      #
      # Bounds that are not given default to the sheet's first/last row and
      # column. Values of :time cells are rendered via integer_to_timestring.
      #
      # Returns an empty string when the requested sheet has no first row.
      def to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
        # Check the requested sheet; checking only the default sheet returned
        # "" (or failed on nil bounds) for other sheets.
        return "" unless first_row(sheet)

        from_row ||= first_row(sheet)
        to_row ||= last_row(sheet)
        from_column ||= first_column(sheet)
        to_column ||= last_column(sheet)

        result = "--- \n"
        from_row.upto(to_row) do |row|
          from_column.upto(to_column) do |col|
            next if empty?(row, col, sheet)

            type = celltype(row, col, sheet)
            result << "cell_#{row}_#{col}: \n"
            prefix.each do |k, v|
              result << " #{k}: #{v} \n"
            end
            result << " row: #{row} \n"
            result << " col: #{col} \n"
            result << " celltype: #{type} \n"
            value = cell(row, col, sheet)
            value = integer_to_timestring(value) if type == :time
            result << " value: #{value} \n"
          end
        end

        result
      end
    end
  end
end