ruh-roo 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +677 -0
  3. data/Gemfile +24 -0
  4. data/LICENSE +24 -0
  5. data/README.md +315 -0
  6. data/lib/roo/base.rb +607 -0
  7. data/lib/roo/constants.rb +7 -0
  8. data/lib/roo/csv.rb +141 -0
  9. data/lib/roo/errors.rb +11 -0
  10. data/lib/roo/excelx/cell/base.rb +108 -0
  11. data/lib/roo/excelx/cell/boolean.rb +30 -0
  12. data/lib/roo/excelx/cell/date.rb +28 -0
  13. data/lib/roo/excelx/cell/datetime.rb +107 -0
  14. data/lib/roo/excelx/cell/empty.rb +20 -0
  15. data/lib/roo/excelx/cell/number.rb +89 -0
  16. data/lib/roo/excelx/cell/string.rb +19 -0
  17. data/lib/roo/excelx/cell/time.rb +44 -0
  18. data/lib/roo/excelx/cell.rb +110 -0
  19. data/lib/roo/excelx/comments.rb +55 -0
  20. data/lib/roo/excelx/coordinate.rb +19 -0
  21. data/lib/roo/excelx/extractor.rb +39 -0
  22. data/lib/roo/excelx/format.rb +71 -0
  23. data/lib/roo/excelx/images.rb +26 -0
  24. data/lib/roo/excelx/relationships.rb +33 -0
  25. data/lib/roo/excelx/shared.rb +39 -0
  26. data/lib/roo/excelx/shared_strings.rb +151 -0
  27. data/lib/roo/excelx/sheet.rb +151 -0
  28. data/lib/roo/excelx/sheet_doc.rb +248 -0
  29. data/lib/roo/excelx/styles.rb +64 -0
  30. data/lib/roo/excelx/workbook.rb +63 -0
  31. data/lib/roo/excelx.rb +480 -0
  32. data/lib/roo/font.rb +17 -0
  33. data/lib/roo/formatters/base.rb +15 -0
  34. data/lib/roo/formatters/csv.rb +84 -0
  35. data/lib/roo/formatters/matrix.rb +23 -0
  36. data/lib/roo/formatters/xml.rb +31 -0
  37. data/lib/roo/formatters/yaml.rb +40 -0
  38. data/lib/roo/helpers/default_attr_reader.rb +20 -0
  39. data/lib/roo/helpers/weak_instance_cache.rb +41 -0
  40. data/lib/roo/libre_office.rb +4 -0
  41. data/lib/roo/link.rb +34 -0
  42. data/lib/roo/open_office.rb +628 -0
  43. data/lib/roo/spreadsheet.rb +39 -0
  44. data/lib/roo/tempdir.rb +21 -0
  45. data/lib/roo/utils.rb +128 -0
  46. data/lib/roo/version.rb +3 -0
  47. data/lib/roo.rb +36 -0
  48. data/roo.gemspec +28 -0
  49. metadata +189 -0
data/lib/roo/excelx.rb ADDED
@@ -0,0 +1,480 @@
1
+ require 'nokogiri'
2
+ require 'zip/filesystem'
3
+ require 'roo/link'
4
+ require 'roo/tempdir'
5
+ require 'roo/utils'
6
+ require 'forwardable'
7
+ require 'set'
8
+
9
+ module Roo
10
+ class Excelx < Roo::Base
11
+ extend Roo::Tempdir
12
+ extend Forwardable
13
+
14
+ ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
15
+
16
+ require 'roo/excelx/shared'
17
+ require 'roo/excelx/workbook'
18
+ require 'roo/excelx/shared_strings'
19
+ require 'roo/excelx/styles'
20
+ require 'roo/excelx/cell'
21
+ require 'roo/excelx/sheet'
22
+ require 'roo/excelx/relationships'
23
+ require 'roo/excelx/comments'
24
+ require 'roo/excelx/sheet_doc'
25
+ require 'roo/excelx/coordinate'
26
+ require 'roo/excelx/format'
27
+ require 'roo/excelx/images'
28
+
29
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
30
+ ExceedsMaxError = Class.new(StandardError)
31
+
32
# Opens an xlsx/xlsm workbook given as a file name or a stream, unpacks the
# parts it needs into a temporary directory and registers one Sheet per
# worksheet listed in the workbook.
#
# filename_or_stream - file name or stream to read the workbook from.
# options            - Hash of options. Keys read here: :packed,
#                      :file_warning (defaults to :error), :cell_max,
#                      :expand_merged_ranges, :no_hyperlinks, :empty_cell,
#                      :disable_html_wrapper, :tmpdir_root,
#                      :only_visible_sheets and :sheet.
#
# When :cell_max (an Integer) is given, the cell count of the selected
# sheet's dimensions is checked and ExceedsMaxError is raised if it is
# larger, so enormous documents are rejected early.
#
# NOTE: :cell_max (and :sheet, when :cell_max is given) are removed from
# the options hash that was passed in.
def initialize(filename_or_stream, options = {})
  packed = options[:packed]
  file_warning = options.fetch(:file_warning, :error)
  cell_max = options.delete(:cell_max)

  sheet_options = {
    expand_merged_ranges: options[:expand_merged_ranges] || false,
    no_hyperlinks: options[:no_hyperlinks] || false,
    empty_cell: options[:empty_cell] || false
  }
  shared_options = {
    disable_html_wrapper: options[:disable_html_wrapper] || false
  }

  # Streams have no file name to check; basename stays nil for them.
  unless is_stream?(filename_or_stream)
    file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
    basename = find_basename(filename_or_stream)
  end

  # NOTE: The temp directory is removed by a finalizer once this object is
  #       garbage collected. The finalizer is defined here rather than in
  #       Roo::Tempdir because that led to a segfault when testing in
  #       Ruby 2.4.0.
  @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
  ObjectSpace.define_finalizer(self, self.class.finalize(object_id))

  @shared = Shared.new(@tmpdir, shared_options)
  @filename = local_filename(filename_or_stream, @tmpdir, packed)
  process_zipfile(@filename || filename_or_stream)

  @sheet_names = []
  @sheets = []
  @sheets_by_name = {}

  workbook.sheets.each_with_index do |entry, position|
    next if options[:only_visible_sheets] && entry['state'] == 'hidden'

    name = entry['name']
    @sheet_names << name
    worksheet = Sheet.new(name, @shared, position, sheet_options)
    @sheets[position] = worksheet
    @sheets_by_name[name] = worksheet
  end

  if cell_max
    dimensions = sheet_for(options.delete(:sheet)).dimensions
    cell_count = ::Roo::Utils.num_cells_in_range(dimensions)
    if cell_count > cell_max
      raise ExceedsMaxError, "Excel file exceeds cell maximum: #{cell_count} > #{cell_max}"
    end
  end

  super
rescue
  # Clean up the temp directory right away when construction fails.
  self.class.finalize_tempdirs(object_id)
  raise
end
85
+
86
# Resolves calls whose name matches a defined name of the workbook to the
# value of the labelled cell. Every other name is handed to the parent
# implementation (which covers methods like #a1).
def method_missing(method, *args)
  label = workbook.defined_names[method.to_s]
  return super unless label

  labelled_cell = sheet_for(label.sheet).cells[label.key]
  safe_send(labelled_cell, :value)
end
94
+
95
# Returns the Array of sheet names collected while the workbook was opened.
def sheets
  @sheet_names
end
98
+
99
# Looks up the Sheet object for a sheet given by name or by index.
# Falls back to the default sheet when +sheet+ is nil. The reference is
# validated with validate_sheet! before the lookup.
def sheet_for(sheet)
  target = sheet || default_sheet
  validate_sheet!(target)

  by_name = @sheets_by_name[target]
  return by_name if by_name

  @sheets[target]
end
104
+
105
# Returns, for every image registered on the sheet, the first extracted
# image file whose path contains that image's name (nil when none does).
def images(sheet = nil)
  sheet_for(sheet).images.map do |image|
    wanted = image.last
    image_files.find { |candidate| candidate[wanted] }
  end
end
109
+
110
# Returns the value of the cell at (row, col); nil when there is no such
# cell. (1,1) is the upper left corner, and (1,1), (1,'A'), ('A',1) and
# ('a',1) all address that same first cell.
def cell(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :value)
end
118
+
119
# Returns the given row of the selected sheet, as produced by Sheet#row.
def row(rownumber, sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.row(rownumber)
end
122
+
123
# Returns all values of a column as an array.
# Columns are numbered 1, 2, 3, ... like in the spreadsheet; a String is
# treated as a column letter and converted to its number first.
def column(column_number, sheet = nil)
  index =
    if column_number.is_a?(::String)
      ::Roo::Utils.letter_to_number(column_number)
    else
      column_number
    end

  sheet_for(sheet).column(index)
end
131
+
132
# Number of the first non-empty row of the selected sheet.
def first_row(sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.first_row
end
136
+
137
# Number of the last non-empty row of the selected sheet.
def last_row(sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.last_row
end
141
+
142
# Number of the first non-empty column of the selected sheet.
def first_column(sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.first_column
end
146
+
147
# Number of the last non-empty column of the selected sheet.
def last_column(sheet = nil)
  worksheet = sheet_for(sheet)
  worksheet.last_column
end
151
+
152
# Stores +value+ in the in-memory cell at (row, col) and returns the new
# cell object. The change is never written back to the spreadsheet file.
def set(row, col, value, sheet = nil) #:nodoc:
  key = normalize(row, col)
  cell_type = cell_type_by_value(value)
  cells = sheet_for(sheet).cells
  cells[key] = Cell.new(
    value, cell_type, nil, cell_type, value, nil, nil, nil,
    Coordinate.new(row, col)
  )
end
159
+
160
# Returns the formula stored at (row, col), or nil when the cell has none.
# Use #formula? for a boolean answer.
def formula(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :formula)
end
167
+
168
# True when the addressed cell has a formula, false otherwise. Takes the
# same arguments as #formula and always answers with a real boolean
# (older behaviour leaked nil or the formula itself).
def formula?(*args)
  formula(*args) ? true : false
end
174
+
175
# Collects every formula of the selected sheet as an array of tuples:
#   [[row, col, formula], [row, col, formula], ...]
def formulas(sheet = nil)
  sheet_for(sheet).cells.each_with_object([]) do |((x, y), cell), found|
    found << [x, y, cell.formula] if cell.formula
  end
end
182
+
183
# Returns the style definition that the cell at (row, col) points to, or
# nil when the cell is missing or has no style index.
def font(row, col, sheet = nil)
  coordinates = normalize(row, col)
  style_index = safe_send(sheet_for(sheet).cells[coordinates], :style)
  return unless style_index

  styles.definitions[style_index]
end
189
+
190
# Returns the type of the cell at (row, col), nil when there is no cell.
# Documented types:
# * :float
# * :string
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
def celltype(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :type)
end
202
+
203
# Returns the internal (Excel-side) type of the cell at (row, col), e.g.
# * :numeric_or_formula
# * :string
# Only available within the Excelx class.
def excelx_type(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :cell_type)
end
211
+
212
# Returns the internal (Excel-side) value of the cell at (row, col), or
# nil when there is no cell. Only available within the Excelx class.
def excelx_value(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :cell_value)
end
218
+
219
# Returns the formatted value of the cell at (row, col), or nil when
# there is no cell. Only available within the Excelx class.
def formatted_value(row, col, sheet = nil)
  coordinates = normalize(row, col)
  found = sheet_for(sheet).cells[coordinates]
  safe_send(found, :formatted_value)
end
225
+
226
# Returns the internal format of the cell at (row, col), as reported by
# the sheet for the normalized coordinates.
def excelx_format(row, col, sheet = nil)
  coordinates = normalize(row, col)
  worksheet = sheet_for(sheet)
  worksheet.excelx_format(coordinates)
end
231
+
232
# True when there is no cell at (row, col), when the cell reports itself
# as empty, or when the position lies outside the used range of the sheet.
def empty?(row, col, sheet = nil)
  worksheet = sheet_for(sheet)
  found = worksheet.cells[normalize(row, col)]
  return true unless found

  found.empty? ||
    row < worksheet.first_row || row > worksheet.last_row ||
    col < worksheet.first_column || col > worksheet.last_column
end
239
+
240
# Debugging aid: the inspect output of all cells of the selected sheet.
def to_s(sheet = nil)
  cells = sheet_for(sheet).cells
  cells.inspect
end
245
+
246
# Returns [row, col, sheet] of the cell labelled +name+, or
# [nil, nil, nil] when no such label is defined in the workbook.
def label(name)
  defined_names = workbook.defined_names
  missing = defined_names.empty? || !defined_names.key?(name)
  return [nil, nil, nil] if missing

  entry = defined_names[name]
  [entry.row, entry.col, entry.sheet]
end
254
+
255
# Returns an array with all labels of the workbook. Each element has the
# form [labelname, [row, col, sheetname]]. The result is memoized.
def labels
  @labels ||= workbook.defined_names.map { |name, label| [name, [label.row, label.col, label.sheet]] }
end
265
+
266
# True when the cell at (row, col) carries a hyperlink, false otherwise.
def hyperlink?(row, col, sheet = nil)
  hyperlink(row, col, sheet) ? true : false
end
269
+
270
# Returns the hyperlink registered for (row, col) on the selected sheet,
# or nil when the cell has none.
def hyperlink(row, col, sheet = nil)
  coordinates = normalize(row, col)
  links = sheet_for(sheet).hyperlinks
  links[coordinates]
end
276
+
277
# Returns the comment attached to (row, col) on the selected sheet, or
# nil when the cell has none.
def comment(row, col, sheet = nil)
  coordinates = normalize(row, col)
  notes = sheet_for(sheet).comments
  notes[coordinates]
end
283
+
284
# True when the cell at (row, col) has a comment, false otherwise.
def comment?(row, col, sheet = nil)
  comment(row, col, sheet) ? true : false
end
288
+
289
# Returns all comments of the selected sheet as an array of
# [row, col, comment] tuples.
def comments(sheet = nil)
  sheet_for(sheet).comments.map { |(row, col), text| [row, col, text] }
end
294
+
295
# Streams the rows of a sheet, yielding each row produced by the sheet's
# #each_row (an array of Excelx::Cell).
#
# options - Hash of options. :sheet selects the sheet (default sheet when
#           absent); every other key (e.g. pad_cells, max_rows) is passed
#           on to the sheet's #each_row unchanged.
#
# Returns an Enumerator when no block is given.
#
# The caller's hash is left untouched: :sheet is removed from a private
# copy only. Deleting it from the original made a second call with the
# same hash silently fall back to the default sheet.
def each_row_streaming(options = {})
  row_options = options.dup
  sheet = sheet_for(row_options.delete(:sheet))

  return sheet.to_enum(:each_row, row_options) unless block_given?

  sheet.each_row(row_options) { |row| yield row }
end
305
+
306
+ private
307
+
308
# Runs sanitize_value over every String cell value of the named sheet and
# then marks the sheet as cleaned.
def clean_sheet(sheet)
  worksheet = @sheets_by_name[sheet]

  worksheet.cells.each_pair do |coord, cell|
    raw = cell.value
    next unless raw.is_a?(::String)

    worksheet.cells[coord].value = sanitize_value(raw)
  end

  @cleaned[sheet] = true
end
317
+
318
# Internal: extracts the worksheet ids from the workbook.xml file. xlsx
# documents require a workbook.xml file, so if the file is missing it is
# not a valid xlsx file. In that case an ArgumentError is raised.
#
# entries - the zip entries of the document; the first one whose name
#           ends in workbook.xml is used.
# path    - A String with the destination path the entry is extracted to.
#
# Examples
#
#   extract_worksheet_ids(entries, 'tmpdir/roo_workbook.xml')
#   # => ["rId1", "rId2", "rId3"]
#
# Returns an Array of Strings.
# Raises ArgumentError when no workbook.xml entry exists.
def extract_worksheet_ids(entries, path)
  wb = entries.find { |e| e.name[/workbook.xml$/] }
  # The comma matters: without it Ruby parses `ArgumentError '...'` as a
  # method call and raises NoMethodError instead.
  raise ArgumentError, 'missing required workbook file' if wb.nil?

  wb.extract(path)
  workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
  workbook_doc.xpath('//sheet').map { |s| s['id'] }
end
340
+
341
# Internal: reads workbook.xml.rels and maps relationship ids to the
# targets of all worksheet relationships.
#
# entries - the zip entries of the document; the first one whose name
#           ends in workbook.xml.rels is used.
# path    - A String with the destination path the entry is extracted to.
#
# Examples
#
#   extract_worksheet_rels(entries, 'tmpdir/roo_workbook.xml.rels')
#   # => {
#     "rId1"=>"worksheets/sheet1.xml",
#     "rId2"=>"worksheets/sheet2.xml",
#     "rId3"=>"worksheets/sheet3.xml"
#   }
#
# Returns a Hash.
# Raises ArgumentError when no workbook.xml.rels entry exists.
def extract_worksheet_rels(entries, path)
  wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
  # The comma matters: without it Ruby parses `ArgumentError '...'` as a
  # method call and raises NoMethodError instead.
  raise ArgumentError, 'missing required workbook file' if wb_rels.nil?

  wb_rels.extract(path)
  rels_doc = Roo::Utils.load_xml(path).remove_namespaces!

  # Only relationships of a worksheet type are kept.
  relationships = rels_doc.xpath('//Relationship').select do |relationship|
    worksheet_types.include? relationship['Type']
  end

  relationships.each_with_object({}) do |relationship, hash|
    hash[relationship['Id']] = relationship['Target']
  end
end
371
+
372
# Extracts the worksheets in workbook order, ignoring sheets that are not
# worksheets (ids without an entry in +sheets+).
#
# entries   - the zip entries of the document.
# sheet_ids - relationship ids in workbook order.
# sheets    - Hash mapping relationship ids to worksheet targets.
# tmpdir    - directory the sheets are extracted into as roo_sheet<N>.
#
# Each destination path is recorded both in the shared sheet_files list
# and in @sheet_files.
def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
  (sheet_ids & sheets.keys).each_with_index do |id, i|
    name = sheets[id]
    # Literal suffix comparison: interpolating the target into a regex
    # let "." and other metacharacters match unrelated entries.
    entry = entries.find { |e| "/#{e.name}".end_with?(name) }
    path = "#{tmpdir}/roo_sheet#{i + 1}"
    sheet_files << path
    @sheet_files << path
    entry.extract(path)
  end
end
384
+
385
# Extracts every media/image<N> entry into +tmpdir+ and records each
# destination path in image_files.
#
# entries - the zip entries of the document.
# tmpdir  - directory the images are extracted into. The file name is
#           "roo" plus the entry name with "xl/" and "/" replaced by "_".
#
# The +tmpdir+ argument is honoured; previously it was ignored in favour
# of @tmpdir.
def extract_images(entries, tmpdir)
  img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
  img_entries.each do |entry|
    path = "#{tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
    image_files << path
    entry.extract(path)
  end
end
393
+
394
# Extracts all needed files from the zip archive, given either as a file
# name or as a stream. Entries are processed sorted by name.
def process_zipfile(zipfilename_or_stream)
  @sheet_files = []

  if is_stream?(zipfilename_or_stream)
    zip_file = Zip::CentralDirectory.new
    zip_file.read_from_stream zipfilename_or_stream
  else
    zip_file = Zip::File.open(zipfilename_or_stream)
  end

  sorted_entries = zip_file.to_a.sort_by(&:name)
  process_zipfile_entries sorted_entries
end
407
+
408
# Extracts the workbook parts Roo needs from the given zip entries into
# @tmpdir: worksheets (in workbook order), images, shared strings,
# styles, comments, sheet relationships and drawing relationships.
def process_zipfile_entries(entries)
  # NOTE: Worksheet file names cannot be trusted for ordering. Numbers 3.1
  #       names the first sheet sheet.xml rather than sheet1.xml, and
  #       with Google exports the order of the worksheets is independent
  #       of the file name (sheet6.xml can be the first worksheet).
  #
  #       workbook.xml lists the correct order of worksheets and
  #       workbook.xml.rels lists the file names for those worksheets.
  #
  # workbook.xml:
  #   <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
  #   <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
  # workbook.xml.rels:
  #   <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  #   <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
  sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
  sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
  extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
  extract_images(entries, @tmpdir)

  entries.each do |entry|
    # A nil destination means the entry is not extracted.
    destination =
      case entry.name.downcase
      when /richdata/
        # FIXME: richData is skipped because parsing is not implemented
        #        yet and a second "styles.xml" entry can cause
        #        Zip::DestinationFileExistsError.
        #        See http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2
        nil
      when /sharedstrings.xml$/
        "#{@tmpdir}/roo_sharedStrings.xml"
      when /styles.xml$/
        "#{@tmpdir}/roo_styles.xml"
      when /comments([0-9]+).xml$/
        # FIXME: Most of the time the comment files follow the sheet
        #        order, i.e. sheet1.xml's comments are in comments1.xml.
        #        That is not always true; the real location of a sheet's
        #        comment file is in its sheet1.xml.rels file. SEE
        #        ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
        number = Regexp.last_match(1).to_i
        comments_files[number - 1] = "#{@tmpdir}/roo_comments#{number}"
      when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
        # NOTE: Chart sheet relationship files were interfering with
        #       worksheets, so they are skipped.
        nil
      when /sheet([0-9]+).xml.rels$/
        # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only,
        #        but it also stores the location for sharedStrings,
        #        comments, drawings, etc.
        number = Regexp.last_match(1).to_i
        rels_files[number - 1] = "#{@tmpdir}/roo_rels#{number}"
      when /drawing([0-9]+).xml.rels$/
        # Drawing relationships are needed to list the images per sheet.
        number = Regexp.last_match(1).to_i
        image_rels[number - 1] = "#{@tmpdir}/roo_image_rels#{number}"
      end

    next unless destination

    entry.extract(destination)
  end
end
468
+
469
# Calls +method+ on +object+ with +args+ when the object is not nil and
# responds to it; returns nil otherwise.
def safe_send(object, method, *args)
  return unless object&.respond_to?(method)

  object.send(method, *args)
end
472
+
473
# Relationship type URIs that identify a worksheet, for both OOXML
# flavours (Transitional first, then Strict).
def worksheet_types
  transitional = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
  strict = 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet'

  [transitional, strict]
end
479
+ end
480
+ end
data/lib/roo/font.rb ADDED
@@ -0,0 +1,17 @@
1
module Roo
  # Plain holder for the bold, italic and underline attributes of a font.
  class Font
    attr_accessor :bold, :italic, :underline

    # Predicate readers; each returns the stored attribute value as-is.
    alias bold? bold
    alias italic? italic
    alias underline? underline
  end
end
@@ -0,0 +1,15 @@
1
module Roo
  module Formatters
    # Helpers shared by the formatter mixins.
    module Base
      # Converts a number of seconds into a time string like '02:05:06'.
      def integer_to_timestring(content)
        hours = (content / 3600.0).floor
        after_hours = content - hours * 3600
        minutes = (after_hours / 60.0).floor
        seconds = after_hours - minutes * 60

        Kernel.format("%02d:%02d:%02d", hours, minutes, seconds)
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
module Roo
  module Formatters
    # CSV export for spreadsheet objects.
    module CSV
      # Renders a sheet as CSV.
      #
      # filename  - when given, the CSV is written to that file and true
      #             is returned; otherwise the CSV text is returned.
      # separator - String placed between columns.
      # sheet     - sheet to export (defaults to the default sheet).
      def to_csv(filename = nil, separator = ",", sheet = default_sheet)
        unless filename
          buffer = ::StringIO.new
          write_csv_content(buffer, sheet, separator)
          buffer.rewind
          return buffer.read
        end

        File.open(filename, "w") { |file| write_csv_content(file, sheet, separator) }
        true
      end

      private

      # Writes all cells of the sheet to +file+, an IO-like object that
      # responds to #print. When +file+ is nil the output goes to STDOUT.
      # Nothing is written for an empty sheet.
      def write_csv_content(file = nil, sheet = nil, separator = ",")
        output = file || STDOUT
        return unless first_row(sheet) # The sheet is empty

        1.upto(last_row(sheet)) { |row|
          1.upto(last_column(sheet)) { |col|
            # TODO: use CSV.generate_line
            output.print(separator) if col > 1
            output.print cell_to_csv(row, col, sheet)
          }
          output.print("\n")
        }
      end

      # The text of a single cell in the csv output. Empty cells and empty
      # strings become "". Raises for cell types or formula results it
      # does not know how to render.
      def cell_to_csv(row, col, sheet)
        return "" if empty?(row, col, sheet)

        onecell = cell(row, col, sheet)
        type = celltype(row, col, sheet)

        # Wraps text in double quotes, doubling embedded quotes.
        quote = ->(text) { %("#{text.gsub('"', '""')}") }
        # Whole numbers are written without a fractional part.
        number = ->(value) { value == value.to_i ? value.to_i.to_s : value.to_s }

        rendered =
          case type
          when :string
            onecell.empty? ? nil : quote.call(onecell)
          when :boolean
            # TODO: this only works for excelx
            formatted = self.sheet_for(sheet).cells[[row, col]].formatted_value
            %("#{formatted.gsub('"', '""').downcase}")
          when :float, :percentage
            number.call(onecell)
          when :formula
            case onecell
            when String
              onecell.empty? ? nil : quote.call(onecell)
            when Float
              number.call(onecell)
            when Integer, Date, DateTime, TrueClass, FalseClass
              onecell.to_s
            else
              fail "unhandled onecell-class #{onecell.class}"
            end
          when :date, :datetime
            onecell.to_s
          when :time
            integer_to_timestring(onecell)
          when :link
            %("#{onecell.url.gsub('"', '""')}")
          else
            fail "unhandled celltype #{type}"
          end

        rendered || ""
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Roo
  module Formatters
    # Matrix export for spreadsheet objects.
    module Matrix
      # Returns a ::Matrix built from the whole sheet or from a
      # rectangular area of it.
      #
      # from_row, from_column, to_row, to_column - bounds of the area;
      #   each defaults to the first/last used row or column of +sheet+.
      # sheet - sheet to read (defaults to the default sheet).
      #
      # Returns an empty matrix when +sheet+ has no first row. The check
      # uses the requested sheet; checking the default sheet instead gave
      # wrong results whenever the two differed.
      def to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
        require 'matrix'

        return ::Matrix.empty unless first_row(sheet)

        from_row ||= first_row(sheet)
        to_row ||= last_row(sheet)
        from_column ||= first_column(sheet)
        to_column ||= last_column(sheet)

        rows = from_row.upto(to_row).map do |row|
          from_column.upto(to_column).map { |col| cell(row, col, sheet) }
        end

        ::Matrix.rows(rows)
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module Roo
  module Formatters
    # XML export for spreadsheet objects.
    module XML
      # Returns an XML representation of all sheets of a spreadsheet file.
      #
      # NOTE: default_sheet is reassigned to each sheet in turn and is
      # not reset afterwards.
      def to_xml
        document = Nokogiri::XML::Builder.new do |xml|
          xml.spreadsheet do
            sheets.each do |sheet_name|
              self.default_sheet = sheet_name
              xml.sheet(name: sheet_name) do |sheet_node|
                # Empty sheets have no bounds; iterating them would fail.
                if first_row && last_row && first_column && last_column
                  first_row.upto(last_row) do |row|
                    first_column.upto(last_column) do |col|
                      next if empty?(row, col)

                      attributes = { row: row, column: col, type: celltype(row, col) }
                      sheet_node.cell(cell(row, col), attributes)
                    end
                  end
                end
              end
            end
          end
        end

        document.to_xml
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Roo
  module Formatters
    # YAML-style text export for spreadsheet objects.
    module YAML
      # Returns a rectangular area (default: all cells) as yaml output.
      # Additional attributes can be added to every cell with the prefix
      # parameter, e.g.:
      #   oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"})
      #
      # prefix - Hash of extra key/value pairs written for each cell.
      # from_row, from_column, to_row, to_column - bounds of the area;
      #   each defaults to the first/last used row or column of +sheet+.
      # sheet  - sheet to read (defaults to the default sheet).
      #
      # Returns "" when +sheet+ has no first row. The check uses the
      # requested sheet; checking the default sheet instead gave wrong
      # results whenever the two differed.
      def to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
        return "" unless first_row(sheet)

        from_row ||= first_row(sheet)
        to_row ||= last_row(sheet)
        from_column ||= first_column(sheet)
        to_column ||= last_column(sheet)

        result = "--- \n"
        from_row.upto(to_row) do |row|
          from_column.upto(to_column) do |col|
            next if empty?(row, col, sheet)

            type = celltype(row, col, sheet)
            value = cell(row, col, sheet)
            # Time cells are written as a time string, not as a number.
            value = integer_to_timestring(value) if type == :time

            result << "cell_#{row}_#{col}: \n"
            prefix.each do |k, v|
              result << " #{k}: #{v} \n"
            end
            result << " row: #{row} \n"
            result << " col: #{col} \n"
            result << " celltype: #{type} \n"
            result << " value: #{value} \n"
          end
        end

        result
      end
    end
  end
end