donibuchanan-roo 1.3.12 → 1.9.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/roo/excelx.rb CHANGED
@@ -1,11 +1,15 @@
1
- require 'xml'
1
+ #TODO: require 'xml'
2
2
  require 'fileutils'
3
3
  require 'zip/zipfilesystem'
4
4
  require 'date'
5
+ require 'rubygems'
6
+ require 'nokogiri'
5
7
 
6
# Backport of String#end_with? for interpreters that do not ship it.
#
# The check is feature-based rather than a comparison of RUBY_VERSION
# strings: string comparison of version numbers is lexicographic, and a
# native implementation must never be replaced by this single-argument one.
unless String.method_defined?(:end_with?)
  class String
    # Returns true when the receiver ends with +str+.
    def end_with?(str)
      self[-str.length, str.length] == str
    end
  end
end
11
15
 
@@ -92,6 +96,7 @@ class Excelx < GenericSpreadsheet
92
96
  begin
93
97
  file_type_check(filename,'.xlsx','an Excel-xlsx')
94
98
  @cells_read = Hash.new
99
+ @read_first_100_rows = Hash.new
95
100
  @filename = filename
96
101
  unless File.file?(@filename)
97
102
  raise IOError, "file #{@filename} does not exist"
@@ -100,27 +105,30 @@ class Excelx < GenericSpreadsheet
100
105
  @file_nr = @@nr
101
106
  extract_content(@filename)
102
107
  file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
103
- @workbook_doc = XML::Parser.io(file).parse
108
+ # TODO: @workbook_doc = XML::Parser.io(file).parse
109
+ @workbook_doc = Nokogiri::XML(file)
104
110
  file.close
105
- @shared_table = []
111
+
106
112
  if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
107
113
  file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
108
- @sharedstring_doc = XML::Parser.io(file).parse
114
+ #TODO: @sharedstring_doc = XML::Parser.io(file).parse
115
+ @sharedstring_doc = Nokogiri::XML(file)
109
116
  file.close
110
- read_shared_strings(@sharedstring_doc)
111
117
  end
112
118
  @styles_table = []
113
- @style_definitions = Array.new { |h,k| h[k] = {} }
119
+ @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
114
120
  if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
115
121
  file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
116
- @styles_doc = XML::Parser.io(file).parse
122
+ #TODO: @styles_doc = XML::Parser.io(file).parse
123
+ @styles_doc = Nokogiri::XML(file)
117
124
  file.close
118
125
  read_styles(@styles_doc)
119
126
  end
120
127
  @sheet_doc = []
121
128
  @sheet_files.each_with_index do |item, i|
122
129
  file = File.new(item)
123
- @sheet_doc[i] = XML::Parser.io(file).parse
130
+ #TODO: @sheet_doc[i] = XML::Parser.io(file).parse
131
+ @sheet_doc[i] = Nokogiri::XML(file)
124
132
  file.close
125
133
  end
126
134
  ensure
@@ -148,7 +156,9 @@ class Excelx < GenericSpreadsheet
148
156
  # cell at the first line and first row.
149
157
  def cell(row, col, sheet=nil)
150
158
  sheet = @default_sheet unless sheet
151
- read_cells(sheet) unless @cells_read[sheet]
159
+ unless @cells_read[sheet] or (@read_first_100_rows[sheet] and row <= 100)
160
+ read_cells(sheet)
161
+ end
152
162
  row,col = normalize(row,col)
153
163
  if celltype(row,col,sheet) == :date
154
164
  yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
@@ -161,6 +171,20 @@ class Excelx < GenericSpreadsheet
161
171
  end
162
172
  @cell[sheet][[row,col]]
163
173
  end
174
+
175
# Returns all values of one row as an array.
#
# rownumber:: 1-based row number (1, 2, 3, ...) as in the spreadsheet
# sheet::     sheet name; defaults to @default_sheet
#
# The array spans first_column(sheet)..last_column(sheet). An empty array is
# returned when either bound is nil (no cell with a value is loaded), instead
# of failing with NoMethodError on nil.
def row(rownumber, sheet=nil)
  sheet = @default_sheet unless sheet
  # The partial "first 100 rows" read is sufficient for rows 1..100;
  # any other row needs the complete sheet.
  covered_by_partial_read = @read_first_100_rows[sheet] && rownumber <= 100
  read_cells(sheet) unless @cells_read[sheet] || covered_by_partial_read
  first = first_column(sheet)
  last = last_column(sheet)
  return [] if first.nil? || last.nil?
  (first..last).map { |col| cell(rownumber, col, sheet) }
end
164
188
 
165
189
  # Returns the formula at (row,col).
166
190
  # Returns nil if there is no formula.
@@ -192,7 +216,7 @@ class Excelx < GenericSpreadsheet
192
216
  end
193
217
 
194
218
  def italic?
195
- @italic == true
219
+ @italic == true
196
220
  end
197
221
 
198
222
  def underline?
@@ -202,13 +226,13 @@ class Excelx < GenericSpreadsheet
202
226
 
203
227
  # Given a cell, return the cell's style
204
228
  def font(row, col, sheet=nil)
205
- sheet = @default_sheet unless sheet
206
- read_cells(sheet) unless @cells_read[sheet]
207
- row,col = normalize(row,col)
208
- s_attribute = @s_attribute[sheet][[row,col]]
209
- s_attribute ||= 0
210
- s_attribute = s_attribute.to_i
211
- @style_definitions[s_attribute]
229
+ sheet = @default_sheet unless sheet
230
+ read_cells(sheet) unless @cells_read[sheet]
231
+ row,col = normalize(row,col)
232
+ s_attribute = @s_attribute[sheet][[row,col]]
233
+ s_attribute ||= 0
234
+ s_attribute = s_attribute.to_i
235
+ @style_definitions[s_attribute]
212
236
  end
213
237
 
214
238
  # set a cell to a certain value
@@ -239,7 +263,9 @@ class Excelx < GenericSpreadsheet
239
263
  # * :datetime
240
264
  def celltype(row,col,sheet=nil)
241
265
  sheet = @default_sheet unless sheet
242
- read_cells(sheet) unless @cells_read[sheet]
266
+ unless @cells_read[sheet] or (@read_first_100_rows[sheet] and row <= 100)
267
+ read_cells(sheet)
268
+ end
243
269
  row,col = normalize(row,col)
244
270
  if @formula[sheet][[row,col]]
245
271
  return :formula
@@ -274,15 +300,17 @@ class Excelx < GenericSpreadsheet
274
300
  read_cells(sheet) unless @cells_read[sheet]
275
301
  row,col = normalize(row,col)
276
302
  s = @s_attribute[sheet][[row,col]]
277
- result = attribute2format(s)
303
+ result = attribute2format(s).to_s
278
304
  result
279
305
  end
280
306
 
281
307
  # returns an array of sheet names in the spreadsheet
282
308
  def sheets
283
309
  return_sheets = []
284
- @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
285
- return_sheets << sheet.attributes.to_h['name']
310
+ #TODO: @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
311
+ @workbook_doc.xpath("//*[local-name()='sheet']").each do |sheet|
312
+ #TODO: return_sheets << sheet.attributes.to_h['name']
313
+ return_sheets << sheet['name']
286
314
  end
287
315
  return_sheets
288
316
  end
@@ -310,6 +338,93 @@ class Excelx < GenericSpreadsheet
310
338
  }
311
339
  theformulas
312
340
  end
341
+
342
# Returns the number of the first non-empty row among the cells loaded for
# +sheet+ (default: @default_sheet), or nil when no loaded cell has a value.
#
# If neither the partial nor the full read has happened yet, the first 100
# rows are loaded first. A non-nil result is memoized in @first_row.
#
# No sentinel value is used for "nothing found": the previous sentinel
# (999_999) is smaller than the xlsx row limit of 1,048,576, so data that
# started beyond row 999,999 was reported as nil.
def first_row(sheet=nil)
  sheet = @default_sheet if sheet.nil?
  read_first_100_rows(sheet) unless @read_first_100_rows[sheet] || @cells_read[sheet]
  return @first_row[sheet] if @first_row[sheet]
  result = nil
  if @cell[sheet]
    @cell[sheet].each_pair do |key, value|
      next unless value
      y, _x = key # keys are [row, column] pairs
      y = y.to_i
      result = y if result.nil? || y < result
    end
  end
  @first_row[sheet] = result
  result
end
361
+
362
# Returns the number of the last non-empty row among the cells loaded for
# +sheet+ (default: @default_sheet), or nil when no loaded cell has a value.
#
# When the sheet has not been read completely, three columns starting at the
# first used column are loaded and the result is derived from whatever is
# loaded afterwards. A non-nil result is memoized in @last_row.
def last_row(sheet=nil)
  sheet = @default_sheet unless sheet
  return @last_row[sheet] if @last_row[sheet]
  unless @cells_read[sheet]
    fst_column = first_column(sheet)
    # first_column is nil for a sheet without values; there is no column to
    # load then (number_to_letter presumably cannot convert nil -- TODO confirm).
    read_column(sheet, GenericSpreadsheet.number_to_letter(fst_column), 3) if fst_column
  end
  result = 0 # rows are 1-based, so 0 means "nothing found"
  if @cell[sheet]
    @cell[sheet].each_pair do |key, value|
      next unless value
      y, _x = key # keys are [row, column] pairs
      y = y.to_i
      result = y if y > result
    end
  end
  result = nil if result == 0
  @last_row[sheet] = result
  result
end
384
+
385
# Returns the number of the first non-empty column among the cells loaded
# for +sheet+ (default: @default_sheet), or nil when no loaded cell has a
# value.
#
# If neither the partial nor the full read has happened yet, the first 100
# rows are loaded first, so the result reflects only the loaded cells.
# A non-nil result is memoized in @first_column.
#
# Fixes: the cached *row* (@first_row) is no longer returned as the column,
# and a sheet that is already fully read is not partially re-read.
def first_column(sheet=nil)
  sheet = @default_sheet if sheet.nil?
  read_first_100_rows(sheet) unless @read_first_100_rows[sheet] || @cells_read[sheet]
  return @first_column[sheet] if @first_column[sheet]
  result = nil
  if @cell[sheet]
    @cell[sheet].each_pair do |key, value|
      next unless value
      _y, x = key # keys are [row, column] pairs
      result = x if result.nil? || x < result
    end
  end
  @first_column[sheet] = result
  result
end
407
+
408
+
409
# Returns the number of the last non-empty column among the cells loaded for
# +sheet+ (default: @default_sheet), or nil when no loaded cell has a value.
#
# If neither the partial nor the full read has happened yet, the first 100
# rows are loaded first, so the result reflects only the loaded cells.
# A sheet that is already fully read is not partially re-read (same guard as
# first_row). A non-nil result is memoized in @last_column.
def last_column(sheet=nil)
  sheet = @default_sheet unless sheet
  read_first_100_rows(sheet) unless @read_first_100_rows[sheet] || @cells_read[sheet]
  return @last_column[sheet] if @last_column[sheet]
  result = 0 # columns are 1-based, so 0 means "nothing found"
  if @cell[sheet]
    @cell[sheet].each_pair do |key, value|
      next unless value
      _y, x = key # keys are [row, column] pairs
      x = x.to_i
      result = x if x > result
    end
  end
  result = nil if result == 0
  @last_column[sheet] = result
  result
end
427
+
313
428
 
314
429
  private
315
430
 
@@ -374,38 +489,73 @@ class Excelx < GenericSpreadsheet
374
489
  return x,y
375
490
  end
376
491
 
377
- # read all cells in the selected sheet
378
492
  def format2type(format)
493
+ format = format.to_s # weil von Typ Nokogiri::XML::Attr
379
494
  if FORMATS.has_key? format
380
495
  FORMATS[format]
381
496
  else
382
497
  :float
383
498
  end
384
499
  end
385
-
500
# Loads only the cells of the first 100 rows of +sheet+ and records that in
# @read_first_100_rows.
#
# sheet:: sheet name; defaults to @default_sheet. The default is resolved
#         here so the bookkeeping flags use the same key callers look up
#         (previously they were stored under nil).
#
# A sheet that is already completely read is left untouched: its first 100
# rows are loaded by definition, and its "fully read" flag must not be reset.
def read_first_100_rows(sheet=nil)
  sheet = @default_sheet unless sheet
  unless @cells_read[sheet]
    read_cells(sheet, 100)
    # Only part of the sheet is loaded; a later access outside the first
    # 100 rows must still trigger a complete read.
    @cells_read[sheet] = false
  end
  @read_first_100_rows[sheet] = true
end
505
+
506
# Loads only the cells of +number_columns+ adjacent columns of +sheet+,
# starting at the column with letter +column+.
#
# sheet:: sheet name; defaults to @default_sheet. The default is resolved
#         here so the "cells read" flag is stored under the sheet's name
#         (previously it was stored under nil).
#
# A sheet that is already completely read is left untouched, so its
# "fully read" flag is not reset.
def read_column(sheet=nil, column='A', number_columns=2)
  sheet = @default_sheet unless sheet
  unless @cells_read[sheet]
    read_cells(sheet, nil, column, number_columns)
    # Only some columns are loaded; the sheet still counts as not read.
    @cells_read[sheet] = false
  end
end
386
510
  # read all cells in the selected sheet
387
- def read_cells(sheet=nil)
511
+ def read_cells(sheet=nil, limit=nil, column_letter=nil, number_columns=nil)
388
512
  sheet = @default_sheet unless sheet
389
513
  sheet_found = false
390
514
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
391
515
  raise RangeError unless self.sheets.include? sheet
392
516
  n = self.sheets.index(sheet)
393
- @sheet_doc[n].find("//*[local-name()='c']").each do |c|
394
- s_attribute = c.attributes.to_h['s'].to_i # should be here
395
- if (c.attributes.to_h['t'] == 's')
396
- tmp_type = :shared
397
- elsif (c.attributes.to_h['t'] == 'b')
398
- tmp_type = :boolean
399
- else
400
- # s_attribute = c.attributes.to_h['s'].to_i # was here
401
- format = attribute2format(s_attribute)
402
- tmp_type = format2type(format)
403
- end
517
+ #TODO: @sheet_doc[n].find("//*[local-name()='c']").each do |c|
518
+ xpath_limit = ''
519
+ if limit
520
+ xpath_limit = "and ( number(substring(@r,2)) <= #{limit} or ( not (number(substring(@r,2)) > 0) and (number(substring(@r,3)) <= #{limit})))"
521
+ end
522
+ if column_letter
523
+ xpath_limit += " and ("
524
+ column_number = GenericSpreadsheet.letter_to_number(column_letter)
525
+ number_columns.times do |column_index|
526
+ letter = GenericSpreadsheet.number_to_letter(column_number + column_index)
527
+ xpath_limit += " or " if column_index > 0
528
+ xpath_limit += "( substring(@r,1,1) = '#{letter}' and substring(@r,2,1) > 0 )"
529
+ end
530
+ xpath_limit += ")"
531
+ end
532
+ @sheet_doc[n].xpath("//*[local-name()='c' #{xpath_limit}]").each do |c|
533
+ #TODO: s_attribute = c.attributes.to_h['s'].to_i # should be here
534
+ s_attribute = c['s'].to_i # should be here
535
+ #TODO: if (c.attributes.to_h['t'] == 's')
536
+ # c: <c r="A5" s="2">
537
+ # <v>22606</v>
538
+ # </c>, format: , tmp_type: float
539
+
540
+ if c['t'] == 's'
541
+ tmp_type = :shared
542
+ #TODO: elsif (c.attributes.to_h['t'] == 'b')
543
+ elsif c['t'] == 'b'
544
+ tmp_type = :boolean
545
+ else
546
+ #s_attribute = c.attributes.to_h['s'].to_i # was here
547
+ s_attribute = c['s'].to_i # was here
548
+ format = attribute2format(s_attribute)
549
+ tmp_type = format2type(format)
550
+ end
404
551
  formula = nil
405
- c.each_element do |cell|
552
+ #TODO: c.each_element do |cell|
553
+ c.children.each do |cell|
554
+ #TODO: if cell.name == 'f'
406
555
  if cell.name == 'f'
407
556
  formula = cell.content
408
557
  end
558
+ #TODO: if cell.name == 'v'
409
559
  if cell.name == 'v'
410
560
  if tmp_type == :time or tmp_type == :datetime
411
561
  if cell.content.to_f >= 1.0
@@ -417,10 +567,12 @@ class Excelx < GenericSpreadsheet
417
567
  else
418
568
  end
419
569
  end
420
- excelx_type = [:numeric_or_formula,format]
570
+ excelx_type = [:numeric_or_formula,format.to_s]
421
571
  excelx_value = cell.content
422
572
  if tmp_type == :shared
423
573
  vt = :string
574
+ read_shared_strings(@sharedstring_doc,limit) if @shared_table.nil?
575
+
424
576
  str_v = @shared_table[cell.content.to_i]
425
577
  excelx_type = :string
426
578
  elsif tmp_type == :boolean
@@ -443,7 +595,8 @@ class Excelx < GenericSpreadsheet
443
595
  v = cell.content
444
596
  end
445
597
  #puts "vt: #{vt}" if cell.text.include? "22606.5120"
446
- x,y = split_coordinate(c.attributes.to_h['r'])
598
+ #TODO: x,y = split_coordinate(c.attributes.to_h['r'])
599
+ x,y = split_coordinate(c['r'])
447
600
  tr=nil #TODO: ???s
448
601
  set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
449
602
  end
@@ -522,18 +675,25 @@ class Excelx < GenericSpreadsheet
522
675
  end
523
676
 
524
677
  # read the shared strings xml document
525
- def read_shared_strings(doc)
526
- doc.find("//*[local-name()='si']").each do |si|
678
+ def read_shared_strings(doc, limit=nil)
679
+ #TODO: doc.find("//*[local-name()='si']").each do |si|
680
+ @shared_table = []
681
+ if limit
682
+ xpath_limit = " and position() < #{limit*30}"
683
+ end
684
+ doc.xpath("//*[local-name()='si' #{xpath_limit}]").each do |si|
527
685
  shared_table_entry = ''
528
- si.each_element do |elem|
529
- if (elem.name == 'r')
530
- elem.each_element do |r_elem|
531
- if (r_elem.name == 't')
686
+ #TODO: si.each_element do |elem|
687
+ si.children.each do |elem|
688
+ if elem.name == 'r' and elem.children
689
+ # elem.each_element do |r_elem|
690
+ elem.children.each do |r_elem|
691
+ if r_elem.name == 't'
532
692
  shared_table_entry << r_elem.content
533
693
  end
534
694
  end
535
695
  end
536
- if (elem.name == 't')
696
+ if elem.name == 't'
537
697
  shared_table_entry = elem.content
538
698
  end
539
699
  end
@@ -547,37 +707,46 @@ class Excelx < GenericSpreadsheet
547
707
  @cellXfs = []
548
708
  fonts = []
549
709
 
550
- doc.find("//*[local-name()='numFmt']").each do |numFmt|
551
- numFmtId = numFmt.attributes.to_h['numFmtId']
552
- formatCode = numFmt.attributes.to_h['formatCode']
710
+ #TODO: doc.find("//*[local-name()='numFmt']").each do |numFmt|
711
+ doc.xpath("//*[local-name()='numFmt']").each do |numFmt|
712
+ # TODO: numFmtId = numFmt.attributes.to_h['numFmtId']
713
+ numFmtId = numFmt.attributes['numFmtId']
714
+ #TODO: formatCode = numFmt.attributes.to_h['formatCode']
715
+ formatCode = numFmt.attributes['formatCode']
553
716
  @numFmts << [numFmtId, formatCode]
554
717
  end
555
- doc.find("//*[local-name()='fonts']").each do |fonts_el|
556
- fonts_el.each_element do |font_el|
557
- if font_el.name == 'font'
718
+ #TODO: doc.find("//*[local-name()='fonts']").each do |fonts_el|
719
+ doc.xpath("//*[local-name()='fonts']").each do |fonts_el|
720
+ #TODO: fonts_el.each_element do |font_el|
721
+ fonts_el.children.each do |font_el|
722
+ #TODO: if font_el.name == 'font'
723
+ if font_el == 'font'
558
724
  font = Excelx::Font.new
559
725
  font_el.each_element do |font_sub_el|
560
726
  case font_sub_el.name
561
- when 'b'
562
- font.bold = true
563
- when 'i'
564
- font.italic = true
565
- when 'u'
566
- font.underline = true
567
- end
727
+ when 'b'
728
+ font.bold = true
729
+ when 'i'
730
+ font.italic = true
731
+ when 'u'
732
+ font.underline = true
733
+ end
568
734
  end
569
735
  fonts << font
570
736
  end
571
737
  end
572
738
  end
573
739
 
574
- doc.find("//*[local-name()='cellXfs']").each do |xfs|
575
- xfs.each do |xf|
576
- numFmtId = xf.attributes.to_h['numFmtId']
577
- @cellXfs << [numFmtId]
578
- fontId = xf.attributes.to_h['fontId'].to_i
579
- @style_definitions << fonts[fontId]
580
- end
740
+ #TODO: doc.find("//*[local-name()='cellXfs']").each do |xfs|
741
+ doc.xpath("//*[local-name()='cellXfs']").each do |xfs|
742
+ xfs.children.each do |xf|
743
+ #TODO: numFmtId = xf.attributes.to_h['numFmtId']
744
+ numFmtId = xf['numFmtId']
745
+ @cellXfs << [numFmtId]
746
+ #TODO: fontId = xf.attributes.to_h['fontId'].to_i
747
+ fontId = xf['fontId'].to_i
748
+ @style_definitions << fonts[fontId]
749
+ end
581
750
  end
582
751
  end
583
752
 
@@ -585,7 +754,10 @@ class Excelx < GenericSpreadsheet
585
754
  def attribute2format(s)
586
755
  result = nil
587
756
  @numFmts.each {|nf|
588
- if nf.first == @cellXfs[s.to_i].first
757
+ #TODO: if nf.first == @cellXfs[s.to_i].first
758
+ # to_s weil das eine Nokogiri::XML::Attr und das
759
+ # andere ein String ist
760
+ if nf.first.to_s == @cellXfs[s.to_i].first
589
761
  result = nf[1]
590
762
  break
591
763
  end
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require 'rubygems'
2
3
  require 'builder'
3
4
 
@@ -313,7 +314,7 @@ class GenericSpreadsheet
313
314
  def info
314
315
  result = "File: #{File.basename(@filename)}\n"+
315
316
  "Number of sheets: #{sheets.size}\n"+
316
- "Sheets: #{sheets.map{|sheet| sheet+", "}.to_s[0..-3]}\n"
317
+ "Sheets: #{sheets.join(', ')}\n"
317
318
  n = 1
318
319
  sheets.each {|sheet|
319
320
  self.default_sheet = sheet
@@ -359,6 +360,33 @@ class GenericSpreadsheet
359
360
  xml_document
360
361
  end
361
362
 
363
# Turns cell-style method names into cell lookups:
#
#   spreadsheet.a42            => spreadsheet.cell(42, <column of 'a'>)
#   spreadsheet.aa42('Sheet1') => spreadsheet.cell(42, <column of 'aa'>, 'Sheet1')
#
# The name must consist of lowercase column letters followed by one or more
# digits (the row). Every other name is passed to +super+, which raises
# NoMethodError as usual; that keeps Ruby's internal probes for optional
# conversion methods working.
#
# Resolving label names this way is not possible yet because +label+ is
# defined in a subclass (Openoffice).
def method_missing(m, *args)
  # to_s: m arrives as a Symbol. \d+ so multi-digit rows such as a42 match.
  match = /\A([a-z]+)(\d+)\z/.match(m.to_s)
  return super unless match
  col = GenericSpreadsheet.letter_to_number(match[1])
  row = match[2].to_i
  if args.empty?
    cell(row, col)
  else
    cell(row, col, args[0])
  end
end
389
+
362
390
  protected
363
391
 
364
392
  def file_type_check(filename, ext, name)
@@ -366,10 +394,9 @@ class GenericSpreadsheet
366
394
  '.ods' => 'Openoffice.new',
367
395
  '.xls' => 'Excel.new',
368
396
  '.xlsx' => 'Excelx.new',
369
- '.xml' => 'Excel2003.new'
370
397
  }
371
398
  case ext
372
- when '.ods', '.xls', '.xlsx', '.xml'
399
+ when '.ods', '.xls', '.xlsx'
373
400
  correct_class = "use #{new_expression[ext]} to handle #{ext} spreadsheet files"
374
401
  else
375
402
  raise "unknown file type: #{ext}"
@@ -578,8 +605,8 @@ class GenericSpreadsheet
578
605
  if onecell == ""
579
606
  str << ''
580
607
  else
581
- onecell.gsub!(/"/,'""')
582
- str << ('"'+onecell+'"')
608
+ one = onecell.gsub(/"/,'""')
609
+ str << ('"'+one+'"')
583
610
  end
584
611
  when :float,:percentage
585
612
  if onecell == onecell.to_i
@@ -592,8 +619,8 @@ class GenericSpreadsheet
592
619
  if onecell == ""
593
620
  str << ''
594
621
  else
595
- onecell.gsub!(/"/,'""')
596
- str << '"'+onecell+'"'
622
+ one = onecell.gsub(/"/,'""')
623
+ str << '"'+one+'"'
597
624
  end
598
625
  elsif onecell.class == Float
599
626
  if onecell == onecell.to_i
@@ -617,7 +644,6 @@ class GenericSpreadsheet
617
644
 
618
645
  # converts an integer value to a time string like '02:05:06'
619
646
  def self.integer_to_timestring(content)
620
- return content if String === content
621
647
  h = (content/3600.0).floor
622
648
  content = content - h*3600
623
649
  m = (content/60.0).floor