donibuchanan-roo 1.3.12 → 1.9.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/roo/excelx.rb CHANGED
@@ -1,11 +1,15 @@
1
- require 'xml'
1
+ #TODO: require 'xml'
2
2
  require 'fileutils'
3
3
  require 'zip/zipfilesystem'
4
4
  require 'date'
5
+ require 'rubygems'
6
+ require 'nokogiri'
5
7
 
6
- class String
7
- def end_with?(str)
8
- self[-str.length,str.length] == str
8
+ if RUBY_VERSION < '1.9.0'
9
+ class String
10
+ def end_with?(str)
11
+ self[-str.length,str.length] == str
12
+ end
9
13
  end
10
14
  end
11
15
 
@@ -92,6 +96,7 @@ class Excelx < GenericSpreadsheet
92
96
  begin
93
97
  file_type_check(filename,'.xlsx','an Excel-xlsx')
94
98
  @cells_read = Hash.new
99
+ @read_first_100_rows = Hash.new
95
100
  @filename = filename
96
101
  unless File.file?(@filename)
97
102
  raise IOError, "file #{@filename} does not exist"
@@ -100,27 +105,30 @@ class Excelx < GenericSpreadsheet
100
105
  @file_nr = @@nr
101
106
  extract_content(@filename)
102
107
  file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
103
- @workbook_doc = XML::Parser.io(file).parse
108
+ # TODO: @workbook_doc = XML::Parser.io(file).parse
109
+ @workbook_doc = Nokogiri::XML(file)
104
110
  file.close
105
- @shared_table = []
111
+
106
112
  if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
107
113
  file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
108
- @sharedstring_doc = XML::Parser.io(file).parse
114
+ #TODO: @sharedstring_doc = XML::Parser.io(file).parse
115
+ @sharedstring_doc = Nokogiri::XML(file)
109
116
  file.close
110
- read_shared_strings(@sharedstring_doc)
111
117
  end
112
118
  @styles_table = []
113
- @style_definitions = Array.new { |h,k| h[k] = {} }
119
+ @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
114
120
  if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
115
121
  file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
116
- @styles_doc = XML::Parser.io(file).parse
122
+ #TODO: @styles_doc = XML::Parser.io(file).parse
123
+ @styles_doc = Nokogiri::XML(file)
117
124
  file.close
118
125
  read_styles(@styles_doc)
119
126
  end
120
127
  @sheet_doc = []
121
128
  @sheet_files.each_with_index do |item, i|
122
129
  file = File.new(item)
123
- @sheet_doc[i] = XML::Parser.io(file).parse
130
+ #TODO: @sheet_doc[i] = XML::Parser.io(file).parse
131
+ @sheet_doc[i] = Nokogiri::XML(file)
124
132
  file.close
125
133
  end
126
134
  ensure
@@ -148,7 +156,9 @@ class Excelx < GenericSpreadsheet
148
156
  # cell at the first line and first row.
149
157
  def cell(row, col, sheet=nil)
150
158
  sheet = @default_sheet unless sheet
151
- read_cells(sheet) unless @cells_read[sheet]
159
+ unless @cells_read[sheet] or (@read_first_100_rows[sheet] and row <= 100)
160
+ read_cells(sheet)
161
+ end
152
162
  row,col = normalize(row,col)
153
163
  if celltype(row,col,sheet) == :date
154
164
  yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
@@ -161,6 +171,20 @@ class Excelx < GenericSpreadsheet
161
171
  end
162
172
  @cell[sheet][[row,col]]
163
173
  end
174
+
175
+ # returns all values in this row as an array
176
+ # row numbers are 1,2,3,... like in the spreadsheet
177
+ def row(rownumber,sheet=nil)
178
+ sheet = @default_sheet unless sheet
179
+ unless @cells_read[sheet] or (@read_first_100_rows[sheet] and rownumber <= 100)
180
+ read_cells(sheet)
181
+ end
182
+ result = []
183
+ first_column(sheet).upto(last_column(sheet)) do |col|
184
+ result << cell(rownumber,col,sheet)
185
+ end
186
+ result
187
+ end
164
188
 
165
189
  # Returns the formula at (row,col).
166
190
  # Returns nil if there is no formula.
@@ -192,7 +216,7 @@ class Excelx < GenericSpreadsheet
192
216
  end
193
217
 
194
218
  def italic?
195
- @italic == true
219
+ @italic == true
196
220
  end
197
221
 
198
222
  def underline?
@@ -202,13 +226,13 @@ class Excelx < GenericSpreadsheet
202
226
 
203
227
  # Given a cell, return the cell's style
204
228
  def font(row, col, sheet=nil)
205
- sheet = @default_sheet unless sheet
206
- read_cells(sheet) unless @cells_read[sheet]
207
- row,col = normalize(row,col)
208
- s_attribute = @s_attribute[sheet][[row,col]]
209
- s_attribute ||= 0
210
- s_attribute = s_attribute.to_i
211
- @style_definitions[s_attribute]
229
+ sheet = @default_sheet unless sheet
230
+ read_cells(sheet) unless @cells_read[sheet]
231
+ row,col = normalize(row,col)
232
+ s_attribute = @s_attribute[sheet][[row,col]]
233
+ s_attribute ||= 0
234
+ s_attribute = s_attribute.to_i
235
+ @style_definitions[s_attribute]
212
236
  end
213
237
 
214
238
  # set a cell to a certain value
@@ -239,7 +263,9 @@ class Excelx < GenericSpreadsheet
239
263
  # * :datetime
240
264
  def celltype(row,col,sheet=nil)
241
265
  sheet = @default_sheet unless sheet
242
- read_cells(sheet) unless @cells_read[sheet]
266
+ unless @cells_read[sheet] or (@read_first_100_rows[sheet] and row <= 100)
267
+ read_cells(sheet)
268
+ end
243
269
  row,col = normalize(row,col)
244
270
  if @formula[sheet][[row,col]]
245
271
  return :formula
@@ -274,15 +300,17 @@ class Excelx < GenericSpreadsheet
274
300
  read_cells(sheet) unless @cells_read[sheet]
275
301
  row,col = normalize(row,col)
276
302
  s = @s_attribute[sheet][[row,col]]
277
- result = attribute2format(s)
303
+ result = attribute2format(s).to_s
278
304
  result
279
305
  end
280
306
 
281
307
  # returns an array of sheet names in the spreadsheet
282
308
  def sheets
283
309
  return_sheets = []
284
- @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
285
- return_sheets << sheet.attributes.to_h['name']
310
+ #TODO: @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
311
+ @workbook_doc.xpath("//*[local-name()='sheet']").each do |sheet|
312
+ #TODO: return_sheets << sheet.attributes.to_h['name']
313
+ return_sheets << sheet['name']
286
314
  end
287
315
  return_sheets
288
316
  end
@@ -310,6 +338,93 @@ class Excelx < GenericSpreadsheet
310
338
  }
311
339
  theformulas
312
340
  end
341
+
342
+ def first_row(sheet=nil)
343
+ if sheet == nil
344
+ sheet = @default_sheet
345
+ end
346
+ read_first_100_rows(sheet) unless @read_first_100_rows[sheet] or @cells_read[sheet]
347
+ if @first_row[sheet]
348
+ return @first_row[sheet]
349
+ end
350
+ impossible_value = 999_999 # more than a spreadsheet can hold
351
+ result = impossible_value
352
+ @cell[sheet].each_pair {|key,value|
353
+ y,x = key # _to_string(key).split(',')
354
+ y = y.to_i
355
+ result = [result, y].min if value
356
+ } if @cell[sheet]
357
+ result = nil if result == impossible_value
358
+ @first_row[sheet] = result
359
+ result
360
+ end
361
+
362
+ # returns the number of the last non-empty row
363
+ def last_row(sheet=nil)
364
+ sheet = @default_sheet unless sheet
365
+ if @last_row[sheet]
366
+ return @last_row[sheet]
367
+ end
368
+ # read a few columns
369
+ unless @cells_read[sheet]
370
+ fst_column = first_column(sheet)
371
+ read_column(sheet,GenericSpreadsheet.number_to_letter(fst_column),3)
372
+ end
373
+ impossible_value = 0
374
+ result = impossible_value
375
+ @cell[sheet].each_pair {|key,value|
376
+ y,x = key # _to_string(key).split(',')
377
+ y = y.to_i
378
+ result = [result, y].max if value
379
+ } if @cell[sheet]
380
+ result = nil if result == impossible_value
381
+ @last_row[sheet] = result
382
+ result
383
+ end
384
+
385
+ def first_column(sheet=nil)
386
+ if sheet == nil
387
+ sheet = @default_sheet
388
+ end
389
+ read_first_100_rows(sheet) unless @read_first_100_rows[sheet]
390
+ if @first_row[sheet]
391
+ return @first_row[sheet]
392
+ end
393
+ if @first_column[sheet]
394
+ return @first_column[sheet]
395
+ end
396
+ impossible_value = 999_999 # more than a spreadsheet can hold
397
+ result = impossible_value
398
+ @cell[sheet].each_pair {|key,value|
399
+ y,x = key # _to_string(key).split(',')
400
+ x = x # .to_i
401
+ result = [result, x].min if value
402
+ } if @cell[sheet]
403
+ result = nil if result == impossible_value
404
+ @first_column[sheet] = result
405
+ result
406
+ end
407
+
408
+
409
+ # returns the number of the last non-empty column
410
+ def last_column(sheet=nil)
411
+ sheet = @default_sheet unless sheet
412
+ read_first_100_rows(sheet) unless @read_first_100_rows[sheet]
413
+ if @last_column[sheet]
414
+ return @last_column[sheet]
415
+ end
416
+ impossible_value = 0
417
+ result = impossible_value
418
+ @cell[sheet].each_pair {|key,value|
419
+ y,x = key # _to_string(key).split(',')
420
+ x = x.to_i
421
+ result = [result, x].max if value
422
+ } if @cell[sheet]
423
+ result = nil if result == impossible_value
424
+ @last_column[sheet] = result
425
+ result
426
+ end
427
+
313
428
 
314
429
  private
315
430
 
@@ -374,38 +489,73 @@ class Excelx < GenericSpreadsheet
374
489
  return x,y
375
490
  end
376
491
 
377
- # read all cells in the selected sheet
378
492
  def format2type(format)
493
+ format = format.to_s # because it is of type Nokogiri::XML::Attr
379
494
  if FORMATS.has_key? format
380
495
  FORMATS[format]
381
496
  else
382
497
  :float
383
498
  end
384
499
  end
385
-
500
+ def read_first_100_rows(sheet=nil)
501
+ read_cells(sheet, 100)
502
+ @cells_read[sheet] = false
503
+ @read_first_100_rows[sheet] = true
504
+ end
505
+
506
+ def read_column(sheet=nil, column='A', number_columns=2)
507
+ read_cells(sheet, nil,column,number_columns)
508
+ @cells_read[sheet] = false
509
+ end
386
510
  # read all cells in the selected sheet
387
- def read_cells(sheet=nil)
511
+ def read_cells(sheet=nil, limit=nil, column_letter=nil, number_columns=nil)
388
512
  sheet = @default_sheet unless sheet
389
513
  sheet_found = false
390
514
  raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
391
515
  raise RangeError unless self.sheets.include? sheet
392
516
  n = self.sheets.index(sheet)
393
- @sheet_doc[n].find("//*[local-name()='c']").each do |c|
394
- s_attribute = c.attributes.to_h['s'].to_i # should be here
395
- if (c.attributes.to_h['t'] == 's')
396
- tmp_type = :shared
397
- elsif (c.attributes.to_h['t'] == 'b')
398
- tmp_type = :boolean
399
- else
400
- # s_attribute = c.attributes.to_h['s'].to_i # was here
401
- format = attribute2format(s_attribute)
402
- tmp_type = format2type(format)
403
- end
517
+ #TODO: @sheet_doc[n].find("//*[local-name()='c']").each do |c|
518
+ xpath_limit = ''
519
+ if limit
520
+ xpath_limit = "and ( number(substring(@r,2)) <= #{limit} or ( not (number(substring(@r,2)) > 0) and (number(substring(@r,3)) <= #{limit})))"
521
+ end
522
+ if column_letter
523
+ xpath_limit += " and ("
524
+ column_number = GenericSpreadsheet.letter_to_number(column_letter)
525
+ number_columns.times do |column_index|
526
+ letter = GenericSpreadsheet.number_to_letter(column_number + column_index)
527
+ xpath_limit += " or " if column_index > 0
528
+ xpath_limit += "( substring(@r,1,1) = '#{letter}' and substring(@r,2,1) > 0 )"
529
+ end
530
+ xpath_limit += ")"
531
+ end
532
+ @sheet_doc[n].xpath("//*[local-name()='c' #{xpath_limit}]").each do |c|
533
+ #TODO: s_attribute = c.attributes.to_h['s'].to_i # should be here
534
+ s_attribute = c['s'].to_i # should be here
535
+ #TODO: if (c.attributes.to_h['t'] == 's')
536
+ # c: <c r="A5" s="2">
537
+ # <v>22606</v>
538
+ # </c>, format: , tmp_type: float
539
+
540
+ if c['t'] == 's'
541
+ tmp_type = :shared
542
+ #TODO: elsif (c.attributes.to_h['t'] == 'b')
543
+ elsif c['t'] == 'b'
544
+ tmp_type = :boolean
545
+ else
546
+ #s_attribute = c.attributes.to_h['s'].to_i # was here
547
+ s_attribute = c['s'].to_i # was here
548
+ format = attribute2format(s_attribute)
549
+ tmp_type = format2type(format)
550
+ end
404
551
  formula = nil
405
- c.each_element do |cell|
552
+ #TODO: c.each_element do |cell|
553
+ c.children.each do |cell|
554
+ #TODO: if cell.name == 'f'
406
555
  if cell.name == 'f'
407
556
  formula = cell.content
408
557
  end
558
+ #TODO: if cell.name == 'v'
409
559
  if cell.name == 'v'
410
560
  if tmp_type == :time or tmp_type == :datetime
411
561
  if cell.content.to_f >= 1.0
@@ -417,10 +567,12 @@ class Excelx < GenericSpreadsheet
417
567
  else
418
568
  end
419
569
  end
420
- excelx_type = [:numeric_or_formula,format]
570
+ excelx_type = [:numeric_or_formula,format.to_s]
421
571
  excelx_value = cell.content
422
572
  if tmp_type == :shared
423
573
  vt = :string
574
+ read_shared_strings(@sharedstring_doc,limit) if @shared_table.nil?
575
+
424
576
  str_v = @shared_table[cell.content.to_i]
425
577
  excelx_type = :string
426
578
  elsif tmp_type == :boolean
@@ -443,7 +595,8 @@ class Excelx < GenericSpreadsheet
443
595
  v = cell.content
444
596
  end
445
597
  #puts "vt: #{vt}" if cell.text.include? "22606.5120"
446
- x,y = split_coordinate(c.attributes.to_h['r'])
598
+ #TODO: x,y = split_coordinate(c.attributes.to_h['r'])
599
+ x,y = split_coordinate(c['r'])
447
600
  tr=nil #TODO: ???s
448
601
  set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
449
602
  end
@@ -522,18 +675,25 @@ class Excelx < GenericSpreadsheet
522
675
  end
523
676
 
524
677
  # read the shared strings xml document
525
- def read_shared_strings(doc)
526
- doc.find("//*[local-name()='si']").each do |si|
678
+ def read_shared_strings(doc, limit=nil)
679
+ #TODO: doc.find("//*[local-name()='si']").each do |si|
680
+ @shared_table = []
681
+ if limit
682
+ xpath_limit = " and position() < #{limit*30}"
683
+ end
684
+ doc.xpath("//*[local-name()='si' #{xpath_limit}]").each do |si|
527
685
  shared_table_entry = ''
528
- si.each_element do |elem|
529
- if (elem.name == 'r')
530
- elem.each_element do |r_elem|
531
- if (r_elem.name == 't')
686
+ #TODO: si.each_element do |elem|
687
+ si.children.each do |elem|
688
+ if elem.name == 'r' and elem.children
689
+ # elem.each_element do |r_elem|
690
+ elem.children.each do |r_elem|
691
+ if r_elem.name == 't'
532
692
  shared_table_entry << r_elem.content
533
693
  end
534
694
  end
535
695
  end
536
- if (elem.name == 't')
696
+ if elem.name == 't'
537
697
  shared_table_entry = elem.content
538
698
  end
539
699
  end
@@ -547,37 +707,46 @@ class Excelx < GenericSpreadsheet
547
707
  @cellXfs = []
548
708
  fonts = []
549
709
 
550
- doc.find("//*[local-name()='numFmt']").each do |numFmt|
551
- numFmtId = numFmt.attributes.to_h['numFmtId']
552
- formatCode = numFmt.attributes.to_h['formatCode']
710
+ #TODO: doc.find("//*[local-name()='numFmt']").each do |numFmt|
711
+ doc.xpath("//*[local-name()='numFmt']").each do |numFmt|
712
+ # TODO: numFmtId = numFmt.attributes.to_h['numFmtId']
713
+ numFmtId = numFmt.attributes['numFmtId']
714
+ #TODO: formatCode = numFmt.attributes.to_h['formatCode']
715
+ formatCode = numFmt.attributes['formatCode']
553
716
  @numFmts << [numFmtId, formatCode]
554
717
  end
555
- doc.find("//*[local-name()='fonts']").each do |fonts_el|
556
- fonts_el.each_element do |font_el|
557
- if font_el.name == 'font'
718
+ #TODO: doc.find("//*[local-name()='fonts']").each do |fonts_el|
719
+ doc.xpath("//*[local-name()='fonts']").each do |fonts_el|
720
+ #TODO: fonts_el.each_element do |font_el|
721
+ fonts_el.children.each do |font_el|
722
+ #TODO: if font_el.name == 'font'
723
+ if font_el == 'font'
558
724
  font = Excelx::Font.new
559
725
  font_el.each_element do |font_sub_el|
560
726
  case font_sub_el.name
561
- when 'b'
562
- font.bold = true
563
- when 'i'
564
- font.italic = true
565
- when 'u'
566
- font.underline = true
567
- end
727
+ when 'b'
728
+ font.bold = true
729
+ when 'i'
730
+ font.italic = true
731
+ when 'u'
732
+ font.underline = true
733
+ end
568
734
  end
569
735
  fonts << font
570
736
  end
571
737
  end
572
738
  end
573
739
 
574
- doc.find("//*[local-name()='cellXfs']").each do |xfs|
575
- xfs.each do |xf|
576
- numFmtId = xf.attributes.to_h['numFmtId']
577
- @cellXfs << [numFmtId]
578
- fontId = xf.attributes.to_h['fontId'].to_i
579
- @style_definitions << fonts[fontId]
580
- end
740
+ #TODO: doc.find("//*[local-name()='cellXfs']").each do |xfs|
741
+ doc.xpath("//*[local-name()='cellXfs']").each do |xfs|
742
+ xfs.children.each do |xf|
743
+ #TODO: numFmtId = xf.attributes.to_h['numFmtId']
744
+ numFmtId = xf['numFmtId']
745
+ @cellXfs << [numFmtId]
746
+ #TODO: fontId = xf.attributes.to_h['fontId'].to_i
747
+ fontId = xf['fontId'].to_i
748
+ @style_definitions << fonts[fontId]
749
+ end
581
750
  end
582
751
  end
583
752
 
@@ -585,7 +754,10 @@ class Excelx < GenericSpreadsheet
585
754
  def attribute2format(s)
586
755
  result = nil
587
756
  @numFmts.each {|nf|
588
- if nf.first == @cellXfs[s.to_i].first
757
+ #TODO: if nf.first == @cellXfs[s.to_i].first
758
+ # to_s because one is a Nokogiri::XML::Attr and the
759
+ # other is a String
760
+ if nf.first.to_s == @cellXfs[s.to_i].first
589
761
  result = nf[1]
590
762
  break
591
763
  end
@@ -1,3 +1,4 @@
1
+ # encoding: utf-8
1
2
  require 'rubygems'
2
3
  require 'builder'
3
4
 
@@ -313,7 +314,7 @@ class GenericSpreadsheet
313
314
  def info
314
315
  result = "File: #{File.basename(@filename)}\n"+
315
316
  "Number of sheets: #{sheets.size}\n"+
316
- "Sheets: #{sheets.map{|sheet| sheet+", "}.to_s[0..-3]}\n"
317
+ "Sheets: #{sheets.join(', ')}\n"
317
318
  n = 1
318
319
  sheets.each {|sheet|
319
320
  self.default_sheet = sheet
@@ -359,6 +360,33 @@ class GenericSpreadsheet
359
360
  xml_document
360
361
  end
361
362
 
363
+ # when a method like spreadsheet.a42 is called
364
+ # convert it to a call of spreadsheet.cell('a',42)
365
+ def method_missing(m, *args)
366
+ # #aa42 => #cell('aa',42)
367
+ # #aa42('Sheet1') => #cell('aa',42,'Sheet1')
368
+ if m =~ /^([a-z]+)(\d)$/
369
+ col = GenericSpreadsheet.letter_to_number($1)
370
+ row = $2.to_i
371
+ if args.size > 0
372
+ return cell(row,col,args[0])
373
+ else
374
+ return cell(row,col)
375
+ end
376
+ # else
377
+ # does not work yet, because label is defined below this class
378
+ # (in Openoffice)
379
+ # p "Label #{m} angesprochen?"
380
+ # row,col,sheet = label('anton')
381
+ # # row,col,sheet = label(m)
382
+ # p "row: #{row}"
383
+ # p "col: #{col}"
384
+ # p "sheet: #{sheet}"
385
+ # return cell(row,col)
386
+ end
387
+ raise ArgumentError, "Method #{m} missing. Args: #{args}"
388
+ end
389
+
362
390
  protected
363
391
 
364
392
  def file_type_check(filename, ext, name)
@@ -366,10 +394,9 @@ class GenericSpreadsheet
366
394
  '.ods' => 'Openoffice.new',
367
395
  '.xls' => 'Excel.new',
368
396
  '.xlsx' => 'Excelx.new',
369
- '.xml' => 'Excel2003.new'
370
397
  }
371
398
  case ext
372
- when '.ods', '.xls', '.xlsx', '.xml'
399
+ when '.ods', '.xls', '.xlsx'
373
400
  correct_class = "use #{new_expression[ext]} to handle #{ext} spreadsheet files"
374
401
  else
375
402
  raise "unknown file type: #{ext}"
@@ -578,8 +605,8 @@ class GenericSpreadsheet
578
605
  if onecell == ""
579
606
  str << ''
580
607
  else
581
- onecell.gsub!(/"/,'""')
582
- str << ('"'+onecell+'"')
608
+ one = onecell.gsub(/"/,'""')
609
+ str << ('"'+one+'"')
583
610
  end
584
611
  when :float,:percentage
585
612
  if onecell == onecell.to_i
@@ -592,8 +619,8 @@ class GenericSpreadsheet
592
619
  if onecell == ""
593
620
  str << ''
594
621
  else
595
- onecell.gsub!(/"/,'""')
596
- str << '"'+onecell+'"'
622
+ one = onecell.gsub(/"/,'""')
623
+ str << '"'+one+'"'
597
624
  end
598
625
  elsif onecell.class == Float
599
626
  if onecell == onecell.to_i
@@ -617,7 +644,6 @@ class GenericSpreadsheet
617
644
 
618
645
  # converts an integer value to a time string like '02:05:06'
619
646
  def self.integer_to_timestring(content)
620
- return content if String === content
621
647
  h = (content/3600.0).floor
622
648
  content = content - h*3600
623
649
  m = (content/60.0).floor