roo 1.9.7 → 1.10.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,7 +1,19 @@
1
+ == 1.10.0 2011-10-10
2
+
3
+ * 4 enhancements
4
+ * New class Csv.
5
+ * Openoffice, Libreoffice: new method 'labels'
6
+ * Excelx: implemented all methods concerning labels
7
+ * Openoffice, Excelx: new methods concerning comments (comment, comment? and comments)
8
+
9
+ * 2 bugfixes
10
+ * XLSX: some cells were not recognized correctly in a spreadsheet file created on a Windows mobile phone.
11
+ * labels: Moved to a separate method. There were problems if a label was accessed before read_cells was called.
12
+
1
13
  == 1.9.7 2011-08-27
2
14
 
3
15
  * 1 bugfix
4
- * Openoffice: Better way for extracting formula strings, some characters were deleted at the formula string under some circumstances.
16
+ * Openoffice: Better way for extracting formula strings, some characters were deleted at the formula string.
5
17
 
6
18
  == 1.9.6 2011-08-03
7
19
 
data/README.txt CHANGED
@@ -9,6 +9,8 @@ Roo can access the contents of various spreadsheet files. It can handle
9
9
  * Excel
10
10
  * Google spreadsheets
11
11
  * Excelx
12
+ * Libreoffice
13
+ * CSV
12
14
 
13
15
 
14
16
  == FEATURES/PROBLEMS:
data/bin/roo CHANGED
@@ -26,8 +26,7 @@ Choice.options do
26
26
  long '--info <spreadsheetfile>'
27
27
  desc 'Show information about a spreadsheet file'
28
28
  action do
29
- p Choice.choices
30
- puts 'Filename: '+Choice.choices['info']
29
+ puts '#'
31
30
  end
32
31
  end
33
32
  end
data/lib/roo.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  module Roo
4
4
 
5
5
  # :stopdoc:
6
- VERSION = '1.9.7'
6
+ VERSION = '1.10.0'
7
7
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
8
8
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
9
9
  # :startdoc:
@@ -54,6 +54,8 @@ module Roo
54
54
  Excelx.new(file)
55
55
  when '.ods'
56
56
  Openoffice.new(file)
57
+ when '.csv'
58
+ Csv.new(file)
57
59
  # when ''
58
60
  else
59
61
  Google.new(file)
@@ -70,6 +72,7 @@ require 'roo/openoffice'
70
72
  require 'roo/excel'
71
73
  require 'roo/excelx'
72
74
  require 'roo/google'
75
+ require 'roo/csv'
73
76
 
74
77
  #Roo.require_all_libs_relative_to(__FILE__)
75
78
 
Binary file
data/lib/roo/csv.rb ADDED
@@ -0,0 +1,117 @@
1
+ require 'rubygems'
2
+ require 'csv'
3
+
4
+ # The Csv class can read csv files (must be separated with commas) which then
5
+ # can be handled like spreadsheets. This means you can access cells like A5
6
+ # within these files.
7
+ # The Csv class provides only string objects. If you want conversions to other
8
+ # types you have to do it yourself.
9
+
10
+ class Csv < GenericSpreadsheet
11
+ def initialize(filename, packed=nil, file_warning=:error, tmpdir=nil)
12
+ @filename = filename
13
+ super()
14
+ @cell = Hash.new
15
+ @cell_type = Hash.new
16
+ @cells_read = Hash.new
17
+ @first_row = Hash.new
18
+ @last_row = Hash.new
19
+ @first_column = Hash.new
20
+ @last_column = Hash.new
21
+ end
22
+
23
+ # Returns an array with the names of the sheets. In Csv class there is only
24
+ # one dummy sheet, because a csv file cannot have more than one sheet.
25
+ def sheets
26
+ ['default']
27
+ end
28
+
29
+ def cell(row, col, sheet=nil)
30
+ sheet = @default_sheet unless sheet
31
+ read_cells(sheet) unless @cells_read[sheet]
32
+ row,col = normalize(row,col)
33
+ @cell[[row,col]]
34
+ end
35
+
36
+ def celltype(row, col, sheet=nil)
37
+ sheet = @default_sheet unless sheet
38
+ read_cells(sheet) unless @cells_read[sheet]
39
+ row,col = normalize(row,col)
40
+ @cell_type[[row,col]]
41
+ end
42
+
43
+ def cell_postprocessing(row,col,value)
44
+ value
45
+ end
46
+
47
+ private
48
+
49
+ def celltype_class(value)
50
+ return {String => :string,
51
+ Float => :float,
52
+ Date => :date,
53
+ DateTime => :datetime,
54
+ }[value.class]
55
+ raise "unknown type for #{value.inspect}"
56
+ end
57
+
58
+ def read_cells(sheet=nil)
59
+ sheet = @default_sheet unless sheet
60
+ @cell_type = {} unless @cell_type
61
+ @cell = {} unless @cell
62
+ @first_row[sheet] = 1
63
+ @last_row[sheet] = 0
64
+ @first_column[sheet] = 1
65
+ @last_column[sheet] = 1
66
+ rownum = 1
67
+ CSV.foreach(@filename) do |row|
68
+ row.each_with_index do |elem,i|
69
+ @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
70
+ @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
71
+ if i+1 > @last_column[sheet]
72
+ @last_column[sheet] += 1
73
+ end
74
+ end
75
+ rownum += 1
76
+ @last_row[sheet] += 1
77
+ end
78
+ @cells_read[sheet] = true
79
+ #-- adjust @first_row if necessary
80
+ loop do
81
+ if !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
82
+ @first_row[sheet] += 1
83
+ else
84
+ break
85
+ end
86
+ end
87
+ #-- adjust @last_row if necessary
88
+ loop do
89
+ if !row(@last_row[sheet]).any? and @last_row[sheet] and
90
+ @last_row[sheet] > @first_row[sheet]
91
+ @last_row[sheet] -= 1
92
+ else
93
+ break
94
+ end
95
+ end
96
+ #-- adjust @first_column if necessary
97
+ loop do
98
+ if !column(@first_column[sheet]).any? and
99
+ @first_column[sheet] and
100
+ @first_column[sheet] < @last_column[sheet]
101
+ @first_column[sheet] += 1
102
+ else
103
+ break
104
+ end
105
+ end
106
+ #-- adjust @last_column if necessary
107
+ loop do
108
+ if !column(@last_column[sheet]).any? and
109
+ @last_column[sheet] and
110
+ @last_column[sheet] > @first_column[sheet]
111
+ @last_column[sheet] -= 1
112
+ else
113
+ break
114
+ end
115
+ end
116
+ end
117
+ end # class Csv
data/lib/roo/excel.rb CHANGED
@@ -93,7 +93,7 @@ end
93
93
  # Class for handling Excel-Spreadsheets
94
94
  class Excel < GenericSpreadsheet
95
95
 
96
- EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets'
96
+ EXCEL_NO_FORMULAS = 'Formulas are not supported for excel spreadsheets.'
97
97
 
98
98
  # Creates a new Excel spreadsheet object.
99
99
  # Parameter packed: :zip - File is a zip-file
@@ -191,17 +191,17 @@ class Excel < GenericSpreadsheet
191
191
 
192
192
  # raises an exception because formulas are not supported for excel files
193
193
  def formula(row,col,sheet=nil)
194
- raise EXCEL_NO_FORMULAS
194
+ wait_for_version_080
195
195
  end
196
196
 
197
197
  # raises an exception because formulas are not supported for excel files
198
198
  def formula?(row,col,sheet=nil)
199
- raise EXCEL_NO_FORMULAS
199
+ wait_for_version_080
200
200
  end
201
201
 
202
202
  # raises an exception because formulas are not supported for excel files
203
203
  def formulas(sheet=nil)
204
- raise EXCEL_NO_FORMULAS
204
+ wait_for_version_080
205
205
  end
206
206
 
207
207
  # Given a cell, return the cell's font
@@ -459,5 +459,14 @@ class Excel < GenericSpreadsheet
459
459
  return value_type, value
460
460
  end
461
461
  private :read_cell
462
-
462
+
463
+ def wait_for_version_080
464
+ if Spreadsheet::VERSION<='0.8.0'
465
+ raise EXCEL_NO_FORMULAS+
466
+ " We have to wait for the 0.8.0 version of the Spreadsheet gem (currently used version is #{Spreadsheet::VERSION})"
467
+ else
468
+ raise 'Thomas should implement formulas from Spreadsheet gem'
469
+ end
470
+ end
471
+
463
472
  end
data/lib/roo/excelx.rb CHANGED
@@ -48,6 +48,8 @@ class Excelx < GenericSpreadsheet
48
48
  'hh:mm:ss' => :time,
49
49
  "dd/mm/yy\\ hh:mm" => :datetime,
50
50
  'dd/mmm/yy' => :date, # 2011-05-21
51
+ 'yyyy-mm-dd' => :date, # 2011-09-16
52
+ # was used in a spreadsheet file from a windows phone
51
53
  }
52
54
  STANDARD_FORMATS = {
53
55
  0 => 'General',
@@ -102,6 +104,7 @@ class Excelx < GenericSpreadsheet
102
104
  end
103
105
  @@nr += 1
104
106
  @file_nr = @@nr
107
+ @comments_files = Array.new
105
108
  extract_content(@filename)
106
109
  file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
107
110
  @workbook_doc = Nokogiri::XML(file)
@@ -127,6 +130,12 @@ class Excelx < GenericSpreadsheet
127
130
  @sheet_doc[i] = Nokogiri::XML(file)
128
131
  file.close
129
132
  end
133
+ @comments_doc = []
134
+ @comments_files.each_with_index do |item, i|
135
+ file = File.new(item)
136
+ @comments_doc[i] = Nokogiri::XML(file)
137
+ file.close
138
+ end
130
139
  FileUtils::rm_r(@tmpdir)
131
140
  @default_sheet = self.sheets.first
132
141
  @cell = Hash.new
@@ -140,8 +149,26 @@ class Excelx < GenericSpreadsheet
140
149
  @excelx_type = Hash.new
141
150
  @excelx_value = Hash.new
142
151
  @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
152
+ @label = Hash.new
153
+ @labels_read = false
154
+ @comment = Hash.new
155
+ @comments_read = Hash.new
143
156
  end
144
157
 
158
+ def method_missing(m,*args)
159
+ # is method name a label name
160
+ read_labels unless @labels_read
161
+ if @label.has_key?(m.to_s)
162
+ sheet = @default_sheet unless sheet
163
+ read_cells(sheet) unless @cells_read[sheet]
164
+ row,col = label(m.to_s)
165
+ cell(row,col)
166
+ else
167
+ # call super for methods like #a1
168
+ super
169
+ end
170
+ end
171
+
145
172
  # Returns the content of a spreadsheet-cell.
146
173
  # (1,1) is the upper left corner.
147
174
  # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
@@ -183,7 +210,21 @@ class Excelx < GenericSpreadsheet
183
210
  row,col = normalize(row,col)
184
211
  formula(row,col) != nil
185
212
  end
186
-
213
+
214
+ # returns each formula in the selected sheet as an array of elements
215
+ # [row, col, formula]
216
+ def formulas(sheet=nil)
217
+ sheet = @default_sheet unless sheet
218
+ read_cells(sheet) unless @cells_read[sheet]
219
+ if @formula[sheet]
220
+ @formula[sheet].each.collect do |elem|
221
+ [elem[0][0], elem[0][1], elem[1]]
222
+ end
223
+ else
224
+ []
225
+ end
226
+ end
227
+
187
228
  class Font
188
229
  attr_accessor :bold, :italic, :underline
189
230
 
@@ -295,13 +336,80 @@ class Excelx < GenericSpreadsheet
295
336
  @cell[sheet].inspect
296
337
  end
297
338
 
339
+ # returns the row, col and sheet name of the labelled cell
340
+ # (nil,nil,nil) if label is not defined
341
+ def label(labelname)
342
+ read_labels unless @labels_read
343
+ unless @label.size > 0
344
+ return nil,nil,nil
345
+ end
346
+ if @label.has_key? labelname
347
+ return @label[labelname][1].to_i,
348
+ GenericSpreadsheet.letter_to_number(@label[labelname][2]),
349
+ @label[labelname][0]
350
+ else
351
+ return nil,nil,nil
352
+ end
353
+ end
354
+
355
+ # Returns an array with all labels. Each element is an array with
356
+ # [labelname, [row,col,sheetname]]
357
+ def labels
358
+ # sheet = @default_sheet unless sheet
359
+ # read_cells(sheet) unless @cells_read[sheet]
360
+ read_labels unless @labels_read
361
+ result = []
362
+ @label.each do |label|
363
+ result << [ label[0], # name
364
+ [ label[1][1].to_i, # row
365
+ GenericSpreadsheet.letter_to_number(label[1][2]), # column
366
+ label[1][0], # sheet
367
+ ] ]
368
+ end
369
+ result
370
+ end
371
+
372
+ # returns the comment at (row/col)
373
+ # nil if there is no comment
374
+ def comment(row,col,sheet=nil)
375
+ sheet = @default_sheet unless sheet
376
+ #read_cells(sheet) unless @cells_read[sheet]
377
+ read_comments(sheet) unless @comments_read[sheet]
378
+ row,col = normalize(row,col)
379
+ return nil unless @comment[sheet]
380
+ @comment[sheet][[row,col]]
381
+ end
382
+
383
+ # true, if there is a comment
384
+ def comment?(row,col,sheet=nil)
385
+ sheet = @default_sheet unless sheet
386
+ # read_cells(sheet) unless @cells_read[sheet]
387
+ read_comments(sheet) unless @comments_read[sheet]
388
+ row,col = normalize(row,col)
389
+ comment(row,col) != nil
390
+ end
391
+
392
+ # returns each comment in the selected sheet as an array of elements
393
+ # [row, col, comment]
394
+ def comments(sheet=nil)
395
+ sheet = @default_sheet unless sheet
396
+ read_comments(sheet) unless @comments_read[sheet]
397
+ if @comment[sheet]
398
+ @comment[sheet].each.collect do |elem|
399
+ [elem[0][0],elem[0][1],elem[1]]
400
+ end
401
+ else
402
+ []
403
+ end
404
+ end
405
+
298
406
  private
299
407
 
300
408
  # helper function to set the internal representation of cells
301
409
  def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
302
- excelx_type=nil,
303
- excelx_value=nil,
304
- s_attribute=nil)
410
+ excelx_type=nil,
411
+ excelx_value=nil,
412
+ s_attribute=nil)
305
413
  key = [y,x+i]
306
414
  @cell_type[sheet] = {} unless @cell_type[sheet]
307
415
  @cell_type[sheet][key] = vt
@@ -332,32 +440,6 @@ class Excelx < GenericSpreadsheet
332
440
  @s_attribute[sheet][key] = s_attribute
333
441
  end
334
442
 
335
- # splits a coordinate like "AA12" into the parts "AA" (String) and 12 (Fixnum)
336
- def split_coord(s)
337
- letter = ""
338
- number = 0
339
- i = 0
340
- while i<s.length and "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".include?(s[i,1])
341
- letter += s[i,1]
342
- i+=1
343
- end
344
- while i<s.length and "0123456789".include?(s[i,1])
345
- number = number*10 + s[i,1].to_i
346
- i+=1
347
- end
348
- if letter=="" or number==0
349
- raise ArgumentError
350
- end
351
- return letter,number
352
- end
353
-
354
- def split_coordinate(str)
355
- letter,number = split_coord(str)
356
- x = GenericSpreadsheet.letter_to_number(letter)
357
- y = number
358
- return x,y
359
- end
360
-
361
443
  def format2type(format)
362
444
  format = format.to_s # weil von Typ Nokogiri::XML::Attr
363
445
  if FORMATS.has_key? format
@@ -387,6 +469,10 @@ class Excelx < GenericSpreadsheet
387
469
  elsif c['t'] == 'str'
388
470
  tmp_type = :string
389
471
  # 2011-02-25 END
472
+ # 2011-09-15 BEGIN
473
+ elsif c['t'] == 'inlineStr'
474
+ tmp_type = :inlinestr
475
+ # 2011-09-15 END
390
476
  else
391
477
  s_attribute = c['s'].to_i
392
478
  format = attribute2format(s_attribute)
@@ -394,6 +480,23 @@ class Excelx < GenericSpreadsheet
394
480
  end
395
481
  formula = nil
396
482
  c.children.each do |cell|
483
+ # 2011-09-15 BEGIN
484
+ if cell.name == 'is'
485
+ cell.children.each do |is|
486
+ if is.name == 't'
487
+ inlinestr_content = is.content
488
+ vt = :string
489
+ str_v = inlinestr_content
490
+ excelx_type = :string
491
+ y, x = GenericSpreadsheet.split_coordinate(c['r'])
492
+ v = nil
493
+ tr=nil #TODO: ???s
494
+ excelx_value = inlinestr_content #cell.content
495
+ set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
496
+ end
497
+ end
498
+ end
499
+ # 2011-09-15 END
397
500
  if cell.name == 'f'
398
501
  formula = cell.content
399
502
  end
@@ -439,7 +542,7 @@ class Excelx < GenericSpreadsheet
439
542
  vt = :float
440
543
  v = cell.content
441
544
  end
442
- x,y = split_coordinate(c['r'])
545
+ y, x = GenericSpreadsheet.split_coordinate(c['r'])
443
546
  tr=nil #TODO: ???s
444
547
  set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
445
548
  end
@@ -450,27 +553,115 @@ class Excelx < GenericSpreadsheet
450
553
  raise RangeError
451
554
  end
452
555
  @cells_read[sheet] = true
556
+ # begin comments
557
+ =begin
558
+ File xl/comments1.xml
559
+ <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
560
+ <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
561
+ <authors>
562
+ <author />
563
+ </authors>
564
+ <commentList>
565
+ <comment ref="B4" authorId="0">
566
+ <text>
567
+ <r>
568
+ <rPr>
569
+ <sz val="10" />
570
+ <rFont val="Arial" />
571
+ <family val="2" />
572
+ </rPr>
573
+ <t>Kommentar fuer B4</t>
574
+ </r>
575
+ </text>
576
+ </comment>
577
+ <comment ref="B5" authorId="0">
578
+ <text>
579
+ <r>
580
+ <rPr>
581
+ <sz val="10" />
582
+ <rFont val="Arial" />
583
+ <family val="2" />
584
+ </rPr>
585
+ <t>Kommentar fuer B5</t>
586
+ </r>
587
+ </text>
588
+ </comment>
589
+ </commentList>
590
+ </comments>
591
+ =end
592
+ =begin
593
+ if @comments_doc[self.sheets.index(sheet)]
594
+ read_comments(sheet)
595
+ end
596
+ =end
597
+ #end comments
453
598
  end
454
-
599
+
600
+ # Reads all comments from a sheet
601
+ def read_comments(sheet=nil)
602
+ sheet = @default_sheet unless sheet
603
+ #sheet_found = false
604
+ raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
605
+ raise RangeError unless self.sheets.include? sheet
606
+ n = self.sheets.index(sheet)
607
+ return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
608
+ @comments_doc[n].xpath("//*[local-name()='comments']").each do |comment|
609
+ comment.children.each do |cc|
610
+ if cc.name == 'commentList'
611
+ cc.children.each do |commentlist|
612
+ if commentlist.name == 'comment'
613
+ ref = commentlist.attributes['ref'].to_s
614
+ row,col = GenericSpreadsheet.split_coordinate(ref)
615
+ commentlist.children.each do |clc|
616
+ if clc.name == 'text'
617
+ clc.children.each do |text|
618
+ if text.name == 'r'
619
+ text.children.each do |r|
620
+ if r.name == 't'
621
+ comment = r.text
622
+ @comment[sheet] = Hash.new unless @comment[sheet]
623
+ @comment[sheet][[row,col]] = comment
624
+ end
625
+ end
626
+ end
627
+ end
628
+ end
629
+ end
630
+ end
631
+ end
632
+ end
633
+ end
634
+ end
635
+ @comments_read[sheet] = true
636
+ end
637
+
638
+ def read_labels
639
+ @workbook_doc.xpath("//*[local-name()='definedName']").each do |defined_name|
640
+ # "Sheet1!$C$5"
641
+ sheet = defined_name.text.split('!').first
642
+ coordinates = defined_name.text.split('!')[1]
643
+ dummy,col,row = coordinates.split('$')
644
+ @label[defined_name['name']] = [sheet,row,col]
645
+ end
646
+ @labels_read = true
647
+ end
648
+
455
649
  # Checks if the default_sheet exists. If not, a RangeError exception is
456
650
  # raised
457
651
  def check_default_sheet
458
652
  sheet_found = false
459
653
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
460
-
461
654
  sheet_found = true if sheets.include?(@default_sheet)
462
-
463
655
  if ! sheet_found
464
656
  raise RangeError, "sheet '#{@default_sheet}' not found"
465
657
  end
466
658
  end
467
659
 
468
- # extracts all needed files from the zip file
660
+ # Extracts all needed files from the zip file
469
661
  def process_zipfile(zipfilename, zip, path='')
470
662
  @sheet_files = []
471
663
  Zip::ZipFile.open(zipfilename) {|zf|
472
664
  zf.entries.each {|entry|
473
- #entry.extract
474
665
  if entry.to_s.end_with?('workbook.xml')
475
666
  open(@tmpdir+'/'+@file_nr.to_s+'_roo_workbook.xml','wb') {|f|
476
667
  f << zip.read(entry)
@@ -493,9 +684,16 @@ class Excelx < GenericSpreadsheet
493
684
  }
494
685
  @sheet_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}"
495
686
  end
687
+ if entry.to_s =~ /comments([0-9]+).xml$/
688
+ nr = $1
689
+ open(@tmpdir+'/'+@file_nr.to_s+"_roo_comments#{nr}",'wb') {|f|
690
+ f << zip.read(entry)
691
+ }
692
+ @comments_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_comments#{nr}"
693
+ end
496
694
  }
497
695
  }
498
- return
696
+ # return
499
697
  end
500
698
 
501
699
  # extract files from the zip file