roo 1.9.7 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,7 +1,19 @@
1
+ == 1.10.0 2011-10-10
2
+
3
+ * 4 enhancements
4
+ * New class Csv.
5
+ * Openoffice, Libreoffice: new method 'labels'
6
+ * Excelx: implemented all methods concerning labels
7
+ * Openoffice, Excelx: new methods concerning comments (comment, comment? and comments)
8
+
9
+ * 2 bugfixes
10
+ * XLSX: some cells were not recognized correctly from a spreadsheet file from a windows mobile phone.
11
+ * labels: Moved to a separate method. There were problems if a label was accessed before read_cells was called.
12
+
1
13
  == 1.9.7 2011-08-27
2
14
 
3
15
  * 1 bugfix
4
- * Openoffice: Better way for extracting formula strings, some characters were deleted at the formula string under some circumstances.
16
+ * Openoffice: Better way for extracting formula strings, some characters were deleted at the formula string.
5
17
 
6
18
  == 1.9.6 2011-08-03
7
19
 
data/README.txt CHANGED
@@ -9,6 +9,8 @@ Roo can access the contents of various spreadsheet files. It can handle
9
9
  * Excel
10
10
  * Google spreadsheets
11
11
  * Excelx
12
+ * Libreoffice
13
+ * CSV
12
14
 
13
15
 
14
16
  == FEATURES/PROBLEMS:
data/bin/roo CHANGED
@@ -26,8 +26,7 @@ Choice.options do
26
26
  long '--info <spreadsheetfile>'
27
27
  desc 'Show information about a spreadsheet file'
28
28
  action do
29
- p Choice.choices
30
- puts 'Filename: '+Choice.choices['info']
29
+ puts '#'
31
30
  end
32
31
  end
33
32
  end
data/lib/roo.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  module Roo
4
4
 
5
5
  # :stopdoc:
6
- VERSION = '1.9.7'
6
+ VERSION = '1.10.0'
7
7
  LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
8
8
  PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
9
9
  # :startdoc:
@@ -54,6 +54,8 @@ module Roo
54
54
  Excelx.new(file)
55
55
  when '.ods'
56
56
  Openoffice.new(file)
57
+ when '.csv'
58
+ Csv.new(file)
57
59
  # when ''
58
60
  else
59
61
  Google.new(file)
@@ -70,6 +72,7 @@ require 'roo/openoffice'
70
72
  require 'roo/excel'
71
73
  require 'roo/excelx'
72
74
  require 'roo/google'
75
+ require 'roo/csv'
73
76
 
74
77
  #Roo.require_all_libs_relative_to(__FILE__)
75
78
 
Binary file
data/lib/roo/csv.rb ADDED
@@ -0,0 +1,117 @@
1
+ require 'rubygems'
2
+ require 'csv'
3
+
4
+ # The Csv class can read csv files (must be separated with commas) which then
5
+ # can be handled like spreadsheets. This means you can access cells like A5
6
+ # within these files.
7
+ # The Csv class provides only string objects. If you want conversions to other
8
+ # types you have to do it yourself.
9
+
10
+ class Csv < GenericSpreadsheet
11
+ def initialize(filename, packed=nil, file_warning=:error, tmpdir=nil)
12
+ @filename = filename
13
+ super()
14
+ @cell = Hash.new
15
+ @cell_type = Hash.new
16
+ @cells_read = Hash.new
17
+ @first_row = Hash.new
18
+ @last_row = Hash.new
19
+ @first_column = Hash.new
20
+ @last_column = Hash.new
21
+ end
22
+
23
+ # Returns an array with the names of the sheets. In Csv class there is only
24
+ # one dummy sheet, because a csv file cannot have more than one sheet.
25
+ def sheets
26
+ ['default']
27
+ end
28
+
29
+ def cell(row, col, sheet=nil)
30
+ sheet = @default_sheet unless sheet
31
+ read_cells(sheet) unless @cells_read[sheet]
32
+ row,col = normalize(row,col)
33
+ @cell[[row,col]]
34
+ end
35
+
36
+ def celltype(row, col, sheet=nil)
37
+ sheet = @default_sheet unless sheet
38
+ read_cells(sheet) unless @cells_read[sheet]
39
+ row,col = normalize(row,col)
40
+ @cell_type[[row,col]]
41
+ end
42
+
43
+ def cell_postprocessing(row,col,value)
44
+ value
45
+ end
46
+
47
+ private
48
+
49
+ def celltype_class(value)
50
+ return {String => :string,
51
+ Float => :float,
52
+ Date => :date,
53
+ DateTime => :datetime,
54
+ }[value.class]
55
+ raise "unknown type for #{value.inspect}"
56
+ end
57
+
58
+ def read_cells(sheet=nil)
59
+ sheet = @default_sheet unless sheet
60
+ @cell_type = {} unless @cell_type
61
+ @cell = {} unless @cell
62
+ @first_row[sheet] = 1
63
+ @last_row[sheet] = 0
64
+ @first_column[sheet] = 1
65
+ @last_column[sheet] = 1
66
+ rownum = 1
67
+ CSV.foreach(@filename) do |row|
68
+ row.each_with_index do |elem,i|
69
+ @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
70
+ @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
71
+ if i+1 > @last_column[sheet]
72
+ @last_column[sheet] += 1
73
+ end
74
+ end
75
+ rownum += 1
76
+ @last_row[sheet] += 1
77
+ end
78
+ @cells_read[sheet] = true
79
+ #-- adjust @first_row if neccessary
80
+ loop do
81
+ if !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
82
+ @first_row[sheet] += 1
83
+ else
84
+ break
85
+ end
86
+ end
87
+ #-- adjust @last_row if neccessary
88
+ loop do
89
+ if !row(@last_row[sheet]).any? and @last_row[sheet] and
90
+ @last_row[sheet] > @first_row[sheet]
91
+ @last_row[sheet] -= 1
92
+ else
93
+ break
94
+ end
95
+ end
96
+ #-- adjust @first_column if neccessary
97
+ loop do
98
+ if !column(@first_column[sheet]).any? and
99
+ @first_column[sheet] and
100
+ @first_column[sheet] < @last_column[sheet]
101
+ @first_column[sheet] += 1
102
+ else
103
+ break
104
+ end
105
+ end
106
+ #-- adjust @last_column if neccessary
107
+ loop do
108
+ if !column(@last_column[sheet]).any? and
109
+ @last_column[sheet] and
110
+ @last_column[sheet] > @first_column[sheet]
111
+ @last_column[sheet] -= 1
112
+ else
113
+ break
114
+ end
115
+ end
116
+ end
117
+ end # class Csv
data/lib/roo/excel.rb CHANGED
@@ -93,7 +93,7 @@ end
93
93
  # Class for handling Excel-Spreadsheets
94
94
  class Excel < GenericSpreadsheet
95
95
 
96
- EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets'
96
+ EXCEL_NO_FORMULAS = 'Formulas are not supported for excel spreadsheets.'
97
97
 
98
98
  # Creates a new Excel spreadsheet object.
99
99
  # Parameter packed: :zip - File is a zip-file
@@ -191,17 +191,17 @@ class Excel < GenericSpreadsheet
191
191
 
192
192
  # raises an exception because formulas are not supported for excel files
193
193
  def formula(row,col,sheet=nil)
194
- raise EXCEL_NO_FORMULAS
194
+ wait_for_version_080
195
195
  end
196
196
 
197
197
  # raises an exception because formulas are not supported for excel files
198
198
  def formula?(row,col,sheet=nil)
199
- raise EXCEL_NO_FORMULAS
199
+ wait_for_version_080
200
200
  end
201
201
 
202
202
  # raises an exception because formulas are not supported for excel files
203
203
  def formulas(sheet=nil)
204
- raise EXCEL_NO_FORMULAS
204
+ wait_for_version_080
205
205
  end
206
206
 
207
207
  # Given a cell, return the cell's font
@@ -459,5 +459,14 @@ class Excel < GenericSpreadsheet
459
459
  return value_type, value
460
460
  end
461
461
  private :read_cell
462
-
462
+
463
+ def wait_for_version_080
464
+ if Spreadsheet::VERSION<='0.8.0'
465
+ raise EXCEL_NO_FORMULAS+
466
+ " We have to wait for the 0.8.0 version of the Spreadsheet gem (currently used version is #{Spreadsheet::VERSION})"
467
+ else
468
+ raise 'Thomas should implement formulas from Spreadsheet gem'
469
+ end
470
+ end
471
+
463
472
  end
data/lib/roo/excelx.rb CHANGED
@@ -48,6 +48,8 @@ class Excelx < GenericSpreadsheet
48
48
  'hh:mm:ss' => :time,
49
49
  "dd/mm/yy\\ hh:mm" => :datetime,
50
50
  'dd/mmm/yy' => :date, # 2011-05-21
51
+ 'yyyy-mm-dd' => :date, # 2011-09-16
52
+ # was used in a spreadsheet file from a windows phone
51
53
  }
52
54
  STANDARD_FORMATS = {
53
55
  0 => 'General',
@@ -102,6 +104,7 @@ class Excelx < GenericSpreadsheet
102
104
  end
103
105
  @@nr += 1
104
106
  @file_nr = @@nr
107
+ @comments_files = Array.new
105
108
  extract_content(@filename)
106
109
  file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
107
110
  @workbook_doc = Nokogiri::XML(file)
@@ -127,6 +130,12 @@ class Excelx < GenericSpreadsheet
127
130
  @sheet_doc[i] = Nokogiri::XML(file)
128
131
  file.close
129
132
  end
133
+ @comments_doc = []
134
+ @comments_files.each_with_index do |item, i|
135
+ file = File.new(item)
136
+ @comments_doc[i] = Nokogiri::XML(file)
137
+ file.close
138
+ end
130
139
  FileUtils::rm_r(@tmpdir)
131
140
  @default_sheet = self.sheets.first
132
141
  @cell = Hash.new
@@ -140,8 +149,26 @@ class Excelx < GenericSpreadsheet
140
149
  @excelx_type = Hash.new
141
150
  @excelx_value = Hash.new
142
151
  @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
152
+ @label = Hash.new
153
+ @labels_read = false
154
+ @comment = Hash.new
155
+ @comments_read = Hash.new
143
156
  end
144
157
 
158
+ def method_missing(m,*args)
159
+ # is method name a label name
160
+ read_labels unless @labels_read
161
+ if @label.has_key?(m.to_s)
162
+ sheet = @default_sheet unless sheet
163
+ read_cells(sheet) unless @cells_read[sheet]
164
+ row,col = label(m.to_s)
165
+ cell(row,col)
166
+ else
167
+ # call super for methods like #a1
168
+ super
169
+ end
170
+ end
171
+
145
172
  # Returns the content of a spreadsheet-cell.
146
173
  # (1,1) is the upper left corner.
147
174
  # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
@@ -183,7 +210,21 @@ class Excelx < GenericSpreadsheet
183
210
  row,col = normalize(row,col)
184
211
  formula(row,col) != nil
185
212
  end
186
-
213
+
214
+ # returns each formula in the selected sheet as an array of elements
215
+ # [row, col, formula]
216
+ def formulas(sheet=nil)
217
+ sheet = @default_sheet unless sheet
218
+ read_cells(sheet) unless @cells_read[sheet]
219
+ if @formula[sheet]
220
+ @formula[sheet].each.collect do |elem|
221
+ [elem[0][0], elem[0][1], elem[1]]
222
+ end
223
+ else
224
+ []
225
+ end
226
+ end
227
+
187
228
  class Font
188
229
  attr_accessor :bold, :italic, :underline
189
230
 
@@ -295,13 +336,80 @@ class Excelx < GenericSpreadsheet
295
336
  @cell[sheet].inspect
296
337
  end
297
338
 
339
+ # returns the row,col values of the labelled cell
340
+ # (nil,nil) if label is not defined
341
+ def label(labelname)
342
+ read_labels unless @labels_read
343
+ unless @label.size > 0
344
+ return nil,nil,nil
345
+ end
346
+ if @label.has_key? labelname
347
+ return @label[labelname][1].to_i,
348
+ GenericSpreadsheet.letter_to_number(@label[labelname][2]),
349
+ @label[labelname][0]
350
+ else
351
+ return nil,nil,nil
352
+ end
353
+ end
354
+
355
+ # Returns an array which all labels. Each element is an array with
356
+ # [labelname, [sheetname,row,col]]
357
+ def labels
358
+ # sheet = @default_sheet unless sheet
359
+ # read_cells(sheet) unless @cells_read[sheet]
360
+ read_labels unless @labels_read
361
+ result = []
362
+ @label.each do |label|
363
+ result << [ label[0], # name
364
+ [ label[1][1].to_i, # row
365
+ GenericSpreadsheet.letter_to_number(label[1][2]), # column
366
+ label[1][0], # sheet
367
+ ] ]
368
+ end
369
+ result
370
+ end
371
+
372
+ # returns the comment at (row/col)
373
+ # nil if there is no comment
374
+ def comment(row,col,sheet=nil)
375
+ sheet = @default_sheet unless sheet
376
+ #read_cells(sheet) unless @cells_read[sheet]
377
+ read_comments(sheet) unless @comments_read[sheet]
378
+ row,col = normalize(row,col)
379
+ return nil unless @comment[sheet]
380
+ @comment[sheet][[row,col]]
381
+ end
382
+
383
+ # true, if there is a comment
384
+ def comment?(row,col,sheet=nil)
385
+ sheet = @default_sheet unless sheet
386
+ # read_cells(sheet) unless @cells_read[sheet]
387
+ read_comments(sheet) unless @comments_read[sheet]
388
+ row,col = normalize(row,col)
389
+ comment(row,col) != nil
390
+ end
391
+
392
+ # returns each comment in the selected sheet as an array of elements
393
+ # [row, col, comment]
394
+ def comments(sheet=nil)
395
+ sheet = @default_sheet unless sheet
396
+ read_comments(sheet) unless @comments_read[sheet]
397
+ if @comment[sheet]
398
+ @comment[sheet].each.collect do |elem|
399
+ [elem[0][0],elem[0][1],elem[1]]
400
+ end
401
+ else
402
+ []
403
+ end
404
+ end
405
+
298
406
  private
299
407
 
300
408
  # helper function to set the internal representation of cells
301
409
  def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
302
- excelx_type=nil,
303
- excelx_value=nil,
304
- s_attribute=nil)
410
+ excelx_type=nil,
411
+ excelx_value=nil,
412
+ s_attribute=nil)
305
413
  key = [y,x+i]
306
414
  @cell_type[sheet] = {} unless @cell_type[sheet]
307
415
  @cell_type[sheet][key] = vt
@@ -332,32 +440,6 @@ class Excelx < GenericSpreadsheet
332
440
  @s_attribute[sheet][key] = s_attribute
333
441
  end
334
442
 
335
- # splits a coordinate like "AA12" into the parts "AA" (String) and 12 (Fixnum)
336
- def split_coord(s)
337
- letter = ""
338
- number = 0
339
- i = 0
340
- while i<s.length and "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".include?(s[i,1])
341
- letter += s[i,1]
342
- i+=1
343
- end
344
- while i<s.length and "0123456789".include?(s[i,1])
345
- number = number*10 + s[i,1].to_i
346
- i+=1
347
- end
348
- if letter=="" or number==0
349
- raise ArgumentError
350
- end
351
- return letter,number
352
- end
353
-
354
- def split_coordinate(str)
355
- letter,number = split_coord(str)
356
- x = GenericSpreadsheet.letter_to_number(letter)
357
- y = number
358
- return x,y
359
- end
360
-
361
443
  def format2type(format)
362
444
  format = format.to_s # weil von Typ Nokogiri::XML::Attr
363
445
  if FORMATS.has_key? format
@@ -387,6 +469,10 @@ class Excelx < GenericSpreadsheet
387
469
  elsif c['t'] == 'str'
388
470
  tmp_type = :string
389
471
  # 2011-02-25 END
472
+ # 2011-09-15 BEGIN
473
+ elsif c['t'] == 'inlineStr'
474
+ tmp_type = :inlinestr
475
+ # 2011-09-15 END
390
476
  else
391
477
  s_attribute = c['s'].to_i
392
478
  format = attribute2format(s_attribute)
@@ -394,6 +480,23 @@ class Excelx < GenericSpreadsheet
394
480
  end
395
481
  formula = nil
396
482
  c.children.each do |cell|
483
+ # 2011-09-15 BEGIN
484
+ if cell.name == 'is'
485
+ cell.children.each do |is|
486
+ if is.name == 't'
487
+ inlinestr_content = is.content
488
+ vt = :string
489
+ str_v = inlinestr_content
490
+ excelx_type = :string
491
+ y, x = GenericSpreadsheet.split_coordinate(c['r'])
492
+ v = nil
493
+ tr=nil #TODO: ???s
494
+ excelx_value = inlinestr_content #cell.content
495
+ set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
496
+ end
497
+ end
498
+ end
499
+ # 2011-09-15 END
397
500
  if cell.name == 'f'
398
501
  formula = cell.content
399
502
  end
@@ -439,7 +542,7 @@ class Excelx < GenericSpreadsheet
439
542
  vt = :float
440
543
  v = cell.content
441
544
  end
442
- x,y = split_coordinate(c['r'])
545
+ y, x = GenericSpreadsheet.split_coordinate(c['r'])
443
546
  tr=nil #TODO: ???s
444
547
  set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
445
548
  end
@@ -450,27 +553,115 @@ class Excelx < GenericSpreadsheet
450
553
  raise RangeError
451
554
  end
452
555
  @cells_read[sheet] = true
556
+ # begin comments
557
+ =begin
558
+ Datei xl/comments1.xml
559
+ <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
560
+ <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
561
+ <authors>
562
+ <author />
563
+ </authors>
564
+ <commentList>
565
+ <comment ref="B4" authorId="0">
566
+ <text>
567
+ <r>
568
+ <rPr>
569
+ <sz val="10" />
570
+ <rFont val="Arial" />
571
+ <family val="2" />
572
+ </rPr>
573
+ <t>Kommentar fuer B4</t>
574
+ </r>
575
+ </text>
576
+ </comment>
577
+ <comment ref="B5" authorId="0">
578
+ <text>
579
+ <r>
580
+ <rPr>
581
+ <sz val="10" />
582
+ <rFont val="Arial" />
583
+ <family val="2" />
584
+ </rPr>
585
+ <t>Kommentar fuer B5</t>
586
+ </r>
587
+ </text>
588
+ </comment>
589
+ </commentList>
590
+ </comments>
591
+ =end
592
+ =begin
593
+ if @comments_doc[self.sheets.index(sheet)]
594
+ read_comments(sheet)
595
+ end
596
+ =end
597
+ #end comments
453
598
  end
454
-
599
+
600
+ # Reads all comments from a sheet
601
+ def read_comments(sheet=nil)
602
+ sheet = @default_sheet unless sheet
603
+ #sheet_found = false
604
+ raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
605
+ raise RangeError unless self.sheets.include? sheet
606
+ n = self.sheets.index(sheet)
607
+ return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
608
+ @comments_doc[n].xpath("//*[local-name()='comments']").each do |comment|
609
+ comment.children.each do |cc|
610
+ if cc.name == 'commentList'
611
+ cc.children.each do |commentlist|
612
+ if commentlist.name == 'comment'
613
+ ref = commentlist.attributes['ref'].to_s
614
+ row,col = GenericSpreadsheet.split_coordinate(ref)
615
+ commentlist.children.each do |clc|
616
+ if clc.name == 'text'
617
+ clc.children.each do |text|
618
+ if text.name == 'r'
619
+ text.children.each do |r|
620
+ if r.name == 't'
621
+ comment = r.text
622
+ @comment[sheet] = Hash.new unless @comment[sheet]
623
+ @comment[sheet][[row,col]] = comment
624
+ end
625
+ end
626
+ end
627
+ end
628
+ end
629
+ end
630
+ end
631
+ end
632
+ end
633
+ end
634
+ end
635
+ @comments_read[sheet] = true
636
+ end
637
+
638
+ def read_labels
639
+ @workbook_doc.xpath("//*[local-name()='definedName']").each do |defined_name|
640
+ # "Sheet1!$C$5"
641
+ sheet = defined_name.text.split('!').first
642
+ coordinates = defined_name.text.split('!')[1]
643
+ dummy,col,row = coordinates.split('$')
644
+ @label[defined_name['name']] = [sheet,row,col]
645
+ end
646
+ @labels_read = true
647
+ end
648
+
455
649
  # Checks if the default_sheet exists. If not an RangeError exception is
456
650
  # raised
457
651
  def check_default_sheet
458
652
  sheet_found = false
459
653
  raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
460
-
461
654
  sheet_found = true if sheets.include?(@default_sheet)
462
-
463
655
  if ! sheet_found
464
656
  raise RangeError, "sheet '#{@default_sheet}' not found"
465
657
  end
466
658
  end
467
659
 
468
- # extracts all needed files from the zip file
660
+ # Extracts all needed files from the zip file
469
661
  def process_zipfile(zipfilename, zip, path='')
470
662
  @sheet_files = []
471
663
  Zip::ZipFile.open(zipfilename) {|zf|
472
664
  zf.entries.each {|entry|
473
- #entry.extract
474
665
  if entry.to_s.end_with?('workbook.xml')
475
666
  open(@tmpdir+'/'+@file_nr.to_s+'_roo_workbook.xml','wb') {|f|
476
667
  f << zip.read(entry)
@@ -493,9 +684,16 @@ class Excelx < GenericSpreadsheet
493
684
  }
494
685
  @sheet_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}"
495
686
  end
687
+ if entry.to_s =~ /comments([0-9]+).xml$/
688
+ nr = $1
689
+ open(@tmpdir+'/'+@file_nr.to_s+"_roo_comments#{nr}",'wb') {|f|
690
+ f << zip.read(entry)
691
+ }
692
+ @comments_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_comments#{nr}"
693
+ end
496
694
  }
497
695
  }
498
- return
696
+ # return
499
697
  end
500
698
 
501
699
  # extract files from the zip file