RubyGems - roo - Versions diffs - 1.9.7 → 1.10.0 - Mend

roo 1.9.7 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25) hide show

data/History.txt +13 -1
data/README.txt +2 -0
data/bin/roo +1 -2
data/lib/roo.rb +4 -1
data/lib/roo/.csv.rb.swp +0 -0
data/lib/roo/csv.rb +117 -0
data/lib/roo/excel.rb +14 -5
data/lib/roo/excelx.rb +235 -37
data/lib/roo/generic_spreadsheet.rb +23 -4
data/lib/roo/openoffice.rb +145 -41
data/test/Pfand_from_windows_phone.xlsx +0 -0
data/test/comments.ods +0 -0
data/test/comments.xls +0 -0
data/test/comments.xlsx +0 -0
data/test/csvtypes.csv +1 -0
data/test/named_cells.xls +0 -0
data/test/named_cells.xlsx +0 -0
data/test/numbers1.csv +18 -0
data/test/test_roo.rb +444 -223
metadata +31 -22
data/PostInstall.txt +0 -1
data/a.xls +0 -0
data/csv8532 +0 -862
data/test/benchmark1.rb +0 -43
data/tmp.xls +0 -0

data/History.txt CHANGED Viewed

@@ -1,7 +1,19 @@
+== 1.10.0 2011-10-10
+* 4 enhancements
+  * New class Csv.
+  * Openoffice, Libreoffice: new method 'labels'
+  * Excelx: implemented all methods concerning labels
+  * Openoffice, Excelx: new methods concerning comments (comment, comment? and comments)
+* 2 bugfixes
+  * XLSX: some cells of a spreadsheet file created on a Windows mobile phone were not recognized correctly.
+  * labels: Moved to a separate method. There were problems if a label was accessed before read_cells was called.
 == 1.9.7 2011-08-27
 * 1 bugfix
-  * Openoffice: Better way for extracting formula strings, some characters were deleted at the formula string under some circumstances.
+  * Openoffice: Better way for extracting formula strings, some characters were deleted at the formula string.
 == 1.9.6 2011-08-03

data/README.txt CHANGED Viewed

@@ -9,6 +9,8 @@ Roo can access the contents of various spreadsheet files. It can handle
 * Excel
 * Google spreadsheets
 * Excelx
+* Libreoffice
+* CSV
 == FEATURES/PROBLEMS:

data/bin/roo CHANGED Viewed

@@ -26,8 +26,7 @@ Choice.options do
 		long '--info <spreadsheetfile>'
 		desc 'Show information about a spreadsheet file'
 		action do
-			p Choice.choices
-			puts 'Filename: '+Choice.choices['info']
+			puts '#'
 		end
 	end
 end

data/lib/roo.rb CHANGED Viewed

@@ -3,7 +3,7 @@
 module Roo
   # :stopdoc:
-  VERSION = '1.9.7'
+  VERSION = '1.10.0'
   LIBPATH = ::File.expand_path(::File.dirname(__FILE__)) + ::File::SEPARATOR
   PATH = ::File.dirname(LIBPATH) + ::File::SEPARATOR
   # :startdoc:
@@ -54,6 +54,8 @@ module Roo
           Excelx.new(file)
         when '.ods'
           Openoffice.new(file)
+        when '.csv'
+          Csv.new(file)
           # when ''
         else
           Google.new(file)
@@ -70,6 +72,7 @@ require 'roo/openoffice'
 require 'roo/excel'
 require 'roo/excelx'
 require 'roo/google'
+require 'roo/csv'
 #Roo.require_all_libs_relative_to(__FILE__)

data/lib/roo/.csv.rb.swp ADDED Viewed

Binary file

data/lib/roo/csv.rb ADDED Viewed

@@ -0,0 +1,117 @@
+require 'rubygems'
+require 'csv'
+# The Csv class can read csv files (must be separated with commas) which then
+# can be handled like spreadsheets. This means you can access cells like A5
+# within these files.
+# The Csv class provides only string objects. If you want conversions to other
+# types you have to do it yourself.
+class Csv < GenericSpreadsheet
+  def initialize(filename, packed=nil, file_warning=:error, tmpdir=nil)
+    @filename = filename
+    super()
+    @cell = Hash.new
+    @cell_type = Hash.new
+    @cells_read = Hash.new
+    @first_row = Hash.new
+    @last_row = Hash.new
+    @first_column = Hash.new
+    @last_column = Hash.new
+  end
+  # Returns an array with the names of the sheets. In Csv class there is only
+  # one dummy sheet, because a csv file cannot have more than one sheet.
+  def sheets
+    ['default']
+  end
+  def cell(row, col, sheet=nil)
+    sheet = @default_sheet unless sheet
+    read_cells(sheet) unless @cells_read[sheet]
+    row,col = normalize(row,col)
+    @cell[[row,col]]
+  end
+  def celltype(row, col, sheet=nil)
+    sheet = @default_sheet unless sheet
+    read_cells(sheet) unless @cells_read[sheet]
+    row,col = normalize(row,col)
+    @cell_type[[row,col]]
+  end
+  def cell_postprocessing(row,col,value)
+    value
+  end
+  private
+  def celltype_class(value)
+    return {String => :string,
+      Float => :float,
+      Date => :date,
+      DateTime => :datetime,
+    }[value.class]
+    raise "unknown type for #{value.inspect}"
+  end
+  def read_cells(sheet=nil)
+    sheet = @default_sheet unless sheet
+    @cell_type = {} unless @cell_type
+    @cell = {} unless @cell
+    @first_row[sheet] = 1
+    @last_row[sheet] = 0
+    @first_column[sheet] = 1
+    @last_column[sheet] = 1
+    rownum = 1
+    CSV.foreach(@filename) do |row|
+      row.each_with_index do |elem,i|
+        @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
+        @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
+        if i+1 > @last_column[sheet]
+          @last_column[sheet] += 1
+        end
+      end
+      rownum += 1
+      @last_row[sheet] += 1
+    end
+    @cells_read[sheet] = true
+    #-- adjust @first_row if necessary
+    loop do
+      if !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
+        @first_row[sheet] += 1
+      else
+        break
+      end
+    end
+    #-- adjust @last_row if necessary
+    loop do
+      if !row(@last_row[sheet]).any? and @last_row[sheet] and
+          @last_row[sheet] > @first_row[sheet]
+        @last_row[sheet] -= 1
+      else
+        break
+      end
+    end
+    #-- adjust @first_column if necessary
+    loop do
+      if !column(@first_column[sheet]).any? and
+          @first_column[sheet] and
+          @first_column[sheet] < @last_column[sheet]
+        @first_column[sheet] += 1
+      else
+        break
+      end
+    end
+    #-- adjust @last_column if necessary
+    loop do
+      if !column(@last_column[sheet]).any? and
+          @last_column[sheet] and
+          @last_column[sheet] > @first_column[sheet]
+        @last_column[sheet] -= 1
+      else
+        break
+      end
+    end
+  end
+end # class Csv

data/lib/roo/excel.rb CHANGED Viewed

@@ -93,7 +93,7 @@ end
 # Class for handling Excel-Spreadsheets
 class Excel < GenericSpreadsheet
-  EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets'
+  EXCEL_NO_FORMULAS = 'Formulas are not supported for excel spreadsheets.'
   # Creates a new Excel spreadsheet object.
   # Parameter packed: :zip - File is a zip-file
@@ -191,17 +191,17 @@ class Excel < GenericSpreadsheet
   # returns NO formula in excel spreadsheets
   def formula(row,col,sheet=nil)
-    raise EXCEL_NO_FORMULAS
+    wait_for_version_080
   end
   # raises an exception because formulas are not supported for excel files
   def formula?(row,col,sheet=nil)
-    raise EXCEL_NO_FORMULAS
+    wait_for_version_080
   end
   # returns NO formulas in excel spreadsheets
   def formulas(sheet=nil)
-    raise EXCEL_NO_FORMULAS
+    wait_for_version_080
   end
   # Given a cell, return the cell's font
@@ -459,5 +459,14 @@ class Excel < GenericSpreadsheet
     return value_type, value
   end
   private :read_cell
+  def wait_for_version_080
+    if Spreadsheet::VERSION<='0.8.0'
+      raise EXCEL_NO_FORMULAS+
+        " We have to wait for the 0.8.0 version of the Spreadsheet gem (currently used version is #{Spreadsheet::VERSION})"
+    else
+      raise 'Thomas should implement formulas from Spreadsheet gem'
+    end
+  end
 end

data/lib/roo/excelx.rb CHANGED Viewed

@@ -48,6 +48,8 @@ class Excelx < GenericSpreadsheet
     'hh:mm:ss' => :time,
     "dd/mm/yy\\ hh:mm" => :datetime,
     'dd/mmm/yy' => :date, # 2011-05-21
+    'yyyy-mm-dd' => :date, # 2011-09-16
+    # was used in a spreadsheet file from a windows phone
   }
   STANDARD_FORMATS = {
     0 => 'General',
@@ -102,6 +104,7 @@ class Excelx < GenericSpreadsheet
     end
     @@nr += 1
     @file_nr = @@nr
+    @comments_files = Array.new
     extract_content(@filename)
     file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
     @workbook_doc = Nokogiri::XML(file)
@@ -127,6 +130,12 @@ class Excelx < GenericSpreadsheet
       @sheet_doc[i] = Nokogiri::XML(file)
       file.close
     end
+    @comments_doc = []
+    @comments_files.each_with_index do |item, i|
+      file = File.new(item)
+      @comments_doc[i] = Nokogiri::XML(file)
+      file.close
+    end
     FileUtils::rm_r(@tmpdir)
     @default_sheet = self.sheets.first
     @cell = Hash.new
@@ -140,8 +149,26 @@ class Excelx < GenericSpreadsheet
     @excelx_type = Hash.new
     @excelx_value = Hash.new
     @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
+    @label = Hash.new
+    @labels_read = false
+    @comment = Hash.new
+    @comments_read = Hash.new
   end
+  def method_missing(m,*args)
+    # is method name a label name
+    read_labels unless @labels_read
+    if @label.has_key?(m.to_s)
+      sheet = @default_sheet unless sheet
+      read_cells(sheet) unless @cells_read[sheet]
+      row,col = label(m.to_s)
+      cell(row,col)
+    else
+      # call super for methods like #a1
+      super
+    end
+  end
   # Returns the content of a spreadsheet-cell.
   # (1,1) is the upper left corner.
   # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
@@ -183,7 +210,21 @@ class Excelx < GenericSpreadsheet
     row,col = normalize(row,col)
     formula(row,col) != nil
   end
+  # Returns each formula in the selected sheet as an array of elements
+  # [row, col, formula]
+  def formulas(sheet=nil)
+    sheet = @default_sheet unless sheet
+    read_cells(sheet) unless @cells_read[sheet]
+    if @formula[sheet]
+      @formula[sheet].each.collect do |elem|
+        [elem[0][0], elem[0][1], elem[1]]
+      end
+    else
+      []
+    end
+  end
   class Font
     attr_accessor :bold, :italic, :underline
@@ -295,13 +336,80 @@ class Excelx < GenericSpreadsheet
     @cell[sheet].inspect
   end
+  # Returns the row, the column and the sheet name of the labelled cell,
+  # or (nil, nil, nil) if the label is not defined
+  def label(labelname)
+    read_labels unless @labels_read
+    unless @label.size > 0
+      return nil,nil,nil
+    end
+    if @label.has_key? labelname
+      return @label[labelname][1].to_i,
+        GenericSpreadsheet.letter_to_number(@label[labelname][2]),
+        @label[labelname][0]
+    else
+      return nil,nil,nil
+    end
+  end
+  # Returns an array with all labels. Each element is an array of the form
+  # [labelname, [row, col, sheetname]]
+  def labels
+    # sheet = @default_sheet unless sheet
+    # read_cells(sheet) unless @cells_read[sheet]
+    read_labels unless @labels_read
+    result = []
+    @label.each do |label|
+      result << [ label[0], # name
+        [ label[1][1].to_i, # row
+          GenericSpreadsheet.letter_to_number(label[1][2]), # column
+          label[1][0], # sheet
+        ] ]
+    end
+    result
+  end
+  # returns the comment at (row/col)
+  # nil if there is no comment
+  def comment(row,col,sheet=nil)
+    sheet = @default_sheet unless sheet
+    #read_cells(sheet) unless @cells_read[sheet]
+    read_comments(sheet) unless @comments_read[sheet]
+    row,col = normalize(row,col)
+    return nil unless @comment[sheet]
+    @comment[sheet][[row,col]]
+  end
+  # true, if there is a comment
+  def comment?(row,col,sheet=nil)
+    sheet = @default_sheet unless sheet
+    # read_cells(sheet) unless @cells_read[sheet]
+    read_comments(sheet) unless @comments_read[sheet]
+    row,col = normalize(row,col)
+    comment(row,col) != nil
+  end
+  # returns each comment in the selected sheet as an array of elements
+  # [row, col, comment]
+  def comments(sheet=nil)
+    sheet = @default_sheet unless sheet
+    read_comments(sheet) unless @comments_read[sheet]
+    if @comment[sheet]
+      @comment[sheet].each.collect do |elem|
+        [elem[0][0],elem[0][1],elem[1]]
+      end
+    else
+      []
+    end
+  end
   private
   # helper function to set the internal representation of cells
   def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
-    excelx_type=nil,
-    excelx_value=nil,
-    s_attribute=nil)
+      excelx_type=nil,
+      excelx_value=nil,
+      s_attribute=nil)
     key = [y,x+i]
     @cell_type[sheet] = {} unless @cell_type[sheet]
     @cell_type[sheet][key] = vt
@@ -332,32 +440,6 @@ class Excelx < GenericSpreadsheet
     @s_attribute[sheet][key] = s_attribute
   end
-  # splits a coordinate like "AA12" into the parts "AA" (String) and 12 (Fixnum)
-  def split_coord(s)
-    letter = ""
-    number = 0
-    i = 0
-    while i<s.length and "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".include?(s[i,1])
-      letter += s[i,1]
-      i+=1
-    end
-    while i<s.length and "0123456789".include?(s[i,1])
-      number = number*10 + s[i,1].to_i
-      i+=1
-    end
-    if letter=="" or number==0
-      raise ArgumentError
-    end
-    return letter,number
-  end
-  def split_coordinate(str)
-    letter,number = split_coord(str)
-    x = GenericSpreadsheet.letter_to_number(letter)
-    y = number
-    return x,y
-  end
   def format2type(format)
     format = format.to_s # weil von Typ Nokogiri::XML::Attr
     if FORMATS.has_key? format
@@ -387,6 +469,10 @@ class Excelx < GenericSpreadsheet
       elsif c['t'] == 'str'
         tmp_type = :string
         # 2011-02-25 END
+        # 2011-09-15 BEGIN
+      elsif c['t'] == 'inlineStr'
+	      tmp_type = :inlinestr
+        # 2011-09-15 END
       else
         s_attribute = c['s'].to_i
         format = attribute2format(s_attribute)
@@ -394,6 +480,23 @@ class Excelx < GenericSpreadsheet
       end
       formula = nil
       c.children.each do |cell|
+	      # 2011-09-15 BEGIN
+        if cell.name == 'is'
+          cell.children.each do |is|
+            if is.name == 't'
+              inlinestr_content = is.content
+              vt = :string
+              str_v = inlinestr_content
+              excelx_type = :string
+              y, x = GenericSpreadsheet.split_coordinate(c['r'])
+              v = nil
+              tr=nil #TODO: ???s
+              excelx_value = inlinestr_content #cell.content
+              set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
+            end
+          end
+        end
+	      # 2011-09-15 END
         if cell.name == 'f'
           formula = cell.content
         end
@@ -439,7 +542,7 @@ class Excelx < GenericSpreadsheet
             vt = :float
             v = cell.content
           end
-          x,y = split_coordinate(c['r'])
+          y, x = GenericSpreadsheet.split_coordinate(c['r'])
           tr=nil #TODO: ???s
           set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
         end
@@ -450,27 +553,115 @@ class Excelx < GenericSpreadsheet
       raise RangeError
     end
     @cells_read[sheet] = true
+    # begin comments
+=begin
+File xl/comments1.xml
+  <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+  <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+    <authors>
+      <author />
+    </authors>
+    <commentList>
+      <comment ref="B4" authorId="0">
+        <text>
+          <r>
+            <rPr>
+              <sz val="10" />
+              <rFont val="Arial" />
+              <family val="2" />
+            </rPr>
+            <t>Kommentar fuer B4</t>
+          </r>
+        </text>
+      </comment>
+      <comment ref="B5" authorId="0">
+        <text>
+          <r>
+            <rPr>
+            <sz val="10" />
+            <rFont val="Arial" />
+            <family val="2" />
+          </rPr>
+          <t>Kommentar fuer B5</t>
+        </r>
+      </text>
+    </comment>
+  </commentList>
+  </comments>
+=end
+=begin
+    if @comments_doc[self.sheets.index(sheet)]
+      read_comments(sheet)
+    end
+=end
+    #end comments
   end
+  # Reads all comments from a sheet
+  def read_comments(sheet=nil)
+    sheet = @default_sheet unless sheet
+    #sheet_found = false
+    raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
+    raise RangeError unless self.sheets.include? sheet
+    n = self.sheets.index(sheet)
+    return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
+    @comments_doc[n].xpath("//*[local-name()='comments']").each do |comment|
+      comment.children.each do |cc|
+        if cc.name == 'commentList'
+          cc.children.each do |commentlist|
+            if commentlist.name == 'comment'
+              ref = commentlist.attributes['ref'].to_s
+              row,col = GenericSpreadsheet.split_coordinate(ref)
+              commentlist.children.each do |clc|
+                if clc.name == 'text'
+                  clc.children.each do |text|
+                    if text.name == 'r'
+                      text.children.each do |r|
+                        if r.name == 't'
+                          comment = r.text
+                          @comment[sheet] = Hash.new unless @comment[sheet]
+                          @comment[sheet][[row,col]] = comment
+                        end
+                      end
+                    end
+                  end
+                end
+              end
+            end
+          end
+        end
+      end
+    end
+    @comments_read[sheet] = true
+  end
+  def read_labels
+    @workbook_doc.xpath("//*[local-name()='definedName']").each do |defined_name|
+	    # "Sheet1!$C$5"
+      sheet = defined_name.text.split('!').first
+      coordinates = defined_name.text.split('!')[1]
+      dummy,col,row = coordinates.split('$')
+      @label[defined_name['name']] = [sheet,row,col]
+    end
+    @labels_read = true
+  end
   # Checks if the default_sheet exists. If not, a RangeError exception is
   # raised
   def check_default_sheet
     sheet_found = false
     raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
     sheet_found = true if sheets.include?(@default_sheet)
     if ! sheet_found
       raise RangeError, "sheet '#{@default_sheet}' not found"
     end
   end
-  # extracts all needed files from the zip file
+  # Extracts all needed files from the zip file
   def process_zipfile(zipfilename, zip, path='')
     @sheet_files = []
     Zip::ZipFile.open(zipfilename) {|zf|
       zf.entries.each {|entry|
-        #entry.extract
         if entry.to_s.end_with?('workbook.xml')
           open(@tmpdir+'/'+@file_nr.to_s+'_roo_workbook.xml','wb') {|f|
             f << zip.read(entry)
@@ -493,9 +684,16 @@ class Excelx < GenericSpreadsheet
           }
           @sheet_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}"
         end
+        if entry.to_s =~ /comments([0-9]+).xml$/
+          nr = $1
+          open(@tmpdir+'/'+@file_nr.to_s+"_roo_comments#{nr}",'wb') {|f|
+            f << zip.read(entry)
+          }
+          @comments_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_comments#{nr}"
+        end
       }
     }
-    return
+    # return
   end
   # extract files from the zip file