RubyGems - roo - Versions diffs - 1.13.2 → 2.0.0 - Mend

roo 1.13.2 → 2.0.0

Files changed (172) hide show

checksums.yaml +4 -4
data/.gitignore +7 -0
data/.simplecov +4 -0
data/.travis.yml +13 -0
data/CHANGELOG.md +500 -0
data/Gemfile +16 -10
data/Guardfile +24 -0
data/LICENSE +3 -1
data/README.md +254 -0
data/Rakefile +23 -23
data/examples/roo_soap_client.rb +28 -31
data/examples/roo_soap_server.rb +4 -6
data/examples/write_me.rb +9 -10
data/lib/roo/base.rb +303 -388
data/lib/roo/csv.rb +120 -113
data/lib/roo/excelx/comments.rb +24 -0
data/lib/roo/excelx/extractor.rb +20 -0
data/lib/roo/excelx/relationships.rb +26 -0
data/lib/roo/excelx/shared_strings.rb +40 -0
data/lib/roo/excelx/sheet_doc.rb +202 -0
data/lib/roo/excelx/styles.rb +62 -0
data/lib/roo/excelx/workbook.rb +59 -0
data/lib/roo/excelx.rb +452 -484
data/lib/roo/font.rb +17 -0
data/lib/roo/libre_office.rb +5 -0
data/lib/roo/link.rb +15 -0
data/lib/roo/{openoffice.rb → open_office.rb} +678 -496
data/lib/roo/spreadsheet.rb +20 -23
data/lib/roo/utils.rb +78 -0
data/lib/roo/version.rb +3 -0
data/lib/roo.rb +18 -24
data/roo.gemspec +20 -204
data/spec/lib/roo/base_spec.rb +1 -4
data/spec/lib/roo/csv_spec.rb +21 -13
data/spec/lib/roo/excelx/format_spec.rb +7 -6
data/spec/lib/roo/excelx_spec.rb +388 -11
data/spec/lib/roo/libreoffice_spec.rb +16 -6
data/spec/lib/roo/openoffice_spec.rb +2 -8
data/spec/lib/roo/spreadsheet_spec.rb +40 -12
data/spec/lib/roo/utils_spec.rb +106 -0
data/spec/spec_helper.rb +2 -1
data/test/test_generic_spreadsheet.rb +19 -67
data/test/test_helper.rb +9 -56
data/test/test_roo.rb +252 -477
metadata +63 -302
data/CHANGELOG +0 -417
data/Gemfile.lock +0 -78
data/README.markdown +0 -126
data/VERSION +0 -1
data/lib/roo/excel.rb +0 -355
data/lib/roo/excel2003xml.rb +0 -300
data/lib/roo/google.rb +0 -292
data/lib/roo/roo_rails_helper.rb +0 -83
data/lib/roo/worksheet.rb +0 -18
data/spec/lib/roo/excel2003xml_spec.rb +0 -15
data/spec/lib/roo/excel_spec.rb +0 -17
data/spec/lib/roo/google_spec.rb +0 -64
data/test/files/1900_base.xls +0 -0
data/test/files/1900_base.xlsx +0 -0
data/test/files/1904_base.xls +0 -0
data/test/files/1904_base.xlsx +0 -0
data/test/files/Bibelbund.csv +0 -3741
data/test/files/Bibelbund.ods +0 -0
data/test/files/Bibelbund.xls +0 -0
data/test/files/Bibelbund.xlsx +0 -0
data/test/files/Bibelbund.xml +0 -62518
data/test/files/Bibelbund1.ods +0 -0
data/test/files/Pfand_from_windows_phone.xlsx +0 -0
data/test/files/bad_excel_date.xls +0 -0
data/test/files/bbu.ods +0 -0
data/test/files/bbu.xls +0 -0
data/test/files/bbu.xlsx +0 -0
data/test/files/bbu.xml +0 -152
data/test/files/bode-v1.ods.zip +0 -0
data/test/files/bode-v1.xls.zip +0 -0
data/test/files/boolean.csv +0 -2
data/test/files/boolean.ods +0 -0
data/test/files/boolean.xls +0 -0
data/test/files/boolean.xlsx +0 -0
data/test/files/boolean.xml +0 -112
data/test/files/borders.ods +0 -0
data/test/files/borders.xls +0 -0
data/test/files/borders.xlsx +0 -0
data/test/files/borders.xml +0 -144
data/test/files/bug-numbered-sheet-names.xlsx +0 -0
data/test/files/bug-row-column-fixnum-float.xls +0 -0
data/test/files/bug-row-column-fixnum-float.xml +0 -127
data/test/files/comments.ods +0 -0
data/test/files/comments.xls +0 -0
data/test/files/comments.xlsx +0 -0
data/test/files/csvtypes.csv +0 -1
data/test/files/datetime.ods +0 -0
data/test/files/datetime.xls +0 -0
data/test/files/datetime.xlsx +0 -0
data/test/files/datetime.xml +0 -142
data/test/files/datetime_floatconv.xls +0 -0
data/test/files/datetime_floatconv.xml +0 -148
data/test/files/dreimalvier.ods +0 -0
data/test/files/emptysheets.ods +0 -0
data/test/files/emptysheets.xls +0 -0
data/test/files/emptysheets.xlsx +0 -0
data/test/files/emptysheets.xml +0 -105
data/test/files/excel2003.xml +0 -21140
data/test/files/false_encoding.xls +0 -0
data/test/files/false_encoding.xml +0 -132
data/test/files/file_item_error.xlsx +0 -0
data/test/files/formula.ods +0 -0
data/test/files/formula.xls +0 -0
data/test/files/formula.xlsx +0 -0
data/test/files/formula.xml +0 -134
data/test/files/formula_parse_error.xls +0 -0
data/test/files/formula_parse_error.xml +0 -1833
data/test/files/formula_string_error.xlsx +0 -0
data/test/files/html-escape.ods +0 -0
data/test/files/link.xls +0 -0
data/test/files/link.xlsx +0 -0
data/test/files/matrix.ods +0 -0
data/test/files/matrix.xls +0 -0
data/test/files/named_cells.ods +0 -0
data/test/files/named_cells.xls +0 -0
data/test/files/named_cells.xlsx +0 -0
data/test/files/no_spreadsheet_file.txt +0 -1
data/test/files/numbers1.csv +0 -18
data/test/files/numbers1.ods +0 -0
data/test/files/numbers1.xls +0 -0
data/test/files/numbers1.xlsx +0 -0
data/test/files/numbers1.xml +0 -312
data/test/files/numeric-link.xlsx +0 -0
data/test/files/only_one_sheet.ods +0 -0
data/test/files/only_one_sheet.xls +0 -0
data/test/files/only_one_sheet.xlsx +0 -0
data/test/files/only_one_sheet.xml +0 -67
data/test/files/paragraph.ods +0 -0
data/test/files/paragraph.xls +0 -0
data/test/files/paragraph.xlsx +0 -0
data/test/files/paragraph.xml +0 -127
data/test/files/prova.xls +0 -0
data/test/files/ric.ods +0 -0
data/test/files/simple_spreadsheet.ods +0 -0
data/test/files/simple_spreadsheet.xls +0 -0
data/test/files/simple_spreadsheet.xlsx +0 -0
data/test/files/simple_spreadsheet.xml +0 -225
data/test/files/simple_spreadsheet_from_italo.ods +0 -0
data/test/files/simple_spreadsheet_from_italo.xls +0 -0
data/test/files/simple_spreadsheet_from_italo.xml +0 -242
data/test/files/so_datetime.csv +0 -7
data/test/files/style.ods +0 -0
data/test/files/style.xls +0 -0
data/test/files/style.xlsx +0 -0
data/test/files/style.xml +0 -154
data/test/files/time-test.csv +0 -2
data/test/files/time-test.ods +0 -0
data/test/files/time-test.xls +0 -0
data/test/files/time-test.xlsx +0 -0
data/test/files/time-test.xml +0 -131
data/test/files/type_excel.ods +0 -0
data/test/files/type_excel.xlsx +0 -0
data/test/files/type_excelx.ods +0 -0
data/test/files/type_excelx.xls +0 -0
data/test/files/type_openoffice.xls +0 -0
data/test/files/type_openoffice.xlsx +0 -0
data/test/files/whitespace.ods +0 -0
data/test/files/whitespace.xls +0 -0
data/test/files/whitespace.xlsx +0 -0
data/test/files/whitespace.xml +0 -184
data/test/rm_sub_test.rb +0 -12
data/test/rm_test.rb +0 -7
data/website/index.html +0 -385
data/website/index.txt +0 -423
data/website/javascripts/rounded_corners_lite.inc.js +0 -285
data/website/stylesheets/screen.css +0 -130
data/website/template.rhtml +0 -48

data/lib/roo/csv.rb CHANGED Viewed

@@ -1,113 +1,120 @@
-require 'csv'
-require 'time'
-# The CSV class can read csv files (must be separated with commas) which then
-# can be handled like spreadsheets. This means you can access cells like A5
-# within these files.
-# The CSV class provides only string objects. If you want conversions to other
-# types you have to do it yourself.
-#
-# You can pass options to the underlying CSV parse operation, via the
-# :csv_options option.
-#
-class Roo::CSV < Roo::Base
-  def initialize(filename, options = {})
-    super
-  end
-  attr_reader :filename
-  # Returns an array with the names of the sheets. In CSV class there is only
-  # one dummy sheet, because a csv file cannot have more than one sheet.
-  def sheets
-    ['default']
-  end
-  def cell(row, col, sheet=nil)
-    sheet ||= @default_sheet
-    read_cells(sheet)
-    @cell[normalize(row,col)]
-  end
-  def celltype(row, col, sheet=nil)
-    sheet ||= @default_sheet
-    read_cells(sheet)
-    @cell_type[normalize(row,col)]
-  end
-  def cell_postprocessing(row,col,value)
-    value
-  end
-  def csv_options
-    @options[:csv_options] || {}
-  end
-  private
-  TYPE_MAP = {
-    String => :string,
-    Float => :float,
-    Date => :date,
-    DateTime => :datetime,
-  }
-  def celltype_class(value)
-    TYPE_MAP[value.class]
-  end
-  def each_row(options, &block)
-    if uri?(filename)
-      make_tmpdir do |tmpdir|
-        tmp_filename = download_uri(filename, tmpdir)
-        CSV.foreach(tmp_filename, options, &block)
-      end
-    else
-      CSV.foreach(filename, options, &block)
-    end
-  end
-  def read_cells(sheet=nil)
-    sheet ||= @default_sheet
-    return if @cells_read[sheet]
-    @first_row[sheet] = 1
-    @last_row[sheet] = 0
-    @first_column[sheet] = 1
-    @last_column[sheet] = 1
-    rownum = 1
-    each_row csv_options do |row|
-      row.each_with_index do |elem,i|
-        @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
-        @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
-        if i+1 > @last_column[sheet]
-          @last_column[sheet] += 1
-        end
-      end
-      rownum += 1
-      @last_row[sheet] += 1
-    end
-    @cells_read[sheet] = true
-    #-- adjust @first_row if neccessary
-    while !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
-      @first_row[sheet] += 1
-    end
-    #-- adjust @last_row if neccessary
-    while !row(@last_row[sheet]).any? and @last_row[sheet] and
-        @last_row[sheet] > @first_row[sheet]
-      @last_row[sheet] -= 1
-    end
-    #-- adjust @first_column if neccessary
-    while !column(@first_column[sheet]).any? and
-          @first_column[sheet] and
-          @first_column[sheet] < @last_column[sheet]
-      @first_column[sheet] += 1
-    end
-    #-- adjust @last_column if neccessary
-    while !column(@last_column[sheet]).any? and
-          @last_column[sheet] and
-          @last_column[sheet] > @first_column[sheet]
-      @last_column[sheet] -= 1
-    end
-  end
-end
+require 'csv'
+require 'time'
+# The CSV class can read csv files (must be separated with commas) which then
+# can be handled like spreadsheets. This means you can access cells like A5
+# within these files.
+# The CSV class provides only string objects. If you want conversions to other
+# types you have to do it yourself.
+#
+# You can pass options to the underlying CSV parse operation, via the
+# :csv_options option.
+#
+class Roo::CSV < Roo::Base
+  attr_reader :filename
+  # Returns an array with the names of the sheets. In CSV class there is only
+  # one dummy sheet, because a csv file cannot have more than one sheet.
+  def sheets
+    ['default']
+  end
+  def cell(row, col, sheet=nil)
+    sheet ||= default_sheet
+    read_cells(sheet)
+    @cell[normalize(row,col)]
+  end
+  def celltype(row, col, sheet=nil)
+    sheet ||= default_sheet
+    read_cells(sheet)
+    @cell_type[normalize(row,col)]
+  end
+  def cell_postprocessing(row,col,value)
+    value
+  end
+  def csv_options
+    @options[:csv_options] || {}
+  end
+  private
+  TYPE_MAP = {
+    String => :string,
+    Float => :float,
+    Date => :date,
+    DateTime => :datetime,
+  }
+  def celltype_class(value)
+    TYPE_MAP[value.class]
+  end
+  def each_row(options, &block)
+    if uri?(filename)
+      make_tmpdir do |tmpdir|
+        tmp_filename = download_uri(filename, tmpdir)
+        CSV.foreach(tmp_filename, options, &block)
+      end
+    else
+      CSV.foreach(filename, options, &block)
+    end
+  end
+  def read_cells(sheet = default_sheet)
+    sheet ||= default_sheet
+    return if @cells_read[sheet]
+    @first_row[sheet] = 1
+    @last_row[sheet] = 0
+    @first_column[sheet] = 1
+    @last_column[sheet] = 1
+    rownum = 1
+    each_row csv_options do |row|
+      row.each_with_index do |elem,i|
+        @cell[[rownum,i+1]] = cell_postprocessing rownum,i+1, elem
+        @cell_type[[rownum,i+1]] = celltype_class @cell[[rownum,i+1]]
+        if i+1 > @last_column[sheet]
+          @last_column[sheet] += 1
+        end
+      end
+      rownum += 1
+      @last_row[sheet] += 1
+    end
+    @cells_read[sheet] = true
+    #-- adjust @first_row if neccessary
+    while !row(@first_row[sheet]).any? and @first_row[sheet] < @last_row[sheet]
+      @first_row[sheet] += 1
+    end
+    #-- adjust @last_row if neccessary
+    while !row(@last_row[sheet]).any? and @last_row[sheet] and
+        @last_row[sheet] > @first_row[sheet]
+      @last_row[sheet] -= 1
+    end
+    #-- adjust @first_column if neccessary
+    while !column(@first_column[sheet]).any? and
+          @first_column[sheet] and
+          @first_column[sheet] < @last_column[sheet]
+      @first_column[sheet] += 1
+    end
+    #-- adjust @last_column if neccessary
+    while !column(@last_column[sheet]).any? and
+          @last_column[sheet] and
+          @last_column[sheet] > @first_column[sheet]
+      @last_column[sheet] -= 1
+    end
+  end
+  def clean_sheet(sheet)
+    read_cells(sheet)
+    @cell.each_pair do |coord, value|
+      @cell[coord] = sanitize_value(value) if value.is_a?(::String)
+    end
+    @cleaned[sheet] = true
+  end
+end

data/lib/roo/excelx/comments.rb ADDED Viewed

@@ -0,0 +1,24 @@
+require 'roo/excelx/extractor'
+module Roo
+  class Excelx::Comments < Excelx::Extractor
+    def comments
+      @comments ||= extract_comments
+    end
+    private
+    def extract_comments
+      if doc_exists?
+        Hash[doc.xpath("//comments/commentList/comment").map do |comment|
+          value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
+          [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
+        end]
+      else
+        {}
+      end
+    end
+  end
+end

data/lib/roo/excelx/extractor.rb ADDED Viewed

@@ -0,0 +1,20 @@
+module Roo
+  class Excelx::Extractor
+    def initialize(path)
+      @path = path
+    end
+    private
+    def doc
+      @doc ||=
+        if doc_exists?
+          ::Roo::Utils.load_xml(@path).remove_namespaces!
+        end
+    end
+    def doc_exists?
+      @path && File.exist?(@path)
+    end
+  end
+end

data/lib/roo/excelx/relationships.rb ADDED Viewed

@@ -0,0 +1,26 @@
+require 'roo/excelx/extractor'
+module Roo
+  class Excelx::Relationships < Excelx::Extractor
+    def [](index)
+      to_a[index]
+    end
+    def to_a
+      @relationships ||= extract_relationships
+    end
+    private
+    def extract_relationships
+      if doc_exists?
+        Hash[doc.xpath("/Relationships/Relationship").map do |rel|
+          [rel.attribute('Id').text, rel]
+        end]
+      else
+        []
+      end
+    end
+  end
+end

data/lib/roo/excelx/shared_strings.rb ADDED Viewed

@@ -0,0 +1,40 @@
+require 'roo/excelx/extractor'
+module Roo
+  class Excelx::SharedStrings < Excelx::Extractor
+    def [](index)
+      to_a[index]
+    end
+    def to_a
+      @array ||= extract_shared_strings
+    end
+    private
+    def extract_shared_strings
+      if doc_exists?
+        # read the shared strings xml document
+        doc.xpath("/sst/si").map do |si|
+          shared_string = ''
+          si.children.each do |elem|
+            case elem.name
+              when 'r'
+                elem.children.each do |r_elem|
+                  if r_elem.name == 't'
+                    shared_string << r_elem.content
+                  end
+                end
+              when 't'
+                shared_string = elem.content
+            end
+          end
+          shared_string
+        end
+      else
+        []
+      end
+    end
+  end
+end

data/lib/roo/excelx/sheet_doc.rb ADDED Viewed

@@ -0,0 +1,202 @@
+require 'roo/excelx/extractor'
+module Roo
+  # Reads one worksheet XML file and exposes its cells, hyperlinks and
+  # dimensions.
+  class Excelx::SheetDoc < Excelx::Extractor
+    # path           - worksheet XML file, handed on to Excelx::Extractor.
+    # relationships  - queried with hyperlink ids (see #each_cell).
+    # styles         - must respond to #style_format(style).
+    # shared_strings - indexed with the integer content of shared-string cells.
+    # workbook       - must respond to #base_date.
+    # options        - Hash; :expand_merged_ranges is the only key read here.
+    def initialize(path, relationships, styles, shared_strings, workbook, options = {})
+      super(path)
+      @options = options
+      @relationships = relationships
+      @styles = styles
+      @shared_strings = shared_strings
+      @workbook = workbook
+    end
+    # Memoized Hash of cell key => Excelx::Cell. The relationships argument
+    # is only used by the first call; later calls return the cached Hash.
+    def cells(relationships)
+      @cells ||= extract_cells(relationships)
+    end
+    # Memoized Hash of cell key => hyperlink target. As with #cells, the
+    # argument only matters on the first call.
+    def hyperlinks(relationships)
+      @hyperlinks ||= extract_hyperlinks(relationships)
+    end
+    # Get the dimensions for the sheet.
+    # This is the upper bound of cells that might
+    # be parsed. (the document may be sparse so cell count is only upper bound)
+    def dimensions
+      @dimensions ||= extract_dimensions
+    end
+    # Yield each row xml element to caller
+    def each_row_streaming(&block)
+      Roo::Utils.each_element(@path, 'row', &block)
+    end
+    # Yield each cell as Excelx::Cell to caller for given
+    # row xml. Returns [] without yielding when row_xml is nil.
+    def each_cell(row_xml)
+      return [] unless row_xml
+      row_xml.children.each do |cell_element|
+        key = ::Roo::Utils.ref_to_key(cell_element['r'])
+        yield cell_from_xml(cell_element, hyperlinks(@relationships)[key])
+      end
+    end
+    private
+    # Builds an Excelx::Cell from one cell element.
+    #
+    # cell_xml  - the cell element; its 'r', 's' and 't' attributes are read.
+    # hyperlink - hyperlink target for this cell, or nil.
+    #
+    # Returns as soon as the first 'v' child, or the first 't' inside an 'is'
+    # child, is seen. A cell with neither produces a Cell that only carries
+    # its coordinate.
+    def cell_from_xml(cell_xml, hyperlink)
+      # This is error prone, to_i will silently turn a nil into a 0
+      # and it works by coincidence that Format[0] is general
+      style = cell_xml['s'].to_i   # should be here
+      # c: <c r="A5" s="2">
+      # <v>22606</v>
+      # </c>, format: , tmp_type: float
+      # The 't' attribute decides the type; without a known value the type is
+      # derived from the number format of the cell's style.
+      value_type =
+        case cell_xml['t']
+        when 's'
+          :shared
+        when 'b'
+          :boolean
+        # 2011-02-25 BEGIN
+        when 'str'
+          :string
+        # 2011-02-25 END
+        # 2011-09-15 BEGIN
+        when 'inlineStr'
+          :inlinestr
+        # 2011-09-15 END
+        else
+          format = @styles.style_format(style)
+          Excelx::Format.to_type(format)
+        end
+      formula = nil
+      row, column = ::Roo::Utils.split_coordinate(cell_xml['r'])
+      cell_xml.children.each do |cell|
+        case cell.name
+        when 'is'
+          # Inline string: the first 't' child supplies the value.
+          cell.children.each do |inline_str|
+            if inline_str.name == 't'
+              return Excelx::Cell.new(inline_str.content,:string,formula,:string,inline_str.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
+            end
+          end
+        when 'f'
+          # Remembered so that a following 'v' child can attach it.
+          formula = cell.content
+        when 'v'
+          # Time-like values of 1.0 or more are re-typed: :datetime when they
+          # have a fractional part, :date otherwise.
+          if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
+            value_type =
+              if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
+                :datetime
+              else
+                :date
+              end
+          end
+          # format is only assigned in the else branch of the type lookup
+          # above; for the explicit types it is nil here.
+          excelx_type = [:numeric_or_formula,format.to_s]
+          value =
+            case value_type
+            when :shared
+              value_type = :string
+              excelx_type = :string
+              @shared_strings[cell.content.to_i]
+            when :boolean
+              (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
+            when :date, :time, :datetime
+              cell.content
+            when :formula
+              cell.content.to_f
+            when :string
+              excelx_type = :string
+              cell.content
+            else
+              # Every remaining type is reported as :float.
+              value_type = :float
+              cell.content
+            end
+          return Excelx::Cell.new(value,value_type,formula,excelx_type,cell.content,style, hyperlink, @workbook.base_date, Excelx::Cell::Coordinate.new(row, column))
+        end
+      end
+      Excelx::Cell.new(nil, nil, nil, nil, nil, nil, nil, nil, Excelx::Cell::Coordinate.new(row, column))
+    end
+    # Builds a Hash of cell key => relationship 'Target' text. Hyperlinks
+    # without an 'id' attribute, or whose id has no entry in relationships,
+    # are skipped.
+    def extract_hyperlinks(relationships)
+      Hash[doc.xpath("/worksheet/hyperlinks/hyperlink").map do |hyperlink|
+        if hyperlink.attribute('id') && relationship = relationships[hyperlink.attribute('id').text]
+          [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
+        end
+      end.compact]
+    end
+    # Copies the top-left cell of every merged range into all other
+    # coordinates of that range. Modifies the given cells Hash in place.
+    def expand_merged_ranges(cells)
+      # Extract merged ranges from xml
+      merges = {}
+      doc.xpath("/worksheet/mergeCells/mergeCell").each do |mergecell_xml|
+        # tl / br: keys of the top-left and bottom-right corner of the range
+        tl, br = mergecell_xml['ref'].split(/:/).map {|ref| ::Roo::Utils.ref_to_key(ref)}
+        for row in tl[0]..br[0] do
+          for col in tl[1]..br[1] do
+            next if row == tl[0] && col == tl[1]
+            merges[[row,col]] = tl
+          end
+        end
+      end
+      # Duplicate value into all cells in merged range
+      merges.each do |dst, src|
+        cells[dst] = cells[src]
+      end
+    end
+    # Builds the Hash of cell key => Excelx::Cell for every cell element in
+    # the sheet data, expanding merged ranges when the
+    # :expand_merged_ranges option is set.
+    def extract_cells(relationships)
+      extracted_cells = Hash[doc.xpath("/worksheet/sheetData/row/c").map do |cell_xml|
+        key = ::Roo::Utils.ref_to_key(cell_xml['r'])
+        [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
+      end]
+      if @options[:expand_merged_ranges]
+        expand_merged_ranges(extracted_cells)
+      end
+      extracted_cells
+    end
+    # Returns the "ref" attribute value of the first 'dimension' element
+    # yielded by Roo::Utils.each_element.
+    # NOTE(review): when no element is yielded, the result is whatever
+    # each_element itself returns - confirm callers can handle that.
+    def extract_dimensions
+      Roo::Utils.each_element(@path, 'dimension') do |dimension|
+        return dimension.attributes["ref"].value
+      end
+    end
+=begin
+File xl/comments1.xml
+  <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+  <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+    <authors>
+      <author />
+    </authors>
+    <commentList>
+      <comment ref="B4" authorId="0">
+        <text>
+          <r>
+            <rPr>
+              <sz val="10" />
+              <rFont val="Arial" />
+              <family val="2" />
+            </rPr>
+            <t>Kommentar fuer B4</t>
+          </r>
+        </text>
+      </comment>
+      <comment ref="B5" authorId="0">
+        <text>
+          <r>
+            <rPr>
+            <sz val="10" />
+            <rFont val="Arial" />
+            <family val="2" />
+          </rPr>
+          <t>Kommentar fuer B5</t>
+        </r>
+      </text>
+    </comment>
+  </commentList>
+  </comments>
+=end
+=begin
+    if @comments_doc[self.sheets.index(sheet)]
+      read_comments(sheet)
+    end
+=end
+  end
+end

data/lib/roo/excelx/styles.rb ADDED Viewed

@@ -0,0 +1,62 @@
+require 'roo/font'
+require 'roo/excelx/extractor'
+module Roo
+  class Excelx::Styles < Excelx::Extractor
+    # convert internal excelx attribute to a format
+    def style_format(style)
+      id = num_fmt_ids[style.to_i]
+      num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
+    end
+    def definitions
+      @definitions ||= extract_definitions
+    end
+    private
+    def num_fmt_ids
+      @num_fmt_ids ||= extract_num_fmt_ids
+    end
+    def num_fmts
+      @num_fmts ||= extract_num_fmts
+    end
+    def fonts
+     @fonts ||= extract_fonts
+    end
+    def extract_definitions
+      doc.xpath("//cellXfs").flat_map do |xfs|
+        xfs.children.map do |xf|
+          fonts[xf['fontId'].to_i]
+        end
+      end
+    end
+    def extract_fonts
+      doc.xpath("//fonts/font").map do |font_el|
+        Font.new.tap do |font|
+          font.bold = !font_el.xpath('./b').empty?
+          font.italic = !font_el.xpath('./i').empty?
+          font.underline = !font_el.xpath('./u').empty?
+        end
+      end
+    end
+    def extract_num_fmt_ids
+      doc.xpath("//cellXfs").flat_map do |xfs|
+        xfs.children.map do |xf|
+          xf['numFmtId']
+        end
+      end
+    end
+    def extract_num_fmts
+      Hash[doc.xpath("//numFmt").map do |num_fmt|
+        [num_fmt['numFmtId'], num_fmt['formatCode']]
+      end]
+    end
+  end
+end

data/lib/roo/excelx/workbook.rb ADDED Viewed

@@ -0,0 +1,59 @@
+require 'roo/excelx/extractor'
+module Roo
+  class Excelx::Workbook < Excelx::Extractor
+    class Label
+      attr_reader :sheet, :row, :col, :name
+      def initialize(name, sheet, row, col)
+        @name = name
+        @sheet = sheet
+        @row = row.to_i
+        @col = ::Roo::Utils.letter_to_number(col)
+      end
+      def key
+        [@row, @col]
+      end
+    end
+    def initialize(path)
+      super
+      if !doc_exists?
+        raise ArgumentError, 'missing required workbook file'
+      end
+    end
+    def sheets
+      doc.xpath("//sheet")
+    end
+    # aka labels
+    def defined_names
+      Hash[doc.xpath("//definedName").map do |defined_name|
+        # "Sheet1!$C$5"
+        sheet, coordinates = defined_name.text.split('!$', 2)
+        col,row = coordinates.split('$')
+        name = defined_name['name']
+        [name, Label.new(name, sheet,row,col)]
+      end]
+    end
+    def base_date
+      @base_date ||=
+        begin
+          # Default to 1900 (minus one day due to excel quirk) but use 1904 if
+          # it's set in the Workbook's workbookPr
+          # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
+          result = Date.new(1899,12,30) # default
+          doc.css("workbookPr[date1904]").each do |workbookPr|
+            if workbookPr["date1904"] =~ /true|1/i
+              result = Date.new(1904,01,01)
+              break
+            end
+          end
+          result
+        end
+    end
+  end
+end