RubyGems - simple_xlsx_reader - Versions diffs - 1.0.1 → 1.0.5 - Mend

simple_xlsx_reader 1.0.1 → 1.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

checksums.yaml +5 -5
data/.travis.yml +8 -0
data/CHANGELOG.md +23 -0
data/README.md +13 -6
data/lib/simple_xlsx_reader/version.rb +1 -1
data/lib/simple_xlsx_reader.rb +137 -46
data/simple_xlsx_reader.gemspec +4 -2
data/test/date1904_test.rb +1 -1
data/test/datetime_test.rb +3 -2
data/test/datetimes.xlsx +0 -0
data/test/gdocs_sheet.xlsx +0 -0
data/test/gdocs_sheet_test.rb +15 -0
data/test/lower_case_sharedstrings.xlsx +0 -0
data/test/lower_case_sharedstrings_test.rb +15 -0
data/test/performance_test.rb +3 -3
data/test/sesame_street_blog.xlsx +0 -0
data/test/simple_xlsx_reader_test.rb +176 -25
data/test/styles.xml +4 -2
data/test/test_helper.rb +1 -0
metadata +29 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
-SHA1:
-  metadata.gz: 94422da0193805c579ba37c7c3e58b35a996dfbc
-  data.tar.gz: a9c5e1f01acc0c60165a13adc1af087743a60935
+SHA256:
+  metadata.gz: e2b04473235c5ed2c2764f62a627fa6f16816c36e0fcff3497be229f8666a0f7
+  data.tar.gz: 9367b0082f31e9cb208d9f97ed6cb67d5276a459562809460694602339dfdaad
 SHA512:
-  metadata.gz: 33338f8fcf3c180ea346548061598953842358a21acd6d97bf451c07d8655f179af0cf7b7791f7c9de1a8411578e3623faab178b3cd74893aaf6d040a7abde96
-  data.tar.gz: 50035b920f6811eed88c318c17b47bf8823aa1ac4bf114af3bc29174edcf08ebd5d16902177aa6a48b70f8e70a745249bb8494101f9f310f24d5f5d5bbc13f27
+  metadata.gz: cd42f7a0b8830a2f01703dca10ae779b973566ad25e3b74d31dc3693977fa5b2b3442e47bc1a3b50723bae3bb9f31facd923f1eaba06b51cc8b927e7fb207cf3
+  data.tar.gz: 38ecb026b0ad5a1985d88349a839a9d2972f85596504e6f300686f9751169a3c8d62582e79119106085a9cadc066517206da117993c3a30f48a5a0c58f256b4c

data/.travis.yml ADDED Viewed

@@ -0,0 +1,8 @@
+language: ruby
+cache: bundler
+before_install:
+  - gem update bundler
+rvm:
+  - 2.5.8
+  - 2.7.2
+  - 3.0.0

data/CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,26 @@
+### 1.0.5
+* Support string or io input via `SimpleXlsxReader#parse` (@kalsan, @til)
+### 1.0.4
+* Fix Windows + RubyZip 1.2.1 bug preventing files from being read
+* Add ability to parse hyperlinks
+* Support files exported from Google Docs (@Strnadj)
+### 1.0.3
+Broken on Ruby 1.9; yanked.
+### 1.0.2
+* Fix Ruby 1.9.3-specific bug preventing parsing most sheets [middagj, eritiro]
+* Better support for non-excel-generated xlsx files [bwlang]
+  * You don't always have a numFmtId column, and that's OK
+  * Sometimes 'sharedStrings.xml' can be 'sharedstrings.xml'
+* Fixed parsing times very close to 12/30/1899 [Valeriy Utyaganov]
+* Be more flexible with custom formats using a numFmtId < 164
 ### 1.0.1
 * Add support for the 1904 date system [zilverline]

data/README.md CHANGED Viewed

@@ -1,4 +1,4 @@
-# SimpleXlsxReader
+# SimpleXlsxReader [![Build Status](https://travis-ci.org/woahdae/simple_xlsx_reader.svg?branch=master)](https://travis-ci.org/woahdae/simple_xlsx_reader)
 An xlsx reader for Ruby that parses xlsx cell values into plain ruby
 primitives and dates/times.
@@ -35,14 +35,21 @@ Here's the totality of the public api, in code:
     module SimpleXlsxReader
       def self.open(file_path)
-        Document.new(file_path).tap(&:sheets)
+        Document.new(file_path: file_path).tap(&:sheets)
+      end
+      def self.parse(string_or_io)
+        Document.new(string_or_io: string_or_io).tap(&:sheets)
       end
       class Document
-        attr_reader :file_path
+        attr_reader :string_or_io
+        def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
+          ((file_path || legacy_file_path).nil? ^ string_or_io.nil?) ||
+            fail(ArgumentError, 'either file_path or string_or_io must be provided')
-        def initialize(file_path)
-          @file_path = file_path
+          @string_or_io = string_or_io || File.new(file_path || legacy_file_path)
         end
         def sheets
@@ -54,7 +61,7 @@ Here's the totality of the public api, in code:
         end
         def xml
-          Xml.load(file_path)
+          Xml.load(string_or_io)
         end
         class Sheet < Struct.new(:name, :rows)

data/lib/simple_xlsx_reader/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module SimpleXlsxReader
-  VERSION = "1.0.1"
+  VERSION = "1.0.5"
 end

data/lib/simple_xlsx_reader.rb CHANGED Viewed

@@ -19,6 +19,33 @@ end
 module SimpleXlsxReader
   class CellLoadError < StandardError; end
+  # We support hyperlinks as a "type" even though they're technically
+  # represented either as a function or an external reference in the xlsx spec.
+  #
+  # Since having hyperlink data in our sheet usually means we might want to do
+  # something primarily with the URL (store it in the database, download it, etc),
+  # we go through extra effort to parse the function or follow the reference
+  # to represent the hyperlink primarily as a URL. However, maybe we do want
+  # the hyperlink "friendly name" part (as MS calls it), so here we've subclassed
+  # string to tack on the friendly name. This means 80% of us that just want
+  # the URL value will have to do nothing extra, but the 20% that might want the
+  # friendly name can access it.
+  #
+  # Note, by default, the value we would get by just asking the cell would
+  # be the "friendly name" and *not* the URL, which is tucked away in the
+  # function definition or a separate "relationships" meta-document.
+  #
+  # See MS documentation on the HYPERLINK function for some background:
+  # https://support.office.com/en-us/article/HYPERLINK-function-333c7ce6-c5ae-4164-9c47-7de9b76f577f
+  class Hyperlink < String
+    attr_reader :friendly_name
+    def initialize(url, friendly_name = nil)
+      @friendly_name = friendly_name
+      super(url)
+    end
+  end
   def self.configuration
     @configuration ||= Struct.new(:catch_cell_load_errors).new.tap do |c|
       c.catch_cell_load_errors = false
@@ -26,14 +53,21 @@ module SimpleXlsxReader
   end
   def self.open(file_path)
-    Document.new(file_path).tap(&:sheets)
+    Document.new(file_path: file_path).tap(&:sheets)
+  end
+  def self.parse(string_or_io)
+    Document.new(string_or_io: string_or_io).tap(&:sheets)
   end
   class Document
-    attr_reader :file_path
+    attr_reader :string_or_io
+    def initialize(legacy_file_path = nil, file_path: nil, string_or_io: nil)
+      ((file_path || legacy_file_path).nil? ^ string_or_io.nil?) ||
+        fail(ArgumentError, 'either file_path or string_or_io must be provided')
-    def initialize(file_path)
-      @file_path = file_path
+      @string_or_io = string_or_io || File.new(file_path || legacy_file_path)
     end
     def sheets
@@ -45,7 +79,7 @@ module SimpleXlsxReader
     end
     def xml
-      Xml.load(file_path)
+      Xml.load(string_or_io)
     end
     class Sheet < Struct.new(:name, :rows)
@@ -69,28 +103,54 @@ module SimpleXlsxReader
     ##
     # For internal use; stores source xml in nokogiri documents
     class Xml
-      attr_accessor :workbook, :shared_strings, :sheets, :styles
+      attr_accessor :workbook, :shared_strings, :sheets, :sheet_rels, :styles
-      def self.load(file_path)
+      def self.load(string_or_io)
         self.new.tap do |xml|
-          SimpleXlsxReader::Zip.open(file_path) do |zip|
-            xml.workbook = Nokogiri::XML(zip.read('xl/workbook.xml')).remove_namespaces!
-            xml.styles   = Nokogiri::XML(zip.read('xl/styles.xml')).remove_namespaces!
-            # optional feature used by excel, but not often used by xlsx
-            # generation libraries
-            if zip.file.file?('xl/sharedStrings.xml')
-              xml.shared_strings = Nokogiri::XML(zip.read('xl/sharedStrings.xml')).remove_namespaces!
-            end
+          SimpleXlsxReader::Zip.open_buffer(string_or_io) do |zip|
             xml.sheets = []
-            i = 0
-            loop do
-              i += 1
-              break if !zip.file.file?("xl/worksheets/sheet#{i}.xml")
+            xml.sheet_rels = []
+            # This weird style of enumerating over the entries lets us
+            # concisely assign entries in a case insensitive and
+            # slash insensitive ('/' vs '\') manner.
+            #
+            # RubyZip used to normalize the slashes, but doesn't now:
+            # https://github.com/rubyzip/rubyzip/issues/324
+            zip.entries.each do |entry|
+              if entry.name.match(/^xl.workbook\.xml$/) # xl/workbook.xml
+                xml.workbook = Nokogiri::XML(zip.read(entry)).remove_namespaces!
+              elsif entry.name.match(/^xl.styles\.xml$/) # xl/styles.xml
+                xml.styles   = Nokogiri::XML(zip.read(entry)).remove_namespaces!
+              elsif entry.name.match(/^xl.sharedStrings\.xml$/i) # xl/sharedStrings.xml
+                # optional feature used by excel, but not often used by xlsx
+                # generation libraries. Path name is sometimes lowercase, too.
+                xml.shared_strings = Nokogiri::XML(zip.read(entry)).remove_namespaces!
+              elsif match = entry.name.match(/^xl.worksheets.sheet([0-9]*)\.xml$/)
+                sheet_number = match.captures.first.to_i
+                xml.sheets[sheet_number] =
+                  Nokogiri::XML(zip.read(entry)).remove_namespaces!
+              elsif match = entry.name.match(/^xl.worksheets._rels.sheet([0-9]*)\.xml\.rels$/)
+                sheet_number = match.captures.first.to_i
+                xml.sheet_rels[sheet_number] =
+                  Nokogiri::XML(zip.read(entry)).remove_namespaces!
+              end
+            end
-              xml.sheets <<
-                Nokogiri::XML(zip.read("xl/worksheets/sheet#{i}.xml")).remove_namespaces!
+            # Sometimes there's a zero-index sheet.xml, ex.
+            # Google Docs creates:
+            #
+            # xl/worksheets/sheet.xml
+            # xl/worksheets/sheet1.xml
+            # xl/worksheets/sheet2.xml
+            # While Excel creates:
+            # xl/worksheets/sheet1.xml
+            # xl/worksheets/sheet2.xml
+            #
+            # So, for the latter case, let's shift [null, <Sheet 1>, <Sheet 2>]
+            if !xml.sheets[0]
+              xml.sheets.shift
+              xml.sheet_rels.shift
             end
           end
         end
@@ -105,7 +165,7 @@ module SimpleXlsxReader
       def load_sheets
         sheet_toc.each_with_index.map do |(sheet_name, _sheet_number), i|
-          parse_sheet(sheet_name, xml.sheets[i])  # sheet_number is *not* the index into xml.sheets
+          parse_sheet(sheet_name, xml.sheets[i], xml.sheet_rels[i])  # sheet_number is *not* the index into xml.sheets
         end
       end
@@ -121,9 +181,10 @@ module SimpleXlsxReader
         end
       end
-      def parse_sheet(sheet_name, xsheet)
+      def parse_sheet(sheet_name, xsheet, xrels)
         sheet = Sheet.new(sheet_name)
         sheet_width, sheet_height = *sheet_dimensions(xsheet)
+        cells_w_links = xsheet.xpath('//hyperlinks/hyperlink').inject({}) {|acc, e| acc[e.attr(:ref)] = e.attr(:id); acc}
         sheet.rows = Array.new(sheet_height) { Array.new(sheet_width) }
         xsheet.xpath("/worksheet/sheetData/row/c").each do |xcell|
@@ -148,10 +209,21 @@ module SimpleXlsxReader
           # by about 60%. Odd.
           xvalue = type == 'inlineStr' ?
             (xis = xcell.children.find {|c| c.name == 'is'}) && xis.children.find {|c| c.name == 't'} :
-            xcell.children.find {|c| c.name == 'v'}
+            xcell.children.find {|c| c.name == 'f' && c.text.start_with?('HYPERLINK(') || c.name == 'v'}
+          if xvalue
+            value = xvalue.text.strip
+            if rel_id = cells_w_links[xcell.attr('r')] # a hyperlink made via GUI
+              url = xrels.at_xpath(%(//*[@Id="#{rel_id}"])).attr('Target')
+            elsif xvalue.name == 'f' # only time we have a function is if it's a hyperlink
+              url = value.slice(/HYPERLINK\("(.*?)"/, 1)
+            end
+          end
           cell = begin
-            self.class.cast(xvalue && xvalue.text.strip, type, style,
+            self.class.cast(value, type, style,
+                            :url => url,
                             :shared_strings => shared_strings,
                             :base_date => base_date)
           rescue => e
@@ -218,11 +290,13 @@ module SimpleXlsxReader
       # 'ABA' = 26 * 26 * 1 + 26 * 2  + 1
       # 'BZA' = 26 * 26 * 2 + 26 * 26 + 1
       def column_letter_to_number(column_letter)
-        pow = -1
-        column_letter.codepoints.reverse.inject(0) do |acc, charcode|
-          pow += 1
-          acc + 26**pow * (charcode - 64)
+        pow = column_letter.length - 1
+        result = 0
+        column_letter.each_byte do |b|
+          result += 26**pow * (b - 64)
+          pow -= 1
         end
+        result
       end
       # Excel doesn't record types for some cells, only its display style, so
@@ -241,21 +315,32 @@ module SimpleXlsxReader
       # type.
       def style_types
         @style_types ||=
-          xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
-            style_type_by_num_fmt_id(xstyle.attributes['numFmtId'].value)}
+            xml.styles.xpath('/styleSheet/cellXfs/xf').map {|xstyle|
+              style_type_by_num_fmt_id(num_fmt_id(xstyle))}
+      end
+      #returns the numFmtId value if it's available
+      def num_fmt_id(xstyle)
+        if xstyle.attributes['numFmtId']
+          xstyle.attributes['numFmtId'].value
+        else
+          nil
+        end
       end
       # Finds the type we think a style is; For example, fmtId 14 is a date
-      # style, so this would return :date
+      # style, so this would return :date.
+      #
+      # Note, custom styles usually (are supposed to?) have a numFmtId >= 164,
+      # but in practice can sometimes be simply out of the usual "Any Language"
+      # id range that goes up to 49. For example, I have seen a numFmtId of
+      # 59 specified as a date. In Thai, 59 is a number format, so this seems
+      # like a bad idea, but we try to be flexible and just go with it.
       def style_type_by_num_fmt_id(id)
         return nil if id.nil?
         id = id.to_i
-        if id >= 164 # custom style, arg!
-          custom_style_types[id]
-        else # we should know this one
-          NumFmtMap[id]
-        end
+        NumFmtMap[id] || custom_style_types[id]
       end
       # Map of (numFmtId >= 164) (custom styles) to our best guess at the type
@@ -314,7 +399,7 @@ module SimpleXlsxReader
           type = style
         end
-        case type
+        casted = case type
         ##
         # There are few built-in types
@@ -347,15 +432,15 @@ module SimpleXlsxReader
         # the trickiest. note that  all these formats can vary on
         # whether they actually contain a date, time, or datetime.
         when :date, :time, :date_time
-          days_since_date_system_start, fraction_of_24 = value.split('.')
+          value = Float(value)
+          days_since_date_system_start = value.to_i
+          fraction_of_24 = value - days_since_date_system_start
           # http://stackoverflow.com/questions/10559767/how-to-convert-ms-excel-date-from-float-to-date-format-in-ruby
-          date = options.fetch(:base_date, DATE_SYSTEM_1900) + Integer(days_since_date_system_start)
-          if fraction_of_24 # there is a time associated
-            fraction_of_24 = "0.#{fraction_of_24}".to_f
-            seconds        = (fraction_of_24 * 86400).round
+          date = options.fetch(:base_date, DATE_SYSTEM_1900) + days_since_date_system_start
+          if fraction_of_24 > 0 # there is a time associated
+            seconds = (fraction_of_24 * 86400).round
             return Time.utc(date.year, date.month, date.day) + seconds
           else
             return date
@@ -374,6 +459,12 @@ module SimpleXlsxReader
         else
           value
         end
+        if options[:url]
+          Hyperlink.new(options[:url], casted)
+        else
+          casted
+        end
       end
       ## Returns the base_date from which to calculate dates.

data/simple_xlsx_reader.gemspec CHANGED Viewed

@@ -7,19 +7,21 @@ Gem::Specification.new do |gem|
   gem.name          = "simple_xlsx_reader"
   gem.version       = SimpleXlsxReader::VERSION
   gem.authors       = ["Woody Peterson"]
-  gem.email         = ["woody@sigby.com"]
+  gem.email         = ["woody.peterson@gmail.com"]
   gem.description   = %q{Read xlsx data the Ruby way}
   gem.summary       = %q{Read xlsx data the Ruby way}
   gem.homepage      = ""
+  gem.license       = "MIT"
   gem.add_dependency 'nokogiri'
   gem.add_dependency 'rubyzip'
   gem.add_development_dependency 'minitest', '>= 5.0'
+  gem.add_development_dependency 'rake'
   gem.add_development_dependency 'pry'
   gem.files         = `git ls-files`.split($/)
   gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
-  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+  gem.test_files    = gem.files.grep(%r{^test/})
   gem.require_paths = ["lib"]
 end

data/test/date1904_test.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-require 'test_helper'
+require_relative 'test_helper'
 describe SimpleXlsxReader do
   let(:date1904_file) { File.join(File.dirname(__FILE__), 'date1904.xlsx') }

data/test/datetime_test.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-require 'test_helper'
+require_relative 'test_helper'
 describe SimpleXlsxReader do
   let(:datetimes_file) { File.join(File.dirname(__FILE__),
@@ -11,7 +11,8 @@ describe SimpleXlsxReader do
       "Datetimes" =>
         [[Time.parse("2013-08-19 18:29:59 UTC")],
          [Time.parse("2013-08-19 18:30:00 UTC")],
-         [Time.parse("2013-08-19 18:30:01 UTC")]]
+         [Time.parse("2013-08-19 18:30:01 UTC")],
+         [Time.parse("1899-12-30 00:30:00 UTC")]]
     })
   end

data/test/datetimes.xlsx CHANGED Viewed

Binary file

data/test/gdocs_sheet.xlsx ADDED Viewed

Binary file

data/test/gdocs_sheet_test.rb ADDED Viewed

@@ -0,0 +1,15 @@
+require_relative 'test_helper'
+require 'time'
+describe SimpleXlsxReader do
+  let(:one_sheet_file) { File.join(File.dirname(__FILE__), 'gdocs_sheet.xlsx') }
+  let(:subject) { SimpleXlsxReader::Document.new(one_sheet_file) }
+  it 'able to load file from google docs' do
+    subject.to_hash.must_equal({
+      "List 1" => [["Empty gdocs list 1"]],
+      "List 2" => [["Empty gdocs list 2"]]
+    })
+  end
+end

data/test/lower_case_sharedstrings.xlsx ADDED Viewed

Binary file

data/test/lower_case_sharedstrings_test.rb ADDED Viewed

@@ -0,0 +1,15 @@
+require_relative 'test_helper'
+describe SimpleXlsxReader do
+  let(:lower_case_shared_strings) { File.join(File.dirname(__FILE__),
+                                                'lower_case_sharedstrings.xlsx') }
+  let(:subject) { SimpleXlsxReader::Document.new(lower_case_shared_strings) }
+  describe '#to_hash' do
+    it 'should have the word Well in the first row' do
+      subject.sheets.first.rows[0].must_include('Well')
+    end
+  end
+end

data/test/performance_test.rb CHANGED Viewed

@@ -1,4 +1,4 @@
-require 'test_helper'
+require_relative 'test_helper'
 require 'minitest/benchmark'
 describe 'SimpleXlsxReader Benchmark' do
@@ -96,13 +96,13 @@ describe 'SimpleXlsxReader Benchmark' do
     bench_exp(1,10000)
   end
-  bench_performance_linear 'parses sheets in linear time', 0.9999 do |n|
+  bench_performance_linear 'parses sheets in linear time', 0.999 do |n|
     raise "not enough sample data; asked for #{n}, only have #{@xml.sheets.size}"\
       if @xml.sheets[n].nil?
     sheet = SimpleXlsxReader::Document::Mapper.new(@xml).
-      parse_sheet('test', @xml.sheets[n])
+      parse_sheet('test', @xml.sheets[n], nil)
     raise "sheet didn't parse correctly; expected #{n + 1} rows, got #{sheet.rows.size}"\
       if sheet.rows.size != n + 1

data/test/sesame_street_blog.xlsx CHANGED Viewed

Binary file

data/test/simple_xlsx_reader_test.rb CHANGED Viewed

@@ -1,26 +1,66 @@
-require 'test_helper'
+require_relative 'test_helper'
 require 'time'
+SXR = SimpleXlsxReader
 describe SimpleXlsxReader do
-  let(:sesame_street_blog_file) { File.join(File.dirname(__FILE__),
-                                            'sesame_street_blog.xlsx') }
-  let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file) }
-  describe '#to_hash' do
-    it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
-      subject.to_hash.must_equal({
-        "Authors"=>
-          [["Name", "Occupation"],
-           ["Big Bird", "Teacher"]],
-        "Posts"=>
-          [["Author Name", "Title", "Body", "Created At", "Comment Count"],
-           ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1],
-           ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2],
-           ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0],
-           ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil]]
-      })
+  let(:sesame_street_blog_file_path) { File.join(File.dirname(__FILE__), 'sesame_street_blog.xlsx') }
+  let(:sesame_street_blog_io) { File.new(sesame_street_blog_file_path) }
+  let(:expected_result) do
+    {
+      "Authors"=>
+        [["Name", "Occupation"],
+          ["Big Bird", "Teacher"]],
+      "Posts"=>
+        [["Author Name", "Title", "Body", "Created At", "Comment Count", "URL"],
+          ["Big Bird", "The Number 1", "The Greatest", Time.parse("2002-01-01 11:00:00 UTC"), 1, SXR::Hyperlink.new("http://www.example.com/hyperlink-function", "This uses the HYPERLINK() function")],
+          ["Big Bird", "The Number 2", "Second Best", Time.parse("2002-01-02 14:00:00 UTC"), 2, SXR::Hyperlink.new("http://www.example.com/hyperlink-gui", "This uses the hyperlink GUI option")],
+          ["Big Bird", "Formula Dates", "Tricky tricky", Time.parse("2002-01-03 14:00:00 UTC"), 0, nil],
+          ["Empty Eagress", nil, "The title, date, and comment have types, but no values", nil, nil, nil]]
+    }
+  end
+  describe SimpleXlsxReader do
+    describe 'load from file path' do
+      let(:subject) { SimpleXlsxReader.open(sesame_street_blog_file_path) }
+      it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
+        subject.to_hash.must_equal(expected_result)
+      end
+    end
+    describe 'load from buffer' do
+      let(:subject) { SimpleXlsxReader.parse(sesame_street_blog_io) }
+      it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
+        subject.to_hash.must_equal(expected_result)
+      end
+    end
+  end
+  describe SimpleXlsxReader::Document do
+    describe 'load from file path' do
+      let(:subject) { SimpleXlsxReader::Document.new(file_path: sesame_street_blog_file_path) }
+      it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
+        subject.to_hash.must_equal(expected_result)
+      end
+    end
+    describe 'load from buffer' do
+      let(:subject) { SimpleXlsxReader::Document.new(string_or_io: sesame_street_blog_io) }
+      it 'reads an xlsx buffer into a hash of {[sheet name] => [data]}' do
+        subject.to_hash.must_equal(expected_result)
+      end
+    end
+    describe 'load from file path (legacy API)' do
+      let(:subject) { SimpleXlsxReader::Document.new(sesame_street_blog_file_path) }
+      it 'reads an xlsx file into a hash of {[sheet name] => [data]}' do
+        subject.to_hash.must_equal(expected_result)
+      end
     end
   end
@@ -63,10 +103,33 @@ describe SimpleXlsxReader do
           must_equal Time.parse('2013-08-19 18:30 UTC')
       end
+      it 'reads less-than-zero complex number types styled as times' do
+        described_class.cast('6.25E-2', 'n', :time).
+          must_equal Time.parse('1899-12-30 01:30:00 UTC')
+      end
       it 'reads number types styled as date_times' do
         described_class.cast('41505.77083', 'n', :date_time).
           must_equal Time.parse('2013-08-19 18:30 UTC')
       end
+      it 'raises when date-styled values are not numerical' do
+        lambda { described_class.cast('14 is not a valid date', nil, :date) }.
+          must_raise(ArgumentError)
+      end
+      describe "with the url option" do
+        let(:url) { "http://www.example.com/hyperlink" }
+        it 'creates a hyperlink with a string type' do
+          described_class.cast("A link", 'str', :string, url: url).
+            must_equal SXR::Hyperlink.new(url, "A link")
+        end
+        it 'creates a hyperlink with a shared string type' do
+          described_class.cast("2", 's', nil, shared_strings: ['a','b','c'], url: url).
+            must_equal SXR::Hyperlink.new(url, 'c')
+        end
+      end
     end
     describe '#shared_strings' do
@@ -102,6 +165,13 @@ describe SimpleXlsxReader do
       it 'reads custom formatted styles (numFmtId >= 164)' do
         mapper.style_types[1].must_equal :date_time
+        mapper.custom_style_types[164].must_equal :date_time
+      end
+      # something I've seen in the wild; don't think it's correct, but let's be flexible.
+      it 'reads custom formatted styles given an id < 164, but not explicitly defined in the SpreadsheetML spec' do
+        mapper.style_types[2].must_equal :date_time
+        mapper.custom_style_types[59].must_equal :date_time
       end
     end
@@ -246,16 +316,55 @@ describe SimpleXlsxReader do
       it 'raises if configuration.catch_cell_load_errors' do
         SimpleXlsxReader.configuration.catch_cell_load_errors = false
-        lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first) }.
+        lambda { described_class.new(xml).parse_sheet('test', xml.sheets.first, nil) }.
           must_raise(SimpleXlsxReader::CellLoadError)
       end
       it 'records a load error if not configuration.catch_cell_load_errors' do
         SimpleXlsxReader.configuration.catch_cell_load_errors = true
-        sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first)
-        sheet.load_errors[[0,0]].must_include 'invalid value for Integer'
+        sheet = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil)
+        sheet.load_errors[[0,0]].must_include 'invalid value for Float'
+      end
+    end
+    describe "missing numFmtId attributes" do
+      let(:xml) do
+        SimpleXlsxReader::Document::Xml.new.tap do |xml|
+          xml.sheets = [Nokogiri::XML(
+                            <<-XML
+            <worksheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+              <dimension ref="A1:A1" />
+              <sheetData>
+                <row>
+                  <c r='A1' s='s'>
+                    <v>some content</v>
+                  </c>
+                </row>
+              </sheetData>
+            </worksheet>
+                        XML
+                        ).remove_namespaces!]
+          xml.styles = Nokogiri::XML(
+              <<-XML
+            <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+            </styleSheet>
+          XML
+          ).remove_namespaces!
+        end
+      end
+      before do
+        @row = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows[0]
       end
+      it 'continues even when cells are missing numFmtId attributes ' do
+        @row[0].must_equal 'some content'
+      end
     end
     describe 'parsing types' do
@@ -284,8 +393,21 @@ describe SimpleXlsxReader do
                     <c r='G1' t='inlineStr' s='0'>
                       <is><t>Cell G1</t></is>
                     </c>
+                    <c r='H1' s='0'>
+                      <f>HYPERLINK("http://www.example.com/hyperlink-function", "HYPERLINK function")</f>
+                      <v>HYPERLINK function</v>
+                    </c>
+                    <c r='I1' s='0'>
+                      <v>GUI-made hyperlink</v>
+                    </c>
                   </row>
                 </sheetData>
+                <hyperlinks>
+                  <hyperlink ref="I1" id="rId1"/>
+                </hyperlinks>
               </worksheet>
             XML
           ).remove_namespaces!]
@@ -303,11 +425,28 @@ describe SimpleXlsxReader do
               </styleSheet>
             XML
           ).remove_namespaces!
+          # Although not a "type" or "style" according to xlsx spec,
+          # it sure could/should be, so let's test it with the rest of our
+          # typecasting code.
+          xml.sheet_rels = [Nokogiri::XML(
+            <<-XML
+              <Relationships>
+                <Relationship
+                  Id="rId1"
+                  Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"
+                  Target="http://www.example.com/hyperlink-gui"
+                  TargetMode="External"
+                />
+              </Relationships>
+            XML
+          ).remove_namespaces!]
         end
       end
       before do
-        @row = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows[0]
+        @row = described_class.new(xml).parse_sheet('test', xml.sheets.first, xml.sheet_rels.first).rows[0]
       end
       it "reads 'Generic' cells as strings" do
@@ -341,6 +480,18 @@ describe SimpleXlsxReader do
       it "reads strings formatted as inlineStr" do
         @row[6].must_equal 'Cell G1'
       end
+      it "reads hyperlinks created via HYPERLINK()" do
+        @row[7].must_equal(
+          SXR::Hyperlink.new(
+            "http://www.example.com/hyperlink-function", "HYPERLINK function"))
+      end
+      it "reads hyperlinks created via the GUI" do
+        @row[8].must_equal(
+          SXR::Hyperlink.new(
+            "http://www.example.com/hyperlink-gui", "GUI-made hyperlink"))
+      end
     end
     describe 'parsing documents with blank rows' do
@@ -389,7 +540,7 @@ describe SimpleXlsxReader do
       end
       before do
-        @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first).rows
+        @rows = described_class.new(xml).parse_sheet('test', xml.sheets.first, nil).rows
       end
       it "reads row data despite gaps in row numbering" do

data/test/styles.xml CHANGED Viewed

@@ -1,6 +1,7 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
 <styleSheet xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac" mc:Ignorable="x14ac">
-  <numFmts count="1">
+  <numFmts count="2">
+    <numFmt numFmtId="59" formatCode="dd/mm/yyyy"/>
     <numFmt numFmtId="164" formatCode="[$-409]m/d/yy\ h:mm\ AM/PM;@"/>
   </numFmts>
   <fonts count="3" x14ac:knownFonts="1">
@@ -50,9 +51,10 @@
     <xf numFmtId="0" fontId="1" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
     <xf numFmtId="0" fontId="2" fillId="0" borderId="0" applyNumberFormat="0" applyFill="0" applyBorder="0" applyAlignment="0" applyProtection="0"/>
   </cellStyleXfs>
-  <cellXfs count="3">
+  <cellXfs count="4">
     <xf numFmtId="0" fontId="0" fillId="0" borderId="0" xfId="0"/>
     <xf numFmtId="164" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
+    <xf numFmtId="59" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
     <xf numFmtId="1" fontId="0" fillId="0" borderId="0" xfId="0" applyNumberFormat="1"/>
   </cellXfs>
   <cellStyles count="3">

data/test/test_helper.rb CHANGED Viewed

@@ -2,6 +2,7 @@ gem 'minitest'
 require 'minitest/autorun'
 require 'minitest/spec'
 require 'pry'
+require 'time'
 $:.unshift File.expand_path("lib")
 require 'simple_xlsx_reader'

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: simple_xlsx_reader
 version: !ruby/object:Gem::Version
-  version: 1.0.1
+  version: 1.0.5
 platform: ruby
 authors:
 - Woody Peterson
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-09-02 00:00:00.000000000 Z
+date: 2022-05-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -52,6 +52,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '5.0'
+- !ruby/object:Gem::Dependency
+  name: rake
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: pry
   requirement: !ruby/object:Gem::Requirement
@@ -68,12 +82,13 @@ dependencies:
         version: '0'
 description: Read xlsx data the Ruby way
 email:
-- woody@sigby.com
+- woody.peterson@gmail.com
 executables: []
 extensions: []
 extra_rdoc_files: []
 files:
 - ".gitignore"
+- ".travis.yml"
 - CHANGELOG.md
 - Gemfile
 - LICENSE.txt
@@ -86,6 +101,10 @@ files:
 - test/date1904_test.rb
 - test/datetime_test.rb
 - test/datetimes.xlsx
+- test/gdocs_sheet.xlsx
+- test/gdocs_sheet_test.rb
+- test/lower_case_sharedstrings.xlsx
+- test/lower_case_sharedstrings_test.rb
 - test/performance_test.rb
 - test/sesame_street_blog.xlsx
 - test/shared_strings.xml
@@ -93,7 +112,8 @@ files:
 - test/styles.xml
 - test/test_helper.rb
 homepage: ''
-licenses: []
+licenses:
+- MIT
 metadata: {}
 post_install_message:
 rdoc_options: []
@@ -110,8 +130,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubyforge_project:
-rubygems_version: 2.2.0
+rubygems_version: 3.1.6
 signing_key:
 specification_version: 4
 summary: Read xlsx data the Ruby way
@@ -120,6 +139,10 @@ test_files:
 - test/date1904_test.rb
 - test/datetime_test.rb
 - test/datetimes.xlsx
+- test/gdocs_sheet.xlsx
+- test/gdocs_sheet_test.rb
+- test/lower_case_sharedstrings.xlsx
+- test/lower_case_sharedstrings_test.rb
 - test/performance_test.rb
 - test/sesame_street_blog.xlsx
 - test/shared_strings.xml