RubyGems - xsv - Versions diffs - 0.2.0 → 0.2.1 - Mend

xsv 0.2.0 → 0.2.1

Files changed (9) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 47e4ee16a95b100a1c1bbc526912235bbd1386601a33e3c3449320fe4ea8bc52
-  data.tar.gz: 5b0f8320ff29a3dd036cf4396052cbcee00556c2f116dce63c81eb6f4bb69e2a
+  metadata.gz: ace53a58655a50f4de2f10c4a2d68819774e3e74625595353290502299630b30
+  data.tar.gz: 8278ea7b26ac261781ae71328762104db9d14de0cbc05be1b0551a3cb876ea35
 SHA512:
-  metadata.gz: 40bde712c1df13d4fd330b24fdd3cb9e40e5983b4271c172b6f77245aa4d57b8aadc470be5faffb5c759e807c6bf5a507f87139279b86395cddf0fc70b5446fb
-  data.tar.gz: 943f5436b416f226f8bcfa2a856dc8079f061be20ca8cc534a9e147692906a280f85f6eb300b7c4e4681ce70a6c5bb81029c6abb70deab87950919e21f03104a
+  metadata.gz: 31ccd6261073893e0a0873997d7befa39fc0f8088f5f4d9d1ec3f97b7d90f48d1153714cff4f124a4be853976ce714326a6ec8634e646f15ecf6cf428c245a39
+  data.tar.gz: eed43d77052870dc45bc009719b130096a05a6add11576a349ca027ff93b428bfe64813dd83a8ea58a1a7da94aa8b490b4394efa65bbb175ae09dd74c77ee20a

data/.gitignore CHANGED Viewed

@@ -6,3 +6,9 @@
 /pkg/
 /spec/reports/
 /tmp/
+.DS_Store
+/inspect*
+/dump*
+/*.gem

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    xsv (0.1.2)
+    xsv (0.2.0)
       nokogiri (~> 1.10)
       rubyzip (~> 2.2)

data/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 Xsv is a very basic parser for Excel files in the .xlsx format that strives to
 provide feature parity with common CSV readers and nothing more. This means
-it only parses values to basic Ruby types and does not deal with formatting
+it only parses values to basic Ruby types and does not deal with most formatting
 or more advanced functionality. The goal is to allow for fast parsing of large
 worksheets with minimal RAM and CPU consumption.
@@ -68,6 +68,24 @@ sheet[1] # => {"header1" => "value1", "header2" => "value2"}
 Be aware that hash mode will lead to unpredictable results if you have multiple
 columns with the same name!
+### Assumptions
+Since Xsv treats worksheets like CSV files, it makes certain assumptions about your
+sheet:
+- In array mode, your data starts on the first row
+- In hash mode, the first row of the sheet contains headers, followed by rows of data
+If your data or headers do not start on the first row of the sheet, you can
+tell Xsv to skip a number of rows:
+```ruby
+workbook.sheets[0].row_skip = 1
+```
+All operations will honour this offset, making the skipped rows unreachable.
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

data/lib/xsv/helpers.rb CHANGED Viewed

@@ -34,8 +34,13 @@ module Xsv
       49 => "@",
     }
+    MINUTE = 60
+    HOUR = 3600
     # Return the index number for the given Excel column name
     def column_index(col)
+      col = col.scan(/^[A-Z]+/).first
       val = 0
       while col.length > 0
         val *= 26
@@ -52,12 +57,35 @@ module Xsv
     # Return a time as a string for the given Excel time value
     def parse_time(number)
+      # Disregard date part
+      if number > 0
+        number = number - number.truncate
+      end
       base = number * 24
       hours = base.truncate
-      minutes = (base - hours) * 60
+      minutes = ((base - hours) * 60).round
-      "%02d:%02d" % [base, minutes.round]
+      # Compensate for rounding errors
+      if minutes >= 60
+        hours = hours + (minutes / 60)
+        minutes = minutes % 60
+      end
+      "%02d:%02d" % [hours, minutes]
+    end
+    def parse_datetime(number)
+      date_base = number.truncate
+      time = parse_date(date_base).to_time
+      time_base = (number - date_base) * 24
+      hours = time_base.truncate
+      minutes = (time_base - hours) * 60
+      time + hours * HOUR + minutes.round * MINUTE
     end
     def parse_number(string)
@@ -68,6 +96,11 @@ module Xsv
       end
     end
+    # Tests if the given format string includes both date and time
+    def is_datetime_format?(format)
+      is_date_format?(format) && is_time_format?(format)
+    end
     # Tests if the given format string is a date
     def is_date_format?(format)
       return false if format.nil?

data/lib/xsv/sheet.rb CHANGED Viewed

@@ -4,16 +4,34 @@ module Xsv
     attr_reader :xml, :mode
+    # Set a number of rows to skip at the top of the sheet (header row offset)
+    attr_accessor :row_skip
     def initialize(workbook, xml)
       @workbook = workbook
       @xml = xml
       @headers = []
+      @mode = :array
+      @row_skip = 0
-      # Determine number of columns
-      bounds = @xml.css("cols col").map { |c| [c["min"].to_i, c["max"].to_i] }.flatten
-      @column_count = (bounds.max - bounds.min) + 1
+      dimension = xml.css("dimension").first
-      @mode = :array
+      if dimension
+        _firstCell, lastCell = dimension["ref"].split(":")
+      end
+      if lastCell
+        # Assume the dimension reflects the content
+        @column_count = column_index(lastCell) + 1
+      else
+        # Find the last cell in every row that has a value
+        rightmost_cells = @xml.xpath("//xmlns:row/xmlns:c[*[local-name() = 'v']][last()]").map { |c| column_index(c["r"]) }
+        @column_count = rightmost_cells.max + 1
+      end
+      # Find the last row that contains actual values
+      @last_row = @xml.xpath("//xmlns:row[*[xmlns:v]][last()]").first["r"].to_i
     end
     def inspect
@@ -22,19 +40,28 @@ module Xsv
     # Iterate over rows
     def each_row
-      row_index = 0
+      row_index = 0 - @row_skip
       @xml.css("sheetData row").each do |row_xml|
+        if row_index < 0
+          row_index += 1
+          next
+        end
         row_index += 1
         next if row_index == 1 && @mode == :hash
         # pad empty rows
-        while row_index < row_xml["r"].to_i do
+        while row_index < row_xml["r"].to_i - @row_skip do
           yield(empty_row)
           row_index += 1
         end
         yield(parse_row(row_xml))
+        # Do not return empty trailing rows
+        break if row_index == @last_row - @row_skip
       end
       true
@@ -42,7 +69,7 @@ module Xsv
     # Get row by number, starting at 0
     def [](number)
-      row_xml = xml.css("sheetData row[r=#{number + 1}]").first
+      row_xml = xml.css("sheetData row[r=#{number + @row_skip + 1}]").first
       if row_xml
         parse_row(row_xml)
@@ -55,7 +82,7 @@ module Xsv
     # all methods return hashes instead of arrays
     def parse_headers!
       @mode = :array
-      parse_headers
+      @headers = parse_headers
       @mode = :hash
@@ -65,7 +92,7 @@ module Xsv
     private
     def parse_headers
-      @headers = parse_row(@xml.css("sheetData row").first)
+      parse_row(@xml.css("sheetData row")[@row_skip])
     end
     def empty_row
@@ -80,7 +107,7 @@ module Xsv
     def parse_row(xml)
       row = empty_row
-      xml.css("c").each do |c_xml|
+      xml.css("c").first(@column_count).each do |c_xml|
         value = case c_xml["t"]
           when "s"
             @workbook.shared_strings[c_xml.css("v").inner_text.to_i]
@@ -89,29 +116,36 @@ module Xsv
           when "e" # N/A
             nil
           when nil
-            value = parse_number(c_xml.css("v").inner_text)
+            v = c_xml.css("v").first
+            if v.nil?
+              nil
+            elsif c_xml["s"]
+              value = parse_number(v.inner_text)
-            if c_xml["s"]
               style = @workbook.xfs[c_xml["s"].to_i]
               numFmtId = style[:numFmtId].to_i
+              numFmt = @workbook.numFmts[numFmtId]
               if numFmtId == 0
                 value
-              elsif is_date_format?(@workbook.numFmts[numFmtId])
+              elsif is_datetime_format?(numFmt)
+                parse_datetime(value)
+              elsif is_date_format?(numFmt)
                 parse_date(value)
-              elsif is_time_format?(@workbook.numFmts[numFmtId])
+              elsif is_time_format?(numFmt)
                 parse_time(value)
               else
                 value
               end
             else
-              value
+              parse_number(v.inner_text)
             end
           else
             raise Xsv::Error, "Encountered unknown column type #{c_xml["t"]}"
           end
         # Determine column position and pad row with nil values
-        col_index = column_index(c_xml["r"].scan(/^[A-Z]+/).first)
+        col_index = column_index(c_xml["r"])
         case @mode
         when :array

data/lib/xsv/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Xsv
-  VERSION = "0.2.0"
+  VERSION = "0.2.1"
 end

data/lib/xsv/workbook.rb CHANGED Viewed

@@ -57,8 +57,8 @@ module Xsv
     end
     def fetch_sheets
-      @zip.glob("xl/worksheets/sheet*.xml").sort do |entry|
-        entry.name.scan(/\d+/).first.to_i
+      @zip.glob("xl/worksheets/sheet*.xml").sort do |a, b|
+        a.name.scan(/\d+/).first.to_i <=> b.name.scan(/\d+/).first.to_i
       end.each do |entry|
         @sheets << Xsv::Sheet.new(self, Nokogiri::XML(entry.get_input_stream))
       end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: xsv
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.2.1
 platform: ruby
 authors:
 - Martijn Storck