RubyGems - fastercsv - Versions diffs - 1.2.0 → 1.2.1 - Mend

fastercsv 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

data/CHANGELOG CHANGED

@@ -2,6 +2,15 @@
 Below is a complete listing of changes for each revision of FasterCSV.
+== 1.2.1
+* Worked around an odd incompatibility with the Regexps used to remove line
+  endings in some (seemingly rare) Ruby environments.
+* Made FasterCSV::lineno() writer aware.
+* Support Hashes in FasterCSV#<<(), by detecting headers when writing.
+* Added limited support for switching the quote character.
+* Refined Date and DateTime matching for built-in converters.
 == 1.2.0
 * Added the FasterCSV::table() shortcut.

data/LICENSE CHANGED

@@ -1,6 +1,6 @@
 = License Terms
-Distributed under the user's choice of the GPL[http://www.gnu.org/copyleft/gpl.html] (see COPYING for details) or the
+Distributed under the user's choice of the {GPL Version 2}[http://www.gnu.org/licenses/old-licenses/gpl-2.0.html] (see COPYING for details) or the
 {Ruby software license}[http://www.ruby-lang.org/en/LICENSE.txt] by
 James Edward Gray II.

data/README CHANGED

@@ -55,6 +55,16 @@ See FasterCSV for documentation.
 See the INSTALL file for instructions.
+== What is CSV, really?
+FasterCSV maintains a pretty strict definition of CSV taken directly from {the RFC}[http://www.ietf.org/rfc/rfc4180.txt].  I relax the rules in only one place and that is to make using this library easier.  FasterCSV will parse all valid CSV.
+What you don't want to do is feed FasterCSV invalid CSV.  Because of the way the CSV format works, it's common for a parser to need to read until the end of the file to be sure a field is invalid.  This eats a lot of time and memory.
+Luckily, when working with invalid CSV, Ruby's built-in methods will almost always be superior in every way.  For example, parsing non-quoted fields is as easy as:
+  data.split(",")
 == Questions and/or Comments
 Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] with

data/Rakefile CHANGED

@@ -45,6 +45,8 @@ task :benchmark do
   path = "test/test_data.csv"
 	sh %Q{time ruby -r csv -e } +
 	   %Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
+	sh %Q{time ruby -r lightcsv -e } +
+	   %Q{'#{TESTS}.times { LightCsv.foreach("#{path}") { |row| } }'}
 	sh %Q{time ruby -r lib/faster_csv -e } +
 	   %Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
 end

data/lib/faster_csv.rb CHANGED

@@ -75,7 +75,7 @@ require "stringio"
 #
 class FasterCSV
   # The version of the installed library.
-  VERSION = "1.2.0".freeze
+  VERSION = "1.2.1".freeze
   #
   # A FasterCSV::Row is part Array and part Hash.  It retains an order for the
@@ -712,6 +712,13 @@ class FasterCSV
   #
   FieldInfo = Struct.new(:index, :line, :header)
+  # A Regexp used to find and convert some common Date formats.
+  DateMatcher     = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
+                            \d{4}-\d{2}-\d{2} )\z /x
+  # A Regexp used to find and convert some common DateTime formats.
+  DateTimeMatcher =
+    / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
+            \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
   #
   # This Hash holds the built-in converters of FasterCSV that can be accessed by
   # name.  You can select Converters with FasterCSV.convert() or through the
@@ -732,12 +739,16 @@ class FasterCSV
   # To add a combo field, the value should be an Array of names.  Combo fields
   # can be nested with other combo fields.
   #
-  Converters = { :integer   => lambda { |f| Integer(f)        rescue f },
-                 :float     => lambda { |f| Float(f)          rescue f },
-                 :numeric   => [:integer, :float],
-                 :date      => lambda { |f| Date.parse(f)     rescue f },
-                 :date_time => lambda { |f| DateTime.parse(f) rescue f },
-                 :all       => [:date_time, :numeric] }
+  Converters  = { :integer   => lambda { |f| Integer(f)        rescue f },
+                  :float     => lambda { |f| Float(f)          rescue f },
+                  :numeric   => [:integer, :float],
+                  :date      => lambda { |f|
+                    f =~ DateMatcher ? (Date.parse(f) rescue f) : f
+                  },
+                  :date_time => lambda { |f|
+                    f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
+                  },
+                  :all       => [:date_time, :numeric] }
   #
   # This Hash holds the built-in header converters of FasterCSV that can be
@@ -768,6 +779,7 @@ class FasterCSV
   #
   # <b><tt>:col_sep</tt></b>::            <tt>","</tt>
   # <b><tt>:row_sep</tt></b>::            <tt>:auto</tt>
+  # <b><tt>:quote_char</tt></b>::         <tt>'"'</tt>
   # <b><tt>:converters</tt></b>::         +nil+
   # <b><tt>:unconverted_fields</tt></b>:: +nil+
   # <b><tt>:headers</tt></b>::            +false+
@@ -778,6 +790,7 @@ class FasterCSV
   #
   DEFAULT_OPTIONS = { :col_sep            => ",",
                       :row_sep            => :auto,
+                      :quote_char         => '"',
                       :converters         => nil,
                       :unconverted_fields => nil,
                       :headers            => false,
@@ -1267,6 +1280,15 @@ class FasterCSV
   #                                       (<tt>$/</tt>) is used.  Obviously,
   #                                       discovery takes a little time.  Set
   #                                       manually if speed is important.
+  # <b><tt>:quote_char</tt></b>::         The character used to quote fields.
+  #                                       This has to be a single character
+  #                                       String.  This is useful for
+  #                                       applications that incorrectly use
+  #                                       <tt>'</tt> as the quote character
+  #                                       instead of the correct <tt>"</tt>.
+  #                                       FasterCSV will always consider a
+  #                                       double sequence of this character to
+  #                                       be an escaped quote.
   # <b><tt>:converters</tt></b>::         An Array of names from the Converters
   #                                       Hash and/or lambdas that handle custom
   #                                       conversion.  A single converter
@@ -1368,9 +1390,16 @@ class FasterCSV
   # The data source must be open for writing.
   #
   def <<(row)
-    # handle FasterCSV::Row objects
-    row = row.fields if row.is_a? self.class::Row
+    # Handle FasterCSV::Row objects and Hashes
+    row = case row
+      when self.class::Row then row.fields
+      when Hash            then @headers.map { |header| row[header] }
+      else                      row
+    end
+    @headers =  row if header_row?
+    @lineno  += 1
     @io << row.map(&@quote).join(@col_sep) + @row_sep  # quote and separate
     self  # for chaining
@@ -1533,7 +1562,7 @@ class FasterCSV
             end
           end
         else                  # we found a quoted field...
-          $1.gsub('""', '"')  # unescape contents
+          $1.gsub(@quote_char * 2, @quote_char)  # unescape contents
         end
         ""  # gsub!'s replacement, clear the field
       end
@@ -1582,8 +1611,13 @@ class FasterCSV
   #
   def init_separators(options)
     # store the selected separators
-    @col_sep = options.delete(:col_sep)
-    @row_sep = options.delete(:row_sep)
+    @col_sep    = options.delete(:col_sep)
+    @row_sep    = options.delete(:row_sep)
+    @quote_char = options.delete(:quote_char)
+    if @quote_char.length != 1
+      raise ArgumentError, ":quote_char has to be a single character String"
+    end
     # automatically discover row separator when requested
     if @row_sep == :auto
@@ -1620,8 +1654,13 @@ class FasterCSV
     end
     # establish quoting rules
+    do_quote = lambda do |field|
+      @quote_char                                      +
+      String(field).gsub(@quote_char, @quote_char * 2) +
+      @quote_char
+    end
     @quote = if options.delete(:force_quotes)
-      lambda { |field| %Q{"#{String(field).gsub('"', '""')}"} }
+      do_quote
     else
       lambda do |field|
         if field.nil?  # represent +nil+ fields as empty unquoted fields
@@ -1629,8 +1668,9 @@ class FasterCSV
         else
           field = String(field)  # Stringify fields
           # represent empty fields as empty quoted fields
-          if field.empty? or field.count(%Q{\r\n#{@col_sep}"}).nonzero?
-            %Q{"#{field.gsub('"', '""')}"}  # escape quoted fields
+          if field.empty? or
+             field.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
+            do_quote.call(field)
           else
             field  # unquoted field
           end
@@ -1645,19 +1685,24 @@ class FasterCSV
     @skip_blanks = options.delete(:skip_blanks)
     # prebuild Regexps for faster parsing
-    @parsers    = {
+    esc_col_sep = Regexp.escape(@col_sep)
+    esc_row_sep = Regexp.escape(@row_sep)
+    esc_quote   = Regexp.escape(@quote_char)
+    @parsers = {
       :leading_fields =>
-        /\A(?:#{Regexp.escape(@col_sep)})+/,     # for empty leading fields
+        /\A(?:#{esc_col_sep})+/,                 # for empty leading fields
       :csv_row        =>
         ### The Primary Parser ###
-        / \G(?:^|#{Regexp.escape(@col_sep)})     # anchor the match
-          (?: "((?>[^"]*)(?>""[^"]*)*)"          # find quoted fields
+        / \G(?:^|#{esc_col_sep})                 # anchor the match
+          (?: #{esc_quote}( (?>[^#{esc_quote}]*) # find quoted fields
+                            (?> #{esc_quote*2}
+                                [^#{esc_quote}]* )* )#{esc_quote}
               |                                  # ... or ...
-              ([^"#{Regexp.escape(@col_sep)}]*)  # unquoted fields
+              ([^#{esc_quote}#{esc_col_sep}]*)   # unquoted fields
               )/x,
         ### End Primary Parser ###
       :line_end       =>
-        /#{Regexp.escape(@row_sep)}\Z/           # safer than chomp!()
+        /#{esc_row_sep}\z/                       # safer than chomp!()
     }
   end

data/test/tc_data_converters.rb CHANGED

@@ -45,7 +45,10 @@ class TestDataConverters < Test::Unit::TestCase
   def test_builtin_date_converter
     # does convert
-    assert_instance_of(Date, FasterCSV::Converters[:date][@win_safe_time_str])
+    assert_instance_of(
+      Date,
+      FasterCSV::Converters[:date][@win_safe_time_str.sub(/\d+:\d+:\d+ /, "")]
+    )
     # does not convert
     assert_instance_of(String, FasterCSV::Converters[:date]["junk"])

data/test/tc_features.rb CHANGED

@@ -59,6 +59,14 @@ class TestFasterCSVFeatures < Test::Unit::TestCase
                                         :row_sep => "\r\n") )
   end
+  def test_quote_char
+    TEST_CASES.each do |test_case|
+      assert_equal( test_case.last.map { |t| t.tr('"', "'") unless t.nil? },
+                    FasterCSV.parse_line( test_case.first.tr('"', "'"),
+                                          :quote_char => "'" ) )
+    end
+  end
   def test_row_sep_auto_discovery
     ["\r\n", "\n", "\r"].each do |line_end|
       data       = "1,2,3#{line_end}4,5#{line_end}"

data/test/tc_interface.rb CHANGED

@@ -89,7 +89,7 @@ class TestFasterCSVInterface < Test::Unit::TestCase
     end
     assert_equal(@expected, data)
   end
-[]
   def test_table
     table = FasterCSV.table(@path, :col_sep => "\t", :row_sep => "\r\n")
     assert_instance_of(FasterCSV::Table, table)
@@ -134,6 +134,44 @@ class TestFasterCSVInterface < Test::Unit::TestCase
     assert_instance_of(String, line)
     assert_equal("1;2;3\n", line)
   end
+  def test_write_header_detection
+    File.unlink(@path)
+    headers = %w{a b c}
+    FasterCSV.open(@path, "w", :headers => true) do |csv|
+      csv << headers
+      csv << %w{1 2 3}
+      assert_equal(headers, csv.instance_variable_get(:@headers))
+    end
+  end
+  def test_write_lineno
+    File.unlink(@path)
+    FasterCSV.open(@path, "w") do |csv|
+      lines = 20
+      lines.times { csv << %w{a b c} }
+      assert_equal(lines, csv.lineno)
+    end
+  end
+  def test_write_hash
+    File.unlink(@path)
+    lines = [{:a => 1, :b => 2, :c => 3}, {:a => 4, :b => 5, :c => 6}]
+    FasterCSV.open( @path, "w", :headers           => true,
+                                :converters        => :all,
+                                :header_converters => :symbol ) do |csv|
+      csv << lines.first.keys
+      lines.each { |line| csv << line }
+    end
+    FasterCSV.open( @path, "w", :headers           => true,
+                                :converters        => :all,
+                                :header_converters => :symbol ) do |csv|
+      csv.each { |line| assert_equal(lines.shift, line.to_hash) }
+    end
+  end
   def test_append  # aliased add_row() and puts()
     File.unlink(@path)

metadata CHANGED

@@ -1,10 +1,10 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.9.2
+rubygems_version: 0.9.4
 specification_version: 1
 name: fastercsv
 version: !ruby/object:Gem::Version
-  version: 1.2.0
-date: 2007-02-07 00:00:00 -06:00
+  version: 1.2.1
+date: 2007-09-20 00:00:00 -05:00
 summary: FasterCSV is CSV, but faster, smaller, and cleaner.
 require_paths:
 - lib