fastercsv 1.2.0 → 1.2.1

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -2,6 +2,15 @@
2
2
 
3
3
  Below is a complete listing of changes for each revision of FasterCSV.
4
4
 
5
+ == 1.2.1
6
+
7
+ * Worked around an odd incompatibility with the Regexps used to remove line
8
+ endings in some (seemingly rare) Ruby environments.
9
+ * Made FasterCSV::lineno() writer aware.
10
+ * Support Hashes in FasterCSV#<<(), by detecting headers when writing.
11
+ * Added limited support for switching the quote character.
12
+ * Refined Date and DateTime matching for built-in converters.
13
+
5
14
  == 1.2.0
6
15
 
7
16
  * Added the FasterCSV::table() shortcut.
data/LICENSE CHANGED
@@ -1,6 +1,6 @@
1
1
  = License Terms
2
2
 
3
- Distributed under the user's choice of the GPL[http://www.gnu.org/copyleft/gpl.html] (see COPYING for details) or the
3
+ Distributed under the user's choice of the {GPL Version 2}[http://www.gnu.org/licenses/old-licenses/gpl-2.0.html] (see COPYING for details) or the
4
4
  {Ruby software license}[http://www.ruby-lang.org/en/LICENSE.txt] by
5
5
  James Edward Gray II.
6
6
 
data/README CHANGED
@@ -55,6 +55,16 @@ See FasterCSV for documentation.
55
55
 
56
56
  See the INSTALL file for instructions.
57
57
 
58
+ == What is CSV, really?
59
+
60
+ FasterCSV maintains a pretty strict definition of CSV taken directly from {the RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one place and that is to make using this library easier. FasterCSV will parse all valid CSV.
61
+
62
+ What you don't want to do is feed FasterCSV invalid CSV. Because of the way the CSV format works, it's common for a parser to need to read until the end of the file to be sure a field is invalid. This eats a lot of time and memory.
63
+
64
+ Luckily, when working with invalid CSV, Ruby's built-in methods will almost always be superior in every way. For example, parsing non-quoted fields is as easy as:
65
+
66
+ data.split(",")
67
+
58
68
  == Questions and/or Comments
59
69
 
60
70
  Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] with
data/Rakefile CHANGED
@@ -45,6 +45,8 @@ task :benchmark do
45
45
  path = "test/test_data.csv"
46
46
  sh %Q{time ruby -r csv -e } +
47
47
  %Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
48
+ sh %Q{time ruby -r lightcsv -e } +
49
+ %Q{'#{TESTS}.times { LightCsv.foreach("#{path}") { |row| } }'}
48
50
  sh %Q{time ruby -r lib/faster_csv -e } +
49
51
  %Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
50
52
  end
@@ -75,7 +75,7 @@ require "stringio"
75
75
  #
76
76
  class FasterCSV
77
77
  # The version of the installed library.
78
- VERSION = "1.2.0".freeze
78
+ VERSION = "1.2.1".freeze
79
79
 
80
80
  #
81
81
  # A FasterCSV::Row is part Array and part Hash. It retains an order for the
@@ -712,6 +712,13 @@ class FasterCSV
712
712
  #
713
713
  FieldInfo = Struct.new(:index, :line, :header)
714
714
 
715
+ # A Regexp used to find and convert some common Date formats.
716
+ DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
717
+ \d{4}-\d{2}-\d{2} )\z /x
718
+ # A Regexp used to find and convert some common DateTime formats.
719
+ DateTimeMatcher =
720
+ / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
721
+ \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
715
722
  #
716
723
  # This Hash holds the built-in converters of FasterCSV that can be accessed by
717
724
  # name. You can select Converters with FasterCSV.convert() or through the
@@ -732,12 +739,16 @@ class FasterCSV
732
739
  # To add a combo field, the value should be an Array of names. Combo fields
733
740
  # can be nested with other combo fields.
734
741
  #
735
- Converters = { :integer => lambda { |f| Integer(f) rescue f },
736
- :float => lambda { |f| Float(f) rescue f },
737
- :numeric => [:integer, :float],
738
- :date => lambda { |f| Date.parse(f) rescue f },
739
- :date_time => lambda { |f| DateTime.parse(f) rescue f },
740
- :all => [:date_time, :numeric] }
742
+ Converters = { :integer => lambda { |f| Integer(f) rescue f },
743
+ :float => lambda { |f| Float(f) rescue f },
744
+ :numeric => [:integer, :float],
745
+ :date => lambda { |f|
746
+ f =~ DateMatcher ? (Date.parse(f) rescue f) : f
747
+ },
748
+ :date_time => lambda { |f|
749
+ f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
750
+ },
751
+ :all => [:date_time, :numeric] }
741
752
 
742
753
  #
743
754
  # This Hash holds the built-in header converters of FasterCSV that can be
@@ -768,6 +779,7 @@ class FasterCSV
768
779
  #
769
780
  # <b><tt>:col_sep</tt></b>:: <tt>","</tt>
770
781
  # <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
782
+ # <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
771
783
  # <b><tt>:converters</tt></b>:: +nil+
772
784
  # <b><tt>:unconverted_fields</tt></b>:: +nil+
773
785
  # <b><tt>:headers</tt></b>:: +false+
@@ -778,6 +790,7 @@ class FasterCSV
778
790
  #
779
791
  DEFAULT_OPTIONS = { :col_sep => ",",
780
792
  :row_sep => :auto,
793
+ :quote_char => '"',
781
794
  :converters => nil,
782
795
  :unconverted_fields => nil,
783
796
  :headers => false,
@@ -1267,6 +1280,15 @@ class FasterCSV
1267
1280
  # (<tt>$/</tt>) is used. Obviously,
1268
1281
  # discovery takes a little time. Set
1269
1282
  # manually if speed is important.
1283
+ # <b><tt>:quote_char</tt></b>:: The character used to quote fields.
1284
+ # This has to be a single character
1285
+ # String. This is useful for
1286
+ # application that incorrectly use
1287
+ # <tt>'</tt> as the quote character
1288
+ # instead of the correct <tt>"</tt>.
1289
+ # FasterCSV will always consider a
1290
+ # double sequence this character to be
1291
+ # an escaped quote.
1270
1292
  # <b><tt>:converters</tt></b>:: An Array of names from the Converters
1271
1293
  # Hash and/or lambdas that handle custom
1272
1294
  # conversion. A single converter
@@ -1368,9 +1390,16 @@ class FasterCSV
1368
1390
  # The data source must be open for writing.
1369
1391
  #
1370
1392
  def <<(row)
1371
- # handle FasterCSV::Row objects
1372
- row = row.fields if row.is_a? self.class::Row
1373
-
1393
+ # Handle FasterCSV::Row objects and Hashes
1394
+ row = case row
1395
+ when self.class::Row then row.fields
1396
+ when Hash then @headers.map { |header| row[header] }
1397
+ else row
1398
+ end
1399
+
1400
+ @headers = row if header_row?
1401
+ @lineno += 1
1402
+
1374
1403
  @io << row.map(&@quote).join(@col_sep) + @row_sep # quote and separate
1375
1404
 
1376
1405
  self # for chaining
@@ -1533,7 +1562,7 @@ class FasterCSV
1533
1562
  end
1534
1563
  end
1535
1564
  else # we found a quoted field...
1536
- $1.gsub('""', '"') # unescape contents
1565
+ $1.gsub(@quote_char * 2, @quote_char) # unescape contents
1537
1566
  end
1538
1567
  "" # gsub!'s replacement, clear the field
1539
1568
  end
@@ -1582,8 +1611,13 @@ class FasterCSV
1582
1611
  #
1583
1612
  def init_separators(options)
1584
1613
  # store the selected separators
1585
- @col_sep = options.delete(:col_sep)
1586
- @row_sep = options.delete(:row_sep)
1614
+ @col_sep = options.delete(:col_sep)
1615
+ @row_sep = options.delete(:row_sep)
1616
+ @quote_char = options.delete(:quote_char)
1617
+
1618
+ if @quote_char.length != 1
1619
+ raise ArgumentError, ":quote_char has to be a single character String"
1620
+ end
1587
1621
 
1588
1622
  # automatically discover row separator when requested
1589
1623
  if @row_sep == :auto
@@ -1620,8 +1654,13 @@ class FasterCSV
1620
1654
  end
1621
1655
 
1622
1656
  # establish quoting rules
1657
+ do_quote = lambda do |field|
1658
+ @quote_char +
1659
+ String(field).gsub(@quote_char, @quote_char * 2) +
1660
+ @quote_char
1661
+ end
1623
1662
  @quote = if options.delete(:force_quotes)
1624
- lambda { |field| %Q{"#{String(field).gsub('"', '""')}"} }
1663
+ do_quote
1625
1664
  else
1626
1665
  lambda do |field|
1627
1666
  if field.nil? # represent +nil+ fields as empty unquoted fields
@@ -1629,8 +1668,9 @@ class FasterCSV
1629
1668
  else
1630
1669
  field = String(field) # Stringify fields
1631
1670
  # represent empty fields as empty quoted fields
1632
- if field.empty? or field.count(%Q{\r\n#{@col_sep}"}).nonzero?
1633
- %Q{"#{field.gsub('"', '""')}"} # escape quoted fields
1671
+ if field.empty? or
1672
+ field.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
1673
+ do_quote.call(field)
1634
1674
  else
1635
1675
  field # unquoted field
1636
1676
  end
@@ -1645,19 +1685,24 @@ class FasterCSV
1645
1685
  @skip_blanks = options.delete(:skip_blanks)
1646
1686
 
1647
1687
  # prebuild Regexps for faster parsing
1648
- @parsers = {
1688
+ esc_col_sep = Regexp.escape(@col_sep)
1689
+ esc_row_sep = Regexp.escape(@row_sep)
1690
+ esc_quote = Regexp.escape(@quote_char)
1691
+ @parsers = {
1649
1692
  :leading_fields =>
1650
- /\A(?:#{Regexp.escape(@col_sep)})+/, # for empty leading fields
1693
+ /\A(?:#{esc_col_sep})+/, # for empty leading fields
1651
1694
  :csv_row =>
1652
1695
  ### The Primary Parser ###
1653
- / \G(?:^|#{Regexp.escape(@col_sep)}) # anchor the match
1654
- (?: "((?>[^"]*)(?>""[^"]*)*)" # find quoted fields
1696
+ / \G(?:^|#{esc_col_sep}) # anchor the match
1697
+ (?: #{esc_quote}( (?>[^#{esc_quote}]*) # find quoted fields
1698
+ (?> #{esc_quote*2}
1699
+ [^#{esc_quote}]* )* )#{esc_quote}
1655
1700
  | # ... or ...
1656
- ([^"#{Regexp.escape(@col_sep)}]*) # unquoted fields
1701
+ ([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
1657
1702
  )/x,
1658
1703
  ### End Primary Parser ###
1659
1704
  :line_end =>
1660
- /#{Regexp.escape(@row_sep)}\Z/ # safer than chomp!()
1705
+ /#{esc_row_sep}\z/ # safer than chomp!()
1661
1706
  }
1662
1707
  end
1663
1708
 
@@ -45,7 +45,10 @@ class TestDataConverters < Test::Unit::TestCase
45
45
 
46
46
  def test_builtin_date_converter
47
47
  # does convert
48
- assert_instance_of(Date, FasterCSV::Converters[:date][@win_safe_time_str])
48
+ assert_instance_of(
49
+ Date,
50
+ FasterCSV::Converters[:date][@win_safe_time_str.sub(/\d+:\d+:\d+ /, "")]
51
+ )
49
52
 
50
53
  # does not convert
51
54
  assert_instance_of(String, FasterCSV::Converters[:date]["junk"])
@@ -59,6 +59,14 @@ class TestFasterCSVFeatures < Test::Unit::TestCase
59
59
  :row_sep => "\r\n") )
60
60
  end
61
61
 
62
+ def test_quote_char
63
+ TEST_CASES.each do |test_case|
64
+ assert_equal( test_case.last.map { |t| t.tr('"', "'") unless t.nil? },
65
+ FasterCSV.parse_line( test_case.first.tr('"', "'"),
66
+ :quote_char => "'" ) )
67
+ end
68
+ end
69
+
62
70
  def test_row_sep_auto_discovery
63
71
  ["\r\n", "\n", "\r"].each do |line_end|
64
72
  data = "1,2,3#{line_end}4,5#{line_end}"
@@ -89,7 +89,7 @@ class TestFasterCSVInterface < Test::Unit::TestCase
89
89
  end
90
90
  assert_equal(@expected, data)
91
91
  end
92
- []
92
+
93
93
  def test_table
94
94
  table = FasterCSV.table(@path, :col_sep => "\t", :row_sep => "\r\n")
95
95
  assert_instance_of(FasterCSV::Table, table)
@@ -134,6 +134,44 @@ class TestFasterCSVInterface < Test::Unit::TestCase
134
134
  assert_instance_of(String, line)
135
135
  assert_equal("1;2;3\n", line)
136
136
  end
137
+
138
+ def test_write_header_detection
139
+ File.unlink(@path)
140
+
141
+ headers = %w{a b c}
142
+ FasterCSV.open(@path, "w", :headers => true) do |csv|
143
+ csv << headers
144
+ csv << %w{1 2 3}
145
+ assert_equal(headers, csv.instance_variable_get(:@headers))
146
+ end
147
+ end
148
+
149
+ def test_write_lineno
150
+ File.unlink(@path)
151
+
152
+ FasterCSV.open(@path, "w") do |csv|
153
+ lines = 20
154
+ lines.times { csv << %w{a b c} }
155
+ assert_equal(lines, csv.lineno)
156
+ end
157
+ end
158
+
159
+ def test_write_hash
160
+ File.unlink(@path)
161
+
162
+ lines = [{:a => 1, :b => 2, :c => 3}, {:a => 4, :b => 5, :c => 6}]
163
+ FasterCSV.open( @path, "w", :headers => true,
164
+ :converters => :all,
165
+ :header_converters => :symbol ) do |csv|
166
+ csv << lines.first.keys
167
+ lines.each { |line| csv << line }
168
+ end
169
+ FasterCSV.open( @path, "w", :headers => true,
170
+ :converters => :all,
171
+ :header_converters => :symbol ) do |csv|
172
+ csv.each { |line| assert_equal(lines.shift, line.to_hash) }
173
+ end
174
+ end
137
175
 
138
176
  def test_append # aliased add_row() and puts()
139
177
  File.unlink(@path)
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: fastercsv
5
5
  version: !ruby/object:Gem::Version
6
- version: 1.2.0
7
- date: 2007-02-07 00:00:00 -06:00
6
+ version: 1.2.1
7
+ date: 2007-09-20 00:00:00 -05:00
8
8
  summary: FasterCSV is CSV, but faster, smaller, and cleaner.
9
9
  require_paths:
10
10
  - lib