fastercsv 1.4.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -2,6 +2,14 @@
2
2
 
3
3
  Below is a complete listing of changes for each revision of FasterCSV.
4
4
 
5
+ == 1.5.0
6
+
7
+ * The main parser has been rewritten by Timothy Elliott to avoid big input
8
+ issues with Ruby 1.8's regex engine. This makes FasterCSV handle more inputs
9
+ gracefully.
10
+ * FasterCSV will now exit with a notice to upgrade if it is loaded (require'd) under Ruby 1.9.
11
+ * Included a missing file so the tests will run in source packages.
12
+
5
13
  == 1.4.0
6
14
 
7
15
  * Added encoding support patch from Michael Reinsch.
data/Rakefile CHANGED
@@ -12,8 +12,8 @@ task :default => [:test]
12
12
 
13
13
  Rake::TestTask.new do |test|
14
14
  test.libs << "test"
15
- test.test_files = [ "test/ts_all.rb" ]
16
- test.verbose = true
15
+ test.test_files = %w[test/ts_all.rb]
16
+ test.verbose = true
17
17
  end
18
18
 
19
19
  Rake::RDocTask.new do |rdoc|
@@ -45,8 +45,6 @@ task :benchmark do
45
45
  path = "test/test_data.csv"
46
46
  sh %Q{time ruby -r csv -e } +
47
47
  %Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
48
- sh %Q{time ruby -r lightcsv -e } +
49
- %Q{'#{TESTS}.times { LightCsv.foreach("#{path}") { |row| } }'}
50
48
  sh %Q{time ruby -r lib/faster_csv -e } +
51
49
  %Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
52
50
  end
@@ -58,12 +56,12 @@ spec = Gem::Specification.new do |spec|
58
56
  spec.platform = Gem::Platform::RUBY
59
57
  spec.summary = "FasterCSV is CSV, but faster, smaller, and cleaner."
60
58
 
61
- spec.test_suite_file = "test/ts_all.rb"
59
+ spec.test_files = %w[test/ts_all.rb]
62
60
  spec.files = Dir.glob("{lib,test,examples}/**/*.rb").
63
61
  reject { |item| item.include?(".svn") } +
64
62
  Dir.glob("{test,examples}/**/*.csv").
65
63
  reject { |item| item.include?(".svn") } +
66
- ["Rakefile", "setup.rb"]
64
+ %w[Rakefile setup.rb test/line_endings.gz]
67
65
 
68
66
  spec.has_rdoc = true
69
67
  spec.extra_rdoc_files = %w[ AUTHORS COPYING README INSTALL TODO CHANGELOG
data/lib/faster_csv.rb CHANGED
@@ -7,6 +7,13 @@
7
7
  #
8
8
  # See FasterCSV for documentation.
9
9
 
10
+ if RUBY_VERSION >= "1.9"
11
+ abort <<-VERSION_WARNING.gsub(/^\s+/, "")
12
+ Please switch to Ruby 1.9's standard CSV library. It's FasterCSV plus
13
+ support for Ruby 1.9's m17n encoding engine.
14
+ VERSION_WARNING
15
+ end
16
+
10
17
  require "forwardable"
11
18
  require "English"
12
19
  require "enumerator"
@@ -75,7 +82,7 @@ require "stringio"
75
82
  #
76
83
  class FasterCSV
77
84
  # The version of the installed library.
78
- VERSION = "1.4.0".freeze
85
+ VERSION = "1.5.0".freeze
79
86
 
80
87
  #
81
88
  # A FasterCSV::Row is part Array and part Hash. It retains an order for the
@@ -1559,7 +1566,7 @@ class FasterCSV
1559
1566
  end
1560
1567
 
1561
1568
  # begin with a blank line, so we can always add to it
1562
- line = ""
1569
+ line = String.new
1563
1570
 
1564
1571
  #
1565
1572
  # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
@@ -1567,7 +1574,11 @@ class FasterCSV
1567
1574
  #
1568
1575
  loop do
1569
1576
  # add another read to the line
1570
- line += @io.gets(@row_sep) rescue return nil
1577
+ begin
1578
+ line += @io.gets(@row_sep)
1579
+ rescue
1580
+ return nil
1581
+ end
1571
1582
  # copy the line so we can chop it up in parsing
1572
1583
  parse = line.dup
1573
1584
  parse.sub!(@parsers[:line_end], "")
@@ -1590,41 +1601,37 @@ class FasterCSV
1590
1601
  end
1591
1602
  end
1592
1603
 
1593
- #
1594
- # shave leading empty fields if needed, because the main parser chokes
1595
- # on these
1596
- #
1597
- csv = if parse.sub!(@parsers[:leading_fields], "")
1598
- [nil] * ($&.length / @col_sep.length)
1599
- else
1600
- Array.new
1601
- end
1602
- #
1603
- # then parse the main fields with a hyper-tuned Regexp from
1604
- # Mastering Regular Expressions, Second Edition
1605
- #
1606
- parse.gsub!(@parsers[:csv_row]) do
1607
- csv << if $1.nil? # we found an unquoted field
1608
- if $2.empty? # switch empty unquoted fields to +nil+...
1609
- nil # for CSV compatibility
1610
- else
1611
- # I decided to take a strict approach to CSV parsing...
1612
- if $2.count("\r\n").zero? # verify correctness of field...
1613
- $2
1614
- else
1615
- # or throw an Exception
1616
- raise MalformedCSVError, "Unquoted fields do not allow " +
1617
- "\\r or \\n (line #{lineno + 1})."
1618
- end
1604
+ # parse the fields with a mix of String#split and regular expressions
1605
+ csv = Array.new
1606
+ current_field = String.new
1607
+ field_quotes = 0
1608
+ parse.split(@col_sep, -1).each do |match|
1609
+ if current_field.empty? && match.count(@quote_and_newlines).zero?
1610
+ csv << (match.empty? ? nil : match)
1611
+ elsif(current_field.empty? ? match[0] : current_field[0]) == @quote_char[0]
1612
+ current_field << match
1613
+ field_quotes += match.count(@quote_char)
1614
+ if field_quotes % 2 == 0
1615
+ in_quotes = current_field[@parsers[:quoted_field], 1]
1616
+ raise MalformedCSVError unless in_quotes
1617
+ current_field = in_quotes
1618
+ current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
1619
+ csv << current_field
1620
+ current_field = String.new
1621
+ field_quotes = 0
1622
+ else # we found a quoted field that spans multiple lines
1623
+ current_field << @col_sep
1619
1624
  end
1620
- else # we found a quoted field...
1621
- $1.gsub(@quote_char * 2, @quote_char) # unescape contents
1625
+ elsif match.count("\r\n").zero?
1626
+ raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
1627
+ else
1628
+ raise MalformedCSVError, "Unquoted fields do not allow " +
1629
+ "\\r or \\n (line #{lineno + 1})."
1622
1630
  end
1623
- "" # gsub!'s replacement, clear the field
1624
1631
  end
1625
1632
 
1626
1633
  # if no quoted field is left open, we found all the fields on the line...
1627
- if parse.empty?
1634
+ if field_quotes % 2 == 0
1628
1635
  @lineno += 1
1629
1636
 
1630
1637
  # save unconverted fields, if needed...
@@ -1646,9 +1653,7 @@ class FasterCSV
1646
1653
  # if a field is still open but we're at eof?(), a quoted field wasn't closed...
1647
1654
  if @io.eof?
1648
1655
  raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
1649
- elsif parse =~ @parsers[:bad_field]
1650
- raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
1651
- elsif @field_size_limit and parse.length >= @field_size_limit
1656
+ elsif @field_size_limit and current_field.size >= @field_size_limit
1652
1657
  raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1653
1658
  end
1654
1659
  # otherwise, we need to loop and pull some more data to complete the row
@@ -1697,9 +1702,10 @@ class FasterCSV
1697
1702
  #
1698
1703
  def init_separators(options)
1699
1704
  # store the selected separators
1700
- @col_sep = options.delete(:col_sep)
1701
- @row_sep = options.delete(:row_sep)
1702
- @quote_char = options.delete(:quote_char)
1705
+ @col_sep = options.delete(:col_sep)
1706
+ @row_sep = options.delete(:row_sep)
1707
+ @quote_char = options.delete(:quote_char)
1708
+ @quote_and_newlines = "#{@quote_char}\r\n"
1703
1709
 
1704
1710
  if @quote_char.length != 1
1705
1711
  raise ArgumentError, ":quote_char has to be a single character String"
@@ -1785,31 +1791,12 @@ class FasterCSV
1785
1791
  esc_row_sep = Regexp.escape(@row_sep)
1786
1792
  esc_quote = Regexp.escape(@quote_char)
1787
1793
  @parsers = {
1788
- # for empty leading fields
1789
- :leading_fields => Regexp.new("\\A(?:#{esc_col_sep})+", nil, @encoding),
1790
- # The Primary Parser
1791
- :csv_row => Regexp.new(<<-END_PARSER, Regexp::EXTENDED, @encoding),
1792
- \\G(?:\\A|#{esc_col_sep}) # anchor the match
1793
- (?: #{esc_quote}( (?>[^#{esc_quote}]*) # find quoted fields
1794
- (?> #{esc_quote*2}
1795
- [^#{esc_quote}]* )* )#{esc_quote}
1796
- | # ... or ...
1797
- ([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
1798
- )
1799
- (?=#{esc_col_sep}|\\z) # ensure we are at field's end
1800
- END_PARSER
1801
- # a test for unescaped quotes
1802
- :bad_field => Regexp.new(<<-END_BAD, Regexp::EXTENDED, @encoding),
1803
- \\A#{esc_col_sep}? # starts with an optional comma
1804
- (?: #{esc_quote} (?>[^#{esc_quote}]*) # an extra quote
1805
- (?> #{esc_quote*2}
1806
- [^#{esc_quote}]* )*
1807
- #{esc_quote}[^#{esc_quote}]
1808
- | # ... or ...
1809
- [^#{esc_quote}#{esc_col_sep}]+
1810
- #{esc_quote} # unescaped quote
1811
- )
1812
- END_BAD
1794
+ :any_field => Regexp.new( "[^#{esc_col_sep}]+",
1795
+ Regexp::MULTILINE,
1796
+ @encoding ),
1797
+ :quoted_field => Regexp.new( "^#{esc_quote}(.*)#{esc_quote}$",
1798
+ Regexp::MULTILINE,
1799
+ @encoding ),
1813
1800
  # safer than chomp!()
1814
1801
  :line_end => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
1815
1802
  }
Binary file
@@ -108,6 +108,13 @@ class TestCSVParsing < Test::Unit::TestCase
108
108
  #
109
109
  assert_equal(Array.new, FasterCSV.parse_line("\n1,2,3\n"))
110
110
  end
111
+
112
+ def test_non_regex_edge_cases
113
+ # An early version of the non-regex parser fails this test
114
+ [["foo,\"foo,bar,baz,foo\",\"foo\"", ["foo", "foo,bar,baz,foo", "foo"]]].each do |edge_case|
115
+ assert_equal(edge_case.last, FasterCSV.parse_line(edge_case.first))
116
+ end
117
+ end
111
118
 
112
119
  def test_malformed_csv
113
120
  assert_raise(FasterCSV::MalformedCSVError) do
data/test/tc_interface.rb CHANGED
@@ -103,6 +103,17 @@ class TestFasterCSVInterface < Test::Unit::TestCase
103
103
  assert_equal(nil, csv.shift)
104
104
  end
105
105
  end
106
+
107
+ def test_long_line # ruby's regex parser may have problems with long rows
108
+ File.unlink(@path)
109
+
110
+ long_field_length = 2800
111
+ File.open(@path, "w") do |file|
112
+ file << "1\t2\t#{'3' * long_field_length}\r\n"
113
+ end
114
+ @expected = [%w{1 2} + ['3' * long_field_length]]
115
+ test_shift
116
+ end
106
117
 
107
118
  ### Test Write Interface ###
108
119
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fastercsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Edward Gray II
@@ -9,11 +9,15 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-09-10 00:00:00 -05:00
12
+ date: 2009-06-15 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
- description: FasterCSV is intended as a complete replacement to the CSV standard library. It is significantly faster and smaller while still being pure Ruby code. It also strives for a better interface.
16
+ description: |
17
+ FasterCSV is intended as a complete replacement to the CSV standard library. It
18
+ is significantly faster and smaller while still being pure Ruby code. It also
19
+ strives for a better interface.
20
+
17
21
  email: james@grayproductions.net
18
22
  executables: []
19
23
 
@@ -52,6 +56,7 @@ files:
52
56
  - examples/purchase.csv
53
57
  - Rakefile
54
58
  - setup.rb
59
+ - test/line_endings.gz
55
60
  - AUTHORS
56
61
  - COPYING
57
62
  - README
@@ -61,6 +66,8 @@ files:
61
66
  - LICENSE
62
67
  has_rdoc: true
63
68
  homepage: http://fastercsv.rubyforge.org
69
+ licenses: []
70
+
64
71
  post_install_message:
65
72
  rdoc_options:
66
73
  - --title
@@ -84,9 +91,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
91
  requirements: []
85
92
 
86
93
  rubyforge_project: fastercsv
87
- rubygems_version: 1.2.0
94
+ rubygems_version: 1.3.4
88
95
  signing_key:
89
- specification_version: 2
96
+ specification_version: 3
90
97
  summary: FasterCSV is CSV, but faster, smaller, and cleaner.
91
98
  test_files:
92
99
  - test/ts_all.rb