fastercsv 1.4.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -2,6 +2,14 @@
2
2
 
3
3
  Below is a complete listing of changes for each revision of FasterCSV.
4
4
 
5
+ == 1.5.0
6
+
7
+ * The main parser has been rewritten by Timothy Elliott to avoid problems with
8
+ large inputs in Ruby 1.8's regex engine. This makes FasterCSV handle more inputs
9
+ gracefully.
10
+ * FasterCSV will now exit with a notice to switch to the standard CSV library if it is loaded (via require) under Ruby 1.9.
11
+ * Included a missing file so the tests will run in source packages.
12
+
5
13
  == 1.4.0
6
14
 
7
15
  * Added encoding support patch from Michael Reinsch.
data/Rakefile CHANGED
@@ -12,8 +12,8 @@ task :default => [:test]
12
12
 
13
13
  Rake::TestTask.new do |test|
14
14
  test.libs << "test"
15
- test.test_files = [ "test/ts_all.rb" ]
16
- test.verbose = true
15
+ test.test_files = %w[test/ts_all.rb]
16
+ test.verbose = true
17
17
  end
18
18
 
19
19
  Rake::RDocTask.new do |rdoc|
@@ -45,8 +45,6 @@ task :benchmark do
45
45
  path = "test/test_data.csv"
46
46
  sh %Q{time ruby -r csv -e } +
47
47
  %Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
48
- sh %Q{time ruby -r lightcsv -e } +
49
- %Q{'#{TESTS}.times { LightCsv.foreach("#{path}") { |row| } }'}
50
48
  sh %Q{time ruby -r lib/faster_csv -e } +
51
49
  %Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
52
50
  end
@@ -58,12 +56,12 @@ spec = Gem::Specification.new do |spec|
58
56
  spec.platform = Gem::Platform::RUBY
59
57
  spec.summary = "FasterCSV is CSV, but faster, smaller, and cleaner."
60
58
 
61
- spec.test_suite_file = "test/ts_all.rb"
59
+ spec.test_files = %w[test/ts_all.rb]
62
60
  spec.files = Dir.glob("{lib,test,examples}/**/*.rb").
63
61
  reject { |item| item.include?(".svn") } +
64
62
  Dir.glob("{test,examples}/**/*.csv").
65
63
  reject { |item| item.include?(".svn") } +
66
- ["Rakefile", "setup.rb"]
64
+ %w[Rakefile setup.rb test/line_endings.gz]
67
65
 
68
66
  spec.has_rdoc = true
69
67
  spec.extra_rdoc_files = %w[ AUTHORS COPYING README INSTALL TODO CHANGELOG
data/lib/faster_csv.rb CHANGED
@@ -7,6 +7,13 @@
7
7
  #
8
8
  # See FasterCSV for documentation.
9
9
 
10
+ if RUBY_VERSION >= "1.9"
11
+ abort <<-VERSION_WARNING.gsub(/^\s+/, "")
12
+ Please switch to Ruby 1.9's standard CSV library. It's FasterCSV plus
13
+ support for Ruby 1.9's m17n encoding engine.
14
+ VERSION_WARNING
15
+ end
16
+
10
17
  require "forwardable"
11
18
  require "English"
12
19
  require "enumerator"
@@ -75,7 +82,7 @@ require "stringio"
75
82
  #
76
83
  class FasterCSV
77
84
  # The version of the installed library.
78
- VERSION = "1.4.0".freeze
85
+ VERSION = "1.5.0".freeze
79
86
 
80
87
  #
81
88
  # A FasterCSV::Row is part Array and part Hash. It retains an order for the
@@ -1559,7 +1566,7 @@ class FasterCSV
1559
1566
  end
1560
1567
 
1561
1568
  # begin with a blank line, so we can always add to it
1562
- line = ""
1569
+ line = String.new
1563
1570
 
1564
1571
  #
1565
1572
  # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
@@ -1567,7 +1574,11 @@ class FasterCSV
1567
1574
  #
1568
1575
  loop do
1569
1576
  # add another read to the line
1570
- line += @io.gets(@row_sep) rescue return nil
1577
+ begin
1578
+ line += @io.gets(@row_sep)
1579
+ rescue
1580
+ return nil
1581
+ end
1571
1582
  # copy the line so we can chop it up in parsing
1572
1583
  parse = line.dup
1573
1584
  parse.sub!(@parsers[:line_end], "")
@@ -1590,41 +1601,37 @@ class FasterCSV
1590
1601
  end
1591
1602
  end
1592
1603
 
1593
- #
1594
- # shave leading empty fields if needed, because the main parser chokes
1595
- # on these
1596
- #
1597
- csv = if parse.sub!(@parsers[:leading_fields], "")
1598
- [nil] * ($&.length / @col_sep.length)
1599
- else
1600
- Array.new
1601
- end
1602
- #
1603
- # then parse the main fields with a hyper-tuned Regexp from
1604
- # Mastering Regular Expressions, Second Edition
1605
- #
1606
- parse.gsub!(@parsers[:csv_row]) do
1607
- csv << if $1.nil? # we found an unquoted field
1608
- if $2.empty? # switch empty unquoted fields to +nil+...
1609
- nil # for CSV compatibility
1610
- else
1611
- # I decided to take a strict approach to CSV parsing...
1612
- if $2.count("\r\n").zero? # verify correctness of field...
1613
- $2
1614
- else
1615
- # or throw an Exception
1616
- raise MalformedCSVError, "Unquoted fields do not allow " +
1617
- "\\r or \\n (line #{lineno + 1})."
1618
- end
1604
+ # parse the fields with a mix of String#split and regular expressions
1605
+ csv = Array.new
1606
+ current_field = String.new
1607
+ field_quotes = 0
1608
+ parse.split(@col_sep, -1).each do |match|
1609
+ if current_field.empty? && match.count(@quote_and_newlines).zero?
1610
+ csv << (match.empty? ? nil : match)
1611
+ elsif(current_field.empty? ? match[0] : current_field[0]) == @quote_char[0]
1612
+ current_field << match
1613
+ field_quotes += match.count(@quote_char)
1614
+ if field_quotes % 2 == 0
1615
+ in_quotes = current_field[@parsers[:quoted_field], 1]
1616
+ raise MalformedCSVError unless in_quotes
1617
+ current_field = in_quotes
1618
+ current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
1619
+ csv << current_field
1620
+ current_field = String.new
1621
+ field_quotes = 0
1622
+ else # we found a quoted field that spans multiple lines
1623
+ current_field << @col_sep
1619
1624
  end
1620
- else # we found a quoted field...
1621
- $1.gsub(@quote_char * 2, @quote_char) # unescape contents
1625
+ elsif match.count("\r\n").zero?
1626
+ raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
1627
+ else
1628
+ raise MalformedCSVError, "Unquoted fields do not allow " +
1629
+ "\\r or \\n (line #{lineno + 1})."
1622
1630
  end
1623
- "" # gsub!'s replacement, clear the field
1624
1631
  end
1625
1632
 
1626
1633
  # if parse is empty?(), we found all the fields on the line...
1627
- if parse.empty?
1634
+ if field_quotes % 2 == 0
1628
1635
  @lineno += 1
1629
1636
 
1630
1637
  # save unconverted fields, if needed...
@@ -1646,9 +1653,7 @@ class FasterCSV
1646
1653
  # if we're not empty?() but at eof?(), a quoted field wasn't closed...
1647
1654
  if @io.eof?
1648
1655
  raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
1649
- elsif parse =~ @parsers[:bad_field]
1650
- raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
1651
- elsif @field_size_limit and parse.length >= @field_size_limit
1656
+ elsif @field_size_limit and current_field.size >= @field_size_limit
1652
1657
  raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1653
1658
  end
1654
1659
  # otherwise, we need to loop and pull some more data to complete the row
@@ -1697,9 +1702,10 @@ class FasterCSV
1697
1702
  #
1698
1703
  def init_separators(options)
1699
1704
  # store the selected separators
1700
- @col_sep = options.delete(:col_sep)
1701
- @row_sep = options.delete(:row_sep)
1702
- @quote_char = options.delete(:quote_char)
1705
+ @col_sep = options.delete(:col_sep)
1706
+ @row_sep = options.delete(:row_sep)
1707
+ @quote_char = options.delete(:quote_char)
1708
+ @quote_and_newlines = "#{@quote_char}\r\n"
1703
1709
 
1704
1710
  if @quote_char.length != 1
1705
1711
  raise ArgumentError, ":quote_char has to be a single character String"
@@ -1785,31 +1791,12 @@ class FasterCSV
1785
1791
  esc_row_sep = Regexp.escape(@row_sep)
1786
1792
  esc_quote = Regexp.escape(@quote_char)
1787
1793
  @parsers = {
1788
- # for empty leading fields
1789
- :leading_fields => Regexp.new("\\A(?:#{esc_col_sep})+", nil, @encoding),
1790
- # The Primary Parser
1791
- :csv_row => Regexp.new(<<-END_PARSER, Regexp::EXTENDED, @encoding),
1792
- \\G(?:\\A|#{esc_col_sep}) # anchor the match
1793
- (?: #{esc_quote}( (?>[^#{esc_quote}]*) # find quoted fields
1794
- (?> #{esc_quote*2}
1795
- [^#{esc_quote}]* )* )#{esc_quote}
1796
- | # ... or ...
1797
- ([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
1798
- )
1799
- (?=#{esc_col_sep}|\\z) # ensure we are at field's end
1800
- END_PARSER
1801
- # a test for unescaped quotes
1802
- :bad_field => Regexp.new(<<-END_BAD, Regexp::EXTENDED, @encoding),
1803
- \\A#{esc_col_sep}? # starts with an optional comma
1804
- (?: #{esc_quote} (?>[^#{esc_quote}]*) # an extra quote
1805
- (?> #{esc_quote*2}
1806
- [^#{esc_quote}]* )*
1807
- #{esc_quote}[^#{esc_quote}]
1808
- | # ... or ...
1809
- [^#{esc_quote}#{esc_col_sep}]+
1810
- #{esc_quote} # unescaped quote
1811
- )
1812
- END_BAD
1794
+ :any_field => Regexp.new( "[^#{esc_col_sep}]+",
1795
+ Regexp::MULTILINE,
1796
+ @encoding ),
1797
+ :quoted_field => Regexp.new( "^#{esc_quote}(.*)#{esc_quote}$",
1798
+ Regexp::MULTILINE,
1799
+ @encoding ),
1813
1800
  # safer than chomp!()
1814
1801
  :line_end => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
1815
1802
  }
Binary file
@@ -108,6 +108,13 @@ class TestCSVParsing < Test::Unit::TestCase
108
108
  #
109
109
  assert_equal(Array.new, FasterCSV.parse_line("\n1,2,3\n"))
110
110
  end
111
+
112
+ def test_non_regex_edge_cases
113
+ # An early version of the non-regex parser fails this test
114
+ [["foo,\"foo,bar,baz,foo\",\"foo\"", ["foo", "foo,bar,baz,foo", "foo"]]].each do |edge_case|
115
+ assert_equal(edge_case.last, FasterCSV.parse_line(edge_case.first))
116
+ end
117
+ end
111
118
 
112
119
  def test_malformed_csv
113
120
  assert_raise(FasterCSV::MalformedCSVError) do
data/test/tc_interface.rb CHANGED
@@ -103,6 +103,17 @@ class TestFasterCSVInterface < Test::Unit::TestCase
103
103
  assert_equal(nil, csv.shift)
104
104
  end
105
105
  end
106
+
107
+ def test_long_line # ruby's regex parser may have problems with long rows
108
+ File.unlink(@path)
109
+
110
+ long_field_length = 2800
111
+ File.open(@path, "w") do |file|
112
+ file << "1\t2\t#{'3' * long_field_length}\r\n"
113
+ end
114
+ @expected = [%w{1 2} + ['3' * long_field_length]]
115
+ test_shift
116
+ end
106
117
 
107
118
  ### Test Write Interface ###
108
119
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fastercsv
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Edward Gray II
@@ -9,11 +9,15 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-09-10 00:00:00 -05:00
12
+ date: 2009-06-15 00:00:00 -05:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
16
- description: FasterCSV is intended as a complete replacement to the CSV standard library. It is significantly faster and smaller while still being pure Ruby code. It also strives for a better interface.
16
+ description: |
17
+ FasterCSV is intended as a complete replacement to the CSV standard library. It
18
+ is significantly faster and smaller while still being pure Ruby code. It also
19
+ strives for a better interface.
20
+
17
21
  email: james@grayproductions.net
18
22
  executables: []
19
23
 
@@ -52,6 +56,7 @@ files:
52
56
  - examples/purchase.csv
53
57
  - Rakefile
54
58
  - setup.rb
59
+ - test/line_endings.gz
55
60
  - AUTHORS
56
61
  - COPYING
57
62
  - README
@@ -61,6 +66,8 @@ files:
61
66
  - LICENSE
62
67
  has_rdoc: true
63
68
  homepage: http://fastercsv.rubyforge.org
69
+ licenses: []
70
+
64
71
  post_install_message:
65
72
  rdoc_options:
66
73
  - --title
@@ -84,9 +91,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
91
  requirements: []
85
92
 
86
93
  rubyforge_project: fastercsv
87
- rubygems_version: 1.2.0
94
+ rubygems_version: 1.3.4
88
95
  signing_key:
89
- specification_version: 2
96
+ specification_version: 3
90
97
  summary: FasterCSV is CSV, but faster, smaller, and cleaner.
91
98
  test_files:
92
99
  - test/ts_all.rb