fastercsv 1.4.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +8 -0
- data/Rakefile +4 -6
- data/lib/faster_csv.rb +51 -64
- data/test/line_endings.gz +0 -0
- data/test/tc_csv_parsing.rb +7 -0
- data/test/tc_interface.rb +11 -0
- metadata +12 -5
data/CHANGELOG
CHANGED
@@ -2,6 +2,14 @@
|
|
2
2
|
|
3
3
|
Below is a complete listing of changes for each revision of FasterCSV.
|
4
4
|
|
5
|
+
== 1.5.0
|
6
|
+
|
7
|
+
* The main parser has been rewritten by Timothy Elliott to avoid big input
|
8
|
+
issues with Ruby 1.8's regex engine. This makes FasterCSV handle more inputs
|
9
|
+
gracefully.
|
10
|
+
* FasterCSV will now exit with a notice to upgrade if required in Ruby 1.9.
|
11
|
+
* Included a missing file so the tests will run in source packages.
|
12
|
+
|
5
13
|
== 1.4.0
|
6
14
|
|
7
15
|
* Added encoding support patch from Michael Reinsch.
|
data/Rakefile
CHANGED
@@ -12,8 +12,8 @@ task :default => [:test]
|
|
12
12
|
|
13
13
|
Rake::TestTask.new do |test|
|
14
14
|
test.libs << "test"
|
15
|
-
test.test_files =
|
16
|
-
test.verbose =
|
15
|
+
test.test_files = %w[test/ts_all.rb]
|
16
|
+
test.verbose = true
|
17
17
|
end
|
18
18
|
|
19
19
|
Rake::RDocTask.new do |rdoc|
|
@@ -45,8 +45,6 @@ task :benchmark do
|
|
45
45
|
path = "test/test_data.csv"
|
46
46
|
sh %Q{time ruby -r csv -e } +
|
47
47
|
%Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
|
48
|
-
sh %Q{time ruby -r lightcsv -e } +
|
49
|
-
%Q{'#{TESTS}.times { LightCsv.foreach("#{path}") { |row| } }'}
|
50
48
|
sh %Q{time ruby -r lib/faster_csv -e } +
|
51
49
|
%Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
|
52
50
|
end
|
@@ -58,12 +56,12 @@ spec = Gem::Specification.new do |spec|
|
|
58
56
|
spec.platform = Gem::Platform::RUBY
|
59
57
|
spec.summary = "FasterCSV is CSV, but faster, smaller, and cleaner."
|
60
58
|
|
61
|
-
spec.
|
59
|
+
spec.test_files = %w[test/ts_all.rb]
|
62
60
|
spec.files = Dir.glob("{lib,test,examples}/**/*.rb").
|
63
61
|
reject { |item| item.include?(".svn") } +
|
64
62
|
Dir.glob("{test,examples}/**/*.csv").
|
65
63
|
reject { |item| item.include?(".svn") } +
|
66
|
-
|
64
|
+
%w[Rakefile setup.rb test/line_endings.gz]
|
67
65
|
|
68
66
|
spec.has_rdoc = true
|
69
67
|
spec.extra_rdoc_files = %w[ AUTHORS COPYING README INSTALL TODO CHANGELOG
|
data/lib/faster_csv.rb
CHANGED
@@ -7,6 +7,13 @@
|
|
7
7
|
#
|
8
8
|
# See FasterCSV for documentation.
|
9
9
|
|
10
|
+
if RUBY_VERSION >= "1.9"
|
11
|
+
abort <<-VERSION_WARNING.gsub(/^\s+/, "")
|
12
|
+
Please switch to Ruby 1.9's standard CSV library. It's FasterCSV plus
|
13
|
+
support for Ruby 1.9's m17n encoding engine.
|
14
|
+
VERSION_WARNING
|
15
|
+
end
|
16
|
+
|
10
17
|
require "forwardable"
|
11
18
|
require "English"
|
12
19
|
require "enumerator"
|
@@ -75,7 +82,7 @@ require "stringio"
|
|
75
82
|
#
|
76
83
|
class FasterCSV
|
77
84
|
# The version of the installed library.
|
78
|
-
VERSION = "1.
|
85
|
+
VERSION = "1.5.0".freeze
|
79
86
|
|
80
87
|
#
|
81
88
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
@@ -1559,7 +1566,7 @@ class FasterCSV
|
|
1559
1566
|
end
|
1560
1567
|
|
1561
1568
|
# begin with a blank line, so we can always add to it
|
1562
|
-
line =
|
1569
|
+
line = String.new
|
1563
1570
|
|
1564
1571
|
#
|
1565
1572
|
# it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
|
@@ -1567,7 +1574,11 @@ class FasterCSV
|
|
1567
1574
|
#
|
1568
1575
|
loop do
|
1569
1576
|
# add another read to the line
|
1570
|
-
|
1577
|
+
begin
|
1578
|
+
line += @io.gets(@row_sep)
|
1579
|
+
rescue
|
1580
|
+
return nil
|
1581
|
+
end
|
1571
1582
|
# copy the line so we can chop it up in parsing
|
1572
1583
|
parse = line.dup
|
1573
1584
|
parse.sub!(@parsers[:line_end], "")
|
@@ -1590,41 +1601,37 @@ class FasterCSV
|
|
1590
1601
|
end
|
1591
1602
|
end
|
1592
1603
|
|
1593
|
-
#
|
1594
|
-
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
|
1599
|
-
|
1600
|
-
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1608
|
-
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
$2
|
1614
|
-
else
|
1615
|
-
# or throw an Exception
|
1616
|
-
raise MalformedCSVError, "Unquoted fields do not allow " +
|
1617
|
-
"\\r or \\n (line #{lineno + 1})."
|
1618
|
-
end
|
1604
|
+
# parse the fields with a mix of String#split and regular expressions
|
1605
|
+
csv = Array.new
|
1606
|
+
current_field = String.new
|
1607
|
+
field_quotes = 0
|
1608
|
+
parse.split(@col_sep, -1).each do |match|
|
1609
|
+
if current_field.empty? && match.count(@quote_and_newlines).zero?
|
1610
|
+
csv << (match.empty? ? nil : match)
|
1611
|
+
elsif(current_field.empty? ? match[0] : current_field[0]) == @quote_char[0]
|
1612
|
+
current_field << match
|
1613
|
+
field_quotes += match.count(@quote_char)
|
1614
|
+
if field_quotes % 2 == 0
|
1615
|
+
in_quotes = current_field[@parsers[:quoted_field], 1]
|
1616
|
+
raise MalformedCSVError unless in_quotes
|
1617
|
+
current_field = in_quotes
|
1618
|
+
current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
|
1619
|
+
csv << current_field
|
1620
|
+
current_field = String.new
|
1621
|
+
field_quotes = 0
|
1622
|
+
else # we found a quoted field that spans multiple lines
|
1623
|
+
current_field << @col_sep
|
1619
1624
|
end
|
1620
|
-
|
1621
|
-
|
1625
|
+
elsif match.count("\r\n").zero?
|
1626
|
+
raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
|
1627
|
+
else
|
1628
|
+
raise MalformedCSVError, "Unquoted fields do not allow " +
|
1629
|
+
"\\r or \\n (line #{lineno + 1})."
|
1622
1630
|
end
|
1623
|
-
"" # gsub!'s replacement, clear the field
|
1624
1631
|
end
|
1625
1632
|
|
1626
1633
|
# if parse is empty?(), we found all the fields on the line...
|
1627
|
-
if
|
1634
|
+
if field_quotes % 2 == 0
|
1628
1635
|
@lineno += 1
|
1629
1636
|
|
1630
1637
|
# save fields unconverted fields, if needed...
|
@@ -1646,9 +1653,7 @@ class FasterCSV
|
|
1646
1653
|
# if we're not empty?() but at eof?(), a quoted field wasn't closed...
|
1647
1654
|
if @io.eof?
|
1648
1655
|
raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
|
1649
|
-
elsif
|
1650
|
-
raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
|
1651
|
-
elsif @field_size_limit and parse.length >= @field_size_limit
|
1656
|
+
elsif @field_size_limit and current_field.size >= @field_size_limit
|
1652
1657
|
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
|
1653
1658
|
end
|
1654
1659
|
# otherwise, we need to loop and pull some more data to complete the row
|
@@ -1697,9 +1702,10 @@ class FasterCSV
|
|
1697
1702
|
#
|
1698
1703
|
def init_separators(options)
|
1699
1704
|
# store the selected separators
|
1700
|
-
@col_sep
|
1701
|
-
@row_sep
|
1702
|
-
@quote_char
|
1705
|
+
@col_sep = options.delete(:col_sep)
|
1706
|
+
@row_sep = options.delete(:row_sep)
|
1707
|
+
@quote_char = options.delete(:quote_char)
|
1708
|
+
@quote_and_newlines = "#{@quote_char}\r\n"
|
1703
1709
|
|
1704
1710
|
if @quote_char.length != 1
|
1705
1711
|
raise ArgumentError, ":quote_char has to be a single character String"
|
@@ -1785,31 +1791,12 @@ class FasterCSV
|
|
1785
1791
|
esc_row_sep = Regexp.escape(@row_sep)
|
1786
1792
|
esc_quote = Regexp.escape(@quote_char)
|
1787
1793
|
@parsers = {
|
1788
|
-
|
1789
|
-
|
1790
|
-
|
1791
|
-
:
|
1792
|
-
|
1793
|
-
|
1794
|
-
(?> #{esc_quote*2}
|
1795
|
-
[^#{esc_quote}]* )* )#{esc_quote}
|
1796
|
-
| # ... or ...
|
1797
|
-
([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
|
1798
|
-
)
|
1799
|
-
(?=#{esc_col_sep}|\\z) # ensure we are at field's end
|
1800
|
-
END_PARSER
|
1801
|
-
# a test for unescaped quotes
|
1802
|
-
:bad_field => Regexp.new(<<-END_BAD, Regexp::EXTENDED, @encoding),
|
1803
|
-
\\A#{esc_col_sep}? # starts with an optional comma
|
1804
|
-
(?: #{esc_quote} (?>[^#{esc_quote}]*) # an extra quote
|
1805
|
-
(?> #{esc_quote*2}
|
1806
|
-
[^#{esc_quote}]* )*
|
1807
|
-
#{esc_quote}[^#{esc_quote}]
|
1808
|
-
| # ... or ...
|
1809
|
-
[^#{esc_quote}#{esc_col_sep}]+
|
1810
|
-
#{esc_quote} # unescaped quote
|
1811
|
-
)
|
1812
|
-
END_BAD
|
1794
|
+
:any_field => Regexp.new( "[^#{esc_col_sep}]+",
|
1795
|
+
Regexp::MULTILINE,
|
1796
|
+
@encoding ),
|
1797
|
+
:quoted_field => Regexp.new( "^#{esc_quote}(.*)#{esc_quote}$",
|
1798
|
+
Regexp::MULTILINE,
|
1799
|
+
@encoding ),
|
1813
1800
|
# safer than chomp!()
|
1814
1801
|
:line_end => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
|
1815
1802
|
}
|
Binary file
|
data/test/tc_csv_parsing.rb
CHANGED
@@ -108,6 +108,13 @@ class TestCSVParsing < Test::Unit::TestCase
|
|
108
108
|
#
|
109
109
|
assert_equal(Array.new, FasterCSV.parse_line("\n1,2,3\n"))
|
110
110
|
end
|
111
|
+
|
112
|
+
def test_non_regex_edge_cases
|
113
|
+
# An early version of the non-regex parser fails this test
|
114
|
+
[["foo,\"foo,bar,baz,foo\",\"foo\"", ["foo", "foo,bar,baz,foo", "foo"]]].each do |edge_case|
|
115
|
+
assert_equal(edge_case.last, FasterCSV.parse_line(edge_case.first))
|
116
|
+
end
|
117
|
+
end
|
111
118
|
|
112
119
|
def test_malformed_csv
|
113
120
|
assert_raise(FasterCSV::MalformedCSVError) do
|
data/test/tc_interface.rb
CHANGED
@@ -103,6 +103,17 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
103
103
|
assert_equal(nil, csv.shift)
|
104
104
|
end
|
105
105
|
end
|
106
|
+
|
107
|
+
def test_long_line # ruby's regex parser may have problems with long rows
|
108
|
+
File.unlink(@path)
|
109
|
+
|
110
|
+
long_field_length = 2800
|
111
|
+
File.open(@path, "w") do |file|
|
112
|
+
file << "1\t2\t#{'3' * long_field_length}\r\n"
|
113
|
+
end
|
114
|
+
@expected = [%w{1 2} + ['3' * long_field_length]]
|
115
|
+
test_shift
|
116
|
+
end
|
106
117
|
|
107
118
|
### Test Write Interface ###
|
108
119
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fastercsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
@@ -9,11 +9,15 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-06-15 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description:
|
16
|
+
description: |
|
17
|
+
FasterCSV is intended as a complete replacement to the CSV standard library. It
|
18
|
+
is significantly faster and smaller while still being pure Ruby code. It also
|
19
|
+
strives for a better interface.
|
20
|
+
|
17
21
|
email: james@grayproductions.net
|
18
22
|
executables: []
|
19
23
|
|
@@ -52,6 +56,7 @@ files:
|
|
52
56
|
- examples/purchase.csv
|
53
57
|
- Rakefile
|
54
58
|
- setup.rb
|
59
|
+
- test/line_endings.gz
|
55
60
|
- AUTHORS
|
56
61
|
- COPYING
|
57
62
|
- README
|
@@ -61,6 +66,8 @@ files:
|
|
61
66
|
- LICENSE
|
62
67
|
has_rdoc: true
|
63
68
|
homepage: http://fastercsv.rubyforge.org
|
69
|
+
licenses: []
|
70
|
+
|
64
71
|
post_install_message:
|
65
72
|
rdoc_options:
|
66
73
|
- --title
|
@@ -84,9 +91,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
91
|
requirements: []
|
85
92
|
|
86
93
|
rubyforge_project: fastercsv
|
87
|
-
rubygems_version: 1.
|
94
|
+
rubygems_version: 1.3.4
|
88
95
|
signing_key:
|
89
|
-
specification_version:
|
96
|
+
specification_version: 3
|
90
97
|
summary: FasterCSV is CSV, but faster, smaller, and cleaner.
|
91
98
|
test_files:
|
92
99
|
- test/ts_all.rb
|