fastercsv 1.4.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +8 -0
- data/Rakefile +4 -6
- data/lib/faster_csv.rb +51 -64
- data/test/line_endings.gz +0 -0
- data/test/tc_csv_parsing.rb +7 -0
- data/test/tc_interface.rb +11 -0
- metadata +12 -5
data/CHANGELOG
CHANGED
@@ -2,6 +2,14 @@
|
|
2
2
|
|
3
3
|
Below is a complete listing of changes for each revision of FasterCSV.
|
4
4
|
|
5
|
+
== 1.5.0
|
6
|
+
|
7
|
+
* The main parser has been rewritten by Timothy Elliott to avoid big input
|
8
|
+
issues with Ruby 1.8's regex engine. This makes FasterCSV handle more inputs
|
9
|
+
gracefully.
|
10
|
+
* FasterCSV will now exit with a notice to upgrade if required in Ruby 1.9.
|
11
|
+
* Included a missing file so the tests will run in source packages.
|
12
|
+
|
5
13
|
== 1.4.0
|
6
14
|
|
7
15
|
* Added encoding support patch from Michael Reinsch.
|
data/Rakefile
CHANGED
@@ -12,8 +12,8 @@ task :default => [:test]
|
|
12
12
|
|
13
13
|
Rake::TestTask.new do |test|
|
14
14
|
test.libs << "test"
|
15
|
-
test.test_files =
|
16
|
-
test.verbose =
|
15
|
+
test.test_files = %w[test/ts_all.rb]
|
16
|
+
test.verbose = true
|
17
17
|
end
|
18
18
|
|
19
19
|
Rake::RDocTask.new do |rdoc|
|
@@ -45,8 +45,6 @@ task :benchmark do
|
|
45
45
|
path = "test/test_data.csv"
|
46
46
|
sh %Q{time ruby -r csv -e } +
|
47
47
|
%Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
|
48
|
-
sh %Q{time ruby -r lightcsv -e } +
|
49
|
-
%Q{'#{TESTS}.times { LightCsv.foreach("#{path}") { |row| } }'}
|
50
48
|
sh %Q{time ruby -r lib/faster_csv -e } +
|
51
49
|
%Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
|
52
50
|
end
|
@@ -58,12 +56,12 @@ spec = Gem::Specification.new do |spec|
|
|
58
56
|
spec.platform = Gem::Platform::RUBY
|
59
57
|
spec.summary = "FasterCSV is CSV, but faster, smaller, and cleaner."
|
60
58
|
|
61
|
-
spec.
|
59
|
+
spec.test_files = %w[test/ts_all.rb]
|
62
60
|
spec.files = Dir.glob("{lib,test,examples}/**/*.rb").
|
63
61
|
reject { |item| item.include?(".svn") } +
|
64
62
|
Dir.glob("{test,examples}/**/*.csv").
|
65
63
|
reject { |item| item.include?(".svn") } +
|
66
|
-
|
64
|
+
%w[Rakefile setup.rb test/line_endings.gz]
|
67
65
|
|
68
66
|
spec.has_rdoc = true
|
69
67
|
spec.extra_rdoc_files = %w[ AUTHORS COPYING README INSTALL TODO CHANGELOG
|
data/lib/faster_csv.rb
CHANGED
@@ -7,6 +7,13 @@
|
|
7
7
|
#
|
8
8
|
# See FasterCSV for documentation.
|
9
9
|
|
10
|
+
if RUBY_VERSION >= "1.9"
|
11
|
+
abort <<-VERSION_WARNING.gsub(/^\s+/, "")
|
12
|
+
Please switch to Ruby 1.9's standard CSV library. It's FasterCSV plus
|
13
|
+
support for Ruby 1.9's m17n encoding engine.
|
14
|
+
VERSION_WARNING
|
15
|
+
end
|
16
|
+
|
10
17
|
require "forwardable"
|
11
18
|
require "English"
|
12
19
|
require "enumerator"
|
@@ -75,7 +82,7 @@ require "stringio"
|
|
75
82
|
#
|
76
83
|
class FasterCSV
|
77
84
|
# The version of the installed library.
|
78
|
-
VERSION = "1.
|
85
|
+
VERSION = "1.5.0".freeze
|
79
86
|
|
80
87
|
#
|
81
88
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
@@ -1559,7 +1566,7 @@ class FasterCSV
|
|
1559
1566
|
end
|
1560
1567
|
|
1561
1568
|
# begin with a blank line, so we can always add to it
|
1562
|
-
line =
|
1569
|
+
line = String.new
|
1563
1570
|
|
1564
1571
|
#
|
1565
1572
|
# it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
|
@@ -1567,7 +1574,11 @@ class FasterCSV
|
|
1567
1574
|
#
|
1568
1575
|
loop do
|
1569
1576
|
# add another read to the line
|
1570
|
-
|
1577
|
+
begin
|
1578
|
+
line += @io.gets(@row_sep)
|
1579
|
+
rescue
|
1580
|
+
return nil
|
1581
|
+
end
|
1571
1582
|
# copy the line so we can chop it up in parsing
|
1572
1583
|
parse = line.dup
|
1573
1584
|
parse.sub!(@parsers[:line_end], "")
|
@@ -1590,41 +1601,37 @@ class FasterCSV
|
|
1590
1601
|
end
|
1591
1602
|
end
|
1592
1603
|
|
1593
|
-
#
|
1594
|
-
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
|
1599
|
-
|
1600
|
-
|
1601
|
-
|
1602
|
-
|
1603
|
-
|
1604
|
-
|
1605
|
-
|
1606
|
-
|
1607
|
-
|
1608
|
-
|
1609
|
-
|
1610
|
-
|
1611
|
-
|
1612
|
-
|
1613
|
-
$2
|
1614
|
-
else
|
1615
|
-
# or throw an Exception
|
1616
|
-
raise MalformedCSVError, "Unquoted fields do not allow " +
|
1617
|
-
"\\r or \\n (line #{lineno + 1})."
|
1618
|
-
end
|
1604
|
+
# parse the fields with a mix of String#split and regular expressions
|
1605
|
+
csv = Array.new
|
1606
|
+
current_field = String.new
|
1607
|
+
field_quotes = 0
|
1608
|
+
parse.split(@col_sep, -1).each do |match|
|
1609
|
+
if current_field.empty? && match.count(@quote_and_newlines).zero?
|
1610
|
+
csv << (match.empty? ? nil : match)
|
1611
|
+
elsif(current_field.empty? ? match[0] : current_field[0]) == @quote_char[0]
|
1612
|
+
current_field << match
|
1613
|
+
field_quotes += match.count(@quote_char)
|
1614
|
+
if field_quotes % 2 == 0
|
1615
|
+
in_quotes = current_field[@parsers[:quoted_field], 1]
|
1616
|
+
raise MalformedCSVError unless in_quotes
|
1617
|
+
current_field = in_quotes
|
1618
|
+
current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
|
1619
|
+
csv << current_field
|
1620
|
+
current_field = String.new
|
1621
|
+
field_quotes = 0
|
1622
|
+
else # we found a quoted field that spans multiple lines
|
1623
|
+
current_field << @col_sep
|
1619
1624
|
end
|
1620
|
-
|
1621
|
-
|
1625
|
+
elsif match.count("\r\n").zero?
|
1626
|
+
raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
|
1627
|
+
else
|
1628
|
+
raise MalformedCSVError, "Unquoted fields do not allow " +
|
1629
|
+
"\\r or \\n (line #{lineno + 1})."
|
1622
1630
|
end
|
1623
|
-
"" # gsub!'s replacement, clear the field
|
1624
1631
|
end
|
1625
1632
|
|
1626
1633
|
# if parse is empty?(), we found all the fields on the line...
|
1627
|
-
if
|
1634
|
+
if field_quotes % 2 == 0
|
1628
1635
|
@lineno += 1
|
1629
1636
|
|
1630
1637
|
# save unconverted fields, if needed...
|
@@ -1646,9 +1653,7 @@ class FasterCSV
|
|
1646
1653
|
# if we're not empty?() but at eof?(), a quoted field wasn't closed...
|
1647
1654
|
if @io.eof?
|
1648
1655
|
raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
|
1649
|
-
elsif
|
1650
|
-
raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
|
1651
|
-
elsif @field_size_limit and parse.length >= @field_size_limit
|
1656
|
+
elsif @field_size_limit and current_field.size >= @field_size_limit
|
1652
1657
|
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
|
1653
1658
|
end
|
1654
1659
|
# otherwise, we need to loop and pull some more data to complete the row
|
@@ -1697,9 +1702,10 @@ class FasterCSV
|
|
1697
1702
|
#
|
1698
1703
|
def init_separators(options)
|
1699
1704
|
# store the selected separators
|
1700
|
-
@col_sep
|
1701
|
-
@row_sep
|
1702
|
-
@quote_char
|
1705
|
+
@col_sep = options.delete(:col_sep)
|
1706
|
+
@row_sep = options.delete(:row_sep)
|
1707
|
+
@quote_char = options.delete(:quote_char)
|
1708
|
+
@quote_and_newlines = "#{@quote_char}\r\n"
|
1703
1709
|
|
1704
1710
|
if @quote_char.length != 1
|
1705
1711
|
raise ArgumentError, ":quote_char has to be a single character String"
|
@@ -1785,31 +1791,12 @@ class FasterCSV
|
|
1785
1791
|
esc_row_sep = Regexp.escape(@row_sep)
|
1786
1792
|
esc_quote = Regexp.escape(@quote_char)
|
1787
1793
|
@parsers = {
|
1788
|
-
|
1789
|
-
|
1790
|
-
|
1791
|
-
:
|
1792
|
-
|
1793
|
-
|
1794
|
-
(?> #{esc_quote*2}
|
1795
|
-
[^#{esc_quote}]* )* )#{esc_quote}
|
1796
|
-
| # ... or ...
|
1797
|
-
([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
|
1798
|
-
)
|
1799
|
-
(?=#{esc_col_sep}|\\z) # ensure we are at field's end
|
1800
|
-
END_PARSER
|
1801
|
-
# a test for unescaped quotes
|
1802
|
-
:bad_field => Regexp.new(<<-END_BAD, Regexp::EXTENDED, @encoding),
|
1803
|
-
\\A#{esc_col_sep}? # starts with an optional comma
|
1804
|
-
(?: #{esc_quote} (?>[^#{esc_quote}]*) # an extra quote
|
1805
|
-
(?> #{esc_quote*2}
|
1806
|
-
[^#{esc_quote}]* )*
|
1807
|
-
#{esc_quote}[^#{esc_quote}]
|
1808
|
-
| # ... or ...
|
1809
|
-
[^#{esc_quote}#{esc_col_sep}]+
|
1810
|
-
#{esc_quote} # unescaped quote
|
1811
|
-
)
|
1812
|
-
END_BAD
|
1794
|
+
:any_field => Regexp.new( "[^#{esc_col_sep}]+",
|
1795
|
+
Regexp::MULTILINE,
|
1796
|
+
@encoding ),
|
1797
|
+
:quoted_field => Regexp.new( "^#{esc_quote}(.*)#{esc_quote}$",
|
1798
|
+
Regexp::MULTILINE,
|
1799
|
+
@encoding ),
|
1813
1800
|
# safer than chomp!()
|
1814
1801
|
:line_end => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
|
1815
1802
|
}
|
Binary file
|
data/test/tc_csv_parsing.rb
CHANGED
@@ -108,6 +108,13 @@ class TestCSVParsing < Test::Unit::TestCase
|
|
108
108
|
#
|
109
109
|
assert_equal(Array.new, FasterCSV.parse_line("\n1,2,3\n"))
|
110
110
|
end
|
111
|
+
|
112
|
+
def test_non_regex_edge_cases
|
113
|
+
# An early version of the non-regex parser fails this test
|
114
|
+
[["foo,\"foo,bar,baz,foo\",\"foo\"", ["foo", "foo,bar,baz,foo", "foo"]]].each do |edge_case|
|
115
|
+
assert_equal(edge_case.last, FasterCSV.parse_line(edge_case.first))
|
116
|
+
end
|
117
|
+
end
|
111
118
|
|
112
119
|
def test_malformed_csv
|
113
120
|
assert_raise(FasterCSV::MalformedCSVError) do
|
data/test/tc_interface.rb
CHANGED
@@ -103,6 +103,17 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
103
103
|
assert_equal(nil, csv.shift)
|
104
104
|
end
|
105
105
|
end
|
106
|
+
|
107
|
+
def test_long_line # ruby's regex parser may have problems with long rows
|
108
|
+
File.unlink(@path)
|
109
|
+
|
110
|
+
long_field_length = 2800
|
111
|
+
File.open(@path, "w") do |file|
|
112
|
+
file << "1\t2\t#{'3' * long_field_length}\r\n"
|
113
|
+
end
|
114
|
+
@expected = [%w{1 2} + ['3' * long_field_length]]
|
115
|
+
test_shift
|
116
|
+
end
|
106
117
|
|
107
118
|
### Test Write Interface ###
|
108
119
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fastercsv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
@@ -9,11 +9,15 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-06-15 00:00:00 -05:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|
16
|
-
description:
|
16
|
+
description: |
|
17
|
+
FasterCSV is intended as a complete replacement to the CSV standard library. It
|
18
|
+
is significantly faster and smaller while still being pure Ruby code. It also
|
19
|
+
strives for a better interface.
|
20
|
+
|
17
21
|
email: james@grayproductions.net
|
18
22
|
executables: []
|
19
23
|
|
@@ -52,6 +56,7 @@ files:
|
|
52
56
|
- examples/purchase.csv
|
53
57
|
- Rakefile
|
54
58
|
- setup.rb
|
59
|
+
- test/line_endings.gz
|
55
60
|
- AUTHORS
|
56
61
|
- COPYING
|
57
62
|
- README
|
@@ -61,6 +66,8 @@ files:
|
|
61
66
|
- LICENSE
|
62
67
|
has_rdoc: true
|
63
68
|
homepage: http://fastercsv.rubyforge.org
|
69
|
+
licenses: []
|
70
|
+
|
64
71
|
post_install_message:
|
65
72
|
rdoc_options:
|
66
73
|
- --title
|
@@ -84,9 +91,9 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
91
|
requirements: []
|
85
92
|
|
86
93
|
rubyforge_project: fastercsv
|
87
|
-
rubygems_version: 1.
|
94
|
+
rubygems_version: 1.3.4
|
88
95
|
signing_key:
|
89
|
-
specification_version:
|
96
|
+
specification_version: 3
|
90
97
|
summary: FasterCSV is CSV, but faster, smaller, and cleaner.
|
91
98
|
test_files:
|
92
99
|
- test/ts_all.rb
|