fastercsv 1.2.0 → 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +9 -0
- data/LICENSE +1 -1
- data/README +10 -0
- data/Rakefile +2 -0
- data/lib/faster_csv.rb +67 -22
- data/test/tc_data_converters.rb +4 -1
- data/test/tc_features.rb +8 -0
- data/test/tc_interface.rb +39 -1
- metadata +3 -3
data/CHANGELOG
CHANGED
@@ -2,6 +2,15 @@
|
|
2
2
|
|
3
3
|
Below is a complete listing of changes for each revision of FasterCSV.
|
4
4
|
|
5
|
+
== 1.2.1
|
6
|
+
|
7
|
+
* Worked around an odd incompatibility with the Regexps used to remove line
|
8
|
+
endings in some (seemingly rare) Ruby environments.
|
9
|
+
* Made FasterCSV::lineno() writer aware.
|
10
|
+
* Support Hashes in FasterCSV#<<(), by detecting headers when writing.
|
11
|
+
* Added limited support for switching the quote character.
|
12
|
+
* Refined Date and DateTime matching for built-in converters.
|
13
|
+
|
5
14
|
== 1.2.0
|
6
15
|
|
7
16
|
* Added the FasterCSV::table() shortcut.
|
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
= License Terms
|
2
2
|
|
3
|
-
Distributed under the user's choice of the GPL[http://www.gnu.org/
|
3
|
+
Distributed under the user's choice of the {GPL Version 2}[http://www.gnu.org/licenses/old-licenses/gpl-2.0.html] (see COPYING for details) or the
|
4
4
|
{Ruby software license}[http://www.ruby-lang.org/en/LICENSE.txt] by
|
5
5
|
James Edward Gray II.
|
6
6
|
|
data/README
CHANGED
@@ -55,6 +55,16 @@ See FasterCSV for documentation.
|
|
55
55
|
|
56
56
|
See the INSTALL file for instructions.
|
57
57
|
|
58
|
+
== What is CSV, really?
|
59
|
+
|
60
|
+
FasterCSV maintains a pretty strict definition of CSV taken directly from {the RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one place and that is to make using this library easier. FasterCSV will parse all valid CSV.
|
61
|
+
|
62
|
+
What you don't want to do is feed FasterCSV invalid CSV. Because of the way the CSV format works, it's common for a parser to need to read until the end of the file to be sure a field is invalid. This eats a lot of time and memory.
|
63
|
+
|
64
|
+
Luckily, when working with invalid CSV, Ruby's built-in methods will almost always be superior in every way. For example, parsing non-quoted fields is as easy as:
|
65
|
+
|
66
|
+
data.split(",")
|
67
|
+
|
58
68
|
== Questions and/or Comments
|
59
69
|
|
60
70
|
Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] with
|
data/Rakefile
CHANGED
@@ -45,6 +45,8 @@ task :benchmark do
|
|
45
45
|
path = "test/test_data.csv"
|
46
46
|
sh %Q{time ruby -r csv -e } +
|
47
47
|
%Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
|
48
|
+
sh %Q{time ruby -r lightcsv -e } +
|
49
|
+
%Q{'#{TESTS}.times { LightCsv.foreach("#{path}") { |row| } }'}
|
48
50
|
sh %Q{time ruby -r lib/faster_csv -e } +
|
49
51
|
%Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
|
50
52
|
end
|
data/lib/faster_csv.rb
CHANGED
@@ -75,7 +75,7 @@ require "stringio"
|
|
75
75
|
#
|
76
76
|
class FasterCSV
|
77
77
|
# The version of the installed library.
|
78
|
-
VERSION = "1.2.0".freeze
|
78
|
+
VERSION = "1.2.1".freeze
|
79
79
|
|
80
80
|
#
|
81
81
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
@@ -712,6 +712,13 @@ class FasterCSV
|
|
712
712
|
#
|
713
713
|
FieldInfo = Struct.new(:index, :line, :header)
|
714
714
|
|
715
|
+
# A Regexp used to find and convert some common Date formats.
|
716
|
+
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
717
|
+
\d{4}-\d{2}-\d{2} )\z /x
|
718
|
+
# A Regexp used to find and convert some common DateTime formats.
|
719
|
+
DateTimeMatcher =
|
720
|
+
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
721
|
+
\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
|
715
722
|
#
|
716
723
|
# This Hash holds the built-in converters of FasterCSV that can be accessed by
|
717
724
|
# name. You can select Converters with FasterCSV.convert() or through the
|
@@ -732,12 +739,16 @@ class FasterCSV
|
|
732
739
|
# To add a combo field, the value should be an Array of names. Combo fields
|
733
740
|
# can be nested with other combo fields.
|
734
741
|
#
|
735
|
-
Converters
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
742
|
+
Converters = { :integer => lambda { |f| Integer(f) rescue f },
|
743
|
+
:float => lambda { |f| Float(f) rescue f },
|
744
|
+
:numeric => [:integer, :float],
|
745
|
+
:date => lambda { |f|
|
746
|
+
f =~ DateMatcher ? (Date.parse(f) rescue f) : f
|
747
|
+
},
|
748
|
+
:date_time => lambda { |f|
|
749
|
+
f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
|
750
|
+
},
|
751
|
+
:all => [:date_time, :numeric] }
|
741
752
|
|
742
753
|
#
|
743
754
|
# This Hash holds the built-in header converters of FasterCSV that can be
|
@@ -768,6 +779,7 @@ class FasterCSV
|
|
768
779
|
#
|
769
780
|
# <b><tt>:col_sep</tt></b>:: <tt>","</tt>
|
770
781
|
# <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
|
782
|
+
# <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
|
771
783
|
# <b><tt>:converters</tt></b>:: +nil+
|
772
784
|
# <b><tt>:unconverted_fields</tt></b>:: +nil+
|
773
785
|
# <b><tt>:headers</tt></b>:: +false+
|
@@ -778,6 +790,7 @@ class FasterCSV
|
|
778
790
|
#
|
779
791
|
DEFAULT_OPTIONS = { :col_sep => ",",
|
780
792
|
:row_sep => :auto,
|
793
|
+
:quote_char => '"',
|
781
794
|
:converters => nil,
|
782
795
|
:unconverted_fields => nil,
|
783
796
|
:headers => false,
|
@@ -1267,6 +1280,15 @@ class FasterCSV
|
|
1267
1280
|
# (<tt>$/</tt>) is used. Obviously,
|
1268
1281
|
# discovery takes a little time. Set
|
1269
1282
|
# manually if speed is important.
|
1283
|
+
# <b><tt>:quote_char</tt></b>:: The character used to quote fields.
|
1284
|
+
# This has to be a single character
|
1285
|
+
# String. This is useful for
|
1286
|
+
# applications that incorrectly use
|
1287
|
+
# <tt>'</tt> as the quote character
|
1288
|
+
# instead of the correct <tt>"</tt>.
|
1289
|
+
# FasterCSV will always consider a
|
1290
|
+
# double sequence of this character to be
|
1291
|
+
# an escaped quote.
|
1270
1292
|
# <b><tt>:converters</tt></b>:: An Array of names from the Converters
|
1271
1293
|
# Hash and/or lambdas that handle custom
|
1272
1294
|
# conversion. A single converter
|
@@ -1368,9 +1390,16 @@ class FasterCSV
|
|
1368
1390
|
# The data source must be open for writing.
|
1369
1391
|
#
|
1370
1392
|
def <<(row)
|
1371
|
-
#
|
1372
|
-
row =
|
1373
|
-
|
1393
|
+
# Handle FasterCSV::Row objects and Hashes
|
1394
|
+
row = case row
|
1395
|
+
when self.class::Row then row.fields
|
1396
|
+
when Hash then @headers.map { |header| row[header] }
|
1397
|
+
else row
|
1398
|
+
end
|
1399
|
+
|
1400
|
+
@headers = row if header_row?
|
1401
|
+
@lineno += 1
|
1402
|
+
|
1374
1403
|
@io << row.map(&@quote).join(@col_sep) + @row_sep # quote and separate
|
1375
1404
|
|
1376
1405
|
self # for chaining
|
@@ -1533,7 +1562,7 @@ class FasterCSV
|
|
1533
1562
|
end
|
1534
1563
|
end
|
1535
1564
|
else # we found a quoted field...
|
1536
|
-
$1.gsub(
|
1565
|
+
$1.gsub(@quote_char * 2, @quote_char) # unescape contents
|
1537
1566
|
end
|
1538
1567
|
"" # gsub!'s replacement, clear the field
|
1539
1568
|
end
|
@@ -1582,8 +1611,13 @@ class FasterCSV
|
|
1582
1611
|
#
|
1583
1612
|
def init_separators(options)
|
1584
1613
|
# store the selected separators
|
1585
|
-
@col_sep
|
1586
|
-
@row_sep
|
1614
|
+
@col_sep = options.delete(:col_sep)
|
1615
|
+
@row_sep = options.delete(:row_sep)
|
1616
|
+
@quote_char = options.delete(:quote_char)
|
1617
|
+
|
1618
|
+
if @quote_char.length != 1
|
1619
|
+
raise ArgumentError, ":quote_char has to be a single character String"
|
1620
|
+
end
|
1587
1621
|
|
1588
1622
|
# automatically discover row separator when requested
|
1589
1623
|
if @row_sep == :auto
|
@@ -1620,8 +1654,13 @@ class FasterCSV
|
|
1620
1654
|
end
|
1621
1655
|
|
1622
1656
|
# establish quoting rules
|
1657
|
+
do_quote = lambda do |field|
|
1658
|
+
@quote_char +
|
1659
|
+
String(field).gsub(@quote_char, @quote_char * 2) +
|
1660
|
+
@quote_char
|
1661
|
+
end
|
1623
1662
|
@quote = if options.delete(:force_quotes)
|
1624
|
-
|
1663
|
+
do_quote
|
1625
1664
|
else
|
1626
1665
|
lambda do |field|
|
1627
1666
|
if field.nil? # represent +nil+ fields as empty unquoted fields
|
@@ -1629,8 +1668,9 @@ class FasterCSV
|
|
1629
1668
|
else
|
1630
1669
|
field = String(field) # Stringify fields
|
1631
1670
|
# represent empty fields as empty quoted fields
|
1632
|
-
if field.empty? or
|
1633
|
-
|
1671
|
+
if field.empty? or
|
1672
|
+
field.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
|
1673
|
+
do_quote.call(field)
|
1634
1674
|
else
|
1635
1675
|
field # unquoted field
|
1636
1676
|
end
|
@@ -1645,19 +1685,24 @@ class FasterCSV
|
|
1645
1685
|
@skip_blanks = options.delete(:skip_blanks)
|
1646
1686
|
|
1647
1687
|
# prebuild Regexps for faster parsing
|
1648
|
-
|
1688
|
+
esc_col_sep = Regexp.escape(@col_sep)
|
1689
|
+
esc_row_sep = Regexp.escape(@row_sep)
|
1690
|
+
esc_quote = Regexp.escape(@quote_char)
|
1691
|
+
@parsers = {
|
1649
1692
|
:leading_fields =>
|
1650
|
-
/\A(?:#{
|
1693
|
+
/\A(?:#{esc_col_sep})+/, # for empty leading fields
|
1651
1694
|
:csv_row =>
|
1652
1695
|
### The Primary Parser ###
|
1653
|
-
/ \G(?:^|#{
|
1654
|
-
(?:
|
1696
|
+
/ \G(?:^|#{esc_col_sep}) # anchor the match
|
1697
|
+
(?: #{esc_quote}( (?>[^#{esc_quote}]*) # find quoted fields
|
1698
|
+
(?> #{esc_quote*2}
|
1699
|
+
[^#{esc_quote}]* )* )#{esc_quote}
|
1655
1700
|
| # ... or ...
|
1656
|
-
([
|
1701
|
+
([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
|
1657
1702
|
)/x,
|
1658
1703
|
### End Primary Parser ###
|
1659
1704
|
:line_end =>
|
1660
|
-
/#{
|
1705
|
+
/#{esc_row_sep}\z/ # safer than chomp!()
|
1661
1706
|
}
|
1662
1707
|
end
|
1663
1708
|
|
data/test/tc_data_converters.rb
CHANGED
@@ -45,7 +45,10 @@ class TestDataConverters < Test::Unit::TestCase
|
|
45
45
|
|
46
46
|
def test_builtin_date_converter
|
47
47
|
# does convert
|
48
|
-
assert_instance_of(
|
48
|
+
assert_instance_of(
|
49
|
+
Date,
|
50
|
+
FasterCSV::Converters[:date][@win_safe_time_str.sub(/\d+:\d+:\d+ /, "")]
|
51
|
+
)
|
49
52
|
|
50
53
|
# does not convert
|
51
54
|
assert_instance_of(String, FasterCSV::Converters[:date]["junk"])
|
data/test/tc_features.rb
CHANGED
@@ -59,6 +59,14 @@ class TestFasterCSVFeatures < Test::Unit::TestCase
|
|
59
59
|
:row_sep => "\r\n") )
|
60
60
|
end
|
61
61
|
|
62
|
+
def test_quote_char
|
63
|
+
TEST_CASES.each do |test_case|
|
64
|
+
assert_equal( test_case.last.map { |t| t.tr('"', "'") unless t.nil? },
|
65
|
+
FasterCSV.parse_line( test_case.first.tr('"', "'"),
|
66
|
+
:quote_char => "'" ) )
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
62
70
|
def test_row_sep_auto_discovery
|
63
71
|
["\r\n", "\n", "\r"].each do |line_end|
|
64
72
|
data = "1,2,3#{line_end}4,5#{line_end}"
|
data/test/tc_interface.rb
CHANGED
@@ -89,7 +89,7 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
89
89
|
end
|
90
90
|
assert_equal(@expected, data)
|
91
91
|
end
|
92
|
-
|
92
|
+
|
93
93
|
def test_table
|
94
94
|
table = FasterCSV.table(@path, :col_sep => "\t", :row_sep => "\r\n")
|
95
95
|
assert_instance_of(FasterCSV::Table, table)
|
@@ -134,6 +134,44 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
134
134
|
assert_instance_of(String, line)
|
135
135
|
assert_equal("1;2;3\n", line)
|
136
136
|
end
|
137
|
+
|
138
|
+
def test_write_header_detection
|
139
|
+
File.unlink(@path)
|
140
|
+
|
141
|
+
headers = %w{a b c}
|
142
|
+
FasterCSV.open(@path, "w", :headers => true) do |csv|
|
143
|
+
csv << headers
|
144
|
+
csv << %w{1 2 3}
|
145
|
+
assert_equal(headers, csv.instance_variable_get(:@headers))
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_write_lineno
|
150
|
+
File.unlink(@path)
|
151
|
+
|
152
|
+
FasterCSV.open(@path, "w") do |csv|
|
153
|
+
lines = 20
|
154
|
+
lines.times { csv << %w{a b c} }
|
155
|
+
assert_equal(lines, csv.lineno)
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_write_hash
|
160
|
+
File.unlink(@path)
|
161
|
+
|
162
|
+
lines = [{:a => 1, :b => 2, :c => 3}, {:a => 4, :b => 5, :c => 6}]
|
163
|
+
FasterCSV.open( @path, "w", :headers => true,
|
164
|
+
:converters => :all,
|
165
|
+
:header_converters => :symbol ) do |csv|
|
166
|
+
csv << lines.first.keys
|
167
|
+
lines.each { |line| csv << line }
|
168
|
+
end
|
169
|
+
FasterCSV.open( @path, "w", :headers => true,
|
170
|
+
:converters => :all,
|
171
|
+
:header_converters => :symbol ) do |csv|
|
172
|
+
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
173
|
+
end
|
174
|
+
end
|
137
175
|
|
138
176
|
def test_append # aliased add_row() and puts()
|
139
177
|
File.unlink(@path)
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.4
|
3
3
|
specification_version: 1
|
4
4
|
name: fastercsv
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.2.0
|
7
|
-
date: 2007-
|
6
|
+
version: 1.2.1
|
7
|
+
date: 2007-09-20 00:00:00 -05:00
|
8
8
|
summary: FasterCSV is CSV, but faster, smaller, and cleaner.
|
9
9
|
require_paths:
|
10
10
|
- lib
|