fastercsv 1.2.0 → 1.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +9 -0
- data/LICENSE +1 -1
- data/README +10 -0
- data/Rakefile +2 -0
- data/lib/faster_csv.rb +67 -22
- data/test/tc_data_converters.rb +4 -1
- data/test/tc_features.rb +8 -0
- data/test/tc_interface.rb +39 -1
- metadata +3 -3
data/CHANGELOG
CHANGED
@@ -2,6 +2,15 @@
|
|
2
2
|
|
3
3
|
Below is a complete listing of changes for each revision of FasterCSV.
|
4
4
|
|
5
|
+
== 1.2.1
|
6
|
+
|
7
|
+
* Worked around an odd incompatibility with the Regexps used to remove line
|
8
|
+
endings in some (seemingly rare) Ruby environments.
|
9
|
+
* Made FasterCSV#lineno() writer-aware.
|
10
|
+
* Added support for Hashes in FasterCSV#<<(), by detecting headers when writing.
|
11
|
+
* Added limited support for switching the quote character.
|
12
|
+
* Refined Date and DateTime matching for built-in converters.
|
13
|
+
|
5
14
|
== 1.2.0
|
6
15
|
|
7
16
|
* Added the FasterCSV::table() shortcut.
|
data/LICENSE
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
= License Terms
|
2
2
|
|
3
|
-
Distributed under the user's choice of the GPL[http://www.gnu.org/
|
3
|
+
Distributed under the user's choice of the {GPL Version 2}[http://www.gnu.org/licenses/old-licenses/gpl-2.0.html] (see COPYING for details) or the
|
4
4
|
{Ruby software license}[http://www.ruby-lang.org/en/LICENSE.txt] by
|
5
5
|
James Edward Gray II.
|
6
6
|
|
data/README
CHANGED
@@ -55,6 +55,16 @@ See FasterCSV for documentation.
|
|
55
55
|
|
56
56
|
See the INSTALL file for instructions.
|
57
57
|
|
58
|
+
== What is CSV, really?
|
59
|
+
|
60
|
+
FasterCSV maintains a pretty strict definition of CSV taken directly from {the RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one place and that is to make using this library easier. FasterCSV will parse all valid CSV.
|
61
|
+
|
62
|
+
What you don't want to do is feed FasterCSV invalid CSV. Because of the way the CSV format works, it's common for a parser to need to read until the end of the file to be sure a field is invalid. This eats a lot of time and memory.
|
63
|
+
|
64
|
+
Luckily, when working with invalid CSV, Ruby's built-in methods will almost always be superior in every way. For example, parsing non-quoted fields is as easy as:
|
65
|
+
|
66
|
+
data.split(",")
|
67
|
+
|
58
68
|
== Questions and/or Comments
|
59
69
|
|
60
70
|
Feel free to email {James Edward Gray II}[mailto:james@grayproductions.net] with
|
data/Rakefile
CHANGED
@@ -45,6 +45,8 @@ task :benchmark do
|
|
45
45
|
path = "test/test_data.csv"
|
46
46
|
sh %Q{time ruby -r csv -e } +
|
47
47
|
%Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
|
48
|
+
sh %Q{time ruby -r lightcsv -e } +
|
49
|
+
%Q{'#{TESTS}.times { LightCsv.foreach("#{path}") { |row| } }'}
|
48
50
|
sh %Q{time ruby -r lib/faster_csv -e } +
|
49
51
|
%Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
|
50
52
|
end
|
data/lib/faster_csv.rb
CHANGED
@@ -75,7 +75,7 @@ require "stringio"
|
|
75
75
|
#
|
76
76
|
class FasterCSV
|
77
77
|
# The version of the installed library.
|
78
|
-
VERSION = "1.2.
|
78
|
+
VERSION = "1.2.1".freeze
|
79
79
|
|
80
80
|
#
|
81
81
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
@@ -712,6 +712,13 @@ class FasterCSV
|
|
712
712
|
#
|
713
713
|
FieldInfo = Struct.new(:index, :line, :header)
|
714
714
|
|
715
|
+
# A Regexp used to find and convert some common Date formats.
|
716
|
+
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
717
|
+
\d{4}-\d{2}-\d{2} )\z /x
|
718
|
+
# A Regexp used to find and convert some common DateTime formats.
|
719
|
+
DateTimeMatcher =
|
720
|
+
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
721
|
+
\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
|
715
722
|
#
|
716
723
|
# This Hash holds the built-in converters of FasterCSV that can be accessed by
|
717
724
|
# name. You can select Converters with FasterCSV.convert() or through the
|
@@ -732,12 +739,16 @@ class FasterCSV
|
|
732
739
|
# To add a combo field, the value should be an Array of names. Combo fields
|
733
740
|
# can be nested with other combo fields.
|
734
741
|
#
|
735
|
-
Converters
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
742
|
+
Converters = { :integer => lambda { |f| Integer(f) rescue f },
|
743
|
+
:float => lambda { |f| Float(f) rescue f },
|
744
|
+
:numeric => [:integer, :float],
|
745
|
+
:date => lambda { |f|
|
746
|
+
f =~ DateMatcher ? (Date.parse(f) rescue f) : f
|
747
|
+
},
|
748
|
+
:date_time => lambda { |f|
|
749
|
+
f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
|
750
|
+
},
|
751
|
+
:all => [:date_time, :numeric] }
|
741
752
|
|
742
753
|
#
|
743
754
|
# This Hash holds the built-in header converters of FasterCSV that can be
|
@@ -768,6 +779,7 @@ class FasterCSV
|
|
768
779
|
#
|
769
780
|
# <b><tt>:col_sep</tt></b>:: <tt>","</tt>
|
770
781
|
# <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
|
782
|
+
# <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
|
771
783
|
# <b><tt>:converters</tt></b>:: +nil+
|
772
784
|
# <b><tt>:unconverted_fields</tt></b>:: +nil+
|
773
785
|
# <b><tt>:headers</tt></b>:: +false+
|
@@ -778,6 +790,7 @@ class FasterCSV
|
|
778
790
|
#
|
779
791
|
DEFAULT_OPTIONS = { :col_sep => ",",
|
780
792
|
:row_sep => :auto,
|
793
|
+
:quote_char => '"',
|
781
794
|
:converters => nil,
|
782
795
|
:unconverted_fields => nil,
|
783
796
|
:headers => false,
|
@@ -1267,6 +1280,15 @@ class FasterCSV
|
|
1267
1280
|
# (<tt>$/</tt>) is used. Obviously,
|
1268
1281
|
# discovery takes a little time. Set
|
1269
1282
|
# manually if speed is important.
|
1283
|
+
# <b><tt>:quote_char</tt></b>:: The character used to quote fields.
|
1284
|
+
# This has to be a single character
|
1285
|
+
# String. This is useful for
|
1286
|
+
# applications that incorrectly use
|
1287
|
+
# <tt>'</tt> as the quote character
|
1288
|
+
# instead of the correct <tt>"</tt>.
|
1289
|
+
# FasterCSV will always consider a
|
1290
|
+
# double sequence of this character to be
|
1291
|
+
# an escaped quote.
|
1270
1292
|
# <b><tt>:converters</tt></b>:: An Array of names from the Converters
|
1271
1293
|
# Hash and/or lambdas that handle custom
|
1272
1294
|
# conversion. A single converter
|
@@ -1368,9 +1390,16 @@ class FasterCSV
|
|
1368
1390
|
# The data source must be open for writing.
|
1369
1391
|
#
|
1370
1392
|
def <<(row)
|
1371
|
-
#
|
1372
|
-
row =
|
1373
|
-
|
1393
|
+
# Handle FasterCSV::Row objects and Hashes
|
1394
|
+
row = case row
|
1395
|
+
when self.class::Row then row.fields
|
1396
|
+
when Hash then @headers.map { |header| row[header] }
|
1397
|
+
else row
|
1398
|
+
end
|
1399
|
+
|
1400
|
+
@headers = row if header_row?
|
1401
|
+
@lineno += 1
|
1402
|
+
|
1374
1403
|
@io << row.map(&@quote).join(@col_sep) + @row_sep # quote and separate
|
1375
1404
|
|
1376
1405
|
self # for chaining
|
@@ -1533,7 +1562,7 @@ class FasterCSV
|
|
1533
1562
|
end
|
1534
1563
|
end
|
1535
1564
|
else # we found a quoted field...
|
1536
|
-
$1.gsub(
|
1565
|
+
$1.gsub(@quote_char * 2, @quote_char) # unescape contents
|
1537
1566
|
end
|
1538
1567
|
"" # gsub!'s replacement, clear the field
|
1539
1568
|
end
|
@@ -1582,8 +1611,13 @@ class FasterCSV
|
|
1582
1611
|
#
|
1583
1612
|
def init_separators(options)
|
1584
1613
|
# store the selected separators
|
1585
|
-
@col_sep
|
1586
|
-
@row_sep
|
1614
|
+
@col_sep = options.delete(:col_sep)
|
1615
|
+
@row_sep = options.delete(:row_sep)
|
1616
|
+
@quote_char = options.delete(:quote_char)
|
1617
|
+
|
1618
|
+
if @quote_char.length != 1
|
1619
|
+
raise ArgumentError, ":quote_char has to be a single character String"
|
1620
|
+
end
|
1587
1621
|
|
1588
1622
|
# automatically discover row separator when requested
|
1589
1623
|
if @row_sep == :auto
|
@@ -1620,8 +1654,13 @@ class FasterCSV
|
|
1620
1654
|
end
|
1621
1655
|
|
1622
1656
|
# establish quoting rules
|
1657
|
+
do_quote = lambda do |field|
|
1658
|
+
@quote_char +
|
1659
|
+
String(field).gsub(@quote_char, @quote_char * 2) +
|
1660
|
+
@quote_char
|
1661
|
+
end
|
1623
1662
|
@quote = if options.delete(:force_quotes)
|
1624
|
-
|
1663
|
+
do_quote
|
1625
1664
|
else
|
1626
1665
|
lambda do |field|
|
1627
1666
|
if field.nil? # represent +nil+ fields as empty unquoted fields
|
@@ -1629,8 +1668,9 @@ class FasterCSV
|
|
1629
1668
|
else
|
1630
1669
|
field = String(field) # Stringify fields
|
1631
1670
|
# represent empty fields as empty quoted fields
|
1632
|
-
if field.empty? or
|
1633
|
-
|
1671
|
+
if field.empty? or
|
1672
|
+
field.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
|
1673
|
+
do_quote.call(field)
|
1634
1674
|
else
|
1635
1675
|
field # unquoted field
|
1636
1676
|
end
|
@@ -1645,19 +1685,24 @@ class FasterCSV
|
|
1645
1685
|
@skip_blanks = options.delete(:skip_blanks)
|
1646
1686
|
|
1647
1687
|
# prebuild Regexps for faster parsing
|
1648
|
-
|
1688
|
+
esc_col_sep = Regexp.escape(@col_sep)
|
1689
|
+
esc_row_sep = Regexp.escape(@row_sep)
|
1690
|
+
esc_quote = Regexp.escape(@quote_char)
|
1691
|
+
@parsers = {
|
1649
1692
|
:leading_fields =>
|
1650
|
-
/\A(?:#{
|
1693
|
+
/\A(?:#{esc_col_sep})+/, # for empty leading fields
|
1651
1694
|
:csv_row =>
|
1652
1695
|
### The Primary Parser ###
|
1653
|
-
/ \G(?:^|#{
|
1654
|
-
(?:
|
1696
|
+
/ \G(?:^|#{esc_col_sep}) # anchor the match
|
1697
|
+
(?: #{esc_quote}( (?>[^#{esc_quote}]*) # find quoted fields
|
1698
|
+
(?> #{esc_quote*2}
|
1699
|
+
[^#{esc_quote}]* )* )#{esc_quote}
|
1655
1700
|
| # ... or ...
|
1656
|
-
([
|
1701
|
+
([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
|
1657
1702
|
)/x,
|
1658
1703
|
### End Primary Parser ###
|
1659
1704
|
:line_end =>
|
1660
|
-
/#{
|
1705
|
+
/#{esc_row_sep}\z/ # safer than chomp!()
|
1661
1706
|
}
|
1662
1707
|
end
|
1663
1708
|
|
data/test/tc_data_converters.rb
CHANGED
@@ -45,7 +45,10 @@ class TestDataConverters < Test::Unit::TestCase
|
|
45
45
|
|
46
46
|
def test_builtin_date_converter
|
47
47
|
# does convert
|
48
|
-
assert_instance_of(
|
48
|
+
assert_instance_of(
|
49
|
+
Date,
|
50
|
+
FasterCSV::Converters[:date][@win_safe_time_str.sub(/\d+:\d+:\d+ /, "")]
|
51
|
+
)
|
49
52
|
|
50
53
|
# does not convert
|
51
54
|
assert_instance_of(String, FasterCSV::Converters[:date]["junk"])
|
data/test/tc_features.rb
CHANGED
@@ -59,6 +59,14 @@ class TestFasterCSVFeatures < Test::Unit::TestCase
|
|
59
59
|
:row_sep => "\r\n") )
|
60
60
|
end
|
61
61
|
|
62
|
+
def test_quote_char
|
63
|
+
TEST_CASES.each do |test_case|
|
64
|
+
assert_equal( test_case.last.map { |t| t.tr('"', "'") unless t.nil? },
|
65
|
+
FasterCSV.parse_line( test_case.first.tr('"', "'"),
|
66
|
+
:quote_char => "'" ) )
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
62
70
|
def test_row_sep_auto_discovery
|
63
71
|
["\r\n", "\n", "\r"].each do |line_end|
|
64
72
|
data = "1,2,3#{line_end}4,5#{line_end}"
|
data/test/tc_interface.rb
CHANGED
@@ -89,7 +89,7 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
89
89
|
end
|
90
90
|
assert_equal(@expected, data)
|
91
91
|
end
|
92
|
-
|
92
|
+
|
93
93
|
def test_table
|
94
94
|
table = FasterCSV.table(@path, :col_sep => "\t", :row_sep => "\r\n")
|
95
95
|
assert_instance_of(FasterCSV::Table, table)
|
@@ -134,6 +134,44 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
134
134
|
assert_instance_of(String, line)
|
135
135
|
assert_equal("1;2;3\n", line)
|
136
136
|
end
|
137
|
+
|
138
|
+
def test_write_header_detection
|
139
|
+
File.unlink(@path)
|
140
|
+
|
141
|
+
headers = %w{a b c}
|
142
|
+
FasterCSV.open(@path, "w", :headers => true) do |csv|
|
143
|
+
csv << headers
|
144
|
+
csv << %w{1 2 3}
|
145
|
+
assert_equal(headers, csv.instance_variable_get(:@headers))
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
def test_write_lineno
|
150
|
+
File.unlink(@path)
|
151
|
+
|
152
|
+
FasterCSV.open(@path, "w") do |csv|
|
153
|
+
lines = 20
|
154
|
+
lines.times { csv << %w{a b c} }
|
155
|
+
assert_equal(lines, csv.lineno)
|
156
|
+
end
|
157
|
+
end
|
158
|
+
|
159
|
+
def test_write_hash
|
160
|
+
File.unlink(@path)
|
161
|
+
|
162
|
+
lines = [{:a => 1, :b => 2, :c => 3}, {:a => 4, :b => 5, :c => 6}]
|
163
|
+
FasterCSV.open( @path, "w", :headers => true,
|
164
|
+
:converters => :all,
|
165
|
+
:header_converters => :symbol ) do |csv|
|
166
|
+
csv << lines.first.keys
|
167
|
+
lines.each { |line| csv << line }
|
168
|
+
end
|
169
|
+
FasterCSV.open( @path, "w", :headers => true,
|
170
|
+
:converters => :all,
|
171
|
+
:header_converters => :symbol ) do |csv|
|
172
|
+
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
173
|
+
end
|
174
|
+
end
|
137
175
|
|
138
176
|
def test_append # aliased add_row() and puts()
|
139
177
|
File.unlink(@path)
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.4
|
3
3
|
specification_version: 1
|
4
4
|
name: fastercsv
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.2.
|
7
|
-
date: 2007-
|
6
|
+
version: 1.2.1
|
7
|
+
date: 2007-09-20 00:00:00 -05:00
|
8
8
|
summary: FasterCSV is CSV, but faster, smaller, and cleaner.
|
9
9
|
require_paths:
|
10
10
|
- lib
|