fastercsv 1.2.3 → 1.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +16 -0
- data/lib/faster_csv.rb +142 -33
- data/test/tc_csv_parsing.rb +1 -1
- data/test/tc_encodings.rb +23 -0
- data/test/tc_features.rb +29 -0
- data/test/tc_headers.rb +15 -0
- data/test/tc_interface.rb +69 -1
- data/test/tc_row.rb +17 -0
- data/test/tc_speed.rb +27 -1
- data/test/tc_table.rb +9 -0
- data/test/ts_all.rb +1 -0
- metadata +48 -40
data/CHANGELOG
CHANGED
@@ -2,6 +2,22 @@
|
|
2
2
|
|
3
3
|
Below is a complete listing of changes for each revision of FasterCSV.
|
4
4
|
|
5
|
+
== 1.4.0
|
6
|
+
|
7
|
+
* Added encoding support patch from Michael Reinsch.
|
8
|
+
* Improved inspect() messages for better IRb support.
|
9
|
+
* Fixed header writing bug reported by Dov Murik.
|
10
|
+
* Use custom separators in parsing header Strings as suggested by Shmulik Regev.
|
11
|
+
* Added a <tt>:write_headers</tt> option for outputting headers.
|
12
|
+
* Handle open() calls in binary mode whenever we can to work around a Windows
|
13
|
+
issue where line-ending translation can cause an off-by-one error in seeking
|
14
|
+
back to a non-zero starting position after auto-discovery for
|
15
|
+
<tt>:row_sep</tt> as suggested by Robert Battle.
|
16
|
+
* Improved the parser to fail faster when fed some forms of invalid CSV that can
|
17
|
+
be detected without reading ahead.
|
18
|
+
* Added a <tt>:field_size_limit</tt> option to control FasterCSV's lookahead and
|
19
|
+
prevent the parser from biting off more data than it can chew.
|
20
|
+
|
5
21
|
== 1.2.3
|
6
22
|
|
7
23
|
* Default to the system line ending when passed a GzipWriter object to wrap.
|
data/lib/faster_csv.rb
CHANGED
@@ -75,7 +75,7 @@ require "stringio"
|
|
75
75
|
#
|
76
76
|
class FasterCSV
|
77
77
|
# The version of the installed library.
|
78
|
-
VERSION = "1.
|
78
|
+
VERSION = "1.4.0".freeze
|
79
79
|
|
80
80
|
#
|
81
81
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
@@ -363,6 +363,16 @@ class FasterCSV
|
|
363
363
|
fields.to_csv(options)
|
364
364
|
end
|
365
365
|
alias_method :to_s, :to_csv
|
366
|
+
|
367
|
+
# A summary of fields, by header.
|
368
|
+
def inspect
|
369
|
+
str = "#<#{self.class}"
|
370
|
+
each do |header, field|
|
371
|
+
str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
|
372
|
+
field.inspect
|
373
|
+
end
|
374
|
+
str << ">"
|
375
|
+
end
|
366
376
|
end
|
367
377
|
|
368
378
|
#
|
@@ -695,6 +705,10 @@ class FasterCSV
|
|
695
705
|
end.join
|
696
706
|
end
|
697
707
|
alias_method :to_s, :to_csv
|
708
|
+
|
709
|
+
def inspect
|
710
|
+
"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
|
711
|
+
end
|
698
712
|
end
|
699
713
|
|
700
714
|
# The error thrown when the parser encounters illegal CSV formatting.
|
@@ -999,7 +1013,7 @@ class FasterCSV
|
|
999
1013
|
# The +options+ parameter can be anything FasterCSV::new() understands.
|
1000
1014
|
#
|
1001
1015
|
def self.foreach(path, options = Hash.new, &block)
|
1002
|
-
open(path, options) do |csv|
|
1016
|
+
open(path, "rb", options) do |csv|
|
1003
1017
|
csv.each(&block)
|
1004
1018
|
end
|
1005
1019
|
end
|
@@ -1120,8 +1134,8 @@ class FasterCSV
|
|
1120
1134
|
|
1121
1135
|
#
|
1122
1136
|
# :call-seq:
|
1123
|
-
# open( filename, mode="
|
1124
|
-
# open( filename, mode="
|
1137
|
+
# open( filename, mode="rb", options = Hash.new ) { |faster_csv| ... }
|
1138
|
+
# open( filename, mode="rb", options = Hash.new )
|
1125
1139
|
#
|
1126
1140
|
# This method opens an IO object, and wraps that with FasterCSV. This is
|
1127
1141
|
# intended as the primary interface for writing a CSV file.
|
@@ -1166,6 +1180,8 @@ class FasterCSV
|
|
1166
1180
|
def self.open(*args)
|
1167
1181
|
# find the +options+ Hash
|
1168
1182
|
options = if args.last.is_a? Hash then args.pop else Hash.new end
|
1183
|
+
# default to a binary open mode
|
1184
|
+
args << "rb" if args.size == 1
|
1169
1185
|
# wrap a File opened with the remaining +args+
|
1170
1186
|
csv = new(File.open(*args), options)
|
1171
1187
|
|
@@ -1222,7 +1238,7 @@ class FasterCSV
|
|
1222
1238
|
# file and any +options+ FasterCSV::new() understands.
|
1223
1239
|
#
|
1224
1240
|
def self.read(path, options = Hash.new)
|
1225
|
-
open(path, options) { |csv| csv.read }
|
1241
|
+
open(path, "rb", options) { |csv| csv.read }
|
1226
1242
|
end
|
1227
1243
|
|
1228
1244
|
# Alias for FasterCSV::read().
|
@@ -1279,7 +1295,14 @@ class FasterCSV
|
|
1279
1295
|
# <tt>$INPUT_RECORD_SEPARATOR</tt>
|
1280
1296
|
# (<tt>$/</tt>) is used. Obviously,
|
1281
1297
|
# discovery takes a little time. Set
|
1282
|
-
# manually if speed is important.
|
1298
|
+
# manually if speed is important. Also
|
1299
|
+
# note that IO objects should be opened
|
1300
|
+
# in binary mode on Windows if this
|
1301
|
+
# feature will be used as the
|
1302
|
+
# line-ending translation can cause
|
1303
|
+
# problems with resetting the document
|
1304
|
+
# position to where it was before the
|
1305
|
+
# read ahead.
|
1283
1306
|
# <b><tt>:quote_char</tt></b>:: The character used to quote fields.
|
1284
1307
|
# This has to be a single character
|
1285
1308
|
# String. This is useful for
|
@@ -1289,6 +1312,27 @@ class FasterCSV
|
|
1289
1312
|
# FasterCSV will always consider a
|
1290
1313
|
# double sequence of this character to be
|
1291
1314
|
# an escaped quote.
|
1315
|
+
# <b><tt>:encoding</tt></b>:: The encoding to use when parsing the
|
1316
|
+
# file. Defaults to your <tt>$KCODE</tt>
|
1317
|
+
# setting. Valid values: <tt>`n’</tt> or
|
1318
|
+
# <tt>`N’</tt> for none, <tt>`e’</tt> or
|
1319
|
+
# <tt>`E’</tt> for EUC, <tt>`s’</tt> or
|
1320
|
+
# <tt>`S’</tt> for SJIS, and
|
1321
|
+
# <tt>`u’</tt> or <tt>`U’</tt> for UTF-8
|
1322
|
+
# (see Regexp.new()).
|
1323
|
+
# <b><tt>:field_size_limit</tt></b>:: This is a maximum size FasterCSV will
|
1324
|
+
# read ahead looking for the closing
|
1325
|
+
# quote for a field. (In truth, it
|
1326
|
+
# reads to the first line ending beyond
|
1327
|
+
# this size.) If a quote cannot be
|
1328
|
+
# found within the limit FasterCSV will
|
1329
|
+
# raise a MalformedCSVError, assuming
|
1330
|
+
# the data is faulty. You can use this
|
1331
|
+
# limit to prevent what are effectively
|
1332
|
+
# DoS attacks on the parser. However,
|
1333
|
+
# this limit can cause a legitimate
|
1334
|
+
# parse to fail and thus is set to
|
1335
|
+
# +nil+, or off, by default.
|
1292
1336
|
# <b><tt>:converters</tt></b>:: An Array of names from the Converters
|
1293
1337
|
# Hash and/or lambdas that handle custom
|
1294
1338
|
# conversion. A single converter
|
@@ -1309,8 +1353,11 @@ class FasterCSV
|
|
1309
1353
|
# contents will be used as the headers.
|
1310
1354
|
# If set to a String, the String is run
|
1311
1355
|
# through a call of
|
1312
|
-
# FasterCSV::parse_line()
|
1313
|
-
#
|
1356
|
+
# FasterCSV::parse_line() with the same
|
1357
|
+
# <tt>:col_sep</tt>, <tt>:row_sep</tt>,
|
1358
|
+
# and <tt>:quote_char</tt> as this
|
1359
|
+
# instance to produce an Array of
|
1360
|
+
# headers. This setting causes
|
1314
1361
|
# FasterCSV.shift() to return rows as
|
1315
1362
|
# FasterCSV::Row objects instead of
|
1316
1363
|
# Arrays and FasterCSV.read() to return
|
@@ -1322,6 +1369,9 @@ class FasterCSV
|
|
1322
1369
|
# object with identical headers and
|
1323
1370
|
# fields (save that the fields do not go
|
1324
1371
|
# through the converters).
|
1372
|
+
# <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
|
1373
|
+
# set, a header row will be added to the
|
1374
|
+
# output.
|
1325
1375
|
# <b><tt>:header_converters</tt></b>:: Identical in functionality to
|
1326
1376
|
# <tt>:converters</tt> save that the
|
1327
1377
|
# conversions are only made to header
|
@@ -1390,12 +1440,18 @@ class FasterCSV
|
|
1390
1440
|
# The data source must be open for writing.
|
1391
1441
|
#
|
1392
1442
|
def <<(row)
|
1443
|
+
# make sure headers have been assigned
|
1444
|
+
if header_row? and [Array, String].include? @use_headers.class
|
1445
|
+
parse_headers # won't read data for Array or String
|
1446
|
+
self << @headers if @write_headers
|
1447
|
+
end
|
1448
|
+
|
1393
1449
|
# Handle FasterCSV::Row objects and Hashes
|
1394
1450
|
row = case row
|
1395
|
-
|
1396
|
-
|
1397
|
-
|
1398
|
-
|
1451
|
+
when self.class::Row then row.fields
|
1452
|
+
when Hash then @headers.map { |header| row[header] }
|
1453
|
+
else row
|
1454
|
+
end
|
1399
1455
|
|
1400
1456
|
@headers = row if header_row?
|
1401
1457
|
@lineno += 1
|
@@ -1513,7 +1569,7 @@ class FasterCSV
|
|
1513
1569
|
# add another read to the line
|
1514
1570
|
line += @io.gets(@row_sep) rescue return nil
|
1515
1571
|
# copy the line so we can chop it up in parsing
|
1516
|
-
parse =
|
1572
|
+
parse = line.dup
|
1517
1573
|
parse.sub!(@parsers[:line_end], "")
|
1518
1574
|
|
1519
1575
|
#
|
@@ -1590,6 +1646,10 @@ class FasterCSV
|
|
1590
1646
|
# if we're not empty?() but at eof?(), a quoted field wasn't closed...
|
1591
1647
|
if @io.eof?
|
1592
1648
|
raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
|
1649
|
+
elsif parse =~ @parsers[:bad_field]
|
1650
|
+
raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
|
1651
|
+
elsif @field_size_limit and parse.length >= @field_size_limit
|
1652
|
+
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
|
1593
1653
|
end
|
1594
1654
|
# otherwise, we need to loop and pull some more data to complete the row
|
1595
1655
|
end
|
@@ -1597,6 +1657,32 @@ class FasterCSV
|
|
1597
1657
|
alias_method :gets, :shift
|
1598
1658
|
alias_method :readline, :shift
|
1599
1659
|
|
1660
|
+
# Returns a simplified description of the key FasterCSV attributes.
|
1661
|
+
def inspect
|
1662
|
+
str = "<##{self.class} io_type:"
|
1663
|
+
# show type of wrapped IO
|
1664
|
+
if @io == $stdout then str << "$stdout"
|
1665
|
+
elsif @io == $stdin then str << "$stdin"
|
1666
|
+
elsif @io == $stderr then str << "$stderr"
|
1667
|
+
else str << @io.class.to_s
|
1668
|
+
end
|
1669
|
+
# show IO.path(), if available
|
1670
|
+
if @io.respond_to?(:path) and (p = @io.path)
|
1671
|
+
str << " io_path:#{p.inspect}"
|
1672
|
+
end
|
1673
|
+
# show other attributes
|
1674
|
+
%w[ lineno col_sep row_sep
|
1675
|
+
quote_char skip_blanks encoding ].each do |attr_name|
|
1676
|
+
if a = instance_variable_get("@#{attr_name}")
|
1677
|
+
str << " #{attr_name}:#{a.inspect}"
|
1678
|
+
end
|
1679
|
+
end
|
1680
|
+
if @use_headers
|
1681
|
+
str << " headers:#{(@headers || true).inspect}"
|
1682
|
+
end
|
1683
|
+
str << ">"
|
1684
|
+
end
|
1685
|
+
|
1600
1686
|
private
|
1601
1687
|
|
1602
1688
|
#
|
@@ -1690,27 +1776,42 @@ class FasterCSV
|
|
1690
1776
|
# Pre-compiles parsers and stores them by name for access during reads.
|
1691
1777
|
def init_parsers(options)
|
1692
1778
|
# store the parser behaviors
|
1693
|
-
@skip_blanks
|
1694
|
-
|
1779
|
+
@skip_blanks = options.delete(:skip_blanks)
|
1780
|
+
@encoding = options.delete(:encoding) # nil will use $KCODE
|
1781
|
+
@field_size_limit = options.delete(:field_size_limit)
|
1782
|
+
|
1695
1783
|
# prebuild Regexps for faster parsing
|
1696
1784
|
esc_col_sep = Regexp.escape(@col_sep)
|
1697
1785
|
esc_row_sep = Regexp.escape(@row_sep)
|
1698
1786
|
esc_quote = Regexp.escape(@quote_char)
|
1699
1787
|
@parsers = {
|
1700
|
-
|
1701
|
-
|
1702
|
-
|
1703
|
-
|
1704
|
-
|
1705
|
-
|
1706
|
-
|
1707
|
-
|
1708
|
-
|
1709
|
-
|
1710
|
-
|
1711
|
-
|
1712
|
-
|
1713
|
-
|
1788
|
+
# for empty leading fields
|
1789
|
+
:leading_fields => Regexp.new("\\A(?:#{esc_col_sep})+", nil, @encoding),
|
1790
|
+
# The Primary Parser
|
1791
|
+
:csv_row => Regexp.new(<<-END_PARSER, Regexp::EXTENDED, @encoding),
|
1792
|
+
\\G(?:\\A|#{esc_col_sep}) # anchor the match
|
1793
|
+
(?: #{esc_quote}( (?>[^#{esc_quote}]*) # find quoted fields
|
1794
|
+
(?> #{esc_quote*2}
|
1795
|
+
[^#{esc_quote}]* )* )#{esc_quote}
|
1796
|
+
| # ... or ...
|
1797
|
+
([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
|
1798
|
+
)
|
1799
|
+
(?=#{esc_col_sep}|\\z) # ensure we are at field's end
|
1800
|
+
END_PARSER
|
1801
|
+
# a test for unescaped quotes
|
1802
|
+
:bad_field => Regexp.new(<<-END_BAD, Regexp::EXTENDED, @encoding),
|
1803
|
+
\\A#{esc_col_sep}? # starts with an optional comma
|
1804
|
+
(?: #{esc_quote} (?>[^#{esc_quote}]*) # an extra quote
|
1805
|
+
(?> #{esc_quote*2}
|
1806
|
+
[^#{esc_quote}]* )*
|
1807
|
+
#{esc_quote}[^#{esc_quote}]
|
1808
|
+
| # ... or ...
|
1809
|
+
[^#{esc_quote}#{esc_col_sep}]+
|
1810
|
+
#{esc_quote} # unescaped quote
|
1811
|
+
)
|
1812
|
+
END_BAD
|
1813
|
+
# safer than chomp!()
|
1814
|
+
:line_end => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
|
1714
1815
|
}
|
1715
1816
|
end
|
1716
1817
|
|
@@ -1757,6 +1858,7 @@ class FasterCSV
|
|
1757
1858
|
def init_headers(options)
|
1758
1859
|
@use_headers = options.delete(:headers)
|
1759
1860
|
@return_headers = options.delete(:return_headers)
|
1861
|
+
@write_headers = options.delete(:write_headers)
|
1760
1862
|
|
1761
1863
|
# headers must be delayed until shift(), in case they need a row of content
|
1762
1864
|
@headers = nil
|
@@ -1827,10 +1929,17 @@ class FasterCSV
|
|
1827
1929
|
def parse_headers(row = nil)
|
1828
1930
|
if @headers.nil? # header row
|
1829
1931
|
@headers = case @use_headers # save headers
|
1830
|
-
|
1831
|
-
|
1832
|
-
|
1833
|
-
|
1932
|
+
# Array of headers
|
1933
|
+
when Array then @use_headers
|
1934
|
+
# CSV header String
|
1935
|
+
when String
|
1936
|
+
self.class.parse_line( @use_headers,
|
1937
|
+
:col_sep => @col_sep,
|
1938
|
+
:row_sep => @row_sep,
|
1939
|
+
:quote_char => @quote_char )
|
1940
|
+
# first row is headers
|
1941
|
+
else row
|
1942
|
+
end
|
1834
1943
|
|
1835
1944
|
# prepare converted and unconverted copies
|
1836
1945
|
row = @headers if row.nil?
|
data/test/tc_csv_parsing.rb
CHANGED
@@ -158,7 +158,7 @@ class TestCSVParsing < Test::Unit::TestCase
|
|
158
158
|
assert_send([csv.lineno, :<, 4])
|
159
159
|
end
|
160
160
|
rescue FasterCSV::MalformedCSVError
|
161
|
-
assert_equal("
|
161
|
+
assert_equal("Illegal quoting on line 4.", $!.message)
|
162
162
|
end
|
163
163
|
end
|
164
164
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/local/bin/ruby -w
|
2
|
+
|
3
|
+
# tc_encodings.rb
|
4
|
+
#
|
5
|
+
# Created by Michael Reinsch.
|
6
|
+
# Copyright (c) 2008 Ubiquitous Business Technology, Inc.
|
7
|
+
|
8
|
+
require "test/unit"
|
9
|
+
|
10
|
+
require "faster_csv"
|
11
|
+
|
12
|
+
class TestEncodings < Test::Unit::TestCase
|
13
|
+
def test_with_shift_jis_encoding
|
14
|
+
$KCODE = 'u' # make sure $KCODE != Shift_JIS
|
15
|
+
# this test data will not work with UTF-8 encoding
|
16
|
+
shift_jis_data = [ "82D082E782AA82C82094E0",
|
17
|
+
"82D082E7826082AA825C",
|
18
|
+
"82D082E7826082AA82C8" ].map { |f| [f].pack("H*") }
|
19
|
+
fields = FCSV.parse_line( shift_jis_data.map { |f| %Q{"#{f}"} }.join(","),
|
20
|
+
:encoding => "s" )
|
21
|
+
assert_equal(shift_jis_data, fields)
|
22
|
+
end
|
23
|
+
end
|
data/test/tc_features.rb
CHANGED
@@ -174,6 +174,35 @@ class TestFasterCSVFeatures < Test::Unit::TestCase
|
|
174
174
|
File.unlink(file)
|
175
175
|
end
|
176
176
|
|
177
|
+
def test_inspect_is_smart_about_io_types
|
178
|
+
str = FasterCSV.new("string,data").inspect
|
179
|
+
assert(str.include?("io_type:StringIO"), "IO type not detected.")
|
180
|
+
|
181
|
+
str = FasterCSV.new($stderr).inspect
|
182
|
+
assert(str.include?("io_type:$stderr"), "IO type not detected.")
|
183
|
+
|
184
|
+
str = FasterCSV.open( File.join( File.dirname(__FILE__),
|
185
|
+
"test_data.csv" ) ) { |csv| csv.inspect }
|
186
|
+
assert(str.include?("io_type:File"), "IO type not detected.")
|
187
|
+
end
|
188
|
+
|
189
|
+
def test_inspect_shows_key_attributes
|
190
|
+
str = @csv.inspect
|
191
|
+
%w[lineno col_sep row_sep quote_char].each do |attr_name|
|
192
|
+
assert_match(/\b#{attr_name}:[^\s>]+/, str)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def test_inspect_shows_headers_when_available
|
197
|
+
FasterCSV.open( File.join( File.dirname(__FILE__),
|
198
|
+
"test_data.csv" ),
|
199
|
+
:headers => true ) do |csv|
|
200
|
+
assert(csv.inspect.include?("headers:true"), "Header hint not shown.")
|
201
|
+
csv.shift # load headers
|
202
|
+
assert_match(/headers:\[[^\]]+\]/, csv.inspect)
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
177
206
|
def test_version
|
178
207
|
assert_not_nil(FasterCSV::VERSION)
|
179
208
|
assert_instance_of(String, FasterCSV::VERSION)
|
data/test/tc_headers.rb
CHANGED
@@ -130,6 +130,21 @@ class TestFasterCSVHeaders < Test::Unit::TestCase
|
|
130
130
|
assert(!row.field_row?)
|
131
131
|
end
|
132
132
|
|
133
|
+
def test_csv_header_string_inherits_separators
|
134
|
+
# parse with custom col_sep
|
135
|
+
csv = nil
|
136
|
+
assert_nothing_raised(Exception) do
|
137
|
+
csv = FasterCSV.parse( @data.tr(",", "|"), :col_sep => "|",
|
138
|
+
:headers => "my|new|headers" )
|
139
|
+
end
|
140
|
+
|
141
|
+
# verify headers were recognized
|
142
|
+
row = csv[0]
|
143
|
+
assert_not_nil(row)
|
144
|
+
assert_instance_of(FasterCSV::Row, row)
|
145
|
+
assert_equal([%w{my first}, %w{new second}, %w{headers third}], row.to_a)
|
146
|
+
end
|
147
|
+
|
133
148
|
def test_return_headers
|
134
149
|
# activate headers and request they are returned
|
135
150
|
csv = nil
|
data/test/tc_interface.rb
CHANGED
@@ -161,7 +161,6 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
161
161
|
|
162
162
|
lines = [{:a => 1, :b => 2, :c => 3}, {:a => 4, :b => 5, :c => 6}]
|
163
163
|
FasterCSV.open( @path, "w", :headers => true,
|
164
|
-
:converters => :all,
|
165
164
|
:header_converters => :symbol ) do |csv|
|
166
165
|
csv << lines.first.keys
|
167
166
|
lines.each { |line| csv << line }
|
@@ -172,6 +171,75 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
172
171
|
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
173
172
|
end
|
174
173
|
end
|
174
|
+
|
175
|
+
def test_write_hash_with_headers_array
|
176
|
+
File.unlink(@path)
|
177
|
+
|
178
|
+
lines = [{:a => 1, :b => 2, :c => 3}, {:a => 4, :b => 5, :c => 6}]
|
179
|
+
FasterCSV.open(@path, "w", :headers => [:b, :a, :c]) do |csv|
|
180
|
+
lines.each { |line| csv << line }
|
181
|
+
end
|
182
|
+
|
183
|
+
# test writing fields in the correct order
|
184
|
+
File.open(@path, "r") do |f|
|
185
|
+
assert_equal("2,1,3", f.gets.strip)
|
186
|
+
assert_equal("5,4,6", f.gets.strip)
|
187
|
+
end
|
188
|
+
|
189
|
+
# test reading CSV with headers
|
190
|
+
FasterCSV.open( @path, "r", :headers => [:b, :a, :c],
|
191
|
+
:converters => :all ) do |csv|
|
192
|
+
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def test_write_hash_with_headers_string
|
197
|
+
File.unlink(@path)
|
198
|
+
|
199
|
+
lines = [{"a" => 1, "b" => 2, "c" => 3}, {"a" => 4, "b" => 5, "c" => 6}]
|
200
|
+
FasterCSV.open( @path, "w", :headers => "b|a|c",
|
201
|
+
:col_sep => "|" ) do |csv|
|
202
|
+
lines.each { |line| csv << line }
|
203
|
+
end
|
204
|
+
|
205
|
+
# test writing fields in the correct order
|
206
|
+
File.open(@path, "r") do |f|
|
207
|
+
assert_equal("2|1|3", f.gets.strip)
|
208
|
+
assert_equal("5|4|6", f.gets.strip)
|
209
|
+
end
|
210
|
+
|
211
|
+
# test reading CSV with headers
|
212
|
+
FasterCSV.open( @path, "r", :headers => "b|a|c",
|
213
|
+
:col_sep => "|",
|
214
|
+
:converters => :all ) do |csv|
|
215
|
+
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
def test_write_headers
|
220
|
+
File.unlink(@path)
|
221
|
+
|
222
|
+
lines = [{"a" => 1, "b" => 2, "c" => 3}, {"a" => 4, "b" => 5, "c" => 6}]
|
223
|
+
FasterCSV.open( @path, "w", :headers => "b|a|c",
|
224
|
+
:write_headers => true,
|
225
|
+
:col_sep => "|" ) do |csv|
|
226
|
+
lines.each { |line| csv << line }
|
227
|
+
end
|
228
|
+
|
229
|
+
# test writing fields in the correct order
|
230
|
+
File.open(@path, "r") do |f|
|
231
|
+
assert_equal("b|a|c", f.gets.strip)
|
232
|
+
assert_equal("2|1|3", f.gets.strip)
|
233
|
+
assert_equal("5|4|6", f.gets.strip)
|
234
|
+
end
|
235
|
+
|
236
|
+
# test reading CSV with headers
|
237
|
+
FasterCSV.open( @path, "r", :headers => true,
|
238
|
+
:col_sep => "|",
|
239
|
+
:converters => :all ) do |csv|
|
240
|
+
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
241
|
+
end
|
242
|
+
end
|
175
243
|
|
176
244
|
def test_append # aliased add_row() and puts()
|
177
245
|
File.unlink(@path)
|
data/test/tc_row.rb
CHANGED
@@ -285,4 +285,21 @@ class TestFasterCSVRow < Test::Unit::TestCase
|
|
285
285
|
|
286
286
|
assert_equal([@row.headers.size, @row.fields.size].max, @row.size)
|
287
287
|
end
|
288
|
+
|
289
|
+
def test_inspect_shows_header_field_pairs
|
290
|
+
str = @row.inspect
|
291
|
+
@row.each do |header, field|
|
292
|
+
assert( str.include?("#{header.inspect}:#{field.inspect}"),
|
293
|
+
"Header field pair not found." )
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
def test_inspect_shows_symbol_headers_as_bare_attributes
|
298
|
+
str = FasterCSV::Row.new( @row.headers.map { |h| h.to_sym },
|
299
|
+
@row.fields ).inspect
|
300
|
+
@row.each do |header, field|
|
301
|
+
assert( str.include?("#{header}:#{field.inspect}"),
|
302
|
+
"Header field pair not found." )
|
303
|
+
end
|
304
|
+
end
|
288
305
|
end
|
data/test/tc_speed.rb
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# Copyright 2005 Gray Productions. All rights reserved.
|
7
7
|
|
8
8
|
require "test/unit"
|
9
|
+
require "timeout"
|
9
10
|
|
10
11
|
require "faster_csv"
|
11
12
|
require "csv"
|
12
13
|
|
13
14
|
class TestFasterCSVSpeed < Test::Unit::TestCase
|
14
|
-
PATH
|
15
|
+
PATH = File.join(File.dirname(__FILE__), "test_data.csv")
|
16
|
+
BIG_DATA = "123456789\n" * 1024
|
15
17
|
|
16
18
|
def test_that_we_are_doing_the_same_work
|
17
19
|
FasterCSV.open(PATH) do |csv|
|
@@ -36,4 +38,28 @@ class TestFasterCSVSpeed < Test::Unit::TestCase
|
|
36
38
|
|
37
39
|
assert(faster_csv_time < csv_time / 3)
|
38
40
|
end
|
41
|
+
|
42
|
+
def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
|
43
|
+
assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA)
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
|
47
|
+
assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA)
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_field_size_limit_controls_lookahead
|
51
|
+
assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
|
52
|
+
:field_size_limit => 2048 )
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def assert_parse_errors_out(*args)
|
58
|
+
assert_raise(FasterCSV::MalformedCSVError) do
|
59
|
+
Timeout.timeout(0.2) do
|
60
|
+
FasterCSV.parse(*args)
|
61
|
+
fail("Parse didn't error out")
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
39
65
|
end
|
data/test/tc_table.rb
CHANGED
@@ -388,4 +388,13 @@ class TestFasterCSVTable < Test::Unit::TestCase
|
|
388
388
|
|
389
389
|
assert_equal(@rows.size, @table.size)
|
390
390
|
end
|
391
|
+
|
392
|
+
def test_inspect_shows_current_mode
|
393
|
+
str = @table.inspect
|
394
|
+
assert(str.include?("mode:#{@table.mode}"), "Mode not shown.")
|
395
|
+
|
396
|
+
@table.by_col!
|
397
|
+
str = @table.inspect
|
398
|
+
assert(str.include?("mode:#{@table.mode}"), "Mode not shown.")
|
399
|
+
end
|
391
400
|
end
|
data/test/ts_all.rb
CHANGED
metadata
CHANGED
@@ -1,39 +1,39 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.4
|
3
|
-
specification_version: 1
|
4
2
|
name: fastercsv
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.2.3
|
7
|
-
date: 2007-12-02 00:00:00 -06:00
|
8
|
-
summary: FasterCSV is CSV, but faster, smaller, and cleaner.
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: james@grayproductions.net
|
12
|
-
homepage: http://fastercsv.rubyforge.org
|
13
|
-
rubyforge_project: fastercsv
|
14
|
-
description: FasterCSV is intended as a complete replacement to the CSV standard library. It is significantly faster and smaller while still being pure Ruby code. It also strives for a better interface.
|
15
|
-
autorequire:
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 1.4.0
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- James Edward Gray II
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-09-10 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: FasterCSV is intended as a complete replacement to the CSV standard library. It is significantly faster and smaller while still being pure Ruby code. It also strives for a better interface.
|
17
|
+
email: james@grayproductions.net
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- AUTHORS
|
24
|
+
- COPYING
|
25
|
+
- README
|
26
|
+
- INSTALL
|
27
|
+
- TODO
|
28
|
+
- CHANGELOG
|
29
|
+
- LICENSE
|
31
30
|
files:
|
32
31
|
- lib/faster_csv.rb
|
33
32
|
- lib/fastercsv.rb
|
34
33
|
- test/tc_csv_parsing.rb
|
35
34
|
- test/tc_csv_writing.rb
|
36
35
|
- test/tc_data_converters.rb
|
36
|
+
- test/tc_encodings.rb
|
37
37
|
- test/tc_features.rb
|
38
38
|
- test/tc_headers.rb
|
39
39
|
- test/tc_interface.rb
|
@@ -59,26 +59,34 @@ files:
|
|
59
59
|
- TODO
|
60
60
|
- CHANGELOG
|
61
61
|
- LICENSE
|
62
|
-
|
63
|
-
|
62
|
+
has_rdoc: true
|
63
|
+
homepage: http://fastercsv.rubyforge.org
|
64
|
+
post_install_message:
|
64
65
|
rdoc_options:
|
65
66
|
- --title
|
66
67
|
- FasterCSV Documentation
|
67
68
|
- --main
|
68
69
|
- README
|
69
|
-
|
70
|
-
-
|
71
|
-
|
72
|
-
|
73
|
-
-
|
74
|
-
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: "0"
|
83
|
+
version:
|
81
84
|
requirements: []
|
82
85
|
|
83
|
-
|
84
|
-
|
86
|
+
rubyforge_project: fastercsv
|
87
|
+
rubygems_version: 1.2.0
|
88
|
+
signing_key:
|
89
|
+
specification_version: 2
|
90
|
+
summary: FasterCSV is CSV, but faster, smaller, and cleaner.
|
91
|
+
test_files:
|
92
|
+
- test/ts_all.rb
|