fastercsv 1.2.3 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +16 -0
- data/lib/faster_csv.rb +142 -33
- data/test/tc_csv_parsing.rb +1 -1
- data/test/tc_encodings.rb +23 -0
- data/test/tc_features.rb +29 -0
- data/test/tc_headers.rb +15 -0
- data/test/tc_interface.rb +69 -1
- data/test/tc_row.rb +17 -0
- data/test/tc_speed.rb +27 -1
- data/test/tc_table.rb +9 -0
- data/test/ts_all.rb +1 -0
- metadata +48 -40
data/CHANGELOG
CHANGED
@@ -2,6 +2,22 @@
|
|
2
2
|
|
3
3
|
Below is a complete listing of changes for each revision of FasterCSV.
|
4
4
|
|
5
|
+
== 1.4.0
|
6
|
+
|
7
|
+
* Added encoding support patch from Michael Reinsch.
|
8
|
+
* Improved inspect() messages for better IRb support.
|
9
|
+
* Fixed header writing bug reported by Dov Murik.
|
10
|
+
* Use custom separators in parsing header Strings as suggested by Shmulik Regev.
|
11
|
+
* Added a <tt>:write_headers</tt> option for outputting headers.
|
12
|
+
* Handle open() calls in binary mode whenever we can to work around a Windows
|
13
|
+
issue where line-ending translation can cause an off-by-one error in seeking
|
14
|
+
back to a non-zero starting position after auto-discovery for
|
15
|
+
<tt>:row_sep</tt> as suggested by Robert Battle.
|
16
|
+
* Improved the parser to fail faster when fed some forms of invalid CSV that can
|
17
|
+
be detected without reading ahead.
|
18
|
+
* Added a <tt>:field_size_limit</tt> option to control FasterCSV's lookahead and
|
19
|
+
prevent the parser from biting off more data than it can chew.
|
20
|
+
|
5
21
|
== 1.2.3
|
6
22
|
|
7
23
|
* Default to the system line ending when passed a GzipWriter object to wrap.
|
data/lib/faster_csv.rb
CHANGED
@@ -75,7 +75,7 @@ require "stringio"
|
|
75
75
|
#
|
76
76
|
class FasterCSV
|
77
77
|
# The version of the installed library.
|
78
|
-
VERSION = "1.
|
78
|
+
VERSION = "1.4.0".freeze
|
79
79
|
|
80
80
|
#
|
81
81
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
@@ -363,6 +363,16 @@ class FasterCSV
|
|
363
363
|
fields.to_csv(options)
|
364
364
|
end
|
365
365
|
alias_method :to_s, :to_csv
|
366
|
+
|
367
|
+
# A summary of fields, by header.
|
368
|
+
def inspect
|
369
|
+
str = "#<#{self.class}"
|
370
|
+
each do |header, field|
|
371
|
+
str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
|
372
|
+
field.inspect
|
373
|
+
end
|
374
|
+
str << ">"
|
375
|
+
end
|
366
376
|
end
|
367
377
|
|
368
378
|
#
|
@@ -695,6 +705,10 @@ class FasterCSV
|
|
695
705
|
end.join
|
696
706
|
end
|
697
707
|
alias_method :to_s, :to_csv
|
708
|
+
|
709
|
+
def inspect
|
710
|
+
"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
|
711
|
+
end
|
698
712
|
end
|
699
713
|
|
700
714
|
# The error thrown when the parser encounters illegal CSV formatting.
|
@@ -999,7 +1013,7 @@ class FasterCSV
|
|
999
1013
|
# The +options+ parameter can be anything FasterCSV::new() understands.
|
1000
1014
|
#
|
1001
1015
|
def self.foreach(path, options = Hash.new, &block)
|
1002
|
-
open(path, options) do |csv|
|
1016
|
+
open(path, "rb", options) do |csv|
|
1003
1017
|
csv.each(&block)
|
1004
1018
|
end
|
1005
1019
|
end
|
@@ -1120,8 +1134,8 @@ class FasterCSV
|
|
1120
1134
|
|
1121
1135
|
#
|
1122
1136
|
# :call-seq:
|
1123
|
-
# open( filename, mode="
|
1124
|
-
# open( filename, mode="
|
1137
|
+
# open( filename, mode="rb", options = Hash.new ) { |faster_csv| ... }
|
1138
|
+
# open( filename, mode="rb", options = Hash.new )
|
1125
1139
|
#
|
1126
1140
|
# This method opens an IO object, and wraps that with FasterCSV. This is
|
1127
1141
|
# intended as the primary interface for writing a CSV file.
|
@@ -1166,6 +1180,8 @@ class FasterCSV
|
|
1166
1180
|
def self.open(*args)
|
1167
1181
|
# find the +options+ Hash
|
1168
1182
|
options = if args.last.is_a? Hash then args.pop else Hash.new end
|
1183
|
+
# default to a binary open mode
|
1184
|
+
args << "rb" if args.size == 1
|
1169
1185
|
# wrap a File opened with the remaining +args+
|
1170
1186
|
csv = new(File.open(*args), options)
|
1171
1187
|
|
@@ -1222,7 +1238,7 @@ class FasterCSV
|
|
1222
1238
|
# file and any +options+ FasterCSV::new() understands.
|
1223
1239
|
#
|
1224
1240
|
def self.read(path, options = Hash.new)
|
1225
|
-
open(path, options) { |csv| csv.read }
|
1241
|
+
open(path, "rb", options) { |csv| csv.read }
|
1226
1242
|
end
|
1227
1243
|
|
1228
1244
|
# Alias for FasterCSV::read().
|
@@ -1279,7 +1295,14 @@ class FasterCSV
|
|
1279
1295
|
# <tt>$INPUT_RECORD_SEPARATOR</tt>
|
1280
1296
|
# (<tt>$/</tt>) is used. Obviously,
|
1281
1297
|
# discovery takes a little time. Set
|
1282
|
-
# manually if speed is important.
|
1298
|
+
# manually if speed is important. Also
|
1299
|
+
# note that IO objects should be opened
|
1300
|
+
# in binary mode on Windows if this
|
1301
|
+
# feature will be used as the
|
1302
|
+
# line-ending translation can cause
|
1303
|
+
# problems with resetting the document
|
1304
|
+
# position to where it was before the
|
1305
|
+
# read ahead.
|
1283
1306
|
# <b><tt>:quote_char</tt></b>:: The character used to quote fields.
|
1284
1307
|
# This has to be a single character
|
1285
1308
|
# String. This is useful for
|
@@ -1289,6 +1312,27 @@ class FasterCSV
|
|
1289
1312
|
# FasterCSV will always consider a
|
1290
1313
|
# double sequence of this character to be
|
1291
1314
|
# an escaped quote.
|
1315
|
+
# <b><tt>:encoding</tt></b>:: The encoding to use when parsing the
|
1316
|
+
# file. Defaults to your <tt>$KCODE</tt>
|
1317
|
+
# setting. Valid values: <tt>`n’</tt> or
|
1318
|
+
# <tt>`N’</tt> for none, <tt>`e’</tt> or
|
1319
|
+
# <tt>`E’</tt> for EUC, <tt>`s’</tt> or
|
1320
|
+
# <tt>`S’</tt> for SJIS, and
|
1321
|
+
# <tt>`u’</tt> or <tt>`U’</tt> for UTF-8
|
1322
|
+
# (see Regexp.new()).
|
1323
|
+
# <b><tt>:field_size_limit</tt></b>:: This is a maximum size FasterCSV will
|
1324
|
+
# read ahead looking for the closing
|
1325
|
+
# quote for a field. (In truth, it
|
1326
|
+
# reads to the first line ending beyond
|
1327
|
+
# this size.) If a quote cannot be
|
1328
|
+
# found within the limit FasterCSV will
|
1329
|
+
# raise a MalformedCSVError, assuming
|
1330
|
+
# the data is faulty. You can use this
|
1331
|
+
# limit to prevent what are effectively
|
1332
|
+
# DoS attacks on the parser. However,
|
1333
|
+
# this limit can cause a legitimate
|
1334
|
+
# parse to fail and thus is set to
|
1335
|
+
# +nil+, or off, by default.
|
1292
1336
|
# <b><tt>:converters</tt></b>:: An Array of names from the Converters
|
1293
1337
|
# Hash and/or lambdas that handle custom
|
1294
1338
|
# conversion. A single converter
|
@@ -1309,8 +1353,11 @@ class FasterCSV
|
|
1309
1353
|
# contents will be used as the headers.
|
1310
1354
|
# If set to a String, the String is run
|
1311
1355
|
# through a call of
|
1312
|
-
# FasterCSV::parse_line()
|
1313
|
-
#
|
1356
|
+
# FasterCSV::parse_line() with the same
|
1357
|
+
# <tt>:col_sep</tt>, <tt>:row_sep</tt>,
|
1358
|
+
# and <tt>:quote_char</tt> as this
|
1359
|
+
# instance to produce an Array of
|
1360
|
+
# headers. This setting causes
|
1314
1361
|
# FasterCSV.shift() to return rows as
|
1315
1362
|
# FasterCSV::Row objects instead of
|
1316
1363
|
# Arrays and FasterCSV.read() to return
|
@@ -1322,6 +1369,9 @@ class FasterCSV
|
|
1322
1369
|
# object with identical headers and
|
1323
1370
|
# fields (save that the fields do not go
|
1324
1371
|
# through the converters).
|
1372
|
+
# <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
|
1373
|
+
# set, a header row will be added to the
|
1374
|
+
# output.
|
1325
1375
|
# <b><tt>:header_converters</tt></b>:: Identical in functionality to
|
1326
1376
|
# <tt>:converters</tt> save that the
|
1327
1377
|
# conversions are only made to header
|
@@ -1390,12 +1440,18 @@ class FasterCSV
|
|
1390
1440
|
# The data source must be open for writing.
|
1391
1441
|
#
|
1392
1442
|
def <<(row)
|
1443
|
+
# make sure headers have been assigned
|
1444
|
+
if header_row? and [Array, String].include? @use_headers.class
|
1445
|
+
parse_headers # won't read data for Array or String
|
1446
|
+
self << @headers if @write_headers
|
1447
|
+
end
|
1448
|
+
|
1393
1449
|
# Handle FasterCSV::Row objects and Hashes
|
1394
1450
|
row = case row
|
1395
|
-
|
1396
|
-
|
1397
|
-
|
1398
|
-
|
1451
|
+
when self.class::Row then row.fields
|
1452
|
+
when Hash then @headers.map { |header| row[header] }
|
1453
|
+
else row
|
1454
|
+
end
|
1399
1455
|
|
1400
1456
|
@headers = row if header_row?
|
1401
1457
|
@lineno += 1
|
@@ -1513,7 +1569,7 @@ class FasterCSV
|
|
1513
1569
|
# add another read to the line
|
1514
1570
|
line += @io.gets(@row_sep) rescue return nil
|
1515
1571
|
# copy the line so we can chop it up in parsing
|
1516
|
-
parse =
|
1572
|
+
parse = line.dup
|
1517
1573
|
parse.sub!(@parsers[:line_end], "")
|
1518
1574
|
|
1519
1575
|
#
|
@@ -1590,6 +1646,10 @@ class FasterCSV
|
|
1590
1646
|
# if we're not empty?() but at eof?(), a quoted field wasn't closed...
|
1591
1647
|
if @io.eof?
|
1592
1648
|
raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
|
1649
|
+
elsif parse =~ @parsers[:bad_field]
|
1650
|
+
raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
|
1651
|
+
elsif @field_size_limit and parse.length >= @field_size_limit
|
1652
|
+
raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
|
1593
1653
|
end
|
1594
1654
|
# otherwise, we need to loop and pull some more data to complete the row
|
1595
1655
|
end
|
@@ -1597,6 +1657,32 @@ class FasterCSV
|
|
1597
1657
|
alias_method :gets, :shift
|
1598
1658
|
alias_method :readline, :shift
|
1599
1659
|
|
1660
|
+
# Returns a simplified description of the key FasterCSV attributes.
|
1661
|
+
def inspect
|
1662
|
+
str = "<##{self.class} io_type:"
|
1663
|
+
# show type of wrapped IO
|
1664
|
+
if @io == $stdout then str << "$stdout"
|
1665
|
+
elsif @io == $stdin then str << "$stdin"
|
1666
|
+
elsif @io == $stderr then str << "$stderr"
|
1667
|
+
else str << @io.class.to_s
|
1668
|
+
end
|
1669
|
+
# show IO.path(), if available
|
1670
|
+
if @io.respond_to?(:path) and (p = @io.path)
|
1671
|
+
str << " io_path:#{p.inspect}"
|
1672
|
+
end
|
1673
|
+
# show other attributes
|
1674
|
+
%w[ lineno col_sep row_sep
|
1675
|
+
quote_char skip_blanks encoding ].each do |attr_name|
|
1676
|
+
if a = instance_variable_get("@#{attr_name}")
|
1677
|
+
str << " #{attr_name}:#{a.inspect}"
|
1678
|
+
end
|
1679
|
+
end
|
1680
|
+
if @use_headers
|
1681
|
+
str << " headers:#{(@headers || true).inspect}"
|
1682
|
+
end
|
1683
|
+
str << ">"
|
1684
|
+
end
|
1685
|
+
|
1600
1686
|
private
|
1601
1687
|
|
1602
1688
|
#
|
@@ -1690,27 +1776,42 @@ class FasterCSV
|
|
1690
1776
|
# Pre-compiles parsers and stores them by name for access during reads.
|
1691
1777
|
def init_parsers(options)
|
1692
1778
|
# store the parser behaviors
|
1693
|
-
@skip_blanks
|
1694
|
-
|
1779
|
+
@skip_blanks = options.delete(:skip_blanks)
|
1780
|
+
@encoding = options.delete(:encoding) # nil will use $KCODE
|
1781
|
+
@field_size_limit = options.delete(:field_size_limit)
|
1782
|
+
|
1695
1783
|
# prebuild Regexps for faster parsing
|
1696
1784
|
esc_col_sep = Regexp.escape(@col_sep)
|
1697
1785
|
esc_row_sep = Regexp.escape(@row_sep)
|
1698
1786
|
esc_quote = Regexp.escape(@quote_char)
|
1699
1787
|
@parsers = {
|
1700
|
-
|
1701
|
-
|
1702
|
-
|
1703
|
-
|
1704
|
-
|
1705
|
-
|
1706
|
-
|
1707
|
-
|
1708
|
-
|
1709
|
-
|
1710
|
-
|
1711
|
-
|
1712
|
-
|
1713
|
-
|
1788
|
+
# for empty leading fields
|
1789
|
+
:leading_fields => Regexp.new("\\A(?:#{esc_col_sep})+", nil, @encoding),
|
1790
|
+
# The Primary Parser
|
1791
|
+
:csv_row => Regexp.new(<<-END_PARSER, Regexp::EXTENDED, @encoding),
|
1792
|
+
\\G(?:\\A|#{esc_col_sep}) # anchor the match
|
1793
|
+
(?: #{esc_quote}( (?>[^#{esc_quote}]*) # find quoted fields
|
1794
|
+
(?> #{esc_quote*2}
|
1795
|
+
[^#{esc_quote}]* )* )#{esc_quote}
|
1796
|
+
| # ... or ...
|
1797
|
+
([^#{esc_quote}#{esc_col_sep}]*) # unquoted fields
|
1798
|
+
)
|
1799
|
+
(?=#{esc_col_sep}|\\z) # ensure we are at field's end
|
1800
|
+
END_PARSER
|
1801
|
+
# a test for unescaped quotes
|
1802
|
+
:bad_field => Regexp.new(<<-END_BAD, Regexp::EXTENDED, @encoding),
|
1803
|
+
\\A#{esc_col_sep}? # starts with an optional comma
|
1804
|
+
(?: #{esc_quote} (?>[^#{esc_quote}]*) # an extra quote
|
1805
|
+
(?> #{esc_quote*2}
|
1806
|
+
[^#{esc_quote}]* )*
|
1807
|
+
#{esc_quote}[^#{esc_quote}]
|
1808
|
+
| # ... or ...
|
1809
|
+
[^#{esc_quote}#{esc_col_sep}]+
|
1810
|
+
#{esc_quote} # unescaped quote
|
1811
|
+
)
|
1812
|
+
END_BAD
|
1813
|
+
# safer than chomp!()
|
1814
|
+
:line_end => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
|
1714
1815
|
}
|
1715
1816
|
end
|
1716
1817
|
|
@@ -1757,6 +1858,7 @@ class FasterCSV
|
|
1757
1858
|
def init_headers(options)
|
1758
1859
|
@use_headers = options.delete(:headers)
|
1759
1860
|
@return_headers = options.delete(:return_headers)
|
1861
|
+
@write_headers = options.delete(:write_headers)
|
1760
1862
|
|
1761
1863
|
# headers must be delayed until shift(), in case they need a row of content
|
1762
1864
|
@headers = nil
|
@@ -1827,10 +1929,17 @@ class FasterCSV
|
|
1827
1929
|
def parse_headers(row = nil)
|
1828
1930
|
if @headers.nil? # header row
|
1829
1931
|
@headers = case @use_headers # save headers
|
1830
|
-
|
1831
|
-
|
1832
|
-
|
1833
|
-
|
1932
|
+
# Array of headers
|
1933
|
+
when Array then @use_headers
|
1934
|
+
# CSV header String
|
1935
|
+
when String
|
1936
|
+
self.class.parse_line( @use_headers,
|
1937
|
+
:col_sep => @col_sep,
|
1938
|
+
:row_sep => @row_sep,
|
1939
|
+
:quote_char => @quote_char )
|
1940
|
+
# first row is headers
|
1941
|
+
else row
|
1942
|
+
end
|
1834
1943
|
|
1835
1944
|
# prepare converted and unconverted copies
|
1836
1945
|
row = @headers if row.nil?
|
data/test/tc_csv_parsing.rb
CHANGED
@@ -158,7 +158,7 @@ class TestCSVParsing < Test::Unit::TestCase
|
|
158
158
|
assert_send([csv.lineno, :<, 4])
|
159
159
|
end
|
160
160
|
rescue FasterCSV::MalformedCSVError
|
161
|
-
assert_equal("
|
161
|
+
assert_equal("Illegal quoting on line 4.", $!.message)
|
162
162
|
end
|
163
163
|
end
|
164
164
|
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/local/bin/ruby -w
|
2
|
+
|
3
|
+
# tc_encodings.rb
|
4
|
+
#
|
5
|
+
# Created by Michael Reinsch.
|
6
|
+
# Copyright (c) 2008 Ubiquitous Business Technology, Inc.
|
7
|
+
|
8
|
+
require "test/unit"
|
9
|
+
|
10
|
+
require "faster_csv"
|
11
|
+
|
12
|
+
class TestEncodings < Test::Unit::TestCase
|
13
|
+
def test_with_shift_jis_encoding
|
14
|
+
$KCODE = 'u' # make sure $KCODE != Shift_JIS
|
15
|
+
# this test data will not work with UTF-8 encoding
|
16
|
+
shift_jis_data = [ "82D082E782AA82C82094E0",
|
17
|
+
"82D082E7826082AA825C",
|
18
|
+
"82D082E7826082AA82C8" ].map { |f| [f].pack("H*") }
|
19
|
+
fields = FCSV.parse_line( shift_jis_data.map { |f| %Q{"#{f}"} }.join(","),
|
20
|
+
:encoding => "s" )
|
21
|
+
assert_equal(shift_jis_data, fields)
|
22
|
+
end
|
23
|
+
end
|
data/test/tc_features.rb
CHANGED
@@ -174,6 +174,35 @@ class TestFasterCSVFeatures < Test::Unit::TestCase
|
|
174
174
|
File.unlink(file)
|
175
175
|
end
|
176
176
|
|
177
|
+
def test_inspect_is_smart_about_io_types
|
178
|
+
str = FasterCSV.new("string,data").inspect
|
179
|
+
assert(str.include?("io_type:StringIO"), "IO type not detected.")
|
180
|
+
|
181
|
+
str = FasterCSV.new($stderr).inspect
|
182
|
+
assert(str.include?("io_type:$stderr"), "IO type not detected.")
|
183
|
+
|
184
|
+
str = FasterCSV.open( File.join( File.dirname(__FILE__),
|
185
|
+
"test_data.csv" ) ) { |csv| csv.inspect }
|
186
|
+
assert(str.include?("io_type:File"), "IO type not detected.")
|
187
|
+
end
|
188
|
+
|
189
|
+
def test_inspect_shows_key_attributes
|
190
|
+
str = @csv.inspect
|
191
|
+
%w[lineno col_sep row_sep quote_char].each do |attr_name|
|
192
|
+
assert_match(/\b#{attr_name}:[^\s>]+/, str)
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def test_inspect_shows_headers_when_available
|
197
|
+
FasterCSV.open( File.join( File.dirname(__FILE__),
|
198
|
+
"test_data.csv" ),
|
199
|
+
:headers => true ) do |csv|
|
200
|
+
assert(csv.inspect.include?("headers:true"), "Header hint not shown.")
|
201
|
+
csv.shift # load headers
|
202
|
+
assert_match(/headers:\[[^\]]+\]/, csv.inspect)
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
177
206
|
def test_version
|
178
207
|
assert_not_nil(FasterCSV::VERSION)
|
179
208
|
assert_instance_of(String, FasterCSV::VERSION)
|
data/test/tc_headers.rb
CHANGED
@@ -130,6 +130,21 @@ class TestFasterCSVHeaders < Test::Unit::TestCase
|
|
130
130
|
assert(!row.field_row?)
|
131
131
|
end
|
132
132
|
|
133
|
+
def test_csv_header_string_inherits_separators
|
134
|
+
# parse with custom col_sep
|
135
|
+
csv = nil
|
136
|
+
assert_nothing_raised(Exception) do
|
137
|
+
csv = FasterCSV.parse( @data.tr(",", "|"), :col_sep => "|",
|
138
|
+
:headers => "my|new|headers" )
|
139
|
+
end
|
140
|
+
|
141
|
+
# verify headers were recognized
|
142
|
+
row = csv[0]
|
143
|
+
assert_not_nil(row)
|
144
|
+
assert_instance_of(FasterCSV::Row, row)
|
145
|
+
assert_equal([%w{my first}, %w{new second}, %w{headers third}], row.to_a)
|
146
|
+
end
|
147
|
+
|
133
148
|
def test_return_headers
|
134
149
|
# activate headers and request they are returned
|
135
150
|
csv = nil
|
data/test/tc_interface.rb
CHANGED
@@ -161,7 +161,6 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
161
161
|
|
162
162
|
lines = [{:a => 1, :b => 2, :c => 3}, {:a => 4, :b => 5, :c => 6}]
|
163
163
|
FasterCSV.open( @path, "w", :headers => true,
|
164
|
-
:converters => :all,
|
165
164
|
:header_converters => :symbol ) do |csv|
|
166
165
|
csv << lines.first.keys
|
167
166
|
lines.each { |line| csv << line }
|
@@ -172,6 +171,75 @@ class TestFasterCSVInterface < Test::Unit::TestCase
|
|
172
171
|
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
173
172
|
end
|
174
173
|
end
|
174
|
+
|
175
|
+
def test_write_hash_with_headers_array
|
176
|
+
File.unlink(@path)
|
177
|
+
|
178
|
+
lines = [{:a => 1, :b => 2, :c => 3}, {:a => 4, :b => 5, :c => 6}]
|
179
|
+
FasterCSV.open(@path, "w", :headers => [:b, :a, :c]) do |csv|
|
180
|
+
lines.each { |line| csv << line }
|
181
|
+
end
|
182
|
+
|
183
|
+
# test writing fields in the correct order
|
184
|
+
File.open(@path, "r") do |f|
|
185
|
+
assert_equal("2,1,3", f.gets.strip)
|
186
|
+
assert_equal("5,4,6", f.gets.strip)
|
187
|
+
end
|
188
|
+
|
189
|
+
# test reading CSV with headers
|
190
|
+
FasterCSV.open( @path, "r", :headers => [:b, :a, :c],
|
191
|
+
:converters => :all ) do |csv|
|
192
|
+
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
193
|
+
end
|
194
|
+
end
|
195
|
+
|
196
|
+
def test_write_hash_with_headers_string
|
197
|
+
File.unlink(@path)
|
198
|
+
|
199
|
+
lines = [{"a" => 1, "b" => 2, "c" => 3}, {"a" => 4, "b" => 5, "c" => 6}]
|
200
|
+
FasterCSV.open( @path, "w", :headers => "b|a|c",
|
201
|
+
:col_sep => "|" ) do |csv|
|
202
|
+
lines.each { |line| csv << line }
|
203
|
+
end
|
204
|
+
|
205
|
+
# test writing fields in the correct order
|
206
|
+
File.open(@path, "r") do |f|
|
207
|
+
assert_equal("2|1|3", f.gets.strip)
|
208
|
+
assert_equal("5|4|6", f.gets.strip)
|
209
|
+
end
|
210
|
+
|
211
|
+
# test reading CSV with headers
|
212
|
+
FasterCSV.open( @path, "r", :headers => "b|a|c",
|
213
|
+
:col_sep => "|",
|
214
|
+
:converters => :all ) do |csv|
|
215
|
+
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
216
|
+
end
|
217
|
+
end
|
218
|
+
|
219
|
+
def test_write_headers
|
220
|
+
File.unlink(@path)
|
221
|
+
|
222
|
+
lines = [{"a" => 1, "b" => 2, "c" => 3}, {"a" => 4, "b" => 5, "c" => 6}]
|
223
|
+
FasterCSV.open( @path, "w", :headers => "b|a|c",
|
224
|
+
:write_headers => true,
|
225
|
+
:col_sep => "|" ) do |csv|
|
226
|
+
lines.each { |line| csv << line }
|
227
|
+
end
|
228
|
+
|
229
|
+
# test writing fields in the correct order
|
230
|
+
File.open(@path, "r") do |f|
|
231
|
+
assert_equal("b|a|c", f.gets.strip)
|
232
|
+
assert_equal("2|1|3", f.gets.strip)
|
233
|
+
assert_equal("5|4|6", f.gets.strip)
|
234
|
+
end
|
235
|
+
|
236
|
+
# test reading CSV with headers
|
237
|
+
FasterCSV.open( @path, "r", :headers => true,
|
238
|
+
:col_sep => "|",
|
239
|
+
:converters => :all ) do |csv|
|
240
|
+
csv.each { |line| assert_equal(lines.shift, line.to_hash) }
|
241
|
+
end
|
242
|
+
end
|
175
243
|
|
176
244
|
def test_append # aliased add_row() and puts()
|
177
245
|
File.unlink(@path)
|
data/test/tc_row.rb
CHANGED
@@ -285,4 +285,21 @@ class TestFasterCSVRow < Test::Unit::TestCase
|
|
285
285
|
|
286
286
|
assert_equal([@row.headers.size, @row.fields.size].max, @row.size)
|
287
287
|
end
|
288
|
+
|
289
|
+
def test_inspect_shows_header_field_pairs
|
290
|
+
str = @row.inspect
|
291
|
+
@row.each do |header, field|
|
292
|
+
assert( str.include?("#{header.inspect}:#{field.inspect}"),
|
293
|
+
"Header field pair not found." )
|
294
|
+
end
|
295
|
+
end
|
296
|
+
|
297
|
+
def test_inspect_shows_symbol_headers_as_bare_attributes
|
298
|
+
str = FasterCSV::Row.new( @row.headers.map { |h| h.to_sym },
|
299
|
+
@row.fields ).inspect
|
300
|
+
@row.each do |header, field|
|
301
|
+
assert( str.include?("#{header}:#{field.inspect}"),
|
302
|
+
"Header field pair not found." )
|
303
|
+
end
|
304
|
+
end
|
288
305
|
end
|
data/test/tc_speed.rb
CHANGED
@@ -6,12 +6,14 @@
|
|
6
6
|
# Copyright 2005 Gray Productions. All rights reserved.
|
7
7
|
|
8
8
|
require "test/unit"
|
9
|
+
require "timeout"
|
9
10
|
|
10
11
|
require "faster_csv"
|
11
12
|
require "csv"
|
12
13
|
|
13
14
|
class TestFasterCSVSpeed < Test::Unit::TestCase
|
14
|
-
PATH
|
15
|
+
PATH = File.join(File.dirname(__FILE__), "test_data.csv")
|
16
|
+
BIG_DATA = "123456789\n" * 1024
|
15
17
|
|
16
18
|
def test_that_we_are_doing_the_same_work
|
17
19
|
FasterCSV.open(PATH) do |csv|
|
@@ -36,4 +38,28 @@ class TestFasterCSVSpeed < Test::Unit::TestCase
|
|
36
38
|
|
37
39
|
assert(faster_csv_time < csv_time / 3)
|
38
40
|
end
|
41
|
+
|
42
|
+
def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
|
43
|
+
assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA)
|
44
|
+
end
|
45
|
+
|
46
|
+
def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
|
47
|
+
assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA)
|
48
|
+
end
|
49
|
+
|
50
|
+
def test_field_size_limit_controls_lookahead
|
51
|
+
assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
|
52
|
+
:field_size_limit => 2048 )
|
53
|
+
end
|
54
|
+
|
55
|
+
private
|
56
|
+
|
57
|
+
def assert_parse_errors_out(*args)
|
58
|
+
assert_raise(FasterCSV::MalformedCSVError) do
|
59
|
+
Timeout.timeout(0.2) do
|
60
|
+
FasterCSV.parse(*args)
|
61
|
+
fail("Parse didn't error out")
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
39
65
|
end
|
data/test/tc_table.rb
CHANGED
@@ -388,4 +388,13 @@ class TestFasterCSVTable < Test::Unit::TestCase
|
|
388
388
|
|
389
389
|
assert_equal(@rows.size, @table.size)
|
390
390
|
end
|
391
|
+
|
392
|
+
def test_inspect_shows_current_mode
|
393
|
+
str = @table.inspect
|
394
|
+
assert(str.include?("mode:#{@table.mode}"), "Mode not shown.")
|
395
|
+
|
396
|
+
@table.by_col!
|
397
|
+
str = @table.inspect
|
398
|
+
assert(str.include?("mode:#{@table.mode}"), "Mode not shown.")
|
399
|
+
end
|
391
400
|
end
|
data/test/ts_all.rb
CHANGED
metadata
CHANGED
@@ -1,39 +1,39 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.4
|
3
|
-
specification_version: 1
|
4
2
|
name: fastercsv
|
5
3
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.
|
7
|
-
date: 2007-12-02 00:00:00 -06:00
|
8
|
-
summary: FasterCSV is CSV, but faster, smaller, and cleaner.
|
9
|
-
require_paths:
|
10
|
-
- lib
|
11
|
-
email: james@grayproductions.net
|
12
|
-
homepage: http://fastercsv.rubyforge.org
|
13
|
-
rubyforge_project: fastercsv
|
14
|
-
description: FasterCSV is intended as a complete replacement to the CSV standard library. It is significantly faster and smaller while still being pure Ruby code. It also strives for a better interface.
|
15
|
-
autorequire:
|
16
|
-
default_executable:
|
17
|
-
bindir: bin
|
18
|
-
has_rdoc: true
|
19
|
-
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
4
|
+
version: 1.4.0
|
25
5
|
platform: ruby
|
26
|
-
signing_key:
|
27
|
-
cert_chain:
|
28
|
-
post_install_message:
|
29
6
|
authors:
|
30
7
|
- James Edward Gray II
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-09-10 00:00:00 -05:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: FasterCSV is intended as a complete replacement to the CSV standard library. It is significantly faster and smaller while still being pure Ruby code. It also strives for a better interface.
|
17
|
+
email: james@grayproductions.net
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- AUTHORS
|
24
|
+
- COPYING
|
25
|
+
- README
|
26
|
+
- INSTALL
|
27
|
+
- TODO
|
28
|
+
- CHANGELOG
|
29
|
+
- LICENSE
|
31
30
|
files:
|
32
31
|
- lib/faster_csv.rb
|
33
32
|
- lib/fastercsv.rb
|
34
33
|
- test/tc_csv_parsing.rb
|
35
34
|
- test/tc_csv_writing.rb
|
36
35
|
- test/tc_data_converters.rb
|
36
|
+
- test/tc_encodings.rb
|
37
37
|
- test/tc_features.rb
|
38
38
|
- test/tc_headers.rb
|
39
39
|
- test/tc_interface.rb
|
@@ -59,26 +59,34 @@ files:
|
|
59
59
|
- TODO
|
60
60
|
- CHANGELOG
|
61
61
|
- LICENSE
|
62
|
-
|
63
|
-
|
62
|
+
has_rdoc: true
|
63
|
+
homepage: http://fastercsv.rubyforge.org
|
64
|
+
post_install_message:
|
64
65
|
rdoc_options:
|
65
66
|
- --title
|
66
67
|
- FasterCSV Documentation
|
67
68
|
- --main
|
68
69
|
- README
|
69
|
-
|
70
|
-
-
|
71
|
-
|
72
|
-
|
73
|
-
-
|
74
|
-
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
70
|
+
require_paths:
|
71
|
+
- lib
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
|
+
requirements:
|
74
|
+
- - ">="
|
75
|
+
- !ruby/object:Gem::Version
|
76
|
+
version: "0"
|
77
|
+
version:
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: "0"
|
83
|
+
version:
|
81
84
|
requirements: []
|
82
85
|
|
83
|
-
|
84
|
-
|
86
|
+
rubyforge_project: fastercsv
|
87
|
+
rubygems_version: 1.2.0
|
88
|
+
signing_key:
|
89
|
+
specification_version: 2
|
90
|
+
summary: FasterCSV is CSV, but faster, smaller, and cleaner.
|
91
|
+
test_files:
|
92
|
+
- test/ts_all.rb
|