fastercsv 0.1.9 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +19 -0
- data/INSTALL +12 -0
- data/Rakefile +6 -3
- data/TODO +1 -0
- data/examples/shortcut_interface.rb +32 -0
- data/lib/faster_csv.rb +359 -34
- data/lib/fastercsv.rb +10 -0
- data/test/tc_csv_parsing.rb +43 -0
- data/test/tc_data_converters.rb +95 -4
- data/test/tc_features.rb +26 -0
- data/test/tc_headers.rb +87 -0
- data/test/tc_interface.rb +82 -0
- data/test/tc_serialization.rb +154 -0
- data/test/test_data.csv +0 -15161
- data/test/ts_all.rb +1 -0
- metadata +5 -2
data/CHANGELOG
CHANGED
@@ -2,6 +2,25 @@
|
|
2
2
|
|
3
3
|
Below is a complete listing of changes for each revision of FasterCSV.
|
4
4
|
|
5
|
+
== 0.2.0
|
6
|
+
|
7
|
+
* Added VERSION constant.
|
8
|
+
* Significantly improved test speed.
|
9
|
+
* Worked around Date::parse bug so tests will pass on Windows.
|
10
|
+
* Documented test procedure.
|
11
|
+
* Made FasterCSV#lineno CSV aware.
|
12
|
+
* Added line numbers to MalformedCSVError messages.
|
13
|
+
* <tt>:headers</tt> can now be set to an Array of headers to use.
|
14
|
+
* <tt>:headers</tt> can now be set to an external CSV String of headers to use.
|
15
|
+
* Added an <tt>:unconverted_fields</tt> option, so those can be returned
|
16
|
+
when needed.
|
17
|
+
* Provided support for the serialization of custom Ruby objects using CSV.
|
18
|
+
* Added CSV drop-in interface.
|
19
|
+
* Added header information to FieldInfo Struct for conversions by header.
|
20
|
+
* Added an alias to support <tt>require "fastercsv"</tt>.
|
21
|
+
* Added FCSV alias for FasterCSV.
|
22
|
+
* Added FasterCSV::instance and FasterCSV()/FCSV() shortcuts for easy output.
|
23
|
+
|
5
24
|
== 0.1.9
|
6
25
|
|
7
26
|
* Fixing the require "English" bug.
|
data/INSTALL
CHANGED
@@ -21,3 +21,15 @@ Download the latest version of FasterCSV from the
|
|
21
21
|
the root project directory and enter:
|
22
22
|
|
23
23
|
$ sudo ruby setup.rb
|
24
|
+
|
25
|
+
== Running the Tests
|
26
|
+
|
27
|
+
If you would like to run FasterCSV's test suite on your system before installing
|
28
|
+
and you have Rake installed, just issue the following command from the root of
|
29
|
+
the project directory:
|
30
|
+
|
31
|
+
$ rake
|
32
|
+
|
33
|
+
If you do not have rake, use the following command instead:
|
34
|
+
|
35
|
+
$ ruby -I lib:test test/ts_all.rb
|
data/Rakefile
CHANGED
@@ -37,14 +37,17 @@ end
|
|
37
37
|
|
38
38
|
desc "Time FasterCSV and CSV"
|
39
39
|
task :benchmark do
|
40
|
+
TESTS = 6
|
40
41
|
path = "test/test_data.csv"
|
41
|
-
sh %Q{time ruby -r csv -e
|
42
|
-
|
42
|
+
sh %Q{time ruby -r csv -e } +
|
43
|
+
%Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
|
44
|
+
sh %Q{time ruby -r lib/faster_csv -e } +
|
45
|
+
%Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
|
43
46
|
end
|
44
47
|
|
45
48
|
spec = Gem::Specification.new do |spec|
|
46
49
|
spec.name = "fastercsv"
|
47
|
-
spec.version = "0.
|
50
|
+
spec.version = "0.2.0"
|
48
51
|
spec.platform = Gem::Platform::RUBY
|
49
52
|
spec.summary = "FasterCSV is CSV, but faster, smaller, and cleaner."
|
50
53
|
|
data/TODO
CHANGED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/local/bin/ruby -w
|
2
|
+
|
3
|
+
# shortcut_interface.rb
|
4
|
+
#
|
5
|
+
# Created by James Edward Gray II on 2006-04-01.
|
6
|
+
# Copyright 2006 Gray Productions. All rights reserved.
|
7
|
+
#
|
8
|
+
# Feature implementation and example code by Ara.T.Howard.
|
9
|
+
|
10
|
+
require "faster_csv"
|
11
|
+
|
12
|
+
#
|
13
|
+
# So now it's this easy to write to STDOUT.
|
14
|
+
#
|
15
|
+
FCSV { |f| f << %w( a b c) << %w( d e f ) }
|
16
|
+
|
17
|
+
#
|
18
|
+
# Writing to a String.
|
19
|
+
#
|
20
|
+
FCSV(csv = '') do |f|
|
21
|
+
f << %w( q r s )
|
22
|
+
f << %w( x y z )
|
23
|
+
end
|
24
|
+
puts csv
|
25
|
+
|
26
|
+
#
|
27
|
+
# Writing to STDERR.
|
28
|
+
#
|
29
|
+
FCSV(STDERR) do |f|
|
30
|
+
f << %w( 0 1 2 )
|
31
|
+
f << %w( A B C )
|
32
|
+
end
|
data/lib/faster_csv.rb
CHANGED
@@ -67,7 +67,16 @@ require "stringio"
|
|
67
67
|
# csv_string = ["CSV", "data"].to_csv # to CSV
|
68
68
|
# csv_array = "CSV,String".parse_csv # from CSV
|
69
69
|
#
|
70
|
+
# == Shortcut Interface
|
71
|
+
#
|
72
|
+
# FCSV { |csv_out| csv_out << %w{my data here} } # to STDOUT
|
73
|
+
# FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
|
74
|
+
# FCSV(STDERR) { |csv_err| csv_err << %w{my data here} } # to STDERR
|
75
|
+
#
|
70
76
|
class FasterCSV
|
77
|
+
# The version of the installed library.
|
78
|
+
VERSION = "0.2.0".freeze
|
79
|
+
|
71
80
|
#
|
72
81
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
73
82
|
# fields and allows duplicates just as an Array would, but also allows you to
|
@@ -330,8 +339,9 @@ class FasterCSV
|
|
330
339
|
#
|
331
340
|
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
332
341
|
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
342
|
+
# <b><tt>header</tt></b>:: The header for the column, when available.
|
333
343
|
#
|
334
|
-
FieldInfo = Struct.new(:index, :line)
|
344
|
+
FieldInfo = Struct.new(:index, :line, :header)
|
335
345
|
|
336
346
|
#
|
337
347
|
# This Hash holds the built-in converters of FasterCSV that can be accessed by
|
@@ -390,16 +400,159 @@ class FasterCSV
|
|
390
400
|
# <b><tt>:col_sep</tt></b>:: <tt>","</tt>
|
391
401
|
# <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
|
392
402
|
# <b><tt>:converters</tt></b>:: +nil+
|
403
|
+
# <b><tt>:unconverted_fields</tt></b>:: +nil+
|
393
404
|
# <b><tt>:headers</tt></b>:: +false+
|
394
405
|
# <b><tt>:return_headers</tt></b>:: +false+
|
395
406
|
# <b><tt>:header_converters</tt></b>:: +nil+
|
396
407
|
#
|
397
|
-
DEFAULT_OPTIONS = { :col_sep
|
398
|
-
:row_sep
|
399
|
-
:converters
|
400
|
-
:
|
401
|
-
:
|
402
|
-
:
|
408
|
+
DEFAULT_OPTIONS = { :col_sep => ",",
|
409
|
+
:row_sep => :auto,
|
410
|
+
:converters => nil,
|
411
|
+
:unconverted_fields => nil,
|
412
|
+
:headers => false,
|
413
|
+
:return_headers => false,
|
414
|
+
:header_converters => nil }.freeze
|
415
|
+
|
416
|
+
#
|
417
|
+
# This method will build a drop-in replacement for many of the standard CSV
|
418
|
+
# methods. It allows you to write code like:
|
419
|
+
#
|
420
|
+
# begin
|
421
|
+
# require "faster_csv"
|
422
|
+
# FasterCSV.build_csv_interface
|
423
|
+
# rescue LoadError
|
424
|
+
# require "csv"
|
425
|
+
# end
|
426
|
+
# # ... use CSV here ...
|
427
|
+
#
|
428
|
+
# This is not a complete interface with completely identical behavior.
|
429
|
+
# However, it is intended to be close enough that you won't notice the
|
430
|
+
# difference in most cases. CSV methods supported are:
|
431
|
+
#
|
432
|
+
# * foreach()
|
433
|
+
# * generate_line()
|
434
|
+
# * open()
|
435
|
+
# * parse()
|
436
|
+
# * parse_line()
|
437
|
+
# * readlines()
|
438
|
+
#
|
439
|
+
# Be warned that this interface is slower than vanilla FasterCSV due to the
|
440
|
+
# extra layer of method calls. Depending on usage, this can slow it down to
|
441
|
+
# near CSV speeds.
|
442
|
+
#
|
443
|
+
def self.build_csv_interface
|
444
|
+
Object.const_set(:CSV, Class.new).class_eval do
|
445
|
+
def self.foreach( path, rs = :auto, &block ) # :nodoc:
|
446
|
+
FasterCSV.foreach(path, :row_sep => rs, &block)
|
447
|
+
end
|
448
|
+
|
449
|
+
def self.generate_line( row, fs = ",", rs = "" ) # :nodoc:
|
450
|
+
FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
|
451
|
+
end
|
452
|
+
|
453
|
+
def self.open( path, mode, fs = ",", rs = :auto, &block ) # :nodoc:
|
454
|
+
if block and mode.include? "r"
|
455
|
+
FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs) do |csv|
|
456
|
+
csv.each(&block)
|
457
|
+
end
|
458
|
+
else
|
459
|
+
FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs, &block)
|
460
|
+
end
|
461
|
+
end
|
462
|
+
|
463
|
+
def self.parse( str_or_readable, fs = ",", rs = :auto, &block ) # :nodoc:
|
464
|
+
FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
|
465
|
+
end
|
466
|
+
|
467
|
+
def self.parse_line( src, fs = ",", rs = :auto ) # :nodoc:
|
468
|
+
FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
|
469
|
+
end
|
470
|
+
|
471
|
+
def self.readlines( path, rs = :auto ) # :nodoc:
|
472
|
+
FasterCSV.readlines(path, :row_sep => rs)
|
473
|
+
end
|
474
|
+
end
|
475
|
+
end
|
476
|
+
|
477
|
+
#
|
478
|
+
# This method allows you to serialize an Array of Ruby objects to a String or
|
479
|
+
# File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
|
480
|
+
# useful for spreadsheet and database interaction.
|
481
|
+
#
|
482
|
+
# Out of the box, this method is intended to work with simple data objects or
|
483
|
+
# Structs. It will serialize a list of instance variables and/or
|
484
|
+
# Struct.members().
|
485
|
+
#
|
486
|
+
# If you need more complicated serialization, you can control the process
|
487
|
+
# by adding methods to the class to be serialized.
|
488
|
+
#
|
489
|
+
# A class method csv_meta() is responsible for returning the first row of the
|
490
|
+
# document (as an Array). This row is considered to be a Hash of the form
|
491
|
+
# key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
|
492
|
+
# key with a value of the stringified class name and FasterCSV::dump() will
|
493
|
+
# create this, if you do not define this method. This method is only called
|
494
|
+
# on the first object of the Array.
|
495
|
+
#
|
496
|
+
# The next method you can provide is an instance method called csv_headers().
|
497
|
+
# This method is expected to return the second line of the document (again as
|
498
|
+
# an Array), which is to be used to give each column a header. By default,
|
499
|
+
# FasterCSV::load() will set an instance variable if the field header starts
|
500
|
+
# with an @ character or call send() passing the header as the method name and
|
501
|
+
# the field value as an argument. This method is only called on the first
|
502
|
+
# object of the Array.
|
503
|
+
#
|
504
|
+
# Finally, you can provide an instance method called csv_dump(), which will
|
505
|
+
# be passed the headers. This should return an Array of fields that can be
|
506
|
+
# serialized for this object. This method is called once for every object in
|
507
|
+
# the Array.
|
508
|
+
#
|
509
|
+
# The +io+ parameter can be used to serialize to a File, and +options+ can be
|
510
|
+
# anything FasterCSV::new() accepts.
|
511
|
+
#
|
512
|
+
def self.dump( ary_of_objs, io = "", options = Hash.new )
|
513
|
+
obj_template = ary_of_objs.first
|
514
|
+
|
515
|
+
csv = FasterCSV.new(io, options)
|
516
|
+
|
517
|
+
# write meta information
|
518
|
+
begin
|
519
|
+
csv << obj_template.class.csv_meta
|
520
|
+
rescue NoMethodError
|
521
|
+
csv << [:class, obj_template.class]
|
522
|
+
end
|
523
|
+
|
524
|
+
# write headers
|
525
|
+
begin
|
526
|
+
headers = obj_template.csv_headers
|
527
|
+
rescue NoMethodError
|
528
|
+
headers = obj_template.instance_variables.sort
|
529
|
+
if obj_template.class.ancestors.find { |cls| cls.to_s =~ /\AStruct\b/ }
|
530
|
+
headers += obj_template.members.map { |mem| "#{mem}=" }.sort
|
531
|
+
end
|
532
|
+
end
|
533
|
+
csv << headers
|
534
|
+
|
535
|
+
# serialize each object
|
536
|
+
ary_of_objs.each do |obj|
|
537
|
+
begin
|
538
|
+
csv << obj.csv_dump(headers)
|
539
|
+
rescue NoMethodError
|
540
|
+
csv << headers.map do |var|
|
541
|
+
if var[0] == ?@
|
542
|
+
obj.instance_variable_get(var)
|
543
|
+
else
|
544
|
+
obj[var[0..-2]]
|
545
|
+
end
|
546
|
+
end
|
547
|
+
end
|
548
|
+
end
|
549
|
+
|
550
|
+
if io.is_a? String
|
551
|
+
csv.string
|
552
|
+
else
|
553
|
+
csv.close
|
554
|
+
end
|
555
|
+
end
|
403
556
|
|
404
557
|
#
|
405
558
|
# :call-seq:
|
@@ -508,6 +661,77 @@ class FasterCSV
|
|
508
661
|
(new("", options) << row).string
|
509
662
|
end
|
510
663
|
|
664
|
+
#
|
665
|
+
# This method will return a FasterCSV instance, just like FasterCSV::new(),
|
666
|
+
# but the instance will be cached and returned for all future calls to this
|
667
|
+
# method for the same +data+ object (tested by Object#object_id()) with the
|
668
|
+
# same +options+.
|
669
|
+
#
|
670
|
+
# If a block is given, the instance is passed to the block and the return
|
671
|
+
# value becomes the return value of the block.
|
672
|
+
#
|
673
|
+
def self.instance( data = STDOUT, options = Hash.new )
|
674
|
+
# create a _signature_ for this method call, data object and options
|
675
|
+
sig = [data.object_id] +
|
676
|
+
options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s })
|
677
|
+
|
678
|
+
# fetch or create the instance for this signature
|
679
|
+
@@instances ||= Hash.new
|
680
|
+
instance = (@@instances[sig] ||= new(data, options))
|
681
|
+
|
682
|
+
if block_given?
|
683
|
+
yield instance # run block, if given, returning result
|
684
|
+
else
|
685
|
+
instance # or return the instance
|
686
|
+
end
|
687
|
+
end
|
688
|
+
|
689
|
+
#
|
690
|
+
# This method is the reading counterpart to FasterCSV::dump(). See that
|
691
|
+
# method for a detailed description of the process.
|
692
|
+
#
|
693
|
+
# You can customize loading by adding a class method called csv_load() which
|
694
|
+
# will be passed a Hash of meta information, an Array of headers, and an Array
|
695
|
+
# of fields for the object the method is expected to return.
|
696
|
+
#
|
697
|
+
# Remember that all fields will be Strings after this load. If you need
|
698
|
+
# something else, use +options+ to setup converters or provide a custom
|
699
|
+
# csv_load() implementation.
|
700
|
+
#
|
701
|
+
def self.load( io_or_str, options = Hash.new )
|
702
|
+
csv = FasterCSV.new(io_or_str, options)
|
703
|
+
|
704
|
+
# load meta information
|
705
|
+
meta = Hash[*csv.shift]
|
706
|
+
cls = meta["class"].split("::").inject(Object) do |c, const|
|
707
|
+
c.const_get(const)
|
708
|
+
end
|
709
|
+
|
710
|
+
# load headers
|
711
|
+
headers = csv.shift
|
712
|
+
|
713
|
+
# unserialize each object stored in the file
|
714
|
+
results = csv.inject(Array.new) do |all, row|
|
715
|
+
begin
|
716
|
+
obj = cls.csv_load(meta, headers, row)
|
717
|
+
rescue NoMethodError
|
718
|
+
obj = cls.allocate
|
719
|
+
headers.zip(row) do |name, value|
|
720
|
+
if name[0] == ?@
|
721
|
+
obj.instance_variable_set(name, value)
|
722
|
+
else
|
723
|
+
obj.send(name, value)
|
724
|
+
end
|
725
|
+
end
|
726
|
+
end
|
727
|
+
all << obj
|
728
|
+
end
|
729
|
+
|
730
|
+
csv.close unless io_or_str.is_a? String
|
731
|
+
|
732
|
+
results
|
733
|
+
end
|
734
|
+
|
511
735
|
#
|
512
736
|
# :call-seq:
|
513
737
|
# open( filename, mode="r", options = Hash.new ) { |faster_csv| ... }
|
@@ -541,7 +765,6 @@ class FasterCSV
|
|
541
765
|
# * fsync()
|
542
766
|
# * ioctl()
|
543
767
|
# * isatty()
|
544
|
-
# * lineno()
|
545
768
|
# * pid()
|
546
769
|
# * pos()
|
547
770
|
# * reopen()
|
@@ -663,10 +886,24 @@ class FasterCSV
|
|
663
886
|
# Hash and/or lambdas that handle custom
|
664
887
|
# conversion. A single converter
|
665
888
|
# doesn't have to be in an Array.
|
889
|
+
# <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
|
890
|
+
# unconverted_fields() method will be
|
891
|
+
# added to all returned rows (Array or
|
892
|
+
# FasterCSV::Row) that will return the
|
893
|
+
# fields as they were before conversion.
|
894
|
+
# Note that <tt>:headers</tt> supplied
|
895
|
+
# by Array or String were not fields of
|
896
|
+
# the document and thus will have an
|
897
|
+
# empty Array attached.
|
666
898
|
# <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
|
667
899
|
# +true+, the initial row of the CSV
|
668
900
|
# file will be treated as a row of
|
669
|
-
# headers.
|
901
|
+
# headers. If set to an Array, the
|
902
|
+
# contents will be used as the headers.
|
903
|
+
# If set to a String, the String is run
|
904
|
+
# through a call of
|
905
|
+
# FasterCSV::parse_line() to produce an
|
906
|
+
# Array of headers. This setting causes
|
670
907
|
# FasterCSV.shift() to return rows as
|
671
908
|
# FasterCSV::Row objects instead of
|
672
909
|
# Arrays.
|
@@ -701,16 +938,24 @@ class FasterCSV
|
|
701
938
|
unless options.empty?
|
702
939
|
raise ArgumentError, "Unknown options: #{options.keys.join(', ')}."
|
703
940
|
end
|
941
|
+
|
942
|
+
# track our own lineno since IO gets confused about line-ends in CSV fields
|
943
|
+
@lineno = 0
|
704
944
|
end
|
705
945
|
|
946
|
+
#
|
947
|
+
# The line number of the last row read from this file. Fields with nested
|
948
|
+
# line-end characters will not affect this count.
|
949
|
+
#
|
950
|
+
attr_reader :lineno
|
951
|
+
|
706
952
|
### IO and StringIO Delegation ###
|
707
953
|
|
708
954
|
extend Forwardable
|
709
955
|
def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
|
710
956
|
:eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
|
711
|
-
:isatty, :
|
712
|
-
:
|
713
|
-
:tty?
|
957
|
+
:isatty, :pid, :pos, :reopen, :rewind, :seek, :stat,
|
958
|
+
:string, :sync, :sync=, :tell, :to_i, :to_io, :tty?
|
714
959
|
|
715
960
|
### End Delegation ###
|
716
961
|
|
@@ -820,6 +1065,21 @@ class FasterCSV
|
|
820
1065
|
# The data source must be open for reading.
|
821
1066
|
#
|
822
1067
|
def shift
|
1068
|
+
#########################################################################
|
1069
|
+
### This method is purposefully kept a bit long as simple conditional ###
|
1070
|
+
### checks are faster than numerous (expensive) method calls. ###
|
1071
|
+
#########################################################################
|
1072
|
+
|
1073
|
+
# handle headers not based on document content
|
1074
|
+
if header_row? and @return_headers and
|
1075
|
+
[Array, String].include? @use_headers.class
|
1076
|
+
if @unconverted_fields
|
1077
|
+
return add_unconverted_fields(parse_headers, Array.new)
|
1078
|
+
else
|
1079
|
+
return parse_headers
|
1080
|
+
end
|
1081
|
+
end
|
1082
|
+
|
823
1083
|
# begin with a blank line, so we can always add to it
|
824
1084
|
line = ""
|
825
1085
|
|
@@ -838,7 +1098,14 @@ class FasterCSV
|
|
838
1098
|
# I believe a blank line should be an <tt>Array.new</tt>, not
|
839
1099
|
# CSV's <tt>[nil]</tt>
|
840
1100
|
#
|
841
|
-
|
1101
|
+
if parse.empty?
|
1102
|
+
@lineno += 1
|
1103
|
+
if @unconverted_fields
|
1104
|
+
return add_unconverted_fields(Array.new, Array.new)
|
1105
|
+
else
|
1106
|
+
return Array.new
|
1107
|
+
end
|
1108
|
+
end
|
842
1109
|
|
843
1110
|
#
|
844
1111
|
# shave leading empty fields if needed, because the main parser chokes
|
@@ -863,7 +1130,8 @@ class FasterCSV
|
|
863
1130
|
$2
|
864
1131
|
else
|
865
1132
|
# or throw an Exception
|
866
|
-
raise MalformedCSVError,
|
1133
|
+
raise MalformedCSVError, "Unquoted fields do not allow " +
|
1134
|
+
"\\r or \\n (line #{lineno + 1})."
|
867
1135
|
end
|
868
1136
|
end
|
869
1137
|
else # we found a quoted field...
|
@@ -874,15 +1142,28 @@ class FasterCSV
|
|
874
1142
|
|
875
1143
|
# if parse is empty?(), we found all the fields on the line...
|
876
1144
|
if parse.empty?
|
877
|
-
|
878
|
-
|
1145
|
+
@lineno += 1
|
1146
|
+
|
1147
|
+
# save unconverted fields, if needed...
|
1148
|
+
unconverted = csv.dup if @unconverted_fields
|
1149
|
+
|
1150
|
+
# convert fields, if needed...
|
1151
|
+
csv = convert_fields(csv) unless @use_headers or @converters.empty?
|
879
1152
|
# parse out header rows and handle FasterCSV::Row conversions...
|
880
1153
|
csv = parse_headers(csv) if @use_headers
|
1154
|
+
|
1155
|
+
# inject unconverted fields and accessor, if requested...
|
1156
|
+
if @unconverted_fields and not csv.respond_to? :unconverted_fields
|
1157
|
+
add_unconverted_fields(csv, unconverted)
|
1158
|
+
end
|
1159
|
+
|
881
1160
|
# return the results
|
882
1161
|
break csv
|
883
1162
|
end
|
884
1163
|
# if we're not empty?() but at eof?(), a quoted field wasn't closed...
|
885
|
-
|
1164
|
+
if @io.eof?
|
1165
|
+
raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
|
1166
|
+
end
|
886
1167
|
# otherwise, we need to loop and pull some more data to complete the row
|
887
1168
|
end
|
888
1169
|
end
|
@@ -966,7 +1247,14 @@ class FasterCSV
|
|
966
1247
|
# are set. When +field_name+ is <tt>:header_converters</tt> header converters
|
967
1248
|
# are added instead.
|
968
1249
|
#
|
1250
|
+
# The <tt>:unconverted_fields</tt> option is also activated for
|
1251
|
+
# <tt>:converters</tt> calls, if requested.
|
1252
|
+
#
|
969
1253
|
def init_converters( options, field_name = :converters )
|
1254
|
+
if field_name == :converters
|
1255
|
+
@unconverted_fields = options.delete(:unconverted_fields)
|
1256
|
+
end
|
1257
|
+
|
970
1258
|
instance_variable_set("@#{field_name}", Array.new)
|
971
1259
|
|
972
1260
|
# find the correct method to add the converters
|
@@ -996,6 +1284,7 @@ class FasterCSV
|
|
996
1284
|
@use_headers = options.delete(:headers)
|
997
1285
|
@return_headers = options.delete(:return_headers)
|
998
1286
|
|
1287
|
+
# headers must be delayed until shift(), in case they need a row of content
|
999
1288
|
@headers = nil
|
1000
1289
|
|
1001
1290
|
init_converters(options, :header_converters)
|
@@ -1028,24 +1317,22 @@ class FasterCSV
|
|
1028
1317
|
|
1029
1318
|
#
|
1030
1319
|
# Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
|
1031
|
-
# if
|
1320
|
+
# if +headers+ is passed as +true+, returning the converted field set. Any
|
1032
1321
|
# converter that changes the field into something other than a String halts
|
1033
1322
|
# the pipeline of conversion for that field. This is primarily an efficiency
|
1034
1323
|
# shortcut.
|
1035
1324
|
#
|
1036
|
-
def convert_fields( fields )
|
1037
|
-
|
1038
|
-
|
1039
|
-
else
|
1040
|
-
@converters
|
1041
|
-
end
|
1325
|
+
def convert_fields( fields, headers = false )
|
1326
|
+
# see if we are converting headers or fields
|
1327
|
+
converters = headers ? @header_converters : @converters
|
1042
1328
|
|
1043
1329
|
fields.enum_for(:each_with_index).map do |field, index| # map_with_index
|
1044
1330
|
converters.each do |converter|
|
1045
1331
|
field = if converter.arity == 1 # straight field converter
|
1046
1332
|
converter[field]
|
1047
1333
|
else # FieldInfo converter
|
1048
|
-
|
1334
|
+
header = @use_headers && !headers ? @headers[index] : nil
|
1335
|
+
converter[field, FieldInfo.new(index, lineno, header)]
|
1049
1336
|
end
|
1050
1337
|
break unless field.is_a? String # short-curcuit pipeline for speed
|
1051
1338
|
end
|
@@ -1060,18 +1347,56 @@ class FasterCSV
|
|
1060
1347
|
# converters) or by reading past them to return a field row. Headers are also
|
1061
1348
|
# saved in <tt>@headers</tt> for use in future rows.
|
1062
1349
|
#
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1350
|
+
# When +nil+, +row+ is assumed to be a header row not based on an actual row
|
1351
|
+
# of the stream.
|
1352
|
+
#
|
1353
|
+
def parse_headers( row = nil )
|
1354
|
+
if @headers.nil? # header row
|
1355
|
+
@headers = case @use_headers # save headers
|
1356
|
+
when Array then @use_headers # Array of headers
|
1357
|
+
when String then self.class.parse_line(@use_headers) # CSV header String
|
1358
|
+
else row # first row headers
|
1359
|
+
end
|
1360
|
+
|
1361
|
+
# prepare converted and unconverted copies
|
1362
|
+
row = @headers if row.nil?
|
1363
|
+
@headers = convert_fields(@headers, true)
|
1364
|
+
|
1365
|
+
if @return_headers # return headers
|
1366
|
+
return FasterCSV::Row.new(@headers, row, true)
|
1367
|
+
elsif not [Array, String].include? @use_headers.class # skip to field row
|
1368
|
+
return shift
|
1070
1369
|
end
|
1071
|
-
else # field row
|
1072
|
-
FasterCSV::Row.new(@headers, row)
|
1073
1370
|
end
|
1371
|
+
|
1372
|
+
FasterCSV::Row.new(@headers, convert_fields(row)) # field row
|
1074
1373
|
end
|
1374
|
+
|
1375
|
+
#
|
1376
|
+
# This method injects an instance variable <tt>unconverted_fields</tt> into
|
1377
|
+
# +row+ and an accessor method for it called unconverted_fields(). The
|
1378
|
+
# variable is set to the contents of +fields+.
|
1379
|
+
#
|
1380
|
+
def add_unconverted_fields( row, fields )
|
1381
|
+
class << row
|
1382
|
+
attr_reader :unconverted_fields
|
1383
|
+
end
|
1384
|
+
row.instance_eval { @unconverted_fields = fields }
|
1385
|
+
row
|
1386
|
+
end
|
1387
|
+
end
|
1388
|
+
|
1389
|
+
# Another name for FasterCSV.
|
1390
|
+
FCSV = FasterCSV
|
1391
|
+
|
1392
|
+
# Another name for FasterCSV::instance().
|
1393
|
+
def FasterCSV( *args, &block )
|
1394
|
+
FasterCSV.instance(*args, &block)
|
1395
|
+
end
|
1396
|
+
|
1397
|
+
# Another name for FCSV::instance().
|
1398
|
+
def FCSV( *args, &block )
|
1399
|
+
FCSV.instance(*args, &block)
|
1075
1400
|
end
|
1076
1401
|
|
1077
1402
|
class Array
|