fastercsv 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +19 -0
- data/INSTALL +12 -0
- data/Rakefile +6 -3
- data/TODO +1 -0
- data/examples/shortcut_interface.rb +32 -0
- data/lib/faster_csv.rb +359 -34
- data/lib/fastercsv.rb +10 -0
- data/test/tc_csv_parsing.rb +43 -0
- data/test/tc_data_converters.rb +95 -4
- data/test/tc_features.rb +26 -0
- data/test/tc_headers.rb +87 -0
- data/test/tc_interface.rb +82 -0
- data/test/tc_serialization.rb +154 -0
- data/test/test_data.csv +0 -15161
- data/test/ts_all.rb +1 -0
- metadata +5 -2
data/CHANGELOG
CHANGED
@@ -2,6 +2,25 @@
|
|
2
2
|
|
3
3
|
Below is a complete listing of changes for each revision of FasterCSV.
|
4
4
|
|
5
|
+
== 0.2.0
|
6
|
+
|
7
|
+
* Added VERSION constant.
|
8
|
+
* Significantly improved test speed.
|
9
|
+
* Worked around Date::parse bug so tests will pass on Windows.
|
10
|
+
* Documented test procedure.
|
11
|
+
* Made FasterCSV#lineno CSV aware.
|
12
|
+
* Added line numbers to MalformedCSVError messages.
|
13
|
+
* <tt>:headers</tt> can now be set to an Array of headers to use.
|
14
|
+
* <tt>:headers</tt> can now be set to an external CSV String of headers to use.
|
15
|
+
* Added an <tt>:unconverted_fields</tt> option, so those can be returned
|
16
|
+
when needed.
|
17
|
+
* Provided support for the serialization of custom Ruby objects using CSV.
|
18
|
+
* Added CSV drop-in interface.
|
19
|
+
* Added header information to FieldInfo Struct for conversions by header.
|
20
|
+
* Added an alias to support <tt>require "fastercsv"</tt>.
|
21
|
+
* Added FCSV alias for FasterCSV.
|
22
|
+
* Added FasterCSV::instance and FasterCSV()/FCSV() shortcuts for easy output.
|
23
|
+
|
5
24
|
== 0.1.9
|
6
25
|
|
7
26
|
* Fixing the require "English" bug.
|
data/INSTALL
CHANGED
@@ -21,3 +21,15 @@ Download the latest version of FasterCSV from the
|
|
21
21
|
the root project directory and enter:
|
22
22
|
|
23
23
|
$ sudo ruby setup.rb
|
24
|
+
|
25
|
+
== Running the Tests
|
26
|
+
|
27
|
+
If you would like to run FasterCSV's test suite on your system before installing
|
28
|
+
and you have Rake installed, just issue the following command from the root of
|
29
|
+
the project directory:
|
30
|
+
|
31
|
+
$ rake
|
32
|
+
|
33
|
+
If you do not have Rake, use the following command instead:
|
34
|
+
|
35
|
+
$ ruby -I lib:test test/ts_all.rb
|
data/Rakefile
CHANGED
@@ -37,14 +37,17 @@ end
|
|
37
37
|
|
38
38
|
desc "Time FasterCSV and CSV"
|
39
39
|
task :benchmark do
|
40
|
+
TESTS = 6
|
40
41
|
path = "test/test_data.csv"
|
41
|
-
sh %Q{time ruby -r csv -e
|
42
|
-
|
42
|
+
sh %Q{time ruby -r csv -e } +
|
43
|
+
%Q{'#{TESTS}.times { CSV.foreach("#{path}") { |row| } }'}
|
44
|
+
sh %Q{time ruby -r lib/faster_csv -e } +
|
45
|
+
%Q{'#{TESTS}.times { FasterCSV.foreach("#{path}") { |row| } }'}
|
43
46
|
end
|
44
47
|
|
45
48
|
spec = Gem::Specification.new do |spec|
|
46
49
|
spec.name = "fastercsv"
|
47
|
-
spec.version = "0.
|
50
|
+
spec.version = "0.2.0"
|
48
51
|
spec.platform = Gem::Platform::RUBY
|
49
52
|
spec.summary = "FasterCSV is CSV, but faster, smaller, and cleaner."
|
50
53
|
|
data/TODO
CHANGED
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/local/bin/ruby -w
|
2
|
+
|
3
|
+
# shortcut_interface.rb
|
4
|
+
#
|
5
|
+
# Created by James Edward Gray II on 2006-04-01.
|
6
|
+
# Copyright 2006 Gray Productions. All rights reserved.
|
7
|
+
#
|
8
|
+
# Feature implementation and example code by Ara.T.Howard.
|
9
|
+
|
10
|
+
require "faster_csv"
|
11
|
+
|
12
|
+
#
|
13
|
+
# So now it's this easy to write to STDOUT.
|
14
|
+
#
|
15
|
+
FCSV { |f| f << %w( a b c) << %w( d e f ) }
|
16
|
+
|
17
|
+
#
|
18
|
+
# Writing to a String.
|
19
|
+
#
|
20
|
+
FCSV(csv = '') do |f|
|
21
|
+
f << %w( q r s )
|
22
|
+
f << %w( x y z )
|
23
|
+
end
|
24
|
+
puts csv
|
25
|
+
|
26
|
+
#
|
27
|
+
# Writing to STDERR.
|
28
|
+
#
|
29
|
+
FCSV(STDERR) do |f|
|
30
|
+
f << %w( 0 1 2 )
|
31
|
+
f << %w( A B C )
|
32
|
+
end
|
data/lib/faster_csv.rb
CHANGED
@@ -67,7 +67,16 @@ require "stringio"
|
|
67
67
|
# csv_string = ["CSV", "data"].to_csv # to CSV
|
68
68
|
# csv_array = "CSV,String".parse_csv # from CSV
|
69
69
|
#
|
70
|
+
# == Shortcut Interface
|
71
|
+
#
|
72
|
+
# FCSV { |csv_out| csv_out << %w{my data here} } # to STDOUT
|
73
|
+
# FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
|
74
|
+
# FCSV(STDERR) { |csv_err| csv_err << %w{my data here} } # to STDERR
|
75
|
+
#
|
70
76
|
class FasterCSV
|
77
|
+
# The version of the installed library.
|
78
|
+
VERSION = "0.2.0".freeze
|
79
|
+
|
71
80
|
#
|
72
81
|
# A FasterCSV::Row is part Array and part Hash. It retains an order for the
|
73
82
|
# fields and allows duplicates just as an Array would, but also allows you to
|
@@ -330,8 +339,9 @@ class FasterCSV
|
|
330
339
|
#
|
331
340
|
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
332
341
|
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
342
|
+
# <b><tt>header</tt></b>:: The header for the column, when available.
|
333
343
|
#
|
334
|
-
FieldInfo = Struct.new(:index, :line)
|
344
|
+
FieldInfo = Struct.new(:index, :line, :header)
|
335
345
|
|
336
346
|
#
|
337
347
|
# This Hash holds the built-in converters of FasterCSV that can be accessed by
|
@@ -390,16 +400,159 @@ class FasterCSV
|
|
390
400
|
# <b><tt>:col_sep</tt></b>:: <tt>","</tt>
|
391
401
|
# <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
|
392
402
|
# <b><tt>:converters</tt></b>:: +nil+
|
403
|
+
# <b><tt>:unconverted_fields</tt></b>:: +nil+
|
393
404
|
# <b><tt>:headers</tt></b>:: +false+
|
394
405
|
# <b><tt>:return_headers</tt></b>:: +false+
|
395
406
|
# <b><tt>:header_converters</tt></b>:: +nil+
|
396
407
|
#
|
397
|
-
DEFAULT_OPTIONS = { :col_sep
|
398
|
-
:row_sep
|
399
|
-
:converters
|
400
|
-
:
|
401
|
-
:
|
402
|
-
:
|
408
|
+
DEFAULT_OPTIONS = { :col_sep => ",",
|
409
|
+
:row_sep => :auto,
|
410
|
+
:converters => nil,
|
411
|
+
:unconverted_fields => nil,
|
412
|
+
:headers => false,
|
413
|
+
:return_headers => false,
|
414
|
+
:header_converters => nil }.freeze
|
415
|
+
|
416
|
+
#
|
417
|
+
# This method will build a drop-in replacement for many of the standard CSV
|
418
|
+
# methods. It allows you to write code like:
|
419
|
+
#
|
420
|
+
# begin
|
421
|
+
# require "faster_csv"
|
422
|
+
# FasterCSV.build_csv_interface
|
423
|
+
# rescue LoadError
|
424
|
+
# require "csv"
|
425
|
+
# end
|
426
|
+
# # ... use CSV here ...
|
427
|
+
#
|
428
|
+
# This is not a complete interface with completely identical behavior.
|
429
|
+
# However, it is intended to be close enough that you won't notice the
|
430
|
+
# difference in most cases. CSV methods supported are:
|
431
|
+
#
|
432
|
+
# * foreach()
|
433
|
+
# * generate_line()
|
434
|
+
# * open()
|
435
|
+
# * parse()
|
436
|
+
# * parse_line()
|
437
|
+
# * readlines()
|
438
|
+
#
|
439
|
+
# Be warned that this interface is slower than vanilla FasterCSV due to the
|
440
|
+
# extra layer of method calls. Depending on usage, this can slow it down to
|
441
|
+
# near CSV speeds.
|
442
|
+
#
|
443
|
+
def self.build_csv_interface
|
444
|
+
Object.const_set(:CSV, Class.new).class_eval do
|
445
|
+
def self.foreach( path, rs = :auto, &block ) # :nodoc:
|
446
|
+
FasterCSV.foreach(path, :row_sep => rs, &block)
|
447
|
+
end
|
448
|
+
|
449
|
+
def self.generate_line( row, fs = ",", rs = "" ) # :nodoc:
|
450
|
+
FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
|
451
|
+
end
|
452
|
+
|
453
|
+
def self.open( path, mode, fs = ",", rs = :auto, &block ) # :nodoc:
|
454
|
+
if block and mode.include? "r"
|
455
|
+
FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs) do |csv|
|
456
|
+
csv.each(&block)
|
457
|
+
end
|
458
|
+
else
|
459
|
+
FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs, &block)
|
460
|
+
end
|
461
|
+
end
|
462
|
+
|
463
|
+
def self.parse( str_or_readable, fs = ",", rs = :auto, &block ) # :nodoc:
|
464
|
+
FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
|
465
|
+
end
|
466
|
+
|
467
|
+
def self.parse_line( src, fs = ",", rs = :auto ) # :nodoc:
|
468
|
+
FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
|
469
|
+
end
|
470
|
+
|
471
|
+
def self.readlines( path, rs = :auto ) # :nodoc:
|
472
|
+
FasterCSV.readlines(path, :row_sep => rs)
|
473
|
+
end
|
474
|
+
end
|
475
|
+
end
|
476
|
+
|
477
|
+
#
|
478
|
+
# This method allows you to serialize an Array of Ruby objects to a String or
|
479
|
+
# File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
|
480
|
+
# useful for spreadsheet and database interaction.
|
481
|
+
#
|
482
|
+
# Out of the box, this method is intended to work with simple data objects or
|
483
|
+
# Structs. It will serialize a list of instance variables and/or
|
484
|
+
# Struct.members().
|
485
|
+
#
|
486
|
+
# If you need more complicated serialization, you can control the process
|
487
|
+
# by adding methods to the class to be serialized.
|
488
|
+
#
|
489
|
+
# A class method csv_meta() is responsible for returning the first row of the
|
490
|
+
# document (as an Array). This row is considered to be a Hash of the form
|
491
|
+
# key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
|
492
|
+
# key with a value of the stringified class name and FasterCSV::dump() will
|
493
|
+
# create this, if you do not define this method. This method is only called
|
494
|
+
# on the first object of the Array.
|
495
|
+
#
|
496
|
+
# The next method you can provide is an instance method called csv_headers().
|
497
|
+
# This method is expected to return the second line of the document (again as
|
498
|
+
# an Array), which is to be used to give each column a header. By default,
|
499
|
+
# FasterCSV::load() will set an instance variable if the field header starts
|
500
|
+
# with an @ character or call send() passing the header as the method name and
|
501
|
+
# the field value as an argument. This method is only called on the first
|
502
|
+
# object of the Array.
|
503
|
+
#
|
504
|
+
# Finally, you can provide an instance method called csv_dump(), which will
|
505
|
+
# be passed the headers. This should return an Array of fields that can be
|
506
|
+
# serialized for this object. This method is called once for every object in
|
507
|
+
# the Array.
|
508
|
+
#
|
509
|
+
# The +io+ parameter can be used to serialize to a File, and +options+ can be
|
510
|
+
# anything FasterCSV::new() accepts.
|
511
|
+
#
|
512
|
+
def self.dump( ary_of_objs, io = "", options = Hash.new )
|
513
|
+
obj_template = ary_of_objs.first
|
514
|
+
|
515
|
+
csv = FasterCSV.new(io, options)
|
516
|
+
|
517
|
+
# write meta information
|
518
|
+
begin
|
519
|
+
csv << obj_template.class.csv_meta
|
520
|
+
rescue NoMethodError
|
521
|
+
csv << [:class, obj_template.class]
|
522
|
+
end
|
523
|
+
|
524
|
+
# write headers
|
525
|
+
begin
|
526
|
+
headers = obj_template.csv_headers
|
527
|
+
rescue NoMethodError
|
528
|
+
headers = obj_template.instance_variables.sort
|
529
|
+
if obj_template.class.ancestors.find { |cls| cls.to_s =~ /\AStruct\b/ }
|
530
|
+
headers += obj_template.members.map { |mem| "#{mem}=" }.sort
|
531
|
+
end
|
532
|
+
end
|
533
|
+
csv << headers
|
534
|
+
|
535
|
+
# serialize each object
|
536
|
+
ary_of_objs.each do |obj|
|
537
|
+
begin
|
538
|
+
csv << obj.csv_dump(headers)
|
539
|
+
rescue NoMethodError
|
540
|
+
csv << headers.map do |var|
|
541
|
+
if var[0] == ?@
|
542
|
+
obj.instance_variable_get(var)
|
543
|
+
else
|
544
|
+
obj[var[0..-2]]
|
545
|
+
end
|
546
|
+
end
|
547
|
+
end
|
548
|
+
end
|
549
|
+
|
550
|
+
if io.is_a? String
|
551
|
+
csv.string
|
552
|
+
else
|
553
|
+
csv.close
|
554
|
+
end
|
555
|
+
end
|
403
556
|
|
404
557
|
#
|
405
558
|
# :call-seq:
|
@@ -508,6 +661,77 @@ class FasterCSV
|
|
508
661
|
(new("", options) << row).string
|
509
662
|
end
|
510
663
|
|
664
|
+
#
|
665
|
+
# This method will return a FasterCSV instance, just like FasterCSV::new(),
|
666
|
+
# but the instance will be cached and returned for all future calls to this
|
667
|
+
# method for the same +data+ object (tested by Object#object_id()) with the
|
668
|
+
# same +options+.
|
669
|
+
#
|
670
|
+
# If a block is given, the instance is passed to the block and the return
|
671
|
+
# value of the block becomes the return value of this method.
|
672
|
+
#
|
673
|
+
def self.instance( data = STDOUT, options = Hash.new )
|
674
|
+
# create a _signature_ for this method call, data object and options
|
675
|
+
sig = [data.object_id] +
|
676
|
+
options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s })
|
677
|
+
|
678
|
+
# fetch or create the instance for this signature
|
679
|
+
@@instances ||= Hash.new
|
680
|
+
instance = (@@instances[sig] ||= new(data, options))
|
681
|
+
|
682
|
+
if block_given?
|
683
|
+
yield instance # run block, if given, returning result
|
684
|
+
else
|
685
|
+
instance # or return the instance
|
686
|
+
end
|
687
|
+
end
|
688
|
+
|
689
|
+
#
|
690
|
+
# This method is the reading counterpart to FasterCSV::dump(). See that
|
691
|
+
# method for a detailed description of the process.
|
692
|
+
#
|
693
|
+
# You can customize loading by adding a class method called csv_load() which
|
694
|
+
# will be passed a Hash of meta information, an Array of headers, and an Array
|
695
|
+
# of fields for the object the method is expected to return.
|
696
|
+
#
|
697
|
+
# Remember that all fields will be Strings after this load. If you need
|
698
|
+
# something else, use +options+ to setup converters or provide a custom
|
699
|
+
# csv_load() implementation.
|
700
|
+
#
|
701
|
+
def self.load( io_or_str, options = Hash.new )
|
702
|
+
csv = FasterCSV.new(io_or_str, options)
|
703
|
+
|
704
|
+
# load meta information
|
705
|
+
meta = Hash[*csv.shift]
|
706
|
+
cls = meta["class"].split("::").inject(Object) do |c, const|
|
707
|
+
c.const_get(const)
|
708
|
+
end
|
709
|
+
|
710
|
+
# load headers
|
711
|
+
headers = csv.shift
|
712
|
+
|
713
|
+
# unserialize each object stored in the file
|
714
|
+
results = csv.inject(Array.new) do |all, row|
|
715
|
+
begin
|
716
|
+
obj = cls.csv_load(meta, headers, row)
|
717
|
+
rescue NoMethodError
|
718
|
+
obj = cls.allocate
|
719
|
+
headers.zip(row) do |name, value|
|
720
|
+
if name[0] == ?@
|
721
|
+
obj.instance_variable_set(name, value)
|
722
|
+
else
|
723
|
+
obj.send(name, value)
|
724
|
+
end
|
725
|
+
end
|
726
|
+
end
|
727
|
+
all << obj
|
728
|
+
end
|
729
|
+
|
730
|
+
csv.close unless io_or_str.is_a? String
|
731
|
+
|
732
|
+
results
|
733
|
+
end
|
734
|
+
|
511
735
|
#
|
512
736
|
# :call-seq:
|
513
737
|
# open( filename, mode="r", options = Hash.new ) { |faster_csv| ... }
|
@@ -541,7 +765,6 @@ class FasterCSV
|
|
541
765
|
# * fsync()
|
542
766
|
# * ioctl()
|
543
767
|
# * isatty()
|
544
|
-
# * lineno()
|
545
768
|
# * pid()
|
546
769
|
# * pos()
|
547
770
|
# * reopen()
|
@@ -663,10 +886,24 @@ class FasterCSV
|
|
663
886
|
# Hash and/or lambdas that handle custom
|
664
887
|
# conversion. A single converter
|
665
888
|
# doesn't have to be in an Array.
|
889
|
+
# <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
|
890
|
+
# unconverted_fields() method will be
|
891
|
+
# added to all returned rows (Array or
|
892
|
+
# FasterCSV::Row) that will return the
|
893
|
+
# fields as they were before conversion.
|
894
|
+
# Note that <tt>:headers</tt> supplied
|
895
|
+
# by Array or String were not fields of
|
896
|
+
# the document and thus will have an
|
897
|
+
# empty Array attached.
|
666
898
|
# <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
|
667
899
|
# +true+, the initial row of the CSV
|
668
900
|
# file will be treated as a row of
|
669
|
-
# headers.
|
901
|
+
# headers. If set to an Array, the
|
902
|
+
# contents will be used as the headers.
|
903
|
+
# If set to a String, the String is run
|
904
|
+
# through a call of
|
905
|
+
# FasterCSV::parse_line() to produce an
|
906
|
+
# Array of headers. This setting causes
|
670
907
|
# FasterCSV.shift() to return rows as
|
671
908
|
# FasterCSV::Row objects instead of
|
672
909
|
# Arrays.
|
@@ -701,16 +938,24 @@ class FasterCSV
|
|
701
938
|
unless options.empty?
|
702
939
|
raise ArgumentError, "Unknown options: #{options.keys.join(', ')}."
|
703
940
|
end
|
941
|
+
|
942
|
+
# track our own lineno since IO gets confused about line-ends in CSV fields
|
943
|
+
@lineno = 0
|
704
944
|
end
|
705
945
|
|
946
|
+
#
|
947
|
+
# The line number of the last row read from this file. Fields with nested
|
948
|
+
# line-end characters will not affect this count.
|
949
|
+
#
|
950
|
+
attr_reader :lineno
|
951
|
+
|
706
952
|
### IO and StringIO Delegation ###
|
707
953
|
|
708
954
|
extend Forwardable
|
709
955
|
def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
|
710
956
|
:eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
|
711
|
-
:isatty, :
|
712
|
-
:
|
713
|
-
:tty?
|
957
|
+
:isatty, :pid, :pos, :reopen, :rewind, :seek, :stat,
|
958
|
+
:string, :sync, :sync=, :tell, :to_i, :to_io, :tty?
|
714
959
|
|
715
960
|
### End Delegation ###
|
716
961
|
|
@@ -820,6 +1065,21 @@ class FasterCSV
|
|
820
1065
|
# The data source must be open for reading.
|
821
1066
|
#
|
822
1067
|
def shift
|
1068
|
+
#########################################################################
|
1069
|
+
### This method is purposefully kept a bit long as simple conditional ###
|
1070
|
+
### checks are faster than numerous (expensive) method calls. ###
|
1071
|
+
#########################################################################
|
1072
|
+
|
1073
|
+
# handle headers not based on document content
|
1074
|
+
if header_row? and @return_headers and
|
1075
|
+
[Array, String].include? @use_headers.class
|
1076
|
+
if @unconverted_fields
|
1077
|
+
return add_unconverted_fields(parse_headers, Array.new)
|
1078
|
+
else
|
1079
|
+
return parse_headers
|
1080
|
+
end
|
1081
|
+
end
|
1082
|
+
|
823
1083
|
# begin with a blank line, so we can always add to it
|
824
1084
|
line = ""
|
825
1085
|
|
@@ -838,7 +1098,14 @@ class FasterCSV
|
|
838
1098
|
# I believe a blank line should be an <tt>Array.new</tt>, not
|
839
1099
|
# CSV's <tt>[nil]</tt>
|
840
1100
|
#
|
841
|
-
|
1101
|
+
if parse.empty?
|
1102
|
+
@lineno += 1
|
1103
|
+
if @unconverted_fields
|
1104
|
+
return add_unconverted_fields(Array.new, Array.new)
|
1105
|
+
else
|
1106
|
+
return Array.new
|
1107
|
+
end
|
1108
|
+
end
|
842
1109
|
|
843
1110
|
#
|
844
1111
|
# shave leading empty fields if needed, because the main parser chokes
|
@@ -863,7 +1130,8 @@ class FasterCSV
|
|
863
1130
|
$2
|
864
1131
|
else
|
865
1132
|
# or throw an Exception
|
866
|
-
raise MalformedCSVError,
|
1133
|
+
raise MalformedCSVError, "Unquoted fields do not allow " +
|
1134
|
+
"\\r or \\n (line #{lineno + 1})."
|
867
1135
|
end
|
868
1136
|
end
|
869
1137
|
else # we found a quoted field...
|
@@ -874,15 +1142,28 @@ class FasterCSV
|
|
874
1142
|
|
875
1143
|
# if parse is empty?(), we found all the fields on the line...
|
876
1144
|
if parse.empty?
|
877
|
-
|
878
|
-
|
1145
|
+
@lineno += 1
|
1146
|
+
|
1147
|
+
# save unconverted fields, if needed...
|
1148
|
+
unconverted = csv.dup if @unconverted_fields
|
1149
|
+
|
1150
|
+
# convert fields, if needed...
|
1151
|
+
csv = convert_fields(csv) unless @use_headers or @converters.empty?
|
879
1152
|
# parse out header rows and handle FasterCSV::Row conversions...
|
880
1153
|
csv = parse_headers(csv) if @use_headers
|
1154
|
+
|
1155
|
+
# inject unconverted fields and accessor, if requested...
|
1156
|
+
if @unconverted_fields and not csv.respond_to? :unconverted_fields
|
1157
|
+
add_unconverted_fields(csv, unconverted)
|
1158
|
+
end
|
1159
|
+
|
881
1160
|
# return the results
|
882
1161
|
break csv
|
883
1162
|
end
|
884
1163
|
# if we're not empty?() but at eof?(), a quoted field wasn't closed...
|
885
|
-
|
1164
|
+
if @io.eof?
|
1165
|
+
raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
|
1166
|
+
end
|
886
1167
|
# otherwise, we need to loop and pull some more data to complete the row
|
887
1168
|
end
|
888
1169
|
end
|
@@ -966,7 +1247,14 @@ class FasterCSV
|
|
966
1247
|
# are set. When +field_name+ is <tt>:header_converters</tt> header converters
|
967
1248
|
# are added instead.
|
968
1249
|
#
|
1250
|
+
# The <tt>:unconverted_fields</tt> option is also activated for
|
1251
|
+
# <tt>:converters</tt> calls, if requested.
|
1252
|
+
#
|
969
1253
|
def init_converters( options, field_name = :converters )
|
1254
|
+
if field_name == :converters
|
1255
|
+
@unconverted_fields = options.delete(:unconverted_fields)
|
1256
|
+
end
|
1257
|
+
|
970
1258
|
instance_variable_set("@#{field_name}", Array.new)
|
971
1259
|
|
972
1260
|
# find the correct method to add the converters
|
@@ -996,6 +1284,7 @@ class FasterCSV
|
|
996
1284
|
@use_headers = options.delete(:headers)
|
997
1285
|
@return_headers = options.delete(:return_headers)
|
998
1286
|
|
1287
|
+
# headers must be delayed until shift(), in case they need a row of content
|
999
1288
|
@headers = nil
|
1000
1289
|
|
1001
1290
|
init_converters(options, :header_converters)
|
@@ -1028,24 +1317,22 @@ class FasterCSV
|
|
1028
1317
|
|
1029
1318
|
#
|
1030
1319
|
# Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
|
1031
|
-
# if
|
1320
|
+
# if +headers+ is passed as +true+, returning the converted field set. Any
|
1032
1321
|
# converter that changes the field into something other than a String halts
|
1033
1322
|
# the pipeline of conversion for that field. This is primarily an efficiency
|
1034
1323
|
# shortcut.
|
1035
1324
|
#
|
1036
|
-
def convert_fields( fields )
|
1037
|
-
|
1038
|
-
|
1039
|
-
else
|
1040
|
-
@converters
|
1041
|
-
end
|
1325
|
+
def convert_fields( fields, headers = false )
|
1326
|
+
# see if we are converting headers or fields
|
1327
|
+
converters = headers ? @header_converters : @converters
|
1042
1328
|
|
1043
1329
|
fields.enum_for(:each_with_index).map do |field, index| # map_with_index
|
1044
1330
|
converters.each do |converter|
|
1045
1331
|
field = if converter.arity == 1 # straight field converter
|
1046
1332
|
converter[field]
|
1047
1333
|
else # FieldInfo converter
|
1048
|
-
|
1334
|
+
header = @use_headers && !headers ? @headers[index] : nil
|
1335
|
+
converter[field, FieldInfo.new(index, lineno, header)]
|
1049
1336
|
end
|
1050
1337
|
break unless field.is_a? String # short-curcuit pipeline for speed
|
1051
1338
|
end
|
@@ -1060,18 +1347,56 @@ class FasterCSV
|
|
1060
1347
|
# converters) or by reading past them to return a field row. Headers are also
|
1061
1348
|
# saved in <tt>@headers</tt> for use in future rows.
|
1062
1349
|
#
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1350
|
+
# When +nil+, +row+ is assumed to be a header row not based on an actual row
|
1351
|
+
# of the stream.
|
1352
|
+
#
|
1353
|
+
def parse_headers( row = nil )
|
1354
|
+
if @headers.nil? # header row
|
1355
|
+
@headers = case @use_headers # save headers
|
1356
|
+
when Array then @use_headers # Array of headers
|
1357
|
+
when String then self.class.parse_line(@use_headers) # CSV header String
|
1358
|
+
else row # first row headers
|
1359
|
+
end
|
1360
|
+
|
1361
|
+
# prepare converted and unconverted copies
|
1362
|
+
row = @headers if row.nil?
|
1363
|
+
@headers = convert_fields(@headers, true)
|
1364
|
+
|
1365
|
+
if @return_headers # return headers
|
1366
|
+
return FasterCSV::Row.new(@headers, row, true)
|
1367
|
+
elsif not [Array, String].include? @use_headers.class # skip to field row
|
1368
|
+
return shift
|
1070
1369
|
end
|
1071
|
-
else # field row
|
1072
|
-
FasterCSV::Row.new(@headers, row)
|
1073
1370
|
end
|
1371
|
+
|
1372
|
+
FasterCSV::Row.new(@headers, convert_fields(row)) # field row
|
1074
1373
|
end
|
1374
|
+
|
1375
|
+
#
|
1376
|
+
# This method injects an instance variable <tt>unconverted_fields</tt> into
|
1377
|
+
# +row+ and an accessor method for it called unconverted_fields(). The
|
1378
|
+
# variable is set to the contents of +fields+.
|
1379
|
+
#
|
1380
|
+
def add_unconverted_fields( row, fields )
|
1381
|
+
class << row
|
1382
|
+
attr_reader :unconverted_fields
|
1383
|
+
end
|
1384
|
+
row.instance_eval { @unconverted_fields = fields }
|
1385
|
+
row
|
1386
|
+
end
|
1387
|
+
end
|
1388
|
+
|
1389
|
+
# Another name for FasterCSV.
|
1390
|
+
FCSV = FasterCSV
|
1391
|
+
|
1392
|
+
# Another name for FasterCSV::instance().
|
1393
|
+
def FasterCSV( *args, &block )
|
1394
|
+
FasterCSV.instance(*args, &block)
|
1395
|
+
end
|
1396
|
+
|
1397
|
+
# Another name for FCSV::instance().
|
1398
|
+
def FCSV( *args, &block )
|
1399
|
+
FCSV.instance(*args, &block)
|
1075
1400
|
end
|
1076
1401
|
|
1077
1402
|
class Array
|