b2b2dot0-fastercsv 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ Quantity,Product Description,Price
2
+ 1,Text Editor,25.00
3
+ 2,MacBook Pros,2499.00
@@ -0,0 +1,36 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # shortcut_interface.rb
4
+ #
5
+ # Created by James Edward Gray II on 2006-04-01.
6
+ # Copyright 2006 Gray Productions. All rights reserved.
7
+ #
8
+ # Feature implementation and example code by Ara.T.Howard.
9
+
10
+ require "faster_csv"
11
+
12
+ #
13
+ # So now it's this easy to write to STDOUT.
14
+ #
15
+ FCSV { |f| f << %w( a b c) << %w( d e f ) }
16
+
17
+ #
18
+ # Writing to a String.
19
+ #
20
+ FCSV(csv = '') do |f|
21
+ f << %w( q r s )
22
+ f << %w( x y z )
23
+ end
24
+ puts csv
25
+
26
+ #
27
+ # Writing to STDERR.
28
+ #
29
+ FCSV(STDERR) do |f|
30
+ f << %w( 0 1 2 )
31
+ f << %w( A B C )
32
+ end
33
+ # >> a,b,c
34
+ # >> d,e,f
35
+ # >> q,r,s
36
+ # >> x,y,z
@@ -0,0 +1,2001 @@
1
+ #!/usr/local/bin/ruby -w
2
+ # -*- coding: utf-8 -*-
3
+
4
+ # = faster_csv.rb -- Faster CSV Reading and Writing
5
+ #
6
+ # Created by James Edward Gray II on 2005-10-31.
7
+ # Copyright 2005 Gray Productions. All rights reserved.
8
+ #
9
+ # See FasterCSV for documentation.
10
+
11
+ require "forwardable"
12
+ require "English"
13
+ require "enumerator"
14
+ require "date"
15
+ require "stringio"
16
+
17
+ #
18
+ # This class provides a complete interface to CSV files and data. It offers
19
+ # tools to enable you to read and write to and from Strings or IO objects, as
20
+ # needed.
21
+ #
22
+ # == Reading
23
+ #
24
+ # === From a File
25
+ #
26
+ # ==== A Line at a Time
27
+ #
28
+ # FasterCSV.foreach("path/to/file.csv") do |row|
29
+ # # use row here...
30
+ # end
31
+ #
32
+ # ==== All at Once
33
+ #
34
+ # arr_of_arrs = FasterCSV.read("path/to/file.csv")
35
+ #
36
+ # === From a String
37
+ #
38
+ # ==== A Line at a Time
39
+ #
40
+ # FasterCSV.parse("CSV,data,String") do |row|
41
+ # # use row here...
42
+ # end
43
+ #
44
+ # ==== All at Once
45
+ #
46
+ # arr_of_arrs = FasterCSV.parse("CSV,data,String")
47
+ #
48
+ # == Writing
49
+ #
50
+ # === To a File
51
+ #
52
+ # FasterCSV.open("path/to/file.csv", "w") do |csv|
53
+ # csv << ["row", "of", "CSV", "data"]
54
+ # csv << ["another", "row"]
55
+ # # ...
56
+ # end
57
+ #
58
+ # === To a String
59
+ #
60
+ # csv_string = FasterCSV.generate do |csv|
61
+ # csv << ["row", "of", "CSV", "data"]
62
+ # csv << ["another", "row"]
63
+ # # ...
64
+ # end
65
+ #
66
+ # == Convert a Single Line
67
+ #
68
+ # csv_string = ["CSV", "data"].to_csv # to CSV
69
+ # csv_array = "CSV,String".parse_csv # from CSV
70
+ #
71
+ # == Shortcut Interface
72
+ #
73
+ # FCSV { |csv_out| csv_out << %w{my data here} } # to $stdout
74
+ # FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
75
+ # FCSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
76
+ #
77
+ class FasterCSV
78
+ # The version of the installed library.
79
+ VERSION = "1.4.1".freeze
80
+
81
+ #
82
+ # A FasterCSV::Row is part Array and part Hash. It retains an order for the
83
+ # fields and allows duplicates just as an Array would, but also allows you to
84
+ # access fields by name just as you could if they were in a Hash.
85
+ #
86
+ # All rows returned by FasterCSV will be constructed from this class, if
87
+ # header row processing is activated.
88
+ #
89
+ class Row
90
+ #
91
+ # Construct a new FasterCSV::Row from +headers+ and +fields+, which are
92
+ # expected to be Arrays. If one Array is shorter than the other, it will be
93
+ # padded with +nil+ objects.
94
+ #
95
+ # The optional +header_row+ parameter can be set to +true+ to indicate, via
96
+ # FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
97
+ # a header row. Otherwise, the row is assumed to be a field row.
98
+ #
99
+ # A FasterCSV::Row object supports the following Array methods through
100
+ # delegation:
101
+ #
102
+ # * empty?()
103
+ # * length()
104
+ # * size()
105
+ #
106
+ def initialize(headers, fields, header_row = false)
107
+ @header_row = header_row
108
+
109
+ # handle extra headers or fields
110
+ @row = if headers.size > fields.size
111
+ headers.zip(fields)
112
+ else
113
+ fields.zip(headers).map { |pair| pair.reverse }
114
+ end
115
+ end
116
+
117
+ # Internal data format used to compare equality.
118
+ attr_reader :row
119
+ protected :row
120
+
121
+ ### Array Delegation ###
122
+
123
+ extend Forwardable
124
+ def_delegators :@row, :empty?, :length, :size
125
+
126
+ # Returns +true+ if this is a header row.
127
+ def header_row?
128
+ @header_row
129
+ end
130
+
131
+ # Returns +true+ if this is a field row.
132
+ def field_row?
133
+ not header_row?
134
+ end
135
+
136
+ # Returns the headers of this row.
137
+ def headers
138
+ @row.map { |pair| pair.first }
139
+ end
140
+
141
+ #
142
+ # :call-seq:
143
+ # field( header )
144
+ # field( header, offset )
145
+ # field( index )
146
+ #
147
+ # This method will fetch the field value by +header+ or +index+. If a field
148
+ # is not found, +nil+ is returned.
149
+ #
150
+ # When provided, +offset+ ensures that a header match occurs on or later
151
+ # than the +offset+ index. You can use this to find duplicate headers,
152
+ # without resorting to hard-coding exact indices.
153
+ #
154
+ def field(header_or_index, minimum_index = 0)
155
+ # locate the pair
156
+ finder = header_or_index.is_a?(Integer) ? :[] : :assoc
157
+ pair = @row[minimum_index..-1].send(finder, header_or_index)
158
+
159
+ # return the field if we have a pair
160
+ pair.nil? ? nil : pair.last
161
+ end
162
+ alias_method :[], :field
163
+
164
+ #
165
+ # :call-seq:
166
+ # []=( header, value )
167
+ # []=( header, offset, value )
168
+ # []=( index, value )
169
+ #
170
+ # Looks up the field by the semantics described in FasterCSV::Row.field()
171
+ # and assigns the +value+.
172
+ #
173
+ # Assigning past the end of the row with an index will set all pairs between
174
+ # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
175
+ # pair.
176
+ #
177
+ def []=(*args)
178
+ value = args.pop
179
+
180
+ if args.first.is_a? Integer
181
+ if @row[args.first].nil? # extending past the end with index
182
+ @row[args.first] = [nil, value]
183
+ @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
184
+ else # normal index assignment
185
+ @row[args.first][1] = value
186
+ end
187
+ else
188
+ index = index(*args)
189
+ if index.nil? # appending a field
190
+ self << [args.first, value]
191
+ else # normal header assignment
192
+ @row[index][1] = value
193
+ end
194
+ end
195
+ end
196
+
197
+ #
198
+ # :call-seq:
199
+ # <<( field )
200
+ # <<( header_and_field_array )
201
+ # <<( header_and_field_hash )
202
+ #
203
+ # If a two-element Array is provided, it is assumed to be a header and field
204
+ # and the pair is appended. A Hash works the same way with the key being
205
+ # the header and the value being the field. Anything else is assumed to be
206
+ # a lone field which is appended with a +nil+ header.
207
+ #
208
+ # This method returns the row for chaining.
209
+ #
210
+ def <<(arg)
211
+ if arg.is_a?(Array) and arg.size == 2 # appending a header and name
212
+ @row << arg
213
+ elsif arg.is_a?(Hash) # append header and name pairs
214
+ arg.each { |pair| @row << pair }
215
+ else # append field value
216
+ @row << [nil, arg]
217
+ end
218
+
219
+ self # for chaining
220
+ end
221
+
222
+ #
223
+ # A shortcut for appending multiple fields. Equivalent to:
224
+ #
225
+ # args.each { |arg| faster_csv_row << arg }
226
+ #
227
+ # This method returns the row for chaining.
228
+ #
229
+ def push(*args)
230
+ args.each { |arg| self << arg }
231
+
232
+ self # for chaining
233
+ end
234
+
235
+ #
236
+ # :call-seq:
237
+ # delete( header )
238
+ # delete( header, offset )
239
+ # delete( index )
240
+ #
241
+ # Used to remove a pair from the row by +header+ or +index+. The pair is
242
+ # located as described in FasterCSV::Row.field(). The deleted pair is
243
+ # returned, or +nil+ if a pair could not be found.
244
+ #
245
+ def delete(header_or_index, minimum_index = 0)
246
+ if header_or_index.is_a? Integer # by index
247
+ @row.delete_at(header_or_index)
248
+ else # by header
249
+ @row.delete_at(index(header_or_index, minimum_index))
250
+ end
251
+ end
252
+
253
+ #
254
+ # The provided +block+ is passed a header and field for each pair in the row
255
+ # and expected to return +true+ or +false+, depending on whether the pair
256
+ # should be deleted.
257
+ #
258
+ # This method returns the row for chaining.
259
+ #
260
+ def delete_if(&block)
261
+ @row.delete_if(&block)
262
+
263
+ self # for chaining
264
+ end
265
+
266
+ #
267
+ # This method accepts any number of arguments which can be headers, indices,
268
+ # Ranges of either, or two-element Arrays containing a header and offset.
269
+ # Each argument will be replaced with a field lookup as described in
270
+ # FasterCSV::Row.field().
271
+ #
272
+ # If called with no arguments, all fields are returned.
273
+ #
274
+ def fields(*headers_and_or_indices)
275
+ if headers_and_or_indices.empty? # return all fields--no arguments
276
+ @row.map { |pair| pair.last }
277
+ else # or work like values_at()
278
+ headers_and_or_indices.inject(Array.new) do |all, h_or_i|
279
+ all + if h_or_i.is_a? Range
280
+ index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
281
+ index(h_or_i.begin)
282
+ index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
283
+ index(h_or_i.end)
284
+ new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
285
+ (index_begin..index_end)
286
+ fields.values_at(new_range)
287
+ else
288
+ [field(*Array(h_or_i))]
289
+ end
290
+ end
291
+ end
292
+ end
293
+ alias_method :values_at, :fields
294
+
295
+ #
296
+ # :call-seq:
297
+ # index( header )
298
+ # index( header, offset )
299
+ #
300
+ # This method will return the index of a field with the provided +header+.
301
+ # The +offset+ can be used to locate duplicate header names, as described in
302
+ # FasterCSV::Row.field().
303
+ #
304
+ def index(header, minimum_index = 0)
305
+ # find the pair
306
+ index = headers[minimum_index..-1].index(header)
307
+ # return the index at the right offset, if we found one
308
+ index.nil? ? nil : index + minimum_index
309
+ end
310
+
311
+ # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
312
+ def header?(name)
313
+ headers.include? name
314
+ end
315
+ alias_method :include?, :header?
316
+
317
+ #
318
+ # Returns +true+ if +data+ matches a field in this row, and +false+
319
+ # otherwise.
320
+ #
321
+ def field?(data)
322
+ fields.include? data
323
+ end
324
+
325
+ include Enumerable
326
+
327
+ #
328
+ # Yields each pair of the row as header and field tuples (much like
329
+ # iterating over a Hash).
330
+ #
331
+ # Support for Enumerable.
332
+ #
333
+ # This method returns the row for chaining.
334
+ #
335
+ def each(&block)
336
+ @row.each(&block)
337
+
338
+ self # for chaining
339
+ end
340
+
341
+ #
342
+ # Returns +true+ if this row contains the same headers and fields in the
343
+ # same order as +other+.
344
+ #
345
+ def ==(other)
346
+ @row == other.row
347
+ end
348
+
349
+ #
350
+ # Collapses the row into a simple Hash. Be warned that this discards field
351
+ # order and clobbers duplicate fields.
352
+ #
353
+ def to_hash
354
+ # flatten just one level of the internal Array
355
+ Hash[*@row.inject(Array.new) { |ary, pair| ary.push(*pair) }]
356
+ end
357
+
358
+ #
359
+ # Returns the row as a CSV String. Headers are not used. Equivalent to:
360
+ #
361
+ # faster_csv_row.fields.to_csv( options )
362
+ #
363
+ def to_csv(options = Hash.new)
364
+ fields.to_csv(options)
365
+ end
366
+ alias_method :to_s, :to_csv
367
+
368
+ # A summary of fields, by header.
369
+ def inspect
370
+ str = "#<#{self.class}"
371
+ each do |header, field|
372
+ str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
373
+ field.inspect
374
+ end
375
+ str << ">"
376
+ end
377
+ end
378
+
379
+ #
380
+ # A FasterCSV::Table is a two-dimensional data structure for representing CSV
381
+ # documents. Tables allow you to work with the data by row or column,
382
+ # manipulate the data, and even convert the results back to CSV, if needed.
383
+ #
384
+ # All tables returned by FasterCSV will be constructed from this class, if
385
+ # header row processing is activated.
386
+ #
387
+ class Table
388
+ #
389
+ # Construct a new FasterCSV::Table from +array_of_rows+, which are expected
390
+ # to be FasterCSV::Row objects. All rows are assumed to have the same
391
+ # headers.
392
+ #
393
+ # A FasterCSV::Table object supports the following Array methods through
394
+ # delegation:
395
+ #
396
+ # * empty?()
397
+ # * length()
398
+ # * size()
399
+ #
400
+ def initialize(array_of_rows)
401
+ @table = array_of_rows
402
+ @mode = :col_or_row
403
+ end
404
+
405
+ # The current access mode for indexing and iteration.
406
+ attr_reader :mode
407
+
408
+ # Internal data format used to compare equality.
409
+ attr_reader :table
410
+ protected :table
411
+
412
+ ### Array Delegation ###
413
+
414
+ extend Forwardable
415
+ def_delegators :@table, :empty?, :length, :size
416
+
417
+ #
418
+ # Returns a duplicate table object, in column mode. This is handy for
419
+ # chaining in a single call without changing the table mode, but be aware
420
+ # that this method can consume a fair amount of memory for bigger data sets.
421
+ #
422
+ # This method returns the duplicate table for chaining. Don't chain
423
+ # destructive methods (like []=()) this way though, since you are working
424
+ # with a duplicate.
425
+ #
426
+ def by_col
427
+ self.class.new(@table.dup).by_col!
428
+ end
429
+
430
+ #
431
+ # Switches the mode of this table to column mode. All calls to indexing and
432
+ # iteration methods will work with columns until the mode is changed again.
433
+ #
434
+ # This method returns the table and is safe to chain.
435
+ #
436
+ def by_col!
437
+ @mode = :col
438
+
439
+ self
440
+ end
441
+
442
+ #
443
+ # Returns a duplicate table object, in mixed mode. This is handy for
444
+ # chaining in a single call without changing the table mode, but be aware
445
+ # that this method can consume a fair amount of memory for bigger data sets.
446
+ #
447
+ # This method returns the duplicate table for chaining. Don't chain
448
+ # destructive methods (like []=()) this way though, since you are working
449
+ # with a duplicate.
450
+ #
451
+ def by_col_or_row
452
+ self.class.new(@table.dup).by_col_or_row!
453
+ end
454
+
455
+ #
456
+ # Switches the mode of this table to mixed mode. All calls to indexing and
457
+ # iteration methods will use the default intelligent indexing system until
458
+ # the mode is changed again. In mixed mode an index is assumed to be a row
459
+ # reference while anything else is assumed to be column access by headers.
460
+ #
461
+ # This method returns the table and is safe to chain.
462
+ #
463
+ def by_col_or_row!
464
+ @mode = :col_or_row
465
+
466
+ self
467
+ end
468
+
469
+ #
470
+ # Returns a duplicate table object, in row mode. This is handy for chaining
471
+ # in a single call without changing the table mode, but be aware that this
472
+ # method can consume a fair amount of memory for bigger data sets.
473
+ #
474
+ # This method returns the duplicate table for chaining. Don't chain
475
+ # destructive methods (like []=()) this way though, since you are working
476
+ # with a duplicate.
477
+ #
478
+ def by_row
479
+ self.class.new(@table.dup).by_row!
480
+ end
481
+
482
+ #
483
+ # Switches the mode of this table to row mode. All calls to indexing and
484
+ # iteration methods will work with rows until the mode is changed again.
485
+ #
486
+ # This method returns the table and is safe to chain.
487
+ #
488
+ def by_row!
489
+ @mode = :row
490
+
491
+ self
492
+ end
493
+
494
+ #
495
+ # Returns the headers for the first row of this table (assumed to match all
496
+ # other rows). An empty Array is returned for empty tables.
497
+ #
498
+ def headers
499
+ if @table.empty?
500
+ Array.new
501
+ else
502
+ @table.first.headers
503
+ end
504
+ end
505
+
506
+ #
507
+ # In the default mixed mode, this method returns rows for index access and
508
+ # columns for header access. You can force the index association by first
509
+ # calling by_col!() or by_row!().
510
+ #
511
+ # Columns are returned as an Array of values. Altering that Array has no
512
+ # effect on the table.
513
+ #
514
+ def [](index_or_header)
515
+ if @mode == :row or # by index
516
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
517
+ @table[index_or_header]
518
+ else # by header
519
+ @table.map { |row| row[index_or_header] }
520
+ end
521
+ end
522
+
523
+ #
524
+ # In the default mixed mode, this method assigns rows for index access and
525
+ # columns for header access. You can force the index association by first
526
+ # calling by_col!() or by_row!().
527
+ #
528
+ # Rows may be set to an Array of values (which will inherit the table's
529
+ # headers()) or a FasterCSV::Row.
530
+ #
531
+ # Columns may be set to a single value, which is copied to each row of the
532
+ # column, or an Array of values. Arrays of values are assigned to rows top
533
+ # to bottom in row major order. Excess values are ignored and if the Array
534
+ # does not have a value for each row the extra rows will receive a +nil+.
535
+ #
536
+ # Assigning to an existing column or row clobbers the data. Assigning to
537
+ # new columns creates them at the right end of the table.
538
+ #
539
+ def []=(index_or_header, value)
540
+ if @mode == :row or # by index
541
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
542
+ if value.is_a? Array
543
+ @table[index_or_header] = Row.new(headers, value)
544
+ else
545
+ @table[index_or_header] = value
546
+ end
547
+ else # set column
548
+ if value.is_a? Array # multiple values
549
+ @table.each_with_index do |row, i|
550
+ if row.header_row?
551
+ row[index_or_header] = index_or_header
552
+ else
553
+ row[index_or_header] = value[i]
554
+ end
555
+ end
556
+ else # repeated value
557
+ @table.each do |row|
558
+ if row.header_row?
559
+ row[index_or_header] = index_or_header
560
+ else
561
+ row[index_or_header] = value
562
+ end
563
+ end
564
+ end
565
+ end
566
+ end
567
+
568
+ #
569
+ # The mixed mode default is to treat a list of indices as row access,
570
+ # returning the rows indicated. Anything else is considered columnar
571
+ # access. For columnar access, the return set has an Array for each row
572
+ # with the values indicated by the headers in each Array. You can force
573
+ # column or row mode using by_col!() or by_row!().
574
+ #
575
+ # You cannot mix column and row access.
576
+ #
577
+ def values_at(*indices_or_headers)
578
+ if @mode == :row or # by indices
579
+ ( @mode == :col_or_row and indices_or_headers.all? do |index|
580
+ index.is_a?(Integer) or
581
+ ( index.is_a?(Range) and
582
+ index.first.is_a?(Integer) and
583
+ index.last.is_a?(Integer) )
584
+ end )
585
+ @table.values_at(*indices_or_headers)
586
+ else # by headers
587
+ @table.map { |row| row.values_at(*indices_or_headers) }
588
+ end
589
+ end
590
+
591
+ #
592
+ # Adds a new row to the bottom end of this table. You can provide an Array,
593
+ # which will be converted to a FasterCSV::Row (inheriting the table's
594
+ # headers()), or a FasterCSV::Row.
595
+ #
596
+ # This method returns the table for chaining.
597
+ #
598
+ def <<(row_or_array)
599
+ if row_or_array.is_a? Array # append Array
600
+ @table << Row.new(headers, row_or_array)
601
+ else # append Row
602
+ @table << row_or_array
603
+ end
604
+
605
+ self # for chaining
606
+ end
607
+
608
+ #
609
+ # A shortcut for appending multiple rows. Equivalent to:
610
+ #
611
+ # rows.each { |row| self << row }
612
+ #
613
+ # This method returns the table for chaining.
614
+ #
615
+ def push(*rows)
616
+ rows.each { |row| self << row }
617
+
618
+ self # for chaining
619
+ end
620
+
621
+ #
622
+ # Removes and returns the indicated column or row. In the default mixed
623
+ # mode indices refer to rows and everything else is assumed to be a column
624
+ # header. Use by_col!() or by_row!() to force the lookup.
625
+ #
626
+ def delete(index_or_header)
627
+ if @mode == :row or # by index
628
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
629
+ @table.delete_at(index_or_header)
630
+ else # by header
631
+ @table.map { |row| row.delete(index_or_header).last }
632
+ end
633
+ end
634
+
635
+ #
636
+ # Removes any column or row for which the block returns +true+. In the
637
+ # default mixed mode or row mode, iteration is the standard row major
638
+ # walking of rows. In column mode, iteration will +yield+ two element
639
+ # tuples containing the column name and an Array of values for that column.
640
+ #
641
+ # This method returns the table for chaining.
642
+ #
643
+ def delete_if(&block)
644
+ if @mode == :row or @mode == :col_or_row # by index
645
+ @table.delete_if(&block)
646
+ else # by header
647
+ to_delete = Array.new
648
+ headers.each_with_index do |header, i|
649
+ to_delete << header if block[[header, self[header]]]
650
+ end
651
+ to_delete.map { |header| delete(header) }
652
+ end
653
+
654
+ self # for chaining
655
+ end
656
+
657
+ include Enumerable
658
+
659
+ #
660
+ # In the default mixed mode or row mode, iteration is the standard row major
661
+ # walking of rows. In column mode, iteration will +yield+ two element
662
+ # tuples containing the column name and an Array of values for that column.
663
+ #
664
+ # This method returns the table for chaining.
665
+ #
666
+ def each(&block)
667
+ if @mode == :col
668
+ headers.each { |header| block[[header, self[header]]] }
669
+ else
670
+ @table.each(&block)
671
+ end
672
+
673
+ self # for chaining
674
+ end
675
+
676
+ # Returns +true+ if all rows of this table ==() +other+'s rows.
677
+ def ==(other)
678
+ @table == other.table
679
+ end
680
+
681
+ #
682
+ # Returns the table as an Array of Arrays. Headers will be the first row,
683
+ # then all of the field rows will follow.
684
+ #
685
+ def to_a
686
+ @table.inject([headers]) do |array, row|
687
+ if row.header_row?
688
+ array
689
+ else
690
+ array + [row.fields]
691
+ end
692
+ end
693
+ end
694
+
695
+ #
696
+ # Returns the table as a complete CSV String. Headers will be listed first,
697
+ # then all of the field rows.
698
+ #
699
+ def to_csv(options = Hash.new)
700
+ @table.inject([headers.to_csv(options)]) do |rows, row|
701
+ if row.header_row?
702
+ rows
703
+ else
704
+ rows + [row.fields.to_csv(options)]
705
+ end
706
+ end.join
707
+ end
708
+ alias_method :to_s, :to_csv
709
+
710
+ def inspect
711
+ "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
712
+ end
713
+ end
714
+
715
+ # The error thrown when the parser encounters illegal CSV formatting.
716
+ class MalformedCSVError < RuntimeError; end
717
+
718
+ #
719
+ # A FieldInfo Struct contains details about a field's position in the data
720
+ # source it was read from. FasterCSV will pass this Struct to some blocks
721
+ # that make decisions based on field structure. See
722
+ # FasterCSV.convert_fields() for an example.
723
+ #
724
+ # <b><tt>index</tt></b>:: The zero-based index of the field in its row.
725
+ # <b><tt>line</tt></b>:: The line of the data source this row is from.
726
+ # <b><tt>header</tt></b>:: The header for the column, when available.
727
+ #
728
+ FieldInfo = Struct.new(:index, :line, :header)
729
+
730
+ # A Regexp used to find and convert some common Date formats.
731
+ DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
732
+ \d{4}-\d{2}-\d{2} )\z /x
733
+ # A Regexp used to find and convert some common DateTime formats.
734
+ DateTimeMatcher =
735
+ / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
736
+ \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
737
+ #
738
+ # This Hash holds the built-in converters of FasterCSV that can be accessed by
739
+ # name. You can select Converters with FasterCSV.convert() or through the
740
+ # +options+ Hash passed to FasterCSV::new().
741
+ #
742
+ # <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
743
+ # <b><tt>:float</tt></b>:: Converts any field Float() accepts.
744
+ # <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
745
+ # and <tt>:float</tt>.
746
+ # <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
747
+ # <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
748
+ # <b><tt>:all</tt></b>:: All built-in converters. A combination of
749
+ # <tt>:date_time</tt> and <tt>:numeric</tt>.
750
+ #
751
+ # This Hash is intentionally left unfrozen and users should feel free to add
752
+ # values to it that can be accessed by all FasterCSV objects.
753
+ #
754
+ # To add a combo field, the value should be an Array of names. Combo fields
755
+ # can be nested with other combo fields.
756
+ #
757
+ Converters = { :integer => lambda { |f| Integer(f) rescue f },
758
+ :float => lambda { |f| Float(f) rescue f },
759
+ :numeric => [:integer, :float],
760
+ :date => lambda { |f|
761
+ f =~ DateMatcher ? (Date.parse(f) rescue f) : f
762
+ },
763
+ :date_time => lambda { |f|
764
+ f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
765
+ },
766
+ :all => [:date_time, :numeric] }
767
+
768
+ #
769
+ # This Hash holds the built-in header converters of FasterCSV that can be
770
+ # accessed by name. You can select HeaderConverters with
771
+ # FasterCSV.header_convert() or through the +options+ Hash passed to
772
+ # FasterCSV::new().
773
+ #
774
+ # <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
775
+ # <b><tt>:symbol</tt></b>:: The header String is downcased, spaces are
776
+ # replaced with underscores, non-word characters
777
+ # are dropped, and finally to_sym() is called.
778
+ #
779
+ # This Hash is intentionally left unfrozen and users should feel free to add
780
+ # values to it that can be accessed by all FasterCSV objects.
781
+ #
782
+ # To add a combo field, the value should be an Array of names. Combo fields
783
+ # can be nested with other combo fields.
784
+ #
785
+ HeaderConverters = {
786
+ :downcase => lambda { |h| h.downcase },
787
+ :symbol => lambda { |h|
788
+ h.downcase.tr(" ", "_").delete("^a-z0-9_").to_sym
789
+ }
790
+ }
791
+
792
+ #
793
+ # The options used when no overrides are given by calling code. They are:
794
+ #
795
+ # <b><tt>:col_sep</tt></b>:: <tt>","</tt>
796
+ # <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
797
+ # <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
798
+ # <b><tt>:converters</tt></b>:: +nil+
799
+ # <b><tt>:unconverted_fields</tt></b>:: +nil+
800
+ # <b><tt>:headers</tt></b>:: +false+
801
+ # <b><tt>:return_headers</tt></b>:: +false+
802
+ # <b><tt>:header_converters</tt></b>:: +nil+
803
+ # <b><tt>:skip_blanks</tt></b>:: +false+
804
+ # <b><tt>:force_quotes</tt></b>:: +false+
805
+ #
806
+ DEFAULT_OPTIONS = { :col_sep => ",",
807
+ :row_sep => :auto,
808
+ :quote_char => '"',
809
+ :converters => nil,
810
+ :unconverted_fields => nil,
811
+ :headers => false,
812
+ :return_headers => false,
813
+ :header_converters => nil,
814
+ :skip_blanks => false,
815
+ :force_quotes => false }.freeze
816
+
817
+ #
818
+ # This method will build a drop-in replacement for many of the standard CSV
819
+ # methods. It allows you to write code like:
820
+ #
821
+ # begin
822
+ # require "faster_csv"
823
+ # FasterCSV.build_csv_interface
824
+ # rescue LoadError
825
+ # require "csv"
826
+ # end
827
+ # # ... use CSV here ...
828
+ #
829
+ # This is not a complete interface with completely identical behavior.
830
+ # However, it is intended to be close enough that you won't notice the
831
+ # difference in most cases. CSV methods supported are:
832
+ #
833
+ # * foreach()
834
+ # * generate_line()
835
+ # * open()
836
+ # * parse()
837
+ # * parse_line()
838
+ # * readlines()
839
+ #
840
+ # Be warned that this interface is slower than vanilla FasterCSV due to the
841
+ # extra layer of method calls. Depending on usage, this can slow it down to
842
+ # near CSV speeds.
843
+ #
844
+ def self.build_csv_interface
845
+ Object.const_set(:CSV, Class.new).class_eval do
846
+ def self.foreach(path, rs = :auto, &block) # :nodoc:
847
+ FasterCSV.foreach(path, :row_sep => rs, &block)
848
+ end
849
+
850
+ def self.generate_line(row, fs = ",", rs = "") # :nodoc:
851
+ FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
852
+ end
853
+
854
+ def self.open(path, mode, fs = ",", rs = :auto, &block) # :nodoc:
855
+ if block and mode.include? "r"
856
+ FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs) do |csv|
857
+ csv.each(&block)
858
+ end
859
+ else
860
+ FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs, &block)
861
+ end
862
+ end
863
+
864
+ def self.parse(str_or_readable, fs = ",", rs = :auto, &block) # :nodoc:
865
+ FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
866
+ end
867
+
868
+ def self.parse_line(src, fs = ",", rs = :auto) # :nodoc:
869
+ FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
870
+ end
871
+
872
+ def self.readlines(path, rs = :auto) # :nodoc:
873
+ FasterCSV.readlines(path, :row_sep => rs)
874
+ end
875
+ end
876
+ end
877
+
878
+ #
879
+ # This method allows you to serialize an Array of Ruby objects to a String or
880
+ # File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
881
+ # useful for spreadsheet and database interaction.
882
+ #
883
+ # Out of the box, this method is intended to work with simple data objects or
884
+ # Structs. It will serialize a list of instance variables and/or
885
+ # Struct.members().
886
+ #
887
+ # If you need more complicated serialization, you can control the process
888
+ # by adding methods to the class to be serialized.
889
+ #
890
+ # A class method csv_meta() is responsible for returning the first row of the
891
+ # document (as an Array). This row is considered to be a Hash of the form
892
+ # key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
893
+ # key with a value of the stringified class name and FasterCSV::dump() will
894
+ # create this, if you do not define this method. This method is only called
895
+ # on the first object of the Array.
896
+ #
897
+ # The next method you can provide is an instance method called csv_headers().
898
+ # This method is expected to return the second line of the document (again as
899
+ # an Array), which is to be used to give each column a header. By default,
900
+ # FasterCSV::load() will set an instance variable if the field header starts
901
+ # with an @ character or call send() passing the header as the method name and
902
+ # the field value as an argument. This method is only called on the first
903
+ # object of the Array.
904
+ #
905
+ # Finally, you can provide an instance method called csv_dump(), which will
906
+ # be passed the headers. This should return an Array of fields that can be
907
+ # serialized for this object. This method is called once for every object in
908
+ # the Array.
909
+ #
910
+ # The +io+ parameter can be used to serialize to a File, and +options+ can be
911
+ # anything FasterCSV::new() accepts.
912
+ #
913
def self.dump(ary_of_objs, io = "", options = Hash.new)
  # The first object serves as the template for the meta and header rows.
  sample = ary_of_objs.first
  writer = FasterCSV.new(io, options)

  # Row one: meta information, taken from the class's csv_meta() when it is
  # defined, otherwise a minimal class/name pair.
  begin
    writer << sample.class.csv_meta
  rescue NoMethodError
    writer << [:class, sample.class]
  end

  # Row two: headers, taken from csv_headers() when it is defined, otherwise
  # the sorted instance variables plus (for Structs) the sorted member setters.
  begin
    headers = sample.csv_headers
  rescue NoMethodError
    headers   = sample.instance_variables.sort
    is_struct = sample.class.ancestors.any? { |klass| klass.to_s =~ /\AStruct\b/ }
    if is_struct
      setters  = sample.members.map { |member| "#{member}=" }
      headers += setters.sort
    end
  end
  writer << headers

  # Remaining rows: one per object, via csv_dump() when it is defined.
  ary_of_objs.each do |obj|
    begin
      writer << obj.csv_dump(headers)
    rescue NoMethodError
      fields = headers.map do |name|
        # "@name" headers are instance variables; anything else is a Struct
        # setter ("name="), read back through [] with the "=" stripped.
        if name[0] == ?@ then obj.instance_variable_get(name)
        else                  obj[name[0..-2]]
        end
      end
      writer << fields
    end
  end

  # A String target is handed back; any other target is closed.
  if io.is_a? String then writer.string else writer.close end
end
957
+
958
+ #
959
+ # :call-seq:
960
+ # filter( options = Hash.new ) { |row| ... }
961
+ # filter( input, options = Hash.new ) { |row| ... }
962
+ # filter( input, output, options = Hash.new ) { |row| ... }
963
+ #
964
+ # This method is a convenience for building Unix-like filters for CSV data.
965
+ # Each row is yielded to the provided block which can alter it as needed.
966
+ # After the block returns, the row is appended to +output+ altered or not.
967
+ #
968
+ # The +input+ and +output+ arguments can be anything FasterCSV::new() accepts
969
+ # (generally String or IO objects). If not given, they default to
970
+ # <tt>ARGF</tt> and <tt>$stdout</tt>.
971
+ #
972
+ # The +options+ parameter is also filtered down to FasterCSV::new() after some
973
+ # clever key parsing. Any key beginning with <tt>:in_</tt> or
974
+ # <tt>:input_</tt> will have that leading identifier stripped and will only
975
+ # be used in the +options+ Hash for the +input+ object. Keys starting with
976
+ # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
977
+ # are assigned to both objects.
978
+ #
979
+ # The <tt>:output_row_sep</tt> +option+ defaults to
980
+ # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
981
+ #
982
def self.filter(*args)
  # Split a trailing options Hash into separate input and output option sets.
  in_options  = Hash.new
  out_options = {:row_sep => $INPUT_RECORD_SEPARATOR}
  if args.last.is_a? Hash
    args.pop.each do |key, value|
      name = key.to_s
      if name =~ /\Ain(?:put)?_(.+)\Z/       # :in_* / :input_* => input only
        in_options[$1.to_sym] = value
      elsif name =~ /\Aout(?:put)?_(.+)\Z/   # :out_* / :output_* => output only
        out_options[$1.to_sym] = value
      else                                   # everything else => both sides
        in_options[key]  = value
        out_options[key] = value
      end
    end
  end

  # Wrap whatever is left in +args+, falling back to ARGF and $stdout.
  source = FasterCSV.new(args.shift || ARGF,    in_options)
  sink   = FasterCSV.new(args.shift || $stdout, out_options)

  # Let the caller's block alter each row, then write it out.
  source.each do |row|
    yield row
    sink << row
  end
end
1008
+
1009
+ #
1010
+ # This method is intended as the primary interface for reading CSV files. You
1011
+ # pass a +path+ and any +options+ you wish to set for the read. Each row of
1012
+ # file will be passed to the provided +block+ in turn.
1013
+ #
1014
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1015
+ #
1016
def self.foreach(path, options = Hash.new, &block)
  # Read-only, binary-mode open; each parsed row is forwarded to +block+.
  open(path, "rb", options) { |csv| csv.each(&block) }
end
1021
+
1022
+ #
1023
+ # :call-seq:
1024
+ # generate( str, options = Hash.new ) { |faster_csv| ... }
1025
+ # generate( options = Hash.new ) { |faster_csv| ... }
1026
+ #
1027
+ # This method wraps a String you provide, or an empty default String, in a
1028
+ # FasterCSV object which is passed to the provided block. You can use the
1029
+ # block to append CSV rows to the String and when the block exits, the
1030
+ # final String will be returned.
1031
+ #
1032
+ # Note that a passed String *is* modified by this method. Call dup() before
1033
+ # passing if you need a new String.
1034
+ #
1035
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1036
+ #
1037
def self.generate(*args)
  if args.first.is_a? String
    # Wrap the caller's String and position at its end so rows are appended.
    target = StringIO.new(args.shift)
    target.seek(0, IO::SEEK_END)
  else
    # No String given: start from an empty one.
    target = ""
  end
  args.unshift(target)

  csv = new(*args)   # wrap
  yield csv          # let the caller append rows
  csv.string         # hand back the resulting String
end
1050
+
1051
+ #
1052
+ # This method is a shortcut for converting a single row (Array) into a CSV
1053
+ # String.
1054
+ #
1055
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1056
+ #
1057
+ # The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
1058
+ # (<tt>$/</tt>) when calling this method.
1059
+ #
1060
def self.generate_line(row, options = Hash.new)
  # Caller-supplied options win over the default row separator.
  defaults = {:row_sep => $INPUT_RECORD_SEPARATOR}
  writer   = new("", defaults.merge(options))
  writer << row
  writer.string
end
1064
+
1065
+ #
1066
+ # This method will return a FasterCSV instance, just like FasterCSV::new(),
1067
+ # but the instance will be cached and returned for all future calls to this
1068
+ # method for the same +data+ object (tested by Object#object_id()) with the
1069
+ # same +options+.
1070
+ #
1071
+ # If a block is given, the instance is passed to the block and the return
1072
+ # value becomes the return value of the block.
1073
+ #
1074
def self.instance(data = $stdout, options = Hash.new)
  # The cache key is the data object's identity followed by the value given
  # for every known option, in a stable (name-sorted) order.
  option_names = DEFAULT_OPTIONS.keys.sort_by { |name| name.to_s }
  sig          = [data.object_id] + options.values_at(*option_names)

  # Look the instance up, building and remembering it on a miss.
  @@instances ||= Hash.new
  cached = (@@instances[sig] ||= new(data, options))

  # With a block, the block's result is returned; otherwise the instance.
  block_given? ? yield(cached) : cached
end
1089
+
1090
+ #
1091
+ # This method is the reading counterpart to FasterCSV::dump(). See that
1092
+ # method for a detailed description of the process.
1093
+ #
1094
+ # You can customize loading by adding a class method called csv_load() which
1095
+ # will be passed a Hash of meta information, an Array of headers, and an Array
1096
+ # of fields for the object the method is expected to return.
1097
+ #
1098
+ # Remember that all fields will be Strings after this load. If you need
1099
+ # something else, use +options+ to setup converters or provide a custom
1100
+ # csv_load() implementation.
1101
+ #
1102
def self.load(io_or_str, options = Hash.new)
  reader = FasterCSV.new(io_or_str, options)

  # First row: meta information as alternating keys and values.  The "class"
  # entry names the (possibly namespaced) class to rebuild.
  # NOTE(review): the class name and the header names below come straight from
  # the document and are resolved with const_get()/send(); this looks unsafe
  # for untrusted input -- confirm how callers use it.
  meta  = Hash[*reader.shift]
  klass = Object
  meta["class"].split("::").each { |const| klass = klass.const_get(const) }

  # Second row: the column headers.
  headers = reader.shift

  # Every remaining row becomes one object.
  objects = reader.map do |row|
    begin
      klass.csv_load(meta, headers, row)
    rescue NoMethodError
      # No usable csv_load(): allocate a bare object and fill it in by hand.
      obj = klass.allocate
      headers.zip(row) do |name, value|
        if name[0] == ?@
          obj.instance_variable_set(name, value)   # "@ivar" header
        else
          obj.send(name, value)                    # setter-style header
        end
      end
      obj
    end
  end

  reader.close unless io_or_str.is_a? String

  objects
end
1135
+
1136
+ #
1137
+ # :call-seq:
1138
+ # open( filename, mode="rb", options = Hash.new ) { |faster_csv| ... }
1139
+ # open( filename, mode="rb", options = Hash.new )
1140
+ #
1141
+ # This method opens an IO object, and wraps that with FasterCSV. This is
1142
+ # intended as the primary interface for writing a CSV file.
1143
+ #
1144
+ # You may pass any +args+ Ruby's open() understands followed by an optional
1145
+ # Hash containing any +options+ FasterCSV::new() understands.
1146
+ #
1147
+ # This method works like Ruby's open() call, in that it will pass a FasterCSV
1148
+ # object to a provided block and close it when the block terminates, or it
1149
+ # will return the FasterCSV object when no block is provided. (*Note*: This
1150
+ # is different from the standard CSV library which passes rows to the block.
1151
+ # Use FasterCSV::foreach() for that behavior.)
1152
+ #
1153
+ # An opened FasterCSV object will delegate to many IO methods, for
1154
+ # convenience. You may call:
1155
+ #
1156
+ # * binmode()
1157
+ # * close()
1158
+ # * close_read()
1159
+ # * close_write()
1160
+ # * closed?()
1161
+ # * eof()
1162
+ # * eof?()
1163
+ # * fcntl()
1164
+ # * fileno()
1165
+ # * flush()
1166
+ # * fsync()
1167
+ # * ioctl()
1168
+ # * isatty()
1169
+ # * pid()
1170
+ # * pos()
1171
+ # * reopen()
1172
+ # * seek()
1173
+ # * stat()
1174
+ # * sync()
1175
+ # * sync=()
1176
+ # * tell()
1177
+ # * to_i()
1178
+ # * to_io()
1179
+ # * tty?()
1180
+ #
1181
def self.open(*args)
  # A trailing Hash holds the FasterCSV options; everything before it is
  # passed on to File.open().
  options = if args.last.is_a? Hash then args.pop else Hash.new end
  # default to a binary open mode
  args << "rb" if args.size == 1

  # Open the File first, then wrap it.  If wrapping fails (for example
  # because of an unknown option), close the File before re-raising so the
  # handle is not leaked.
  file = File.open(*args)
  begin
    csv = new(file, options)
  rescue Exception
    file.close
    raise
  end

  # handle blocks like Ruby's open(), not like the CSV library
  if block_given?
    begin
      yield csv
    ensure
      csv.close
    end
  else
    csv
  end
end
1200
+
1201
+ #
1202
+ # :call-seq:
1203
+ # parse( str, options = Hash.new ) { |row| ... }
1204
+ # parse( str, options = Hash.new )
1205
+ #
1206
+ # This method can be used to easily parse CSV out of a String. You may either
1207
+ # provide a +block+ which will be called with each row of the String in turn,
1208
+ # or just use the returned Array of Arrays (when no +block+ is given).
1209
+ #
1210
+ # You pass your +str+ to read from, and an optional +options+ Hash containing
1211
+ # anything FasterCSV::new() understands.
1212
+ #
1213
def self.parse(*args, &block)
  csv = new(*args)

  # With a block, stream each row to it.
  return csv.each(&block) if block

  # Without one, slurp everything and make sure the wrapper gets closed.
  begin
    csv.read
  ensure
    csv.close
  end
end
1225
+
1226
+ #
1227
+ # This method is a shortcut for converting a single line of a CSV String into
1228
+ # an Array.  Note that if +line+ contains multiple rows, anything
1229
+ # beyond the first row is ignored.
1230
+ #
1231
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1232
+ #
1233
def self.parse_line(line, options = Hash.new)
  # Only the first row is pulled; anything after it is never read.
  parser = new(line, options)
  parser.shift
end
1236
+
1237
+ #
1238
+ # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
1239
+ # file and any +options+ FasterCSV::new() understands.
1240
+ #
1241
def self.read(path, options = Hash.new)
  # Open in binary read mode, slurp every row, and let open() close the file.
  open(path, "rb", options) do |csv|
    csv.read
  end
end
1244
+
1245
+ # Alias for FasterCSV::read().
1246
def self.readlines(*args)
  # Pure pass-through: every argument is forwarded to read() untouched.
  self.read(*args)
end
1249
+
1250
+ #
1251
+ # A shortcut for:
1252
+ #
1253
+ # FasterCSV.read( path, { :headers => true,
1254
+ # :converters => :numeric,
1255
+ # :header_converters => :symbol }.merge(options) )
1256
+ #
1257
def self.table(path, options = Hash.new)
  # Table-friendly defaults; anything in +options+ overrides them.
  defaults = { :headers           => true,
               :converters        => :numeric,
               :header_converters => :symbol }
  read(path, defaults.merge(options))
end
1262
+
1263
+ #
1264
+ # This constructor will wrap either a String or IO object passed in +data+ for
1265
+ # reading and/or writing. In addition to the FasterCSV instance methods,
1266
+ # several IO methods are delegated. (See FasterCSV::open() for a complete
1267
+ # list.) If you pass a String for +data+, you can later retrieve it (after
1268
+ # writing to it, for example) with FasterCSV.string().
1269
+ #
1270
+ # Note that a wrapped String will be positioned at the beginning (for
1271
+ # reading). If you want it at the end (for writing), use
1272
+ # FasterCSV::generate(). If you want any other positioning, pass a preset
1273
+ # StringIO object instead.
1274
+ #
1275
+ # You may set any reading and/or writing preferences in the +options+ Hash.
1276
+ # Available options are:
1277
+ #
1278
+ # <b><tt>:col_sep</tt></b>:: The String placed between each field.
1279
+ # <b><tt>:row_sep</tt></b>:: The String appended to the end of each
1280
+ # row. This can be set to the special
1281
+ # <tt>:auto</tt> setting, which requests
1282
+ # that FasterCSV automatically discover
1283
+ # this from the data. Auto-discovery
1284
+ # reads ahead in the data looking for
1285
+ # the next <tt>"\r\n"</tt>,
1286
+ # <tt>"\n"</tt>, or <tt>"\r"</tt>
1287
+ # sequence. A sequence will be selected
1288
+ # even if it occurs in a quoted field,
1289
+ # assuming that you would have the same
1290
+ # line endings there. If none of those
1291
+ # sequences is found, +data+ is
1292
+ # <tt>ARGF</tt>, <tt>STDIN</tt>,
1293
+ # <tt>STDOUT</tt>, or <tt>STDERR</tt>,
1294
+ # or the stream is only available for
1295
+ # output, the default
1296
+ # <tt>$INPUT_RECORD_SEPARATOR</tt>
1297
+ # (<tt>$/</tt>) is used. Obviously,
1298
+ # discovery takes a little time. Set
1299
+ # manually if speed is important. Also
1300
+ # note that IO objects should be opened
1301
+ # in binary mode on Windows if this
1302
+ # feature will be used as the
1303
+ # line-ending translation can cause
1304
+ # problems with resetting the document
1305
+ # position to where it was before the
1306
+ # read ahead.
1307
+ # <b><tt>:quote_char</tt></b>:: The character used to quote fields.
1308
+ # This has to be a single character
1309
+ # String. This is useful for
1310
+ # application that incorrectly use
1311
+ # <tt>'</tt> as the quote character
1312
+ # instead of the correct <tt>"</tt>.
1313
+ # FasterCSV will always consider a
1314
+ # double sequence this character to be
1315
+ # an escaped quote.
1316
+ # <b><tt>:encoding</tt></b>:: The encoding to use when parsing the
1317
+ # file. Defaults to your <tt>$KCODE</tt>
1318
+ # setting. Valid values: <tt>`n’</tt> or
1319
+ # <tt>`N’</tt> for none, <tt>`e’</tt> or
1320
+ # <tt>`E’</tt> for EUC, <tt>`s’</tt> or
1321
+ # <tt>`S’</tt> for SJIS, and
1322
+ # <tt>`u’</tt> or <tt>`U’</tt> for UTF-8
1323
+ # (see Regexp.new()).
1324
+ # <b><tt>:field_size_limit</tt></b>:: This is a maximum size FasterCSV will
1325
+ # read ahead looking for the closing
1326
+ # quote for a field. (In truth, it
1327
+ # reads to the first line ending beyond
1328
+ # this size.) If a quote cannot be
1329
+ # found within the limit FasterCSV will
1330
+ # raise a MalformedCSVError, assuming
1331
+ # the data is faulty. You can use this
1332
+ # limit to prevent what are effectively
1333
+ # DoS attacks on the parser. However,
1334
+ # this limit can cause a legitimate
1335
+ # parse to fail and thus is set to
1336
+ # +nil+, or off, by default.
1337
+ # <b><tt>:converters</tt></b>:: An Array of names from the Converters
1338
+ # Hash and/or lambdas that handle custom
1339
+ # conversion. A single converter
1340
+ # doesn't have to be in an Array.
1341
+ # <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
1342
+ # unconverted_fields() method will be
1343
+ # added to all returned rows (Array or
1344
+ # FasterCSV::Row) that will return the
1345
+ # fields as they were before conversion.
1346
+ # Note that <tt>:headers</tt> supplied
1347
+ # by Array or String were not fields of
1348
+ # the document and thus will have an
1349
+ # empty Array attached.
1350
+ # <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
1351
+ # +true+, the initial row of the CSV
1352
+ # file will be treated as a row of
1353
+ # headers. If set to an Array, the
1354
+ # contents will be used as the headers.
1355
+ # If set to a String, the String is run
1356
+ # through a call of
1357
+ # FasterCSV::parse_line() with the same
1358
+ # <tt>:col_sep</tt>, <tt>:row_sep</tt>,
1359
+ # and <tt>:quote_char</tt> as this
1360
+ # instance to produce an Array of
1361
+ # headers. This setting causes
1362
+ # FasterCSV.shift() to return rows as
1363
+ # FasterCSV::Row objects instead of
1364
+ # Arrays and FasterCSV.read() to return
1365
+ # FasterCSV::Table objects instead of
1366
+ # an Array of Arrays.
1367
+ # <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently
1368
+ # swallowed. If set to +true+, header
1369
+ # rows are returned in a FasterCSV::Row
1370
+ # object with identical headers and
1371
+ # fields (save that the fields do not go
1372
+ # through the converters).
1373
+ # <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
1374
+ # set, a header row will be added to the
1375
+ # output.
1376
+ # <b><tt>:header_converters</tt></b>:: Identical in functionality to
1377
+ # <tt>:converters</tt> save that the
1378
+ # conversions are only made to header
1379
+ # rows.
1380
+ # <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, FasterCSV
1381
+ # will skip over any rows with no
1382
+ # content.
1383
+ # <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, FasterCSV
1384
+ # will quote all CSV fields it creates.
1385
+ #
1386
+ # See FasterCSV::DEFAULT_OPTIONS for the default settings.
1387
+ #
1388
+ # Options cannot be overridden in the instance methods for performance reasons,
1389
+ # so be sure to set what you want here.
1390
+ #
1391
def initialize(data, options = Hash.new)
  # Start from the defaults and layer the caller's choices on top.
  options = DEFAULT_OPTIONS.merge(options)

  # Strings are wrapped in a StringIO; anything else is used as given.
  @io = data.is_a?(String) ? StringIO.new(data) : data

  # Each init_* step removes the options it understands from the Hash...
  init_separators(options)
  init_parsers(options)
  init_converters(options)
  init_headers(options)

  # ...so whatever is left over was not recognized.
  raise ArgumentError, "Unknown options: #{options.keys.join(', ')}." unless options.empty?

  # Line numbers are tracked here because the IO object's own count is thrown
  # off by line ends inside CSV fields.
  @lineno = 0
end
1410
+
1411
+ #
1412
+ # The line number of the last row read from this file. Fields with nested
1413
+ # line-end characters will not affect this count.
1414
+ #
1415
+ attr_reader :lineno
1416
+
1417
+ ### IO and StringIO Delegation ###
1418
+
1419
+ extend Forwardable
1420
+ def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
1421
+ :eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
1422
+ :isatty, :pid, :pos, :reopen, :seek, :stat, :string,
1423
+ :sync, :sync=, :tell, :to_i, :to_io, :tty?
1424
+
1425
+ # Rewinds the underlying IO object and resets FasterCSV's lineno() counter.
1426
def rewind
  # Forget any headers already seen and restart the line count, then rewind
  # the wrapped IO (whose return value is passed back).
  @lineno  = 0
  @headers = nil
  @io.rewind
end
1432
+
1433
+ ### End Delegation ###
1434
+
1435
+ #
1436
+ # The primary write method for wrapped Strings and IOs, +row+ (an Array or
1437
+ # FasterCSV::Row) is converted to CSV and appended to the data source. When a
1438
+ # FasterCSV::Row is passed, only the row's fields() are appended to the
1439
+ # output.
1440
+ #
1441
+ # The data source must be open for writing.
1442
+ #
1443
def <<(row)
  # Headers given as an Array or String are resolved before the first write
  # (no data is read for these) and, if requested, written out as a row.
  if header_row? and [Array, String].include? @use_headers.class
    parse_headers
    self << @headers if @write_headers
  end

  # Handle FasterCSV::Row objects and Hashes
  row = case row
        when self.class::Row then row.fields
        when Hash            then @headers.map { |header| row[header] }
        else                      row
        end

  # When headers come from the data, the first row written becomes them.
  @headers =  row if header_row?
  @lineno  += 1

  # Add the byte order marker to the first field of the first line, if set.
  # This is done on a copy: +row+ may be the caller's own Array and may have
  # just been stored as @headers, so changing it in place would corrupt the
  # caller's data and break header lookups for later Hash rows.
  if @lineno == 1 && @bom
    row    = row.dup
    row[0] = @bom + row[0].to_s
  end

  @io << row.map(&@quote).join(@col_sep) + @row_sep  # quote and separate

  self  # for chaining
end
1466
+ alias_method :add_row, :<<
1467
+ alias_method :puts, :<<
1468
+
1469
+ #
1470
+ # :call-seq:
1471
+ # convert( name )
1472
+ # convert { |field| ... }
1473
+ # convert { |field, field_info| ... }
1474
+ #
1475
+ # You can use this method to install a FasterCSV::Converters built-in, or
1476
+ # provide a block that handles a custom conversion.
1477
+ #
1478
+ # If you provide a block that takes one argument, it will be passed the field
1479
+ # and is expected to return the converted value or the field itself. If your
1480
+ # block takes two arguments, it will also be passed a FieldInfo Struct,
1481
+ # containing details about the field. Again, the block should return a
1482
+ # converted field or the field itself.
1483
+ #
1484
def convert(name = nil, &converter)
  # Field converters are looked up by name in the class's Converters constant.
  registry = self.class::Converters
  add_converter(:converters, registry, name, &converter)
end
1487
+
1488
+ #
1489
+ # :call-seq:
1490
+ # header_convert( name )
1491
+ # header_convert { |field| ... }
1492
+ # header_convert { |field, field_info| ... }
1493
+ #
1494
+ # Identical to FasterCSV.convert(), but for header rows.
1495
+ #
1496
+ # Note that this method must be called before header rows are read to have any
1497
+ # effect.
1498
+ #
1499
def header_convert(name = nil, &converter)
  # Header converters are looked up by name in the class's HeaderConverters
  # constant.
  registry = self.class::HeaderConverters
  add_converter(:header_converters, registry, name, &converter)
end
1505
+
1506
+ include Enumerable
1507
+
1508
+ #
1509
+ # Yields each row of the data source in turn.
1510
+ #
1511
+ # Support for Enumerable.
1512
+ #
1513
+ # The data source must be open for reading.
1514
+ #
1515
def each
  # Keep pulling rows until shift() signals the end with a false value.
  row = shift
  while row
    yield row
    row = shift
  end
end
1520
+
1521
+ #
1522
+ # Slurps the remaining rows and returns an Array of Arrays.
1523
+ #
1524
+ # The data source must be open for reading.
1525
+ #
1526
def read
  # Collect every remaining row; with headers in use they are wrapped in a
  # Table, otherwise the plain Array is returned.
  rows = to_a
  @use_headers ? Table.new(rows) : rows
end
1534
+ alias_method :readlines, :read
1535
+
1536
+ # Returns +true+ if the next row read will be a header row.
1537
def header_row?
  # Headers are wanted but none have been stored yet.
  @use_headers && @headers.nil?
end
1540
+
1541
+ #
1542
+ # The primary read method for wrapped Strings and IOs, a single row is pulled
1543
+ # from the data source, parsed and returned as an Array of fields (if header
1544
+ # rows are not used) or a FasterCSV::Row (when header rows are used).
1545
+ #
1546
+ # The data source must be open for reading.
1547
+ #
1548
def shift
  #########################################################################
  ### Deliberately one long method: plain conditionals are used in place ###
  ### of helper calls to keep per-row overhead down.                     ###
  #########################################################################

  # Headers supplied as an Array or String do not come from the document, so
  # when they are to be returned they are handed back before any data is read.
  if header_row? and @return_headers and
     [Array, String].include? @use_headers.class
    return parse_headers unless @unconverted_fields
    return add_unconverted_fields(parse_headers, Array.new)
  end

  # Raw text accumulated so far for the current row.
  buffer = ""

  #
  # One row can need several <tt>@io.gets()</tt> calls, because quoted
  # fields may contain \r and/or \n characters.
  #
  loop do
    # Pull in the next chunk; any failure here (including running out of
    # data) ends the read with +nil+.
    begin
      buffer += @io.gets(@row_sep)
    rescue
      return nil
    end

    # Work on a copy that is consumed as fields are recognized.
    rest = buffer.dup
    rest.sub!(@parsers[:line_end], "")

    #
    # A blank line is reported as an empty row (<tt>Array.new</tt>), not as
    # CSV's <tt>[nil]</tt>.
    #
    if rest.empty?
      @lineno += 1
      if @skip_blanks
        buffer = ""
        next
      end
      return add_unconverted_fields(Array.new, Array.new) if @unconverted_fields
      return FasterCSV::Row.new(Array.new, Array.new)     if @use_headers
      return Array.new
    end

    #
    # Strip leading empty fields first (the main parser cannot handle them)
    # and record one +nil+ per separator removed.
    #
    fields = if rest.sub!(@parsers[:leading_fields], "")
      [nil] * ($&.length / @col_sep.length)
    else
      Array.new
    end

    #
    # Consume the remaining fields one match at a time with the main
    # row Regexp.
    #
    rest.gsub!(@parsers[:csv_row]) do
      quoted, bare = $1, $2
      field = if quoted                       # quoted field: unescape quotes
        quoted.gsub(@quote_char * 2, @quote_char)
      elsif bare.empty?                       # empty unquoted field => +nil+
        nil
      elsif bare.count("\r\n").zero?          # strict: no raw line ends
        bare
      else
        raise MalformedCSVError, "Unquoted fields do not allow " +
                                 "\\r or \\n (line #{lineno + 1})."
      end
      fields << field
      ""  # gsub!'s replacement, clear the field
    end

    # Nothing left over means the whole row was understood.
    if rest.empty?
      @lineno += 1

      # keep the raw fields around, if requested
      unconverted = fields.dup if @unconverted_fields

      # run converters (header-aware conversion happens in parse_headers)
      fields = convert_fields(fields) if !@use_headers && !@converters.empty?
      # handle header rows and FasterCSV::Row conversion
      fields = parse_headers(fields) if @use_headers

      # attach the raw fields, unless that has already been done
      if @unconverted_fields && !fields.respond_to?(:unconverted_fields)
        add_unconverted_fields(fields, unconverted)
      end

      # the finished row is the result of the loop, and so of the method
      break fields
    end

    # Leftover text at end of data means a quoted field was never closed.
    if @io.eof?
      raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
    elsif rest =~ @parsers[:bad_field]
      raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
    elsif @field_size_limit and rest.length >= @field_size_limit
      raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
    end
    # otherwise, go around again and read more data to complete the row
  end
end
1660
+ alias_method :gets, :shift
1661
+ alias_method :readline, :shift
1662
+
1663
+ # Returns a simplified description of the key FasterCSV attributes.
1664
def inspect
  # Name the standard streams; otherwise show the wrapped object's class.
  io_type = if    @io == $stdout then "$stdout"
            elsif @io == $stdin  then "$stdin"
            elsif @io == $stderr then "$stderr"
            else                      @io.class.to_s
            end
  description = "<##{self.class} io_type:" + io_type

  # Add the path when the wrapped object has one.
  if @io.respond_to?(:path) and (path = @io.path)
    description << " io_path:#{path.inspect}"
  end

  # Add the remaining attributes, leaving out any that are nil or false.
  %w[ lineno     col_sep     row_sep
      quote_char skip_blanks encoding ].each do |attr_name|
    value = instance_variable_get("@#{attr_name}")
    description << " #{attr_name}:#{value.inspect}" if value
  end

  # Show the headers, or just +true+ when none have been stored yet.
  description << " headers:#{(@headers || true).inspect}" if @use_headers

  description << ">"
end
1688
+
1689
+ private
1690
+
1691
+ #
1692
+ # Stores the indicated separators for later use.
1693
+ #
1694
+ # If auto-discovery was requested for <tt>@row_sep</tt>, this method will read
1695
+ # ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
1696
+ # +STDERR+ and any stream open for output only are given a default
1697
+ # <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1698
+ #
1699
+ # This method also establishes the quoting rules used for CSV output.
1700
+ #
1701
def init_separators(options)
  # Pull the separator settings out of +options+.
  @col_sep    = options.delete(:col_sep)
  @row_sep    = options.delete(:row_sep)
  @quote_char = options.delete(:quote_char)

  unless @quote_char.length == 1
    raise ArgumentError, ":quote_char has to be a single character String"
  end

  # Discover the row separator from the data when asked to.
  if @row_sep == :auto
    if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or
       (defined?(Zlib) and @io.class == Zlib::GzipWriter)
      # These streams are not sampled; use the default.
      @row_sep = $INPUT_RECORD_SEPARATOR
    else
      begin
        start_pos = @io.pos  # where to come back to afterwards
        while @row_sep == :auto
          if @io.eof?
            # ran out of data without finding a line end: use the default
            @row_sep = $INPUT_RECORD_SEPARATOR
          else
            # sample a chunk, reading one more character when the chunk ends
            # on "\r" so that a "\r\n" pair is not split
            sample =  @io.read(1024)
            sample += @io.read(1) if sample[-1..-1] == "\r" && !@io.eof?

            # the first standard line ending found wins
            @row_sep = $& if sample =~ /\r\n?|\n/
          end
        end
        # return to the starting position by rewinding and reading forward,
        # which also works for GzipReader (it has no seek())
        @io.rewind
        while start_pos > 1024  # read in chunks to limit memory use
          @io.read(1024)
          start_pos -= 1024
        end
        @io.read(start_pos) if start_pos.nonzero?
      rescue IOError  # stream not opened for reading
        @row_sep = $INPUT_RECORD_SEPARATOR
      end
    end
  end

  # Quoting rules used when writing.
  do_quote = lambda do |field|
    # wrap in quote characters, doubling any that appear inside
    escaped = String(field).gsub(@quote_char, @quote_char * 2)
    @quote_char + escaped + @quote_char
  end
  @quote = if options.delete(:force_quotes)
    do_quote
  else
    lambda do |field|
      next "" if field.nil?  # +nil+ becomes an empty unquoted field

      text = String(field)   # Stringify fields
      # empty Strings, and fields containing a line end, separator, or quote
      # character, get quoted; everything else is written as is
      if text.empty? || text.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
        do_quote.call(text)
      else
        text
      end
    end
  end
end
1778
+
1779
+ # Pre-compiles parsers and stores them by name for access during reads.
1780
def init_parsers(options)
  # store the parser behaviors
  @skip_blanks      = options.delete(:skip_blanks)
  @encoding         = options.delete(:encoding)  # nil will use $KCODE
  @field_size_limit = options.delete(:field_size_limit)
  @bom              = options.delete(:bom)  # nil will not use a byte order marker

  #
  # Regexp builder that copes with both Regexp.new() signatures.  Ruby 1.8
  # takes the encoding as a third argument.  Ruby 1.9 through 3.2 only honor
  # "n"/"N" there (everything else is ignored with a warning), and Ruby 3.3
  # rejects the third argument altogether.  So where Regexp::NOENCODING
  # exists, "n"/"N" is expressed through that flag and other encodings are
  # dropped; otherwise the original three-argument call is used.
  #
  compile = if defined?(Regexp::NOENCODING)
    lambda do |source, flags|
      flags = (flags || 0) | Regexp::NOENCODING if @encoding.to_s =~ /\An/i
      Regexp.new(source, flags)
    end
  else
    lambda { |source, flags| Regexp.new(source, flags, @encoding) }
  end

  # prebuild Regexps for faster parsing
  esc_col_sep = Regexp.escape(@col_sep)
  esc_row_sep = Regexp.escape(@row_sep)
  esc_quote   = Regexp.escape(@quote_char)
  @parsers = {
    # for empty leading fields
    :leading_fields => compile.call("\\A(?:#{esc_col_sep})+", nil),
    # The Primary Parser
    :csv_row        => compile.call(<<-END_PARSER, Regexp::EXTENDED),
    \\G(?:\\A|#{esc_col_sep})                # anchor the match
    (?: #{esc_quote}( (?>[^#{esc_quote}]*)  # find quoted fields
                      (?> #{esc_quote*2}
                          [^#{esc_quote}]* )* )#{esc_quote}
        |                                   # ... or ...
        ([^#{esc_quote}#{esc_col_sep}]*)    # unquoted fields
        )
    (?=#{esc_col_sep}|\\z)                   # ensure we are at field's end
    END_PARSER
    # a test for unescaped quotes
    :bad_field      => compile.call(<<-END_BAD, Regexp::EXTENDED),
    \\A#{esc_col_sep}?                   # starts with an optional comma
    (?: #{esc_quote} (?>[^#{esc_quote}]*)  # an extra quote
                     (?> #{esc_quote*2}
                         [^#{esc_quote}]* )*
        #{esc_quote}[^#{esc_quote}]
        |                                  # ... or ...
        [^#{esc_quote}#{esc_col_sep}]+
        #{esc_quote}                       # unescaped quote
        )
    END_BAD
    # safer than chomp!()
    :line_end       => compile.call("#{esc_row_sep}\\z", nil)
  }
end
1821
+
1822
+ #
1823
+ # Loads any converters requested during construction.
1824
+ #
1825
+ # If +field_name+ is set <tt>:converters</tt> (the default) field converters
1826
+ # are set. When +field_name+ is <tt>:header_converters</tt> header converters
1827
+ # are added instead.
1828
+ #
1829
+ # The <tt>:unconverted_fields</tt> option is also activated for
1830
+ # <tt>:converters</tt> calls, if requested.
1831
+ #
1832
+ def init_converters(options, field_name = :converters)
1833
+ if field_name == :converters
1834
+ @unconverted_fields = options.delete(:unconverted_fields)
1835
+ end
1836
+
1837
+ instance_variable_set("@#{field_name}", Array.new)
1838
+
1839
+ # find the correct method to add the converters: the name is +field_name+ minus its trailing "ers"
1840
+ convert = method(field_name.to_s.sub(/ers\Z/, ""))
1841
+
1842
+ # load converters, if any were requested under +field_name+
1843
+ unless options[field_name].nil?
1844
+ # allow a single converter not wrapped in an Array
1845
+ unless options[field_name].is_a? Array
1846
+ options[field_name] = [options[field_name]]
1847
+ end
1848
+ # load each converter...
1849
+ options[field_name].each do |converter|
1850
+ if converter.is_a? Proc # custom code block, passed along as the method's block
1851
+ convert.call(&converter)
1852
+ else # by name, passed along as an ordinary argument
1853
+ convert.call(converter)
1854
+ end
1855
+ end
1856
+ end
1857
+
1858
+ options.delete(field_name) # the setting has been consumed, so drop it from +options+
1859
+ end
1860
+
1861
+ # Stores header row settings and loads header converters, if needed.
1862
+ def init_headers(options)
1863
+ @use_headers = options.delete(:headers) # each header setting is removed from +options+ as it is stored
1864
+ @return_headers = options.delete(:return_headers)
1865
+ @write_headers = options.delete(:write_headers)
1866
+
1867
+ # headers must be delayed until shift(), in case they need a row of content
1868
+ @headers = nil
1869
+
1870
+ init_converters(options, :header_converters) # same loader as field converters, targeting @header_converters
1871
+ end
1872
+
1873
+ #
1874
+ # The actual work method for adding converters, used by both
1875
+ # FasterCSV.convert() and FasterCSV.header_convert().
1876
+ #
1877
+ # This method requires the +var_name+ of the instance variable to place the
1878
+ # converters in, the +const+ Hash to lookup named converters in, and the
1879
+ # normal parameters of the FasterCSV.convert() and FasterCSV.header_convert()
1880
+ # methods.
1881
+ #
1882
+ def add_converter(var_name, const, name = nil, &converter)
1883
+ if name.nil? # custom converter: append the passed block as-is
1884
+ instance_variable_get("@#{var_name}") << converter
1885
+ else # named converter: look the name up in +const+
1886
+ combo = const[name]
1887
+ case combo
1888
+ when Array # combo converter: a list of names, each added by a recursive call
1889
+ combo.each do |converter_name|
1890
+ add_converter(var_name, const, converter_name)
1891
+ end
1892
+ else # individual named converter: append the looked-up value directly
1893
+ instance_variable_get("@#{var_name}") << combo
1894
+ end
1895
+ end
1896
+ end
1897
+
1898
+ #
1899
+ # Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
1900
+ # if +headers+ is passed as +true+, returning the converted field set. Any
1901
+ # converter that changes the field into something other than a String halts
1902
+ # the pipeline of conversion for that field. This is primarily an efficiency
1903
+ # shortcut.
1904
+ #
1905
+ def convert_fields(fields, headers = false)
1906
+ # see if we are converting headers or fields
1907
+ converters = headers ? @header_converters : @converters
1908
+
1909
+ fields.enum_for(:each_with_index).map do |field, index| # map_with_index
1910
+ converters.each do |converter|
1911
+ field = if converter.arity == 1 # straight field converter, called with the field only
1912
+ converter[field]
1913
+ else # FieldInfo converter, also given the field's index, line number and header
1914
+ header = @use_headers && !headers ? @headers[index] : nil # header only when headers are in use and this is not the header row
1915
+ converter[field, FieldInfo.new(index, lineno, header)]
1916
+ end
1917
+ break unless field.is_a? String # short-circuit pipeline for speed
1918
+ end
1919
+ field # return final state of each field, converted or original
1920
+ end
1921
+ end
1922
+
1923
+ #
1924
+ # This method is used to turn a finished +row+ into a FasterCSV::Row. Header
1925
+ # rows are also dealt with here, either by returning a FasterCSV::Row with
1926
+ # identical headers and fields (save that the fields do not go through the
1927
+ # converters) or by reading past them to return a field row. Headers are also
1928
+ # saved in <tt>@headers</tt> for use in future rows.
1929
+ #
1930
+ # When +nil+, +row+ is assumed to be a header row not based on an actual row
1931
+ # of the stream.
1932
+ #
1933
+ def parse_headers(row = nil)
1934
+ if @headers.nil? # headers not loaded yet, so this call handles the header row
1935
+ @headers = case @use_headers # save headers
1936
+ # Array of headers
1937
+ when Array then @use_headers
1938
+ # CSV header String, parsed with this object's separators and quote character
1939
+ when String
1940
+ self.class.parse_line( @use_headers,
1941
+ :col_sep => @col_sep,
1942
+ :row_sep => @row_sep,
1943
+ :quote_char => @quote_char )
1944
+ # first row is headers
1945
+ else row
1946
+ end
1947
+
1948
+ # prepare converted and unconverted copies; a nil +row+ falls back to the raw headers
1949
+ row = @headers if row.nil?
1950
+ @headers = convert_fields(@headers, true)
1951
+
1952
+ if @return_headers # return headers
1953
+ return FasterCSV::Row.new(@headers, row, true)
1954
+ elsif not [Array, String].include? @use_headers.class # headers came from +row+ itself, so skip to field row
1955
+ return shift
1956
+ end
1957
+ end
1958
+
1959
+ FasterCSV::Row.new(@headers, convert_fields(row)) # field row
1960
+ end
1961
+
1962
+ #
1963
+ # This method injects an instance variable <tt>unconverted_fields</tt> into
1964
+ # +row+ and an accessor method for it called unconverted_fields(). The
1965
+ # variable is set to the contents of +fields+.
1966
+ #
1967
+ def add_unconverted_fields(row, fields)
1968
+ class << row # open row's singleton class so only this object gains the reader
1969
+ attr_reader :unconverted_fields
1970
+ end
1971
+ row.instance_eval { @unconverted_fields = fields } # assign the variable with row as self
1972
+ row # hand back the same object that was passed in
1973
+ end
1974
+ end
1975
+
1976
+ # Another name for FasterCSV.
1977
+ FCSV = FasterCSV # both constants refer to the same class object
1978
+
1979
+ # Another name for FasterCSV::instance().
1980
+ def FasterCSV(*args, &block)
1981
+ FasterCSV.instance(*args, &block) # forward every argument and the block unchanged
1982
+ end
1983
+
1984
+ # Another name for FCSV::instance().
1985
+ def FCSV(*args, &block)
1986
+ FCSV.instance(*args, &block) # forward every argument and the block unchanged
1987
+ end
1988
+
1989
+ class Array
1990
+ # Equivalent to <tt>FasterCSV::generate_line(self, options)</tt>; +options+ defaults to an empty Hash and is passed through as given.
1991
+ def to_csv(options = Hash.new)
1992
+ FasterCSV.generate_line(self, options)
1993
+ end
1994
+ end
1995
+
1996
+ class String
1997
+ # Equivalent to <tt>FasterCSV::parse_line(self, options)</tt>; +options+ defaults to an empty Hash and is passed through as given.
1998
+ def parse_csv(options = Hash.new)
1999
+ FasterCSV.parse_line(self, options)
2000
+ end
2001
+ end