rw_fastercsv 1.5.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ Quantity,Product Description,Price
2
+ 1,Text Editor,25.00
3
+ 2,MacBook Pros,2499.00
@@ -0,0 +1,36 @@
1
#!/usr/local/bin/ruby -w

# shortcut_interface.rb
#
#  Created by James Edward Gray II on 2006-04-01.
#  Copyright 2006 Gray Productions. All rights reserved.
#
# Feature implementation and example code by Ara.T.Howard.

require "faster_csv"

#
# Called with only a block, FCSV() writes to STDOUT.
#
FCSV { |out|
  out << %w( a b c) << %w( d e f )
}

#
# Passing a String makes that String the write target.
#
csv = ''
FCSV(csv) { |out|
  out << %w( q r s )
  out << %w( x y z )
}
puts csv

#
# Passing STDERR sends the rows there instead.
#
FCSV(STDERR) { |err|
  err << %w( 0 1 2 )
  err << %w( A B C )
}

# The STDOUT output of this script:
# >> a,b,c
# >> d,e,f
# >> q,r,s
# >> x,y,z
data/lib/faster_csv.rb ADDED
@@ -0,0 +1,2006 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # = faster_csv.rb -- Faster CSV Reading and Writing
4
+ #
5
+ # Created by James Edward Gray II on 2005-10-31.
6
+ # Copyright 2005 Gray Productions. All rights reserved.
7
+ #
8
+ # See FasterCSV for documentation.
9
+
10
+ if RUBY_VERSION >= "1.9"
11
+ abort <<-VERSION_WARNING.gsub(/^\s+/, "")
12
+ Please switch to Ruby 1.9's standard CSV library. It's FasterCSV plus
13
+ support for Ruby 1.9's m17n encoding engine.
14
+ VERSION_WARNING
15
+ end
16
+
17
+ require "forwardable"
18
+ require "English"
19
+ require "enumerator"
20
+ require "date"
21
+ require "stringio"
22
+
23
+ #
24
+ # This class provides a complete interface to CSV files and data. It offers
25
+ # tools to enable you to read and write to and from Strings or IO objects, as
26
+ # needed.
27
+ #
28
+ # == Reading
29
+ #
30
+ # === From a File
31
+ #
32
+ # ==== A Line at a Time
33
+ #
34
+ # FasterCSV.foreach("path/to/file.csv") do |row|
35
+ # # use row here...
36
+ # end
37
+ #
38
+ # ==== All at Once
39
+ #
40
+ # arr_of_arrs = FasterCSV.read("path/to/file.csv")
41
+ #
42
+ # === From a String
43
+ #
44
+ # ==== A Line at a Time
45
+ #
46
+ # FasterCSV.parse("CSV,data,String") do |row|
47
+ # # use row here...
48
+ # end
49
+ #
50
+ # ==== All at Once
51
+ #
52
+ # arr_of_arrs = FasterCSV.parse("CSV,data,String")
53
+ #
54
+ # == Writing
55
+ #
56
+ # === To a File
57
+ #
58
+ # FasterCSV.open("path/to/file.csv", "w") do |csv|
59
+ # csv << ["row", "of", "CSV", "data"]
60
+ # csv << ["another", "row"]
61
+ # # ...
62
+ # end
63
+ #
64
+ # === To a String
65
+ #
66
+ # csv_string = FasterCSV.generate do |csv|
67
+ # csv << ["row", "of", "CSV", "data"]
68
+ # csv << ["another", "row"]
69
+ # # ...
70
+ # end
71
+ #
72
+ # == Convert a Single Line
73
+ #
74
+ # csv_string = ["CSV", "data"].to_csv # to CSV
75
+ # csv_array = "CSV,String".parse_csv # from CSV
76
+ #
77
+ # == Shortcut Interface
78
+ #
79
+ # FCSV { |csv_out| csv_out << %w{my data here} } # to $stdout
80
+ # FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
81
+ # FCSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
82
+ # FCSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
83
+ #
84
+ # == Advanced Usage
85
+ #
86
+ # === Wrap an IO Object
87
+ #
88
+ # csv = FCSV.new(io, options)
89
+ # # ... read (with gets() or each()) from and write (with <<) to csv here ...
90
+ #
91
+ class FasterCSV
92
+ # The version of the installed library.
93
+ #VERSION = "1.5.6".freeze
94
+
95
#
# A FasterCSV::Row is part Array and part Hash.  It retains an order for the
# fields and allows duplicates just as an Array would, but also allows you to
# access fields by name just as you could if they were in a Hash.
#
# All rows returned by FasterCSV will be constructed from this class, if
# header row processing is activated.
#
# Internally the row is stored as an Array of two-element
# <tt>[header, field]</tt> pairs.
#
class Row
  extend Forwardable
  include Enumerable

  #
  # Construct a new FasterCSV::Row from +headers+ and +fields+, which are
  # expected to be Arrays.  If one Array is shorter than the other, it will be
  # padded with +nil+ objects.
  #
  # The optional +header_row+ parameter can be set to +true+ to indicate, via
  # FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
  # a header row.  Otherwise, the row is assumed to be a field row.
  #
  # A FasterCSV::Row object supports the following Array methods through
  # delegation:
  #
  # * empty?()
  # * length()
  # * size()
  #
  def initialize(headers, fields, header_row = false)
    @header_row = header_row

    # zip() pads the shorter Array with nil, so always zip from the longer
    # one and flip the pairs back to [header, field] order when needed
    @row = if headers.size > fields.size
      headers.zip(fields)
    else
      fields.zip(headers).map { |pair| pair.reverse }
    end
  end

  # Internal data format used to compare equality.
  attr_reader :row
  protected   :row

  ### Array Delegation ###

  def_delegators :@row, :empty?, :length, :size

  # Returns +true+ if this is a header row.
  def header_row?
    @header_row
  end

  # Returns +true+ if this is a field row.
  def field_row?
    not header_row?
  end

  # Returns the headers of this row.
  def headers
    @row.map { |pair| pair.first }
  end

  #
  # :call-seq:
  #   field( header )
  #   field( header, offset )
  #   field( index )
  #
  # This method will fetch the field value by +header+ or +index+.  If a field
  # is not found, +nil+ is returned.
  #
  # When provided, +offset+ ensures that a header match occurs on or later
  # than the +offset+ index.  You can use this to find duplicate headers,
  # without resorting to hard-coding exact indices.  An +offset+ beyond the
  # end of the row simply finds nothing.
  #
  def field(header_or_index, minimum_index = 0)
    # slicing from past the end of an Array gives nil, not an empty Array
    candidates = @row[minimum_index..-1] || Array.new

    # locate the pair
    finder = header_or_index.is_a?(Integer) ? :[] : :assoc
    pair   = candidates.send(finder, header_or_index)

    # return the field if we have a pair
    pair.nil? ? nil : pair.last
  end
  alias_method :[], :field

  #
  # :call-seq:
  #   []=( header, value )
  #   []=( header, offset, value )
  #   []=( index, value )
  #
  # Looks up the field by the semantics described in FasterCSV::Row.field()
  # and assigns the +value+.
  #
  # Assigning past the end of the row with an index will set all pairs between
  # to <tt>[nil, nil]</tt>.  Assigning to an unused header appends the new
  # pair.
  #
  def []=(*args)
    value = args.pop

    if args.first.is_a? Integer
      if @row[args.first].nil?  # extending past the end with index
        @row[args.first] = [nil, value]
        # Array assignment left nil gaps; turn them into empty pairs
        @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
      else                      # normal index assignment
        @row[args.first][1] = value
      end
    else
      position = index(*args)
      if position.nil?          # appending a field
        self << [args.first, value]
      else                      # normal header assignment
        @row[position][1] = value
      end
    end
  end

  #
  # :call-seq:
  #   <<( field )
  #   <<( header_and_field_array )
  #   <<( header_and_field_hash )
  #
  # If a two-element Array is provided, it is assumed to be a header and field
  # and the pair is appended.  A Hash works the same way with the key being
  # the header and the value being the field.  Anything else is assumed to be
  # a lone field which is appended with a +nil+ header.
  #
  # This method returns the row for chaining.
  #
  def <<(arg)
    if arg.is_a?(Array) and arg.size == 2  # appending a header and field
      @row << arg
    elsif arg.is_a?(Hash)                  # append header and field pairs
      arg.each { |pair| @row << pair }
    else                                   # append field value
      @row << [nil, arg]
    end

    self  # for chaining
  end

  #
  # A shortcut for appending multiple fields.  Equivalent to:
  #
  #   args.each { |arg| faster_csv_row << arg }
  #
  # This method returns the row for chaining.
  #
  def push(*args)
    args.each { |arg| self << arg }

    self  # for chaining
  end

  #
  # :call-seq:
  #   delete( header )
  #   delete( header, offset )
  #   delete( index )
  #
  # Used to remove a pair from the row by +header+ or +index+.  The header is
  # located as described in FasterCSV::Row.index().  The deleted pair is
  # returned.  When no pair carries the requested header an empty Array is
  # returned, and an out of range +index+ returns +nil+ (the result of
  # Array#delete_at()).
  #
  def delete(header_or_index, minimum_index = 0)
    if header_or_index.is_a? Integer                 # by index
      @row.delete_at(header_or_index)
    elsif i = index(header_or_index, minimum_index)  # by header
      @row.delete_at(i)
    else
      # callers such as FasterCSV::Table.delete() call last() on the result
      [ ]
    end
  end

  #
  # The provided +block+ is passed a header and field for each pair in the row
  # and expected to return +true+ or +false+, depending on whether the pair
  # should be deleted.
  #
  # This method returns the row for chaining.
  #
  def delete_if(&block)
    @row.delete_if(&block)

    self  # for chaining
  end

  #
  # This method accepts any number of arguments which can be headers, indices,
  # Ranges of either, or two-element Arrays containing a header and offset.
  # Each argument will be replaced with a field lookup as described in
  # FasterCSV::Row.field().
  #
  # If called with no arguments, all fields are returned.
  #
  def fields(*headers_and_or_indices)
    # return all fields--no arguments
    return @row.map { |pair| pair.last } if headers_and_or_indices.empty?

    # or work like values_at()
    headers_and_or_indices.inject(Array.new) do |found, h_or_i|
      if h_or_i.is_a? Range
        # translate header endpoints into positions before slicing
        index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
                                                    index(h_or_i.begin)
        index_end   = h_or_i.end.is_a?(Integer)   ? h_or_i.end :
                                                    index(h_or_i.end)
        new_range   = h_or_i.exclude_end? ? (index_begin...index_end) :
                                            (index_begin..index_end)
        found.concat(fields.values_at(new_range))
      else
        found << field(*Array(h_or_i))
      end
    end
  end
  alias_method :values_at, :fields

  #
  # :call-seq:
  #   index( header )
  #   index( header, offset )
  #
  # This method will return the index of a field with the provided +header+.
  # The +offset+ can be used to locate duplicate header names, as described in
  # FasterCSV::Row.field().  Returns +nil+ when no header matches, including
  # when +offset+ is beyond the end of the row.
  #
  def index(header, minimum_index = 0)
    # slicing from past the end of an Array gives nil, not an empty Array
    candidates = headers[minimum_index..-1] || Array.new
    # find the pair
    position   = candidates.index(header)
    # return the index at the right offset, if we found one
    position.nil? ? nil : position + minimum_index
  end

  # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
  def header?(name)
    headers.include? name
  end
  alias_method :include?, :header?

  #
  # Returns +true+ if +data+ matches a field in this row, and +false+
  # otherwise.
  #
  def field?(data)
    fields.include? data
  end

  #
  # Yields each pair of the row as header and field tuples (much like
  # iterating over a Hash).
  #
  # Support for Enumerable.
  #
  # This method returns the row for chaining.
  #
  def each(&block)
    @row.each(&block)

    self  # for chaining
  end

  #
  # Returns +true+ if this row contains the same headers and fields in the
  # same order as +other+.  When +other+ is not a FasterCSV::Row it is
  # compared directly against the internal Array of pairs, so unrelated
  # objects (like +nil+) compare as unequal instead of raising.
  #
  def ==(other)
    return @row == other.row if other.is_a? Row
    @row == other
  end

  #
  # Collapses the row into a simple Hash.  Be warned that this discards field
  # order and clobbers duplicate fields (the last field for a header wins).
  #
  def to_hash
    hash = Hash.new
    @row.each { |pair| hash[pair.first] = pair.last }
    hash
  end

  #
  # Returns the row as a CSV String.  Headers are not used.  Equivalent to:
  #
  #   faster_csv_row.fields.to_csv( options )
  #
  def to_csv(options = Hash.new)
    fields.to_csv(options)
  end
  alias_method :to_s, :to_csv

  # A summary of fields, by header.
  def inspect
    str = "#<#{self.class}"
    each do |header, value|
      str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
             value.inspect
    end
    str << ">"
  end
end
394
+
395
#
# A FasterCSV::Table is a two-dimensional data structure for representing CSV
# documents.  Tables allow you to work with the data by row or column,
# manipulate the data, and even convert the results back to CSV, if needed.
#
# All tables returned by FasterCSV will be constructed from this class, if
# header row processing is activated.
#
class Table
  extend Forwardable
  include Enumerable

  #
  # Construct a new FasterCSV::Table from +array_of_rows+, which are expected
  # to be FasterCSV::Row objects.  All rows are assumed to have the same
  # headers.  The table starts out in the mixed <tt>:col_or_row</tt> mode.
  #
  # A FasterCSV::Table object supports the following Array methods through
  # delegation:
  #
  # * empty?()
  # * length()
  # * size()
  #
  def initialize(array_of_rows)
    @table = array_of_rows
    @mode  = :col_or_row
  end

  # The current access mode for indexing and iteration.
  attr_reader :mode

  # Internal data format used to compare equality.
  attr_reader :table
  protected   :table

  ### Array Delegation ###

  def_delegators :@table, :empty?, :length, :size

  #
  # Returns a duplicate table object, in column mode.  This is handy for
  # chaining in a single call without changing the table mode, but be aware
  # that this method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining.  Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_col
    self.class.new(@table.dup).by_col!
  end

  #
  # Switches the mode of this table to column mode.  All calls to indexing and
  # iteration methods will work with columns until the mode is changed again.
  #
  # This method returns the table and is safe to chain.
  #
  def by_col!
    @mode = :col

    self
  end

  #
  # Returns a duplicate table object, in mixed mode.  This is handy for
  # chaining in a single call without changing the table mode, but be aware
  # that this method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining.  Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_col_or_row
    self.class.new(@table.dup).by_col_or_row!
  end

  #
  # Switches the mode of this table to mixed mode.  All calls to indexing and
  # iteration methods will use the default intelligent indexing system until
  # the mode is changed again.  In mixed mode an index is assumed to be a row
  # reference while anything else is assumed to be column access by headers.
  #
  # This method returns the table and is safe to chain.
  #
  def by_col_or_row!
    @mode = :col_or_row

    self
  end

  #
  # Returns a duplicate table object, in row mode.  This is handy for chaining
  # in a single call without changing the table mode, but be aware that this
  # method can consume a fair amount of memory for bigger data sets.
  #
  # This method returns the duplicate table for chaining.  Don't chain
  # destructive methods (like []=()) this way though, since you are working
  # with a duplicate.
  #
  def by_row
    self.class.new(@table.dup).by_row!
  end

  #
  # Switches the mode of this table to row mode.  All calls to indexing and
  # iteration methods will work with rows until the mode is changed again.
  #
  # This method returns the table and is safe to chain.
  #
  def by_row!
    @mode = :row

    self
  end

  #
  # Returns the headers for the first row of this table (assumed to match all
  # other rows).  An empty Array is returned for empty tables.
  #
  def headers
    @table.empty? ? Array.new : @table.first.headers
  end

  #
  # In the default mixed mode, this method returns rows for index access and
  # columns for header access.  You can force the index association by first
  # calling by_col!() or by_row!().
  #
  # Columns are returned as an Array of values.  Altering that Array has no
  # effect on the table.
  #
  def [](index_or_header)
    if row_access?(index_or_header)  # by index
      @table[index_or_header]
    else                             # by header
      @table.map { |row| row[index_or_header] }
    end
  end

  #
  # In the default mixed mode, this method assigns rows for index access and
  # columns for header access.  You can force the index association by first
  # calling by_col!() or by_row!().
  #
  # Rows may be set to an Array of values (which will inherit the table's
  # headers()) or a FasterCSV::Row.
  #
  # Columns may be set to a single value, which is copied to each row of the
  # column, or an Array of values.  Arrays of values are assigned to rows top
  # to bottom in row major order.  Excess values are ignored and if the Array
  # does not have a value for each row the extra rows will receive a +nil+.
  # Header rows receive the column name instead of a value, but they still
  # occupy their position when an Array of values is lined up with the rows.
  #
  # Assigning to an existing column or row clobbers the data.  Assigning to
  # new columns creates them at the right end of the table.
  #
  def []=(index_or_header, value)
    if row_access?(index_or_header)  # by index
      if value.is_a? Array
        @table[index_or_header] = Row.new(headers, value)
      else
        @table[index_or_header] = value
      end
    else                             # set column
      @table.each_with_index do |row, i|
        if row.header_row?
          row[index_or_header] = index_or_header
        else
          # an Array supplies one value per row, anything else is repeated
          row[index_or_header] = value.is_a?(Array) ? value[i] : value
        end
      end
    end
  end

  #
  # The mixed mode default is to treat a list of indices as row access,
  # returning the rows indicated.  Anything else is considered columnar
  # access.  For columnar access, the return set has an Array for each row
  # with the values indicated by the headers in each Array.  You can force
  # column or row mode using by_col!() or by_row!().
  #
  # You cannot mix column and row access.
  #
  def values_at(*indices_or_headers)
    if @mode == :row or  # by indices
       ( @mode == :col_or_row and indices_or_headers.all? do |index|
           index.is_a?(Integer)         or
           ( index.is_a?(Range)         and
             index.first.is_a?(Integer) and
             index.last.is_a?(Integer) )
       end )
      @table.values_at(*indices_or_headers)
    else                 # by headers
      @table.map { |row| row.values_at(*indices_or_headers) }
    end
  end

  #
  # Adds a new row to the bottom end of this table.  You can provide an Array,
  # which will be converted to a FasterCSV::Row (inheriting the table's
  # headers()), or a FasterCSV::Row.
  #
  # This method returns the table for chaining.
  #
  def <<(row_or_array)
    if row_or_array.is_a? Array  # append Array
      @table << Row.new(headers, row_or_array)
    else                         # append Row
      @table << row_or_array
    end

    self  # for chaining
  end

  #
  # A shortcut for appending multiple rows.  Equivalent to:
  #
  #   rows.each { |row| self << row }
  #
  # This method returns the table for chaining.
  #
  def push(*rows)
    rows.each { |row| self << row }

    self  # for chaining
  end

  #
  # Removes and returns the indicated column or row.  In the default mixed
  # mode indices refer to rows and everything else is assumed to be a column
  # header.  Use by_col!() or by_row!() to force the lookup.
  #
  # A removed column is returned as an Array with one value per row; rows
  # that had nothing to remove contribute +nil+.
  #
  def delete(index_or_header)
    if row_access?(index_or_header)  # by index
      @table.delete_at(index_or_header)
    else                             # by header
      # Row#delete() hands back nil for an out of range index
      @table.map { |row| (row.delete(index_or_header) || Array.new).last }
    end
  end

  #
  # Removes any column or row for which the block returns +true+.  In the
  # default mixed mode or row mode, iteration is the standard row major
  # walking of rows.  In column mode, iteration will +yield+ two element
  # tuples containing the column name and an Array of values for that column.
  #
  # This method returns the table for chaining.
  #
  def delete_if(&block)
    if @mode == :row or @mode == :col_or_row  # by index
      @table.delete_if(&block)
    else                                      # by header
      # collect first, delete afterwards, so columns don't shift mid-scan
      to_delete = Array.new
      headers.each do |header|
        to_delete << header if block[[header, self[header]]]
      end
      to_delete.each { |header| delete(header) }
    end

    self  # for chaining
  end

  #
  # In the default mixed mode or row mode, iteration is the standard row major
  # walking of rows.  In column mode, iteration will +yield+ two element
  # tuples containing the column name and an Array of values for that column.
  #
  # This method returns the table for chaining.
  #
  def each(&block)
    if @mode == :col
      headers.each { |header| block[[header, self[header]]] }
    else
      @table.each(&block)
    end

    self  # for chaining
  end

  #
  # Returns +true+ if all rows of this table ==() +other+'s rows.  When
  # +other+ is not a FasterCSV::Table it is compared directly against the
  # internal Array of rows, so unrelated objects (like +nil+) compare as
  # unequal instead of raising.
  #
  def ==(other)
    return @table == other.table if other.is_a? Table
    @table == other
  end

  #
  # Returns the table as an Array of Arrays.  Headers will be the first row,
  # then all of the field rows will follow.
  #
  def to_a
    rows = [headers]
    @table.each { |row| rows << row.fields unless row.header_row? }
    rows
  end

  #
  # Returns the table as a complete CSV String.  Headers will be listed first,
  # then all of the field rows.
  #
  # This method assumes you want the Table.headers(), unless you explicitly
  # pass <tt>:write_headers => false</tt>.
  #
  def to_csv(options = Hash.new)
    lines = Array.new
    lines << headers.to_csv(options) if options.fetch(:write_headers, true)
    @table.each do |row|
      lines << row.fields.to_csv(options) unless row.header_row?
    end
    lines.join
  end
  alias_method :to_s, :to_csv

  # A summary of the access mode and row count (as counted by to_a()).
  def inspect
    "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
  end

  private

  #
  # Returns +true+ when +index_or_header+ should address a row:  always in
  # row mode, and for Integer arguments in the default mixed mode.
  #
  def row_access?(index_or_header)
    @mode == :row ||
    (@mode == :col_or_row && index_or_header.is_a?(Integer))
  end
end
734
+
735
#
# Raised to signal illegal CSV formatting found by the parser.
#
class MalformedCSVError < RuntimeError
end
737
+
738
#
# A FieldInfo Struct contains details about a field's position in the data
# source it was read from.  FasterCSV will pass this Struct to some blocks
# that make decisions based on field structure.  See
# FasterCSV.convert_fields() for an example.
#
# <b><tt>index</tt></b>::   The zero-based index of the field in its row.
# <b><tt>line</tt></b>::    The line of the data source this row is from.
# <b><tt>header</tt></b>::  The header for the column, when available.
#
FieldInfo = Struct.new(:index, :line, :header)

# Fields must match this Regexp before the <tt>:date</tt> converter will try
# Date::parse() on them.
DateMatcher     = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
                          \d{4}-\d{2}-\d{2} )\z /x
# Fields must match this Regexp before the <tt>:date_time</tt> converter will
# try DateTime::parse() on them.
DateTimeMatcher =
  / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
          \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x

#
# The built-in converters of FasterCSV, keyed by name.  Select them with
# FasterCSV.convert() or through the +options+ Hash passed to
# FasterCSV::new().  Each converter returns the field unchanged when it
# cannot convert it.
#
# <b><tt>:integer</tt></b>::    Converts any field Integer() accepts.
# <b><tt>:float</tt></b>::      Converts any field Float() accepts.
# <b><tt>:numeric</tt></b>::    A combination of <tt>:integer</tt>
#                               and <tt>:float</tt>.
# <b><tt>:date</tt></b>::       Converts any field Date::parse() accepts.
# <b><tt>:date_time</tt></b>::  Converts any field DateTime::parse() accepts.
# <b><tt>:all</tt></b>::        All built-in converters.  A combination of
#                               <tt>:date_time</tt> and <tt>:numeric</tt>.
#
# This Hash is intentionally left unfrozen and users should feel free to add
# values to it that can be accessed by all FasterCSV objects.
#
# To add a combo field, the value should be an Array of names.  Combo fields
# can be nested with other combo fields.
#
Converters = {
  :integer   => lambda { |f|
    begin
      Integer(f)
    rescue
      f
    end
  },
  :float     => lambda { |f|
    begin
      Float(f)
    rescue
      f
    end
  },
  :numeric   => [:integer, :float],
  :date      => lambda { |f|
    if f =~ DateMatcher
      begin
        Date.parse(f)
      rescue
        f
      end
    else
      f
    end
  },
  :date_time => lambda { |f|
    if f =~ DateTimeMatcher
      begin
        DateTime.parse(f)
      rescue
        f
      end
    else
      f
    end
  },
  :all       => [:date_time, :numeric]
}
787
+
788
#
# The built-in header converters of FasterCSV, keyed by name.  Select them
# with FasterCSV.header_convert() or through the +options+ Hash passed to
# FasterCSV::new().
#
# <b><tt>:downcase</tt></b>::  Calls downcase() on the header String.
# <b><tt>:symbol</tt></b>::    The header String is downcased, spaces are
#                              replaced with underscores, every character
#                              other than a-z, 0-9 and underscore is
#                              dropped, and finally to_sym() is called.
#
# This Hash is intentionally left unfrozen and users should feel free to add
# values to it that can be accessed by all FasterCSV objects.
#
# To add a combo field, the value should be an Array of names.  Combo fields
# can be nested with other combo fields.
#
HeaderConverters = {
  :downcase => lambda { |h| h.downcase },
  :symbol   => lambda { |h|
    underscored = h.downcase.tr(" ", "_")
    underscored.delete("^a-z0-9_").to_sym
  }
}
811
+
812
#
# The options used when no overrides are given by calling code.  They are:
#
# <b><tt>:col_sep</tt></b>::             <tt>","</tt>
# <b><tt>:row_sep</tt></b>::             <tt>:auto</tt>
# <b><tt>:quote_char</tt></b>::          <tt>'"'</tt>
# <b><tt>:converters</tt></b>::          +nil+
# <b><tt>:unconverted_fields</tt></b>::  +nil+
# <b><tt>:headers</tt></b>::             +false+
# <b><tt>:return_headers</tt></b>::      +false+
# <b><tt>:header_converters</tt></b>::   +nil+
# <b><tt>:skip_blanks</tt></b>::         +false+
# <b><tt>:force_quotes</tt></b>::        +false+
# <b><tt>:raise_exception</tt></b>::     +false+
#
# The Hash is frozen.
#
DEFAULT_OPTIONS = {
  :col_sep            => ",",
  :row_sep            => :auto,
  :quote_char         => '"',
  :converters         => nil,
  :unconverted_fields => nil,
  :headers            => false,
  :return_headers     => false,
  :header_converters  => nil,
  :skip_blanks        => false,
  :force_quotes       => false,
  :raise_exception    => false
}.freeze
837
+
838
#
# Defines a top-level CSV class whose class methods forward to FasterCSV,
# as a drop-in replacement for many of the standard CSV methods.  It allows
# you to write code like:
#
#   begin
#     require "faster_csv"
#     FasterCSV.build_csv_interface
#   rescue LoadError
#     require "csv"
#   end
#   # ... use CSV here ...
#
# This is not a complete interface with completely identical behavior.
# However, it is intended to be close enough that you won't notice the
# difference in most cases.  CSV methods supported are:
#
# * foreach()
# * generate_line()
# * open()
# * parse()
# * parse_line()
# * readlines()
#
# The positional field separator (+fs+) and row separator (+rs+) arguments of
# those methods are translated into FasterCSV's <tt>:col_sep</tt> and
# <tt>:row_sep</tt> options.
#
# Be warned that this interface is slower than vanilla FasterCSV due to the
# extra layer of method calls.  Depending on usage, this can slow it down to
# near CSV speeds.
#
def self.build_csv_interface
  csv_class = Object.const_set(:CSV, Class.new)

  csv_class.class_eval do
    def self.foreach(path, rs = :auto, &block)  # :nodoc:
      FasterCSV.foreach(path, :row_sep => rs, &block)
    end

    def self.generate_line(row, fs = ",", rs = "")  # :nodoc:
      FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
    end

    def self.open(path, mode, fs = ",", rs = :auto, &block)  # :nodoc:
      # anything but a block given for a read mode is handed straight over
      unless block and mode.include? "r"
        return FasterCSV.open( path, mode, :col_sep => fs,
                                           :row_sep => rs, &block )
      end

      # reading with a block:  the block receives each row
      FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs) do |csv|
        csv.each(&block)
      end
    end

    def self.parse(str_or_readable, fs = ",", rs = :auto, &block)  # :nodoc:
      FasterCSV.parse( str_or_readable, :col_sep => fs,
                                        :row_sep => rs, &block )
    end

    def self.parse_line(src, fs = ",", rs = :auto)  # :nodoc:
      FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
    end

    def self.readlines(path, rs = :auto)  # :nodoc:
      FasterCSV.readlines(path, :row_sep => rs)
    end
  end
end
898
+
899
+ #
900
+ # This method allows you to serialize an Array of Ruby objects to a String or
901
+ # File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
902
+ # useful for spreadsheet and database interaction.
903
+ #
904
+ # Out of the box, this method is intended to work with simple data objects or
905
+ # Structs. It will serialize a list of instance variables and/or
906
+ # Struct.members().
907
+ #
908
+ # If you need need more complicated serialization, you can control the process
909
+ # by adding methods to the class to be serialized.
910
+ #
911
+ # A class method csv_meta() is responsible for returning the first row of the
912
+ # document (as an Array). This row is considered to be a Hash of the form
913
+ # key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
914
+ # key with a value of the stringified class name and FasterCSV::dump() will
915
+ # create this, if you do not define this method. This method is only called
916
+ # on the first object of the Array.
917
+ #
918
+ # The next method you can provide is an instance method called csv_headers().
919
+ # This method is expected to return the second line of the document (again as
920
+ # an Array), which is to be used to give each column a header. By default,
921
+ # FasterCSV::load() will set an instance variable if the field header starts
922
+ # with an @ character or call send() passing the header as the method name and
923
+ # the field value as an argument. This method is only called on the first
924
+ # object of the Array.
925
+ #
926
+ # Finally, you can provide an instance method called csv_dump(), which will
927
+ # be passed the headers. This should return an Array of fields that can be
928
+ # serialized for this object. This method is called once for every object in
929
+ # the Array.
930
+ #
931
+ # The +io+ parameter can be used to serialize to a File, and +options+ can be
932
+ # anything FasterCSV::new() accepts.
933
+ #
934
def self.dump(ary_of_objs, io = "", options = Hash.new)
  # the first object decides the meta row and the headers for the whole dump
  template = ary_of_objs.first
  writer   = FasterCSV.new(io, options)

  # row one: meta information, from csv_meta() or a default class entry
  begin
    writer << template.class.csv_meta
  rescue NoMethodError
    writer << [:class, template.class]
  end

  # row two: headers, from csv_headers() or derived from the object's state
  begin
    headers = template.csv_headers
  rescue NoMethodError
    headers   = template.instance_variables.sort
    is_struct = template.class.ancestors.find { |cls| cls.to_s =~ /\AStruct\b/ }
    # Struct members are recorded as setter names ("name=")
    headers  += template.members.map { |mem| "#{mem}=" }.sort if is_struct
  end
  writer << headers

  # remaining rows: one per object, via csv_dump() or by reading each header
  ary_of_objs.each do |obj|
    begin
      writer << obj.csv_dump(headers)
    rescue NoMethodError
      fields = headers.map do |name|
        if name[0] == ?@
          obj.instance_variable_get(name)
        else
          obj[name[0..-2]]  # drop the trailing "=" to index the Struct
        end
      end
      writer << fields
    end
  end

  # a String target is returned; anything else is closed
  io.is_a?(String) ? writer.string : writer.close
end
978
+
979
+ #
980
+ # :call-seq:
981
+ # filter( options = Hash.new ) { |row| ... }
982
+ # filter( input, options = Hash.new ) { |row| ... }
983
+ # filter( input, output, options = Hash.new ) { |row| ... }
984
+ #
985
+ # This method is a convenience for building Unix-like filters for CSV data.
986
+ # Each row is yielded to the provided block which can alter it as needed.
987
+ # After the block returns, the row is appended to +output+ altered or not.
988
+ #
989
+ # The +input+ and +output+ arguments can be anything FasterCSV::new() accepts
990
+ # (generally String or IO objects). If not given, they default to
991
+ # <tt>ARGF</tt> and <tt>$stdout</tt>.
992
+ #
993
+ # The +options+ parameter is also filtered down to FasterCSV::new() after some
994
+ # clever key parsing. Any key beginning with <tt>:in_</tt> or
995
+ # <tt>:input_</tt> will have that leading identifier stripped and will only
996
+ # be used in the +options+ Hash for the +input+ object. Keys starting with
997
+ # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
998
+ # are assigned to both objects.
999
+ #
1000
+ # The <tt>:output_row_sep</tt> +option+ defaults to
1001
+ # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1002
+ #
1003
def self.filter(*args)
  # output rows end in $/ unless an option says otherwise
  in_options  = Hash.new
  out_options = { :row_sep => $INPUT_RECORD_SEPARATOR }

  # route each option to the input side, the output side, or both
  given = args.last.is_a?(Hash) ? args.pop : Hash.new
  given.each do |key, value|
    name = key.to_s
    if name =~ /\Ain(?:put)?_(.+)\Z/
      in_options[$1.to_sym] = value
    elsif name =~ /\Aout(?:put)?_(.+)\Z/
      out_options[$1.to_sym] = value
    else
      in_options[key]  = value
      out_options[key] = value
    end
  end

  # wrap whatever is left of +args+, falling back to ARGF and $stdout
  source = FasterCSV.new(args.shift || ARGF, in_options)
  sink   = FasterCSV.new(args.shift || $stdout, out_options)

  # hand each row to the block, then write it out (altered or not)
  source.each do |row|
    yield row
    sink << row
  end
end
1029
+
1030
+ #
1031
+ # This method is intended as the primary interface for reading CSV files. You
1032
+ # pass a +path+ and any +options+ you wish to set for the read. Each row of
1033
+ # the file will be passed to the provided +block+ in turn.
1034
+ #
1035
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1036
+ #
1037
def self.foreach(path, options = Hash.new, &block)
  # open() closes the file once every row has been handed to +block+
  open(path, "rb", options) { |reader| reader.each(&block) }
end
1042
+
1043
+ #
1044
+ # :call-seq:
1045
+ # generate( str, options = Hash.new ) { |faster_csv| ... }
1046
+ # generate( options = Hash.new ) { |faster_csv| ... }
1047
+ #
1048
+ # This method wraps a String you provide, or an empty default String, in a
1049
+ # FasterCSV object which is passed to the provided block. You can use the
1050
+ # block to append CSV rows to the String and when the block exits, the
1051
+ # final String will be returned.
1052
+ #
1053
+ # Note that a passed String *is* modified by this method. Call dup() before
1054
+ # passing if you need a new String.
1055
+ #
1056
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1057
+ #
1058
def self.generate(*args)
  # a leading String is wrapped and positioned at its end so rows are
  # appended to it; otherwise we start from an empty String
  target = if args.first.is_a? String
    buffer = StringIO.new(args.shift)
    buffer.seek(0, IO::SEEK_END)
    buffer
  else
    ""
  end

  csv = new(target, *args)  # remaining args are the options, if any
  yield csv                 # let the caller append rows
  csv.string                # hand back the finished String
end
1071
+
1072
+ #
1073
+ # This method is a shortcut for converting a single row (Array) into a CSV
1074
+ # String.
1075
+ #
1076
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1077
+ #
1078
+ # The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
1079
+ # (<tt>$/</tt>) when calling this method.
1080
+ #
1081
def self.generate_line(row, options = Hash.new)
  # caller-supplied options win over the $/ row separator default
  with_defaults = { :row_sep => $INPUT_RECORD_SEPARATOR }.merge(options)
  writer        = new("", with_defaults)
  (writer << row).string
end
1085
+
1086
+ #
1087
+ # This method will return a FasterCSV instance, just like FasterCSV::new(),
1088
+ # but the instance will be cached and returned for all future calls to this
1089
+ # method for the same +data+ object (tested by Object#object_id()) with the
1090
+ # same +options+.
1091
+ #
1092
+ # If a block is given, the instance is passed to the block and the return
1093
+ # value becomes the return value of the block.
1094
+ #
1095
def self.instance(data = $stdout, options = Hash.new)
  # the cache key is the data object's id followed by the value given for
  # every known option, taken in a stable (name-sorted) order
  option_names = DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s }
  sig          = [data.object_id] + options.values_at(*option_names)

  # reuse the instance stored under this key, building it on first use
  @@instances ||= Hash.new
  cached = (@@instances[sig] ||= new(data, options))

  # without a block the instance itself is the result
  return cached unless block_given?
  yield cached
end
1110
+
1111
+ #
1112
+ # This method is the reading counterpart to FasterCSV::dump(). See that
1113
+ # method for a detailed description of the process.
1114
+ #
1115
+ # You can customize loading by adding a class method called csv_load() which
1116
+ # will be passed a Hash of meta information, an Array of headers, and an Array
1117
+ # of fields for the object the method is expected to return.
1118
+ #
1119
+ # Remember that all fields will be Strings after this load. If you need
1120
+ # something else, use +options+ to setup converters or provide a custom
1121
+ # csv_load() implementation.
1122
+ #
1123
def self.load(io_or_str, options = Hash.new)
  reader = FasterCSV.new(io_or_str, options)

  # first row: flat key,value,... pairs of meta information
  meta = Hash[*reader.shift]

  # resolve the (possibly namespaced) class name, one constant at a time
  # NOTE(review): the class name and the header names used with send() below
  # come straight from the data -- only load files you trust.
  cls = Object
  meta["class"].split("::").each { |const| cls = cls.const_get(const) }

  # second row: headers
  headers = reader.shift

  # every remaining row becomes one object
  results = Array.new
  reader.each do |row|
    begin
      obj = cls.csv_load(meta, headers, row)
    rescue NoMethodError
      # default loading: "@name" headers set instance variables, anything
      # else is treated as a method to call with the field
      obj = cls.allocate
      headers.zip(row) do |name, value|
        if name[0] == ?@
          obj.instance_variable_set(name, value)
        else
          obj.send(name, value)
        end
      end
    end
    results << obj
  end

  reader.close unless io_or_str.is_a? String

  results
end
1156
+
1157
+ #
1158
+ # :call-seq:
1159
+ # open( filename, mode="rb", options = Hash.new ) { |faster_csv| ... }
1160
+ # open( filename, mode="rb", options = Hash.new )
1161
+ #
1162
+ # This method opens an IO object, and wraps that with FasterCSV. This is
1163
+ # intended as the primary interface for writing a CSV file.
1164
+ #
1165
+ # You may pass any +args+ Ruby's open() understands followed by an optional
1166
+ # Hash containing any +options+ FasterCSV::new() understands.
1167
+ #
1168
+ # This method works like Ruby's open() call, in that it will pass a FasterCSV
1169
+ # object to a provided block and close it when the block terminates, or it
1170
+ # will return the FasterCSV object when no block is provided. (*Note*: This
1171
+ # is different from the standard CSV library which passes rows to the block.
1172
+ # Use FasterCSV::foreach() for that behavior.)
1173
+ #
1174
+ # An opened FasterCSV object will delegate to many IO methods, for
1175
+ # convenience. You may call:
1176
+ #
1177
+ # * binmode()
1178
+ # * close()
1179
+ # * close_read()
1180
+ # * close_write()
1181
+ # * closed?()
1182
+ # * eof()
1183
+ # * eof?()
1184
+ # * fcntl()
1185
+ # * fileno()
1186
+ # * flush()
1187
+ # * fsync()
1188
+ # * ioctl()
1189
+ # * isatty()
1190
+ # * pid()
1191
+ # * pos()
1192
+ # * reopen()
1193
+ # * seek()
1194
+ # * stat()
1195
+ # * sync()
1196
+ # * sync=()
1197
+ # * tell()
1198
+ # * to_i()
1199
+ # * to_io()
1200
+ # * tty?()
1201
+ #
1202
def self.open(*args)
  # find the +options+ Hash (always the last argument, when present)
  options = if args.last.is_a? Hash then args.pop else Hash.new end
  # default to a binary open mode when only a path was given
  args << "rb" if args.size == 1

  # open the File with the remaining +args+ and wrap it; if the constructor
  # rejects the options the File must not be left open
  file = File.open(*args)
  begin
    csv = new(file, options)
  ensure
    file.close unless csv  # csv is still nil only when new() raised
  end

  # handle blocks like Ruby's open(), not like the CSV library: the block
  # gets the FasterCSV object, which is closed when the block exits (even
  # by exception), and the block's value is returned
  if block_given?
    begin
      yield csv
    ensure
      csv.close
    end
  else
    csv  # caller is responsible for closing
  end
end
1221
+
1222
+ #
1223
+ # :call-seq:
1224
+ # parse( str, options = Hash.new ) { |row| ... }
1225
+ # parse( str, options = Hash.new )
1226
+ #
1227
+ # This method can be used to easily parse CSV out of a String. You may either
1228
+ # provide a +block+ which will be called with each row of the String in turn,
1229
+ # or just use the returned Array of Arrays (when no +block+ is given).
1230
+ #
1231
+ # You pass your +str+ to read from, and an optional +options+ Hash containing
1232
+ # anything FasterCSV::new() understands.
1233
+ #
1234
def self.parse(*args, &block)
  csv = new(*args)

  # with a block: pass each row to it as it is parsed
  return csv.each(&block) unless block.nil?

  # without a block: slurp everything, closing the wrapper afterwards
  begin
    csv.read
  ensure
    csv.close
  end
end
1246
+
1247
+ #
1248
+ # This method is a shortcut for converting a single line of a CSV String into
1249
+ # an Array. Note that if +line+ contains multiple rows, anything
1250
+ # beyond the first row is ignored.
1251
+ #
1252
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1253
+ #
1254
def self.parse_line(line, options = Hash.new)
  # only the first row is pulled; the rest of +line+ is never read
  parser = new(line, options)
  parser.shift
end
1257
+
1258
+ #
1259
+ # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
1260
+ # file and any +options+ FasterCSV::new() understands.
1261
+ #
1262
def self.read(path, options = Hash.new)
  # the block form of open() closes the file after the slurp
  open(path, "rb", options) do |reader|
    reader.read
  end
end
1265
+
1266
+ # Alias for FasterCSV::read().
1267
def self.readlines(*args)
  # same arguments, same result as read()
  self.read(*args)
end
1270
+
1271
+ #
1272
+ # A shortcut for:
1273
+ #
1274
+ # FasterCSV.read( path, { :headers => true,
1275
+ # :converters => :numeric,
1276
+ # :header_converters => :symbol }.merge(options) )
1277
+ #
1278
def self.table(path, options = Hash.new)
  # table-friendly defaults; anything in +options+ overrides them
  table_defaults = { :headers           => true,
                     :converters        => :numeric,
                     :header_converters => :symbol }
  read(path, table_defaults.merge(options))
end
1283
+
1284
+ #
1285
+ # This constructor will wrap either a String or IO object passed in +data+ for
1286
+ # reading and/or writing. In addition to the FasterCSV instance methods,
1287
+ # several IO methods are delegated. (See FasterCSV::open() for a complete
1288
+ # list.) If you pass a String for +data+, you can later retrieve it (after
1289
+ # writing to it, for example) with FasterCSV.string().
1290
+ #
1291
+ # Note that a wrapped String will be positioned at the beginning (for
1292
+ # reading). If you want it at the end (for writing), use
1293
+ # FasterCSV::generate(). If you want any other positioning, pass a preset
1294
+ # StringIO object instead.
1295
+ #
1296
+ # You may set any reading and/or writing preferences in the +options+ Hash.
1297
+ # Available options are:
1298
+ #
1299
+ # <b><tt>:col_sep</tt></b>:: The String placed between each field.
1300
+ # <b><tt>:row_sep</tt></b>:: The String appended to the end of each
1301
+ # row. This can be set to the special
1302
+ # <tt>:auto</tt> setting, which requests
1303
+ # that FasterCSV automatically discover
1304
+ # this from the data. Auto-discovery
1305
+ # reads ahead in the data looking for
1306
+ # the next <tt>"\r\n"</tt>,
1307
+ # <tt>"\n"</tt>, or <tt>"\r"</tt>
1308
+ # sequence. A sequence will be selected
1309
+ # even if it occurs in a quoted field,
1310
+ # assuming that you would have the same
1311
+ # line endings there. If none of those
1312
+ # sequences is found, +data+ is
1313
+ # <tt>ARGF</tt>, <tt>STDIN</tt>,
1314
+ # <tt>STDOUT</tt>, or <tt>STDERR</tt>,
1315
+ # or the stream is only available for
1316
+ # output, the default
1317
+ # <tt>$INPUT_RECORD_SEPARATOR</tt>
1318
+ # (<tt>$/</tt>) is used. Obviously,
1319
+ # discovery takes a little time. Set
1320
+ # manually if speed is important. Also
1321
+ # note that IO objects should be opened
1322
+ # in binary mode on Windows if this
1323
+ # feature will be used as the
1324
+ # line-ending translation can cause
1325
+ # problems with resetting the document
1326
+ # position to where it was before the
1327
+ # read ahead.
1328
+ # <b><tt>:quote_char</tt></b>:: The character used to quote fields.
1329
+ # This has to be a single character
1330
+ # String. This is useful for
1331
+ # applications that incorrectly use
1332
+ # <tt>'</tt> as the quote character
1333
+ # instead of the correct <tt>"</tt>.
1334
+ # FasterCSV will always consider a
1335
+ # double sequence of this character to be
1336
+ # an escaped quote.
1337
+ # <b><tt>:encoding</tt></b>:: The encoding to use when parsing the
1338
+ # file. Defaults to your <tt>$KCODE</tt>
1339
+ # setting. Valid values: <tt>`n’</tt> or
1340
+ # <tt>`N’</tt> for none, <tt>`e’</tt> or
1341
+ # <tt>`E’</tt> for EUC, <tt>`s’</tt> or
1342
+ # <tt>`S’</tt> for SJIS, and
1343
+ # <tt>`u’</tt> or <tt>`U’</tt> for UTF-8
1344
+ # (see Regexp.new()).
1345
+ # <b><tt>:field_size_limit</tt></b>:: This is a maximum size FasterCSV will
1346
+ # read ahead looking for the closing
1347
+ # quote for a field. (In truth, it
1348
+ # reads to the first line ending beyond
1349
+ # this size.) If a quote cannot be
1350
+ # found within the limit FasterCSV will
1351
+ # raise a MalformedCSVError, assuming
1352
+ # the data is faulty. You can use this
1353
+ # limit to prevent what are effectively
1354
+ # DoS attacks on the parser. However,
1355
+ # this limit can cause a legitimate
1356
+ # parse to fail and thus is set to
1357
+ # +nil+, or off, by default.
1358
+ # <b><tt>:converters</tt></b>:: An Array of names from the Converters
1359
+ # Hash and/or lambdas that handle custom
1360
+ # conversion. A single converter
1361
+ # doesn't have to be in an Array.
1362
+ # <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
1363
+ # unconverted_fields() method will be
1364
+ # added to all returned rows (Array or
1365
+ # FasterCSV::Row) that will return the
1366
+ # fields as they were before conversion.
1367
+ # Note that <tt>:headers</tt> supplied
1368
+ # by Array or String were not fields of
1369
+ # the document and thus will have an
1370
+ # empty Array attached.
1371
+ # <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
1372
+ # +true+, the initial row of the CSV
1373
+ # file will be treated as a row of
1374
+ # headers. If set to an Array, the
1375
+ # contents will be used as the headers.
1376
+ # If set to a String, the String is run
1377
+ # through a call of
1378
+ # FasterCSV::parse_line() with the same
1379
+ # <tt>:col_sep</tt>, <tt>:row_sep</tt>,
1380
+ # and <tt>:quote_char</tt> as this
1381
+ # instance to produce an Array of
1382
+ # headers. This setting causes
1383
+ # FasterCSV.shift() to return rows as
1384
+ # FasterCSV::Row objects instead of
1385
+ # Arrays and FasterCSV.read() to return
1386
+ # FasterCSV::Table objects instead of
1387
+ # an Array of Arrays.
1388
+ # <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently
1389
+ # swallowed. If set to +true+, header
1390
+ # rows are returned in a FasterCSV::Row
1391
+ # object with identical headers and
1392
+ # fields (save that the fields do not go
1393
+ # through the converters).
1394
+ # <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
1395
+ # set, a header row will be added to the
1396
+ # output.
1397
+ # <b><tt>:header_converters</tt></b>:: Identical in functionality to
1398
+ # <tt>:converters</tt> save that the
1399
+ # conversions are only made to header
1400
+ # rows.
1401
+ # <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, FasterCSV
1402
+ # will skip over any rows with no
1403
+ # content.
1404
+ # <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, FasterCSV
1405
+ # will quote all CSV fields it creates.
1406
+ #
1407
+ # See FasterCSV::DEFAULT_OPTIONS for the default settings.
1408
+ #
1409
+ # Options cannot be overridden in the instance methods for performance reasons,
1410
+ # so be sure to set what you want here.
1411
+ #
1412
def initialize(data, options = Hash.new)
  # start from the defaults, letting the caller's settings win
  options = DEFAULT_OPTIONS.merge(options)

  # Strings are wrapped in a StringIO; anything else is used as given
  @io = data.is_a?(String) ? StringIO.new(data) : data

  # each init_*() call removes the options it understands from the Hash
  # (init_parsers() relies on the separators stored by init_separators())
  init_separators(options)
  init_parsers(options)
  init_converters(options)
  init_headers(options)

  # whatever is left was not recognized by any of them
  raise ArgumentError, "Unknown options: #{options.keys.join(', ')}." unless options.empty?

  # track our own lineno since IO gets confused about line-ends in CSV fields
  @lineno = 0
end
1431
+
1432
#
# The line number of the last row read from this file.  Fields with nested
# line-end characters will not affect this count.
#
# The counter starts at 0, is incremented once per row by both shift() and
# <<() (blank rows included, even when they are skipped), and is reset to 0
# by rewind().
#
attr_reader :lineno
1437
+
1438
+ ### IO and StringIO Delegation ###
1439
+
1440
# Forward the listed IO-style methods straight to the wrapped <tt>@io</tt>
# object.  Nothing is checked here: a call only succeeds if <tt>@io</tt>
# itself responds to the method (string(), for example, needs the StringIO
# that initialize() creates for String data).
extend Forwardable
def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
                     :eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
                     :isatty, :pid, :pos, :reopen, :seek, :stat, :string,
                     :sync, :sync=, :tell, :to_i, :to_io, :tty?
1445
+
1446
+ # Rewinds the underlying IO object and resets FasterCSV's lineno() counter.
1447
def rewind
  # forget the parsed headers and restart the row count, then reposition
  # the wrapped IO (whose rewind() result is returned)
  @lineno, @headers = 0, nil

  @io.rewind
end
1453
+
1454
+ ### End Delegation ###
1455
+
1456
+ #
1457
+ # The primary write method for wrapped Strings and IOs, +row+ (an Array or
1458
+ # FasterCSV::Row) is converted to CSV and appended to the data source. When a
1459
+ # FasterCSV::Row is passed, only the row's fields() are appended to the
1460
+ # output.
1461
+ #
1462
+ # The data source must be open for writing.
1463
+ #
1464
def <<(row)
  # headers supplied as an Array or String are set up before the first
  # write (and written out first when :write_headers is on)
  if header_row? && [Array, String].include?(@use_headers.class)
    parse_headers  # won't read data for Array or String
    self << @headers if @write_headers
  end

  # reduce FasterCSV::Row objects and Hashes to a plain list of fields
  fields = case row
           when self.class::Row then row.fields
           when Hash            then @headers.map { |header| row[header] }
           else                      row
           end

  # with headers still pending, the first row written becomes the headers
  @headers = fields if header_row?
  @lineno += 1

  # quote each field, join with the column separator, end the row
  csv_line = fields.map(&@quote).join(@col_sep)
  @io << csv_line + @row_sep

  self  # for chaining
end
1485
# add_row() and puts() are alternate names for <<().
alias_method :add_row, :<<
alias_method :puts,    :<<
1487
+
1488
+ #
1489
+ # :call-seq:
1490
+ # convert( name )
1491
+ # convert { |field| ... }
1492
+ # convert { |field, field_info| ... }
1493
+ #
1494
+ # You can use this method to install a FasterCSV::Converters built-in, or
1495
+ # provide a block that handles a custom conversion.
1496
+ #
1497
+ # If you provide a block that takes one argument, it will be passed the field
1498
+ # and is expected to return the converted value or the field itself. If your
1499
+ # block takes two arguments, it will also be passed a FieldInfo Struct,
1500
+ # containing details about the field. Again, the block should return a
1501
+ # converted field or the field itself.
1502
+ #
1503
def convert(name = nil, &converter)
  # field converters are looked up by name in this class's Converters Hash
  builtins = self.class::Converters
  add_converter(:converters, builtins, name, &converter)
end
1506
+
1507
+ #
1508
+ # :call-seq:
1509
+ # header_convert( name )
1510
+ # header_convert { |field| ... }
1511
+ # header_convert { |field, field_info| ... }
1512
+ #
1513
+ # Identical to FasterCSV.convert(), but for header rows.
1514
+ #
1515
+ # Note that this method must be called before header rows are read to have any
1516
+ # effect.
1517
+ #
1518
def header_convert(name = nil, &converter)
  # header converters are looked up by name in HeaderConverters
  builtins = self.class::HeaderConverters
  add_converter(:header_converters, builtins, name, &converter)
end
1524
+
1525
# Enumerable's methods are built on each(), which yields rows from shift().
include Enumerable
1526
+
1527
+ #
1528
+ # Yields each row of the data source in turn.
1529
+ #
1530
+ # Support for Enumerable.
1531
+ #
1532
+ # The data source must be open for reading.
1533
+ #
1534
def each
  # keep pulling rows until shift() reports the end of the data
  row = shift
  while row
    yield row
    row = shift
  end
end
1539
+
1540
+ #
1541
+ # Slurps the remaining rows and returns an Array of Arrays.
1542
+ #
1543
+ # The data source must be open for reading.
1544
+ #
1545
def read
  # to_a() drains the remaining rows through each()
  rows = to_a
  # with headers in use the rows are wrapped in a Table
  @use_headers ? Table.new(rows) : rows
end
1553
# readlines() is an alternate name for read().
alias_method :readlines, :read
1554
+
1555
+ # Returns +true+ if the next row read will be a header row.
1556
def header_row?
  # headers were requested but have not been established yet
  @use_headers && @headers.nil?
end
1559
+
1560
+ #
1561
+ # The primary read method for wrapped Strings and IOs, a single row is pulled
1562
+ # from the data source, parsed and returned as an Array of fields (if header
1563
+ # rows are not used) or a FasterCSV::Row (when header rows are used).
1564
+ #
1565
+ # The data source must be open for reading.
1566
+ #
1567
def shift
  #########################################################################
  ### This method is purposefully kept a bit long as simple conditional ###
  ### checks are faster than numerous (expensive) method calls.         ###
  #########################################################################
  #
  # Returns the next parsed row, or +nil+ once <tt>@io.gets()</tt> has no
  # more data.  Raises MalformedCSVError for a quoted field that cannot be
  # extracted or that contains a stray quote, and when
  # <tt>@field_size_limit</tt> is exceeded; the "Illegal quoting",
  # "Unquoted fields" and "Unclosed quoted field" errors are raised only
  # when <tt>@raise_exception</tt> is set.
  #

  # handle headers not based on document content
  if header_row? and @return_headers and
     [Array, String].include? @use_headers.class
    if @unconverted_fields
      return add_unconverted_fields(parse_headers, Array.new)
    else
      return parse_headers
    end
  end

  # begin with a blank line, so we can always add to it
  line = String.new

  #
  # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
  # because of \r and/or \n characters embedded in quoted fields
  #
  # (the row is handed back through <tt>break csv</tt>, which makes it the
  # value of the loop and so of this method)
  #
  loop do
    # add another read to the line
    if read_line = @io.gets(@row_sep)
      line += read_line
    else
      # out of data (any partially accumulated line is dropped)
      return nil
    end
    # copy the line so we can chop it up in parsing
    parse = line.dup
    parse.sub!(@parsers[:line_end], "")

    #
    # I believe a blank line should be an <tt>Array.new</tt>, not
    # CSV's <tt>[nil]</tt>
    #
    if parse.empty?
      @lineno += 1  # blank lines count, even when skipped
      if @skip_blanks
        line = ""
        next
      elsif @unconverted_fields
        return add_unconverted_fields(Array.new, Array.new)
      elsif @use_headers
        return FasterCSV::Row.new(Array.new, Array.new)
      else
        return Array.new
      end
    end

    # parse the fields with a mix of String#split and regular expressions
    csv           = Array.new   # fields completed so far
    current_field = String.new  # quoted field still being assembled
    field_quotes  = 0           # quote characters seen in current_field
    parse.split(@col_sep, -1).each do |match|
      if current_field.empty? && match.count(@quote_and_newlines).zero?
        # plain unquoted field (empty ones become nil)
        csv << (match.empty? ? nil : match)
      elsif (current_field.empty? ? match[0] : current_field[0]) ==
            @quote_char[0]
        # a piece of a field that begins with the quote character
        current_field << match
        field_quotes += match.count(@quote_char)
        if field_quotes % 2 == 0  # quotes balance: the field is complete
          in_quotes = current_field[@parsers[:quoted_field], 1]
          raise MalformedCSVError if !in_quotes ||
                                     in_quotes[@parsers[:stray_quote]]
          current_field = in_quotes
          current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
          csv << current_field
          current_field = String.new
          field_quotes  = 0
        else # the quoted field continues: the separator we split on was data
          current_field << @col_sep
        end
      elsif match.count("\r\n").zero?
        # quote inside an unquoted field; without @raise_exception the piece
        # is simply left out of the row
        raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}." if @raise_exception
      else
        raise MalformedCSVError, "Unquoted fields do not allow " +
                                 "\\r or \\n (line #{lineno + 1})." if @raise_exception
      end
    end

    # an even quote count means no quoted field is still open, so we found
    # all the fields on the line...
    if field_quotes % 2 == 0
      @lineno += 1

      # save the unconverted fields, if needed...
      unconverted = csv.dup if @unconverted_fields

      # convert fields, if needed...
      csv = convert_fields(csv) unless @use_headers or @converters.empty?
      # parse out header rows and handle FasterCSV::Row conversions...
      csv = parse_headers(csv)  if     @use_headers

      # inject unconverted fields and accessor, if requested...
      if @unconverted_fields and not csv.respond_to? :unconverted_fields
        add_unconverted_fields(csv, unconverted)
      end

      # return the results
      break csv
    end
    # a quoted field is still open; if we are also at eof?(), it was never
    # closed...
    if @io.eof?
      raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}." if @raise_exception
    elsif @field_size_limit and current_field.size >= @field_size_limit
      raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
    end
    # otherwise, we need to loop and pull some more data to complete the row
  end
end
# gets() and readline() are alternate names for shift().
alias_method :gets,     :shift
alias_method :readline, :shift
1681
+
1682
+ # Returns a simplified description of the key FasterCSV attributes.
1683
def inspect
  summary = "<##{self.class} io_type:"

  # name the standard streams, otherwise show the wrapped object's class
  summary << if    @io == $stdout then "$stdout"
             elsif @io == $stdin  then "$stdin"
             elsif @io == $stderr then "$stderr"
             else                      @io.class.to_s
             end

  # add the path when the wrapped object has one
  path = @io.respond_to?(:path) ? @io.path : nil
  summary << " io_path:#{path.inspect}" if path

  # list the remaining attributes that are set
  %w[ lineno     col_sep     row_sep
      quote_char skip_blanks encoding ].each do |attr_name|
    value = instance_variable_get("@#{attr_name}")
    summary << " #{attr_name}:#{value.inspect}" if value
  end

  # headers show as +true+ until they have actually been read
  summary << " headers:#{(@headers || true).inspect}" if @use_headers

  summary << ">"
end
1707
+
1708
+ private
1709
+
1710
+ #
1711
+ # Stores the indicated separators for later use.
1712
+ #
1713
+ # If auto-discovery was requested for <tt>@row_sep</tt>, this method will read
1714
+ # ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
1715
+ # +STDERR+ and any stream open for output only are given a default
1716
+ # <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1717
+ #
1718
+ # This method also establishes the quoting rules used for CSV output.
1719
+ #
1720
def init_separators(options)
  # Removes :col_sep, :row_sep, :quote_char and :force_quotes from +options+
  # and sets @col_sep, @row_sep, @quote_char, @quote_and_newlines and @quote
  # (the lambda <<() uses to quote each outgoing field).  Raises
  # ArgumentError unless :quote_char is exactly one character long.

  # store the selected separators
  @col_sep            = options.delete(:col_sep)
  @row_sep            = options.delete(:row_sep)
  @quote_char         = options.delete(:quote_char)
  # character set handed to String#count() by shift() to spot fields that
  # need more than the fast unquoted path
  @quote_and_newlines = "\r\n#{@quote_char}"

  if @quote_char.length != 1
    raise ArgumentError, ":quote_char has to be a single character String"
  end

  # automatically discover row separator when requested
  if @row_sep == :auto
    # these streams are never read ahead; they just get the default
    if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or
       (defined?(Zlib) and @io.class == Zlib::GzipWriter)
      @row_sep = $INPUT_RECORD_SEPARATOR
    else
      begin
        raise IOError, "IO is nil" unless @io  # handled by the rescue below
        saved_pos = @io.pos  # remember where we were
        while @row_sep == :auto
          #
          # if we run out of data, it's probably a single line
          # (use a sensible default)
          #
          if @io.eof?
            @row_sep = $INPUT_RECORD_SEPARATOR
            break
          end

          # read ahead a bit
          sample =  @io.read(1024)
          # a trailing "\r" may be the first half of "\r\n": fetch one more
          # character so the pair can be matched whole
          sample += @io.read(1) if sample[-1..-1] == "\r" and not @io.eof?

          # try to find a standard separator
          if sample =~ /\r\n?|\n/
            @row_sep = $&
            break
          end
        end
        # tricky seek() clone to work around GzipReader's lack of seek()
        @io.rewind
        # reset back to the remembered position
        while saved_pos > 1024  # avoid loading a lot of data into memory
          @io.read(1024)
          saved_pos -= 1024
        end
        @io.read(saved_pos) if saved_pos.nonzero?
      rescue IOError  # stream not opened for reading
        @row_sep = $INPUT_RECORD_SEPARATOR
      end
    end
  end

  # establish quoting rules
  # do_quote always wraps the field in quotes, doubling any quotes inside it
  do_quote = lambda do |field|
    @quote_char                                      +
    String(field).gsub(@quote_char, @quote_char * 2) +
    @quote_char
  end
  # with :force_quotes every field goes through do_quote; otherwise only
  # fields that need it do
  @quote = if options.delete(:force_quotes)
    do_quote
  else
    lambda do |field|
      if field.nil?  # represent +nil+ fields as empty unquoted fields
        ""
      else
        field = String(field)  # Stringify fields
        # represent empty fields as empty quoted fields
        # (and quote anything containing \r, \n, the column separator or
        # the quote character)
        if field.empty? or
           field.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
          do_quote.call(field)
        else
          field  # unquoted field
        end
      end
    end
  end
end
1799
+
1800
+ # Pre-compiles parsers and stores them by name for access during reads.
1801
def init_parsers(options)
  # store the parser behaviors (each key is removed from +options+)
  @skip_blanks      = options.delete(:skip_blanks)
  @encoding         = options.delete(:encoding)  # nil will use $KCODE
  @field_size_limit = options.delete(:field_size_limit)
  @raise_exception  = options.delete(:raise_exception)

  # escape the configured separators so they are matched literally
  quote   = Regexp.escape(@quote_char)
  col_sep = Regexp.escape(@col_sep)
  row_sep = Regexp.escape(@row_sep)

  # all but :line_end are compiled multiline, in the requested encoding
  multiline = lambda do |pattern|
    Regexp.new(pattern, Regexp::MULTILINE, @encoding)
  end

  # prebuild Regexps for faster parsing
  @parsers = {
    :any_field    => multiline.call("[^#{col_sep}]+"),
    :quoted_field => multiline.call("^#{quote}(.*)#{quote}$"),
    :stray_quote  => multiline.call("[^#{quote}]#{quote}[^#{quote}]"),
    # safer than chomp!()
    :line_end     => Regexp.new("#{row_sep}\\z", nil, @encoding)
  }
end
1826
+
1827
+ #
1828
+ # Loads any converters requested during construction.
1829
+ #
1830
+ # If +field_name+ is set to <tt>:converters</tt> (the default) field converters
1831
+ # are set. When +field_name+ is <tt>:header_converters</tt> header converters
1832
+ # are added instead.
1833
+ #
1834
+ # The <tt>:unconverted_fields</tt> option is also activated for
1835
+ # <tt>:converters</tt> calls, if requested.
1836
+ #
1837
def init_converters(options, field_name = :converters)
  # Resets the converter list stored in the instance variable named after
  # +field_name+, then registers every converter requested under that key in
  # +options+. The key is removed from +options+ and its final value (an
  # Array, or +nil+ when nothing was requested) is returned.

  # only the field converter pass consumes the :unconverted_fields setting
  @unconverted_fields = options.delete(:unconverted_fields) if field_name == :converters

  # start from an empty list
  instance_variable_set("@#{field_name}", [])

  # find the correct method to add the converters: :converters loads through
  # convert() and :header_converters through header_convert()
  loader = method(field_name.to_s.sub(/ers\Z/, ""))

  requested = options[field_name]
  unless requested.nil?
    # allow a single converter not wrapped in an Array
    unless requested.is_a?(Array)
      requested           = [requested]
      options[field_name] = requested
    end

    requested.each do |item|
      if item.is_a?(Proc)  # custom code block
        loader.call(&item)
      else                 # by name
        loader.call(item)
      end
    end
  end

  options.delete(field_name)
end
1865
+
1866
+ # Stores header row settings and loads header converters, if needed.
1867
def init_headers(options)
  # Consumes the three header settings from +options+ (removing each key),
  # clears <tt>@headers</tt>, and hands the remaining options on to
  # init_converters() for the header converters, returning its result.
  @use_headers, @return_headers, @write_headers =
    [:headers, :return_headers, :write_headers].map { |key| options.delete(key) }

  # headers must be delayed until shift(), in case they need a row of content
  @headers = nil

  init_converters(options, :header_converters)
end
1877
+
1878
+ #
1879
+ # The actual work method for adding converters, used by both
1880
+ # FasterCSV.convert() and FasterCSV.header_convert().
1881
+ #
1882
+ # This method requires the +var_name+ of the instance variable to place the
1883
+ # converters in, the +const+ Hash to lookup named converters in, and the
1884
+ # normal parameters of the FasterCSV.convert() and FasterCSV.header_convert()
1885
+ # methods.
1886
+ #
1887
def add_converter(var_name, const, name = nil, &converter)
  # Appends a converter to the Array held in the instance variable named
  # +var_name+.
  #
  # With no +name+, the given block is stored as a custom converter. With a
  # +name+, the converter is looked up in the +const+ Hash; when the entry is
  # an Array it is treated as a list of further names, each added in turn.
  #
  # Raises ArgumentError when neither a +name+ nor a block is given, or when
  # +name+ has no entry in +const+. Storing +nil+ in those cases would only
  # surface later, as a NoMethodError while fields are being converted.
  converters = instance_variable_get("@#{var_name}")

  if name.nil?  # custom converter
    if converter.nil?
      raise ArgumentError, "a converter name or a block is required"
    end
    converters << converter
  else          # named converter
    combo = const[name]
    case combo
    when nil    # unknown name
      raise ArgumentError, "unknown converter name: #{name.inspect}"
    when Array  # combo converter
      combo.each do |converter_name|
        add_converter(var_name, const, converter_name)
      end
    else        # individual named converter
      converters << combo
    end
  end
end
1902
+
1903
+ #
1904
+ # Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
1905
+ # if +headers+ is passed as +true+, returning the converted field set. Any
1906
+ # converter that changes the field into something other than a String halts
1907
+ # the pipeline of conversion for that field. This is primarily an efficiency
1908
+ # shortcut.
1909
+ #
1910
def convert_fields(fields, headers = false)
  # Runs each entry of +fields+ through the converter list and returns the
  # results as a new Array, in the same order.

  # header rows go through the header converters, everything else through the
  # field converters
  pipeline = headers ? @header_converters : @converters

  converted = []
  fields.each_with_index do |value, position|
    pipeline.each do |step|
      if step.arity == 1  # straight field converter
        value = step[value]
      else                # FieldInfo converter
        # a header name is only supplied for data rows when headers are in use
        name  = (@use_headers && !headers) ? @headers[position] : nil
        value = step[value, FieldInfo.new(position, lineno, name)]
      end
      break unless value.is_a?(String)  # short-circuit pipeline for speed
    end
    converted << value  # final state of the field, converted or original
  end
  converted
end
1927
+
1928
+ #
1929
+ # This method is used to turn a finished +row+ into a FasterCSV::Row. Header
1930
+ # rows are also dealt with here, either by returning a FasterCSV::Row with
1931
+ # identical headers and fields (save that the fields do not go through the
1932
+ # converters) or by reading past them to return a field row. Headers are also
1933
+ # saved in <tt>@headers</tt> for use in future rows.
1934
+ #
1935
+ # When +nil+, +row+ is assumed to be a header row not based on an actual row
1936
+ # of the stream.
1937
+ #
1938
def parse_headers(row = nil)
  # Turns +row+ into a FasterCSV::Row, establishing <tt>@headers</tt> first
  # when they have not been set yet.
  #
  # On the header pass the result is one of:
  # * a header FasterCSV::Row, when <tt>@return_headers</tt> is set
  # * the result of shift(), when the headers came from +row+ itself and are
  #   not being returned
  # * otherwise +row+ as a regular field row, as on every later call
  if @headers.nil?  # header row
    # Headers supplied up front, as an Array or a CSV String, do not consume
    # a row of content. Classify with is_a?() so that subclasses are treated
    # exactly as the +case+ below treats them. An exact class comparison
    # would send a subclass instance down the "skip to field row" branch and
    # silently discard +row+.
    preset_headers = @use_headers.is_a?(Array) || @use_headers.is_a?(String)

    @headers = case @use_headers  # save headers
               # Array of headers
               when Array then @use_headers
               # CSV header String
               when String
                 self.class.parse_line( @use_headers,
                                        :col_sep    => @col_sep,
                                        :row_sep    => @row_sep,
                                        :quote_char => @quote_char )
               # first row is headers
               else row
               end

    # prepare converted and unconverted copies
    row      = @headers if row.nil?
    @headers = convert_fields(@headers, true)

    if @return_headers      # return headers
      return FasterCSV::Row.new(@headers, row, true)
    elsif not preset_headers  # skip to field row
      return shift
    end
  end

  FasterCSV::Row.new(@headers, convert_fields(row))  # field row
end
1966
+
1967
+ #
1968
+ # This method injects an instance variable <tt>unconverted_fields</tt> into
1969
+ # +row+ and an accessor method for it called unconverted_fields(). The
1970
+ # variable is set to the contents of +fields+.
1971
+ #
1972
def add_unconverted_fields(row, fields)
  # Gives this one +row+ object an unconverted_fields() reader, sets the
  # backing instance variable to +fields+, and returns +row+.
  singleton = class << row; self; end
  singleton.send(:attr_reader, :unconverted_fields)
  row.instance_variable_set("@unconverted_fields", fields)
  row
end
1979
+ end
1980
+
1981
+ # Another name for FasterCSV.
1982
+ FCSV = FasterCSV
1983
+
1984
+ # Another name for FasterCSV::instance().
1985
def FasterCSV(*args, &block)
  # Shortcut method: passes every argument and the optional block unchanged
  # to FasterCSV::instance() and returns its result.
  target = FasterCSV
  target.instance(*args, &block)
end
1988
+
1989
+ # Another name for FCSV::instance().
1990
def FCSV(*args, &block)
  # Shortcut method: passes every argument and the optional block unchanged
  # to FCSV::instance() and returns its result.
  target = FCSV
  target.instance(*args, &block)
end
1993
+
1994
class Array
  # Hands this Array and +options+ to FasterCSV::generate_line() and returns
  # the result. +options+ defaults to an empty Hash.
  def to_csv(options = {})
    FasterCSV.generate_line(self, options)
  end
end
2000
+
2001
class String
  # Hands this String and +options+ to FasterCSV::parse_line() and returns
  # the result. +options+ defaults to an empty Hash.
  def parse_csv(options = {})
    FasterCSV.parse_line(self, options)
  end
end