glennfu-faster_csv 1.5.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ Quantity,Product Description,Price
2
+ 1,Text Editor,25.00
3
+ 2,MacBook Pros,2499.00
@@ -0,0 +1,36 @@
1
#!/usr/local/bin/ruby -w

# shortcut_interface.rb
#
# Created by James Edward Gray II on 2006-04-01.
# Copyright 2006 Gray Productions. All rights reserved.
#
# Feature implementation and example code by Ara.T.Howard.

require "faster_csv"

#
# Called with only a block, FCSV() sends the rows to STDOUT.
#
FCSV { |out|
  out << %w[a b c]
  out << %w[d e f]
}

#
# Handing FCSV() a String collects the rows in that String instead.
#
buffer = ''
FCSV(buffer) { |out| out << %w[q r s] << %w[x y z] }
puts buffer

#
# Any other IO works the same way; here the rows go to STDERR.
#
FCSV(STDERR) { |err| err << %w[0 1 2] << %w[A B C] }

# Expected STDOUT of this script:
# >> a,b,c
# >> d,e,f
# >> q,r,s
# >> x,y,z
data/lib/faster_csv.rb ADDED
@@ -0,0 +1,2021 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # = faster_csv.rb -- Faster CSV Reading and Writing
4
+ #
5
+ # Created by James Edward Gray II on 2005-10-31.
6
+ # Copyright 2005 Gray Productions. All rights reserved.
7
+ #
8
+ # See FasterCSV for documentation.
9
+
10
+ if RUBY_VERSION >= "1.9"
11
class FasterCSV
  class << self
    # Every constant looked up under FasterCSV (FasterCSV::Row, ...) is missing
    # in this stub, so each lookup ends here and is refused.
    def const_missing(*)
      raise NotImplementedError,
            "Please switch to Ruby 1.9's standard CSV " \
            "library. It's FasterCSV plus support for " \
            "Ruby 1.9's m17n encoding engine."
    end

    # Unknown class-level calls (FasterCSV.parse, ...) get the same refusal.
    def method_missing(*)
      const_missing
    end
  end

  # Unknown instance-level calls are refused as well.
  def method_missing(*)
    self.class.const_missing
  end
end
26
+ else
27
+ require "forwardable"
28
+ require "English"
29
+ require "enumerator"
30
+ require "date"
31
+ require "stringio"
32
+
33
+ #
34
+ # This class provides a complete interface to CSV files and data. It offers
35
+ # tools to enable you to read and write to and from Strings or IO objects, as
36
+ # needed.
37
+ #
38
+ # == Reading
39
+ #
40
+ # === From a File
41
+ #
42
+ # ==== A Line at a Time
43
+ #
44
+ # FasterCSV.foreach("path/to/file.csv") do |row|
45
+ # # use row here...
46
+ # end
47
+ #
48
+ # ==== All at Once
49
+ #
50
+ # arr_of_arrs = FasterCSV.read("path/to/file.csv")
51
+ #
52
+ # === From a String
53
+ #
54
+ # ==== A Line at a Time
55
+ #
56
+ # FasterCSV.parse("CSV,data,String") do |row|
57
+ # # use row here...
58
+ # end
59
+ #
60
+ # ==== All at Once
61
+ #
62
+ # arr_of_arrs = FasterCSV.parse("CSV,data,String")
63
+ #
64
+ # == Writing
65
+ #
66
+ # === To a File
67
+ #
68
+ # FasterCSV.open("path/to/file.csv", "w") do |csv|
69
+ # csv << ["row", "of", "CSV", "data"]
70
+ # csv << ["another", "row"]
71
+ # # ...
72
+ # end
73
+ #
74
+ # === To a String
75
+ #
76
+ # csv_string = FasterCSV.generate do |csv|
77
+ # csv << ["row", "of", "CSV", "data"]
78
+ # csv << ["another", "row"]
79
+ # # ...
80
+ # end
81
+ #
82
+ # == Convert a Single Line
83
+ #
84
+ # csv_string = ["CSV", "data"].to_csv # to CSV
85
+ # csv_array = "CSV,String".parse_csv # from CSV
86
+ #
87
+ # == Shortcut Interface
88
+ #
89
+ # FCSV { |csv_out| csv_out << %w{my data here} } # to $stdout
90
+ # FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
91
+ # FCSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
92
+ # FCSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
93
+ #
94
+ # == Advanced Usage
95
+ #
96
+ # === Wrap an IO Object
97
+ #
98
+ # csv = FCSV.new(io, options)
99
+ # # ... read (with gets() or each()) from and write (with <<) to csv here ...
100
+ #
101
+ class FasterCSV
102
+ # The version of the installed library.
103
+ VERSION = "1.5.5".freeze
104
+
105
+ #
106
+ # A FasterCSV::Row is part Array and part Hash. It retains an order for the
107
+ # fields and allows duplicates just as an Array would, but also allows you to
108
+ # access fields by name just as you could if they were in a Hash.
109
+ #
110
+ # All rows returned by FasterCSV will be constructed from this class, if
111
+ # header row processing is activated.
112
+ #
113
+ class Row
114
+ #
115
+ # Construct a new FasterCSV::Row from +headers+ and +fields+, which are
116
+ # expected to be Arrays. If one Array is shorter than the other, it will be
117
+ # padded with +nil+ objects.
118
+ #
119
+ # The optional +header_row+ parameter can be set to +true+ to indicate, via
120
+ # FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
121
+ # a header row. Otherwise, the row is assumes to be a field row.
122
+ #
123
+ # A FasterCSV::Row object supports the following Array methods through
124
+ # delegation:
125
+ #
126
+ # * empty?()
127
+ # * length()
128
+ # * size()
129
+ #
130
+ def initialize(headers, fields, header_row = false)
131
+ @header_row = header_row
132
+
133
+ # handle extra headers or fields
134
+ @row = if headers.size > fields.size
135
+ headers.zip(fields)
136
+ else
137
+ fields.zip(headers).map { |pair| pair.reverse }
138
+ end
139
+ end
140
+
141
+ # Internal data format used to compare equality.
142
+ attr_reader :row
143
+ protected :row
144
+
145
+ ### Array Delegation ###
146
+
147
+ extend Forwardable
148
+ def_delegators :@row, :empty?, :length, :size
149
+
150
+ # Returns +true+ if this is a header row.
151
+ def header_row?
152
+ @header_row
153
+ end
154
+
155
+ # Returns +true+ if this is a field row.
156
+ def field_row?
157
+ not header_row?
158
+ end
159
+
160
+ # Returns the headers of this row.
161
+ def headers
162
+ @row.map { |pair| pair.first }
163
+ end
164
+
165
+ #
166
+ # :call-seq:
167
+ # field( header )
168
+ # field( header, offset )
169
+ # field( index )
170
+ #
171
+ # This method will fetch the field value by +header+ or +index+. If a field
172
+ # is not found, +nil+ is returned.
173
+ #
174
+ # When provided, +offset+ ensures that a header match occurrs on or later
175
+ # than the +offset+ index. You can use this to find duplicate headers,
176
+ # without resorting to hard-coding exact indices.
177
+ #
178
+ def field(header_or_index, minimum_index = 0)
179
+ # locate the pair
180
+ finder = header_or_index.is_a?(Integer) ? :[] : :assoc
181
+ pair = @row[minimum_index..-1].send(finder, header_or_index)
182
+
183
+ # return the field if we have a pair
184
+ pair.nil? ? nil : pair.last
185
+ end
186
+ alias_method :[], :field
187
+
188
+ #
189
+ # :call-seq:
190
+ # []=( header, value )
191
+ # []=( header, offset, value )
192
+ # []=( index, value )
193
+ #
194
+ # Looks up the field by the semantics described in FasterCSV::Row.field()
195
+ # and assigns the +value+.
196
+ #
197
+ # Assigning past the end of the row with an index will set all pairs between
198
+ # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
199
+ # pair.
200
+ #
201
+ def []=(*args)
202
+ value = args.pop
203
+
204
+ if args.first.is_a? Integer
205
+ if @row[args.first].nil? # extending past the end with index
206
+ @row[args.first] = [nil, value]
207
+ @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
208
+ else # normal index assignment
209
+ @row[args.first][1] = value
210
+ end
211
+ else
212
+ index = index(*args)
213
+ if index.nil? # appending a field
214
+ self << [args.first, value]
215
+ else # normal header assignment
216
+ @row[index][1] = value
217
+ end
218
+ end
219
+ end
220
+
221
+ #
222
+ # :call-seq:
223
+ # <<( field )
224
+ # <<( header_and_field_array )
225
+ # <<( header_and_field_hash )
226
+ #
227
+ # If a two-element Array is provided, it is assumed to be a header and field
228
+ # and the pair is appended. A Hash works the same way with the key being
229
+ # the header and the value being the field. Anything else is assumed to be
230
+ # a lone field which is appended with a +nil+ header.
231
+ #
232
+ # This method returns the row for chaining.
233
+ #
234
+ def <<(arg)
235
+ if arg.is_a?(Array) and arg.size == 2 # appending a header and name
236
+ @row << arg
237
+ elsif arg.is_a?(Hash) # append header and name pairs
238
+ arg.each { |pair| @row << pair }
239
+ else # append field value
240
+ @row << [nil, arg]
241
+ end
242
+
243
+ self # for chaining
244
+ end
245
+
246
+ #
247
+ # A shortcut for appending multiple fields. Equivalent to:
248
+ #
249
+ # args.each { |arg| faster_csv_row << arg }
250
+ #
251
+ # This method returns the row for chaining.
252
+ #
253
+ def push(*args)
254
+ args.each { |arg| self << arg }
255
+
256
+ self # for chaining
257
+ end
258
+
259
+ #
260
+ # :call-seq:
261
+ # delete( header )
262
+ # delete( header, offset )
263
+ # delete( index )
264
+ #
265
+ # Used to remove a pair from the row by +header+ or +index+. The pair is
266
+ # located as described in FasterCSV::Row.field(). The deleted pair is
267
+ # returned, or +nil+ if a pair could not be found.
268
+ #
269
+ def delete(header_or_index, minimum_index = 0)
270
+ if header_or_index.is_a? Integer # by index
271
+ @row.delete_at(header_or_index)
272
+ elsif i = index(header_or_index, minimum_index) # by header
273
+ @row.delete_at(i)
274
+ else
275
+ [ ]
276
+ end
277
+ end
278
+
279
+ #
280
+ # The provided +block+ is passed a header and field for each pair in the row
281
+ # and expected to return +true+ or +false+, depending on whether the pair
282
+ # should be deleted.
283
+ #
284
+ # This method returns the row for chaining.
285
+ #
286
+ def delete_if(&block)
287
+ @row.delete_if(&block)
288
+
289
+ self # for chaining
290
+ end
291
+
292
+ #
293
+ # This method accepts any number of arguments which can be headers, indices,
294
+ # Ranges of either, or two-element Arrays containing a header and offset.
295
+ # Each argument will be replaced with a field lookup as described in
296
+ # FasterCSV::Row.field().
297
+ #
298
+ # If called with no arguments, all fields are returned.
299
+ #
300
+ def fields(*headers_and_or_indices)
301
+ if headers_and_or_indices.empty? # return all fields--no arguments
302
+ @row.map { |pair| pair.last }
303
+ else # or work like values_at()
304
+ headers_and_or_indices.inject(Array.new) do |all, h_or_i|
305
+ all + if h_or_i.is_a? Range
306
+ index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
307
+ index(h_or_i.begin)
308
+ index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
309
+ index(h_or_i.end)
310
+ new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
311
+ (index_begin..index_end)
312
+ fields.values_at(new_range)
313
+ else
314
+ [field(*Array(h_or_i))]
315
+ end
316
+ end
317
+ end
318
+ end
319
+ alias_method :values_at, :fields
320
+
321
+ #
322
+ # :call-seq:
323
+ # index( header )
324
+ # index( header, offset )
325
+ #
326
+ # This method will return the index of a field with the provided +header+.
327
+ # The +offset+ can be used to locate duplicate header names, as described in
328
+ # FasterCSV::Row.field().
329
+ #
330
+ def index(header, minimum_index = 0)
331
+ # find the pair
332
+ index = headers[minimum_index..-1].index(header)
333
+ # return the index at the right offset, if we found one
334
+ index.nil? ? nil : index + minimum_index
335
+ end
336
+
337
+ # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
338
+ def header?(name)
339
+ headers.include? name
340
+ end
341
+ alias_method :include?, :header?
342
+
343
+ #
344
+ # Returns +true+ if +data+ matches a field in this row, and +false+
345
+ # otherwise.
346
+ #
347
+ def field?(data)
348
+ fields.include? data
349
+ end
350
+
351
+ include Enumerable
352
+
353
+ #
354
+ # Yields each pair of the row as header and field tuples (much like
355
+ # iterating over a Hash).
356
+ #
357
+ # Support for Enumerable.
358
+ #
359
+ # This method returns the row for chaining.
360
+ #
361
+ def each(&block)
362
+ @row.each(&block)
363
+
364
+ self # for chaining
365
+ end
366
+
367
+ #
368
+ # Returns +true+ if this row contains the same headers and fields in the
369
+ # same order as +other+.
370
+ #
371
+ def ==(other)
372
+ @row == other.row
373
+ end
374
+
375
+ #
376
+ # Collapses the row into a simple Hash. Be warning that this discards field
377
+ # order and clobbers duplicate fields.
378
+ #
379
+ def to_hash
380
+ # flatten just one level of the internal Array
381
+ Hash[*@row.inject(Array.new) { |ary, pair| ary.push(*pair) }]
382
+ end
383
+
384
+ #
385
+ # Returns the row as a CSV String. Headers are not used. Equivalent to:
386
+ #
387
+ # faster_csv_row.fields.to_csv( options )
388
+ #
389
+ def to_csv(options = Hash.new)
390
+ fields.to_csv(options)
391
+ end
392
+ alias_method :to_s, :to_csv
393
+
394
+ # A summary of fields, by header.
395
+ def inspect
396
+ str = "#<#{self.class}"
397
+ each do |header, field|
398
+ str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
399
+ field.inspect
400
+ end
401
+ str << ">"
402
+ end
403
+ end
404
+
405
+ #
406
+ # A FasterCSV::Table is a two-dimensional data structure for representing CSV
407
+ # documents. Tables allow you to work with the data by row or column,
408
+ # manipulate the data, and even convert the results back to CSV, if needed.
409
+ #
410
+ # All tables returned by FasterCSV will be constructed from this class, if
411
+ # header row processing is activated.
412
+ #
413
+ class Table
414
+ #
415
+ # Construct a new FasterCSV::Table from +array_of_rows+, which are expected
416
+ # to be FasterCSV::Row objects. All rows are assumed to have the same
417
+ # headers.
418
+ #
419
+ # A FasterCSV::Table object supports the following Array methods through
420
+ # delegation:
421
+ #
422
+ # * empty?()
423
+ # * length()
424
+ # * size()
425
+ #
426
+ def initialize(array_of_rows)
427
+ @table = array_of_rows
428
+ @mode = :col_or_row
429
+ end
430
+
431
+ # The current access mode for indexing and iteration.
432
+ attr_reader :mode
433
+
434
+ # Internal data format used to compare equality.
435
+ attr_reader :table
436
+ protected :table
437
+
438
+ ### Array Delegation ###
439
+
440
+ extend Forwardable
441
+ def_delegators :@table, :empty?, :length, :size
442
+
443
+ #
444
+ # Returns a duplicate table object, in column mode. This is handy for
445
+ # chaining in a single call without changing the table mode, but be aware
446
+ # that this method can consume a fair amount of memory for bigger data sets.
447
+ #
448
+ # This method returns the duplicate table for chaining. Don't chain
449
+ # destructive methods (like []=()) this way though, since you are working
450
+ # with a duplicate.
451
+ #
452
+ def by_col
453
+ self.class.new(@table.dup).by_col!
454
+ end
455
+
456
+ #
457
+ # Switches the mode of this table to column mode. All calls to indexing and
458
+ # iteration methods will work with columns until the mode is changed again.
459
+ #
460
+ # This method returns the table and is safe to chain.
461
+ #
462
+ def by_col!
463
+ @mode = :col
464
+
465
+ self
466
+ end
467
+
468
+ #
469
+ # Returns a duplicate table object, in mixed mode. This is handy for
470
+ # chaining in a single call without changing the table mode, but be aware
471
+ # that this method can consume a fair amount of memory for bigger data sets.
472
+ #
473
+ # This method returns the duplicate table for chaining. Don't chain
474
+ # destructive methods (like []=()) this way though, since you are working
475
+ # with a duplicate.
476
+ #
477
+ def by_col_or_row
478
+ self.class.new(@table.dup).by_col_or_row!
479
+ end
480
+
481
+ #
482
+ # Switches the mode of this table to mixed mode. All calls to indexing and
483
+ # iteration methods will use the default intelligent indexing system until
484
+ # the mode is changed again. In mixed mode an index is assumed to be a row
485
+ # reference while anything else is assumed to be column access by headers.
486
+ #
487
+ # This method returns the table and is safe to chain.
488
+ #
489
+ def by_col_or_row!
490
+ @mode = :col_or_row
491
+
492
+ self
493
+ end
494
+
495
+ #
496
+ # Returns a duplicate table object, in row mode. This is handy for chaining
497
+ # in a single call without changing the table mode, but be aware that this
498
+ # method can consume a fair amount of memory for bigger data sets.
499
+ #
500
+ # This method returns the duplicate table for chaining. Don't chain
501
+ # destructive methods (like []=()) this way though, since you are working
502
+ # with a duplicate.
503
+ #
504
+ def by_row
505
+ self.class.new(@table.dup).by_row!
506
+ end
507
+
508
+ #
509
+ # Switches the mode of this table to row mode. All calls to indexing and
510
+ # iteration methods will work with rows until the mode is changed again.
511
+ #
512
+ # This method returns the table and is safe to chain.
513
+ #
514
+ def by_row!
515
+ @mode = :row
516
+
517
+ self
518
+ end
519
+
520
+ #
521
+ # Returns the headers for the first row of this table (assumed to match all
522
+ # other rows). An empty Array is returned for empty tables.
523
+ #
524
+ def headers
525
+ if @table.empty?
526
+ Array.new
527
+ else
528
+ @table.first.headers
529
+ end
530
+ end
531
+
532
+ #
533
+ # In the default mixed mode, this method returns rows for index access and
534
+ # columns for header access. You can force the index association by first
535
+ # calling by_col!() or by_row!().
536
+ #
537
+ # Columns are returned as an Array of values. Altering that Array has no
538
+ # effect on the table.
539
+ #
540
+ def [](index_or_header)
541
+ if @mode == :row or # by index
542
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
543
+ @table[index_or_header]
544
+ else # by header
545
+ @table.map { |row| row[index_or_header] }
546
+ end
547
+ end
548
+
549
+ #
550
+ # In the default mixed mode, this method assigns rows for index access and
551
+ # columns for header access. You can force the index association by first
552
+ # calling by_col!() or by_row!().
553
+ #
554
+ # Rows may be set to an Array of values (which will inherit the table's
555
+ # headers()) or a FasterCSV::Row.
556
+ #
557
+ # Columns may be set to a single value, which is copied to each row of the
558
+ # column, or an Array of values. Arrays of values are assigned to rows top
559
+ # to bottom in row major order. Excess values are ignored and if the Array
560
+ # does not have a value for each row the extra rows will receive a +nil+.
561
+ #
562
+ # Assigning to an existing column or row clobbers the data. Assigning to
563
+ # new columns creates them at the right end of the table.
564
+ #
565
+ def []=(index_or_header, value)
566
+ if @mode == :row or # by index
567
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
568
+ if value.is_a? Array
569
+ @table[index_or_header] = Row.new(headers, value)
570
+ else
571
+ @table[index_or_header] = value
572
+ end
573
+ else # set column
574
+ if value.is_a? Array # multiple values
575
+ @table.each_with_index do |row, i|
576
+ if row.header_row?
577
+ row[index_or_header] = index_or_header
578
+ else
579
+ row[index_or_header] = value[i]
580
+ end
581
+ end
582
+ else # repeated value
583
+ @table.each do |row|
584
+ if row.header_row?
585
+ row[index_or_header] = index_or_header
586
+ else
587
+ row[index_or_header] = value
588
+ end
589
+ end
590
+ end
591
+ end
592
+ end
593
+
594
+ #
595
+ # The mixed mode default is to treat a list of indices as row access,
596
+ # returning the rows indicated. Anything else is considered columnar
597
+ # access. For columnar access, the return set has an Array for each row
598
+ # with the values indicated by the headers in each Array. You can force
599
+ # column or row mode using by_col!() or by_row!().
600
+ #
601
+ # You cannot mix column and row access.
602
+ #
603
+ def values_at(*indices_or_headers)
604
+ if @mode == :row or # by indices
605
+ ( @mode == :col_or_row and indices_or_headers.all? do |index|
606
+ index.is_a?(Integer) or
607
+ ( index.is_a?(Range) and
608
+ index.first.is_a?(Integer) and
609
+ index.last.is_a?(Integer) )
610
+ end )
611
+ @table.values_at(*indices_or_headers)
612
+ else # by headers
613
+ @table.map { |row| row.values_at(*indices_or_headers) }
614
+ end
615
+ end
616
+
617
+ #
618
+ # Adds a new row to the bottom end of this table. You can provide an Array,
619
+ # which will be converted to a FasterCSV::Row (inheriting the table's
620
+ # headers()), or a FasterCSV::Row.
621
+ #
622
+ # This method returns the table for chaining.
623
+ #
624
+ def <<(row_or_array)
625
+ if row_or_array.is_a? Array # append Array
626
+ @table << Row.new(headers, row_or_array)
627
+ else # append Row
628
+ @table << row_or_array
629
+ end
630
+
631
+ self # for chaining
632
+ end
633
+
634
+ #
635
+ # A shortcut for appending multiple rows. Equivalent to:
636
+ #
637
+ # rows.each { |row| self << row }
638
+ #
639
+ # This method returns the table for chaining.
640
+ #
641
+ def push(*rows)
642
+ rows.each { |row| self << row }
643
+
644
+ self # for chaining
645
+ end
646
+
647
+ #
648
+ # Removes and returns the indicated column or row. In the default mixed
649
+ # mode indices refer to rows and everything else is assumed to be a column
650
+ # header. Use by_col!() or by_row!() to force the lookup.
651
+ #
652
+ def delete(index_or_header)
653
+ if @mode == :row or # by index
654
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
655
+ @table.delete_at(index_or_header)
656
+ else # by header
657
+ @table.map { |row| row.delete(index_or_header).last }
658
+ end
659
+ end
660
+
661
+ #
662
+ # Removes any column or row for which the block returns +true+. In the
663
+ # default mixed mode or row mode, iteration is the standard row major
664
+ # walking of rows. In column mode, interation will +yield+ two element
665
+ # tuples containing the column name and an Array of values for that column.
666
+ #
667
+ # This method returns the table for chaining.
668
+ #
669
+ def delete_if(&block)
670
+ if @mode == :row or @mode == :col_or_row # by index
671
+ @table.delete_if(&block)
672
+ else # by header
673
+ to_delete = Array.new
674
+ headers.each_with_index do |header, i|
675
+ to_delete << header if block[[header, self[header]]]
676
+ end
677
+ to_delete.map { |header| delete(header) }
678
+ end
679
+
680
+ self # for chaining
681
+ end
682
+
683
+ include Enumerable
684
+
685
+ #
686
+ # In the default mixed mode or row mode, iteration is the standard row major
687
+ # walking of rows. In column mode, interation will +yield+ two element
688
+ # tuples containing the column name and an Array of values for that column.
689
+ #
690
+ # This method returns the table for chaining.
691
+ #
692
+ def each(&block)
693
+ if @mode == :col
694
+ headers.each { |header| block[[header, self[header]]] }
695
+ else
696
+ @table.each(&block)
697
+ end
698
+
699
+ self # for chaining
700
+ end
701
+
702
+ # Returns +true+ if all rows of this table ==() +other+'s rows.
703
+ def ==(other)
704
+ @table == other.table
705
+ end
706
+
707
+ #
708
+ # Returns the table as an Array of Arrays. Headers will be the first row,
709
+ # then all of the field rows will follow.
710
+ #
711
+ def to_a
712
+ @table.inject([headers]) do |array, row|
713
+ if row.header_row?
714
+ array
715
+ else
716
+ array + [row.fields]
717
+ end
718
+ end
719
+ end
720
+
721
+ #
722
+ # Returns the table as a complete CSV String. Headers will be listed first,
723
+ # then all of the field rows.
724
+ #
725
+ # This method assumes you want the Table.headers(), unless you explicitly
726
+ # pass <tt>:write_headers => false</tt>.
727
+ #
728
+ def to_csv(options = Hash.new)
729
+ wh = options.fetch(:write_headers, true)
730
+ @table.inject(wh ? [headers.to_csv(options)] : [ ]) do |rows, row|
731
+ if row.header_row?
732
+ rows
733
+ else
734
+ rows + [row.fields.to_csv(options)]
735
+ end
736
+ end.join
737
+ end
738
+ alias_method :to_s, :to_csv
739
+
740
+ def inspect
741
+ "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
742
+ end
743
+ end
744
+
745
+ # The error thrown when the parser encounters illegal CSV formatting.
746
class MalformedCSVError < RuntimeError
  # Adds nothing to RuntimeError; the class exists so this failure has its
  # own name to raise and rescue.
end
747
+
748
+ #
749
+ # A FieldInfo Struct contains details about a field's position in the data
750
+ # source it was read from. FasterCSV will pass this Struct to some blocks
751
+ # that make decisions based on field structure. See
752
+ # FasterCSV.convert_fields() for an example.
753
+ #
754
+ # <b><tt>index</tt></b>:: The zero-based index of the field in its row.
755
+ # <b><tt>line</tt></b>:: The line of the data source this row is from.
756
+ # <b><tt>header</tt></b>:: The header for the column, when available.
757
+ #
758
FieldInfo = Struct.new(:index, :line, :header)

# A Regexp used to find and convert some common Date formats.
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
\d{4}-\d{2}-\d{2} )\z /x
# A Regexp used to find and convert some common DateTime formats.
DateTimeMatcher =
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
#
# This Hash holds the built-in converters of FasterCSV that can be accessed by
# name. You can select Converters with FasterCSV.convert() or through the
# +options+ Hash passed to FasterCSV::new().
#
# <b><tt>:integer</tt></b>::    Converts any field Integer() accepts.
# <b><tt>:float</tt></b>::      Converts any field Float() accepts.
# <b><tt>:numeric</tt></b>::    A combination of <tt>:integer</tt>
#                               and <tt>:float</tt>.
# <b><tt>:date</tt></b>::       Converts any field Date::parse() accepts.
# <b><tt>:date_time</tt></b>::  Converts any field DateTime::parse() accepts.
# <b><tt>:all</tt></b>::        All built-in converters. A combination of
#                               <tt>:date_time</tt> and <tt>:numeric</tt>.
#
# Every converter hands the field back untouched when the conversion fails.
# The date converters additionally require the field to match DateMatcher or
# DateTimeMatcher before parsing is attempted.
#
# This Hash is intentionally left unfrozen and users should feel free to add
# values to it that can be accessed by all FasterCSV objects.
#
# To add a combo field, the value should be an Array of names. Combo fields
# can be nested with other combo fields.
#
Converters = {
  :integer   => lambda { |field|
    begin
      Integer(field)
    rescue
      field
    end
  },
  :float     => lambda { |field|
    begin
      Float(field)
    rescue
      field
    end
  },
  :numeric   => [:integer, :float],
  :date      => lambda { |field|
    if field =~ DateMatcher
      begin
        Date.parse(field)
      rescue
        field
      end
    else
      field
    end
  },
  :date_time => lambda { |field|
    if field =~ DateTimeMatcher
      begin
        DateTime.parse(field)
      rescue
        field
      end
    else
      field
    end
  },
  :all       => [:date_time, :numeric]
}
797
+
798
+ #
799
+ # This Hash holds the built-in header converters of FasterCSV that can be
800
+ # accessed by name. You can select HeaderConverters with
801
+ # FasterCSV.header_convert() or through the +options+ Hash passed to
802
+ # FasterCSV::new().
803
+ #
804
+ # <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
805
+ # <b><tt>:symbol</tt></b>:: The header String is downcased, spaces are
806
+ # replaced with underscores, non-word characters
807
+ # are dropped, and finally to_sym() is called.
808
+ #
809
+ # This Hash is intentionally left unfrozen and users should feel free to add
810
+ # values to it that can be accessed by all FasterCSV objects.
811
+ #
812
+ # To add a combo field, the value should be an Array of names. Combo fields
813
+ # can be nested with other combo fields.
814
+ #
815
HeaderConverters = {
  :downcase => lambda { |header| header.downcase },
  :symbol   => lambda { |header|
    # spaces become underscores first, then everything outside a-z, 0-9 and
    # the underscore is dropped
    underscored = header.downcase.tr(" ", "_")
    underscored.delete("^a-z0-9_").to_sym
  }
}

#
# The options used when no overrides are given by calling code. They are:
#
# <b><tt>:col_sep</tt></b>::             <tt>","</tt>
# <b><tt>:row_sep</tt></b>::             <tt>:auto</tt>
# <b><tt>:quote_char</tt></b>::          <tt>'"'</tt>
# <b><tt>:converters</tt></b>::          +nil+
# <b><tt>:unconverted_fields</tt></b>::  +nil+
# <b><tt>:headers</tt></b>::             +false+
# <b><tt>:return_headers</tt></b>::      +false+
# <b><tt>:header_converters</tt></b>::   +nil+
# <b><tt>:skip_blanks</tt></b>::         +false+
# <b><tt>:force_quotes</tt></b>::        +false+
#
# The Hash itself is frozen.
#
DEFAULT_OPTIONS = {
  :col_sep            => ",",
  :row_sep            => :auto,
  :quote_char         => '"',
  :converters         => nil,
  :unconverted_fields => nil,
  :headers            => false,
  :return_headers     => false,
  :header_converters  => nil,
  :skip_blanks        => false,
  :force_quotes       => false
}.freeze
846
+
847
+ #
848
+ # This method will build a drop-in replacement for many of the standard CSV
849
+ # methods. It allows you to write code like:
850
+ #
851
+ # begin
852
+ # require "faster_csv"
853
+ # FasterCSV.build_csv_interface
854
+ # rescue LoadError
855
+ # require "csv"
856
+ # end
857
+ # # ... use CSV here ...
858
+ #
859
+ # This is not a complete interface with completely identical behavior.
860
+ # However, it is intended to be close enough that you won't notice the
861
+ # difference in most cases. CSV methods supported are:
862
+ #
863
+ # * foreach()
864
+ # * generate_line()
865
+ # * open()
866
+ # * parse()
867
+ # * parse_line()
868
+ # * readlines()
869
+ #
870
+ # Be warned that this interface is slower than vanilla FasterCSV due to the
871
+ # extra layer of method calls. Depending on usage, this can slow it down to
872
+ # near CSV speeds.
873
+ #
874
def self.build_csv_interface
  # A brand new, empty class is installed as the top-level CSV constant and
  # then given class-level methods that translate the positional separator
  # arguments into FasterCSV's option Hash.
  shim = Object.const_set(:CSV, Class.new)

  class << shim
    def foreach(path, rs = :auto, &block)  # :nodoc:
      FasterCSV.foreach(path, :row_sep => rs, &block)
    end

    def generate_line(row, fs = ",", rs = "")  # :nodoc:
      FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
    end

    def open(path, mode, fs = ",", rs = :auto, &block)  # :nodoc:
      separators = { :col_sep => fs, :row_sep => rs }

      # without a block, or for non-read modes, the call is forwarded as is
      unless block && mode.include?("r")
        return FasterCSV.open(path, mode, separators, &block)
      end

      # a block given for a read mode receives each row, not the CSV object
      FasterCSV.open(path, mode, separators) { |csv| csv.each(&block) }
    end

    def parse(str_or_readable, fs = ",", rs = :auto, &block)  # :nodoc:
      FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
    end

    def parse_line(src, fs = ",", rs = :auto)  # :nodoc:
      FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
    end

    def readlines(path, rs = :auto)  # :nodoc:
      FasterCSV.readlines(path, :row_sep => rs)
    end
  end
end
907
+
908
+ #
909
+ # This method allows you to serialize an Array of Ruby objects to a String or
910
+ # File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
911
+ # useful for spreadsheet and database interaction.
912
+ #
913
+ # Out of the box, this method is intended to work with simple data objects or
914
+ # Structs. It will serialize a list of instance variables and/or
915
+ # Struct.members().
916
+ #
917
+ # If you need more complicated serialization, you can control the process
918
+ # by adding methods to the class to be serialized.
919
+ #
920
+ # A class method csv_meta() is responsible for returning the first row of the
921
+ # document (as an Array). This row is considered to be a Hash of the form
922
+ # key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
923
+ # key with a value of the stringified class name and FasterCSV::dump() will
924
+ # create this, if you do not define this method. This method is only called
925
+ # on the first object of the Array.
926
+ #
927
+ # The next method you can provide is an instance method called csv_headers().
928
+ # This method is expected to return the second line of the document (again as
929
+ # an Array), which is to be used to give each column a header. By default,
930
+ # FasterCSV::load() will set an instance variable if the field header starts
931
+ # with an @ character or call send() passing the header as the method name and
932
+ # the field value as an argument. This method is only called on the first
933
+ # object of the Array.
934
+ #
935
+ # Finally, you can provide an instance method called csv_dump(), which will
936
+ # be passed the headers. This should return an Array of fields that can be
937
+ # serialized for this object. This method is called once for every object in
938
+ # the Array.
939
+ #
940
+ # The +io+ parameter can be used to serialize to a File, and +options+ can be
941
+ # anything FasterCSV::new() accepts.
942
+ #
943
def self.dump(ary_of_objs, io = "", options = Hash.new)
  # the first object stands in for the whole collection when the meta and
  # header rows are built
  template = ary_of_objs.first
  writer   = FasterCSV.new(io, options)

  # row one: meta information, falling back to a class/name pair
  begin
    writer << template.class.csv_meta
  rescue NoMethodError
    writer << [:class, template.class]
  end

  # row two: column headers, falling back to the sorted instance variables
  # plus (for Struct descendants) a setter name for each member
  begin
    headers = template.csv_headers
  rescue NoMethodError
    headers   = template.instance_variables.sort
    is_struct = template.class.ancestors.find { |klass| klass.to_s =~ /\AStruct\b/ }
    headers  += template.members.map { |member| "#{member}=" }.sort if is_struct
  end
  writer << headers

  # remaining rows: one per object
  ary_of_objs.each do |obj|
    begin
      writer << obj.csv_dump(headers)
    rescue NoMethodError
      # "@name" headers are read as instance variables; anything else is a
      # Struct setter name ("name="), so drop the "=" and index the object
      fields = headers.map do |name|
        if name[0] == ?@
          obj.instance_variable_get(name)
        else
          obj[name[0..-2]]
        end
      end
      writer << fields
    end
  end

  # a String target returns the generated CSV; any other target is closed
  return writer.string if io.is_a? String
  writer.close
end
987
+
988
+ #
989
+ # :call-seq:
990
+ # filter( options = Hash.new ) { |row| ... }
991
+ # filter( input, options = Hash.new ) { |row| ... }
992
+ # filter( input, output, options = Hash.new ) { |row| ... }
993
+ #
994
+ # This method is a convenience for building Unix-like filters for CSV data.
995
+ # Each row is yielded to the provided block which can alter it as needed.
996
+ # After the block returns, the row is appended to +output+ altered or not.
997
+ #
998
+ # The +input+ and +output+ arguments can be anything FasterCSV::new() accepts
999
+ # (generally String or IO objects). If not given, they default to
1000
+ # <tt>ARGF</tt> and <tt>$stdout</tt>.
1001
+ #
1002
+ # The +options+ parameter is also filtered down to FasterCSV::new() after some
1003
+ # clever key parsing. Any key beginning with <tt>:in_</tt> or
1004
+ # <tt>:input_</tt> will have that leading identifier stripped and will only
1005
+ # be used in the +options+ Hash for the +input+ object. Keys starting with
1006
+ # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
1007
+ # are assigned to both objects.
1008
+ #
1009
+ # The <tt>:output_row_sep</tt> +option+ defaults to
1010
+ # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1011
+ #
1012
def self.filter(*args)
  # split a trailing options Hash (if any) into reader and writer settings
  reader_options = Hash.new
  writer_options = {:row_sep => $INPUT_RECORD_SEPARATOR}
  if args.last.is_a? Hash
    args.pop.each do |key, value|
      name = key.to_s
      if name =~ /\Ain(?:put)?_(.+)\Z/        # input-only: strip the prefix
        reader_options[$1.to_sym] = value
      elsif name =~ /\Aout(?:put)?_(.+)\Z/    # output-only: strip the prefix
        writer_options[$1.to_sym] = value
      else                                    # shared by both ends
        reader_options[key] = value
        writer_options[key] = value
      end
    end
  end

  # wrap both ends, defaulting to ARGF and $stdout
  input  = FasterCSV.new(args.shift || ARGF,    reader_options)
  output = FasterCSV.new(args.shift || $stdout, writer_options)

  # hand each row to the caller, then write it out (altered or not)
  input.each do |row|
    yield row
    output << row
  end
end
1038
+
1039
+ #
1040
+ # This method is intended as the primary interface for reading CSV files. You
1041
+ # pass a +path+ and any +options+ you wish to set for the read. Each row of
1042
+ # file will be passed to the provided +block+ in turn.
1043
+ #
1044
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1045
+ #
1046
def self.foreach(path, options = Hash.new, &block)
  # open() closes the file again once every row has been passed to +block+
  open(path, "rb", options) { |csv| csv.each(&block) }
end
1051
+
1052
+ #
1053
+ # :call-seq:
1054
+ # generate( str, options = Hash.new ) { |faster_csv| ... }
1055
+ # generate( options = Hash.new ) { |faster_csv| ... }
1056
+ #
1057
+ # This method wraps a String you provide, or an empty default String, in a
1058
+ # FasterCSV object which is passed to the provided block. You can use the
1059
+ # block to append CSV rows to the String and when the block exits, the
1060
+ # final String will be returned.
1061
+ #
1062
+ # Note that a passed String *is* modified by this method. Call dup() before
1063
+ # passing if you need a new String.
1064
+ #
1065
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1066
+ #
1067
def self.generate(*args)
  # a caller-supplied String is wrapped and positioned at its end so new rows
  # are appended; otherwise start from an empty String
  target = if args.first.is_a? String
    appendable = StringIO.new(args.shift)
    appendable.seek(0, IO::SEEK_END)
    appendable
  else
    ""
  end

  faster_csv = new(target, *args)  # wrap
  yield faster_csv                 # let the caller append rows
  faster_csv.string                # return the final String
end
1080
+
1081
+ #
1082
+ # This method is a shortcut for converting a single row (Array) into a CSV
1083
+ # String.
1084
+ #
1085
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1086
+ #
1087
+ # The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
1088
+ # (<tt>$/</tt>) when calling this method.
1089
+ #
1090
def self.generate_line(row, options = Hash.new)
  # caller options win over the default row separator
  defaults = {:row_sep => $INPUT_RECORD_SEPARATOR}
  csv      = new("", defaults.merge(options))
  csv << row
  csv.string
end
1094
+
1095
+ #
1096
+ # This method will return a FasterCSV instance, just like FasterCSV::new(),
1097
+ # but the instance will be cached and returned for all future calls to this
1098
+ # method for the same +data+ object (tested by Object#object_id()) with the
1099
+ # same +options+.
1100
+ #
1101
+ # If a block is given, the instance is passed to the block and the return
1102
+ # value becomes the return value of the block.
1103
+ #
1104
def self.instance(data = $stdout, options = Hash.new)
  # the cache key is the data object's id followed by the value given for
  # every known option, taken in a stable (name-sorted) order
  option_names = DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s }
  sig          = [data.object_id] + options.values_at(*option_names)

  # fetch or create the instance for this signature
  @@instances      ||= Hash.new
  @@instances[sig] ||= new(data, options)
  instance = @@instances[sig]

  # without a block the instance itself is the result; with one, the block's
  # return value is
  return instance unless block_given?
  yield instance
end
1119
+
1120
+ #
1121
+ # This method is the reading counterpart to FasterCSV::dump(). See that
1122
+ # method for a detailed description of the process.
1123
+ #
1124
+ # You can customize loading by adding a class method called csv_load() which
1125
+ # will be passed a Hash of meta information, an Array of headers, and an Array
1126
+ # of fields for the object the method is expected to return.
1127
+ #
1128
+ # Remember that all fields will be Strings after this load. If you need
1129
+ # something else, use +options+ to setup converters or provide a custom
1130
+ # csv_load() implementation.
1131
+ #
1132
def self.load(io_or_str, options = Hash.new)
  csv = FasterCSV.new(io_or_str, options)

  # first row: flat key/value meta list; its "class" entry names the class to
  # rebuild, resolved one namespace at a time starting from Object
  #
  # NOTE(review): the class name (and the setter names used below) come
  # straight from the data, so this should only be fed trusted input.
  meta = Hash[*csv.shift]
  cls  = Object
  meta["class"].split("::").each { |const| cls = cls.const_get(const) }

  # second row: headers
  headers = csv.shift

  # every remaining row becomes one object
  results = Array.new
  csv.each do |row|
    begin
      obj = cls.csv_load(meta, headers, row)
    rescue NoMethodError
      # default strategy: allocate without calling initialize(), then fill in
      # "@name" headers as instance variables and send() everything else
      obj = cls.allocate
      headers.zip(row) do |name, value|
        if name[0] == ?@
          obj.instance_variable_set(name, value)
        else
          obj.send(name, value)
        end
      end
    end
    results << obj
  end

  csv.close unless io_or_str.is_a? String

  results
end
1165
+
1166
+ #
1167
+ # :call-seq:
1168
+ # open( filename, mode="rb", options = Hash.new ) { |faster_csv| ... }
1169
+ # open( filename, mode="rb", options = Hash.new )
1170
+ #
1171
+ # This method opens an IO object, and wraps that with FasterCSV. This is
1172
+ # intended as the primary interface for writing a CSV file.
1173
+ #
1174
+ # You may pass any +args+ Ruby's open() understands followed by an optional
1175
+ # Hash containing any +options+ FasterCSV::new() understands.
1176
+ #
1177
+ # This method works like Ruby's open() call, in that it will pass a FasterCSV
1178
+ # object to a provided block and close it when the block terminates, or it
1179
+ # will return the FasterCSV object when no block is provided. (*Note*: This
1180
+ # is different from the standard CSV library which passes rows to the block.
1181
+ # Use FasterCSV::foreach() for that behavior.)
1182
+ #
1183
+ # An opened FasterCSV object will delegate to many IO methods, for
1184
+ # convenience. You may call:
1185
+ #
1186
+ # * binmode()
1187
+ # * close()
1188
+ # * close_read()
1189
+ # * close_write()
1190
+ # * closed?()
1191
+ # * eof()
1192
+ # * eof?()
1193
+ # * fcntl()
1194
+ # * fileno()
1195
+ # * flush()
1196
+ # * fsync()
1197
+ # * ioctl()
1198
+ # * isatty()
1199
+ # * pid()
1200
+ # * pos()
1201
+ # * reopen()
1202
+ # * seek()
1203
+ # * stat()
1204
+ # * sync()
1205
+ # * sync=()
1206
+ # * tell()
1207
+ # * to_i()
1208
+ # * to_io()
1209
+ # * tty?()
1210
+ #
1211
def self.open(*args)
  # find the +options+ Hash
  options = if args.last.is_a? Hash then args.pop else Hash.new end
  # default to a binary open mode
  args << "rb" if args.size == 1

  # open the File with the remaining +args+, then wrap it
  file = File.open(*args)
  begin
    csv = new(file, options)
  rescue Exception
    # the constructor can fail (for example with an ArgumentError for unknown
    # options); close the File we just opened so the handle is not leaked,
    # then let the original error propagate unchanged
    file.close
    raise
  end

  # handle blocks like Ruby's open(), not like the CSV library
  if block_given?
    begin
      yield csv
    ensure
      csv.close
    end
  else
    csv
  end
end
1230
+
1231
+ #
1232
+ # :call-seq:
1233
+ # parse( str, options = Hash.new ) { |row| ... }
1234
+ # parse( str, options = Hash.new )
1235
+ #
1236
+ # This method can be used to easily parse CSV out of a String. You may either
1237
+ # provide a +block+ which will be called with each row of the String in turn,
1238
+ # or just use the returned Array of Arrays (when no +block+ is given).
1239
+ #
1240
+ # You pass your +str+ to read from, and an optional +options+ Hash containing
1241
+ # anything FasterCSV::new() understands.
1242
+ #
1243
def self.parse(*args, &block)
  csv = new(*args)

  # with a block, pass each row to it in turn
  return csv.each(&block) unless block.nil?

  # without one, slurp the contents and close the wrapper afterwards
  begin
    csv.read
  ensure
    csv.close
  end
end
1255
+
1256
+ #
1257
+ # This method is a shortcut for converting a single line of a CSV String into
1258
+ # an Array. Note that if +line+ contains multiple rows, anything
1259
+ # beyond the first row is ignored.
1260
+ #
1261
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1262
+ #
1263
def self.parse_line(line, options = Hash.new)
  # only the first row is pulled; the rest of +line+ is never read
  csv = new(line, options)
  csv.shift
end
1266
+
1267
+ #
1268
+ # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
1269
+ # file and any +options+ FasterCSV::new() understands.
1270
+ #
1271
def self.read(path, options = Hash.new)
  # open() closes the file once the block has slurped every row
  open(path, "rb", options) do |csv|
    csv.read
  end
end

# Alias for FasterCSV::read(); all arguments are forwarded untouched.
def self.readlines(*args)
  read(*args)
end
1279
+
1280
+ #
1281
+ # A shortcut for:
1282
+ #
1283
+ # FasterCSV.read( path, { :headers => true,
1284
+ # :converters => :numeric,
1285
+ # :header_converters => :symbol }.merge(options) )
1286
+ #
1287
def self.table(path, options = Hash.new)
  # caller-supplied options override these table-friendly defaults
  defaults = { :headers           => true,
               :converters        => :numeric,
               :header_converters => :symbol }
  read(path, defaults.merge(options))
end
1292
+
1293
+ #
1294
+ # This constructor will wrap either a String or IO object passed in +data+ for
1295
+ # reading and/or writing. In addition to the FasterCSV instance methods,
1296
+ # several IO methods are delegated. (See FasterCSV::open() for a complete
1297
+ # list.) If you pass a String for +data+, you can later retrieve it (after
1298
+ # writing to it, for example) with FasterCSV.string().
1299
+ #
1300
+ # Note that a wrapped String will be positioned at the beginning (for
1301
+ # reading). If you want it at the end (for writing), use
1302
+ # FasterCSV::generate(). If you want any other positioning, pass a preset
1303
+ # StringIO object instead.
1304
+ #
1305
+ # You may set any reading and/or writing preferences in the +options+ Hash.
1306
+ # Available options are:
1307
+ #
1308
+ # <b><tt>:col_sep</tt></b>:: The String placed between each field.
1309
+ # <b><tt>:row_sep</tt></b>:: The String appended to the end of each
1310
+ # row. This can be set to the special
1311
+ # <tt>:auto</tt> setting, which requests
1312
+ # that FasterCSV automatically discover
1313
+ # this from the data. Auto-discovery
1314
+ # reads ahead in the data looking for
1315
+ # the next <tt>"\r\n"</tt>,
1316
+ # <tt>"\n"</tt>, or <tt>"\r"</tt>
1317
+ # sequence. A sequence will be selected
1318
+ # even if it occurs in a quoted field,
1319
+ # assuming that you would have the same
1320
+ # line endings there. If none of those
1321
+ # sequences is found,
1322
+ # or the stream is only available for
1323
+ # output, the default
1324
+ # <tt>$INPUT_RECORD_SEPARATOR</tt>
1325
+ # (<tt>$/</tt>) is used. Obviously,
1326
+ # discovery takes a little time. Set
1327
+ # manually if speed is important. Also
1328
+ # note that IO objects should be opened
1329
+ # in binary mode on Windows if this
1330
+ # feature will be used as the
1331
+ # line-ending translation can cause
1332
+ # problems with resetting the document
1333
+ # position to where it was before the
1334
+ # read ahead.
1335
+ # <b><tt>:quote_char</tt></b>:: The character used to quote fields.
1336
+ # This has to be a single character
1337
+ # String. This is useful for
1338
+ # application that incorrectly use
1339
+ # <tt>'</tt> as the quote character
1340
+ # instead of the correct <tt>"</tt>.
1341
+ # FasterCSV will always consider a
1342
+ # double sequence this character to be
1343
+ # an escaped quote.
1344
+ # <b><tt>:encoding</tt></b>:: The encoding to use when parsing the
1345
+ # file. Defaults to your <tt>$KCODE</tt>
1346
+ # setting. Valid values: <tt>`n’</tt> or
1347
+ # <tt>`N’</tt> for none, <tt>`e’</tt> or
1348
+ # <tt>`E’</tt> for EUC, <tt>`s’</tt> or
1349
+ # <tt>`S’</tt> for SJIS, and
1350
+ # <tt>`u’</tt> or <tt>`U’</tt> for UTF-8
1351
+ # (see Regexp.new()).
1352
+ # <b><tt>:field_size_limit</tt></b>:: This is a maximum size FasterCSV will
1353
+ # read ahead looking for the closing
1354
+ # quote for a field. (In truth, it
1355
+ # reads to the first line ending beyond
1356
+ # this size.) If a quote cannot be
1357
+ # found within the limit FasterCSV will
1358
+ # raise a MalformedCSVError, assuming
1359
+ # the data is faulty. You can use this
1360
+ # limit to prevent what are effectively
1361
+ # DoS attacks on the parser. However,
1362
+ # this limit can cause a legitimate
1363
+ # parse to fail and thus is set to
1364
+ # +nil+, or off, by default.
1365
+ # <b><tt>:converters</tt></b>:: An Array of names from the Converters
1366
+ # Hash and/or lambdas that handle custom
1367
+ # conversion. A single converter
1368
+ # doesn't have to be in an Array.
1369
+ # <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
1370
+ # unconverted_fields() method will be
1371
+ # added to all returned rows (Array or
1372
+ # FasterCSV::Row) that will return the
1373
+ # fields as they were before conversion.
1374
+ # Note that <tt>:headers</tt> supplied
1375
+ # by Array or String were not fields of
1376
+ # the document and thus will have an
1377
+ # empty Array attached.
1378
+ # <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
1379
+ # +true+, the initial row of the CSV
1380
+ # file will be treated as a row of
1381
+ # headers. If set to an Array, the
1382
+ # contents will be used as the headers.
1383
+ # If set to a String, the String is run
1384
+ # through a call of
1385
+ # FasterCSV::parse_line() with the same
1386
+ # <tt>:col_sep</tt>, <tt>:row_sep</tt>,
1387
+ # and <tt>:quote_char</tt> as this
1388
+ # instance to produce an Array of
1389
+ # headers. This setting causes
1390
+ # FasterCSV.shift() to return rows as
1391
+ # FasterCSV::Row objects instead of
1392
+ # Arrays and FasterCSV.read() to return
1393
+ # FasterCSV::Table objects instead of
1394
+ # an Array of Arrays.
1395
+ # <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently
1396
+ # swallowed. If set to +true+, header
1397
+ # rows are returned in a FasterCSV::Row
1398
+ # object with identical headers and
1399
+ # fields (save that the fields do not go
1400
+ # through the converters).
1401
+ # <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
1402
+ # set, a header row will be added to the
1403
+ # output. Note that if the table only
1404
+ # contains header rows,
1405
+ # <tt>:return_headers</tt> must also be
1406
+ # set in order for a header row to be
1407
+ # output.
1408
+ # <b><tt>:header_converters</tt></b>:: Identical in functionality to
1409
+ # <tt>:converters</tt> save that the
1410
+ # conversions are only made to header
1411
+ # rows.
1412
+ # <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, FasterCSV
1413
+ # will skip over any rows with no
1414
+ # content.
1415
+ # <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, FasterCSV
1416
+ # will quote all CSV fields it creates.
1417
+ #
1418
+ # See FasterCSV::DEFAULT_OPTIONS for the default settings.
1419
+ #
1420
+ # Options cannot be overridden in the instance methods for performance reasons,
1421
+ # so be sure to set what you want here.
1422
+ #
1423
def initialize(data, options = Hash.new)
  # start from the defaults and layer the caller's choices on top
  options = DEFAULT_OPTIONS.merge(options)

  # a String is wrapped for IO-style access; anything else is used as given
  @io = data.is_a?(String) ? StringIO.new(data) : data

  # each init_*() call removes the option keys it handles
  init_separators(options)
  init_parsers(options)
  init_converters(options)
  init_headers(options)

  # so anything still left over was not recognized
  unless options.empty?
    raise ArgumentError, "Unknown options: #{options.keys.join(', ')}."
  end

  # track our own lineno since IO gets confused about line-ends in CSV fields
  @lineno = 0
end
1442
+
1443
+ #
1444
+ # The line number of the last row read from this file. Fields with nested
1445
+ # line-end characters will not affect this count.
1446
+ #
1447
+ attr_reader :lineno
1448
+
1449
+ ### IO and StringIO Delegation ###
1450
+
1451
extend Forwardable
# every method named here is forwarded unchanged to the wrapped @io object
def_delegators( :@io,
                *%w[ binmode close close_read close_write closed?
                     eof eof? fcntl fileno flush fsync ioctl
                     isatty pid pos reopen seek stat string
                     sync sync= tell to_i to_io tty? ].map { |name| name.to_sym } )
1456
+
1457
+ # Rewinds the underlying IO object and resets FasterCSV's lineno() counter.
1458
def rewind
  # forget any headers read so far and restart the line count
  @headers, @lineno = nil, 0

  @io.rewind
end
1464
+
1465
+ ### End Delegation ###
1466
+
1467
+ #
1468
+ # The primary write method for wrapped Strings and IOs, +row+ (an Array or
1469
+ # FasterCSV::Row) is converted to CSV and appended to the data source. When a
1470
+ # FasterCSV::Row is passed, only the row's fields() are appended to the
1471
+ # output.
1472
+ #
1473
+ # The data source must be open for writing.
1474
+ #
1475
def <<(row)
  # headers given as an Array or String never come from the data, so resolve
  # them now (parse_headers won't read data in that case) and, if requested,
  # write them out ahead of the first row
  if header_row? and
     (@use_headers.class == Array or @use_headers.class == String)
    parse_headers
    self << @headers if @write_headers
  end

  # reduce FasterCSV::Row objects and Hashes to a plain list of fields
  if self.class::Row === row
    row = row.fields
  elsif Hash === row
    row = @headers.map { |header| row[header] }
  end

  # when headers are still pending, this row becomes the header row
  @headers = row if header_row?
  @lineno += 1

  # quote each field, join with the column separator, end with the row separator
  quoted = row.map(&@quote)
  @io << quoted.join(@col_sep) + @row_sep

  self  # for chaining
end
1496
+ alias_method :add_row, :<<
1497
+ alias_method :puts, :<<
1498
+
1499
+ #
1500
+ # :call-seq:
1501
+ # convert( name )
1502
+ # convert { |field| ... }
1503
+ # convert { |field, field_info| ... }
1504
+ #
1505
+ # You can use this method to install a FasterCSV::Converters built-in, or
1506
+ # provide a block that handles a custom conversion.
1507
+ #
1508
+ # If you provide a block that takes one argument, it will be passed the field
1509
+ # and is expected to return the converted value or the field itself. If your
1510
+ # block takes two arguments, it will also be passed a FieldInfo Struct,
1511
+ # containing details about the field. Again, the block should return a
1512
+ # converted field or the field itself.
1513
+ #
1514
def convert(name = nil, &converter)
  # field converters are looked up by +name+ in the Converters table
  builtins = self.class::Converters
  add_converter(:converters, builtins, name, &converter)
end
1517
+
1518
+ #
1519
+ # :call-seq:
1520
+ # header_convert( name )
1521
+ # header_convert { |field| ... }
1522
+ # header_convert { |field, field_info| ... }
1523
+ #
1524
+ # Identical to FasterCSV.convert(), but for header rows.
1525
+ #
1526
+ # Note that this method must be called before header rows are read to have any
1527
+ # effect.
1528
+ #
1529
def header_convert(name = nil, &converter)
  # header converters are looked up by +name+ in the HeaderConverters table
  builtins = self.class::HeaderConverters
  add_converter(:header_converters, builtins, name, &converter)
end
1535
+
1536
+ include Enumerable
1537
+
1538
+ #
1539
+ # Yields each row of the data source in turn.
1540
+ #
1541
+ # Support for Enumerable.
1542
+ #
1543
+ # The data source must be open for reading.
1544
+ #
1545
def each
  # keep pulling rows until shift() signals the end of the data
  row = shift
  while row
    yield row
    row = shift
  end
end
1550
+
1551
+ #
1552
+ # Slurps the remaining rows and returns an Array of Arrays.
1553
+ #
1554
+ # The data source must be open for reading.
1555
+ #
1556
def read
  rows = to_a
  # without headers the plain Array of rows is the result
  return rows unless @use_headers
  # with headers the rows are wrapped in a Table
  Table.new(rows)
end
1564
+ alias_method :readlines, :read
1565
+
1566
+ # Returns +true+ if the next row read will be a header row.
1567
def header_row?
  # headers are wanted but have not been read or assigned yet
  @use_headers && @headers.nil?
end
1570
+
1571
+ #
1572
+ # The primary read method for wrapped Strings and IOs, a single row is pulled
1573
+ # from the data source, parsed and returned as an Array of fields (if header
1574
+ # rows are not used) or a FasterCSV::Row (when header rows are used).
1575
+ #
1576
+ # The data source must be open for reading.
1577
+ #
1578
def shift
  #########################################################################
  ### This method is purposefully kept a bit long as simple conditional ###
  ### checks are faster than numerous (expensive) method calls.         ###
  #########################################################################

  # handle headers not based on document content
  if header_row? and @return_headers and
     [Array, String].include? @use_headers.class
    if @unconverted_fields
      return add_unconverted_fields(parse_headers, Array.new)
    else
      return parse_headers
    end
  end

  # begin with a blank line, so we can always add to it
  line = String.new

  #
  # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
  # because of \r and/or \n characters embedded in quoted fields
  #
  loop do
    # add another read to the line (+nil+ from gets() means no more data)
    if read_line = @io.gets(@row_sep)
      line += read_line
    else
      return nil
    end
    # copy the line so we can chop it up in parsing
    parse = line.dup
    parse.sub!(@parsers[:line_end], "")

    #
    # I believe a blank line should be an <tt>Array.new</tt>, not
    # CSV's <tt>[nil]</tt>
    #
    if parse.empty?
      @lineno += 1
      if @skip_blanks
        line = ""
        next
      elsif @unconverted_fields
        return add_unconverted_fields(Array.new, Array.new)
      elsif @use_headers
        return FasterCSV::Row.new(Array.new, Array.new)
      else
        return Array.new
      end
    end

    # parse the fields with a mix of String#split and regular expressions
    csv           = Array.new
    current_field = String.new
    field_quotes  = 0
    parse.split(@col_sep, -1).each do |match|
      if current_field.empty? && match.count(@quote_and_newlines).zero?
        # simple unquoted piece: an empty one is a +nil+ field
        csv << (match.empty? ? nil : match)
      elsif (current_field.empty? ? match[0] : current_field[0]) ==
            @quote_char[0]
        # piece of a quoted field: collect it and keep count of the quotes
        current_field << match
        field_quotes += match.count(@quote_char)
        if field_quotes % 2 == 0
          # quotes are balanced, so the field should be complete
          in_quotes = current_field[@parsers[:quoted_field], 1]
          #
          # a balanced quote count does not guarantee the field ends with a
          # quote (consider <tt>"a"b</tt>); without this check the +nil+
          # result would surface below as a NoMethodError
          #
          if in_quotes.nil?
            raise MalformedCSVError,
                  "Missing or stray quote in line #{lineno + 1}."
          end
          current_field = in_quotes
          current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
          csv           << current_field
          current_field =  String.new
          field_quotes  =  0
        else # we found a quoted field that spans multiple lines
          current_field << @col_sep
        end
      elsif match.count("\r\n").zero?
        raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
      else
        raise MalformedCSVError, "Unquoted fields do not allow " +
                                 "\\r or \\n (line #{lineno + 1})."
      end
    end

    # if every quote was closed, we found all the fields on the line...
    if field_quotes % 2 == 0
      @lineno += 1

      # save the unconverted fields, if needed...
      unconverted = csv.dup if @unconverted_fields

      # convert fields, if needed...
      csv = convert_fields(csv) unless @use_headers or @converters.empty?
      # parse out header rows and handle FasterCSV::Row conversions...
      csv = parse_headers(csv)  if     @use_headers

      # inject unconverted fields and accessor, if requested...
      if @unconverted_fields and not csv.respond_to? :unconverted_fields
        add_unconverted_fields(csv, unconverted)
      end

      # return the results
      break csv
    end
    # if a quote is still open but we are at eof?(), it was never closed...
    if @io.eof?
      raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
    elsif @field_size_limit and current_field.size >= @field_size_limit
      raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
    end
    # otherwise, we need to loop and pull some more data to complete the row
  end
end
1688
+ alias_method :gets, :shift
1689
+ alias_method :readline, :shift
1690
+
1691
+ # Returns a simplified description of the key FasterCSV attributes.
1692
def inspect
  # name the standard streams; otherwise show the class of the wrapped IO
  io_label = if    @io == $stdout then "$stdout"
             elsif @io == $stdin  then "$stdin"
             elsif @io == $stderr then "$stderr"
             else                      @io.class.to_s
             end
  summary = "<##{self.class} io_type:" + io_label

  # show the IO's path(), if available
  path = @io.respond_to?(:path) ? @io.path : nil
  summary << " io_path:#{path.inspect}" if path

  # show the other attributes, skipping any that are unset
  %w[ lineno     col_sep     row_sep
      quote_char skip_blanks encoding ].each do |attr_name|
    value = instance_variable_get("@#{attr_name}")
    summary << " #{attr_name}:#{value.inspect}" if value
  end

  # headers show as +true+ until they have actually been read
  summary << " headers:#{(@headers || true).inspect}" if @use_headers
  summary << ">"
end
1716
+
1717
+ private
1718
+
1719
+ #
1720
+ # Stores the indicated separators for later use.
1721
+ #
1722
+ # If auto-discovery was requested for <tt>@row_sep</tt>, this method will read
1723
+ # ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
1724
+ # +STDERR+ and any stream open for output only fall back to a default
1725
+ # <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1726
+ #
1727
+ # This method also establishes the quoting rules used for CSV output.
1728
+ #
1729
def init_separators(options)
  # store the selected separators
  @col_sep            = options.delete(:col_sep)
  @row_sep            = options.delete(:row_sep)
  @quote_char         = options.delete(:quote_char)
  @quote_and_newlines = "\r\n#{@quote_char}"

  unless @quote_char.length == 1
    raise ArgumentError, ":quote_char has to be a single character String"
  end

  # automatically discover the row separator when requested
  if @row_sep == :auto
    begin
      #
      # remember where we were (pos() will raise an exception if @io is a
      # pipe or not opened for reading)
      #
      saved_pos = @io.pos

      #
      # read ahead in chunks until a standard separator turns up; running
      # out of data probably means a single line (ensure sets the default)
      #
      until @row_sep != :auto or @io.eof?
        sample  = @io.read(1024)
        # a chunk ending in "\r" may be the first half of a "\r\n" pair
        sample += @io.read(1) if sample[-1..-1] == "\r" and not @io.eof?

        @row_sep = $& if sample =~ /\r\n?|\n/
      end

      # tricky seek() clone to work around GzipReader's lack of seek():
      # rewind, then read forward to the remembered position in small steps
      # to avoid loading a lot of data into memory
      @io.rewind
      while saved_pos > 1024
        @io.read(1024)
        saved_pos -= 1024
      end
      @io.read(saved_pos) if saved_pos.nonzero?
    rescue IOError, NoMethodError, SystemCallError
      #
      # IOError:         not opened for reading
      # NoMethodError:   Zlib::GzipWriter doesn't have eof?
      # SystemCallError: pipe
      #
      # do nothing in any of these cases: ensure will set the default
      #
    ensure
      #
      # set default if we failed to detect
      # (stream not opened for reading, a pipe, or a single line of data)
      #
      @row_sep = $INPUT_RECORD_SEPARATOR if @row_sep == :auto
    end
  end

  # establish quoting rules: wrap in quotes, doubling any embedded quote
  quote_always = lambda do |field|
    @quote_char +
    String(field).gsub(@quote_char, @quote_char * 2) +
    @quote_char
  end
  if options.delete(:force_quotes)
    @quote = quote_always
  else
    @quote = lambda do |field|
      # represent +nil+ fields as empty unquoted fields
      next "" if field.nil?

      text = String(field)  # Stringify fields
      # quote empty fields and anything holding a separator, quote or line end
      if text.empty? or
         text.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
        quote_always.call(text)
      else
        text  # unquoted field
      end
    end
  end
end
1814
+
1815
+ # Pre-compiles parsers and stores them by name for access during reads.
1816
def init_parsers(options)
  # store the parser behaviors
  @skip_blanks      = options.delete(:skip_blanks)
  @encoding         = options.delete(:encoding)  # nil will use $KCODE
  @field_size_limit = options.delete(:field_size_limit)

  # escape the separators so they match literally inside the patterns
  col_sep = Regexp.escape(@col_sep)
  row_sep = Regexp.escape(@row_sep)
  quote   = Regexp.escape(@quote_char)

  # name, pattern source and flags for each prebuilt Regexp
  patterns = [
    [:any_field,    "[^#{col_sep}]+",                 Regexp::MULTILINE],
    [:quoted_field, "^#{quote}(.*)#{quote}$",         Regexp::MULTILINE],
    [:stray_quote,  "[^#{quote}]#{quote}[^#{quote}]", Regexp::MULTILINE],
    # safer than chomp!()
    [:line_end,     "#{row_sep}\\z",                  nil]
  ]

  # compile once here so reads can simply look the parsers up by name
  @parsers = Hash.new
  patterns.each do |name, source, flags|
    @parsers[name] = Regexp.new(source, flags, @encoding)
  end
  @parsers
end
1840
+
1841
+ #
1842
+ # Loads any converters requested during construction.
1843
+ #
1844
+ # If +field_name+ is set to <tt>:converters</tt> (the default), field converters
1845
+ # are set. When +field_name+ is <tt>:header_converters</tt> header converters
1846
+ # are added instead.
1847
+ #
1848
+ # The <tt>:unconverted_fields</tt> option is also activated for
1849
+ # <tt>:converters</tt> calls, if requested.
1850
+ #
1851
def init_converters(options, field_name = :converters)
  # Resets the converter list named by +field_name+ to an empty Array and
  # then registers everything +options+ requested under that key, using the
  # method whose name is +field_name+ minus its trailing "ers" (convert() for
  # <tt>:converters</tt>, header_convert() for <tt>:header_converters</tt>).
  #
  # The +field_name+ key is removed from +options+ and its (Array-wrapped)
  # value is returned.  For <tt>:converters</tt> calls the
  # <tt>:unconverted_fields</tt> option is consumed as well.
  @unconverted_fields = options.delete(:unconverted_fields) if field_name == :converters

  instance_variable_set("@#{field_name}", [])

  # find the correct method to add the converters
  register = method(field_name.to_s.sub(/ers\Z/, ""))

  requested = options[field_name]
  unless requested.nil?
    # allow a single converter not wrapped in an Array
    requested = options[field_name] = [requested] unless requested.is_a?(Array)

    requested.each do |converter|
      if converter.is_a?(Proc)  # custom code block
        register.call(&converter)
      else                      # by name
        register.call(converter)
      end
    end
  end

  options.delete(field_name)
end
1879
+
1880
+ # Stores header row settings and loads header converters, if needed.
1881
def init_headers(options)
  # Pulls the <tt>:headers</tt>, <tt>:return_headers</tt>, and
  # <tt>:write_headers</tt> settings out of +options+ (deleting each key),
  # clears any previously stored headers, and then loads the header
  # converters via init_converters().
  @use_headers, @return_headers, @write_headers =
    [:headers, :return_headers, :write_headers].map { |key| options.delete(key) }

  # headers must be delayed until shift(), in case they need a row of content
  @headers = nil

  init_converters(options, :header_converters)
end
1891
+
1892
+ #
1893
+ # The actual work method for adding converters, used by both
1894
+ # FasterCSV.convert() and FasterCSV.header_convert().
1895
+ #
1896
+ # This method requires the +var_name+ of the instance variable to place the
1897
+ # converters in, the +const+ Hash to lookup named converters in, and the
1898
+ # normal parameters of the FasterCSV.convert() and FasterCSV.header_convert()
1899
+ # methods.
1900
+ #
1901
def add_converter(var_name, const, name = nil, &converter)
  # Appends a converter to the Array held in the instance variable
  # <tt>@var_name</tt>.
  #
  # With no +name+, the given block itself is stored.  Otherwise +name+ is
  # looked up in the +const+ Hash: an Array entry is treated as a list of
  # further names, each added in turn, and any other entry is stored as is.
  ivar = "@#{var_name}"

  # custom converter
  return instance_variable_get(ivar) << converter if name.nil?

  # named converter
  entry = const[name]
  if Array === entry  # combo converter
    entry.each { |member| add_converter(var_name, const, member) }
  else                # individual named converter
    instance_variable_get(ivar) << entry
  end
end
1916
+
1917
+ #
1918
+ # Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
1919
+ # if +headers+ is passed as +true+, returning the converted field set. Any
1920
+ # converter that changes the field into something other than a String halts
1921
+ # the pipeline of conversion for that field. This is primarily an efficiency
1922
+ # shortcut.
1923
+ #
1924
def convert_fields(fields, headers = false)
  # Runs each element of +fields+ through <tt>@header_converters</tt> (when
  # +headers+ is true) or <tt>@converters</tt> and returns a new Array of the
  # results.
  #
  # Converters with an arity of 1 receive just the field.  All others also
  # receive a FieldInfo built from the field's index, the current lineno(),
  # and the matching header (+nil+ unless headers are in use and this is not
  # the header row itself).  A field stops moving through the converters as
  # soon as it is no longer a String.
  pipeline  = headers ? @header_converters : @converters
  converted = []

  fields.each_with_index do |value, position|
    pipeline.each do |step|
      if step.arity == 1  # straight field converter
        value = step[value]
      else                # FieldInfo converter
        column = (@use_headers && !headers) ? @headers[position] : nil
        value  = step[value, FieldInfo.new(position, lineno, column)]
      end
      break unless value.is_a?(String)  # short-circuit pipeline for speed
    end
    converted << value  # final state of the field, converted or original
  end

  converted
end
1941
+
1942
+ #
1943
+ # This method is used to turn a finished +row+ into a FasterCSV::Row. Header
1944
+ # rows are also dealt with here, either by returning a FasterCSV::Row with
1945
+ # identical headers and fields (save that the fields do not go through the
1946
+ # converters) or by reading past them to return a field row. Headers are also
1947
+ # saved in <tt>@headers</tt> for use in future rows.
1948
+ #
1949
+ # When +nil+, +row+ is assumed to be a header row not based on an actual row
1950
+ # of the stream.
1951
+ #
1952
def parse_headers(row = nil)
  # Wraps +row+ in a FasterCSV::Row, establishing <tt>@headers</tt> first if
  # that has not happened yet.
  #
  # Headers come from <tt>@use_headers</tt> when it is an Array (used as is)
  # or a String (parsed as a CSV line with this object's separators and quote
  # character); otherwise +row+ itself is the header row.  Headers are passed
  # through the header converters before being stored.
  #
  # On the call that establishes the headers, a header Row is returned when
  # <tt>@return_headers</tt> is set; failing that, if the headers were taken
  # from +row+, the result of shift() is returned instead.

  # headers already known: plain field row
  unless @headers.nil?
    return FasterCSV::Row.new(@headers, convert_fields(row))
  end

  # save headers
  if Array === @use_headers     # Array of headers
    @headers = @use_headers
  elsif String === @use_headers # CSV header String
    @headers = self.class.parse_line( @use_headers,
                                      :col_sep    => @col_sep,
                                      :row_sep    => @row_sep,
                                      :quote_char => @quote_char )
  else                          # first row is headers
    @headers = row
  end

  # prepare converted and unconverted copies
  row      = @headers if row.nil?
  @headers = convert_fields(@headers, true)

  # return headers
  return FasterCSV::Row.new(@headers, row, true) if @return_headers

  # skip to field row when the headers were read from the stream
  header_source = @use_headers.class
  return shift unless Array == header_source || String == header_source

  FasterCSV::Row.new(@headers, convert_fields(row))  # field row
end
1980
+
1981
+ #
1982
+ # This method injects an instance variable <tt>unconverted_fields</tt> into
1983
+ # +row+ and an accessor method for it called unconverted_fields(). The
1984
+ # variable is set to the contents of +fields+.
1985
+ #
1986
def add_unconverted_fields(row, fields)
  # Gives +row+ (and only that object) an unconverted_fields() reader, points
  # the backing <tt>@unconverted_fields</tt> variable at +fields+, and
  # returns +row+.
  singleton = class << row; self; end
  singleton.send(:attr_reader, :unconverted_fields)
  row.instance_variable_set("@unconverted_fields", fields)
  row
end
1993
+ end
1994
+ end
1995
+
1996
+ # Another name for FasterCSV.
1997
+ FCSV = FasterCSV # constant alias: FCSV.instance, as called by the FCSV() shortcut method, is FasterCSV.instance
1998
+
1999
+ # Another name for FasterCSV::instance().
2000
def FasterCSV(*args, &block)
  # Shortcut interface: forwards every argument and the optional block to
  # FasterCSV.instance() untouched and hands back whatever that call returns.
  FasterCSV.instance(*args, &block)
end
2003
+
2004
+ # Another name for FCSV::instance().
2005
def FCSV(*args, &block)
  # Shortcut interface: forwards every argument and the optional block to
  # FCSV.instance() untouched and hands back whatever that call returns.
  FCSV.instance(*args, &block)
end
2008
+
2009
class Array
  # Hands this Array and +options+ (an empty Hash when omitted) to
  # FasterCSV.generate_line() and returns its result.
  def to_csv(options = {})
    FasterCSV.generate_line(self, options)
  end
end
2015
+
2016
class String
  # Hands this String and +options+ (an empty Hash when omitted) to
  # FasterCSV.parse_line() and returns its result.
  def parse_csv(options = {})
    FasterCSV.parse_line(self, options)
  end
end