StephanZ-fastercsv 1.4.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # csv_converters.rb
4
+ #
5
+ # Created by James Edward Gray II on 2006-11-05.
6
+ # Copyright 2006 Gray Productions. All rights reserved.
7
+
8
+ require "faster_csv"
9
+
10
+ # convert a specific column
11
+ options = {
12
+ :headers => true,
13
+ :header_converters => :symbol,
14
+ :converters => [
15
+ lambda { |f, info| info.index.zero? ? f.to_i : f },
16
+ lambda { |f, info| info.header == :floats ? f.to_f : f }
17
+ ]
18
+ }
19
+ table = FCSV(DATA, options) { |csv| csv.read }
20
+
21
+ table[:ints] # => [1, 2, 3]
22
+ table[:floats] # => [1.0, 2.0, 3.0]
23
+
24
+ __END__
25
+ ints,floats
26
+ 1,1.000
27
+ 2,2
28
+ 3,3.0
@@ -0,0 +1,23 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # = csv_filter.rb -- Faster CSV Reading and Writing
4
+ #
5
+ # Created by James Edward Gray II on 2006-04-01.
6
+ # Copyright 2006 Gray Productions. All rights reserved.
7
+
8
+ require "faster_csv"
9
+
10
+ running_total = 0
11
+ FasterCSV.filter( :headers => true,
12
+ :return_headers => true,
13
+ :header_converters => :symbol,
14
+ :converters => :numeric ) do |row|
15
+ if row.header_row?
16
+ row << "Running Total"
17
+ else
18
+ row << (running_total += row[:quantity] * row[:price])
19
+ end
20
+ end
21
+ # >> Quantity,Product Description,Price,Running Total
22
+ # >> 1,Text Editor,25.0,25.0
23
+ # >> 2,MacBook Pros,2499.0,5023.0
@@ -0,0 +1,57 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # csv_reading.rb
4
+ #
5
+ # Created by James Edward Gray II on 2006-11-05.
6
+ # Copyright 2006 Gray Productions. All rights reserved.
7
+
8
+ require "faster_csv"
9
+
10
+ CSV_FILE_PATH = File.join(File.dirname(__FILE__), "purchase.csv")
11
+ CSV_STR = <<END_CSV
12
+ first,last
13
+ James,Gray
14
+ Dana,Gray
15
+ END_CSV
16
+
17
+ # read a file line by line
18
+ FasterCSV.foreach(CSV_FILE_PATH) do |line|
19
+ puts line[1]
20
+ end
21
+ # >> Product Description
22
+ # >> Text Editor
23
+ # >> MacBook Pros
24
+
25
+ # slurp file data
26
+ data = FasterCSV.read(CSV_FILE_PATH)
27
+ puts data.flatten.grep(/\A\d+\.\d+\Z/)
28
+ # >> 25.00
29
+ # >> 2499.00
30
+
31
+ # read a string line by line
32
+ FasterCSV.parse(CSV_STR) do |line|
33
+ puts line[0]
34
+ end
35
+ # >> first
36
+ # >> James
37
+ # >> Dana
38
+
39
+ # slurp string data
40
+ data = FasterCSV.parse(CSV_STR)
41
+ puts data[1..-1].map { |line| "#{line[0][0, 1].downcase}.#{line[1].downcase}" }
42
+ # >> j.gray
43
+ # >> d.gray
44
+
45
+ # adding options to make data manipulation easy
46
+ total = 0
47
+ FasterCSV.foreach( CSV_FILE_PATH, :headers => true,
48
+ :header_converters => :symbol,
49
+ :converters => :numeric ) do |line|
50
+ line_total = line[:quantity] * line[:price]
51
+ total += line_total
52
+ puts "%s: %.2f" % [line[:product_description], line_total]
53
+ end
54
+ puts "Total: %.2f" % total
55
+ # >> Text Editor: 25.00
56
+ # >> MacBook Pros: 4998.00
57
+ # >> Total: 5023.00
@@ -0,0 +1,56 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # csv_table.rb
4
+ #
5
+ # Created by James Edward Gray II on 2006-11-04.
6
+ # Copyright 2006 Gray Productions. All rights reserved.
7
+ #
8
+ # Feature implementation and example code by Ara.T.Howard.
9
+
10
+ require "faster_csv"
11
+
12
+ table = FCSV.parse(DATA, :headers => true, :header_converters => :symbol)
13
+
14
+ # row access
15
+ table[0].class # => FasterCSV::Row
16
+ table[0].fields # => ["zaphod", "beeblebrox", "42"]
17
+
18
+ # column access
19
+ table[:first_name] # => ["zaphod", "ara"]
20
+
21
+ # cell access
22
+ table[1][0] # => "ara"
23
+ table[1][:first_name] # => "ara"
24
+ table[:first_name][1] # => "ara"
25
+
26
+ # manipulation
27
+ table << %w[james gray 30]
28
+ table[-1].fields # => ["james", "gray", "30"]
29
+
30
+ table[:type] = "name"
31
+ table[:type] # => ["name", "name", "name"]
32
+
33
+ table[:ssn] = %w[123-456-7890 098-765-4321]
34
+ table[:ssn] # => ["123-456-7890", "098-765-4321", nil]
35
+
36
+ # iteration
37
+ table.each do |row|
38
+ # ...
39
+ end
40
+
41
+ table.by_col!
42
+ table.each do |col_name, col_values|
43
+ # ...
44
+ end
45
+
46
+ # output
47
+ puts table
48
+ # >> first_name,last_name,age,type,ssn
49
+ # >> zaphod,beeblebrox,42,name,123-456-7890
50
+ # >> ara,howard,34,name,098-765-4321
51
+ # >> james,gray,30,name,
52
+
53
+ __END__
54
+ first_name,last_name,age
55
+ zaphod,beeblebrox,42
56
+ ara,howard,34
@@ -0,0 +1,67 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # csv_rails_import.rb
4
+ #
5
+ # Created by James Edward Gray II on 2006-11-05.
6
+ # Copyright 2006 Gray Productions. All rights reserved.
7
+
8
+ require "faster_csv"
9
+
10
+ CSV_FILE_PATH = File.join(File.dirname(__FILE__), "output.csv")
11
+
12
+ # writing to a file
13
+ FasterCSV.open(CSV_FILE_PATH, "w") do |csv|
14
+ csv << %w[first last]
15
+ csv << %w[James Gray]
16
+ csv << %w[Dana Gray]
17
+ end
18
+ puts File.read(CSV_FILE_PATH)
19
+ # >> first,last
20
+ # >> James,Gray
21
+ # >> Dana,Gray
22
+
23
+ # appending to an existing file
24
+ FasterCSV.open(CSV_FILE_PATH, "a") do |csv|
25
+ csv << %w[Gypsy]
26
+ csv << %w[Storm]
27
+ end
28
+ puts File.read(CSV_FILE_PATH)
29
+ # >> first,last
30
+ # >> James,Gray
31
+ # >> Dana,Gray
32
+ # >> Gypsy
33
+ # >> Storm
34
+
35
+ # writing to a string
36
+ csv_str = FasterCSV.generate do |csv|
37
+ csv << %w[first last]
38
+ csv << %w[James Gray]
39
+ csv << %w[Dana Gray]
40
+ end
41
+ puts csv_str
42
+ # >> first,last
43
+ # >> James,Gray
44
+ # >> Dana,Gray
45
+
46
+ # appending to an existing string
47
+ FasterCSV.generate(csv_str) do |csv|
48
+ csv << %w[Gypsy]
49
+ csv << %w[Storm]
50
+ end
51
+ puts csv_str
52
+ # >> first,last
53
+ # >> James,Gray
54
+ # >> Dana,Gray
55
+ # >> Gypsy
56
+ # >> Storm
57
+
58
+ # changing the output format
59
+ csv_str = FasterCSV.generate(:col_sep => "\t") do |csv|
60
+ csv << %w[first last]
61
+ csv << %w[James Gray]
62
+ csv << %w[Dana Gray]
63
+ end
64
+ puts csv_str
65
+ # >> first last
66
+ # >> James Gray
67
+ # >> Dana Gray
@@ -0,0 +1,3 @@
1
+ Quantity,Product Description,Price
2
+ 1,Text Editor,25.00
3
+ 2,MacBook Pros,2499.00
@@ -0,0 +1,36 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # shortcut_interface.rb
4
+ #
5
+ # Created by James Edward Gray II on 2006-04-01.
6
+ # Copyright 2006 Gray Productions. All rights reserved.
7
+ #
8
+ # Feature implementation and example code by Ara.T.Howard.
9
+
10
+ require "faster_csv"
11
+
12
+ #
13
+ # So now it's this easy to write to STDOUT.
14
+ #
15
+ FCSV { |f| f << %w( a b c) << %w( d e f ) }
16
+
17
+ #
18
+ # Writing to a String.
19
+ #
20
+ FCSV(csv = '') do |f|
21
+ f << %w( q r s )
22
+ f << %w( x y z )
23
+ end
24
+ puts csv
25
+
26
+ #
27
+ # Writing to STDERR.
28
+ #
29
+ FCSV(STDERR) do |f|
30
+ f << %w( 0 1 2 )
31
+ f << %w( A B C )
32
+ end
33
+ # >> a,b,c
34
+ # >> d,e,f
35
+ # >> q,r,s
36
+ # >> x,y,z
data/lib/faster_csv.rb ADDED
@@ -0,0 +1,1972 @@
1
+ #!/usr/local/bin/ruby -w
2
+
3
+ # = faster_csv.rb -- Faster CSV Reading and Writing
4
+ #
5
+ # Created by James Edward Gray II on 2005-10-31.
6
+ # Copyright 2005 Gray Productions. All rights reserved.
7
+ #
8
+ # See FasterCSV for documentation.
9
+
10
+ require "forwardable"
11
+ require "English"
12
+ require "enumerator"
13
+ require "date"
14
+ require "stringio"
15
+
16
+ #
17
+ # This class provides a complete interface to CSV files and data. It offers
18
+ # tools to enable you to read and write to and from Strings or IO objects, as
19
+ # needed.
20
+ #
21
+ # == Reading
22
+ #
23
+ # === From a File
24
+ #
25
+ # ==== A Line at a Time
26
+ #
27
+ # FasterCSV.foreach("path/to/file.csv") do |row|
28
+ # # use row here...
29
+ # end
30
+ #
31
+ # ==== All at Once
32
+ #
33
+ # arr_of_arrs = FasterCSV.read("path/to/file.csv")
34
+ #
35
+ # === From a String
36
+ #
37
+ # ==== A Line at a Time
38
+ #
39
+ # FasterCSV.parse("CSV,data,String") do |row|
40
+ # # use row here...
41
+ # end
42
+ #
43
+ # ==== All at Once
44
+ #
45
+ # arr_of_arrs = FasterCSV.parse("CSV,data,String")
46
+ #
47
+ # == Writing
48
+ #
49
+ # === To a File
50
+ #
51
+ # FasterCSV.open("path/to/file.csv", "w") do |csv|
52
+ # csv << ["row", "of", "CSV", "data"]
53
+ # csv << ["another", "row"]
54
+ # # ...
55
+ # end
56
+ #
57
+ # === To a String
58
+ #
59
+ # csv_string = FasterCSV.generate do |csv|
60
+ # csv << ["row", "of", "CSV", "data"]
61
+ # csv << ["another", "row"]
62
+ # # ...
63
+ # end
64
+ #
65
+ # == Convert a Single Line
66
+ #
67
+ # csv_string = ["CSV", "data"].to_csv # to CSV
68
+ # csv_array = "CSV,String".parse_csv # from CSV
69
+ #
70
+ # == Shortcut Interface
71
+ #
72
+ # FCSV { |csv_out| csv_out << %w{my data here} } # to $stdout
73
+ # FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
74
+ # FCSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
75
+ #
76
+ class FasterCSV
77
+ # The version of the installed library.
78
+ VERSION = "1.4.0".freeze
79
+
80
+ #
81
+ # A FasterCSV::Row is part Array and part Hash. It retains an order for the
82
+ # fields and allows duplicates just as an Array would, but also allows you to
83
+ # access fields by name just as you could if they were in a Hash.
84
+ #
85
+ # All rows returned by FasterCSV will be constructed from this class, if
86
+ # header row processing is activated.
87
+ #
88
+ class Row
89
+ #
90
+ # Construct a new FasterCSV::Row from +headers+ and +fields+, which are
91
+ # expected to be Arrays. If one Array is shorter than the other, it will be
92
+ # padded with +nil+ objects.
93
+ #
94
+ # The optional +header_row+ parameter can be set to +true+ to indicate, via
95
+ # FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
96
+ # a header row. Otherwise, the row is assumed to be a field row.
97
+ #
98
+ # A FasterCSV::Row object supports the following Array methods through
99
+ # delegation:
100
+ #
101
+ # * empty?()
102
+ # * length()
103
+ # * size()
104
+ #
105
def initialize(headers, fields, header_row = false)
  @header_row = header_row

  # Array#zip pads its argument with nil, so always zip from the longer
  # Array; pairs are stored as [header, field] either way
  if fields.size >= headers.size
    @row = fields.zip(headers).map { |field, header| [header, field] }
  else
    @row = headers.zip(fields)
  end
end
115
+
116
+ # Internal data format used to compare equality.
117
+ attr_reader :row
118
+ protected :row
119
+
120
+ ### Array Delegation ###
121
+
122
+ extend Forwardable
123
+ def_delegators :@row, :empty?, :length, :size
124
+
125
+ # Returns +true+ if this is a header row.
126
+ def header_row?
127
+ @header_row
128
+ end
129
+
130
+ # Returns +true+ if this is a field row.
131
def field_row?
  # a row is a field row exactly when it is not flagged as a header row
  !header_row?
end
134
+
135
+ # Returns the headers of this row.
136
def headers
  # each stored pair is [header, field]; keep only the header half
  @row.map { |header, _field| header }
end
139
+
140
+ #
141
+ # :call-seq:
142
+ # field( header )
143
+ # field( header, offset )
144
+ # field( index )
145
+ #
146
+ # This method will fetch the field value by +header+ or +index+. If a field
147
+ # is not found, +nil+ is returned.
148
+ #
149
+ # When provided, +offset+ ensures that a header match occurs on or later
150
+ # than the +offset+ index. You can use this to find duplicate headers,
151
+ # without resorting to hard-coding exact indices.
152
+ #
153
def field(header_or_index, minimum_index = 0)
  # Only the pairs at or after +minimum_index+ are searched.  Slicing past
  # the end of the row yields nil rather than an empty Array, so fall back
  # to "no pairs" to honor the documented "nil when not found" contract.
  candidates = @row[minimum_index..-1] || []

  # Integers are positions within the searched pairs, anything else is
  # matched against the header half of each pair
  pair = if header_or_index.is_a?(Integer)
    candidates[header_or_index]
  else
    candidates.assoc(header_or_index)
  end

  # return the field if we have a pair
  pair.nil? ? nil : pair.last
end
161
+ alias_method :[], :field
162
+
163
+ #
164
+ # :call-seq:
165
+ # []=( header, value )
166
+ # []=( header, offset, value )
167
+ # []=( index, value )
168
+ #
169
+ # Looks up the field by the semantics described in FasterCSV::Row.field()
170
+ # and assigns the +value+.
171
+ #
172
+ # Assigning past the end of the row with an index will set all pairs between
173
+ # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
174
+ # pair.
175
+ #
176
def []=(*args)
  # the value is always the last argument; what remains is the lookup
  value = args.pop
  key   = args.first

  if !key.is_a?(Integer)
    # header (plus optional offset) lookup
    position = index(*args)
    if position
      @row[position][1] = value    # normal header assignment
    else
      self << [key, value]         # unused header: append a new pair
    end
  elsif @row[key]
    @row[key][1] = value           # normal index assignment
  else
    # extending past the end with an index: fill the gap with empty pairs
    @row[key] = [nil, value]
    @row.map! { |pair| pair || [nil, nil] }
  end
end
195
+
196
+ #
197
+ # :call-seq:
198
+ # <<( field )
199
+ # <<( header_and_field_array )
200
+ # <<( header_and_field_hash )
201
+ #
202
+ # If a two-element Array is provided, it is assumed to be a header and field
203
+ # and the pair is appended. A Hash works the same way with the key being
204
+ # the header and the value being the field. Anything else is assumed to be
205
+ # a lone field which is appended with a +nil+ header.
206
+ #
207
+ # This method returns the row for chaining.
208
+ #
209
def <<(arg)
  if arg.is_a?(Hash)
    # every key/value becomes its own header and field pair
    arg.each { |header, field| @row << [header, field] }
  elsif arg.is_a?(Array) && arg.size == 2
    # a two-element Array is taken to be a header and field pair
    @row.push(arg)
  else
    # anything else is a lone field with no header
    @row.push([nil, arg])
  end

  self  # for chaining
end
220
+
221
+ #
222
+ # A shortcut for appending multiple fields. Equivalent to:
223
+ #
224
+ # args.each { |arg| faster_csv_row << arg }
225
+ #
226
+ # This method returns the row for chaining.
227
+ #
228
def push(*args)
  # hand each argument to <<() in order, so pair/Hash/field detection
  # stays in one place
  args.each do |field_or_pair|
    self << field_or_pair
  end

  self  # for chaining
end
233
+
234
+ #
235
+ # :call-seq:
236
+ # delete( header )
237
+ # delete( header, offset )
238
+ # delete( index )
239
+ #
240
+ # Used to remove a pair from the row by +header+ or +index+. The pair is
241
+ # located as described in FasterCSV::Row.field(). The deleted pair is
242
+ # returned, or +nil+ if a pair could not be found.
243
+ #
244
def delete(header_or_index, minimum_index = 0)
  # resolve the argument to a position in the row
  position = if header_or_index.is_a? Integer   # by index
    header_or_index
  else                                          # by header
    index(header_or_index, minimum_index)
  end

  # Array#delete_at(nil) raises a TypeError, so an unknown header is
  # reported as nil, as documented, instead of reaching delete_at()
  position.nil? ? nil : @row.delete_at(position)
end
251
+
252
+ #
253
+ # The provided +block+ is passed a header and field for each pair in the row
254
+ # and expected to return +true+ or +false+, depending on whether the pair
255
+ # should be deleted.
256
+ #
257
+ # This method returns the row for chaining.
258
+ #
259
def delete_if(&block)
  # the row itself is returned, so the in-place reject's own return value
  # (nil when nothing matched) is deliberately ignored
  @row.reject!(&block)

  self  # for chaining
end
264
+
265
+ #
266
+ # This method accepts any number of arguments which can be headers, indices,
267
+ # Ranges of either, or two-element Arrays containing a header and offset.
268
+ # Each argument will be replaced with a field lookup as described in
269
+ # FasterCSV::Row.field().
270
+ #
271
+ # If called with no arguments, all fields are returned.
272
+ #
273
def fields(*headers_and_or_indices)
  # no arguments: every field, in row order
  if headers_and_or_indices.empty?
    return @row.map { |_header, value| value }
  end

  # otherwise work like values_at()
  collected = Array.new
  headers_and_or_indices.each do |lookup|
    if lookup.is_a? Range
      # translate header endpoints to positions, keeping the exclusivity
      first = lookup.begin.is_a?(Integer) ? lookup.begin : index(lookup.begin)
      last  = lookup.end.is_a?(Integer)   ? lookup.end   : index(lookup.end)
      span  = Range.new(first, last, lookup.exclude_end?)
      collected.concat(fields.values_at(span))
    else
      # a header, an index, or a [header, offset] Array
      collected << field(*Array(lookup))
    end
  end
  collected
end
292
+ alias_method :values_at, :fields
293
+
294
+ #
295
+ # :call-seq:
296
+ # index( header )
297
+ # index( header, offset )
298
+ #
299
+ # This method will return the index of a field with the provided +header+.
300
+ # The +offset+ can be used to locate duplicate header names, as described in
301
+ # FasterCSV::Row.field().
302
+ #
303
def index(header, minimum_index = 0)
  # Only headers at or after +minimum_index+ are searched.  Slicing past
  # the end of the headers yields nil, which simply means there is nothing
  # left to search.
  searchable = headers[minimum_index..-1]
  return nil if searchable.nil?

  # find the header, then shift the result back to a whole-row position
  position = searchable.index(header)
  position.nil? ? nil : position + minimum_index
end
309
+
310
+ # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
311
+ def header?(name)
312
+ headers.include? name
313
+ end
314
+ alias_method :include?, :header?
315
+
316
+ #
317
+ # Returns +true+ if +data+ matches a field in this row, and +false+
318
+ # otherwise.
319
+ #
320
+ def field?(data)
321
+ fields.include? data
322
+ end
323
+
324
+ include Enumerable
325
+
326
+ #
327
+ # Yields each pair of the row as header and field tuples (much like
328
+ # iterating over a Hash).
329
+ #
330
+ # Support for Enumerable.
331
+ #
332
+ # This method returns the row for chaining.
333
+ #
334
+ def each(&block)
335
+ @row.each(&block)
336
+
337
+ self # for chaining
338
+ end
339
+
340
+ #
341
+ # Returns +true+ if this row contains the same headers and fields in the
342
+ # same order as +other+.
343
+ #
344
def ==(other)
  # Only another Row exposes the (protected) pair list.  Anything else,
  # such as nil or a plain Array, is compared against the raw pairs so
  # that ==() answers false instead of raising NoMethodError.
  return @row == other.row if other.is_a? Row
  @row == other
end
347
+
348
+ #
349
+ # Collapses the row into a simple Hash. Be warned that this discards field
350
+ # order and clobbers duplicate fields.
351
+ #
352
def to_hash
  # later pairs overwrite earlier ones that share a header
  hash = Hash.new
  @row.each { |header, field| hash[header] = field }
  hash
end
356
+
357
+ #
358
+ # Returns the row as a CSV String. Headers are not used. Equivalent to:
359
+ #
360
+ # faster_csv_row.fields.to_csv( options )
361
+ #
362
+ def to_csv(options = Hash.new)
363
+ fields.to_csv(options)
364
+ end
365
+ alias_method :to_s, :to_csv
366
+
367
+ # A summary of fields, by header.
368
+ def inspect
369
+ str = "#<#{self.class}"
370
+ each do |header, field|
371
+ str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
372
+ field.inspect
373
+ end
374
+ str << ">"
375
+ end
376
+ end
377
+
378
+ #
379
+ # A FasterCSV::Table is a two-dimensional data structure for representing CSV
380
+ # documents. Tables allow you to work with the data by row or column,
381
+ # manipulate the data, and even convert the results back to CSV, if needed.
382
+ #
383
+ # All tables returned by FasterCSV will be constructed from this class, if
384
+ # header row processing is activated.
385
+ #
386
+ class Table
387
+ #
388
+ # Construct a new FasterCSV::Table from +array_of_rows+, which are expected
389
+ # to be FasterCSV::Row objects. All rows are assumed to have the same
390
+ # headers.
391
+ #
392
+ # A FasterCSV::Table object supports the following Array methods through
393
+ # delegation:
394
+ #
395
+ # * empty?()
396
+ # * length()
397
+ # * size()
398
+ #
399
+ def initialize(array_of_rows)
400
+ @table = array_of_rows
401
+ @mode = :col_or_row
402
+ end
403
+
404
+ # The current access mode for indexing and iteration.
405
+ attr_reader :mode
406
+
407
+ # Internal data format used to compare equality.
408
+ attr_reader :table
409
+ protected :table
410
+
411
+ ### Array Delegation ###
412
+
413
+ extend Forwardable
414
+ def_delegators :@table, :empty?, :length, :size
415
+
416
+ #
417
+ # Returns a duplicate table object, in column mode. This is handy for
418
+ # chaining in a single call without changing the table mode, but be aware
419
+ # that this method can consume a fair amount of memory for bigger data sets.
420
+ #
421
+ # This method returns the duplicate table for chaining. Don't chain
422
+ # destructive methods (like []=()) this way though, since you are working
423
+ # with a duplicate.
424
+ #
425
+ def by_col
426
+ self.class.new(@table.dup).by_col!
427
+ end
428
+
429
+ #
430
+ # Switches the mode of this table to column mode. All calls to indexing and
431
+ # iteration methods will work with columns until the mode is changed again.
432
+ #
433
+ # This method returns the table and is safe to chain.
434
+ #
435
+ def by_col!
436
+ @mode = :col
437
+
438
+ self
439
+ end
440
+
441
+ #
442
+ # Returns a duplicate table object, in mixed mode. This is handy for
443
+ # chaining in a single call without changing the table mode, but be aware
444
+ # that this method can consume a fair amount of memory for bigger data sets.
445
+ #
446
+ # This method returns the duplicate table for chaining. Don't chain
447
+ # destructive methods (like []=()) this way though, since you are working
448
+ # with a duplicate.
449
+ #
450
+ def by_col_or_row
451
+ self.class.new(@table.dup).by_col_or_row!
452
+ end
453
+
454
+ #
455
+ # Switches the mode of this table to mixed mode. All calls to indexing and
456
+ # iteration methods will use the default intelligent indexing system until
457
+ # the mode is changed again. In mixed mode an index is assumed to be a row
458
+ # reference while anything else is assumed to be column access by headers.
459
+ #
460
+ # This method returns the table and is safe to chain.
461
+ #
462
+ def by_col_or_row!
463
+ @mode = :col_or_row
464
+
465
+ self
466
+ end
467
+
468
+ #
469
+ # Returns a duplicate table object, in row mode. This is handy for chaining
470
+ # in a single call without changing the table mode, but be aware that this
471
+ # method can consume a fair amount of memory for bigger data sets.
472
+ #
473
+ # This method returns the duplicate table for chaining. Don't chain
474
+ # destructive methods (like []=()) this way though, since you are working
475
+ # with a duplicate.
476
+ #
477
+ def by_row
478
+ self.class.new(@table.dup).by_row!
479
+ end
480
+
481
+ #
482
+ # Switches the mode of this table to row mode. All calls to indexing and
483
+ # iteration methods will work with rows until the mode is changed again.
484
+ #
485
+ # This method returns the table and is safe to chain.
486
+ #
487
+ def by_row!
488
+ @mode = :row
489
+
490
+ self
491
+ end
492
+
493
+ #
494
+ # Returns the headers for the first row of this table (assumed to match all
495
+ # other rows). An empty Array is returned for empty tables.
496
+ #
497
def headers
  # the first row speaks for the whole table; no rows means no headers
  @table.empty? ? Array.new : @table.first.headers
end
504
+
505
+ #
506
+ # In the default mixed mode, this method returns rows for index access and
507
+ # columns for header access. You can force the index association by first
508
+ # calling by_col!() or by_row!().
509
+ #
510
+ # Columns are returned as an Array of values. Altering that Array has no
511
+ # effect on the table.
512
+ #
513
def [](index_or_header)
  # row mode always means rows; mixed mode only for Integer arguments
  by_row = @mode == :row ||
           (@mode == :col_or_row && index_or_header.is_a?(Integer))
  return @table[index_or_header] if by_row

  # column access: collect that field from every row
  @table.map { |row| row[index_or_header] }
end
521
+
522
+ #
523
+ # In the default mixed mode, this method assigns rows for index access and
524
+ # columns for header access. You can force the index association by first
525
+ # calling by_col!() or by_row!().
526
+ #
527
+ # Rows may be set to an Array of values (which will inherit the table's
528
+ # headers()) or a FasterCSV::Row.
529
+ #
530
+ # Columns may be set to a single value, which is copied to each row of the
531
+ # column, or an Array of values. Arrays of values are assigned to rows top
532
+ # to bottom in row major order. Excess values are ignored and if the Array
533
+ # does not have a value for each row the extra rows will receive a +nil+.
534
+ #
535
+ # Assigning to an existing column or row clobbers the data. Assigning to
536
+ # new columns creates them at the right end of the table.
537
+ #
538
def []=(index_or_header, value)
  by_row = @mode == :row ||
           (@mode == :col_or_row && index_or_header.is_a?(Integer))

  if by_row
    # set row: bare Arrays are wrapped in a Row using the table's headers
    @table[index_or_header] =
      value.is_a?(Array) ? Row.new(headers, value) : value
  else
    # set column: an Array supplies one value per row position, anything
    # else is repeated down the column; header rows get the header itself
    per_row = value.is_a? Array
    @table.each_with_index do |row, i|
      row[index_or_header] = if row.header_row?
        index_or_header
      elsif per_row
        value[i]
      else
        value
      end
    end
  end
end
566
+
567
+ #
568
+ # The mixed mode default is to treat a list of indices as row access,
569
+ # returning the rows indicated. Anything else is considered columnar
570
+ # access. For columnar access, the return set has an Array for each row
571
+ # with the values indicated by the headers in each Array. You can force
572
+ # column or row mode using by_col!() or by_row!().
573
+ #
574
+ # You cannot mix column and row access.
575
+ #
576
def values_at(*indices_or_headers)
  by_rows = (@mode == :row)
  if !by_rows && @mode == :col_or_row
    # mixed mode is row access only when every argument is an Integer or
    # a Range with Integer ends
    by_rows = indices_or_headers.all? do |index|
      next true if index.is_a?(Integer)
      index.is_a?(Range) &&
        index.first.is_a?(Integer) &&
        index.last.is_a?(Integer)
    end
  end

  return @table.values_at(*indices_or_headers) if by_rows

  # by headers: one Array of values per row
  @table.map { |row| row.values_at(*indices_or_headers) }
end
589
+
590
+ #
591
+ # Adds a new row to the bottom end of this table. You can provide an Array,
592
+ # which will be converted to a FasterCSV::Row (inheriting the table's
593
+ # headers()), or a FasterCSV::Row.
594
+ #
595
+ # This method returns the table for chaining.
596
+ #
597
def <<(row_or_array)
  # bare Arrays are wrapped in a Row using the table's headers; anything
  # else is appended untouched
  new_row = if row_or_array.is_a? Array
    Row.new(headers, row_or_array)
  else
    row_or_array
  end
  @table.push(new_row)

  self  # for chaining
end
606
+
607
+ #
608
+ # A shortcut for appending multiple rows. Equivalent to:
609
+ #
610
+ # rows.each { |row| self << row }
611
+ #
612
+ # This method returns the table for chaining.
613
+ #
614
def push(*rows)
  # hand each argument to <<() in order, so Array-to-Row conversion stays
  # in one place
  rows.each do |row_or_array|
    self << row_or_array
  end

  self  # for chaining
end
619
+
620
+ #
621
+ # Removes and returns the indicated column or row. In the default mixed
622
+ # mode indices refer to rows and everything else is assumed to be a column
623
+ # header. Use by_col!() or by_row!() to force the lookup.
624
+ #
625
def delete(index_or_header)
  if @mode == :row ||                                      # by index
     (@mode == :col_or_row && index_or_header.is_a?(Integer))
    @table.delete_at(index_or_header)
  else                                                     # by header
    # A row that has no such column hands back nil rather than a pair
    # (e.g. an out-of-range index in column mode); report nil for that row
    # instead of calling last() on nil.
    @table.map do |row|
      pair = row.delete(index_or_header)
      pair.nil? ? nil : pair.last
    end
  end
end
633
+
634
+ #
635
+ # Removes any column or row for which the block returns +true+. In the
636
+ # default mixed mode or row mode, iteration is the standard row major
637
+ # walking of rows. In column mode, iteration will +yield+ two element
638
+ # tuples containing the column name and an Array of values for that column.
639
+ #
640
+ # This method returns the table for chaining.
641
+ #
642
def delete_if(&block)
  if @mode == :col                                # by header
    # decide on every column first, then delete, so the set of headers
    # being walked is not changed mid-iteration
    doomed = headers.select { |header| block[[header, self[header]]] }
    doomed.each { |header| delete(header) }
  else                                            # by index
    @table.delete_if(&block)
  end

  self  # for chaining
end
655
+
656
+ include Enumerable
657
+
658
+ #
659
+ # In the default mixed mode or row mode, iteration is the standard row major
660
+ # walking of rows. In column mode, iteration will +yield+ two element
661
+ # tuples containing the column name and an Array of values for that column.
662
+ #
663
+ # This method returns the table for chaining.
664
+ #
665
def each(&block)
  if @mode != :col
    # row and mixed modes walk the rows themselves
    @table.each(&block)
  else
    # column mode yields [header, column values] tuples
    headers.each do |header|
      block[[header, self[header]]]
    end
  end

  self  # for chaining
end
674
+
675
+ # Returns +true+ if all rows of this table ==() +other+'s rows.
676
def ==(other)
  # Only another Table exposes the (protected) row list.  Anything else,
  # such as nil or a plain Array of rows, is compared against the raw rows
  # so that ==() answers false instead of raising NoMethodError.
  return @table == other.table if other.is_a? Table
  @table == other
end
679
+
680
+ #
681
+ # Returns the table as an Array of Arrays. Headers will be the first row,
682
+ # then all of the field rows will follow.
683
+ #
684
def to_a
  # Headers come first.  Rows are appended in place: building the result
  # with Array#+ would copy the whole accumulator once per row.
  array = [headers]
  @table.each do |row|
    # header rows are already represented by the leading headers entry
    array << row.fields unless row.header_row?
  end
  array
end
693
+
694
+ #
695
+ # Returns the table as a complete CSV String. Headers will be listed first,
696
+ # then all of the field rows.
697
+ #
698
def to_csv(options = Hash.new)
  # Headers come first.  Lines are appended in place: building the list
  # with Array#+ would copy the whole accumulator once per row.
  rows = [headers.to_csv(options)]
  @table.each do |row|
    # header rows are already represented by the leading headers line
    rows << row.fields.to_csv(options) unless row.header_row?
  end
  rows.join
end
707
+ alias_method :to_s, :to_csv
708
+
709
+ def inspect
710
+ "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
711
+ end
712
+ end
713
+
714
+ # The error thrown when the parser encounters illegal CSV formatting.
715
+ class MalformedCSVError < RuntimeError; end
716
+
717
+ #
718
+ # A FieldInfo Struct contains details about a field's position in the data
719
+ # source it was read from. FasterCSV will pass this Struct to some blocks
720
+ # that make decisions based on field structure. See
721
+ # FasterCSV.convert_fields() for an example.
722
+ #
723
+ # <b><tt>index</tt></b>:: The zero-based index of the field in its row.
724
+ # <b><tt>line</tt></b>:: The line of the data source this row is from.
725
+ # <b><tt>header</tt></b>:: The header for the column, when available.
726
+ #
727
+ FieldInfo = Struct.new(:index, :line, :header)
728
+
729
+ # A Regexp used to find and convert some common Date formats.
730
+ DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
731
+ \d{4}-\d{2}-\d{2} )\z /x
732
+ # A Regexp used to find and convert some common DateTime formats.
733
+ DateTimeMatcher =
734
+ / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
735
+ \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
736
+ #
737
+ # This Hash holds the built-in converters of FasterCSV that can be accessed by
738
+ # name. You can select Converters with FasterCSV.convert() or through the
739
+ # +options+ Hash passed to FasterCSV::new().
740
+ #
741
+ # <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
742
+ # <b><tt>:float</tt></b>:: Converts any field Float() accepts.
743
+ # <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
744
+ # and <tt>:float</tt>.
745
+ # <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
746
+ # <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
747
+ # <b><tt>:all</tt></b>:: All built-in converters. A combination of
748
+ # <tt>:date_time</tt> and <tt>:numeric</tt>.
749
+ #
750
+ # This Hash is intentionally left unfrozen and users should feel free to add
751
+ # values to it that can be accessed by all FasterCSV objects.
752
+ #
753
+ # To add a combo field, the value should be an Array of names. Combo fields
754
+ # can be nested with other combo fields.
755
+ #
756
+ Converters = { :integer => lambda { |f| Integer(f) rescue f },
757
+ :float => lambda { |f| Float(f) rescue f },
758
+ :numeric => [:integer, :float],
759
+ :date => lambda { |f|
760
+ f =~ DateMatcher ? (Date.parse(f) rescue f) : f
761
+ },
762
+ :date_time => lambda { |f|
763
+ f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
764
+ },
765
+ :all => [:date_time, :numeric] }
766
+
767
+ #
768
+ # This Hash holds the built-in header converters of FasterCSV that can be
769
+ # accessed by name. You can select HeaderConverters with
770
+ # FasterCSV.header_convert() or through the +options+ Hash passed to
771
+ # FasterCSV::new().
772
+ #
773
+ # <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
774
+ # <b><tt>:symbol</tt></b>:: The header String is downcased, spaces are
775
+ # replaced with underscores, non-word characters
776
+ # are dropped, and finally to_sym() is called.
777
+ #
778
+ # This Hash is intentionally left unfrozen and users should feel free to add
779
+ # values to it that can be accessed by all FasterCSV objects.
780
+ #
781
+ # To add a combo field, the value should be an Array of names. Combo fields
782
+ # can be nested with other combo fields.
783
+ #
784
HeaderConverters = {
  # plain lower-casing of the header text
  :downcase => lambda { |h| h.downcase },
  # lower-case, spaces to underscores, drop everything outside a-z, 0-9 and _,
  # then intern the result
  :symbol => lambda { |h|
    underscored = h.downcase.tr(" ", "_")
    underscored.delete("^a-z0-9_").to_sym
  }
}
790
+
791
+ #
792
+ # The options used when no overrides are given by calling code. They are:
793
+ #
794
+ # <b><tt>:col_sep</tt></b>:: <tt>","</tt>
795
+ # <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
796
+ # <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
797
+ # <b><tt>:converters</tt></b>:: +nil+
798
+ # <b><tt>:unconverted_fields</tt></b>:: +nil+
799
+ # <b><tt>:headers</tt></b>:: +false+
800
+ # <b><tt>:return_headers</tt></b>:: +false+
801
+ # <b><tt>:header_converters</tt></b>:: +nil+
802
+ # <b><tt>:skip_blanks</tt></b>:: +false+
803
+ # <b><tt>:force_quotes</tt></b>:: +false+
804
+ #
805
DEFAULT_OPTIONS = {
  # separators and quoting
  :col_sep            => ",",
  :row_sep            => :auto,
  :quote_char         => '"',
  # field conversion
  :converters         => nil,
  :unconverted_fields => nil,
  # header handling
  :headers            => false,
  :return_headers     => false,
  :header_converters  => nil,
  # reading and writing tweaks
  :skip_blanks        => false,
  :force_quotes       => false
}.freeze
815
+
816
+ #
817
+ # This method will build a drop-in replacement for many of the standard CSV
818
+ # methods. It allows you to write code like:
819
+ #
820
+ # begin
821
+ # require "faster_csv"
822
+ # FasterCSV.build_csv_interface
823
+ # rescue LoadError
824
+ # require "csv"
825
+ # end
826
+ # # ... use CSV here ...
827
+ #
828
+ # This is not a complete interface with completely identical behavior.
829
+ # However, it is intended to be close enough that you won't notice the
830
+ # difference in most cases. CSV methods supported are:
831
+ #
832
+ # * foreach()
833
+ # * generate_line()
834
+ # * open()
835
+ # * parse()
836
+ # * parse_line()
837
+ # * readlines()
838
+ #
839
+ # Be warned that this interface is slower than vanilla FasterCSV due to the
840
+ # extra layer of method calls. Depending on usage, this can slow it down to
841
+ # near CSV speeds.
842
+ #
843
def self.build_csv_interface
  # create the stand-in class and publish it as the top-level CSV constant
  compat = Class.new
  Object.const_set(:CSV, compat)

  # every shim translates CSV's positional separators into FasterCSV options
  compat.class_eval do
    class << self
      def foreach(path, rs = :auto, &block) # :nodoc:
        FasterCSV.foreach(path, :row_sep => rs, &block)
      end

      def generate_line(row, fs = ",", rs = "") # :nodoc:
        FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
      end

      def open(path, mode, fs = ",", rs = :auto, &block) # :nodoc:
        reading_with_block = (block and mode.include? "r")
        if reading_with_block
          # CSV yields rows, not the reader, when a file is opened for reading
          FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs) do |csv|
            csv.each(&block)
          end
        else
          FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs, &block)
        end
      end

      def parse(str_or_readable, fs = ",", rs = :auto, &block) # :nodoc:
        FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
      end

      def parse_line(src, fs = ",", rs = :auto) # :nodoc:
        FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
      end

      def readlines(path, rs = :auto) # :nodoc:
        FasterCSV.readlines(path, :row_sep => rs)
      end
    end
  end
end
876
+
877
+ #
878
+ # This method allows you to serialize an Array of Ruby objects to a String or
879
+ # File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
880
+ # useful for spreadsheet and database interaction.
881
+ #
882
+ # Out of the box, this method is intended to work with simple data objects or
883
+ # Structs. It will serialize a list of instance variables and/or
884
+ # Struct.members().
885
+ #
886
+ # If you need more complicated serialization, you can control the process
887
+ # by adding methods to the class to be serialized.
888
+ #
889
+ # A class method csv_meta() is responsible for returning the first row of the
890
+ # document (as an Array). This row is considered to be a Hash of the form
891
+ # key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
892
+ # key with a value of the stringified class name and FasterCSV::dump() will
893
+ # create this, if you do not define this method. This method is only called
894
+ # on the first object of the Array.
895
+ #
896
+ # The next method you can provide is an instance method called csv_headers().
897
+ # This method is expected to return the second line of the document (again as
898
+ # an Array), which is to be used to give each column a header. By default,
899
+ # FasterCSV::load() will set an instance variable if the field header starts
900
+ # with an @ character or call send() passing the header as the method name and
901
+ # the field value as an argument. This method is only called on the first
902
+ # object of the Array.
903
+ #
904
+ # Finally, you can provide an instance method called csv_dump(), which will
905
+ # be passed the headers. This should return an Array of fields that can be
906
+ # serialized for this object. This method is called once for every object in
907
+ # the Array.
908
+ #
909
+ # The +io+ parameter can be used to serialize to a File, and +options+ can be
910
+ # anything FasterCSV::new() accepts.
911
+ #
912
def self.dump(ary_of_objs, io = "", options = Hash.new)
  # the first object decides the meta row and the header row
  template = ary_of_objs.first
  writer   = FasterCSV.new(io, options)

  # meta row: class-supplied, or a plain class/name pair
  begin
    writer << template.class.csv_meta
  rescue NoMethodError
    writer << [:class, template.class]
  end

  # header row: object-supplied, or instance variables plus Struct setters
  begin
    columns = template.csv_headers
  rescue NoMethodError
    columns = template.instance_variables.sort
    is_struct = template.class.ancestors.any? { |cls| cls.to_s =~ /\AStruct\b/ }
    if is_struct
      columns += template.members.map { |mem| "#{mem}=" }.sort
    end
  end
  writer << columns

  # one data row per object
  ary_of_objs.each do |obj|
    begin
      writer << obj.csv_dump(columns)
    rescue NoMethodError
      fields = columns.map { |var|
        # "@name" headers are instance variables; "name=" headers are Struct
        # members, looked up without the trailing "="
        var[0] == ?@ ? obj.instance_variable_get(var) : obj[var[0..-2]]
      }
      writer << fields
    end
  end

  # a String target is handed back; anything else is closed
  io.is_a?(String) ? writer.string : writer.close
end
956
+
957
+ #
958
+ # :call-seq:
959
+ # filter( options = Hash.new ) { |row| ... }
960
+ # filter( input, options = Hash.new ) { |row| ... }
961
+ # filter( input, output, options = Hash.new ) { |row| ... }
962
+ #
963
+ # This method is a convenience for building Unix-like filters for CSV data.
964
+ # Each row is yielded to the provided block which can alter it as needed.
965
+ # After the block returns, the row is appended to +output+ altered or not.
966
+ #
967
+ # The +input+ and +output+ arguments can be anything FasterCSV::new() accepts
968
+ # (generally String or IO objects). If not given, they default to
969
+ # <tt>ARGF</tt> and <tt>$stdout</tt>.
970
+ #
971
+ # The +options+ parameter is also filtered down to FasterCSV::new() after some
972
+ # clever key parsing. Any key beginning with <tt>:in_</tt> or
973
+ # <tt>:input_</tt> will have that leading identifier stripped and will only
974
+ # be used in the +options+ Hash for the +input+ object. Keys starting with
975
+ # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
976
+ # are assigned to both objects.
977
+ #
978
+ # The <tt>:output_row_sep</tt> +option+ defaults to
979
+ # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
980
+ #
981
def self.filter(*args)
  # split a trailing options Hash into reader-side and writer-side settings
  in_options  = Hash.new
  out_options = {:row_sep => $INPUT_RECORD_SEPARATOR}

  if args.last.is_a? Hash
    args.pop.each do |key, value|
      name = key.to_s
      if name =~ /\Ain(?:put)?_(.+)\Z/
        in_options[$1.to_sym] = value    # reader only, prefix stripped
      elsif name =~ /\Aout(?:put)?_(.+)\Z/
        out_options[$1.to_sym] = value   # writer only, prefix stripped
      else
        in_options[key]  = value         # unprefixed keys go to both sides
        out_options[key] = value
      end
    end
  end

  # remaining positional arguments are the source, then the destination
  input  = FasterCSV.new(args.shift || ARGF, in_options)
  output = FasterCSV.new(args.shift || $stdout, out_options)

  # let the block edit each row in place, then emit it
  input.each { |row|
    yield row
    output << row
  }
end
1007
+
1008
+ #
1009
+ # This method is intended as the primary interface for reading CSV files. You
1010
+ # pass a +path+ and any +options+ you wish to set for the read. Each row of
1011
+ # file will be passed to the provided +block+ in turn.
1012
+ #
1013
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1014
+ #
1015
def self.foreach(path, options = Hash.new, &block)
  # open in binary read mode and hand every row to the caller's block
  open(path, "rb", options) { |csv| csv.each(&block) }
end
1020
+
1021
+ #
1022
+ # :call-seq:
1023
+ # generate( str, options = Hash.new ) { |faster_csv| ... }
1024
+ # generate( options = Hash.new ) { |faster_csv| ... }
1025
+ #
1026
+ # This method wraps a String you provide, or an empty default String, in a
1027
+ # FasterCSV object which is passed to the provided block. You can use the
1028
+ # block to append CSV rows to the String and when the block exits, the
1029
+ # final String will be returned.
1030
+ #
1031
+ # Note that a passed String *is* modified by this method. Call dup() before
1032
+ # passing if you need a new String.
1033
+ #
1034
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1035
+ #
1036
def self.generate(*args)
  # a caller-supplied String is wrapped and positioned at its end so rows are
  # appended to it; otherwise a fresh empty String is used
  target =
    if args.first.is_a? String
      buffer = StringIO.new(args.shift)
      buffer.seek(0, IO::SEEK_END)
      buffer
    else
      ""
    end

  builder = new(target, *args)  # wrap
  yield builder                 # let the block append rows
  builder.string                # hand back the accumulated String
end
1049
+
1050
+ #
1051
+ # This method is a shortcut for converting a single row (Array) into a CSV
1052
+ # String.
1053
+ #
1054
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1055
+ #
1056
+ # The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
1057
+ # (<tt>$/</tt>) when calling this method.
1058
+ #
1059
def self.generate_line(row, options = Hash.new)
  # callers may override the record separator; $/ is only the fallback
  defaults = {:row_sep => $INPUT_RECORD_SEPARATOR}
  writer   = new("", defaults.merge(options))
  (writer << row).string
end
1063
+
1064
+ #
1065
+ # This method will return a FasterCSV instance, just like FasterCSV::new(),
1066
+ # but the instance will be cached and returned for all future calls to this
1067
+ # method for the same +data+ object (tested by Object#object_id()) with the
1068
+ # same +options+.
1069
+ #
1070
+ # If a block is given, the instance is passed to the block and the return
1071
+ # value becomes the return value of the block.
1072
+ #
1073
def self.instance(data = $stdout, options = Hash.new)
  #
  # Build a _signature_ for this call from the data object's identity and the
  # complete set of effective options.  Defaults are merged in first, so an
  # omitted option and the same option passed with its default value share
  # one instance.  Every supplied key takes part, including keys that are
  # not listed in DEFAULT_OPTIONS (for example <tt>:encoding</tt>), so calls
  # that differ only in such a key no longer receive the same cached object.
  #
  effective = DEFAULT_OPTIONS.merge(options)
  sig       = [data.object_id] + effective.sort_by { |key, _| key.to_s }

  # fetch or create the instance for this signature (entries are never
  # removed from the cache by this method)
  @@instances ||= Hash.new
  instance = (@@instances[sig] ||= new(data, options))

  if block_given?
    yield instance  # run block, if given, returning result
  else
    instance        # or return the instance
  end
end
1088
+
1089
+ #
1090
+ # This method is the reading counterpart to FasterCSV::dump(). See that
1091
+ # method for a detailed description of the process.
1092
+ #
1093
+ # You can customize loading by adding a class method called csv_load() which
1094
+ # will be passed a Hash of meta information, an Array of headers, and an Array
1095
+ # of fields for the object the method is expected to return.
1096
+ #
1097
+ # Remember that all fields will be Strings after this load. If you need
1098
+ # something else, use +options+ to setup converters or provide a custom
1099
+ # csv_load() implementation.
1100
+ #
1101
def self.load(io_or_str, options = Hash.new)
  csv = FasterCSV.new(io_or_str, options)

  begin
    # load meta information: the first row is a flat key, value, ... list
    meta = Hash[*csv.shift]

    #
    # SECURITY NOTE: the class name and, below, the header names come
    # straight from the document.  They are resolved with const_get() and
    # invoked with send(), so only load documents from trusted sources.
    #
    cls = meta["class"].split("::").inject(Object) do |c, const|
      c.const_get(const)
    end

    # load headers
    headers = csv.shift

    # unserialize each object stored in the file
    csv.inject(Array.new) do |all, row|
      begin
        obj = cls.csv_load(meta, headers, row)
      rescue NoMethodError
        # default strategy: "@name" headers set instance variables, anything
        # else is called as a method with the field as its argument
        obj = cls.allocate
        headers.zip(row) do |name, value|
          if name[0] == ?@
            obj.instance_variable_set(name, value)
          else
            obj.send(name, value)
          end
        end
      end
      all << obj
    end
  ensure
    # release a caller-supplied IO even when the document is malformed;
    # previously it was only closed after a successful load
    csv.close unless io_or_str.is_a? String
  end
end
1134
+
1135
+ #
1136
+ # :call-seq:
1137
+ # open( filename, mode="rb", options = Hash.new ) { |faster_csv| ... }
1138
+ # open( filename, mode="rb", options = Hash.new )
1139
+ #
1140
+ # This method opens an IO object, and wraps that with FasterCSV. This is
1141
+ # intended as the primary interface for writing a CSV file.
1142
+ #
1143
+ # You may pass any +args+ Ruby's open() understands followed by an optional
1144
+ # Hash containing any +options+ FasterCSV::new() understands.
1145
+ #
1146
+ # This method works like Ruby's open() call, in that it will pass a FasterCSV
1147
+ # object to a provided block and close it when the block terminates, or it
1148
+ # will return the FasterCSV object when no block is provided. (*Note*: This
1149
+ # is different from the standard CSV library which passes rows to the block.
1150
+ # Use FasterCSV::foreach() for that behavior.)
1151
+ #
1152
+ # An opened FasterCSV object will delegate to many IO methods, for
1153
+ # convenience. You may call:
1154
+ #
1155
+ # * binmode()
1156
+ # * close()
1157
+ # * close_read()
1158
+ # * close_write()
1159
+ # * closed?()
1160
+ # * eof()
1161
+ # * eof?()
1162
+ # * fcntl()
1163
+ # * fileno()
1164
+ # * flush()
1165
+ # * fsync()
1166
+ # * ioctl()
1167
+ # * isatty()
1168
+ # * pid()
1169
+ # * pos()
1170
+ # * reopen()
1171
+ # * seek()
1172
+ # * stat()
1173
+ # * sync()
1174
+ # * sync=()
1175
+ # * tell()
1176
+ # * to_i()
1177
+ # * to_io()
1178
+ # * tty?()
1179
+ #
1180
def self.open(*args)
  # find the +options+ Hash
  options = if args.last.is_a? Hash then args.pop else Hash.new end
  # default to a binary open mode
  args << "rb" if args.size == 1

  # open the File with the remaining +args+ and wrap it; if the wrapper
  # cannot be built (unknown options, for example) the File is closed
  # before the error is passed on, so the handle is not leaked
  file = File.open(*args)
  begin
    csv = new(file, options)
  rescue Exception
    file.close
    raise
  end

  # handle blocks like Ruby's open(), not like the CSV library
  if block_given?
    begin
      yield csv
    ensure
      csv.close
    end
  else
    csv
  end
end
1199
+
1200
+ #
1201
+ # :call-seq:
1202
+ # parse( str, options = Hash.new ) { |row| ... }
1203
+ # parse( str, options = Hash.new )
1204
+ #
1205
+ # This method can be used to easily parse CSV out of a String. You may either
1206
+ # provide a +block+ which will be called with each row of the String in turn,
1207
+ # or just use the returned Array of Arrays (when no +block+ is given).
1208
+ #
1209
+ # You pass your +str+ to read from, and an optional +options+ Hash containing
1210
+ # anything FasterCSV::new() understands.
1211
+ #
1212
def self.parse(*args, &block)
  csv = new(*args)

  # with a block: stream each row to it
  return csv.each(&block) unless block.nil?

  # without a block: slurp everything, closing the wrapper afterwards
  begin
    csv.read
  ensure
    csv.close
  end
end
1224
+
1225
+ #
1226
+ # This method is a shortcut for converting a single line of a CSV String into
1227
+ # an Array. Note that if +line+ contains multiple rows, anything
1228
+ # beyond the first row is ignored.
1229
+ #
1230
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1231
+ #
1232
def self.parse_line(line, options = Hash.new)
  # only the first row is pulled from the wrapper
  parser = new(line, options)
  parser.shift
end
1235
+
1236
+ #
1237
+ # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
1238
+ # file and any +options+ FasterCSV::new() understands.
1239
+ #
1240
def self.read(path, options = Hash.new)
  # binary read mode; the block's value (all rows) is the return value
  open(path, "rb", options) do |csv|
    csv.read
  end
end
1243
+
1244
+ # Alias for FasterCSV::read().
1245
def self.readlines(*args)
  # pure pass-through: same arguments, same result as read()
  read(*args)
end
1248
+
1249
+ #
1250
+ # A shortcut for:
1251
+ #
1252
+ # FasterCSV.read( path, { :headers => true,
1253
+ # :converters => :numeric,
1254
+ # :header_converters => :symbol }.merge(options) )
1255
+ #
1256
def self.table(path, options = Hash.new)
  # table-friendly defaults; anything in +options+ wins over them
  table_defaults = { :headers           => true,
                     :converters        => :numeric,
                     :header_converters => :symbol }
  read(path, table_defaults.merge(options))
end
1261
+
1262
+ #
1263
+ # This constructor will wrap either a String or IO object passed in +data+ for
1264
+ # reading and/or writing. In addition to the FasterCSV instance methods,
1265
+ # several IO methods are delegated. (See FasterCSV::open() for a complete
1266
+ # list.) If you pass a String for +data+, you can later retrieve it (after
1267
+ # writing to it, for example) with FasterCSV.string().
1268
+ #
1269
+ # Note that a wrapped String will be positioned at the beginning (for
1270
+ # reading). If you want it at the end (for writing), use
1271
+ # FasterCSV::generate(). If you want any other positioning, pass a preset
1272
+ # StringIO object instead.
1273
+ #
1274
+ # You may set any reading and/or writing preferences in the +options+ Hash.
1275
+ # Available options are:
1276
+ #
1277
+ # <b><tt>:col_sep</tt></b>:: The String placed between each field.
1278
+ # <b><tt>:row_sep</tt></b>:: The String appended to the end of each
1279
+ # row. This can be set to the special
1280
+ # <tt>:auto</tt> setting, which requests
1281
+ # that FasterCSV automatically discover
1282
+ # this from the data. Auto-discovery
1283
+ # reads ahead in the data looking for
1284
+ # the next <tt>"\r\n"</tt>,
1285
+ # <tt>"\n"</tt>, or <tt>"\r"</tt>
1286
+ # sequence. A sequence will be selected
1287
+ # even if it occurs in a quoted field,
1288
+ # assuming that you would have the same
1289
+ # line endings there. If none of those
1290
+ # sequences is found, +data+ is
1291
+ # <tt>ARGF</tt>, <tt>STDIN</tt>,
1292
+ # <tt>STDOUT</tt>, or <tt>STDERR</tt>,
1293
+ # or the stream is only available for
1294
+ # output, the default
1295
+ # <tt>$INPUT_RECORD_SEPARATOR</tt>
1296
+ # (<tt>$/</tt>) is used. Obviously,
1297
+ # discovery takes a little time. Set
1298
+ # manually if speed is important. Also
1299
+ # note that IO objects should be opened
1300
+ # in binary mode on Windows if this
1301
+ # feature will be used as the
1302
+ # line-ending translation can cause
1303
+ # problems with resetting the document
1304
+ # position to where it was before the
1305
+ # read ahead.
1306
+ # <b><tt>:quote_char</tt></b>:: The character used to quote fields.
1307
+ # This has to be a single character
1308
+ # String. This is useful for
1309
+ # applications that incorrectly use
1310
+ # <tt>'</tt> as the quote character
1311
+ # instead of the correct <tt>"</tt>.
1312
+ # FasterCSV will always consider a
1313
+ # double sequence of this character to be
1314
+ # an escaped quote.
1315
+ # <b><tt>:encoding</tt></b>:: The encoding to use when parsing the
1316
+ # file. Defaults to your <tt>$KCODE</tt>
1317
+ # setting. Valid values: <tt>`n’</tt> or
1318
+ # <tt>`N’</tt> for none, <tt>`e’</tt> or
1319
+ # <tt>`E’</tt> for EUC, <tt>`s’</tt> or
1320
+ # <tt>`S’</tt> for SJIS, and
1321
+ # <tt>`u’</tt> or <tt>`U’</tt> for UTF-8
1322
+ # (see Regexp.new()).
1323
+ # <b><tt>:field_size_limit</tt></b>:: This is a maximum size FasterCSV will
1324
+ # read ahead looking for the closing
1325
+ # quote for a field. (In truth, it
1326
+ # reads to the first line ending beyond
1327
+ # this size.) If a quote cannot be
1328
+ # found within the limit FasterCSV will
1329
+ # raise a MalformedCSVError, assuming
1330
+ # the data is faulty. You can use this
1331
+ # limit to prevent what are effectively
1332
+ # DoS attacks on the parser. However,
1333
+ # this limit can cause a legitimate
1334
+ # parse to fail and thus is set to
1335
+ # +nil+, or off, by default.
1336
+ # <b><tt>:converters</tt></b>:: An Array of names from the Converters
1337
+ # Hash and/or lambdas that handle custom
1338
+ # conversion. A single converter
1339
+ # doesn't have to be in an Array.
1340
+ # <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
1341
+ # unconverted_fields() method will be
1342
+ # added to all returned rows (Array or
1343
+ # FasterCSV::Row) that will return the
1344
+ # fields as they were before conversion.
1345
+ # Note that <tt>:headers</tt> supplied
1346
+ # by Array or String were not fields of
1347
+ # the document and thus will have an
1348
+ # empty Array attached.
1349
+ # <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
1350
+ # +true+, the initial row of the CSV
1351
+ # file will be treated as a row of
1352
+ # headers. If set to an Array, the
1353
+ # contents will be used as the headers.
1354
+ # If set to a String, the String is run
1355
+ # through a call of
1356
+ # FasterCSV::parse_line() with the same
1357
+ # <tt>:col_sep</tt>, <tt>:row_sep</tt>,
1358
+ # and <tt>:quote_char</tt> as this
1359
+ # instance to produce an Array of
1360
+ # headers. This setting causes
1361
+ # FasterCSV.shift() to return rows as
1362
+ # FasterCSV::Row objects instead of
1363
+ # Arrays and FasterCSV.read() to return
1364
+ # FasterCSV::Table objects instead of
1365
+ # an Array of Arrays.
1366
+ # <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently
1367
+ # swallowed. If set to +true+, header
1368
+ # rows are returned in a FasterCSV::Row
1369
+ # object with identical headers and
1370
+ # fields (save that the fields do not go
1371
+ # through the converters).
1372
+ # <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
1373
+ # set, a header row will be added to the
1374
+ # output.
1375
+ # <b><tt>:header_converters</tt></b>:: Identical in functionality to
1376
+ # <tt>:converters</tt> save that the
1377
+ # conversions are only made to header
1378
+ # rows.
1379
+ # <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, FasterCSV
1380
+ # will skip over any rows with no
1381
+ # content.
1382
+ # <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, FasterCSV
1383
+ # will quote all CSV fields it creates.
1384
+ #
1385
+ # See FasterCSV::DEFAULT_OPTIONS for the default settings.
1386
+ #
1387
+ # Options cannot be overridden in the instance methods for performance reasons,
1388
+ # so be sure to set what you want here.
1389
+ #
1390
def initialize(data, options = Hash.new)
  # caller settings layered over the defaults (merge returns a new Hash)
  options = DEFAULT_OPTIONS.merge(options)

  # Strings are wrapped in a StringIO; anything else is used as given
  @io = data.is_a?(String) ? StringIO.new(data) : data

  # hand the options to each setup step in turn
  init_separators(options)
  init_parsers(options)
  init_converters(options)
  init_headers(options)

  # whatever is still left in the Hash was not recognised
  raise ArgumentError, "Unknown options: #{options.keys.join(', ')}." unless options.empty?

  # track our own lineno since IO gets confused about line-ends in CSV fields
  @lineno = 0
end
1409
+
1410
+ #
1411
+ # The line number of the last row read from this file. Fields with nested
1412
+ # line-end characters will not affect this count.
1413
+ #
1414
+ attr_reader :lineno
1415
+
1416
+ ### IO and StringIO Delegation ###
1417
+
1418
+ extend Forwardable # supplies def_delegators, used on the next statement
1419
+ def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
1420
+ :eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
1421
+ :isatty, :pid, :pos, :reopen, :seek, :stat, :string,
1422
+ :sync, :sync=, :tell, :to_i, :to_io, :tty? # each listed method is forwarded to @io; NOTE(review): whether a call works presumably depends on the wrapped object (e.g. string() on a StringIO) -- confirm
1423
+
1424
+ # Rewinds the underlying IO object and resets FasterCSV's lineno() counter.
1425
def rewind
  # forget parsed headers and restart the row count, then rewind the source
  @headers, @lineno = nil, 0
  @io.rewind
end
1431
+
1432
+ ### End Delegation ###
1433
+
1434
+ #
1435
+ # The primary write method for wrapped Strings and IOs, +row+ (an Array or
1436
+ # FasterCSV::Row) is converted to CSV and appended to the data source. When a
1437
+ # FasterCSV::Row is passed, only the row's fields() are appended to the
1438
+ # output.
1439
+ #
1440
+ # The data source must be open for writing.
1441
+ #
1442
def <<(row)
  # headers given as an Array or String are resolved up front, and written
  # out first when requested
  if header_row? and [Array, String].include?(@use_headers.class)
    parse_headers  # won't read data for Array or String
    self << @headers if @write_headers
  end

  # reduce FasterCSV::Row objects and Hashes to a plain list of fields
  fields =
    if self.class::Row === row
      row.fields
    elsif Hash === row
      @headers.map { |header| row[header] }
    else
      row
    end

  # when headers are still unset, this row becomes the header row
  first_row_is_header = header_row?
  @headers = fields if first_row_is_header
  @lineno += 1

  # a header row is only emitted when header writing was requested
  unless first_row_is_header and !@write_headers
    @io << fields.map(&@quote).join(@col_sep) + @row_sep  # quote and separate
  end

  self  # for chaining
end
alias_method :add_row, :<<
alias_method :puts, :<<
1465
+
1466
+ #
1467
+ # :call-seq:
1468
+ # convert( name )
1469
+ # convert { |field| ... }
1470
+ # convert { |field, field_info| ... }
1471
+ #
1472
+ # You can use this method to install a FasterCSV::Converters built-in, or
1473
+ # provide a block that handles a custom conversion.
1474
+ #
1475
+ # If you provide a block that takes one argument, it will be passed the field
1476
+ # and is expected to return the converted value or the field itself. If your
1477
+ # block takes two arguments, it will also be passed a FieldInfo Struct,
1478
+ # containing details about the field. Again, the block should return a
1479
+ # converted field or the field itself.
1480
+ #
1481
def convert(name = nil, &converter)
  # field converters are looked up by name in the class's Converters Hash
  registry = self.class::Converters
  add_converter(:converters, registry, name, &converter)
end
1484
+
1485
+ #
1486
+ # :call-seq:
1487
+ # header_convert( name )
1488
+ # header_convert { |field| ... }
1489
+ # header_convert { |field, field_info| ... }
1490
+ #
1491
+ # Identical to FasterCSV.convert(), but for header rows.
1492
+ #
1493
+ # Note that this method must be called before header rows are read to have any
1494
+ # effect.
1495
+ #
1496
def header_convert(name = nil, &converter)
  # header converters are looked up by name in the class's HeaderConverters Hash
  registry = self.class::HeaderConverters
  add_converter(:header_converters, registry, name, &converter)
end
1502
+
1503
+ include Enumerable
1504
+
1505
+ #
1506
+ # Yields each row of the data source in turn.
1507
+ #
1508
+ # Support for Enumerable.
1509
+ #
1510
+ # The data source must be open for reading.
1511
+ #
1512
def each
  # keep pulling rows until shift() reports that nothing is left
  row = shift
  while row
    yield row
    row = shift
  end
end
1517
+
1518
+ #
1519
+ # Slurps the remaining rows and returns an Array of Arrays.
1520
+ #
1521
+ # The data source must be open for reading.
1522
+ #
1523
def read
  # Enumerable#to_a drains the remaining rows through each()
  rows = to_a
  # rows read with headers are wrapped in a Table; otherwise returned as-is
  @use_headers ? Table.new(rows) : rows
end
alias_method :readlines, :read
1532
+
1533
+ # Returns +true+ if the next row read will be a header row.
1534
def header_row?
  # headers not in use: hand back the (false or nil) setting itself
  return @use_headers unless @use_headers
  # headers in use: a header row is pending until @headers has been set
  @headers.nil?
end
1537
+
1538
+ #
1539
+ # The primary read method for wrapped Strings and IOs, a single row is pulled
1540
+ # from the data source, parsed and returned as an Array of fields (if header
1541
+ # rows are not used) or a FasterCSV::Row (when header rows are used).
1542
+ #
1543
+ # The data source must be open for reading.
1544
+ #
1545
+ def shift
1546
+ #########################################################################
1547
+ ### This method is purposefully kept a bit long as simple conditional ###
1548
+ ### checks are faster than numerous (expensive) method calls. ###
1549
+ #########################################################################
1550
+
1551
+ # handle headers not based on document content
1552
+ if header_row? and @return_headers and
1553
+ [Array, String].include? @use_headers.class
1554
+ if @unconverted_fields
1555
+ return add_unconverted_fields(parse_headers, Array.new)
1556
+ else
1557
+ return parse_headers
1558
+ end
1559
+ end
1560
+
1561
+ # begin with a blank line, so we can always add to it
1562
+ line = ""
1563
+
1564
+ #
1565
+ # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
1566
+ # because of \r and/or \n characters embedded in quoted fields
1567
+ #
1568
+ loop do
1569
+ # add another read to the line
1570
+ line += @io.gets(@row_sep) rescue return nil
1571
+ # copy the line so we can chop it up in parsing
1572
+ parse = line.dup
1573
+ parse.sub!(@parsers[:line_end], "")
1574
+
1575
+ #
1576
+ # I believe a blank line should be an <tt>Array.new</tt>, not
1577
+ # CSV's <tt>[nil]</tt>
1578
+ #
1579
+ if parse.empty?
1580
+ @lineno += 1
1581
+ if @skip_blanks
1582
+ line = ""
1583
+ next
1584
+ elsif @unconverted_fields
1585
+ return add_unconverted_fields(Array.new, Array.new)
1586
+ elsif @use_headers
1587
+ return FasterCSV::Row.new(Array.new, Array.new)
1588
+ else
1589
+ return Array.new
1590
+ end
1591
+ end
1592
+
1593
+ csv = []
1594
+ #
1595
+ # then parse the main fields with a mix of String#split and regular
1596
+ # expressions
1597
+ #
1598
+ current_field = ''
1599
+ field_quotes = 0
1600
+ quote_and_newlines = @quote_char + "\r\n"
1601
+ parse.split(@col_sep, -1).each do |match|
1602
+ if current_field.empty? && match.count(quote_and_newlines).zero?
1603
+ csv << (match.empty? ? nil : match)
1604
+ elsif(current_field.empty? ? match[0] : current_field[0]) == @quote_char[0]
1605
+ current_field << match
1606
+ field_quotes += match.count @quote_char
1607
+ if field_quotes % 2 == 0
1608
+ in_quotes = current_field[@parsers[:quoted_field], 1]
1609
+ raise MalformedCSVError unless in_quotes
1610
+ current_field = in_quotes
1611
+ current_field.gsub! @quote_char * 2, @quote_char # unescape contents
1612
+ csv << current_field
1613
+ current_field = ''
1614
+ field_quotes = 0
1615
+ else # we found a quoted field that spans multiple lines
1616
+ current_field << @col_sep
1617
+ end
1618
+ elsif match.count("\r\n").zero?
1619
+ raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
1620
+ else
1621
+ raise MalformedCSVError, "Unquoted fields do not allow " +
1622
+ "\\r or \\n (line #{lineno + 1})."
1623
+ end
1624
+ end
1625
+
1626
+ # if parse is empty?(), we found all the fields on the line...
1627
+ if field_quotes % 2 == 0
1628
+ @lineno += 1
1629
+
1630
+ # save fields unconverted fields, if needed...
1631
+ unconverted = csv.dup if @unconverted_fields
1632
+
1633
+ # convert fields, if needed...
1634
+ csv = convert_fields(csv) unless @use_headers or @converters.empty?
1635
+ # parse out header rows and handle FasterCSV::Row conversions...
1636
+ csv = parse_headers(csv) if @use_headers
1637
+
1638
+ # inject unconverted fields and accessor, if requested...
1639
+ if @unconverted_fields and not csv.respond_to? :unconverted_fields
1640
+ add_unconverted_fields(csv, unconverted)
1641
+ end
1642
+
1643
+ # return the results
1644
+ break csv
1645
+ end
1646
+ # if we're not empty?() but at eof?(), a quoted field wasn't closed...
1647
+ if @io.eof?
1648
+ raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
1649
+ elsif @field_size_limit and current_field.size >= @field_size_limit
1650
+ raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1651
+ end
1652
+ # otherwise, we need to loop and pull some more data to complete the row
1653
+ end
1654
+ end
1655
+ alias_method :gets, :shift # gets() is a second name for shift()
1656
+ alias_method :readline, :shift # readline() is a third name for the same method
1657
+
1658
+ # Returns a simplified description of the key FasterCSV attributes.
1659
def inspect
  # Builds a one-line summary of this object: the class name, what kind of IO
  # is wrapped, the IO's path when it has one, every parser attribute that is
  # currently truthy, and the header state when headers are in use.

  # the three standard streams are shown by their global names, anything else
  # by the name of its class
  io_type =
    if    @io == $stdout then "$stdout"
    elsif @io == $stdin  then "$stdin"
    elsif @io == $stderr then "$stderr"
    else                      @io.class.to_s
    end
  summary = "<##{self.class} io_type:#{io_type}"

  # IO.path(), when the wrapped object offers one and it is set
  path = @io.respond_to?(:path) && @io.path
  summary << " io_path:#{path.inspect}" if path

  # remaining attributes; nil and false values are left out entirely
  %w[lineno col_sep row_sep quote_char skip_blanks encoding].each do |name|
    value = instance_variable_get("@#{name}")
    summary << " #{name}:#{value.inspect}" if value
  end

  # before the headers have been read there is nothing to list, so show +true+
  summary << " headers:#{(@headers || true).inspect}" if @use_headers

  summary << ">"
end
1683
+
1684
+ private
1685
+
1686
+ #
1687
+ # Stores the indicated separators for later use.
1688
+ #
1689
+ # If auto-discovery was requested for <tt>@row_sep</tt>, this method will read
1690
+ # ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
1691
+ # +STDERR+ and any stream open for output only are given a default
1692
+ # <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1693
+ #
1694
+ # This method also establishes the quoting rules used for CSV output.
1695
+ #
1696
def init_separators(options)
  # Removes <tt>:col_sep</tt>, <tt>:row_sep</tt>, <tt>:quote_char</tt> and
  # <tt>:force_quotes</tt> from +options+, stores the three separators,
  # resolves a row separator of <tt>:auto</tt>, and builds the <tt>@quote</tt>
  # lambda that formats a single field for output.
  #
  # Raises ArgumentError unless the quote character has a length of exactly 1.

  # store the selected separators
  @col_sep    = options.delete(:col_sep)
  @row_sep    = options.delete(:row_sep)
  @quote_char = options.delete(:quote_char)

  if @quote_char.length != 1
    raise ArgumentError, ":quote_char has to be a single character String"
  end

  # automatically discover row separator when requested
  if @row_sep == :auto
    if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or
       (defined?(Zlib) and @io.class == Zlib::GzipWriter)
      @row_sep = $INPUT_RECORD_SEPARATOR
    else
      begin
        saved_pos = @io.pos  # remember where we were
        while @row_sep == :auto
          #
          # if we run out of data, it's probably a single line
          # (use a sensible default)
          #
          if @io.eof?
            @row_sep = $INPUT_RECORD_SEPARATOR
            break
          end

          # read ahead a bit; when the sample stops on "\r", pull one more
          # character so a "\r\n" pair is never seen as a lone "\r"
          sample =  @io.read(1024)
          sample += @io.read(1) if sample[-1..-1] == "\r" and not @io.eof?

          # try to find a standard separator
          if sample =~ /\r\n?|\n/
            @row_sep = $&
            break
          end
        end
        # tricky seek() clone to work around GzipReader's lack of seek()
        @io.rewind
        # reset back to the remembered position
        while saved_pos > 1024  # avoid loading a lot of data into memory
          @io.read(1024)
          saved_pos -= 1024
        end
        @io.read(saved_pos) if saved_pos.nonzero?
      rescue IOError  # stream not opened for reading
        @row_sep = $INPUT_RECORD_SEPARATOR
      end
    end
  end

  #
  # A field must be quoted when it holds "\r", "\n", a quote character or any
  # character of the column separator.  The separators are escaped into a
  # Regexp character class: handing them to String#count() unescaped let "-"
  # and "\\" act as range and escape syntax, so with a "-" separator a field
  # containing "-" was written without quotes.
  #
  quotable = Regexp.new(
    "[\r\n#{Regexp.escape(@col_sep)}#{Regexp.escape(@quote_char)}]"
  )

  # establish quoting rules
  do_quote = lambda do |field|
    # block form of gsub(): a replacement String would be scanned for
    # backslash escapes, which halves a doubled "\\" quote character
    @quote_char +
    String(field).gsub(@quote_char) { @quote_char * 2 } +
    @quote_char
  end
  @quote = if options.delete(:force_quotes)
    do_quote
  else
    lambda do |field|
      if field.nil?  # represent +nil+ fields as empty unquoted fields
        ""
      else
        field = String(field)  # Stringify fields
        # represent empty fields as empty quoted fields
        if field.empty? or field =~ quotable
          do_quote.call(field)
        else
          field  # unquoted field
        end
      end
    end
  end
end
1773
+
1774
+ # Pre-compiles parsers and stores them by name for access during reads.
1775
def init_parsers(options)
  # Removes <tt>:skip_blanks</tt>, <tt>:encoding</tt> and
  # <tt>:field_size_limit</tt> from +options+ and stores them, then compiles
  # the <tt>@parsers</tt> Hash from <tt>@col_sep</tt>, <tt>@row_sep</tt> and
  # <tt>@quote_char</tt>, which must already be set.  Returns that Hash.

  # store the parser behaviors
  @skip_blanks      = options.delete(:skip_blanks)
  @encoding         = options.delete(:encoding)  # nil will use $KCODE
  @field_size_limit = options.delete(:field_size_limit)

  #
  # Regexp.new() stopped accepting a third (kcode) argument in newer Rubies.
  # It is passed only when an encoding was actually requested, and dropped
  # if this Ruby rejects the extra argument.
  #
  compile = lambda do |source, flags|
    if @encoding.nil?
      Regexp.new(source, flags)
    else
      begin
        Regexp.new(source, flags, @encoding)
      rescue ArgumentError
        Regexp.new(source, flags)
      end
    end
  end

  # prebuild Regexps for faster parsing
  esc_col_sep = Regexp.escape(@col_sep)
  esc_row_sep = Regexp.escape(@row_sep)
  esc_quote   = Regexp.escape(@quote_char)
  @parsers = {
    :any_field    => compile.call("[^#{esc_col_sep}]+", Regexp::MULTILINE),
    #
    # anchored to the whole String with \A and \z: ^ and $ also match at line
    # breaks inside the field, which let text after the closing quote be
    # dropped silently instead of failing the match
    #
    :quoted_field => compile.call( "\\A#{esc_quote}(.*)#{esc_quote}\\z",
                                   Regexp::MULTILINE ),
    # safer than chomp!()
    :line_end     => compile.call("#{esc_row_sep}\\z", nil)
  }
end
1792
+
1793
+ #
1794
+ # Loads any converters requested during construction.
1795
+ #
1796
+ # If +field_name+ is set to <tt>:converters</tt> (the default) field converters
1797
+ # are set. When +field_name+ is <tt>:header_converters</tt> header converters
1798
+ # are added instead.
1799
+ #
1800
+ # The <tt>:unconverted_fields</tt> option is also activated for
1801
+ # <tt>:converters</tt> calls, if requested.
1802
+ #
1803
def init_converters(options, field_name = :converters)
  # Resets the converter list named by +field_name+ to an empty Array and
  # registers every converter listed under that key in +options+.  The key is
  # removed from +options+ and its (Array-wrapped) value is returned.

  # only the field converter pass reads the :unconverted_fields setting
  @unconverted_fields = options.delete(:unconverted_fields) if field_name == :converters

  instance_variable_set("@#{field_name}", [])

  # find the correct method to add the converters: trimming "ers" from the
  # option name leaves the name of the method that registers one converter
  adder = method(field_name.to_s.sub(/ers\Z/, ""))

  requested = options[field_name]
  unless requested.nil?
    # allow a single converter not wrapped in an Array (the wrapped form is
    # written back so it is also what gets deleted and returned below)
    requested = options[field_name] = [requested] unless requested.is_a? Array

    requested.each do |converter|
      case converter
      when Proc then adder.call(&converter)  # custom code block
      else           adder.call(converter)   # by name
      end
    end
  end

  options.delete(field_name)
end
1831
+
1832
+ # Stores header row settings and loads header converters, if needed.
1833
def init_headers(options)
  # Pulls the three header settings out of +options+ (deleting each key),
  # clears any stored headers, and hands the remaining options on to
  # init_converters() to load the header converters.
  @use_headers, @return_headers, @write_headers =
    [:headers, :return_headers, :write_headers].map { |key| options.delete(key) }

  # headers must be delayed until shift(), in case they need a row of content
  @headers = nil

  init_converters(options, :header_converters)
end
1843
+
1844
+ #
1845
+ # The actual work method for adding converters, used by both
1846
+ # FasterCSV.convert() and FasterCSV.header_convert().
1847
+ #
1848
+ # This method requires the +var_name+ of the instance variable to place the
1849
+ # converters in, the +const+ Hash to lookup named converters in, and the
1850
+ # normal parameters of the FasterCSV.convert() and FasterCSV.header_convert()
1851
+ # methods.
1852
+ #
1853
def add_converter(var_name, const, name = nil, &converter)
  # Appends to the Array held in the instance variable <tt>@var_name</tt>.
  # Without a +name+ the given block is appended; with one, the entry found
  # in +const+ is appended, or expanded member by member when it is an Array.
  registry = instance_variable_get("@#{var_name}")

  # custom converter
  return registry << converter if name.nil?

  # named converter
  entry = const[name]
  if entry.is_a? Array  # combo converter: add each member by its own name
    entry.each { |member| add_converter(var_name, const, member) }
  else                  # individual named converter
    registry << entry
  end
end
1868
+
1869
+ #
1870
+ # Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
1871
+ # if +headers+ is passed as +true+, returning the converted field set. Any
1872
+ # converter that changes the field into something other than a String halts
1873
+ # the pipeline of conversion for that field. This is primarily an efficiency
1874
+ # shortcut.
1875
+ #
1876
def convert_fields(fields, headers = false)
  # Runs every element of +fields+ through <tt>@header_converters</tt> (when
  # +headers+ is true) or <tt>@converters</tt>, and returns the results as a
  # new Array in the same order.
  pipeline  = headers ? @header_converters : @converters
  converted = []

  fields.each_with_index do |field, index|
    pipeline.each do |converter|
      if converter.arity == 1  # straight field converter
        field = converter[field]
      else                     # FieldInfo converter
        # a header is only on offer when converting fields under headers
        header = (@use_headers && !headers) ? @headers[index] : nil
        field  = converter[field, FieldInfo.new(index, lineno, header)]
      end
      # short-circuit pipeline for speed: once a converter has produced
      # something other than a String, the later converters are skipped
      break unless field.is_a? String
    end
    converted << field  # final state of each field, converted or original
  end

  converted
end
1893
+
1894
+ #
1895
+ # This method is used to turn a finished +row+ into a FasterCSV::Row. Header
1896
+ # rows are also dealt with here, either by returning a FasterCSV::Row with
1897
+ # identical headers and fields (save that the fields do not go through the
1898
+ # converters) or by reading past them to return a field row. Headers are also
1899
+ # saved in <tt>@headers</tt> for use in future rows.
1900
+ #
1901
+ # When +nil+, +row+ is assumed to be a header row not based on an actual row
1902
+ # of the stream.
1903
+ #
1904
def parse_headers(row = nil)
  # Wraps +row+ in a FasterCSV::Row.  The first call also settles what the
  # headers are and, depending on <tt>@return_headers</tt> and where the
  # headers came from, returns a header Row or the result of shift() instead.

  # headers already known: this is an ordinary field row
  return FasterCSV::Row.new(@headers, convert_fields(row)) unless @headers.nil?

  # header row -- save headers
  @headers =
    if @use_headers.is_a? Array      # Array of headers
      @use_headers
    elsif @use_headers.is_a? String  # CSV header String
      self.class.parse_line( @use_headers,
                             :col_sep    => @col_sep,
                             :row_sep    => @row_sep,
                             :quote_char => @quote_char )
    else                             # first row is headers
      row
    end

  # prepare converted and unconverted copies
  row      = row.nil? ? @headers : row
  @headers = convert_fields(@headers, true)

  # return headers
  return FasterCSV::Row.new(@headers, row, true) if @return_headers

  # the headers were read from the stream itself: skip to field row
  return shift unless [Array, String].include? @use_headers.class

  # headers were supplied up front, so +row+ is data
  FasterCSV::Row.new(@headers, convert_fields(row))
end
1932
+
1933
+ #
1934
+ # This method injects an instance variable <tt>unconverted_fields</tt> into
1935
+ # +row+ and an accessor method for it called unconverted_fields(). The
1936
+ # variable is set to the contents of +fields+.
1937
+ #
1938
def add_unconverted_fields(row, fields)
  # Gives this one +row+ object (and no other) an unconverted_fields()
  # reader, points it at +fields+, and returns +row+.
  singleton = class << row; self; end
  singleton.send(:attr_reader, :unconverted_fields)
  row.instance_variable_set(:@unconverted_fields, fields)
  row
end
1945
+ end
1946
+
1947
+ # Another name for FasterCSV.
1948
+ FCSV = FasterCSV # both constants now refer to the same class object
1949
+
1950
+ # Another name for FasterCSV::instance().
1951
+ def FasterCSV(*args, &block) # method sharing its name with the FasterCSV constant
1952
+ FasterCSV.instance(*args, &block) # every argument and the block are passed on unchanged
1953
+ end
1954
+
1955
+ # Another name for FCSV::instance().
1956
+ def FCSV(*args, &block) # method sharing its name with the FCSV constant
1957
+ FCSV.instance(*args, &block) # every argument and the block are passed on unchanged
1958
+ end
1959
+
1960
+ class Array # reopens the core Array class to add to_csv()
1961
+ # Equivalent to <tt>FasterCSV::generate_line(self, options)</tt>.
1962
+ def to_csv(options = Hash.new) # +options+ defaults to a new, empty Hash
1963
+ FasterCSV.generate_line(self, options) # the receiving Array is the first argument
1964
+ end
1965
+ end
1966
+
1967
+ class String # reopens the core String class to add parse_csv()
1968
+ # Equivalent to <tt>FasterCSV::parse_line(self, options)</tt>.
1969
+ def parse_csv(options = Hash.new) # +options+ defaults to a new, empty Hash
1970
+ FasterCSV.parse_line(self, options) # the receiving String is the first argument
1971
+ end
1972
+ end