fastercsv 1.5.3 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. data/CHANGELOG +7 -0
  2. data/lib/faster_csv.rb +1819 -1800
  3. data/test/tc_csv_parsing.rb +16 -0
  4. metadata +9 -4
data/CHANGELOG CHANGED
@@ -2,6 +2,13 @@
2
2
 
3
3
  Below is a complete listing of changes for each revision of FasterCSV.
4
4
 
5
+ == 1.5.4
6
+
7
+ * Improved test coverage for the parser.
8
+ * Improved documentation.
9
+ * Fixed a bug that prevented <tt>^</tt> from being used as <tt>:quote_char</tt>.
10
+ * Switched from abort() to throwing exceptions on Ruby 1.9.
11
+
5
12
  == 1.5.3
6
13
 
7
14
  * A bug fix from Timothy Elliott to return the new parser to its strict quote
@@ -8,1962 +8,1981 @@
8
8
  # See FasterCSV for documentation.
9
9
 
10
10
  if RUBY_VERSION >= "1.9"
11
- abort <<-VERSION_WARNING.gsub(/^\s+/, "")
12
- Please switch to Ruby 1.9's standard CSV library. It's FasterCSV plus
13
- support for Ruby 1.9's m17n encoding engine.
14
- VERSION_WARNING
15
- end
16
-
17
- require "forwardable"
18
- require "English"
19
- require "enumerator"
20
- require "date"
21
- require "stringio"
11
+ class FasterCSV
12
+ def self.const_missing(*_)
13
+ raise NotImplementedError, "Please switch to Ruby 1.9's standard CSV " +
14
+ "library. It's FasterCSV plus support for " +
15
+ "Ruby 1.9's m17n encoding engine."
16
+ end
17
+
18
+ def self.method_missing(*_)
19
+ const_missing
20
+ end
21
+
22
+ def method_missing(*_)
23
+ self.class.const_missing
24
+ end
25
+ end
26
+ else
27
+ require "forwardable"
28
+ require "English"
29
+ require "enumerator"
30
+ require "date"
31
+ require "stringio"
22
32
 
23
- #
24
- # This class provides a complete interface to CSV files and data. It offers
25
- # tools to enable you to read and write to and from Strings or IO objects, as
26
- # needed.
27
- #
28
- # == Reading
29
- #
30
- # === From a File
31
- #
32
- # ==== A Line at a Time
33
- #
34
- # FasterCSV.foreach("path/to/file.csv") do |row|
35
- # # use row here...
36
- # end
37
- #
38
- # ==== All at Once
39
- #
40
- # arr_of_arrs = FasterCSV.read("path/to/file.csv")
41
- #
42
- # === From a String
43
- #
44
- # ==== A Line at a Time
45
- #
46
- # FasterCSV.parse("CSV,data,String") do |row|
47
- # # use row here...
48
- # end
49
- #
50
- # ==== All at Once
51
- #
52
- # arr_of_arrs = FasterCSV.parse("CSV,data,String")
53
- #
54
- # == Writing
55
- #
56
- # === To a File
57
- #
58
- # FasterCSV.open("path/to/file.csv", "w") do |csv|
59
- # csv << ["row", "of", "CSV", "data"]
60
- # csv << ["another", "row"]
61
- # # ...
62
- # end
63
- #
64
- # === To a String
65
- #
66
- # csv_string = FasterCSV.generate do |csv|
67
- # csv << ["row", "of", "CSV", "data"]
68
- # csv << ["another", "row"]
69
- # # ...
70
- # end
71
- #
72
- # == Convert a Single Line
73
- #
74
- # csv_string = ["CSV", "data"].to_csv # to CSV
75
- # csv_array = "CSV,String".parse_csv # from CSV
76
- #
77
- # == Shortcut Interface
78
- #
79
- # FCSV { |csv_out| csv_out << %w{my data here} } # to $stdout
80
- # FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
81
- # FCSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
82
- #
83
- class FasterCSV
84
- # The version of the installed library.
85
- VERSION = "1.5.3".freeze
86
-
87
33
  #
88
- # A FasterCSV::Row is part Array and part Hash. It retains an order for the
89
- # fields and allows duplicates just as an Array would, but also allows you to
90
- # access fields by name just as you could if they were in a Hash.
34
+ # This class provides a complete interface to CSV files and data. It offers
35
+ # tools to enable you to read and write to and from Strings or IO objects, as
36
+ # needed.
91
37
  #
92
- # All rows returned by FasterCSV will be constructed from this class, if
93
- # header row processing is activated.
38
+ # == Reading
94
39
  #
95
- class Row
96
- #
97
- # Construct a new FasterCSV::Row from +headers+ and +fields+, which are
98
- # expected to be Arrays. If one Array is shorter than the other, it will be
99
- # padded with +nil+ objects.
100
- #
101
- # The optional +header_row+ parameter can be set to +true+ to indicate, via
102
- # FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
103
- # a header row. Otherwise, the row is assumed to be a field row.
40
+ # === From a File
41
+ #
42
+ # ==== A Line at a Time
43
+ #
44
+ # FasterCSV.foreach("path/to/file.csv") do |row|
45
+ # # use row here...
46
+ # end
47
+ #
48
+ # ==== All at Once
49
+ #
50
+ # arr_of_arrs = FasterCSV.read("path/to/file.csv")
51
+ #
52
+ # === From a String
53
+ #
54
+ # ==== A Line at a Time
55
+ #
56
+ # FasterCSV.parse("CSV,data,String") do |row|
57
+ # # use row here...
58
+ # end
59
+ #
60
+ # ==== All at Once
61
+ #
62
+ # arr_of_arrs = FasterCSV.parse("CSV,data,String")
63
+ #
64
+ # == Writing
65
+ #
66
+ # === To a File
67
+ #
68
+ # FasterCSV.open("path/to/file.csv", "w") do |csv|
69
+ # csv << ["row", "of", "CSV", "data"]
70
+ # csv << ["another", "row"]
71
+ # # ...
72
+ # end
73
+ #
74
+ # === To a String
75
+ #
76
+ # csv_string = FasterCSV.generate do |csv|
77
+ # csv << ["row", "of", "CSV", "data"]
78
+ # csv << ["another", "row"]
79
+ # # ...
80
+ # end
81
+ #
82
+ # == Convert a Single Line
83
+ #
84
+ # csv_string = ["CSV", "data"].to_csv # to CSV
85
+ # csv_array = "CSV,String".parse_csv # from CSV
86
+ #
87
+ # == Shortcut Interface
88
+ #
89
+ # FCSV { |csv_out| csv_out << %w{my data here} } # to $stdout
90
+ # FCSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
91
+ # FCSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
92
+ # FCSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
93
+ #
94
+ # == Advanced Usage
95
+ #
96
+ # === Wrap an IO Object
97
+ #
98
+ # csv = FCSV.new(io, options)
99
+ # # ... read (with gets() or each()) from and write (with <<) to csv here ...
100
+ #
101
+ class FasterCSV
102
+ # The version of the installed library.
103
+ VERSION = "1.5.4".freeze
104
+
104
105
  #
105
- # A FasterCSV::Row object supports the following Array methods through
106
- # delegation:
106
+ # A FasterCSV::Row is part Array and part Hash. It retains an order for the
107
+ # fields and allows duplicates just as an Array would, but also allows you to
108
+ # access fields by name just as you could if they were in a Hash.
107
109
  #
108
- # * empty?()
109
- # * length()
110
- # * size()
110
+ # All rows returned by FasterCSV will be constructed from this class, if
111
+ # header row processing is activated.
111
112
  #
112
- def initialize(headers, fields, header_row = false)
113
- @header_row = header_row
114
-
115
- # handle extra headers or fields
116
- @row = if headers.size > fields.size
117
- headers.zip(fields)
118
- else
119
- fields.zip(headers).map { |pair| pair.reverse }
113
+ class Row
114
+ #
115
+ # Construct a new FasterCSV::Row from +headers+ and +fields+, which are
116
+ # expected to be Arrays. If one Array is shorter than the other, it will be
117
+ # padded with +nil+ objects.
118
+ #
119
+ # The optional +header_row+ parameter can be set to +true+ to indicate, via
120
+ # FasterCSV::Row.header_row?() and FasterCSV::Row.field_row?(), that this is
121
+ # a header row. Otherwise, the row is assumed to be a field row.
122
+ #
123
+ # A FasterCSV::Row object supports the following Array methods through
124
+ # delegation:
125
+ #
126
+ # * empty?()
127
+ # * length()
128
+ # * size()
129
+ #
130
+ def initialize(headers, fields, header_row = false)
131
+ @header_row = header_row
132
+
133
+ # handle extra headers or fields
134
+ @row = if headers.size > fields.size
135
+ headers.zip(fields)
136
+ else
137
+ fields.zip(headers).map { |pair| pair.reverse }
138
+ end
120
139
  end
121
- end
122
-
123
- # Internal data format used to compare equality.
124
- attr_reader :row
125
- protected :row
126
140
 
127
- ### Array Delegation ###
141
+ # Internal data format used to compare equality.
142
+ attr_reader :row
143
+ protected :row
128
144
 
129
- extend Forwardable
130
- def_delegators :@row, :empty?, :length, :size
131
-
132
- # Returns +true+ if this is a header row.
133
- def header_row?
134
- @header_row
135
- end
136
-
137
- # Returns +true+ if this is a field row.
138
- def field_row?
139
- not header_row?
140
- end
141
-
142
- # Returns the headers of this row.
143
- def headers
144
- @row.map { |pair| pair.first }
145
- end
146
-
147
- #
148
- # :call-seq:
149
- # field( header )
150
- # field( header, offset )
151
- # field( index )
152
- #
153
- # This method will fetch the field value by +header+ or +index+. If a field
154
- # is not found, +nil+ is returned.
155
- #
156
- # When provided, +offset+ ensures that a header match occurs on or later
157
- # than the +offset+ index. You can use this to find duplicate headers,
158
- # without resorting to hard-coding exact indices.
159
- #
160
- def field(header_or_index, minimum_index = 0)
161
- # locate the pair
162
- finder = header_or_index.is_a?(Integer) ? :[] : :assoc
163
- pair = @row[minimum_index..-1].send(finder, header_or_index)
145
+ ### Array Delegation ###
164
146
 
165
- # return the field if we have a pair
166
- pair.nil? ? nil : pair.last
167
- end
168
- alias_method :[], :field
169
-
170
- #
171
- # :call-seq:
172
- # []=( header, value )
173
- # []=( header, offset, value )
174
- # []=( index, value )
175
- #
176
- # Looks up the field by the semantics described in FasterCSV::Row.field()
177
- # and assigns the +value+.
178
- #
179
- # Assigning past the end of the row with an index will set all pairs between
180
- # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
181
- # pair.
182
- #
183
- def []=(*args)
184
- value = args.pop
185
-
186
- if args.first.is_a? Integer
187
- if @row[args.first].nil? # extending past the end with index
188
- @row[args.first] = [nil, value]
189
- @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
190
- else # normal index assignment
191
- @row[args.first][1] = value
147
+ extend Forwardable
148
+ def_delegators :@row, :empty?, :length, :size
149
+
150
+ # Returns +true+ if this is a header row.
151
+ def header_row?
152
+ @header_row
153
+ end
154
+
155
+ # Returns +true+ if this is a field row.
156
+ def field_row?
157
+ not header_row?
158
+ end
159
+
160
+ # Returns the headers of this row.
161
+ def headers
162
+ @row.map { |pair| pair.first }
163
+ end
164
+
165
+ #
166
+ # :call-seq:
167
+ # field( header )
168
+ # field( header, offset )
169
+ # field( index )
170
+ #
171
+ # This method will fetch the field value by +header+ or +index+. If a field
172
+ # is not found, +nil+ is returned.
173
+ #
174
+ # When provided, +offset+ ensures that a header match occurs on or later
175
+ # than the +offset+ index. You can use this to find duplicate headers,
176
+ # without resorting to hard-coding exact indices.
177
+ #
178
+ def field(header_or_index, minimum_index = 0)
179
+ # locate the pair
180
+ finder = header_or_index.is_a?(Integer) ? :[] : :assoc
181
+ pair = @row[minimum_index..-1].send(finder, header_or_index)
182
+
183
+ # return the field if we have a pair
184
+ pair.nil? ? nil : pair.last
185
+ end
186
+ alias_method :[], :field
187
+
188
+ #
189
+ # :call-seq:
190
+ # []=( header, value )
191
+ # []=( header, offset, value )
192
+ # []=( index, value )
193
+ #
194
+ # Looks up the field by the semantics described in FasterCSV::Row.field()
195
+ # and assigns the +value+.
196
+ #
197
+ # Assigning past the end of the row with an index will set all pairs between
198
+ # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
199
+ # pair.
200
+ #
201
+ def []=(*args)
202
+ value = args.pop
203
+
204
+ if args.first.is_a? Integer
205
+ if @row[args.first].nil? # extending past the end with index
206
+ @row[args.first] = [nil, value]
207
+ @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
208
+ else # normal index assignment
209
+ @row[args.first][1] = value
210
+ end
211
+ else
212
+ index = index(*args)
213
+ if index.nil? # appending a field
214
+ self << [args.first, value]
215
+ else # normal header assignment
216
+ @row[index][1] = value
217
+ end
192
218
  end
193
- else
194
- index = index(*args)
195
- if index.nil? # appending a field
196
- self << [args.first, value]
197
- else # normal header assignment
198
- @row[index][1] = value
219
+ end
220
+
221
+ #
222
+ # :call-seq:
223
+ # <<( field )
224
+ # <<( header_and_field_array )
225
+ # <<( header_and_field_hash )
226
+ #
227
+ # If a two-element Array is provided, it is assumed to be a header and field
228
+ # and the pair is appended. A Hash works the same way with the key being
229
+ # the header and the value being the field. Anything else is assumed to be
230
+ # a lone field which is appended with a +nil+ header.
231
+ #
232
+ # This method returns the row for chaining.
233
+ #
234
+ def <<(arg)
235
+ if arg.is_a?(Array) and arg.size == 2 # appending a header and name
236
+ @row << arg
237
+ elsif arg.is_a?(Hash) # append header and name pairs
238
+ arg.each { |pair| @row << pair }
239
+ else # append field value
240
+ @row << [nil, arg]
199
241
  end
242
+
243
+ self # for chaining
200
244
  end
201
- end
202
-
203
- #
204
- # :call-seq:
205
- # <<( field )
206
- # <<( header_and_field_array )
207
- # <<( header_and_field_hash )
208
- #
209
- # If a two-element Array is provided, it is assumed to be a header and field
210
- # and the pair is appended. A Hash works the same way with the key being
211
- # the header and the value being the field. Anything else is assumed to be
212
- # a lone field which is appended with a +nil+ header.
213
- #
214
- # This method returns the row for chaining.
215
- #
216
- def <<(arg)
217
- if arg.is_a?(Array) and arg.size == 2 # appending a header and name
218
- @row << arg
219
- elsif arg.is_a?(Hash) # append header and name pairs
220
- arg.each { |pair| @row << pair }
221
- else # append field value
222
- @row << [nil, arg]
223
- end
224
-
225
- self # for chaining
226
- end
227
-
228
- #
229
- # A shortcut for appending multiple fields. Equivalent to:
230
- #
231
- # args.each { |arg| faster_csv_row << arg }
232
- #
233
- # This method returns the row for chaining.
234
- #
235
- def push(*args)
236
- args.each { |arg| self << arg }
237
-
238
- self # for chaining
239
- end
240
-
241
- #
242
- # :call-seq:
243
- # delete( header )
244
- # delete( header, offset )
245
- # delete( index )
246
- #
247
- # Used to remove a pair from the row by +header+ or +index+. The pair is
248
- # located as described in FasterCSV::Row.field(). The deleted pair is
249
- # returned. If no pair is found, +nil+ (by index) or an empty Array (by header) is returned.
250
- #
251
- def delete(header_or_index, minimum_index = 0)
252
- if header_or_index.is_a? Integer # by index
253
- @row.delete_at(header_or_index)
254
- elsif i = index(header_or_index, minimum_index) # by header
255
- @row.delete_at(i)
256
- else
257
- [ ]
245
+
246
+ #
247
+ # A shortcut for appending multiple fields. Equivalent to:
248
+ #
249
+ # args.each { |arg| faster_csv_row << arg }
250
+ #
251
+ # This method returns the row for chaining.
252
+ #
253
+ def push(*args)
254
+ args.each { |arg| self << arg }
255
+
256
+ self # for chaining
257
+ end
258
+
259
+ #
260
+ # :call-seq:
261
+ # delete( header )
262
+ # delete( header, offset )
263
+ # delete( index )
264
+ #
265
+ # Used to remove a pair from the row by +header+ or +index+. The pair is
266
+ # located as described in FasterCSV::Row.field(). The deleted pair is
267
+ # returned. If no pair is found, +nil+ (by index) or an empty Array (by header) is returned.
268
+ #
269
+ def delete(header_or_index, minimum_index = 0)
270
+ if header_or_index.is_a? Integer # by index
271
+ @row.delete_at(header_or_index)
272
+ elsif i = index(header_or_index, minimum_index) # by header
273
+ @row.delete_at(i)
274
+ else
275
+ [ ]
276
+ end
277
+ end
278
+
279
+ #
280
+ # The provided +block+ is passed a header and field for each pair in the row
281
+ # and expected to return +true+ or +false+, depending on whether the pair
282
+ # should be deleted.
283
+ #
284
+ # This method returns the row for chaining.
285
+ #
286
+ def delete_if(&block)
287
+ @row.delete_if(&block)
288
+
289
+ self # for chaining
290
+ end
291
+
292
+ #
293
+ # This method accepts any number of arguments which can be headers, indices,
294
+ # Ranges of either, or two-element Arrays containing a header and offset.
295
+ # Each argument will be replaced with a field lookup as described in
296
+ # FasterCSV::Row.field().
297
+ #
298
+ # If called with no arguments, all fields are returned.
299
+ #
300
+ def fields(*headers_and_or_indices)
301
+ if headers_and_or_indices.empty? # return all fields--no arguments
302
+ @row.map { |pair| pair.last }
303
+ else # or work like values_at()
304
+ headers_and_or_indices.inject(Array.new) do |all, h_or_i|
305
+ all + if h_or_i.is_a? Range
306
+ index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
307
+ index(h_or_i.begin)
308
+ index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
309
+ index(h_or_i.end)
310
+ new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
311
+ (index_begin..index_end)
312
+ fields.values_at(new_range)
313
+ else
314
+ [field(*Array(h_or_i))]
315
+ end
316
+ end
317
+ end
318
+ end
319
+ alias_method :values_at, :fields
320
+
321
+ #
322
+ # :call-seq:
323
+ # index( header )
324
+ # index( header, offset )
325
+ #
326
+ # This method will return the index of a field with the provided +header+.
327
+ # The +offset+ can be used to locate duplicate header names, as described in
328
+ # FasterCSV::Row.field().
329
+ #
330
+ def index(header, minimum_index = 0)
331
+ # find the pair
332
+ index = headers[minimum_index..-1].index(header)
333
+ # return the index at the right offset, if we found one
334
+ index.nil? ? nil : index + minimum_index
335
+ end
336
+
337
+ # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
338
+ def header?(name)
339
+ headers.include? name
340
+ end
341
+ alias_method :include?, :header?
342
+
343
+ #
344
+ # Returns +true+ if +data+ matches a field in this row, and +false+
345
+ # otherwise.
346
+ #
347
+ def field?(data)
348
+ fields.include? data
349
+ end
350
+
351
+ include Enumerable
352
+
353
+ #
354
+ # Yields each pair of the row as header and field tuples (much like
355
+ # iterating over a Hash).
356
+ #
357
+ # Support for Enumerable.
358
+ #
359
+ # This method returns the row for chaining.
360
+ #
361
+ def each(&block)
362
+ @row.each(&block)
363
+
364
+ self # for chaining
365
+ end
366
+
367
+ #
368
+ # Returns +true+ if this row contains the same headers and fields in the
369
+ # same order as +other+.
370
+ #
371
+ def ==(other)
372
+ @row == other.row
373
+ end
374
+
375
+ #
376
+ # Collapses the row into a simple Hash. Be warned that this discards field
377
+ # order and clobbers duplicate fields.
378
+ #
379
+ def to_hash
380
+ # flatten just one level of the internal Array
381
+ Hash[*@row.inject(Array.new) { |ary, pair| ary.push(*pair) }]
382
+ end
383
+
384
+ #
385
+ # Returns the row as a CSV String. Headers are not used. Equivalent to:
386
+ #
387
+ # faster_csv_row.fields.to_csv( options )
388
+ #
389
+ def to_csv(options = Hash.new)
390
+ fields.to_csv(options)
391
+ end
392
+ alias_method :to_s, :to_csv
393
+
394
+ # A summary of fields, by header.
395
+ def inspect
396
+ str = "#<#{self.class}"
397
+ each do |header, field|
398
+ str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
399
+ field.inspect
400
+ end
401
+ str << ">"
258
402
  end
259
403
  end
260
-
404
+
261
405
  #
262
- # The provided +block+ is passed a header and field for each pair in the row
263
- # and expected to return +true+ or +false+, depending on whether the pair
264
- # should be deleted.
406
+ # A FasterCSV::Table is a two-dimensional data structure for representing CSV
407
+ # documents. Tables allow you to work with the data by row or column,
408
+ # manipulate the data, and even convert the results back to CSV, if needed.
265
409
  #
266
- # This method returns the row for chaining.
410
+ # All tables returned by FasterCSV will be constructed from this class, if
411
+ # header row processing is activated.
267
412
  #
268
- def delete_if(&block)
269
- @row.delete_if(&block)
270
-
271
- self # for chaining
272
- end
273
-
274
- #
275
- # This method accepts any number of arguments which can be headers, indices,
276
- # Ranges of either, or two-element Arrays containing a header and offset.
277
- # Each argument will be replaced with a field lookup as described in
278
- # FasterCSV::Row.field().
279
- #
280
- # If called with no arguments, all fields are returned.
281
- #
282
- def fields(*headers_and_or_indices)
283
- if headers_and_or_indices.empty? # return all fields--no arguments
284
- @row.map { |pair| pair.last }
285
- else # or work like values_at()
286
- headers_and_or_indices.inject(Array.new) do |all, h_or_i|
287
- all + if h_or_i.is_a? Range
288
- index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
289
- index(h_or_i.begin)
290
- index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
291
- index(h_or_i.end)
292
- new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
293
- (index_begin..index_end)
294
- fields.values_at(new_range)
413
+ class Table
414
+ #
415
+ # Construct a new FasterCSV::Table from +array_of_rows+, which are expected
416
+ # to be FasterCSV::Row objects. All rows are assumed to have the same
417
+ # headers.
418
+ #
419
+ # A FasterCSV::Table object supports the following Array methods through
420
+ # delegation:
421
+ #
422
+ # * empty?()
423
+ # * length()
424
+ # * size()
425
+ #
426
+ def initialize(array_of_rows)
427
+ @table = array_of_rows
428
+ @mode = :col_or_row
429
+ end
430
+
431
+ # The current access mode for indexing and iteration.
432
+ attr_reader :mode
433
+
434
+ # Internal data format used to compare equality.
435
+ attr_reader :table
436
+ protected :table
437
+
438
+ ### Array Delegation ###
439
+
440
+ extend Forwardable
441
+ def_delegators :@table, :empty?, :length, :size
442
+
443
+ #
444
+ # Returns a duplicate table object, in column mode. This is handy for
445
+ # chaining in a single call without changing the table mode, but be aware
446
+ # that this method can consume a fair amount of memory for bigger data sets.
447
+ #
448
+ # This method returns the duplicate table for chaining. Don't chain
449
+ # destructive methods (like []=()) this way though, since you are working
450
+ # with a duplicate.
451
+ #
452
+ def by_col
453
+ self.class.new(@table.dup).by_col!
454
+ end
455
+
456
+ #
457
+ # Switches the mode of this table to column mode. All calls to indexing and
458
+ # iteration methods will work with columns until the mode is changed again.
459
+ #
460
+ # This method returns the table and is safe to chain.
461
+ #
462
+ def by_col!
463
+ @mode = :col
464
+
465
+ self
466
+ end
467
+
468
+ #
469
+ # Returns a duplicate table object, in mixed mode. This is handy for
470
+ # chaining in a single call without changing the table mode, but be aware
471
+ # that this method can consume a fair amount of memory for bigger data sets.
472
+ #
473
+ # This method returns the duplicate table for chaining. Don't chain
474
+ # destructive methods (like []=()) this way though, since you are working
475
+ # with a duplicate.
476
+ #
477
+ def by_col_or_row
478
+ self.class.new(@table.dup).by_col_or_row!
479
+ end
480
+
481
+ #
482
+ # Switches the mode of this table to mixed mode. All calls to indexing and
483
+ # iteration methods will use the default intelligent indexing system until
484
+ # the mode is changed again. In mixed mode an index is assumed to be a row
485
+ # reference while anything else is assumed to be column access by headers.
486
+ #
487
+ # This method returns the table and is safe to chain.
488
+ #
489
+ def by_col_or_row!
490
+ @mode = :col_or_row
491
+
492
+ self
493
+ end
494
+
495
+ #
496
+ # Returns a duplicate table object, in row mode. This is handy for chaining
497
+ # in a single call without changing the table mode, but be aware that this
498
+ # method can consume a fair amount of memory for bigger data sets.
499
+ #
500
+ # This method returns the duplicate table for chaining. Don't chain
501
+ # destructive methods (like []=()) this way though, since you are working
502
+ # with a duplicate.
503
+ #
504
+ def by_row
505
+ self.class.new(@table.dup).by_row!
506
+ end
507
+
508
+ #
509
+ # Switches the mode of this table to row mode. All calls to indexing and
510
+ # iteration methods will work with rows until the mode is changed again.
511
+ #
512
+ # This method returns the table and is safe to chain.
513
+ #
514
+ def by_row!
515
+ @mode = :row
516
+
517
+ self
518
+ end
519
+
520
+ #
521
+ # Returns the headers for the first row of this table (assumed to match all
522
+ # other rows). An empty Array is returned for empty tables.
523
+ #
524
+ def headers
525
+ if @table.empty?
526
+ Array.new
527
+ else
528
+ @table.first.headers
529
+ end
530
+ end
531
+
532
+ #
533
+ # In the default mixed mode, this method returns rows for index access and
534
+ # columns for header access. You can force the index association by first
535
+ # calling by_col!() or by_row!().
536
+ #
537
+ # Columns are returned as an Array of values. Altering that Array has no
538
+ # effect on the table.
539
+ #
540
+ def [](index_or_header)
541
+ if @mode == :row or # by index
542
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
543
+ @table[index_or_header]
544
+ else # by header
545
+ @table.map { |row| row[index_or_header] }
546
+ end
547
+ end
548
+
549
+ #
550
+ # In the default mixed mode, this method assigns rows for index access and
551
+ # columns for header access. You can force the index association by first
552
+ # calling by_col!() or by_row!().
553
+ #
554
+ # Rows may be set to an Array of values (which will inherit the table's
555
+ # headers()) or a FasterCSV::Row.
556
+ #
557
+ # Columns may be set to a single value, which is copied to each row of the
558
+ # column, or an Array of values. Arrays of values are assigned to rows top
559
+ # to bottom in row major order. Excess values are ignored and if the Array
560
+ # does not have a value for each row the extra rows will receive a +nil+.
561
+ #
562
+ # Assigning to an existing column or row clobbers the data. Assigning to
563
+ # new columns creates them at the right end of the table.
564
+ #
565
+ def []=(index_or_header, value)
566
+ if @mode == :row or # by index
567
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
568
+ if value.is_a? Array
569
+ @table[index_or_header] = Row.new(headers, value)
295
570
  else
296
- [field(*Array(h_or_i))]
571
+ @table[index_or_header] = value
572
+ end
573
+ else # set column
574
+ if value.is_a? Array # multiple values
575
+ @table.each_with_index do |row, i|
576
+ if row.header_row?
577
+ row[index_or_header] = index_or_header
578
+ else
579
+ row[index_or_header] = value[i]
580
+ end
581
+ end
582
+ else # repeated value
583
+ @table.each do |row|
584
+ if row.header_row?
585
+ row[index_or_header] = index_or_header
586
+ else
587
+ row[index_or_header] = value
588
+ end
589
+ end
297
590
  end
298
591
  end
299
592
  end
300
- end
301
- alias_method :values_at, :fields
302
-
303
- #
304
- # :call-seq:
305
- # index( header )
306
- # index( header, offset )
307
- #
308
- # This method will return the index of a field with the provided +header+.
309
- # The +offset+ can be used to locate duplicate header names, as described in
310
- # FasterCSV::Row.field().
311
- #
312
- def index(header, minimum_index = 0)
313
- # find the pair
314
- index = headers[minimum_index..-1].index(header)
315
- # return the index at the right offset, if we found one
316
- index.nil? ? nil : index + minimum_index
317
- end
318
-
319
- # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
320
- def header?(name)
321
- headers.include? name
322
- end
323
- alias_method :include?, :header?
324
-
325
- #
326
- # Returns +true+ if +data+ matches a field in this row, and +false+
327
- # otherwise.
328
- #
329
- def field?(data)
330
- fields.include? data
331
- end
332
593
 
333
- include Enumerable
334
-
335
- #
336
- # Yields each pair of the row as header and field tuples (much like
337
- # iterating over a Hash).
338
- #
339
- # Support for Enumerable.
340
- #
341
- # This method returns the row for chaining.
342
- #
343
- def each(&block)
344
- @row.each(&block)
345
-
346
- self # for chaining
347
- end
348
-
349
- #
350
- # Returns +true+ if this row contains the same headers and fields in the
351
- # same order as +other+.
352
- #
353
- def ==(other)
354
- @row == other.row
355
- end
356
-
357
- #
358
- # Collapses the row into a simple Hash. Be warned that this discards field
359
- # order and clobbers duplicate fields.
360
- #
361
- def to_hash
362
- # flatten just one level of the internal Array
363
- Hash[*@row.inject(Array.new) { |ary, pair| ary.push(*pair) }]
364
- end
365
-
366
- #
367
- # Returns the row as a CSV String. Headers are not used. Equivalent to:
368
- #
369
- # faster_csv_row.fields.to_csv( options )
370
- #
371
- def to_csv(options = Hash.new)
372
- fields.to_csv(options)
373
- end
374
- alias_method :to_s, :to_csv
375
-
376
- # A summary of fields, by header.
377
- def inspect
378
- str = "#<#{self.class}"
379
- each do |header, field|
380
- str << " #{header.is_a?(Symbol) ? header.to_s : header.inspect}:" <<
381
- field.inspect
594
+ #
595
+ # The mixed mode default is to treat a list of indices as row access,
596
+ # returning the rows indicated. Anything else is considered columnar
597
+ # access. For columnar access, the return set has an Array for each row
598
+ # with the values indicated by the headers in each Array. You can force
599
+ # column or row mode using by_col!() or by_row!().
600
+ #
601
+ # You cannot mix column and row access.
602
+ #
603
+ def values_at(*indices_or_headers)
604
+ if @mode == :row or # by indices
605
+ ( @mode == :col_or_row and indices_or_headers.all? do |index|
606
+ index.is_a?(Integer) or
607
+ ( index.is_a?(Range) and
608
+ index.first.is_a?(Integer) and
609
+ index.last.is_a?(Integer) )
610
+ end )
611
+ @table.values_at(*indices_or_headers)
612
+ else # by headers
613
+ @table.map { |row| row.values_at(*indices_or_headers) }
614
+ end
615
+ end
616
+
617
+ #
618
+ # Adds a new row to the bottom end of this table. You can provide an Array,
619
+ # which will be converted to a FasterCSV::Row (inheriting the table's
620
+ # headers()), or a FasterCSV::Row.
621
+ #
622
+ # This method returns the table for chaining.
623
+ #
624
+ def <<(row_or_array)
625
+ if row_or_array.is_a? Array # append Array
626
+ @table << Row.new(headers, row_or_array)
627
+ else # append Row
628
+ @table << row_or_array
629
+ end
630
+
631
+ self # for chaining
632
+ end
633
+
634
+ #
635
+ # A shortcut for appending multiple rows. Equivalent to:
636
+ #
637
+ # rows.each { |row| self << row }
638
+ #
639
+ # This method returns the table for chaining.
640
+ #
641
+ def push(*rows)
642
+ rows.each { |row| self << row }
643
+
644
+ self # for chaining
645
+ end
646
+
647
+ #
648
+ # Removes and returns the indicated column or row. In the default mixed
649
+ # mode indices refer to rows and everything else is assumed to be a column
650
+ # header. Use by_col!() or by_row!() to force the lookup.
651
+ #
652
+ def delete(index_or_header)
653
+ if @mode == :row or # by index
654
+ (@mode == :col_or_row and index_or_header.is_a? Integer)
655
+ @table.delete_at(index_or_header)
656
+ else # by header
657
+ @table.map { |row| row.delete(index_or_header).last }
658
+ end
659
+ end
660
+
661
+ #
662
+ # Removes any column or row for which the block returns +true+. In the
663
+ # default mixed mode or row mode, iteration is the standard row major
664
+ # walking of rows. In column mode, iteration will +yield+ two element
665
+ # tuples containing the column name and an Array of values for that column.
666
+ #
667
+ # This method returns the table for chaining.
668
+ #
669
+ def delete_if(&block)
670
+ if @mode == :row or @mode == :col_or_row # by index
671
+ @table.delete_if(&block)
672
+ else # by header
673
+ to_delete = Array.new
674
+ headers.each_with_index do |header, i|
675
+ to_delete << header if block[[header, self[header]]]
676
+ end
677
+ to_delete.map { |header| delete(header) }
678
+ end
679
+
680
+ self # for chaining
681
+ end
682
+
683
+ include Enumerable
684
+
685
+ #
686
+ # In the default mixed mode or row mode, iteration is the standard row major
687
+ # walking of rows. In column mode, iteration will +yield+ two element
688
+ # tuples containing the column name and an Array of values for that column.
689
+ #
690
+ # This method returns the table for chaining.
691
+ #
692
+ def each(&block)
693
+ if @mode == :col
694
+ headers.each { |header| block[[header, self[header]]] }
695
+ else
696
+ @table.each(&block)
697
+ end
698
+
699
+ self # for chaining
700
+ end
701
+
702
+ # Returns +true+ if all rows of this table ==() +other+'s rows.
703
+ def ==(other)
704
+ @table == other.table
705
+ end
706
+
707
+ #
708
+ # Returns the table as an Array of Arrays. Headers will be the first row,
709
+ # then all of the field rows will follow.
710
+ #
711
+ def to_a
712
+ @table.inject([headers]) do |array, row|
713
+ if row.header_row?
714
+ array
715
+ else
716
+ array + [row.fields]
717
+ end
718
+ end
719
+ end
720
+
721
+ #
722
+ # Returns the table as a complete CSV String. Headers will be listed first,
723
+ # then all of the field rows.
724
+ #
725
+ # This method assumes you want the Table.headers(), unless you explicitly
726
+ # pass <tt>:write_headers => false</tt>.
727
+ #
728
+ def to_csv(options = Hash.new)
729
+ wh = options.fetch(:write_headers, true)
730
+ @table.inject(wh ? [headers.to_csv(options)] : [ ]) do |rows, row|
731
+ if row.header_row?
732
+ rows
733
+ else
734
+ rows + [row.fields.to_csv(options)]
735
+ end
736
+ end.join
737
+ end
738
+ alias_method :to_s, :to_csv
739
+
740
+ def inspect
741
+ "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
382
742
  end
383
- str << ">"
384
- end
385
- end
386
-
387
- #
388
- # A FasterCSV::Table is a two-dimensional data structure for representing CSV
389
- # documents. Tables allow you to work with the data by row or column,
390
- # manipulate the data, and even convert the results back to CSV, if needed.
391
- #
392
- # All tables returned by FasterCSV will be constructed from this class, if
393
- # header row processing is activated.
394
- #
395
- class Table
396
- #
397
- # Construct a new FasterCSV::Table from +array_of_rows+, which are expected
398
- # to be FasterCSV::Row objects. All rows are assumed to have the same
399
- # headers.
400
- #
401
- # A FasterCSV::Table object supports the following Array methods through
402
- # delegation:
403
- #
404
- # * empty?()
405
- # * length()
406
- # * size()
407
- #
408
- def initialize(array_of_rows)
409
- @table = array_of_rows
410
- @mode = :col_or_row
411
743
  end
412
-
413
- # The current access mode for indexing and iteration.
414
- attr_reader :mode
415
-
416
- # Internal data format used to compare equality.
417
- attr_reader :table
418
- protected :table
419
744
 
420
- ### Array Delegation ###
745
+ # The error thrown when the parser encounters illegal CSV formatting.
746
+ class MalformedCSVError < RuntimeError; end
421
747
 
422
- extend Forwardable
423
- def_delegators :@table, :empty?, :length, :size
424
-
425
- #
426
- # Returns a duplicate table object, in column mode. This is handy for
427
- # chaining in a single call without changing the table mode, but be aware
428
- # that this method can consume a fair amount of memory for bigger data sets.
429
- #
430
- # This method returns the duplicate table for chaining. Don't chain
431
- # destructive methods (like []=()) this way though, since you are working
432
- # with a duplicate.
433
748
  #
434
- def by_col
435
- self.class.new(@table.dup).by_col!
436
- end
437
-
438
- #
439
- # Switches the mode of this table to column mode. All calls to indexing and
440
- # iteration methods will work with columns until the mode is changed again.
441
- #
442
- # This method returns the table and is safe to chain.
443
- #
444
- def by_col!
445
- @mode = :col
446
-
447
- self
448
- end
449
-
450
- #
451
- # Returns a duplicate table object, in mixed mode. This is handy for
452
- # chaining in a single call without changing the table mode, but be aware
453
- # that this method can consume a fair amount of memory for bigger data sets.
749
+ # A FieldInfo Struct contains details about a field's position in the data
750
+ # source it was read from. FasterCSV will pass this Struct to some blocks
751
+ # that make decisions based on field structure. See
752
+ # FasterCSV.convert_fields() for an example.
454
753
  #
455
- # This method returns the duplicate table for chaining. Don't chain
456
- # destructive methods (like []=()) this way though, since you are working
457
- # with a duplicate.
458
- #
459
- def by_col_or_row
460
- self.class.new(@table.dup).by_col_or_row!
461
- end
462
-
463
- #
464
- # Switches the mode of this table to mixed mode. All calls to indexing and
465
- # iteration methods will use the default intelligent indexing system until
466
- # the mode is changed again. In mixed mode an index is assumed to be a row
467
- # reference while anything else is assumed to be column access by headers.
468
- #
469
- # This method returns the table and is safe to chain.
470
- #
471
- def by_col_or_row!
472
- @mode = :col_or_row
473
-
474
- self
475
- end
476
-
754
+ # <b><tt>index</tt></b>:: The zero-based index of the field in its row.
755
+ # <b><tt>line</tt></b>:: The line of the data source this row is from.
756
+ # <b><tt>header</tt></b>:: The header for the column, when available.
477
757
  #
478
- # Returns a duplicate table object, in row mode. This is handy for chaining
479
- # in a single call without changing the table mode, but be aware that this
480
- # method can consume a fair amount of memory for bigger data sets.
758
+ FieldInfo = Struct.new(:index, :line, :header)
759
+
760
+ # A Regexp used to find and convert some common Date formats.
761
+ DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
762
+ \d{4}-\d{2}-\d{2} )\z /x
763
+ # A Regexp used to find and convert some common DateTime formats.
764
+ DateTimeMatcher =
765
+ / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
766
+ \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
767
+ #
768
+ # This Hash holds the built-in converters of FasterCSV that can be accessed by
769
+ # name. You can select Converters with FasterCSV.convert() or through the
770
+ # +options+ Hash passed to FasterCSV::new().
771
+ #
772
+ # <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
773
+ # <b><tt>:float</tt></b>:: Converts any field Float() accepts.
774
+ # <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
775
+ # and <tt>:float</tt>.
776
+ # <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
777
+ # <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
778
+ # <b><tt>:all</tt></b>:: All built-in converters. A combination of
779
+ # <tt>:date_time</tt> and <tt>:numeric</tt>.
780
+ #
781
+ # This Hash is intentionally left unfrozen and users should feel free to add
782
+ # values to it that can be accessed by all FasterCSV objects.
783
+ #
784
+ # To add a combo field, the value should be an Array of names. Combo fields
785
+ # can be nested with other combo fields.
786
+ #
787
+ Converters = { :integer => lambda { |f| Integer(f) rescue f },
788
+ :float => lambda { |f| Float(f) rescue f },
789
+ :numeric => [:integer, :float],
790
+ :date => lambda { |f|
791
+ f =~ DateMatcher ? (Date.parse(f) rescue f) : f
792
+ },
793
+ :date_time => lambda { |f|
794
+ f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
795
+ },
796
+ :all => [:date_time, :numeric] }
797
+
481
798
  #
482
- # This method returns the duplicate table for chaining. Don't chain
483
- # destructive methods (like []=()) this way though, since you are working
484
- # with a duplicate.
799
+ # This Hash holds the built-in header converters of FasterCSV that can be
800
+ # accessed by name. You can select HeaderConverters with
801
+ # FasterCSV.header_convert() or through the +options+ Hash passed to
802
+ # FasterCSV::new().
485
803
  #
486
- def by_row
487
- self.class.new(@table.dup).by_row!
488
- end
489
-
804
+ # <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
805
+ # <b><tt>:symbol</tt></b>:: The header String is downcased, spaces are
806
+ # replaced with underscores, non-word characters
807
+ # are dropped, and finally to_sym() is called.
490
808
  #
491
- # Switches the mode of this table to row mode. All calls to indexing and
492
- # iteration methods will work with rows until the mode is changed again.
809
+ # This Hash is intentionally left unfrozen and users should feel free to add
810
+ # values to it that can be accessed by all FasterCSV objects.
493
811
  #
494
- # This method returns the table and is safe to chain.
812
+ # To add a combo field, the value should be an Array of names. Combo fields
813
+ # can be nested with other combo fields.
495
814
  #
496
- def by_row!
497
- @mode = :row
498
-
499
- self
500
- end
501
-
815
+ HeaderConverters = {
816
+ :downcase => lambda { |h| h.downcase },
817
+ :symbol => lambda { |h|
818
+ h.downcase.tr(" ", "_").delete("^a-z0-9_").to_sym
819
+ }
820
+ }
821
+
502
822
  #
503
- # Returns the headers for the first row of this table (assumed to match all
504
- # other rows). An empty Array is returned for empty tables.
823
+ # The options used when no overrides are given by calling code. They are:
824
+ #
825
+ # <b><tt>:col_sep</tt></b>:: <tt>","</tt>
826
+ # <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
827
+ # <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
828
+ # <b><tt>:converters</tt></b>:: +nil+
829
+ # <b><tt>:unconverted_fields</tt></b>:: +nil+
830
+ # <b><tt>:headers</tt></b>:: +false+
831
+ # <b><tt>:return_headers</tt></b>:: +false+
832
+ # <b><tt>:header_converters</tt></b>:: +nil+
833
+ # <b><tt>:skip_blanks</tt></b>:: +false+
834
+ # <b><tt>:force_quotes</tt></b>:: +false+
835
+ #
836
+ DEFAULT_OPTIONS = { :col_sep => ",",
837
+ :row_sep => :auto,
838
+ :quote_char => '"',
839
+ :converters => nil,
840
+ :unconverted_fields => nil,
841
+ :headers => false,
842
+ :return_headers => false,
843
+ :header_converters => nil,
844
+ :skip_blanks => false,
845
+ :force_quotes => false }.freeze
846
+
505
847
  #
506
- def headers
507
- if @table.empty?
508
- Array.new
509
- else
510
- @table.first.headers
848
+ # This method will build a drop-in replacement for many of the standard CSV
849
+ # methods. It allows you to write code like:
850
+ #
851
+ # begin
852
+ # require "faster_csv"
853
+ # FasterCSV.build_csv_interface
854
+ # rescue LoadError
855
+ # require "csv"
856
+ # end
857
+ # # ... use CSV here ...
858
+ #
859
+ # This is not a complete interface with completely identical behavior.
860
+ # However, it is intended to be close enough that you won't notice the
861
+ # difference in most cases. CSV methods supported are:
862
+ #
863
+ # * foreach()
864
+ # * generate_line()
865
+ # * open()
866
+ # * parse()
867
+ # * parse_line()
868
+ # * readlines()
869
+ #
870
+ # Be warned that this interface is slower than vanilla FasterCSV due to the
871
+ # extra layer of method calls. Depending on usage, this can slow it down to
872
+ # near CSV speeds.
873
+ #
874
+ def self.build_csv_interface
875
+ Object.const_set(:CSV, Class.new).class_eval do
876
+ def self.foreach(path, rs = :auto, &block) # :nodoc:
877
+ FasterCSV.foreach(path, :row_sep => rs, &block)
878
+ end
879
+
880
+ def self.generate_line(row, fs = ",", rs = "") # :nodoc:
881
+ FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
882
+ end
883
+
884
+ def self.open(path, mode, fs = ",", rs = :auto, &block) # :nodoc:
885
+ if block and mode.include? "r"
886
+ FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs) do |csv|
887
+ csv.each(&block)
888
+ end
889
+ else
890
+ FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs, &block)
891
+ end
892
+ end
893
+
894
+ def self.parse(str_or_readable, fs = ",", rs = :auto, &block) # :nodoc:
895
+ FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
896
+ end
897
+
898
+ def self.parse_line(src, fs = ",", rs = :auto) # :nodoc:
899
+ FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
900
+ end
901
+
902
+ def self.readlines(path, rs = :auto) # :nodoc:
903
+ FasterCSV.readlines(path, :row_sep => rs)
904
+ end
511
905
  end
512
906
  end
513
-
514
- #
515
- # In the default mixed mode, this method returns rows for index access and
516
- # columns for header access. You can force the index association by first
517
- # calling by_col!() or by_row!().
518
- #
519
- # Columns are returned as an Array of values. Altering that Array has no
520
- # effect on the table.
907
+
521
908
  #
522
- def [](index_or_header)
523
- if @mode == :row or # by index
524
- (@mode == :col_or_row and index_or_header.is_a? Integer)
525
- @table[index_or_header]
526
- else # by header
527
- @table.map { |row| row[index_or_header] }
909
+ # This method allows you to serialize an Array of Ruby objects to a String or
910
+ # File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
911
+ # useful for spreadsheet and database interaction.
912
+ #
913
+ # Out of the box, this method is intended to work with simple data objects or
914
+ # Structs. It will serialize a list of instance variables and/or
915
+ # Struct.members().
916
+ #
917
+ # If you need more complicated serialization, you can control the process
918
+ # by adding methods to the class to be serialized.
919
+ #
920
+ # A class method csv_meta() is responsible for returning the first row of the
921
+ # document (as an Array). This row is considered to be a Hash of the form
922
+ # key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
923
+ # key with a value of the stringified class name and FasterCSV::dump() will
924
+ # create this, if you do not define this method. This method is only called
925
+ # on the first object of the Array.
926
+ #
927
+ # The next method you can provide is an instance method called csv_headers().
928
+ # This method is expected to return the second line of the document (again as
929
+ # an Array), which is to be used to give each column a header. By default,
930
+ # FasterCSV::load() will set an instance variable if the field header starts
931
+ # with an @ character or call send() passing the header as the method name and
932
+ # the field value as an argument. This method is only called on the first
933
+ # object of the Array.
934
+ #
935
+ # Finally, you can provide an instance method called csv_dump(), which will
936
+ # be passed the headers. This should return an Array of fields that can be
937
+ # serialized for this object. This method is called once for every object in
938
+ # the Array.
939
+ #
940
+ # The +io+ parameter can be used to serialize to a File, and +options+ can be
941
+ # anything FasterCSV::new() accepts.
942
+ #
943
+ def self.dump(ary_of_objs, io = "", options = Hash.new)
944
+ obj_template = ary_of_objs.first
945
+
946
+ csv = FasterCSV.new(io, options)
947
+
948
+ # write meta information
949
+ begin
950
+ csv << obj_template.class.csv_meta
951
+ rescue NoMethodError
952
+ csv << [:class, obj_template.class]
528
953
  end
529
- end
530
-
531
- #
532
- # In the default mixed mode, this method assigns rows for index access and
533
- # columns for header access. You can force the index association by first
534
- # calling by_col!() or by_row!().
535
- #
536
- # Rows may be set to an Array of values (which will inherit the table's
537
- # headers()) or a FasterCSV::Row.
538
- #
539
- # Columns may be set to a single value, which is copied to each row of the
540
- # column, or an Array of values. Arrays of values are assigned to rows top
541
- # to bottom in row major order. Excess values are ignored and if the Array
542
- # does not have a value for each row the extra rows will receive a +nil+.
543
- #
544
- # Assigning to an existing column or row clobbers the data. Assigning to
545
- # new columns creates them at the right end of the table.
546
- #
547
- def []=(index_or_header, value)
548
- if @mode == :row or # by index
549
- (@mode == :col_or_row and index_or_header.is_a? Integer)
550
- if value.is_a? Array
551
- @table[index_or_header] = Row.new(headers, value)
552
- else
553
- @table[index_or_header] = value
954
+
955
+ # write headers
956
+ begin
957
+ headers = obj_template.csv_headers
958
+ rescue NoMethodError
959
+ headers = obj_template.instance_variables.sort
960
+ if obj_template.class.ancestors.find { |cls| cls.to_s =~ /\AStruct\b/ }
961
+ headers += obj_template.members.map { |mem| "#{mem}=" }.sort
554
962
  end
555
- else # set column
556
- if value.is_a? Array # multiple values
557
- @table.each_with_index do |row, i|
558
- if row.header_row?
559
- row[index_or_header] = index_or_header
560
- else
561
- row[index_or_header] = value[i]
562
- end
563
- end
564
- else # repeated value
565
- @table.each do |row|
566
- if row.header_row?
567
- row[index_or_header] = index_or_header
963
+ end
964
+ csv << headers
965
+
966
+ # serialize each object
967
+ ary_of_objs.each do |obj|
968
+ begin
969
+ csv << obj.csv_dump(headers)
970
+ rescue NoMethodError
971
+ csv << headers.map do |var|
972
+ if var[0] == ?@
973
+ obj.instance_variable_get(var)
568
974
  else
569
- row[index_or_header] = value
975
+ obj[var[0..-2]]
570
976
  end
571
977
  end
572
978
  end
573
979
  end
980
+
981
+ if io.is_a? String
982
+ csv.string
983
+ else
984
+ csv.close
985
+ end
574
986
  end
575
-
987
+
576
988
  #
577
- # The mixed mode default is to treat a list of indices as row access,
578
- # returning the rows indicated. Anything else is considered columnar
579
- # access. For columnar access, the return set has an Array for each row
580
- # with the values indicated by the headers in each Array. You can force
581
- # column or row mode using by_col!() or by_row!().
582
- #
583
- # You cannot mix column and row access.
584
- #
585
- def values_at(*indices_or_headers)
586
- if @mode == :row or # by indices
587
- ( @mode == :col_or_row and indices_or_headers.all? do |index|
588
- index.is_a?(Integer) or
589
- ( index.is_a?(Range) and
590
- index.first.is_a?(Integer) and
591
- index.last.is_a?(Integer) )
592
- end )
593
- @table.values_at(*indices_or_headers)
594
- else # by headers
595
- @table.map { |row| row.values_at(*indices_or_headers) }
989
+ # :call-seq:
990
+ # filter( options = Hash.new ) { |row| ... }
991
+ # filter( input, options = Hash.new ) { |row| ... }
992
+ # filter( input, output, options = Hash.new ) { |row| ... }
993
+ #
994
+ # This method is a convenience for building Unix-like filters for CSV data.
995
+ # Each row is yielded to the provided block which can alter it as needed.
996
+ # After the block returns, the row is appended to +output+ altered or not.
997
+ #
998
+ # The +input+ and +output+ arguments can be anything FasterCSV::new() accepts
999
+ # (generally String or IO objects). If not given, they default to
1000
+ # <tt>ARGF</tt> and <tt>$stdout</tt>.
1001
+ #
1002
+ # The +options+ parameter is also filtered down to FasterCSV::new() after some
1003
+ # clever key parsing. Any key beginning with <tt>:in_</tt> or
1004
+ # <tt>:input_</tt> will have that leading identifier stripped and will only
1005
+ # be used in the +options+ Hash for the +input+ object. Keys starting with
1006
+ # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
1007
+ # are assigned to both objects.
1008
+ #
1009
+ # The <tt>:output_row_sep</tt> +option+ defaults to
1010
+ # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1011
+ #
1012
+ def self.filter(*args)
1013
+ # parse options for input, output, or both
1014
+ in_options, out_options = Hash.new, {:row_sep => $INPUT_RECORD_SEPARATOR}
1015
+ if args.last.is_a? Hash
1016
+ args.pop.each do |key, value|
1017
+ case key.to_s
1018
+ when /\Ain(?:put)?_(.+)\Z/
1019
+ in_options[$1.to_sym] = value
1020
+ when /\Aout(?:put)?_(.+)\Z/
1021
+ out_options[$1.to_sym] = value
1022
+ else
1023
+ in_options[key] = value
1024
+ out_options[key] = value
1025
+ end
1026
+ end
1027
+ end
1028
+ # build input and output wrappers
1029
+ input = FasterCSV.new(args.shift || ARGF, in_options)
1030
+ output = FasterCSV.new(args.shift || $stdout, out_options)
1031
+
1032
+ # read, yield, write
1033
+ input.each do |row|
1034
+ yield row
1035
+ output << row
596
1036
  end
597
1037
  end
598
1038
 
599
1039
  #
600
- # Adds a new row to the bottom end of this table. You can provide an Array,
601
- # which will be converted to a FasterCSV::Row (inheriting the table's
602
- # headers()), or a FasterCSV::Row.
1040
+ # This method is intended as the primary interface for reading CSV files. You
1041
+ # pass a +path+ and any +options+ you wish to set for the read. Each row of
1042
+ # the file will be passed to the provided +block+ in turn.
603
1043
  #
604
- # This method returns the table for chaining.
1044
+ # The +options+ parameter can be anything FasterCSV::new() understands.
605
1045
  #
606
- def <<(row_or_array)
607
- if row_or_array.is_a? Array # append Array
608
- @table << Row.new(headers, row_or_array)
609
- else # append Row
610
- @table << row_or_array
1046
+ def self.foreach(path, options = Hash.new, &block)
1047
+ open(path, "rb", options) do |csv|
1048
+ csv.each(&block)
611
1049
  end
612
-
613
- self # for chaining
614
1050
  end
615
-
616
- #
617
- # A shortcut for appending multiple rows. Equivalent to:
1051
+
618
1052
  #
619
- # rows.each { |row| self << row }
1053
+ # :call-seq:
1054
+ # generate( str, options = Hash.new ) { |faster_csv| ... }
1055
+ # generate( options = Hash.new ) { |faster_csv| ... }
620
1056
  #
621
- # This method returns the table for chaining.
1057
+ # This method wraps a String you provide, or an empty default String, in a
1058
+ # FasterCSV object which is passed to the provided block. You can use the
1059
+ # block to append CSV rows to the String and when the block exits, the
1060
+ # final String will be returned.
622
1061
  #
623
- def push(*rows)
624
- rows.each { |row| self << row }
625
-
626
- self # for chaining
627
- end
628
-
1062
+ # Note that a passed String *is* modified by this method. Call dup() before
1063
+ # passing if you need a new String.
629
1064
  #
630
- # Removes and returns the indicated column or row. In the default mixed
631
- # mode indices refer to rows and everything else is assumed to be a column
632
- # header. Use by_col!() or by_row!() to force the lookup.
1065
+ # The +options+ parameter can be anything FasterCSV::new() understands.
633
1066
  #
634
- def delete(index_or_header)
635
- if @mode == :row or # by index
636
- (@mode == :col_or_row and index_or_header.is_a? Integer)
637
- @table.delete_at(index_or_header)
638
- else # by header
639
- @table.map { |row| row.delete(index_or_header).last }
1067
+ def self.generate(*args)
1068
+ # add a default empty String, if none was given
1069
+ if args.first.is_a? String
1070
+ io = StringIO.new(args.shift)
1071
+ io.seek(0, IO::SEEK_END)
1072
+ args.unshift(io)
1073
+ else
1074
+ args.unshift("")
640
1075
  end
1076
+ faster_csv = new(*args) # wrap
1077
+ yield faster_csv # yield for appending
1078
+ faster_csv.string # return final String
641
1079
  end
642
-
1080
+
643
1081
  #
644
- # Removes any column or row for which the block returns +true+. In the
645
- # default mixed mode or row mode, iteration is the standard row major
646
- # walking of rows. In column mode, interation will +yield+ two element
647
- # tuples containing the column name and an Array of values for that column.
1082
+ # This method is a shortcut for converting a single row (Array) into a CSV
1083
+ # String.
648
1084
  #
649
- # This method returns the table for chaining.
1085
+ # The +options+ parameter can be anything FasterCSV::new() understands.
650
1086
  #
651
- def delete_if(&block)
652
- if @mode == :row or @mode == :col_or_row # by index
653
- @table.delete_if(&block)
654
- else # by header
655
- to_delete = Array.new
656
- headers.each_with_index do |header, i|
657
- to_delete << header if block[[header, self[header]]]
658
- end
659
- to_delete.map { |header| delete(header) }
660
- end
661
-
662
- self # for chaining
1087
+ # The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
1088
+ # (<tt>$/</tt>) when calling this method.
1089
+ #
1090
+ def self.generate_line(row, options = Hash.new)
1091
+ options = {:row_sep => $INPUT_RECORD_SEPARATOR}.merge(options)
1092
+ (new("", options) << row).string
663
1093
  end
664
-
665
- include Enumerable
666
-
1094
+
667
1095
  #
668
- # In the default mixed mode or row mode, iteration is the standard row major
669
- # walking of rows. In column mode, interation will +yield+ two element
670
- # tuples containing the column name and an Array of values for that column.
1096
+ # This method will return a FasterCSV instance, just like FasterCSV::new(),
1097
+ # but the instance will be cached and returned for all future calls to this
1098
+ # method for the same +data+ object (tested by Object#object_id()) with the
1099
+ # same +options+.
671
1100
  #
672
- # This method returns the table for chaining.
1101
+ # If a block is given, the instance is passed to the block and the return
1102
+ # value becomes the return value of the block.
673
1103
  #
674
- def each(&block)
675
- if @mode == :col
676
- headers.each { |header| block[[header, self[header]]] }
1104
+ def self.instance(data = $stdout, options = Hash.new)
1105
+ # create a _signature_ for this method call, data object and options
1106
+ sig = [data.object_id] +
1107
+ options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s })
1108
+
1109
+ # fetch or create the instance for this signature
1110
+ @@instances ||= Hash.new
1111
+ instance = (@@instances[sig] ||= new(data, options))
1112
+
1113
+ if block_given?
1114
+ yield instance # run block, if given, returning result
677
1115
  else
678
- @table.each(&block)
1116
+ instance # or return the instance
679
1117
  end
680
-
681
- self # for chaining
682
- end
683
-
684
- # Returns +true+ if all rows of this table ==() +other+'s rows.
685
- def ==(other)
686
- @table == other.table
687
1118
  end
688
-
689
- #
690
- # Returns the table as an Array of Arrays. Headers will be the first row,
691
- # then all of the field rows will follow.
1119
+
692
1120
  #
693
- def to_a
694
- @table.inject([headers]) do |array, row|
695
- if row.header_row?
696
- array
697
- else
698
- array + [row.fields]
699
- end
700
- end
701
- end
702
-
1121
+ # This method is the reading counterpart to FasterCSV::dump(). See that
1122
+ # method for a detailed description of the process.
703
1123
  #
704
- # Returns the table as a complete CSV String. Headers will be listed first,
705
- # then all of the field rows.
1124
+ # You can customize loading by adding a class method called csv_load() which
1125
+ # will be passed a Hash of meta information, an Array of headers, and an Array
1126
+ # of fields for the object the method is expected to return.
706
1127
  #
707
- # This method assumes you want the Table.headers(), unless you explicitly
708
- # pass <tt>:write_headers => false</tt>.
1128
+ # Remember that all fields will be Strings after this load. If you need
1129
+ # something else, use +options+ to setup converters or provide a custom
1130
+ # csv_load() implementation.
709
1131
  #
710
- def to_csv(options = Hash.new)
711
- wh = options.fetch(:write_headers, true)
712
- @table.inject(wh ? [headers.to_csv(options)] : [ ]) do |rows, row|
713
- if row.header_row?
714
- rows
715
- else
716
- rows + [row.fields.to_csv(options)]
717
- end
718
- end.join
719
- end
720
- alias_method :to_s, :to_csv
721
-
722
- def inspect
723
- "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
724
- end
725
- end
1132
+ def self.load(io_or_str, options = Hash.new)
1133
+ csv = FasterCSV.new(io_or_str, options)
726
1134
 
727
- # The error thrown when the parser encounters illegal CSV formatting.
728
- class MalformedCSVError < RuntimeError; end
729
-
730
- #
731
- # A FieldInfo Struct contains details about a field's position in the data
732
- # source it was read from. FasterCSV will pass this Struct to some blocks
733
- # that make decisions based on field structure. See
734
- # FasterCSV.convert_fields() for an example.
735
- #
736
- # <b><tt>index</tt></b>:: The zero-based index of the field in its row.
737
- # <b><tt>line</tt></b>:: The line of the data source this row is from.
738
- # <b><tt>header</tt></b>:: The header for the column, when available.
739
- #
740
- FieldInfo = Struct.new(:index, :line, :header)
741
-
742
- # A Regexp used to find and convert some common Date formats.
743
- DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
744
- \d{4}-\d{2}-\d{2} )\z /x
745
- # A Regexp used to find and convert some common DateTime formats.
746
- DateTimeMatcher =
747
- / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
748
- \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
749
- #
750
- # This Hash holds the built-in converters of FasterCSV that can be accessed by
751
- # name. You can select Converters with FasterCSV.convert() or through the
752
- # +options+ Hash passed to FasterCSV::new().
753
- #
754
- # <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
755
- # <b><tt>:float</tt></b>:: Converts any field Float() accepts.
756
- # <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
757
- # and <tt>:float</tt>.
758
- # <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
759
- # <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
760
- # <b><tt>:all</tt></b>:: All built-in converters. A combination of
761
- # <tt>:date_time</tt> and <tt>:numeric</tt>.
762
- #
763
- # This Hash is intetionally left unfrozen and users should feel free to add
764
- # values to it that can be accessed by all FasterCSV objects.
765
- #
766
- # To add a combo field, the value should be an Array of names. Combo fields
767
- # can be nested with other combo fields.
768
- #
769
- Converters = { :integer => lambda { |f| Integer(f) rescue f },
770
- :float => lambda { |f| Float(f) rescue f },
771
- :numeric => [:integer, :float],
772
- :date => lambda { |f|
773
- f =~ DateMatcher ? (Date.parse(f) rescue f) : f
774
- },
775
- :date_time => lambda { |f|
776
- f =~ DateTimeMatcher ? (DateTime.parse(f) rescue f) : f
777
- },
778
- :all => [:date_time, :numeric] }
1135
+ # load meta information
1136
+ meta = Hash[*csv.shift]
1137
+ cls = meta["class"].split("::").inject(Object) do |c, const|
1138
+ c.const_get(const)
1139
+ end
779
1140
 
780
- #
781
- # This Hash holds the built-in header converters of FasterCSV that can be
782
- # accessed by name. You can select HeaderConverters with
783
- # FasterCSV.header_convert() or through the +options+ Hash passed to
784
- # FasterCSV::new().
785
- #
786
- # <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
787
- # <b><tt>:symbol</tt></b>:: The header String is downcased, spaces are
788
- # replaced with underscores, non-word characters
789
- # are dropped, and finally to_sym() is called.
790
- #
791
- # This Hash is intetionally left unfrozen and users should feel free to add
792
- # values to it that can be accessed by all FasterCSV objects.
793
- #
794
- # To add a combo field, the value should be an Array of names. Combo fields
795
- # can be nested with other combo fields.
796
- #
797
- HeaderConverters = {
798
- :downcase => lambda { |h| h.downcase },
799
- :symbol => lambda { |h|
800
- h.downcase.tr(" ", "_").delete("^a-z0-9_").to_sym
801
- }
802
- }
803
-
804
- #
805
- # The options used when no overrides are given by calling code. They are:
806
- #
807
- # <b><tt>:col_sep</tt></b>:: <tt>","</tt>
808
- # <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
809
- # <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
810
- # <b><tt>:converters</tt></b>:: +nil+
811
- # <b><tt>:unconverted_fields</tt></b>:: +nil+
812
- # <b><tt>:headers</tt></b>:: +false+
813
- # <b><tt>:return_headers</tt></b>:: +false+
814
- # <b><tt>:header_converters</tt></b>:: +nil+
815
- # <b><tt>:skip_blanks</tt></b>:: +false+
816
- # <b><tt>:force_quotes</tt></b>:: +false+
817
- #
818
- DEFAULT_OPTIONS = { :col_sep => ",",
819
- :row_sep => :auto,
820
- :quote_char => '"',
821
- :converters => nil,
822
- :unconverted_fields => nil,
823
- :headers => false,
824
- :return_headers => false,
825
- :header_converters => nil,
826
- :skip_blanks => false,
827
- :force_quotes => false }.freeze
828
-
829
- #
830
- # This method will build a drop-in replacement for many of the standard CSV
831
- # methods. It allows you to write code like:
832
- #
833
- # begin
834
- # require "faster_csv"
835
- # FasterCSV.build_csv_interface
836
- # rescue LoadError
837
- # require "csv"
838
- # end
839
- # # ... use CSV here ...
840
- #
841
- # This is not a complete interface with completely identical behavior.
842
- # However, it is intended to be close enough that you won't notice the
843
- # difference in most cases. CSV methods supported are:
844
- #
845
- # * foreach()
846
- # * generate_line()
847
- # * open()
848
- # * parse()
849
- # * parse_line()
850
- # * readlines()
851
- #
852
- # Be warned that this interface is slower than vanilla FasterCSV due to the
853
- # extra layer of method calls. Depending on usage, this can slow it down to
854
- # near CSV speeds.
855
- #
856
- def self.build_csv_interface
857
- Object.const_set(:CSV, Class.new).class_eval do
858
- def self.foreach(path, rs = :auto, &block) # :nodoc:
859
- FasterCSV.foreach(path, :row_sep => rs, &block)
860
- end
861
-
862
- def self.generate_line(row, fs = ",", rs = "") # :nodoc:
863
- FasterCSV.generate_line(row, :col_sep => fs, :row_sep => rs)
864
- end
865
-
866
- def self.open(path, mode, fs = ",", rs = :auto, &block) # :nodoc:
867
- if block and mode.include? "r"
868
- FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs) do |csv|
869
- csv.each(&block)
1141
+ # load headers
1142
+ headers = csv.shift
1143
+
1144
+ # unserialize each object stored in the file
1145
+ results = csv.inject(Array.new) do |all, row|
1146
+ begin
1147
+ obj = cls.csv_load(meta, headers, row)
1148
+ rescue NoMethodError
1149
+ obj = cls.allocate
1150
+ headers.zip(row) do |name, value|
1151
+ if name[0] == ?@
1152
+ obj.instance_variable_set(name, value)
1153
+ else
1154
+ obj.send(name, value)
1155
+ end
870
1156
  end
871
- else
872
- FasterCSV.open(path, mode, :col_sep => fs, :row_sep => rs, &block)
873
1157
  end
1158
+ all << obj
874
1159
  end
875
-
876
- def self.parse(str_or_readable, fs = ",", rs = :auto, &block) # :nodoc:
877
- FasterCSV.parse(str_or_readable, :col_sep => fs, :row_sep => rs, &block)
878
- end
879
-
880
- def self.parse_line(src, fs = ",", rs = :auto) # :nodoc:
881
- FasterCSV.parse_line(src, :col_sep => fs, :row_sep => rs)
882
- end
883
-
884
- def self.readlines(path, rs = :auto) # :nodoc:
885
- FasterCSV.readlines(path, :row_sep => rs)
886
- end
887
- end
888
- end
889
-
890
- #
891
- # This method allows you to serialize an Array of Ruby objects to a String or
892
- # File of CSV data. This is not as powerful as Marshal or YAML, but perhaps
893
- # useful for spreadsheet and database interaction.
894
- #
895
- # Out of the box, this method is intended to work with simple data objects or
896
- # Structs. It will serialize a list of instance variables and/or
897
- # Struct.members().
898
- #
899
- # If you need need more complicated serialization, you can control the process
900
- # by adding methods to the class to be serialized.
901
- #
902
- # A class method csv_meta() is responsible for returning the first row of the
903
- # document (as an Array). This row is considered to be a Hash of the form
904
- # key_1,value_1,key_2,value_2,... FasterCSV::load() expects to find a class
905
- # key with a value of the stringified class name and FasterCSV::dump() will
906
- # create this, if you do not define this method. This method is only called
907
- # on the first object of the Array.
908
- #
909
- # The next method you can provide is an instance method called csv_headers().
910
- # This method is expected to return the second line of the document (again as
911
- # an Array), which is to be used to give each column a header. By default,
912
- # FasterCSV::load() will set an instance variable if the field header starts
913
- # with an @ character or call send() passing the header as the method name and
914
- # the field value as an argument. This method is only called on the first
915
- # object of the Array.
916
- #
917
- # Finally, you can provide an instance method called csv_dump(), which will
918
- # be passed the headers. This should return an Array of fields that can be
919
- # serialized for this object. This method is called once for every object in
920
- # the Array.
921
- #
922
- # The +io+ parameter can be used to serialize to a File, and +options+ can be
923
- # anything FasterCSV::new() accepts.
924
- #
925
- def self.dump(ary_of_objs, io = "", options = Hash.new)
926
- obj_template = ary_of_objs.first
927
-
928
- csv = FasterCSV.new(io, options)
929
-
930
- # write meta information
931
- begin
932
- csv << obj_template.class.csv_meta
933
- rescue NoMethodError
934
- csv << [:class, obj_template.class]
935
- end
936
1160
 
937
- # write headers
938
- begin
939
- headers = obj_template.csv_headers
940
- rescue NoMethodError
941
- headers = obj_template.instance_variables.sort
942
- if obj_template.class.ancestors.find { |cls| cls.to_s =~ /\AStruct\b/ }
943
- headers += obj_template.members.map { |mem| "#{mem}=" }.sort
944
- end
1161
+ csv.close unless io_or_str.is_a? String
1162
+
1163
+ results
945
1164
  end
946
- csv << headers
947
-
948
- # serialize each object
949
- ary_of_objs.each do |obj|
950
- begin
951
- csv << obj.csv_dump(headers)
952
- rescue NoMethodError
953
- csv << headers.map do |var|
954
- if var[0] == ?@
955
- obj.instance_variable_get(var)
956
- else
957
- obj[var[0..-2]]
958
- end
1165
+
1166
+ #
1167
+ # :call-seq:
1168
+ # open( filename, mode="rb", options = Hash.new ) { |faster_csv| ... }
1169
+ # open( filename, mode="rb", options = Hash.new )
1170
+ #
1171
+ # This method opens an IO object, and wraps that with FasterCSV. This is
1172
+ # intended as the primary interface for writing a CSV file.
1173
+ #
1174
+ # You may pass any +args+ Ruby's open() understands followed by an optional
1175
+ # Hash containing any +options+ FasterCSV::new() understands.
1176
+ #
1177
+ # This method works like Ruby's open() call, in that it will pass a FasterCSV
1178
+ # object to a provided block and close it when the block terminates, or it
1179
+ # will return the FasterCSV object when no block is provided. (*Note*: This
1180
+ # is different from the standard CSV library which passes rows to the block.
1181
+ # Use FasterCSV::foreach() for that behavior.)
1182
+ #
1183
+ # An opened FasterCSV object will delegate to many IO methods, for
1184
+ # convenience. You may call:
1185
+ #
1186
+ # * binmode()
1187
+ # * close()
1188
+ # * close_read()
1189
+ # * close_write()
1190
+ # * closed?()
1191
+ # * eof()
1192
+ # * eof?()
1193
+ # * fcntl()
1194
+ # * fileno()
1195
+ # * flush()
1196
+ # * fsync()
1197
+ # * ioctl()
1198
+ # * isatty()
1199
+ # * pid()
1200
+ # * pos()
1201
+ # * reopen()
1202
+ # * seek()
1203
+ # * stat()
1204
+ # * sync()
1205
+ # * sync=()
1206
+ # * tell()
1207
+ # * to_i()
1208
+ # * to_io()
1209
+ # * tty?()
1210
+ #
1211
+ def self.open(*args)
1212
+ # find the +options+ Hash
1213
+ options = if args.last.is_a? Hash then args.pop else Hash.new end
1214
+ # default to a binary open mode
1215
+ args << "rb" if args.size == 1
1216
+ # wrap a File opened with the remaining +args+
1217
+ csv = new(File.open(*args), options)
1218
+
1219
+ # handle blocks like Ruby's open(), not like the CSV library
1220
+ if block_given?
1221
+ begin
1222
+ yield csv
1223
+ ensure
1224
+ csv.close
959
1225
  end
1226
+ else
1227
+ csv
960
1228
  end
961
1229
  end
962
-
963
- if io.is_a? String
964
- csv.string
965
- else
966
- csv.close
967
- end
968
- end
969
-
970
- #
971
- # :call-seq:
972
- # filter( options = Hash.new ) { |row| ... }
973
- # filter( input, options = Hash.new ) { |row| ... }
974
- # filter( input, output, options = Hash.new ) { |row| ... }
975
- #
976
- # This method is a convenience for building Unix-like filters for CSV data.
977
- # Each row is yielded to the provided block which can alter it as needed.
978
- # After the block returns, the row is appended to +output+ altered or not.
979
- #
980
- # The +input+ and +output+ arguments can be anything FasterCSV::new() accepts
981
- # (generally String or IO objects). If not given, they default to
982
- # <tt>ARGF</tt> and <tt>$stdout</tt>.
983
- #
984
- # The +options+ parameter is also filtered down to FasterCSV::new() after some
985
- # clever key parsing. Any key beginning with <tt>:in_</tt> or
986
- # <tt>:input_</tt> will have that leading identifier stripped and will only
987
- # be used in the +options+ Hash for the +input+ object. Keys starting with
988
- # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
989
- # are assigned to both objects.
990
- #
991
- # The <tt>:output_row_sep</tt> +option+ defaults to
992
- # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
993
- #
994
- def self.filter(*args)
995
- # parse options for input, output, or both
996
- in_options, out_options = Hash.new, {:row_sep => $INPUT_RECORD_SEPARATOR}
997
- if args.last.is_a? Hash
998
- args.pop.each do |key, value|
999
- case key.to_s
1000
- when /\Ain(?:put)?_(.+)\Z/
1001
- in_options[$1.to_sym] = value
1002
- when /\Aout(?:put)?_(.+)\Z/
1003
- out_options[$1.to_sym] = value
1004
- else
1005
- in_options[key] = value
1006
- out_options[key] = value
1230
+
1231
+ #
1232
+ # :call-seq:
1233
+ # parse( str, options = Hash.new ) { |row| ... }
1234
+ # parse( str, options = Hash.new )
1235
+ #
1236
+ # This method can be used to easily parse CSV out of a String. You may either
1237
+ # provide a +block+ which will be called with each row of the String in turn,
1238
+ # or just use the returned Array of Arrays (when no +block+ is given).
1239
+ #
1240
+ # You pass your +str+ to read from, and an optional +options+ Hash containing
1241
+ # anything FasterCSV::new() understands.
1242
+ #
1243
+ def self.parse(*args, &block)
1244
+ csv = new(*args)
1245
+ if block.nil? # slurp contents, if no block is given
1246
+ begin
1247
+ csv.read
1248
+ ensure
1249
+ csv.close
1007
1250
  end
1251
+ else # or pass each row to a provided block
1252
+ csv.each(&block)
1008
1253
  end
1009
1254
  end
1010
- # build input and output wrappers
1011
- input = FasterCSV.new(args.shift || ARGF, in_options)
1012
- output = FasterCSV.new(args.shift || $stdout, out_options)
1013
-
1014
- # read, yield, write
1015
- input.each do |row|
1016
- yield row
1017
- output << row
1018
- end
1019
- end
1020
-
1021
- #
1022
- # This method is intended as the primary interface for reading CSV files. You
1023
- # pass a +path+ and any +options+ you wish to set for the read. Each row of
1024
- # file will be passed to the provided +block+ in turn.
1025
- #
1026
- # The +options+ parameter can be anything FasterCSV::new() understands.
1027
- #
1028
- def self.foreach(path, options = Hash.new, &block)
1029
- open(path, "rb", options) do |csv|
1030
- csv.each(&block)
1255
+
1256
+ #
1257
+ # This method is a shortcut for converting a single line of a CSV String into
1258
+ # an Array. Note that if +line+ contains multiple rows, anything
1259
+ # beyond the first row is ignored.
1260
+ #
1261
+ # The +options+ parameter can be anything FasterCSV::new() understands.
1262
+ #
1263
+ def self.parse_line(line, options = Hash.new)
1264
+ new(line, options).shift
1031
1265
  end
1032
- end
1033
1266
 
1034
- #
1035
- # :call-seq:
1036
- # generate( str, options = Hash.new ) { |faster_csv| ... }
1037
- # generate( options = Hash.new ) { |faster_csv| ... }
1038
- #
1039
- # This method wraps a String you provide, or an empty default String, in a
1040
- # FasterCSV object which is passed to the provided block. You can use the
1041
- # block to append CSV rows to the String and when the block exits, the
1042
- # final String will be returned.
1043
- #
1044
- # Note that a passed String *is* modfied by this method. Call dup() before
1045
- # passing if you need a new String.
1046
- #
1047
- # The +options+ parameter can be anthing FasterCSV::new() understands.
1048
- #
1049
- def self.generate(*args)
1050
- # add a default empty String, if none was given
1051
- if args.first.is_a? String
1052
- io = StringIO.new(args.shift)
1053
- io.seek(0, IO::SEEK_END)
1054
- args.unshift(io)
1055
- else
1056
- args.unshift("")
1267
+ #
1268
+ # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
1269
+ # file and any +options+ FasterCSV::new() understands.
1270
+ #
1271
+ def self.read(path, options = Hash.new)
1272
+ open(path, "rb", options) { |csv| csv.read }
1057
1273
  end
1058
- faster_csv = new(*args) # wrap
1059
- yield faster_csv # yield for appending
1060
- faster_csv.string # return final String
1061
- end
1062
1274
 
1063
- #
1064
- # This method is a shortcut for converting a single row (Array) into a CSV
1065
- # String.
1066
- #
1067
- # The +options+ parameter can be anthing FasterCSV::new() understands.
1068
- #
1069
- # The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
1070
- # (<tt>$/</tt>) when calling this method.
1071
- #
1072
- def self.generate_line(row, options = Hash.new)
1073
- options = {:row_sep => $INPUT_RECORD_SEPARATOR}.merge(options)
1074
- (new("", options) << row).string
1075
- end
1076
-
1077
- #
1078
- # This method will return a FasterCSV instance, just like FasterCSV::new(),
1079
- # but the instance will be cached and returned for all future calls to this
1080
- # method for the same +data+ object (tested by Object#object_id()) with the
1081
- # same +options+.
1082
- #
1083
- # If a block is given, the instance is passed to the block and the return
1084
- # value becomes the return value of the block.
1085
- #
1086
- def self.instance(data = $stdout, options = Hash.new)
1087
- # create a _signature_ for this method call, data object and options
1088
- sig = [data.object_id] +
1089
- options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s })
1090
-
1091
- # fetch or create the instance for this signature
1092
- @@instances ||= Hash.new
1093
- instance = (@@instances[sig] ||= new(data, options))
1094
-
1095
- if block_given?
1096
- yield instance # run block, if given, returning result
1097
- else
1098
- instance # or return the instance
1275
+ # Alias for FasterCSV::read().
1276
+ def self.readlines(*args)
1277
+ read(*args)
1099
1278
  end
1100
- end
1101
-
1102
- #
1103
- # This method is the reading counterpart to FasterCSV::dump(). See that
1104
- # method for a detailed description of the process.
1105
- #
1106
- # You can customize loading by adding a class method called csv_load() which
1107
- # will be passed a Hash of meta information, an Array of headers, and an Array
1108
- # of fields for the object the method is expected to return.
1109
- #
1110
- # Remember that all fields will be Strings after this load. If you need
1111
- # something else, use +options+ to setup converters or provide a custom
1112
- # csv_load() implementation.
1113
- #
1114
- def self.load(io_or_str, options = Hash.new)
1115
- csv = FasterCSV.new(io_or_str, options)
1116
-
1117
- # load meta information
1118
- meta = Hash[*csv.shift]
1119
- cls = meta["class"].split("::").inject(Object) do |c, const|
1120
- c.const_get(const)
1279
+
1280
+ #
1281
+ # A shortcut for:
1282
+ #
1283
+ # FasterCSV.read( path, { :headers => true,
1284
+ # :converters => :numeric,
1285
+ # :header_converters => :symbol }.merge(options) )
1286
+ #
1287
+ def self.table(path, options = Hash.new)
1288
+ read( path, { :headers => true,
1289
+ :converters => :numeric,
1290
+ :header_converters => :symbol }.merge(options) )
1121
1291
  end
1122
-
1123
- # load headers
1124
- headers = csv.shift
1125
-
1126
- # unserialize each object stored in the file
1127
- results = csv.inject(Array.new) do |all, row|
1128
- begin
1129
- obj = cls.csv_load(meta, headers, row)
1130
- rescue NoMethodError
1131
- obj = cls.allocate
1132
- headers.zip(row) do |name, value|
1133
- if name[0] == ?@
1134
- obj.instance_variable_set(name, value)
1135
- else
1136
- obj.send(name, value)
1137
- end
1138
- end
1292
+
1293
+ #
1294
+ # This constructor will wrap either a String or IO object passed in +data+ for
1295
+ # reading and/or writing. In addition to the FasterCSV instance methods,
1296
+ # several IO methods are delegated. (See FasterCSV::open() for a complete
1297
+ # list.) If you pass a String for +data+, you can later retrieve it (after
1298
+ # writing to it, for example) with FasterCSV.string().
1299
+ #
1300
+ # Note that a wrapped String will be positioned at the beginning (for
1301
+ # reading). If you want it at the end (for writing), use
1302
+ # FasterCSV::generate(). If you want any other positioning, pass a preset
1303
+ # StringIO object instead.
1304
+ #
1305
+ # You may set any reading and/or writing preferences in the +options+ Hash.
1306
+ # Available options are:
1307
+ #
1308
+ # <b><tt>:col_sep</tt></b>:: The String placed between each field.
1309
+ # <b><tt>:row_sep</tt></b>:: The String appended to the end of each
1310
+ # row. This can be set to the special
1311
+ # <tt>:auto</tt> setting, which requests
1312
+ # that FasterCSV automatically discover
1313
+ # this from the data. Auto-discovery
1314
+ # reads ahead in the data looking for
1315
+ # the next <tt>"\r\n"</tt>,
1316
+ # <tt>"\n"</tt>, or <tt>"\r"</tt>
1317
+ # sequence. A sequence will be selected
1318
+ # even if it occurs in a quoted field,
1319
+ # assuming that you would have the same
1320
+ # line endings there. If none of those
1321
+ # sequences is found, +data+ is
1322
+ # <tt>ARGF</tt>, <tt>STDIN</tt>,
1323
+ # <tt>STDOUT</tt>, or <tt>STDERR</tt>,
1324
+ # or the stream is only available for
1325
+ # output, the default
1326
+ # <tt>$INPUT_RECORD_SEPARATOR</tt>
1327
+ # (<tt>$/</tt>) is used. Obviously,
1328
+ # discovery takes a little time. Set
1329
+ # manually if speed is important. Also
1330
+ # note that IO objects should be opened
1331
+ # in binary mode on Windows if this
1332
+ # feature will be used as the
1333
+ # line-ending translation can cause
1334
+ # problems with resetting the document
1335
+ # position to where it was before the
1336
+ # read ahead.
1337
+ # <b><tt>:quote_char</tt></b>:: The character used to quote fields.
1338
+ # This has to be a single character
1339
+ # String. This is useful for
1340
+ # applications that incorrectly use
1341
+ # <tt>'</tt> as the quote character
1342
+ # instead of the correct <tt>"</tt>.
1343
+ # FasterCSV will always consider a
1344
+ # double sequence of this character to be
1345
+ # an escaped quote.
1346
+ # <b><tt>:encoding</tt></b>:: The encoding to use when parsing the
1347
+ # file. Defaults to your <tt>$KCODE</tt>
1348
+ # setting. Valid values: <tt>`n’</tt> or
1349
+ # <tt>`N’</tt> for none, <tt>`e’</tt> or
1350
+ # <tt>`E’</tt> for EUC, <tt>`s’</tt> or
1351
+ # <tt>`S’</tt> for SJIS, and
1352
+ # <tt>`u’</tt> or <tt>`U’</tt> for UTF-8
1353
+ # (see Regexp.new()).
1354
+ # <b><tt>:field_size_limit</tt></b>:: This is a maximum size FasterCSV will
1355
+ # read ahead looking for the closing
1356
+ # quote for a field. (In truth, it
1357
+ # reads to the first line ending beyond
1358
+ # this size.) If a quote cannot be
1359
+ # found within the limit FasterCSV will
1360
+ # raise a MalformedCSVError, assuming
1361
+ # the data is faulty. You can use this
1362
+ # limit to prevent what are effectively
1363
+ # DoS attacks on the parser. However,
1364
+ # this limit can cause a legitimate
1365
+ # parse to fail and thus is set to
1366
+ # +nil+, or off, by default.
1367
+ # <b><tt>:converters</tt></b>:: An Array of names from the Converters
1368
+ # Hash and/or lambdas that handle custom
1369
+ # conversion. A single converter
1370
+ # doesn't have to be in an Array.
1371
+ # <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
1372
+ # unconverted_fields() method will be
1373
+ # added to all returned rows (Array or
1374
+ # FasterCSV::Row) that will return the
1375
+ # fields as they were before conversion.
1376
+ # Note that <tt>:headers</tt> supplied
1377
+ # by Array or String were not fields of
1378
+ # the document and thus will have an
1379
+ # empty Array attached.
1380
+ # <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
1381
+ # +true+, the initial row of the CSV
1382
+ # file will be treated as a row of
1383
+ # headers. If set to an Array, the
1384
+ # contents will be used as the headers.
1385
+ # If set to a String, the String is run
1386
+ # through a call of
1387
+ # FasterCSV::parse_line() with the same
1388
+ # <tt>:col_sep</tt>, <tt>:row_sep</tt>,
1389
+ # and <tt>:quote_char</tt> as this
1390
+ # instance to produce an Array of
1391
+ # headers. This setting causes
1392
+ # FasterCSV.shift() to return rows as
1393
+ # FasterCSV::Row objects instead of
1394
+ # Arrays and FasterCSV.read() to return
1395
+ # FasterCSV::Table objects instead of
1396
+ # an Array of Arrays.
1397
+ # <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently
1398
+ # swallowed. If set to +true+, header
1399
+ # rows are returned in a FasterCSV::Row
1400
+ # object with identical headers and
1401
+ # fields (save that the fields do not go
1402
+ # through the converters).
1403
+ # <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
1404
+ # set, a header row will be added to the
1405
+ # output.
1406
+ # <b><tt>:header_converters</tt></b>:: Identical in functionality to
1407
+ # <tt>:converters</tt> save that the
1408
+ # conversions are only made to header
1409
+ # rows.
1410
+ # <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, FasterCSV
1411
+ # will skip over any rows with no
1412
+ # content.
1413
+ # <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, FasterCSV
1414
+ # will quote all CSV fields it creates.
1415
+ #
1416
+ # See FasterCSV::DEFAULT_OPTIONS for the default settings.
1417
+ #
1418
+ # Options cannot be overridden in the instance methods for performance reasons,
1419
+ # so be sure to set what you want here.
1420
+ #
1421
+ def initialize(data, options = Hash.new)
1422
+ # build the options for this read/write
1423
+ options = DEFAULT_OPTIONS.merge(options)
1424
+
1425
+ # create the IO object we will read from
1426
+ @io = if data.is_a? String then StringIO.new(data) else data end
1427
+
1428
+ init_separators(options)
1429
+ init_parsers(options)
1430
+ init_converters(options)
1431
+ init_headers(options)
1432
+
1433
+ unless options.empty?
1434
+ raise ArgumentError, "Unknown options: #{options.keys.join(', ')}."
1139
1435
  end
1140
- all << obj
1436
+
1437
+ # track our own lineno since IO gets confused about line-ends in CSV fields
1438
+ @lineno = 0
1141
1439
  end
1142
-
1143
- csv.close unless io_or_str.is_a? String
1144
-
1145
- results
1146
- end
1147
-
1148
- #
1149
- # :call-seq:
1150
- # open( filename, mode="rb", options = Hash.new ) { |faster_csv| ... }
1151
- # open( filename, mode="rb", options = Hash.new )
1152
- #
1153
- # This method opens an IO object, and wraps that with FasterCSV. This is
1154
- # intended as the primary interface for writing a CSV file.
1155
- #
1156
- # You may pass any +args+ Ruby's open() understands followed by an optional
1157
- # Hash containing any +options+ FasterCSV::new() understands.
1158
- #
1159
- # This method works like Ruby's open() call, in that it will pass a FasterCSV
1160
- # object to a provided block and close it when the block termminates, or it
1161
- # will return the FasterCSV object when no block is provided. (*Note*: This
1162
- # is different from the standard CSV library which passes rows to the block.
1163
- # Use FasterCSV::foreach() for that behavior.)
1164
- #
1165
- # An opened FasterCSV object will delegate to many IO methods, for
1166
- # convenience. You may call:
1167
- #
1168
- # * binmode()
1169
- # * close()
1170
- # * close_read()
1171
- # * close_write()
1172
- # * closed?()
1173
- # * eof()
1174
- # * eof?()
1175
- # * fcntl()
1176
- # * fileno()
1177
- # * flush()
1178
- # * fsync()
1179
- # * ioctl()
1180
- # * isatty()
1181
- # * pid()
1182
- # * pos()
1183
- # * reopen()
1184
- # * seek()
1185
- # * stat()
1186
- # * sync()
1187
- # * sync=()
1188
- # * tell()
1189
- # * to_i()
1190
- # * to_io()
1191
- # * tty?()
1192
- #
1193
- def self.open(*args)
1194
- # find the +options+ Hash
1195
- options = if args.last.is_a? Hash then args.pop else Hash.new end
1196
- # default to a binary open mode
1197
- args << "rb" if args.size == 1
1198
- # wrap a File opened with the remaining +args+
1199
- csv = new(File.open(*args), options)
1200
-
1201
- # handle blocks like Ruby's open(), not like the CSV library
1202
- if block_given?
1203
- begin
1204
- yield csv
1205
- ensure
1206
- csv.close
1207
- end
1208
- else
1209
- csv
1440
+
1441
+ #
1442
+ # The line number of the last row read from this file. Fields with nested
1443
+ # line-end characters will not affect this count.
1444
+ #
1445
+ attr_reader :lineno
1446
+
1447
+ ### IO and StringIO Delegation ###
1448
+
1449
+ extend Forwardable
1450
+ def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
1451
+ :eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
1452
+ :isatty, :pid, :pos, :reopen, :seek, :stat, :string,
1453
+ :sync, :sync=, :tell, :to_i, :to_io, :tty?
1454
+
1455
+ # Rewinds the underlying IO object and resets FasterCSV's lineno() counter.
1456
+ def rewind
1457
+ @headers = nil
1458
+ @lineno = 0
1459
+
1460
+ @io.rewind
1210
1461
  end
1211
- end
1212
-
1213
- #
1214
- # :call-seq:
1215
- # parse( str, options = Hash.new ) { |row| ... }
1216
- # parse( str, options = Hash.new )
1217
- #
1218
- # This method can be used to easily parse CSV out of a String. You may either
1219
- # provide a +block+ which will be called with each row of the String in turn,
1220
- # or just use the returned Array of Arrays (when no +block+ is given).
1221
- #
1222
- # You pass your +str+ to read from, and an optional +options+ Hash containing
1223
- # anything FasterCSV::new() understands.
1224
- #
1225
- def self.parse(*args, &block)
1226
- csv = new(*args)
1227
- if block.nil? # slurp contents, if no block is given
1228
- begin
1229
- csv.read
1230
- ensure
1231
- csv.close
1462
+
1463
+ ### End Delegation ###
1464
+
1465
+ #
1466
+ # The primary write method for wrapped Strings and IOs, +row+ (an Array or
1467
+ # FasterCSV::Row) is converted to CSV and appended to the data source. When a
1468
+ # FasterCSV::Row is passed, only the row's fields() are appended to the
1469
+ # output.
1470
+ #
1471
+ # The data source must be open for writing.
1472
+ #
1473
+ def <<(row)
1474
+ # make sure headers have been assigned
1475
+ if header_row? and [Array, String].include? @use_headers.class
1476
+ parse_headers # won't read data for Array or String
1477
+ self << @headers if @write_headers
1232
1478
  end
1233
- else # or pass each row to a provided block
1234
- csv.each(&block)
1235
- end
1236
- end
1237
-
1238
- #
1239
- # This method is a shortcut for converting a single line of a CSV String into
1240
- # a into an Array. Note that if +line+ contains multiple rows, anything
1241
- # beyond the first row is ignored.
1242
- #
1243
- # The +options+ parameter can be anthing FasterCSV::new() understands.
1244
- #
1245
- def self.parse_line(line, options = Hash.new)
1246
- new(line, options).shift
1247
- end
1248
-
1249
- #
1250
- # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
1251
- # file and any +options+ FasterCSV::new() understands.
1252
- #
1253
- def self.read(path, options = Hash.new)
1254
- open(path, "rb", options) { |csv| csv.read }
1255
- end
1256
-
1257
- # Alias for FasterCSV::read().
1258
- def self.readlines(*args)
1259
- read(*args)
1260
- end
1261
-
1262
- #
1263
- # A shortcut for:
1264
- #
1265
- # FasterCSV.read( path, { :headers => true,
1266
- # :converters => :numeric,
1267
- # :header_converters => :symbol }.merge(options) )
1268
- #
1269
- def self.table(path, options = Hash.new)
1270
- read( path, { :headers => true,
1271
- :converters => :numeric,
1272
- :header_converters => :symbol }.merge(options) )
1273
- end
1274
-
1275
- #
1276
- # This constructor will wrap either a String or IO object passed in +data+ for
1277
- # reading and/or writing. In addition to the FasterCSV instance methods,
1278
- # several IO methods are delegated. (See FasterCSV::open() for a complete
1279
- # list.) If you pass a String for +data+, you can later retrieve it (after
1280
- # writing to it, for example) with FasterCSV.string().
1281
- #
1282
- # Note that a wrapped String will be positioned at at the beginning (for
1283
- # reading). If you want it at the end (for writing), use
1284
- # FasterCSV::generate(). If you want any other positioning, pass a preset
1285
- # StringIO object instead.
1286
- #
1287
- # You may set any reading and/or writing preferences in the +options+ Hash.
1288
- # Available options are:
1289
- #
1290
- # <b><tt>:col_sep</tt></b>:: The String placed between each field.
1291
- # <b><tt>:row_sep</tt></b>:: The String appended to the end of each
1292
- # row. This can be set to the special
1293
- # <tt>:auto</tt> setting, which requests
1294
- # that FasterCSV automatically discover
1295
- # this from the data. Auto-discovery
1296
- # reads ahead in the data looking for
1297
- # the next <tt>"\r\n"</tt>,
1298
- # <tt>"\n"</tt>, or <tt>"\r"</tt>
1299
- # sequence. A sequence will be selected
1300
- # even if it occurs in a quoted field,
1301
- # assuming that you would have the same
1302
- # line endings there. If none of those
1303
- # sequences is found, +data+ is
1304
- # <tt>ARGF</tt>, <tt>STDIN</tt>,
1305
- # <tt>STDOUT</tt>, or <tt>STDERR</tt>,
1306
- # or the stream is only available for
1307
- # output, the default
1308
- # <tt>$INPUT_RECORD_SEPARATOR</tt>
1309
- # (<tt>$/</tt>) is used. Obviously,
1310
- # discovery takes a little time. Set
1311
- # manually if speed is important. Also
1312
- # note that IO objects should be opened
1313
- # in binary mode on Windows if this
1314
- # feature will be used as the
1315
- # line-ending translation can cause
1316
- # problems with resetting the document
1317
- # position to where it was before the
1318
- # read ahead.
1319
- # <b><tt>:quote_char</tt></b>:: The character used to quote fields.
1320
- # This has to be a single character
1321
- # String. This is useful for
1322
- # application that incorrectly use
1323
- # <tt>'</tt> as the quote character
1324
- # instead of the correct <tt>"</tt>.
1325
- # FasterCSV will always consider a
1326
- # double sequence this character to be
1327
- # an escaped quote.
1328
- # <b><tt>:encoding</tt></b>:: The encoding to use when parsing the
1329
- # file. Defaults to your <tt>$KCODE</tt>
1330
- # setting. Valid values: <tt>`n’</tt> or
1331
- # <tt>`N’</tt> for none, <tt>`e’</tt> or
1332
- # <tt>`E’</tt> for EUC, <tt>`s’</tt> or
1333
- # <tt>`S’</tt> for SJIS, and
1334
- # <tt>`u’</tt> or <tt>`U’</tt> for UTF-8
1335
- # (see Regexp.new()).
1336
- # <b><tt>:field_size_limit</tt></b>:: This is a maximum size FasterCSV will
1337
- # read ahead looking for the closing
1338
- # quote for a field. (In truth, it
1339
- # reads to the first line ending beyond
1340
- # this size.) If a quote cannot be
1341
- # found within the limit FasterCSV will
1342
- # raise a MalformedCSVError, assuming
1343
- # the data is faulty. You can use this
1344
- # limit to prevent what are effectively
1345
- # DoS attacks on the parser. However,
1346
- # this limit can cause a legitimate
1347
- # parse to fail and thus is set to
1348
- # +nil+, or off, by default.
1349
- # <b><tt>:converters</tt></b>:: An Array of names from the Converters
1350
- # Hash and/or lambdas that handle custom
1351
- # conversion. A single converter
1352
- # doesn't have to be in an Array.
1353
- # <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
1354
- # unconverted_fields() method will be
1355
- # added to all returned rows (Array or
1356
- # FasterCSV::Row) that will return the
1357
- # fields as they were before conversion.
1358
- # Note that <tt>:headers</tt> supplied
1359
- # by Array or String were not fields of
1360
- # the document and thus will have an
1361
- # empty Array attached.
1362
- # <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
1363
- # +true+, the initial row of the CSV
1364
- # file will be treated as a row of
1365
- # headers. If set to an Array, the
1366
- # contents will be used as the headers.
1367
- # If set to a String, the String is run
1368
- # through a call of
1369
- # FasterCSV::parse_line() with the same
1370
- # <tt>:col_sep</tt>, <tt>:row_sep</tt>,
1371
- # and <tt>:quote_char</tt> as this
1372
- # instance to produce an Array of
1373
- # headers. This setting causes
1374
- # FasterCSV.shift() to return rows as
1375
- # FasterCSV::Row objects instead of
1376
- # Arrays and FasterCSV.read() to return
1377
- # FasterCSV::Table objects instead of
1378
- # an Array of Arrays.
1379
- # <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently
1380
- # swallowed. If set to +true+, header
1381
- # rows are returned in a FasterCSV::Row
1382
- # object with identical headers and
1383
- # fields (save that the fields do not go
1384
- # through the converters).
1385
- # <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
1386
- # set, a header row will be added to the
1387
- # output.
1388
- # <b><tt>:header_converters</tt></b>:: Identical in functionality to
1389
- # <tt>:converters</tt> save that the
1390
- # conversions are only made to header
1391
- # rows.
1392
- # <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, FasterCSV
1393
- # will skip over any rows with no
1394
- # content.
1395
- # <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, FasterCSV
1396
- # will quote all CSV fields it creates.
1397
- #
1398
- # See FasterCSV::DEFAULT_OPTIONS for the default settings.
1399
- #
1400
- # Options cannot be overridden in the instance methods for performance reasons,
1401
- # so be sure to set what you want here.
1402
- #
1403
- def initialize(data, options = Hash.new)
1404
- # build the options for this read/write
1405
- options = DEFAULT_OPTIONS.merge(options)
1406
-
1407
- # create the IO object we will read from
1408
- @io = if data.is_a? String then StringIO.new(data) else data end
1409
-
1410
- init_separators(options)
1411
- init_parsers(options)
1412
- init_converters(options)
1413
- init_headers(options)
1414
-
1415
- unless options.empty?
1416
- raise ArgumentError, "Unknown options: #{options.keys.join(', ')}."
1479
+
1480
+ # Handle FasterCSV::Row objects and Hashes
1481
+ row = case row
1482
+ when self.class::Row then row.fields
1483
+ when Hash then @headers.map { |header| row[header] }
1484
+ else row
1485
+ end
1486
+
1487
+ @headers = row if header_row?
1488
+ @lineno += 1
1489
+
1490
+ @io << row.map(&@quote).join(@col_sep) + @row_sep # quote and separate
1491
+
1492
+ self # for chaining
1417
1493
  end
1418
-
1419
- # track our own lineno since IO gets confused about line-ends in CSV fields
1420
- @lineno = 0
1421
- end
1422
-
1423
- #
1424
- # The line number of the last row read from this file. Fields with nested
1425
- # line-end characters will not affect this count.
1426
- #
1427
- attr_reader :lineno
1428
-
1429
- ### IO and StringIO Delegation ###
1430
-
1431
- extend Forwardable
1432
- def_delegators :@io, :binmode, :close, :close_read, :close_write, :closed?,
1433
- :eof, :eof?, :fcntl, :fileno, :flush, :fsync, :ioctl,
1434
- :isatty, :pid, :pos, :reopen, :seek, :stat, :string,
1435
- :sync, :sync=, :tell, :to_i, :to_io, :tty?
1436
-
1437
- # Rewinds the underlying IO object and resets FasterCSV's lineno() counter.
1438
- def rewind
1439
- @headers = nil
1440
- @lineno = 0
1441
-
1442
- @io.rewind
1443
- end
1494
+ alias_method :add_row, :<<
1495
+ alias_method :puts, :<<
1444
1496
 
1445
- ### End Delegation ###
1446
-
1447
- #
1448
- # The primary write method for wrapped Strings and IOs, +row+ (an Array or
1449
- # FasterCSV::Row) is converted to CSV and appended to the data source. When a
1450
- # FasterCSV::Row is passed, only the row's fields() are appended to the
1451
- # output.
1452
- #
1453
- # The data source must be open for writing.
1454
- #
1455
- def <<(row)
1456
- # make sure headers have been assigned
1457
- if header_row? and [Array, String].include? @use_headers.class
1458
- parse_headers # won't read data for Array or String
1459
- self << @headers if @write_headers
1497
+ #
1498
+ # :call-seq:
1499
+ # convert( name )
1500
+ # convert { |field| ... }
1501
+ # convert { |field, field_info| ... }
1502
+ #
1503
+ # You can use this method to install a FasterCSV::Converters built-in, or
1504
+ # provide a block that handles a custom conversion.
1505
+ #
1506
+ # If you provide a block that takes one argument, it will be passed the field
1507
+ # and is expected to return the converted value or the field itself. If your
1508
+ # block takes two arguments, it will also be passed a FieldInfo Struct,
1509
+ # containing details about the field. Again, the block should return a
1510
+ # converted field or the field itself.
1511
+ #
1512
+ def convert(name = nil, &converter)
1513
+ add_converter(:converters, self.class::Converters, name, &converter)
1460
1514
  end
1461
-
1462
- # Handle FasterCSV::Row objects and Hashes
1463
- row = case row
1464
- when self.class::Row then row.fields
1465
- when Hash then @headers.map { |header| row[header] }
1466
- else row
1467
- end
1468
1515
 
1469
- @headers = row if header_row?
1470
- @lineno += 1
1516
+ #
1517
+ # :call-seq:
1518
+ # header_convert( name )
1519
+ # header_convert { |field| ... }
1520
+ # header_convert { |field, field_info| ... }
1521
+ #
1522
+ # Identical to FasterCSV.convert(), but for header rows.
1523
+ #
1524
+ # Note that this method must be called before header rows are read to have any
1525
+ # effect.
1526
+ #
1527
+ def header_convert(name = nil, &converter)
1528
+ add_converter( :header_converters,
1529
+ self.class::HeaderConverters,
1530
+ name,
1531
+ &converter )
1532
+ end
1471
1533
 
1472
- @io << row.map(&@quote).join(@col_sep) + @row_sep # quote and separate
1473
-
1474
- self # for chaining
1475
- end
1476
- alias_method :add_row, :<<
1477
- alias_method :puts, :<<
1478
-
1479
- #
1480
- # :call-seq:
1481
- # convert( name )
1482
- # convert { |field| ... }
1483
- # convert { |field, field_info| ... }
1484
- #
1485
- # You can use this method to install a FasterCSV::Converters built-in, or
1486
- # provide a block that handles a custom conversion.
1487
- #
1488
- # If you provide a block that takes one argument, it will be passed the field
1489
- # and is expected to return the converted value or the field itself. If your
1490
- # block takes two arguments, it will also be passed a FieldInfo Struct,
1491
- # containing details about the field. Again, the block should return a
1492
- # converted field or the field itself.
1493
- #
1494
- def convert(name = nil, &converter)
1495
- add_converter(:converters, self.class::Converters, name, &converter)
1496
- end
1534
+ include Enumerable
1497
1535
 
1498
- #
1499
- # :call-seq:
1500
- # header_convert( name )
1501
- # header_convert { |field| ... }
1502
- # header_convert { |field, field_info| ... }
1503
- #
1504
- # Identical to FasterCSV.convert(), but for header rows.
1505
- #
1506
- # Note that this method must be called before header rows are read to have any
1507
- # effect.
1508
- #
1509
- def header_convert(name = nil, &converter)
1510
- add_converter( :header_converters,
1511
- self.class::HeaderConverters,
1512
- name,
1513
- &converter )
1514
- end
1515
-
1516
- include Enumerable
1517
-
1518
- #
1519
- # Yields each row of the data source in turn.
1520
- #
1521
- # Support for Enumerable.
1522
- #
1523
- # The data source must be open for reading.
1524
- #
1525
- def each
1526
- while row = shift
1527
- yield row
1528
- end
1529
- end
1530
-
1531
- #
1532
- # Slurps the remaining rows and returns an Array of Arrays.
1533
- #
1534
- # The data source must be open for reading.
1535
- #
1536
- def read
1537
- rows = to_a
1538
- if @use_headers
1539
- Table.new(rows)
1540
- else
1541
- rows
1536
+ #
1537
+ # Yields each row of the data source in turn.
1538
+ #
1539
+ # Support for Enumerable.
1540
+ #
1541
+ # The data source must be open for reading.
1542
+ #
1543
+ def each
1544
+ while row = shift
1545
+ yield row
1546
+ end
1542
1547
  end
1543
- end
1544
- alias_method :readlines, :read
1545
-
1546
- # Returns +true+ if the next row read will be a header row.
1547
- def header_row?
1548
- @use_headers and @headers.nil?
1549
- end
1550
-
1551
- #
1552
- # The primary read method for wrapped Strings and IOs, a single row is pulled
1553
- # from the data source, parsed and returned as an Array of fields (if header
1554
- # rows are not used) or a FasterCSV::Row (when header rows are used).
1555
- #
1556
- # The data source must be open for reading.
1557
- #
1558
- def shift
1559
- #########################################################################
1560
- ### This method is purposefully kept a bit long as simple conditional ###
1561
- ### checks are faster than numerous (expensive) method calls. ###
1562
- #########################################################################
1563
-
1564
- # handle headers not based on document content
1565
- if header_row? and @return_headers and
1566
- [Array, String].include? @use_headers.class
1567
- if @unconverted_fields
1568
- return add_unconverted_fields(parse_headers, Array.new)
1548
+
1549
+ #
1550
+ # Slurps the remaining rows and returns an Array of Arrays.
1551
+ #
1552
+ # The data source must be open for reading.
1553
+ #
1554
+ def read
1555
+ rows = to_a
1556
+ if @use_headers
1557
+ Table.new(rows)
1569
1558
  else
1570
- return parse_headers
1559
+ rows
1571
1560
  end
1572
1561
  end
1573
-
1574
- # begin with a blank line, so we can always add to it
1575
- line = String.new
1562
+ alias_method :readlines, :read
1563
+
1564
+ # Returns +true+ if the next row read will be a header row.
1565
+ def header_row?
1566
+ @use_headers and @headers.nil?
1567
+ end
1576
1568
 
1577
1569
  #
1578
- # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
1579
- # because of \r and/or \n characters embedded in quoted fields
1570
+ # The primary read method for wrapped Strings and IOs, a single row is pulled
1571
+ # from the data source, parsed and returned as an Array of fields (if header
1572
+ # rows are not used) or a FasterCSV::Row (when header rows are used).
1580
1573
  #
1581
- loop do
1582
- # add another read to the line
1583
- if read_line = @io.gets(@row_sep)
1584
- line += read_line
1585
- else
1586
- return nil
1587
- end
1588
- # copy the line so we can chop it up in parsing
1589
- parse = line.dup
1590
- parse.sub!(@parsers[:line_end], "")
1591
-
1592
- #
1593
- # I believe a blank line should be an <tt>Array.new</tt>, not
1594
- # CSV's <tt>[nil]</tt>
1595
- #
1596
- if parse.empty?
1597
- @lineno += 1
1598
- if @skip_blanks
1599
- line = ""
1600
- next
1601
- elsif @unconverted_fields
1602
- return add_unconverted_fields(Array.new, Array.new)
1603
- elsif @use_headers
1604
- return FasterCSV::Row.new(Array.new, Array.new)
1574
+ # The data source must be open for reading.
1575
+ #
1576
+ def shift
1577
+ #########################################################################
1578
+ ### This method is purposefully kept a bit long as simple conditional ###
1579
+ ### checks are faster than numerous (expensive) method calls. ###
1580
+ #########################################################################
1581
+
1582
+ # handle headers not based on document content
1583
+ if header_row? and @return_headers and
1584
+ [Array, String].include? @use_headers.class
1585
+ if @unconverted_fields
1586
+ return add_unconverted_fields(parse_headers, Array.new)
1605
1587
  else
1606
- return Array.new
1588
+ return parse_headers
1607
1589
  end
1608
1590
  end
1609
1591
 
1610
- # parse the fields with a mix of String#split and regular expressions
1611
- csv = Array.new
1612
- current_field = String.new
1613
- field_quotes = 0
1614
- parse.split(@col_sep, -1).each do |match|
1615
- if current_field.empty? && match.count(@quote_and_newlines).zero?
1616
- csv << (match.empty? ? nil : match)
1617
- elsif (current_field.empty? ? match[0] : current_field[0]) ==
1618
- @quote_char[0]
1619
- current_field << match
1620
- field_quotes += match.count(@quote_char)
1621
- if field_quotes % 2 == 0
1622
- in_quotes = current_field[@parsers[:quoted_field], 1]
1623
- raise MalformedCSVError if !in_quotes ||
1624
- in_quotes[@parsers[:stray_quote]]
1625
- current_field = in_quotes
1626
- current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
1627
- csv << current_field
1628
- current_field = String.new
1629
- field_quotes = 0
1630
- else # we found a quoted field that spans multiple lines
1631
- current_field << @col_sep
1632
- end
1633
- elsif match.count("\r\n").zero?
1634
- raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
1592
+ # begin with a blank line, so we can always add to it
1593
+ line = String.new
1594
+
1595
+ #
1596
+ # it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
1597
+ # because of \r and/or \n characters embedded in quoted fields
1598
+ #
1599
+ loop do
1600
+ # add another read to the line
1601
+ if read_line = @io.gets(@row_sep)
1602
+ line += read_line
1635
1603
  else
1636
- raise MalformedCSVError, "Unquoted fields do not allow " +
1637
- "\\r or \\n (line #{lineno + 1})."
1604
+ return nil
1605
+ end
1606
+ # copy the line so we can chop it up in parsing
1607
+ parse = line.dup
1608
+ parse.sub!(@parsers[:line_end], "")
1609
+
1610
+ #
1611
+ # I believe a blank line should be an <tt>Array.new</tt>, not
1612
+ # CSV's <tt>[nil]</tt>
1613
+ #
1614
+ if parse.empty?
1615
+ @lineno += 1
1616
+ if @skip_blanks
1617
+ line = ""
1618
+ next
1619
+ elsif @unconverted_fields
1620
+ return add_unconverted_fields(Array.new, Array.new)
1621
+ elsif @use_headers
1622
+ return FasterCSV::Row.new(Array.new, Array.new)
1623
+ else
1624
+ return Array.new
1625
+ end
1626
+ end
1627
+
1628
+ # parse the fields with a mix of String#split and regular expressions
1629
+ csv = Array.new
1630
+ current_field = String.new
1631
+ field_quotes = 0
1632
+ parse.split(@col_sep, -1).each do |match|
1633
+ if current_field.empty? && match.count(@quote_and_newlines).zero?
1634
+ csv << (match.empty? ? nil : match)
1635
+ elsif (current_field.empty? ? match[0] : current_field[0]) ==
1636
+ @quote_char[0]
1637
+ current_field << match
1638
+ field_quotes += match.count(@quote_char)
1639
+ if field_quotes % 2 == 0
1640
+ in_quotes = current_field[@parsers[:quoted_field], 1]
1641
+ raise MalformedCSVError if !in_quotes ||
1642
+ in_quotes[@parsers[:stray_quote]]
1643
+ current_field = in_quotes
1644
+ current_field.gsub!(@quote_char * 2, @quote_char) # unescape contents
1645
+ csv << current_field
1646
+ current_field = String.new
1647
+ field_quotes = 0
1648
+ else # we found a quoted field that spans multiple lines
1649
+ current_field << @col_sep
1650
+ end
1651
+ elsif match.count("\r\n").zero?
1652
+ raise MalformedCSVError, "Illegal quoting on line #{lineno + 1}."
1653
+ else
1654
+ raise MalformedCSVError, "Unquoted fields do not allow " +
1655
+ "\\r or \\n (line #{lineno + 1})."
1656
+ end
1638
1657
  end
1639
- end
1640
1658
 
1641
- # if parse is empty?(), we found all the fields on the line...
1642
- if field_quotes % 2 == 0
1643
- @lineno += 1
1659
+ # if parse is empty?(), we found all the fields on the line...
1660
+ if field_quotes % 2 == 0
1661
+ @lineno += 1
1644
1662
 
1645
- # save unconverted fields, if needed...
1646
- unconverted = csv.dup if @unconverted_fields
1663
+ # save unconverted fields, if needed...
1664
+ unconverted = csv.dup if @unconverted_fields
1647
1665
 
1648
- # convert fields, if needed...
1649
- csv = convert_fields(csv) unless @use_headers or @converters.empty?
1650
- # parse out header rows and handle FasterCSV::Row conversions...
1651
- csv = parse_headers(csv) if @use_headers
1666
+ # convert fields, if needed...
1667
+ csv = convert_fields(csv) unless @use_headers or @converters.empty?
1668
+ # parse out header rows and handle FasterCSV::Row conversions...
1669
+ csv = parse_headers(csv) if @use_headers
1652
1670
 
1653
- # inject unconverted fields and accessor, if requested...
1654
- if @unconverted_fields and not csv.respond_to? :unconverted_fields
1655
- add_unconverted_fields(csv, unconverted)
1671
+ # inject unconverted fields and accessor, if requested...
1672
+ if @unconverted_fields and not csv.respond_to? :unconverted_fields
1673
+ add_unconverted_fields(csv, unconverted)
1674
+ end
1675
+
1676
+ # return the results
1677
+ break csv
1656
1678
  end
1679
+ # if we're not empty?() but at eof?(), a quoted field wasn't closed...
1680
+ if @io.eof?
1681
+ raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
1682
+ elsif @field_size_limit and current_field.size >= @field_size_limit
1683
+ raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1684
+ end
1685
+ # otherwise, we need to loop and pull some more data to complete the row
1686
+ end
1687
+ end
1688
+ alias_method :gets, :shift
1689
+ alias_method :readline, :shift
1657
1690
 
1658
- # return the results
1659
- break csv
1691
+ # Returns a simplified description of the key FasterCSV attributes.
1692
+ def inspect
1693
+ str = "<##{self.class} io_type:"
1694
+ # show type of wrapped IO
1695
+ if @io == $stdout then str << "$stdout"
1696
+ elsif @io == $stdin then str << "$stdin"
1697
+ elsif @io == $stderr then str << "$stderr"
1698
+ else str << @io.class.to_s
1660
1699
  end
1661
- # if we're not empty?() but at eof?(), a quoted field wasn't closed...
1662
- if @io.eof?
1663
- raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}."
1664
- elsif @field_size_limit and current_field.size >= @field_size_limit
1665
- raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1700
+ # show IO.path(), if available
1701
+ if @io.respond_to?(:path) and (p = @io.path)
1702
+ str << " io_path:#{p.inspect}"
1666
1703
  end
1667
- # otherwise, we need to loop and pull some more data to complete the row
1668
- end
1669
- end
1670
- alias_method :gets, :shift
1671
- alias_method :readline, :shift
1672
-
1673
- # Returns a simplified description of the key FasterCSV attributes.
1674
- def inspect
1675
- str = "<##{self.class} io_type:"
1676
- # show type of wrapped IO
1677
- if @io == $stdout then str << "$stdout"
1678
- elsif @io == $stdin then str << "$stdin"
1679
- elsif @io == $stderr then str << "$stderr"
1680
- else str << @io.class.to_s
1681
- end
1682
- # show IO.path(), if available
1683
- if @io.respond_to?(:path) and (p = @io.path)
1684
- str << " io_path:#{p.inspect}"
1685
- end
1686
- # show other attributes
1687
- %w[ lineno col_sep row_sep
1688
- quote_char skip_blanks encoding ].each do |attr_name|
1689
- if a = instance_variable_get("@#{attr_name}")
1690
- str << " #{attr_name}:#{a.inspect}"
1704
+ # show other attributes
1705
+ %w[ lineno col_sep row_sep
1706
+ quote_char skip_blanks encoding ].each do |attr_name|
1707
+ if a = instance_variable_get("@#{attr_name}")
1708
+ str << " #{attr_name}:#{a.inspect}"
1709
+ end
1691
1710
  end
1711
+ if @use_headers
1712
+ str << " headers:#{(@headers || true).inspect}"
1713
+ end
1714
+ str << ">"
1692
1715
  end
1693
- if @use_headers
1694
- str << " headers:#{(@headers || true).inspect}"
1695
- end
1696
- str << ">"
1697
- end
1698
-
1699
- private
1700
-
1701
- #
1702
- # Stores the indicated separators for later use.
1703
- #
1704
- # If auto-discovery was requested for <tt>@row_sep</tt>, this method will read
1705
- # ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
1706
- # +STDERR+ and any stream open for output only get a default
1707
- # <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1708
- #
1709
- # This method also establishes the quoting rules used for CSV output.
1710
- #
1711
- def init_separators(options)
1712
- # store the selected separators
1713
- @col_sep = options.delete(:col_sep)
1714
- @row_sep = options.delete(:row_sep)
1715
- @quote_char = options.delete(:quote_char)
1716
- @quote_and_newlines = "#{@quote_char}\r\n"
1717
-
1718
- if @quote_char.length != 1
1719
- raise ArgumentError, ":quote_char has to be a single character String"
1720
- end
1721
-
1722
- # automatically discover row separator when requested
1723
- if @row_sep == :auto
1724
- if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or
1725
- (defined?(Zlib) and @io.class == Zlib::GzipWriter)
1726
- @row_sep = $INPUT_RECORD_SEPARATOR
1727
- else
1728
- begin
1729
- saved_pos = @io.pos # remember where we were
1730
- while @row_sep == :auto
1731
- #
1732
- # if we run out of data, it's probably a single line
1733
- # (use a sensible default)
1734
- #
1735
- if @io.eof?
1736
- @row_sep = $INPUT_RECORD_SEPARATOR
1737
- break
1716
+
1717
+ private
1718
+
1719
+ #
1720
+ # Stores the indicated separators for later use.
1721
+ #
1722
+ # If auto-discovery was requested for <tt>@row_sep</tt>, this method will read
1723
+ # ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
1724
+ # +STDERR+ and any stream open for output only get a default
1725
+ # <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1726
+ #
1727
+ # This method also establishes the quoting rules used for CSV output.
1728
+ #
1729
+ def init_separators(options)
1730
+ # store the selected separators
1731
+ @col_sep = options.delete(:col_sep)
1732
+ @row_sep = options.delete(:row_sep)
1733
+ @quote_char = options.delete(:quote_char)
1734
+ @quote_and_newlines = "\r\n#{@quote_char}"
1735
+
1736
+ if @quote_char.length != 1
1737
+ raise ArgumentError, ":quote_char has to be a single character String"
1738
+ end
1739
+
1740
+ # automatically discover row separator when requested
1741
+ if @row_sep == :auto
1742
+ if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or
1743
+ (defined?(Zlib) and @io.class == Zlib::GzipWriter)
1744
+ @row_sep = $INPUT_RECORD_SEPARATOR
1745
+ else
1746
+ begin
1747
+ saved_pos = @io.pos # remember where we were
1748
+ while @row_sep == :auto
1749
+ #
1750
+ # if we run out of data, it's probably a single line
1751
+ # (use a sensible default)
1752
+ #
1753
+ if @io.eof?
1754
+ @row_sep = $INPUT_RECORD_SEPARATOR
1755
+ break
1756
+ end
1757
+
1758
+ # read ahead a bit
1759
+ sample = @io.read(1024)
1760
+ sample += @io.read(1) if sample[-1..-1] == "\r" and not @io.eof?
1761
+
1762
+ # try to find a standard separator
1763
+ if sample =~ /\r\n?|\n/
1764
+ @row_sep = $&
1765
+ break
1766
+ end
1738
1767
  end
1739
-
1740
- # read ahead a bit
1741
- sample = @io.read(1024)
1742
- sample += @io.read(1) if sample[-1..-1] == "\r" and not @io.eof?
1743
-
1744
- # try to find a standard separator
1745
- if sample =~ /\r\n?|\n/
1746
- @row_sep = $&
1747
- break
1768
+ # tricky seek() clone to work around GzipReader's lack of seek()
1769
+ @io.rewind
1770
+ # reset back to the remembered position
1771
+ while saved_pos > 1024 # avoid loading a lot of data into memory
1772
+ @io.read(1024)
1773
+ saved_pos -= 1024
1748
1774
  end
1775
+ @io.read(saved_pos) if saved_pos.nonzero?
1776
+ rescue IOError # stream not opened for reading
1777
+ @row_sep = $INPUT_RECORD_SEPARATOR
1749
1778
  end
1750
- # tricky seek() clone to work around GzipReader's lack of seek()
1751
- @io.rewind
1752
- # reset back to the remembered position
1753
- while saved_pos > 1024 # avoid loading a lot of data into memory
1754
- @io.read(1024)
1755
- saved_pos -= 1024
1756
- end
1757
- @io.read(saved_pos) if saved_pos.nonzero?
1758
- rescue IOError # stream not opened for reading
1759
- @row_sep = $INPUT_RECORD_SEPARATOR
1760
1779
  end
1761
1780
  end
1762
- end
1763
-
1764
- # establish quoting rules
1765
- do_quote = lambda do |field|
1766
- @quote_char +
1767
- String(field).gsub(@quote_char, @quote_char * 2) +
1768
- @quote_char
1769
- end
1770
- @quote = if options.delete(:force_quotes)
1771
- do_quote
1772
- else
1773
- lambda do |field|
1774
- if field.nil? # represent +nil+ fields as empty unquoted fields
1775
- ""
1776
- else
1777
- field = String(field) # Stringify fields
1778
- # represent empty fields as empty quoted fields
1779
- if field.empty? or
1780
- field.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
1781
- do_quote.call(field)
1781
+
1782
+ # establish quoting rules
1783
+ do_quote = lambda do |field|
1784
+ @quote_char +
1785
+ String(field).gsub(@quote_char, @quote_char * 2) +
1786
+ @quote_char
1787
+ end
1788
+ @quote = if options.delete(:force_quotes)
1789
+ do_quote
1790
+ else
1791
+ lambda do |field|
1792
+ if field.nil? # represent +nil+ fields as empty unquoted fields
1793
+ ""
1782
1794
  else
1783
- field # unquoted field
1795
+ field = String(field) # Stringify fields
1796
+ # represent empty fields as empty quoted fields
1797
+ if field.empty? or
1798
+ field.count("\r\n#{@col_sep}#{@quote_char}").nonzero?
1799
+ do_quote.call(field)
1800
+ else
1801
+ field # unquoted field
1802
+ end
1784
1803
  end
1785
1804
  end
1786
1805
  end
1787
1806
  end
1788
- end
1789
-
1790
- # Pre-compiles parsers and stores them by name for access during reads.
1791
- def init_parsers(options)
1792
- # store the parser behaviors
1793
- @skip_blanks = options.delete(:skip_blanks)
1794
- @encoding = options.delete(:encoding) # nil will use $KCODE
1795
- @field_size_limit = options.delete(:field_size_limit)
1796
-
1797
- # prebuild Regexps for faster parsing
1798
- esc_col_sep = Regexp.escape(@col_sep)
1799
- esc_row_sep = Regexp.escape(@row_sep)
1800
- esc_quote = Regexp.escape(@quote_char)
1801
- @parsers = {
1802
- :any_field => Regexp.new( "[^#{esc_col_sep}]+",
1803
- Regexp::MULTILINE,
1804
- @encoding ),
1805
- :quoted_field => Regexp.new( "^#{esc_quote}(.*)#{esc_quote}$",
1806
- Regexp::MULTILINE,
1807
- @encoding ),
1808
- :stray_quote => Regexp.new( "[^#{esc_quote}]#{esc_quote}[^#{esc_quote}]",
1809
- Regexp::MULTILINE,
1810
- @encoding ),
1811
- # safer than chomp!()
1812
- :line_end => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
1813
- }
1814
- end
1815
-
1816
- #
1817
- # Loads any converters requested during construction.
1818
- #
1819
- # If +field_name+ is set <tt>:converters</tt> (the default) field converters
1820
- # are set. When +field_name+ is <tt>:header_converters</tt> header converters
1821
- # are added instead.
1822
- #
1823
- # The <tt>:unconverted_fields</tt> option is also activated for
1824
- # <tt>:converters</tt> calls, if requested.
1825
- #
1826
- def init_converters(options, field_name = :converters)
1827
- if field_name == :converters
1828
- @unconverted_fields = options.delete(:unconverted_fields)
1807
+
1808
+ # Pre-compiles parsers and stores them by name for access during reads.
1809
+ def init_parsers(options)
1810
+ # store the parser behaviors
1811
+ @skip_blanks = options.delete(:skip_blanks)
1812
+ @encoding = options.delete(:encoding) # nil will use $KCODE
1813
+ @field_size_limit = options.delete(:field_size_limit)
1814
+
1815
+ # prebuild Regexps for faster parsing
1816
+ esc_col_sep = Regexp.escape(@col_sep)
1817
+ esc_row_sep = Regexp.escape(@row_sep)
1818
+ esc_quote = Regexp.escape(@quote_char)
1819
+ @parsers = {
1820
+ :any_field => Regexp.new( "[^#{esc_col_sep}]+",
1821
+ Regexp::MULTILINE,
1822
+ @encoding ),
1823
+ :quoted_field => Regexp.new( "^#{esc_quote}(.*)#{esc_quote}$",
1824
+ Regexp::MULTILINE,
1825
+ @encoding ),
1826
+ :stray_quote => Regexp.new( "[^#{esc_quote}]#{esc_quote}[^#{esc_quote}]",
1827
+ Regexp::MULTILINE,
1828
+ @encoding ),
1829
+ # safer than chomp!()
1830
+ :line_end => Regexp.new("#{esc_row_sep}\\z", nil, @encoding)
1831
+ }
1829
1832
  end
1830
1833
 
1831
- instance_variable_set("@#{field_name}", Array.new)
1832
-
1833
- # find the correct method to add the converters
1834
- convert = method(field_name.to_s.sub(/ers\Z/, ""))
1835
-
1836
- # load converters
1837
- unless options[field_name].nil?
1838
- # allow a single converter not wrapped in an Array
1839
- unless options[field_name].is_a? Array
1840
- options[field_name] = [options[field_name]]
1841
- end
1842
- # load each converter...
1843
- options[field_name].each do |converter|
1844
- if converter.is_a? Proc # custom code block
1845
- convert.call(&converter)
1846
- else # by name
1847
- convert.call(converter)
1834
+ #
1835
+ # Loads any converters requested during construction.
1836
+ #
1837
+ # If +field_name+ is set <tt>:converters</tt> (the default) field converters
1838
+ # are set. When +field_name+ is <tt>:header_converters</tt> header converters
1839
+ # are added instead.
1840
+ #
1841
+ # The <tt>:unconverted_fields</tt> option is also activated for
1842
+ # <tt>:converters</tt> calls, if requested.
1843
+ #
1844
+ def init_converters(options, field_name = :converters)
1845
+ if field_name == :converters
1846
+ @unconverted_fields = options.delete(:unconverted_fields)
1847
+ end
1848
+
1849
+ instance_variable_set("@#{field_name}", Array.new)
1850
+
1851
+ # find the correct method to add the converters
1852
+ convert = method(field_name.to_s.sub(/ers\Z/, ""))
1853
+
1854
+ # load converters
1855
+ unless options[field_name].nil?
1856
+ # allow a single converter not wrapped in an Array
1857
+ unless options[field_name].is_a? Array
1858
+ options[field_name] = [options[field_name]]
1859
+ end
1860
+ # load each converter...
1861
+ options[field_name].each do |converter|
1862
+ if converter.is_a? Proc # custom code block
1863
+ convert.call(&converter)
1864
+ else # by name
1865
+ convert.call(converter)
1866
+ end
1848
1867
  end
1849
1868
  end
1869
+
1870
+ options.delete(field_name)
1850
1871
  end
1851
-
1852
- options.delete(field_name)
1853
- end
1854
-
1855
- # Stores header row settings and loads header converters, if needed.
1856
- def init_headers(options)
1857
- @use_headers = options.delete(:headers)
1858
- @return_headers = options.delete(:return_headers)
1859
- @write_headers = options.delete(:write_headers)
1860
-
1861
- # headers must be delayed until shift(), in case they need a row of content
1862
- @headers = nil
1863
-
1864
- init_converters(options, :header_converters)
1865
- end
1866
-
1867
- #
1868
- # The actual work method for adding converters, used by both
1869
- # FasterCSV.convert() and FasterCSV.header_convert().
1870
- #
1871
- # This method requires the +var_name+ of the instance variable to place the
1872
- # converters in, the +const+ Hash to lookup named converters in, and the
1873
- # normal parameters of the FasterCSV.convert() and FasterCSV.header_convert()
1874
- # methods.
1875
- #
1876
- def add_converter(var_name, const, name = nil, &converter)
1877
- if name.nil? # custom converter
1878
- instance_variable_get("@#{var_name}") << converter
1879
- else # named converter
1880
- combo = const[name]
1881
- case combo
1882
- when Array # combo converter
1883
- combo.each do |converter_name|
1884
- add_converter(var_name, const, converter_name)
1872
+
1873
+ # Stores header row settings and loads header converters, if needed.
1874
+ def init_headers(options)
1875
+ @use_headers = options.delete(:headers)
1876
+ @return_headers = options.delete(:return_headers)
1877
+ @write_headers = options.delete(:write_headers)
1878
+
1879
+ # headers must be delayed until shift(), in case they need a row of content
1880
+ @headers = nil
1881
+
1882
+ init_converters(options, :header_converters)
1883
+ end
1884
+
1885
+ #
1886
+ # The actual work method for adding converters, used by both
1887
+ # FasterCSV.convert() and FasterCSV.header_convert().
1888
+ #
1889
+ # This method requires the +var_name+ of the instance variable to place the
1890
+ # converters in, the +const+ Hash to lookup named converters in, and the
1891
+ # normal parameters of the FasterCSV.convert() and FasterCSV.header_convert()
1892
+ # methods.
1893
+ #
1894
+ def add_converter(var_name, const, name = nil, &converter)
1895
+ if name.nil? # custom converter
1896
+ instance_variable_get("@#{var_name}") << converter
1897
+ else # named converter
1898
+ combo = const[name]
1899
+ case combo
1900
+ when Array # combo converter
1901
+ combo.each do |converter_name|
1902
+ add_converter(var_name, const, converter_name)
1903
+ end
1904
+ else # individual named converter
1905
+ instance_variable_get("@#{var_name}") << combo
1885
1906
  end
1886
- else # individual named converter
1887
- instance_variable_get("@#{var_name}") << combo
1888
1907
  end
1889
1908
  end
1890
- end
1891
-
1892
- #
1893
- # Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
1894
- # if +headers+ is passed as +true+, returning the converted field set. Any
1895
- # converter that changes the field into something other than a String halts
1896
- # the pipeline of conversion for that field. This is primarily an efficiency
1897
- # shortcut.
1898
- #
1899
- def convert_fields(fields, headers = false)
1900
- # see if we are converting headers or fields
1901
- converters = headers ? @header_converters : @converters
1902
-
1903
- fields.enum_for(:each_with_index).map do |field, index| # map_with_index
1904
- converters.each do |converter|
1905
- field = if converter.arity == 1 # straight field converter
1906
- converter[field]
1907
- else # FieldInfo converter
1908
- header = @use_headers && !headers ? @headers[index] : nil
1909
- converter[field, FieldInfo.new(index, lineno, header)]
1909
+
1910
+ #
1911
+ # Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
1912
+ # if +headers+ is passed as +true+, returning the converted field set. Any
1913
+ # converter that changes the field into something other than a String halts
1914
+ # the pipeline of conversion for that field. This is primarily an efficiency
1915
+ # shortcut.
1916
+ #
1917
+ def convert_fields(fields, headers = false)
1918
+ # see if we are converting headers or fields
1919
+ converters = headers ? @header_converters : @converters
1920
+
1921
+ fields.enum_for(:each_with_index).map do |field, index| # map_with_index
1922
+ converters.each do |converter|
1923
+ field = if converter.arity == 1 # straight field converter
1924
+ converter[field]
1925
+ else # FieldInfo converter
1926
+ header = @use_headers && !headers ? @headers[index] : nil
1927
+ converter[field, FieldInfo.new(index, lineno, header)]
1928
+ end
1929
+ break unless field.is_a? String # short-circuit pipeline for speed
1910
1930
  end
1911
- break unless field.is_a? String # short-curcuit pipeline for speed
1931
+ field # return final state of each field, converted or original
1912
1932
  end
1913
- field # return final state of each field, converted or original
1914
1933
  end
1915
- end
1916
-
1917
- #
1918
- # This methods is used to turn a finished +row+ into a FasterCSV::Row. Header
1919
- # rows are also dealt with here, either by returning a FasterCSV::Row with
1920
- # identical headers and fields (save that the fields do not go through the
1921
- # converters) or by reading past them to return a field row. Headers are also
1922
- # saved in <tt>@headers</tt> for use in future rows.
1923
- #
1924
- # When +nil+, +row+ is assumed to be a header row not based on an actual row
1925
- # of the stream.
1926
- #
1927
- def parse_headers(row = nil)
1928
- if @headers.nil? # header row
1929
- @headers = case @use_headers # save headers
1930
- # Array of headers
1931
- when Array then @use_headers
1932
- # CSV header String
1933
- when String
1934
- self.class.parse_line( @use_headers,
1935
- :col_sep => @col_sep,
1936
- :row_sep => @row_sep,
1937
- :quote_char => @quote_char )
1938
- # first row is headers
1939
- else row
1940
- end
1941
-
1942
- # prepare converted and unconverted copies
1943
- row = @headers if row.nil?
1944
- @headers = convert_fields(@headers, true)
1945
-
1946
- if @return_headers # return headers
1947
- return FasterCSV::Row.new(@headers, row, true)
1948
- elsif not [Array, String].include? @use_headers.class # skip to field row
1949
- return shift
1934
+
1935
+ #
1936
+ # This method is used to turn a finished +row+ into a FasterCSV::Row. Header
1937
+ # rows are also dealt with here, either by returning a FasterCSV::Row with
1938
+ # identical headers and fields (save that the fields do not go through the
1939
+ # converters) or by reading past them to return a field row. Headers are also
1940
+ # saved in <tt>@headers</tt> for use in future rows.
1941
+ #
1942
+ # When +nil+, +row+ is assumed to be a header row not based on an actual row
1943
+ # of the stream.
1944
+ #
1945
+ def parse_headers(row = nil)
1946
+ if @headers.nil? # header row
1947
+ @headers = case @use_headers # save headers
1948
+ # Array of headers
1949
+ when Array then @use_headers
1950
+ # CSV header String
1951
+ when String
1952
+ self.class.parse_line( @use_headers,
1953
+ :col_sep => @col_sep,
1954
+ :row_sep => @row_sep,
1955
+ :quote_char => @quote_char )
1956
+ # first row is headers
1957
+ else row
1958
+ end
1959
+
1960
+ # prepare converted and unconverted copies
1961
+ row = @headers if row.nil?
1962
+ @headers = convert_fields(@headers, true)
1963
+
1964
+ if @return_headers # return headers
1965
+ return FasterCSV::Row.new(@headers, row, true)
1966
+ elsif not [Array, String].include? @use_headers.class # skip to field row
1967
+ return shift
1968
+ end
1950
1969
  end
1970
+
1971
+ FasterCSV::Row.new(@headers, convert_fields(row)) # field row
1951
1972
  end
1952
1973
 
1953
- FasterCSV::Row.new(@headers, convert_fields(row)) # field row
1954
- end
1955
-
1956
- #
1957
- # Thiw methods injects an instance variable <tt>unconverted_fields</tt> into
1958
- # +row+ and an accessor method for it called unconverted_fields(). The
1959
- # variable is set to the contents of +fields+.
1960
- #
1961
- def add_unconverted_fields(row, fields)
1962
- class << row
1963
- attr_reader :unconverted_fields
1974
+ #
1975
+ # This method injects an instance variable <tt>unconverted_fields</tt> into
1976
+ # +row+ and an accessor method for it called unconverted_fields(). The
1977
+ # variable is set to the contents of +fields+.
1978
+ #
1979
+ def add_unconverted_fields(row, fields)
1980
+ class << row
1981
+ attr_reader :unconverted_fields
1982
+ end
1983
+ row.instance_eval { @unconverted_fields = fields }
1984
+ row
1964
1985
  end
1965
- row.instance_eval { @unconverted_fields = fields }
1966
- row
1967
1986
  end
1968
1987
  end
1969
1988