csv 1.0.2 → 3.2.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/NEWS.md +868 -0
  3. data/README.md +6 -3
  4. data/doc/csv/arguments/io.rdoc +5 -0
  5. data/doc/csv/options/common/col_sep.rdoc +57 -0
  6. data/doc/csv/options/common/quote_char.rdoc +42 -0
  7. data/doc/csv/options/common/row_sep.rdoc +91 -0
  8. data/doc/csv/options/generating/force_quotes.rdoc +17 -0
  9. data/doc/csv/options/generating/quote_empty.rdoc +12 -0
  10. data/doc/csv/options/generating/write_converters.rdoc +25 -0
  11. data/doc/csv/options/generating/write_empty_value.rdoc +15 -0
  12. data/doc/csv/options/generating/write_headers.rdoc +29 -0
  13. data/doc/csv/options/generating/write_nil_value.rdoc +14 -0
  14. data/doc/csv/options/parsing/converters.rdoc +46 -0
  15. data/doc/csv/options/parsing/empty_value.rdoc +13 -0
  16. data/doc/csv/options/parsing/field_size_limit.rdoc +39 -0
  17. data/doc/csv/options/parsing/header_converters.rdoc +43 -0
  18. data/doc/csv/options/parsing/headers.rdoc +63 -0
  19. data/doc/csv/options/parsing/liberal_parsing.rdoc +38 -0
  20. data/doc/csv/options/parsing/nil_value.rdoc +12 -0
  21. data/doc/csv/options/parsing/return_headers.rdoc +22 -0
  22. data/doc/csv/options/parsing/skip_blanks.rdoc +31 -0
  23. data/doc/csv/options/parsing/skip_lines.rdoc +37 -0
  24. data/doc/csv/options/parsing/strip.rdoc +15 -0
  25. data/doc/csv/options/parsing/unconverted_fields.rdoc +27 -0
  26. data/doc/csv/recipes/filtering.rdoc +158 -0
  27. data/doc/csv/recipes/generating.rdoc +298 -0
  28. data/doc/csv/recipes/parsing.rdoc +545 -0
  29. data/doc/csv/recipes/recipes.rdoc +6 -0
  30. data/lib/csv/core_ext/array.rb +1 -1
  31. data/lib/csv/core_ext/string.rb +1 -1
  32. data/lib/csv/fields_converter.rb +89 -0
  33. data/lib/csv/input_record_separator.rb +18 -0
  34. data/lib/csv/parser.rb +1290 -0
  35. data/lib/csv/row.rb +505 -136
  36. data/lib/csv/table.rb +791 -114
  37. data/lib/csv/version.rb +1 -1
  38. data/lib/csv/writer.rb +210 -0
  39. data/lib/csv.rb +2432 -1329
  40. metadata +66 -13
  41. data/news.md +0 -112
@@ -0,0 +1,545 @@
1
+ == Recipes for Parsing \CSV
2
+
3
+ These recipes are specific code examples for specific \CSV parsing tasks.
4
+
5
+ For other recipes, see {Recipes for CSV}[./recipes_rdoc.html].
6
+
7
+ All code snippets on this page assume that the following has been executed:
8
+ require 'csv'
9
+
10
+ === Contents
11
+
12
+ - {Source Formats}[#label-Source+Formats]
13
+ - {Parsing from a String}[#label-Parsing+from+a+String]
14
+ - {Recipe: Parse from String with Headers}[#label-Recipe-3A+Parse+from+String+with+Headers]
15
+ - {Recipe: Parse from String Without Headers}[#label-Recipe-3A+Parse+from+String+Without+Headers]
16
+ - {Parsing from a File}[#label-Parsing+from+a+File]
17
+ - {Recipe: Parse from File with Headers}[#label-Recipe-3A+Parse+from+File+with+Headers]
18
+ - {Recipe: Parse from File Without Headers}[#label-Recipe-3A+Parse+from+File+Without+Headers]
19
+ - {Parsing from an IO Stream}[#label-Parsing+from+an+IO+Stream]
20
+ - {Recipe: Parse from IO Stream with Headers}[#label-Recipe-3A+Parse+from+IO+Stream+with+Headers]
21
+ - {Recipe: Parse from IO Stream Without Headers}[#label-Recipe-3A+Parse+from+IO+Stream+Without+Headers]
22
+ - {RFC 4180 Compliance}[#label-RFC+4180+Compliance]
23
+ - {Row Separator}[#label-Row+Separator]
24
+ - {Recipe: Handle Compliant Row Separator}[#label-Recipe-3A+Handle+Compliant+Row+Separator]
25
+ - {Recipe: Handle Non-Compliant Row Separator}[#label-Recipe-3A+Handle+Non-Compliant+Row+Separator]
26
+ - {Column Separator}[#label-Column+Separator]
27
+ - {Recipe: Handle Compliant Column Separator}[#label-Recipe-3A+Handle+Compliant+Column+Separator]
28
+ - {Recipe: Handle Non-Compliant Column Separator}[#label-Recipe-3A+Handle+Non-Compliant+Column+Separator]
29
+ - {Quote Character}[#label-Quote+Character]
30
+ - {Recipe: Handle Compliant Quote Character}[#label-Recipe-3A+Handle+Compliant+Quote+Character]
31
+ - {Recipe: Handle Non-Compliant Quote Character}[#label-Recipe-3A+Handle+Non-Compliant+Quote+Character]
32
+ - {Recipe: Allow Liberal Parsing}[#label-Recipe-3A+Allow+Liberal+Parsing]
33
+ - {Special Handling}[#label-Special+Handling]
34
+ - {Special Line Handling}[#label-Special+Line+Handling]
35
+ - {Recipe: Ignore Blank Lines}[#label-Recipe-3A+Ignore+Blank+Lines]
36
+ - {Recipe: Ignore Selected Lines}[#label-Recipe-3A+Ignore+Selected+Lines]
37
+ - {Special Field Handling}[#label-Special+Field+Handling]
38
+ - {Recipe: Strip Fields}[#label-Recipe-3A+Strip+Fields]
39
+ - {Recipe: Handle Null Fields}[#label-Recipe-3A+Handle+Null+Fields]
40
+ - {Recipe: Handle Empty Fields}[#label-Recipe-3A+Handle+Empty+Fields]
41
+ - {Converting Fields}[#label-Converting+Fields]
42
+ - {Converting Fields to Objects}[#label-Converting+Fields+to+Objects]
43
+ - {Recipe: Convert Fields to Integers}[#label-Recipe-3A+Convert+Fields+to+Integers]
44
+ - {Recipe: Convert Fields to Floats}[#label-Recipe-3A+Convert+Fields+to+Floats]
45
+ - {Recipe: Convert Fields to Numerics}[#label-Recipe-3A+Convert+Fields+to+Numerics]
46
+ - {Recipe: Convert Fields to Dates}[#label-Recipe-3A+Convert+Fields+to+Dates]
47
+ - {Recipe: Convert Fields to DateTimes}[#label-Recipe-3A+Convert+Fields+to+DateTimes]
48
+ - {Recipe: Convert Assorted Fields to Objects}[#label-Recipe-3A+Convert+Assorted+Fields+to+Objects]
49
+ - {Recipe: Convert Fields to Other Objects}[#label-Recipe-3A+Convert+Fields+to+Other+Objects]
50
+ - {Recipe: Filter Field Strings}[#label-Recipe-3A+Filter+Field+Strings]
51
+ - {Recipe: Register Field Converters}[#label-Recipe-3A+Register+Field+Converters]
52
+ - {Using Multiple Field Converters}[#label-Using+Multiple+Field+Converters]
53
+ - {Recipe: Specify Multiple Field Converters in Option :converters}[#label-Recipe-3A+Specify+Multiple+Field+Converters+in+Option+-3Aconverters]
54
+ - {Recipe: Specify Multiple Field Converters in a Custom Converter List}[#label-Recipe-3A+Specify+Multiple+Field+Converters+in+a+Custom+Converter+List]
55
+ - {Converting Headers}[#label-Converting+Headers]
56
+ - {Recipe: Convert Headers to Lowercase}[#label-Recipe-3A+Convert+Headers+to+Lowercase]
57
+ - {Recipe: Convert Headers to Symbols}[#label-Recipe-3A+Convert+Headers+to+Symbols]
58
+ - {Recipe: Filter Header Strings}[#label-Recipe-3A+Filter+Header+Strings]
59
+ - {Recipe: Register Header Converters}[#label-Recipe-3A+Register+Header+Converters]
60
+ - {Using Multiple Header Converters}[#label-Using+Multiple+Header+Converters]
61
+ - {Recipe: Specify Multiple Header Converters in Option :header_converters}[#label-Recipe-3A+Specify+Multiple+Header+Converters+in+Option+-3Aheader_converters]
62
+ - {Recipe: Specify Multiple Header Converters in a Custom Header Converter List}[#label-Recipe-3A+Specify+Multiple+Header+Converters+in+a+Custom+Header+Converter+List]
63
+ - {Diagnostics}[#label-Diagnostics]
64
+ - {Recipe: Capture Unconverted Fields}[#label-Recipe-3A+Capture+Unconverted+Fields]
65
+ - {Recipe: Capture Field Info}[#label-Recipe-3A+Capture+Field+Info]
66
+
67
+ === Source Formats
68
+
69
+ You can parse \CSV data from a \String, from a \File (via its path), or from an \IO stream.
70
+
71
+ ==== Parsing from a \String
72
+
73
+ You can parse \CSV data from a \String, with or without headers.
74
+
75
+ ===== Recipe: Parse from \String with Headers
76
+
77
+ Use class method CSV.parse with option +headers+ to read a source \String all at once
78
+ (may have memory resource implications):
79
+ string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
80
+ CSV.parse(string, headers: true) # => #<CSV::Table mode:col_or_row row_count:4>
81
+
82
+ Use instance method CSV#each with option +headers+ to read a source \String one row at a time:
83
+ CSV.new(string, headers: true).each do |row|
84
+ p row
85
+ end
86
+ Output:
87
+ #<CSV::Row "Name":"foo" "Value":"0">
88
+ #<CSV::Row "Name":"bar" "Value":"1">
89
+ #<CSV::Row "Name":"baz" "Value":"2">
90
+
91
+ ===== Recipe: Parse from \String Without Headers
92
+
93
+ Use class method CSV.parse without option +headers+ to read a source \String all at once
94
+ (may have memory resource implications):
95
+ string = "foo,0\nbar,1\nbaz,2\n"
96
+ CSV.parse(string) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
97
+
98
+ Use instance method CSV#each without option +headers+ to read a source \String one row at a time:
99
+ CSV.new(string).each do |row|
100
+ p row
101
+ end
102
+ Output:
103
+ ["foo", "0"]
104
+ ["bar", "1"]
105
+ ["baz", "2"]
106
+
107
+ ==== Parsing from a \File
108
+
109
+ You can parse \CSV data from a \File, with or without headers.
110
+
111
+ ===== Recipe: Parse from \File with Headers
112
+
113
+ Use class method CSV.read with option +headers+ to read a file all at once:
114
+ string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
115
+ path = 't.csv'
116
+ File.write(path, string)
117
+ CSV.read(path, headers: true) # => #<CSV::Table mode:col_or_row row_count:4>
118
+
119
+ Use class method CSV.foreach with option +headers+ to read one row at a time:
120
+ CSV.foreach(path, headers: true) do |row|
121
+ p row
122
+ end
123
+ Output:
124
+ #<CSV::Row "Name":"foo" "Value":"0">
125
+ #<CSV::Row "Name":"bar" "Value":"1">
126
+ #<CSV::Row "Name":"baz" "Value":"2">
127
+
128
+ ===== Recipe: Parse from \File Without Headers
129
+
130
+ Use class method CSV.read without option +headers+ to read a file all at once:
131
+ string = "foo,0\nbar,1\nbaz,2\n"
132
+ path = 't.csv'
133
+ File.write(path, string)
134
+ CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
135
+
136
+ Use class method CSV.foreach without option +headers+ to read one row at a time:
137
+ CSV.foreach(path) do |row|
138
+ p row
139
+ end
140
+ Output:
141
+ ["foo", "0"]
142
+ ["bar", "1"]
143
+ ["baz", "2"]
144
+
145
+ ==== Parsing from an \IO Stream
146
+
147
+ You can parse \CSV data from an \IO stream, with or without headers.
148
+
149
+ ===== Recipe: Parse from \IO Stream with Headers
150
+
151
+ Use class method CSV.parse with option +headers+ to read an \IO stream all at once:
152
+ string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
153
+ path = 't.csv'
154
+ File.write(path, string)
155
+ File.open(path) do |file|
156
+ CSV.parse(file, headers: true)
157
+ end # => #<CSV::Table mode:col_or_row row_count:4>
158
+
159
+ Use class method CSV.foreach with option +headers+ to read one row at a time:
160
+ File.open(path) do |file|
161
+ CSV.foreach(file, headers: true) do |row|
162
+ p row
163
+ end
164
+ end
165
+ Output:
166
+ #<CSV::Row "Name":"foo" "Value":"0">
167
+ #<CSV::Row "Name":"bar" "Value":"1">
168
+ #<CSV::Row "Name":"baz" "Value":"2">
169
+
170
+ ===== Recipe: Parse from \IO Stream Without Headers
171
+
172
+ Use class method CSV.parse without option +headers+ to read an \IO stream all at once:
173
+ string = "foo,0\nbar,1\nbaz,2\n"
174
+ path = 't.csv'
175
+ File.write(path, string)
176
+ File.open(path) do |file|
177
+ CSV.parse(file)
178
+ end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
179
+
180
+ Use class method CSV.foreach without option +headers+ to read one row at a time:
181
+ File.open(path) do |file|
182
+ CSV.foreach(file) do |row|
183
+ p row
184
+ end
185
+ end
186
+ Output:
187
+ ["foo", "0"]
188
+ ["bar", "1"]
189
+ ["baz", "2"]
190
+
191
+ === RFC 4180 Compliance
192
+
193
+ By default, \CSV parses data that is compliant with
194
+ {RFC 4180}[https://tools.ietf.org/html/rfc4180]
195
+ with respect to:
196
+ - Row separator.
197
+ - Column separator.
198
+ - Quote character.
199
+
200
+ ==== Row Separator
201
+
202
+ RFC 4180 specifies the row separator CRLF (Ruby <tt>"\r\n"</tt>).
203
+
204
+ Although the \CSV default row separator is <tt>"\n"</tt>,
205
+ the parser also by default handles row separator <tt>"\r"</tt> and the RFC-compliant <tt>"\r\n"</tt>.
206
+
207
+ ===== Recipe: Handle Compliant Row Separator
208
+
209
+ For strict compliance, use option +:row_sep+ to specify row separator <tt>"\r\n"</tt>,
210
+ which allows the compliant row separator:
211
+ source = "foo,1\r\nbar,1\r\nbaz,2\r\n"
212
+ CSV.parse(source, row_sep: "\r\n") # => [["foo", "1"], ["bar", "1"], ["baz", "2"]]
213
+ But rejects other row separators:
214
+ source = "foo,1\nbar,1\nbaz,2\n"
215
+ CSV.parse(source, row_sep: "\r\n") # Raises MalformedCSVError
216
+ source = "foo,1\rbar,1\rbaz,2\r"
217
+ CSV.parse(source, row_sep: "\r\n") # Raises MalformedCSVError
218
+ source = "foo,1\n\rbar,1\n\rbaz,2\n\r"
219
+ CSV.parse(source, row_sep: "\r\n") # Raises MalformedCSVError
220
+
221
+ ===== Recipe: Handle Non-Compliant Row Separator
222
+
223
+ For data with non-compliant row separators, use option +:row_sep+.
224
+ This example source uses semicolon (<tt>";"</tt>) as its row separator:
225
+ source = "foo,1;bar,1;baz,2;"
226
+ CSV.parse(source, row_sep: ';') # => [["foo", "1"], ["bar", "1"], ["baz", "2"]]
227
+
228
+ ==== Column Separator
229
+
230
+ RFC 4180 specifies column separator COMMA (Ruby <tt>","</tt>).
231
+
232
+ ===== Recipe: Handle Compliant Column Separator
233
+
234
+ Because the \CSV default column separator is <tt>","</tt>,
235
+ you need not specify option +:col_sep+ for compliant data:
236
+ source = "foo,1\nbar,1\nbaz,2\n"
237
+ CSV.parse(source) # => [["foo", "1"], ["bar", "1"], ["baz", "2"]]
238
+
239
+ ===== Recipe: Handle Non-Compliant Column Separator
240
+
241
+ For data with non-compliant column separators, use option +:col_sep+.
242
+ This example source uses TAB (<tt>"\t"</tt>) as its column separator:
243
+ source = "foo\t1\nbar\t1\nbaz\t2\n"
244
+ CSV.parse(source, col_sep: "\t") # => [["foo", "1"], ["bar", "1"], ["baz", "2"]]
245
+
246
+ ==== Quote Character
247
+
248
+ RFC 4180 specifies quote character DQUOTE (Ruby <tt>"\""</tt>).
249
+
250
+ ===== Recipe: Handle Compliant Quote Character
251
+
252
+ Because the \CSV default quote character is <tt>"\""</tt>,
253
+ you need not specify option +:quote_char+ for compliant data:
254
+ source = "\"foo\",\"1\"\n\"bar\",\"1\"\n\"baz\",\"2\"\n"
255
+ CSV.parse(source) # => [["foo", "1"], ["bar", "1"], ["baz", "2"]]
256
+
257
+ ===== Recipe: Handle Non-Compliant Quote Character
258
+
259
+ For data with non-compliant quote characters, use option +:quote_char+.
260
+ This example source uses SQUOTE (<tt>"'"</tt>) as its quote character:
261
+ source = "'foo','1'\n'bar','1'\n'baz','2'\n"
262
+ CSV.parse(source, quote_char: "'") # => [["foo", "1"], ["bar", "1"], ["baz", "2"]]
263
+
264
+ ==== Recipe: Allow Liberal Parsing
265
+
266
+ Use option +:liberal_parsing+ to specify that \CSV should
267
+ attempt to parse input not conformant with RFC 4180, such as double quotes in unquoted fields:
268
+ source = 'is,this "three, or four",fields'
269
+ CSV.parse(source) # Raises MalformedCSVError
270
+ CSV.parse(source, liberal_parsing: true) # => [["is", "this \"three", " or four\"", "fields"]]
271
+
272
+ === Special Handling
273
+
274
+ You can use parsing options to specify special handling for certain lines and fields.
275
+
276
+ ==== Special Line Handling
277
+
278
+ Use parsing options to specify special handling for blank lines, or for other selected lines.
279
+
280
+ ===== Recipe: Ignore Blank Lines
281
+
282
+ Use option +:skip_blanks+ to ignore blank lines:
283
+ source = <<-EOT
284
+ foo,0
285
+
286
+ bar,1
287
+ baz,2
288
+
289
+ ,
290
+ EOT
291
+ parsed = CSV.parse(source, skip_blanks: true)
292
+ parsed # => [["foo", "0"], ["bar", "1"], ["baz", "2"], [nil, nil]]
293
+
294
+ ===== Recipe: Ignore Selected Lines
295
+
296
+ Use option +:skip_lines+ to ignore selected lines:
297
+ source = <<-EOT
298
+ # Comment
299
+ foo,0
300
+ bar,1
301
+ baz,2
302
+ # Another comment
303
+ EOT
304
+ parsed = CSV.parse(source, skip_lines: /^#/)
305
+ parsed # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
306
+
307
+ ==== Special Field Handling
308
+
309
+ Use parsing options to specify special handling for certain field values.
310
+
311
+ ===== Recipe: Strip Fields
312
+
313
+ Use option +:strip+ to strip parsed field values:
314
+ CSV.parse_line(' a , b ', strip: true) # => ["a", "b"]
315
+
316
+ ===== Recipe: Handle Null Fields
317
+
318
+ Use option +:nil_value+ to specify a value that will replace each field
319
+ that is null (no text):
320
+ CSV.parse_line('a,,b,,c', nil_value: 0) # => ["a", 0, "b", 0, "c"]
321
+
322
+ ===== Recipe: Handle Empty Fields
323
+
324
+ Use option +:empty_value+ to specify a value that will replace each field
325
+ that is empty (\String of length 0):
326
+ CSV.parse_line('a,"",b,"",c', empty_value: 'x') # => ["a", "x", "b", "x", "c"]
327
+
328
+ === Converting Fields
329
+
330
+ You can use field converters to change parsed \String fields into other objects,
331
+ or to otherwise modify the \String fields.
332
+
333
+ ==== Converting Fields to Objects
334
+
335
+ Use field converters to change parsed \String objects into other, more specific, objects.
336
+
337
+ There are built-in field converters for converting to objects of certain classes:
338
+ - \Float
339
+ - \Integer
340
+ - \Date
341
+ - \DateTime
342
+
343
+ Other built-in field converters include:
344
+ - +:numeric+: converts to \Integer and \Float.
345
+ - +:all+: converts to \DateTime, \Integer, \Float.
346
+
347
+ You can also define field converters to convert to objects of other classes.
348
+
349
+ ===== Recipe: Convert Fields to Integers
350
+
351
+ Convert fields to \Integer objects using built-in converter +:integer+:
352
+ source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
353
+ parsed = CSV.parse(source, headers: true, converters: :integer)
354
+ parsed.map {|row| row['Value'].class} # => [Integer, Integer, Integer]
355
+
356
+ ===== Recipe: Convert Fields to Floats
357
+
358
+ Convert fields to \Float objects using built-in converter +:float+:
359
+ source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
360
+ parsed = CSV.parse(source, headers: true, converters: :float)
361
+ parsed.map {|row| row['Value'].class} # => [Float, Float, Float]
362
+
363
+ ===== Recipe: Convert Fields to Numerics
364
+
365
+ Convert fields to \Integer and \Float objects using built-in converter +:numeric+:
366
+ source = "Name,Value\nfoo,0\nbar,1.1\nbaz,2.2\n"
367
+ parsed = CSV.parse(source, headers: true, converters: :numeric)
368
+ parsed.map {|row| row['Value'].class} # => [Integer, Float, Float]
369
+
370
+ ===== Recipe: Convert Fields to Dates
371
+
372
+ Convert fields to \Date objects using built-in converter +:date+:
373
+ source = "Name,Date\nfoo,2001-02-03\nbar,2001-02-04\nbaz,2001-02-03\n"
374
+ parsed = CSV.parse(source, headers: true, converters: :date)
375
+ parsed.map {|row| row['Date'].class} # => [Date, Date, Date]
376
+
377
+ ===== Recipe: Convert Fields to DateTimes
378
+
379
+ Convert fields to \DateTime objects using built-in converter +:date_time+:
380
+ source = "Name,DateTime\nfoo,2001-02-03\nbar,2001-02-04\nbaz,2020-05-07T14:59:00-05:00\n"
381
+ parsed = CSV.parse(source, headers: true, converters: :date_time)
382
+ parsed.map {|row| row['DateTime'].class} # => [DateTime, DateTime, DateTime]
383
+
384
+ ===== Recipe: Convert Assorted Fields to Objects
385
+
386
+ Convert assorted fields to objects using built-in converter +:all+:
387
+ source = "Type,Value\nInteger,0\nFloat,1.0\nDateTime,2001-02-04\n"
388
+ parsed = CSV.parse(source, headers: true, converters: :all)
389
+ parsed.map {|row| row['Value'].class} # => [Integer, Float, DateTime]
390
+
391
+ ===== Recipe: Convert Fields to Other Objects
392
+
393
+ Define a custom field converter to convert \String fields into other objects.
394
+ This example defines and uses a custom field converter
395
+ that converts each column-1 value to a \Rational object:
396
+ rational_converter = proc do |field, field_context|
397
+ field_context.index == 1 ? field.to_r : field
398
+ end
399
+ source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
400
+ parsed = CSV.parse(source, headers: true, converters: rational_converter)
401
+ parsed.map {|row| row['Value'].class} # => [Rational, Rational, Rational]
402
+
403
+ ==== Recipe: Filter Field Strings
404
+
405
+ Define a custom field converter to modify \String fields.
406
+ This example defines and uses a custom field converter
407
+ that strips whitespace from each field value:
408
+ strip_converter = proc {|field| field.strip }
409
+ source = "Name,Value\n foo , 0 \n bar , 1 \n baz , 2 \n"
410
+ parsed = CSV.parse(source, headers: true, converters: strip_converter)
411
+ parsed['Name'] # => ["foo", "bar", "baz"]
412
+ parsed['Value'] # => ["0", "1", "2"]
413
+
414
+ ==== Recipe: Register Field Converters
415
+
416
+ Register a custom field converter, assigning it a name;
417
+ then refer to the converter by its name:
418
+ rational_converter = proc do |field, field_context|
419
+ field_context.index == 1 ? field.to_r : field
420
+ end
421
+ CSV::Converters[:rational] = rational_converter
422
+ source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
423
+ parsed = CSV.parse(source, headers: true, converters: :rational)
424
+ parsed['Value'] # => [(0/1), (1/1), (2/1)]
425
+
426
+ ==== Using Multiple Field Converters
427
+
428
+ You can use multiple field converters in either of these ways:
429
+ - Specify converters in option +:converters+.
430
+ - Specify converters in a custom converter list.
431
+
432
+ ===== Recipe: Specify Multiple Field Converters in Option +:converters+
433
+
434
+ Apply multiple field converters by specifying them in option +:converters+:
435
+ source = "Name,Value\nfoo,0\nbar,1.0\nbaz,2.0\n"
436
+ parsed = CSV.parse(source, headers: true, converters: [:integer, :float])
437
+ parsed['Value'] # => [0, 1.0, 2.0]
438
+
439
+ ===== Recipe: Specify Multiple Field Converters in a Custom Converter List
440
+
441
+ Apply multiple field converters by defining and registering a custom converter list:
442
+ strip_converter = proc {|field| field.strip }
443
+ CSV::Converters[:strip] = strip_converter
444
+ CSV::Converters[:my_converters] = [:integer, :float, :strip]
445
+ source = "Name,Value\n foo , 0 \n bar , 1.0 \n baz , 2.0 \n"
446
+ parsed = CSV.parse(source, headers: true, converters: :my_converters)
447
+ parsed['Name'] # => ["foo", "bar", "baz"]
448
+ parsed['Value'] # => [0, 1.0, 2.0]
449
+
450
+ === Converting Headers
451
+
452
+ You can use header converters to modify parsed \String headers.
453
+
454
+ Built-in header converters include:
455
+ - +:symbol+: converts \String header to \Symbol.
456
+ - +:downcase+: converts \String header to lowercase.
457
+
458
+ You can also define header converters to otherwise modify header \Strings.
459
+
460
+ ==== Recipe: Convert Headers to Lowercase
461
+
462
+ Convert headers to lowercase using built-in converter +:downcase+:
463
+ source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
464
+ parsed = CSV.parse(source, headers: true, header_converters: :downcase)
465
+ parsed.headers # => ["name", "value"]
466
+
467
+ ==== Recipe: Convert Headers to Symbols
468
+
469
+ Convert headers to downcased Symbols using built-in converter +:symbol+:
470
+ source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
471
+ parsed = CSV.parse(source, headers: true, header_converters: :symbol)
472
+ parsed.headers # => [:name, :value]
473
+ parsed.headers.map {|header| header.class} # => [Symbol, Symbol]
474
+
475
+ ==== Recipe: Filter Header Strings
476
+
477
+ Define a custom header converter to modify \String headers.
478
+ This example defines and uses a custom header converter
479
+ that capitalizes each header \String:
480
+ capitalize_converter = proc {|header| header.capitalize }
481
+ source = "NAME,VALUE\nfoo,0\nbar,1\nbaz,2\n"
482
+ parsed = CSV.parse(source, headers: true, header_converters: capitalize_converter)
483
+ parsed.headers # => ["Name", "Value"]
484
+
485
+ ==== Recipe: Register Header Converters
486
+
487
+ Register a custom header converter, assigning it a name;
488
+ then refer to the converter by its name:
489
+ capitalize_converter = proc {|header| header.capitalize }
490
+ CSV::HeaderConverters[:capitalize] = capitalize_converter
491
+ source = "NAME,VALUE\nfoo,0\nbar,1\nbaz,2\n"
492
+ parsed = CSV.parse(source, headers: true, header_converters: :capitalize)
493
+ parsed.headers # => ["Name", "Value"]
494
+
495
+ ==== Using Multiple Header Converters
496
+
497
+ You can use multiple header converters in either of these ways:
498
+ - Specify header converters in option +:header_converters+.
499
+ - Specify header converters in a custom header converter list.
500
+
501
+ ===== Recipe: Specify Multiple Header Converters in Option +:header_converters+
502
+
503
+ Apply multiple header converters by specifying them in option +:header_converters+:
504
+ source = "Name,Value\nfoo,0\nbar,1.0\nbaz,2.0\n"
505
+ parsed = CSV.parse(source, headers: true, header_converters: [:downcase, :symbol])
506
+ parsed.headers # => [:name, :value]
507
+
508
+ ===== Recipe: Specify Multiple Header Converters in a Custom Header Converter List
509
+
510
+ Apply multiple header converters by defining and registering a custom header converter list:
511
+ CSV::HeaderConverters[:my_header_converters] = [:symbol, :downcase]
512
+ source = "NAME,VALUE\nfoo,0\nbar,1.0\nbaz,2.0\n"
513
+ parsed = CSV.parse(source, headers: true, header_converters: :my_header_converters)
514
+ parsed.headers # => [:name, :value]
515
+
516
+ === Diagnostics
517
+
518
+ ==== Recipe: Capture Unconverted Fields
519
+
520
+ To capture unconverted field values, use option +:unconverted_fields+:
521
+ source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
522
+ parsed = CSV.parse(source, converters: :integer, unconverted_fields: true)
523
+ parsed # => [["Name", "Value"], ["foo", 0], ["bar", 1], ["baz", 2]]
524
+ parsed.each {|row| p row.unconverted_fields }
525
+ Output:
526
+ ["Name", "Value"]
527
+ ["foo", "0"]
528
+ ["bar", "1"]
529
+ ["baz", "2"]
530
+
531
+ ==== Recipe: Capture Field Info
532
+
533
+ To capture field info in a custom converter, accept two block arguments.
534
+ The first is the field value; the second is a +CSV::FieldInfo+ object:
535
+ strip_converter = proc {|field, field_info| p field_info; field.strip }
536
+ source = " foo , 0 \n bar , 1 \n baz , 2 \n"
537
+ parsed = CSV.parse(source, converters: strip_converter)
538
+ parsed # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
539
+ Output:
540
+ #<struct CSV::FieldInfo index=0, line=1, header=nil, quoted?=false>
541
+ #<struct CSV::FieldInfo index=1, line=1, header=nil, quoted?=false>
542
+ #<struct CSV::FieldInfo index=0, line=2, header=nil, quoted?=false>
543
+ #<struct CSV::FieldInfo index=1, line=2, header=nil, quoted?=false>
544
+ #<struct CSV::FieldInfo index=0, line=3, header=nil, quoted?=false>
545
+ #<struct CSV::FieldInfo index=1, line=3, header=nil, quoted?=false>
@@ -0,0 +1,6 @@
1
+ == Recipes for \CSV
2
+
3
+ The recipes are specific code examples for specific tasks. See:
4
+ - {Recipes for Parsing CSV}[./parsing_rdoc.html]
5
+ - {Recipes for Generating CSV}[./generating_rdoc.html]
6
+ - {Recipes for Filtering CSV}[./filtering_rdoc.html]
@@ -4,6 +4,6 @@ class Array # :nodoc:
4
4
  # ["CSV", "data"].to_csv
5
5
  # #=> "CSV,data\n"
6
6
  def to_csv(**options)
7
- CSV.generate_line(self, options)
7
+ CSV.generate_line(self, **options)
8
8
  end
9
9
  end
@@ -4,6 +4,6 @@ class String # :nodoc:
4
4
  # "CSV,data".parse_csv
5
5
  # #=> ["CSV", "data"]
6
6
  def parse_csv(**options)
7
- CSV.parse_line(self, options)
7
+ CSV.parse_line(self, **options)
8
8
  end
9
9
  end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ class CSV
4
+ # Note: Don't use this class directly. This is an internal class.
5
+ class FieldsConverter
6
+ include Enumerable
7
+ #
8
+ # A CSV::FieldsConverter is a data structure for storing the
9
+ # fields converter properties to be passed as a parameter
10
+ # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
11
+ #
12
+
13
+ def initialize(options={})
14
+ @converters = []
15
+ @nil_value = options[:nil_value]
16
+ @empty_value = options[:empty_value]
17
+ @empty_value_is_empty_string = (@empty_value == "")
18
+ @accept_nil = options[:accept_nil]
19
+ @builtin_converters_name = options[:builtin_converters_name]
20
+ @need_static_convert = need_static_convert?
21
+ end
22
+
23
+ def add_converter(name=nil, &converter)
24
+ if name.nil? # custom converter
25
+ @converters << converter
26
+ else # named converter
27
+ combo = builtin_converters[name]
28
+ case combo
29
+ when Array # combo converter
30
+ combo.each do |sub_name|
31
+ add_converter(sub_name)
32
+ end
33
+ else # individual named converter
34
+ @converters << combo
35
+ end
36
+ end
37
+ end
38
+
39
+ def each(&block)
40
+ @converters.each(&block)
41
+ end
42
+
43
+ def empty?
44
+ @converters.empty?
45
+ end
46
+
47
+ def convert(fields, headers, lineno, quoted_fields)
48
+ return fields unless need_convert?
49
+
50
+ fields.collect.with_index do |field, index|
51
+ if field.nil?
52
+ field = @nil_value
53
+ elsif field.is_a?(String) and field.empty?
54
+ field = @empty_value unless @empty_value_is_empty_string
55
+ end
56
+ @converters.each do |converter|
57
+ break if field.nil? and @accept_nil
58
+ if converter.arity == 1 # straight field converter
59
+ field = converter[field]
60
+ else # FieldInfo converter
61
+ if headers
62
+ header = headers[index]
63
+ else
64
+ header = nil
65
+ end
66
+ quoted = quoted_fields[index]
67
+ field = converter[field, FieldInfo.new(index, lineno, header, quoted)]
68
+ end
69
+ break unless field.is_a?(String) # short-circuit pipeline for speed
70
+ end
71
+ field # final state of each field, converted or original
72
+ end
73
+ end
74
+
75
+ private
76
+ def need_static_convert?
77
+ not (@nil_value.nil? and @empty_value_is_empty_string)
78
+ end
79
+
80
+ def need_convert?
81
+ @need_static_convert or
82
+ (not @converters.empty?)
83
+ end
84
+
85
+ def builtin_converters
86
+ @builtin_converters ||= ::CSV.const_get(@builtin_converters_name)
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,18 @@
1
+ require "English"
2
+ require "stringio"
3
+
4
+ class CSV
5
+ module InputRecordSeparator
6
+ class << self
7
+ if RUBY_VERSION >= "3.0.0"
8
+ def value
9
+ "\n"
10
+ end
11
+ else
12
+ def value
13
+ $INPUT_RECORD_SEPARATOR
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end