csv 3.1.5 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/NEWS.md +110 -0
  3. data/README.md +5 -3
  4. data/doc/csv/arguments/io.rdoc +5 -0
  5. data/doc/csv/options/common/col_sep.rdoc +57 -0
  6. data/doc/csv/options/common/quote_char.rdoc +42 -0
  7. data/doc/{row_sep.rdoc → csv/options/common/row_sep.rdoc} +14 -14
  8. data/doc/{force_quotes.rdoc → csv/options/generating/force_quotes.rdoc} +0 -0
  9. data/doc/{quote_empty.rdoc → csv/options/generating/quote_empty.rdoc} +0 -0
  10. data/doc/{write_converters.rdoc → csv/options/generating/write_converters.rdoc} +6 -12
  11. data/doc/{write_empty_value.rdoc → csv/options/generating/write_empty_value.rdoc} +0 -0
  12. data/doc/{write_headers.rdoc → csv/options/generating/write_headers.rdoc} +0 -0
  13. data/doc/{write_nil_value.rdoc → csv/options/generating/write_nil_value.rdoc} +1 -1
  14. data/doc/csv/options/parsing/converters.rdoc +46 -0
  15. data/doc/{empty_value.rdoc → csv/options/parsing/empty_value.rdoc} +0 -0
  16. data/doc/{field_size_limit.rdoc → csv/options/parsing/field_size_limit.rdoc} +0 -0
  17. data/doc/csv/options/parsing/header_converters.rdoc +43 -0
  18. data/doc/{headers.rdoc → csv/options/parsing/headers.rdoc} +0 -0
  19. data/doc/{liberal_parsing.rdoc → csv/options/parsing/liberal_parsing.rdoc} +0 -0
  20. data/doc/{nil_value.rdoc → csv/options/parsing/nil_value.rdoc} +0 -0
  21. data/doc/{return_headers.rdoc → csv/options/parsing/return_headers.rdoc} +0 -0
  22. data/doc/{skip_blanks.rdoc → csv/options/parsing/skip_blanks.rdoc} +0 -0
  23. data/doc/{skip_lines.rdoc → csv/options/parsing/skip_lines.rdoc} +0 -0
  24. data/doc/{strip.rdoc → csv/options/parsing/strip.rdoc} +0 -0
  25. data/doc/{unconverted_fields.rdoc → csv/options/parsing/unconverted_fields.rdoc} +0 -0
  26. data/doc/csv/recipes/filtering.rdoc +158 -0
  27. data/doc/csv/recipes/generating.rdoc +298 -0
  28. data/doc/csv/recipes/parsing.rdoc +545 -0
  29. data/doc/csv/recipes/recipes.rdoc +6 -0
  30. data/lib/csv.rb +1604 -515
  31. data/lib/csv/parser.rb +1 -0
  32. data/lib/csv/row.rb +499 -132
  33. data/lib/csv/table.rb +753 -109
  34. data/lib/csv/version.rb +1 -1
  35. data/lib/csv/writer.rb +45 -4
  36. metadata +38 -28
  37. data/doc/col_sep.rdoc +0 -45
  38. data/doc/converters.rdoc +0 -45
  39. data/doc/header_converters.rdoc +0 -31
  40. data/doc/quote_char.rdoc +0 -32
@@ -0,0 +1,6 @@
1
+ == Recipes for \CSV
2
+
3
+ The recipes are specific code examples for specific tasks. See:
4
+ - {Recipes for Parsing CSV}[./parsing_rdoc.html]
5
+ - {Recipes for Generating CSV}[./generating_rdoc.html]
6
+ - {Recipes for Filtering CSV}[./filtering_rdoc.html]
data/lib/csv.rb CHANGED
@@ -34,7 +34,7 @@
34
34
  # I'm sure I'll miss something, but I'll try to mention most of the major
35
35
  # differences I am aware of, to help others quickly get up to speed:
36
36
  #
37
- # === CSV Parsing
37
+ # === \CSV Parsing
38
38
  #
39
39
  # * This parser is m17n aware. See CSV for full details.
40
40
  # * This library has a stricter parser and will throw MalformedCSVErrors on
@@ -48,7 +48,7 @@
48
48
  #
49
49
  # === Interface
50
50
  #
51
- # * CSV now uses Hash-style parameters to set options.
51
+ # * CSV now uses keyword parameters to set options.
52
52
  # * CSV no longer has generate_row() or parse_row().
53
53
  # * The old CSV's Reader and Writer classes have been dropped.
54
54
  # * CSV::open() is now more like Ruby's open().
@@ -103,82 +103,225 @@ require_relative "csv/writer"
103
103
 
104
104
  using CSV::MatchP if CSV.const_defined?(:MatchP)
105
105
 
106
- # This class provides a complete interface to CSV files and data. It offers
107
- # tools to enable you to read and write to and from Strings or IO objects, as
108
- # needed.
106
+ # == \CSV
109
107
  #
110
- # The most generic interface of the library is:
108
+ # === In a Hurry?
111
109
  #
112
- # csv = CSV.new(io, **options)
110
+ # If you are familiar with \CSV data and have a particular task in mind,
111
+ # you may want to go directly to the:
112
+ # - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html].
113
113
  #
114
- # # Reading: IO object should be open for read
115
- # csv.read # => array of rows
116
- # # or
117
- # csv.each do |row|
118
- # # ...
119
- # end
120
- # # or
121
- # row = csv.shift
114
+ # Otherwise, read on here, about the API: classes, methods, and constants.
122
115
  #
123
- # # Writing: IO object should be open for write
124
- # csv << row
116
+ # === \CSV Data
125
117
  #
126
- # There are several specialized class methods for one-statement reading or writing,
127
- # described in the Specialized Methods section.
118
+ # \CSV (comma-separated values) data is a text representation of a table:
119
+ # - A _row_ _separator_ delimits table rows.
120
+ # A common row separator is the newline character <tt>"\n"</tt>.
121
+ # - A _column_ _separator_ delimits fields in a row.
122
+ # A common column separator is the comma character <tt>","</tt>.
128
123
  #
129
- # If a String is passed into ::new, it is internally wrapped into a StringIO object.
124
+ # This \CSV \String, with row separator <tt>"\n"</tt>
125
+ # and column separator <tt>","</tt>,
126
+ # has three rows and two columns:
127
+ # "foo,0\nbar,1\nbaz,2\n"
130
128
  #
131
- # +options+ can be used for specifying the particular CSV flavor (column
132
- # separators, row separators, value quoting and so on), and for data conversion,
133
- # see Data Conversion section for the description of the latter.
129
+ # Despite the name \CSV, a \CSV representation can use different separators.
134
130
  #
135
- # == Specialized Methods
131
+ # For more about tables, see the Wikipedia article
132
+ # "{Table (information)}[https://en.wikipedia.org/wiki/Table_(information)]",
133
+ # especially its section
134
+ # "{Simple table}[https://en.wikipedia.org/wiki/Table_(information)#Simple_table]"
136
135
  #
137
- # === Reading
136
+ # == \Class \CSV
138
137
  #
139
- # # From a file: all at once
140
- # arr_of_rows = CSV.read("path/to/file.csv", **options)
141
- # # iterator-style:
142
- # CSV.foreach("path/to/file.csv", **options) do |row|
143
- # # ...
144
- # end
138
+ # Class \CSV provides methods for:
139
+ # - Parsing \CSV data from a \String object, a \File (via its file path), or an \IO object.
140
+ # - Generating \CSV data to a \String object.
145
141
  #
146
- # # From a string
147
- # arr_of_rows = CSV.parse("CSV,data,String", **options)
148
- # # or
149
- # CSV.parse("CSV,data,String", **options) do |row|
150
- # # ...
151
- # end
142
+ # To make \CSV available:
143
+ # require 'csv'
152
144
  #
153
- # === Writing
145
+ # All examples here assume that this has been done.
154
146
  #
155
- # # To a file
156
- # CSV.open("path/to/file.csv", "wb") do |csv|
157
- # csv << ["row", "of", "CSV", "data"]
158
- # csv << ["another", "row"]
159
- # # ...
160
- # end
147
+ # == Keeping It Simple
148
+ #
149
+ # A \CSV object has dozens of instance methods that offer fine-grained control
150
+ # of parsing and generating \CSV data.
151
+ # For many needs, though, simpler approaches will do.
152
+ #
153
+ # This section summarizes the singleton methods in \CSV
154
+ # that allow you to parse and generate without explicitly
155
+ # creating \CSV objects.
156
+ # For details, follow the links.
157
+ #
158
+ # === Simple Parsing
159
+ #
160
+ # Parsing methods commonly return either of:
161
+ # - An \Array of Arrays of Strings:
162
+ # - The outer \Array is the entire "table".
163
+ # - Each inner \Array is a row.
164
+ # - Each \String is a field.
165
+ # - A CSV::Table object. For details, see
166
+ # {\CSV with Headers}[#class-CSV-label-CSV+with+Headers].
167
+ #
168
+ # ==== Parsing a \String
169
+ #
170
+ # The input to be parsed can be a string:
171
+ # string = "foo,0\nbar,1\nbaz,2\n"
172
+ #
173
+ # \Method CSV.parse returns the entire \CSV data:
174
+ # CSV.parse(string) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
175
+ #
176
+ # \Method CSV.parse_line returns only the first row:
177
+ # CSV.parse_line(string) # => ["foo", "0"]
178
+ #
179
+ # \CSV extends class \String with instance method String#parse_csv,
180
+ # which also returns only the first row:
181
+ # string.parse_csv # => ["foo", "0"]
182
+ #
183
+ # ==== Parsing Via a \File Path
184
+ #
185
+ # The input to be parsed can be in a file:
186
+ # string = "foo,0\nbar,1\nbaz,2\n"
187
+ # path = 't.csv'
188
+ # File.write(path, string)
189
+ #
190
+ # \Method CSV.read returns the entire \CSV data:
191
+ # CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
192
+ #
193
+ # \Method CSV.foreach iterates, passing each row to the given block:
194
+ # CSV.foreach(path) do |row|
195
+ # p row
196
+ # end
197
+ # Output:
198
+ # ["foo", "0"]
199
+ # ["bar", "1"]
200
+ # ["baz", "2"]
201
+ #
202
+ # \Method CSV.table returns the entire \CSV data as a CSV::Table object:
203
+ # CSV.table(path) # => #<CSV::Table mode:col_or_row row_count:3>
204
+ #
205
+ # ==== Parsing from an Open \IO Stream
206
+ #
207
+ # The input to be parsed can be in an open \IO stream:
161
208
  #
162
- # # To a String
163
- # csv_string = CSV.generate do |csv|
164
- # csv << ["row", "of", "CSV", "data"]
165
- # csv << ["another", "row"]
166
- # # ...
209
+ # \Method CSV.read returns the entire \CSV data:
210
+ # File.open(path) do |file|
211
+ # CSV.read(file)
212
+ # end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
213
+ #
214
+ # As does method CSV.parse:
215
+ # File.open(path) do |file|
216
+ # CSV.parse(file)
217
+ # end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
218
+ #
219
+ # \Method CSV.parse_line returns only the first row:
220
+ # File.open(path) do |file|
221
+ # CSV.parse_line(file)
222
+ # end # => ["foo", "0"]
223
+ #
224
+ # \Method CSV.foreach iterates, passing each row to the given block:
225
+ # File.open(path) do |file|
226
+ # CSV.foreach(file) do |row|
227
+ # p row
228
+ # end
167
229
  # end
230
+ # Output:
231
+ # ["foo", "0"]
232
+ # ["bar", "1"]
233
+ # ["baz", "2"]
234
+ #
235
+ # \Method CSV.table returns the entire \CSV data as a CSV::Table object:
236
+ # File.open(path) do |file|
237
+ # CSV.table(file)
238
+ # end # => #<CSV::Table mode:col_or_row row_count:3>
239
+ #
240
+ # === Simple Generating
241
+ #
242
+ # \Method CSV.generate returns a \String;
243
+ # this example uses method CSV#<< to append the rows
244
+ # that are to be generated:
245
+ # output_string = CSV.generate do |csv|
246
+ # csv << ['foo', 0]
247
+ # csv << ['bar', 1]
248
+ # csv << ['baz', 2]
249
+ # end
250
+ # output_string # => "foo,0\nbar,1\nbaz,2\n"
168
251
  #
169
- # === Shortcuts
252
+ # \Method CSV.generate_line returns a \String containing the single row
253
+ # constructed from an \Array:
254
+ # CSV.generate_line(['foo', '0']) # => "foo,0\n"
170
255
  #
171
- # # Core extensions for converting one line
172
- # csv_string = ["CSV", "data"].to_csv # to CSV
173
- # csv_array = "CSV,String".parse_csv # from CSV
256
+ # \CSV extends class \Array with instance method <tt>Array#to_csv</tt>,
257
+ # which forms an \Array into a \String:
258
+ # ['foo', '0'].to_csv # => "foo,0\n"
174
259
  #
175
- # # CSV() method
176
- # CSV { |csv_out| csv_out << %w{my data here} } # to $stdout
177
- # CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
178
- # CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
179
- # CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
260
+ # === "Filtering" \CSV
180
261
  #
181
- # == Options
262
+ # \Method CSV.filter provides a Unix-style filter for \CSV data.
263
+ # The input data is processed to form the output data:
264
+ # in_string = "foo,0\nbar,1\nbaz,2\n"
265
+ # out_string = ''
266
+ # CSV.filter(in_string, out_string) do |row|
267
+ # row[0] = row[0].upcase
268
+ # row[1] *= 4
269
+ # end
270
+ # out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
271
+ #
272
+ # == \CSV Objects
273
+ #
274
+ # There are three ways to create a \CSV object:
275
+ # - \Method CSV.new returns a new \CSV object.
276
+ # - \Method CSV.instance returns a new or cached \CSV object.
277
+ # - \Method \CSV() also returns a new or cached \CSV object.
278
+ #
279
+ # === Instance Methods
280
+ #
281
+ # \CSV has three groups of instance methods:
282
+ # - Its own internally defined instance methods.
283
+ # - Methods included by module Enumerable.
284
+ # - Methods delegated to class IO. See below.
285
+ #
286
+ # ==== Delegated Methods
287
+ #
288
+ # For convenience, a CSV object will delegate to many methods in class IO.
289
+ # (A few have wrapper "guard code" in \CSV.) You may call:
290
+ # * IO#binmode
291
+ # * #binmode?
292
+ # * IO#close
293
+ # * IO#close_read
294
+ # * IO#close_write
295
+ # * IO#closed?
296
+ # * #eof
297
+ # * #eof?
298
+ # * IO#external_encoding
299
+ # * IO#fcntl
300
+ # * IO#fileno
301
+ # * #flock
302
+ # * IO#flush
303
+ # * IO#fsync
304
+ # * IO#internal_encoding
305
+ # * #ioctl
306
+ # * IO#isatty
307
+ # * #path
308
+ # * IO#pid
309
+ # * IO#pos
310
+ # * IO#pos=
311
+ # * IO#reopen
312
+ # * #rewind
313
+ # * IO#seek
314
+ # * #stat
315
+ # * IO#string
316
+ # * IO#sync
317
+ # * IO#sync=
318
+ # * IO#tell
319
+ # * #to_i
320
+ # * #to_io
321
+ # * IO#truncate
322
+ # * IO#tty?
323
+ #
324
+ # === Options
182
325
  #
183
326
  # The default values for options are:
184
327
  # DEFAULT_OPTIONS = {
@@ -208,59 +351,90 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
208
351
  # strip: false,
209
352
  # }
210
353
  #
211
- # === Options for Parsing
354
+ # ==== Options for Parsing
212
355
  #
213
- # :include: ../doc/col_sep.rdoc
356
+ # Options for parsing, described in detail below, include:
357
+ # - +row_sep+: Specifies the row separator; used to delimit rows.
358
+ # - +col_sep+: Specifies the column separator; used to delimit fields.
359
+ # - +quote_char+: Specifies the quote character; used to quote fields.
360
+ # - +field_size_limit+: Specifies the maximum field size allowed.
361
+ # - +converters+: Specifies the field converters to be used.
362
+ # - +unconverted_fields+: Specifies whether unconverted fields are to be available.
363
+ # - +headers+: Specifies whether data contains headers,
364
+ # or specifies the headers themselves.
365
+ # - +return_headers+: Specifies whether headers are to be returned.
366
+ # - +header_converters+: Specifies the header converters to be used.
367
+ # - +skip_blanks+: Specifies whether blank lines are to be ignored.
368
+ # - +skip_lines+: Specifies how comment lines are to be recognized.
369
+ # - +strip+: Specifies whether leading and trailing whitespace are
370
+ # to be stripped from fields.
371
+ # - +liberal_parsing+: Specifies whether \CSV should attempt to parse
372
+ # non-compliant data.
373
+ # - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field.
374
+ # - +empty_value+: Specifies the object that is to be substituted for each empty field.
214
375
  #
215
- # :include: ../doc/row_sep.rdoc
376
+ # :include: ../doc/csv/options/common/row_sep.rdoc
216
377
  #
217
- # :include: ../doc/quote_char.rdoc
378
+ # :include: ../doc/csv/options/common/col_sep.rdoc
218
379
  #
219
- # :include: ../doc/field_size_limit.rdoc
380
+ # :include: ../doc/csv/options/common/quote_char.rdoc
220
381
  #
221
- # :include: ../doc/converters.rdoc
382
+ # :include: ../doc/csv/options/parsing/field_size_limit.rdoc
222
383
  #
223
- # :include: ../doc/unconverted_fields.rdoc
384
+ # :include: ../doc/csv/options/parsing/converters.rdoc
224
385
  #
225
- # :include: ../doc/headers.rdoc
386
+ # :include: ../doc/csv/options/parsing/unconverted_fields.rdoc
226
387
  #
227
- # :include: ../doc/return_headers.rdoc
388
+ # :include: ../doc/csv/options/parsing/headers.rdoc
228
389
  #
229
- # :include: ../doc/header_converters.rdoc
390
+ # :include: ../doc/csv/options/parsing/return_headers.rdoc
230
391
  #
231
- # :include: ../doc/skip_blanks.rdoc
392
+ # :include: ../doc/csv/options/parsing/header_converters.rdoc
232
393
  #
233
- # :include: ../doc/skip_lines.rdoc
394
+ # :include: ../doc/csv/options/parsing/skip_blanks.rdoc
234
395
  #
235
- # :include: ../doc/liberal_parsing.rdoc
396
+ # :include: ../doc/csv/options/parsing/skip_lines.rdoc
236
397
  #
237
- # :include: ../doc/nil_value.rdoc
398
+ # :include: ../doc/csv/options/parsing/strip.rdoc
238
399
  #
239
- # :include: ../doc/empty_value.rdoc
400
+ # :include: ../doc/csv/options/parsing/liberal_parsing.rdoc
240
401
  #
241
- # === Options for Generating
402
+ # :include: ../doc/csv/options/parsing/nil_value.rdoc
242
403
  #
243
- # :include: ../doc/col_sep.rdoc
404
+ # :include: ../doc/csv/options/parsing/empty_value.rdoc
244
405
  #
245
- # :include: ../doc/row_sep.rdoc
406
+ # ==== Options for Generating
246
407
  #
247
- # :include: ../doc/quote_char.rdoc
408
+ # Options for generating, described in detail below, include:
409
+ # - +row_sep+: Specifies the row separator; used to delimit rows.
410
+ # - +col_sep+: Specifies the column separator; used to delimit fields.
411
+ # - +quote_char+: Specifies the quote character; used to quote fields.
412
+ # - +write_headers+: Specifies whether headers are to be written.
413
+ # - +force_quotes+: Specifies whether each output field is to be quoted.
414
+ # - +quote_empty+: Specifies whether each empty output field is to be quoted.
415
+ # - +write_converters+: Specifies the field converters to be used in writing.
416
+ # - +write_nil_value+: Specifies the object that is to be substituted for each +nil+-valued field.
417
+ # - +write_empty_value+: Specifies the object that is to be substituted for each empty field.
248
418
  #
249
- # :include: ../doc/write_headers.rdoc
419
+ # :include: ../doc/csv/options/common/row_sep.rdoc
250
420
  #
251
- # :include: ../doc/force_quotes.rdoc
421
+ # :include: ../doc/csv/options/common/col_sep.rdoc
252
422
  #
253
- # :include: ../doc/quote_empty.rdoc
423
+ # :include: ../doc/csv/options/common/quote_char.rdoc
254
424
  #
255
- # :include: ../doc/write_converters.rdoc
425
+ # :include: ../doc/csv/options/generating/write_headers.rdoc
256
426
  #
257
- # :include: ../doc/write_nil_value.rdoc
427
+ # :include: ../doc/csv/options/generating/force_quotes.rdoc
258
428
  #
259
- # :include: ../doc/write_empty_value.rdoc
429
+ # :include: ../doc/csv/options/generating/quote_empty.rdoc
260
430
  #
261
- # :include: ../doc/strip.rdoc
431
+ # :include: ../doc/csv/options/generating/write_converters.rdoc
262
432
  #
263
- # == CSV with headers
433
+ # :include: ../doc/csv/options/generating/write_nil_value.rdoc
434
+ #
435
+ # :include: ../doc/csv/options/generating/write_empty_value.rdoc
436
+ #
437
+ # === \CSV with Headers
264
438
  #
265
439
  # CSV allows you to specify the column names of a CSV file, whether they are in the data, or
266
440
  # provided separately. If headers are specified, reading methods return an instance
@@ -282,54 +456,188 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
282
456
  # data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
283
457
  # data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
284
458
  #
285
- # == \CSV \Converters
286
- #
287
- # By default, each field parsed by \CSV is formed into a \String.
288
- # You can use a _converter_ to convert certain fields into other Ruby objects.
289
- #
290
- # When you specify a converter for parsing,
291
- # each parsed field is passed to the converter;
292
- # its return value becomes the new value for the field.
459
+ # === \Converters
460
+ #
461
+ # By default, each value (field or header) parsed by \CSV is formed into a \String.
462
+ # You can use a _field_ _converter_ or _header_ _converter_
463
+ # to intercept and modify the parsed values:
464
+ # - See {Field Converters}[#class-CSV-label-Field+Converters].
465
+ # - See {Header Converters}[#class-CSV-label-Header+Converters].
466
+ #
467
+ # Also by default, each value to be written during generation is written 'as-is'.
468
+ # You can use a _write_ _converter_ to modify values before writing.
469
+ # - See {Write Converters}[#class-CSV-label-Write+Converters].
470
+ #
471
+ # ==== Specifying \Converters
472
+ #
473
+ # You can specify converters for parsing or generating in the +options+
474
+ # argument to various \CSV methods:
475
+ # - Option +converters+ for converting parsed field values.
476
+ # - Option +header_converters+ for converting parsed header values.
477
+ # - Option +write_converters+ for converting values to be written (generated).
478
+ #
479
+ # There are three forms for specifying converters:
480
+ # - A converter proc: executable code to be used for conversion.
481
+ # - A converter name: the name of a stored converter.
482
+ # - A converter list: an array of converter procs, converter names, and converter lists.
483
+ #
484
+ # ===== Converter Procs
485
+ #
486
+ # This converter proc, +strip_converter+, accepts a value +field+
487
+ # and returns <tt>field.strip</tt>:
488
+ # strip_converter = proc {|field| field.strip }
489
+ # In this call to <tt>CSV.parse</tt>,
490
+ # the keyword argument <tt>converters: strip_converter</tt>
491
+ # specifies that:
492
+ # - \Proc +strip_converter+ is to be called for each parsed field.
493
+ # - The converter's return value is to replace the +field+ value.
494
+ # Example:
495
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
496
+ # array = CSV.parse(string, converters: strip_converter)
497
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
498
+ #
499
+ # A converter proc can receive a second argument, +field_info+,
500
+ # that contains details about the field.
501
+ # This modified +strip_converter+ displays its arguments:
502
+ # strip_converter = proc do |field, field_info|
503
+ # p [field, field_info]
504
+ # field.strip
505
+ # end
506
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
507
+ # array = CSV.parse(string, converters: strip_converter)
508
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
509
+ # Output:
510
+ # [" foo ", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
511
+ # [" 0 ", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
512
+ # [" bar ", #<struct CSV::FieldInfo index=0, line=2, header=nil>]
513
+ # [" 1 ", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
514
+ # [" baz ", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
515
+ # [" 2 ", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
516
+ # Each CSV::FieldInfo object shows:
517
+ # - The 0-based field index.
518
+ # - The 1-based line index.
519
+ # - The field header, if any.
520
+ #
521
+ # ===== Stored \Converters
522
+ #
523
+ # A converter may be given a name and stored in a structure where
524
+ # the parsing methods can find it by name.
525
+ #
526
+ # The storage structure for field converters is the \Hash CSV::Converters.
527
+ # It has several built-in converter procs:
528
+ # - <tt>:integer</tt>: converts each \String-embedded integer into a true \Integer.
529
+ # - <tt>:float</tt>: converts each \String-embedded float into a true \Float.
530
+ # - <tt>:date</tt>: converts each \String-embedded date into a true \Date.
531
+ # - <tt>:date_time</tt>: converts each \String-embedded date-time into a true \DateTime
532
+ # .
533
+ # This example creates a converter proc, then stores it:
534
+ # strip_converter = proc {|field| field.strip }
535
+ # CSV::Converters[:strip] = strip_converter
536
+ # Then the parsing method call can refer to the converter
537
+ # by its name, <tt>:strip</tt>:
538
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
539
+ # array = CSV.parse(string, converters: :strip)
540
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
541
+ #
542
+ # The storage structure for header converters is the \Hash CSV::HeaderConverters,
543
+ # which works in the same way.
544
+ # It also has built-in converter procs:
545
+ # - <tt>:downcase</tt>: Downcases each header.
546
+ # - <tt>:symbol</tt>: Converts each header to a \Symbol.
547
+ #
548
+ # There is no such storage structure for write converters.
549
+ #
550
+ # ===== Converter Lists
551
+ #
552
+ # A _converter_ _list_ is an \Array that may include any assortment of:
553
+ # - Converter procs.
554
+ # - Names of stored converters.
555
+ # - Nested converter lists.
556
+ #
557
+ # Examples:
558
+ # numeric_converters = [:integer, :float]
559
+ # date_converters = [:date, :date_time]
560
+ # [numeric_converters, strip_converter]
561
+ # [strip_converter, date_converters, :float]
562
+ #
563
+ # Like a converter proc, a converter list may be named and stored in either
564
+ # \CSV::Converters or CSV::HeaderConverters:
565
+ # CSV::Converters[:custom] = [strip_converter, date_converters, :float]
566
+ # CSV::HeaderConverters[:custom] = [:downcase, :symbol]
567
+ #
568
+ # There are two built-in converter lists:
569
+ # CSV::Converters[:numeric] # => [:integer, :float]
570
+ # CSV::Converters[:all] # => [:date_time, :numeric]
571
+ #
572
+ # ==== Field \Converters
573
+ #
574
+ # With no conversion, all parsed fields in all rows become Strings:
575
+ # string = "foo,0\nbar,1\nbaz,2\n"
576
+ # ary = CSV.parse(string)
577
+ # ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
578
+ #
579
+ # When you specify a field converter, each parsed field is passed to the converter;
580
+ # its return value becomes the stored value for the field.
293
581
  # A converter might, for example, convert an integer embedded in a \String
294
582
  # into a true \Integer.
295
583
  # (In fact, that's what built-in field converter +:integer+ does.)
296
584
  #
297
- # There are additional built-in \converters, and custom \converters are also supported.
298
- #
299
- # All \converters try to transcode fields to UTF-8 before converting.
300
- # The conversion will fail if the data cannot be transcoded, leaving the field unchanged.
301
- #
302
- # === Field \Converters
303
- #
304
- # There are three ways to use field \converters;
305
- # these examples use built-in field converter +:integer+,
306
- # which converts each parsed integer string to a true \Integer.
307
- #
308
- # Option +converters+ with a singleton parsing method:
309
- # ary = CSV.parse_line('0,1,2', converters: :integer)
310
- # ary # => [0, 1, 2]
311
- #
312
- # Option +converters+ with a new \CSV instance:
313
- # csv = CSV.new('0,1,2', converters: :integer)
314
- # # Field converters in effect:
315
- # csv.converters # => [:integer]
316
- # csv.shift # => [0, 1, 2]
317
- #
318
- # Method #convert adds a field converter to a \CSV instance:
319
- # csv = CSV.new('0,1,2')
585
+ # There are three ways to use field \converters.
586
+ #
587
+ # - Using option {converters}[#class-CSV-label-Option+converters] with a parsing method:
588
+ # ary = CSV.parse(string, converters: :integer)
589
+ # ary # => [["foo", 0], ["bar", 1], ["baz", 2]]
590
+ # - Using option {converters}[#class-CSV-label-Option+converters] with a new \CSV instance:
591
+ # csv = CSV.new(string, converters: :integer)
592
+ # # Field converters in effect:
593
+ # csv.converters # => [:integer]
594
+ # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
595
+ # - Using method #convert to add a field converter to a \CSV instance:
596
+ # csv = CSV.new(string)
597
+ # # Add a converter.
598
+ # csv.convert(:integer)
599
+ # csv.converters # => [:integer]
600
+ # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
601
+ #
602
+ # Installing a field converter does not affect already-read rows:
603
+ # csv = CSV.new(string)
604
+ # csv.shift # => ["foo", "0"]
320
605
  # # Add a converter.
321
606
  # csv.convert(:integer)
322
607
  # csv.converters # => [:integer]
323
- # csv.shift # => [0, 1, 2]
324
- #
325
- # ---
608
+ # csv.read # => [["bar", 1], ["baz", 2]]
326
609
  #
327
- # The built-in field \converters are in \Hash CSV::Converters.
328
- # The \Symbol keys there are the names of the \converters:
329
- #
330
- # CSV::Converters.keys # => [:integer, :float, :numeric, :date, :date_time, :all]
610
+ # There are additional built-in \converters, and custom \converters are also supported.
331
611
  #
332
- # Converter +:integer+ converts each field that +Integer()+ accepts:
612
+ # ===== Built-In Field \Converters
613
+ #
614
+ # The built-in field converters are in \Hash CSV::Converters:
615
+ # - Each key is a field converter name.
616
+ # - Each value is one of:
617
+ # - A \Proc field converter.
618
+ # - An \Array of field converter names.
619
+ #
620
+ # Display:
621
+ # CSV::Converters.each_pair do |name, value|
622
+ # if value.kind_of?(Proc)
623
+ # p [name, value.class]
624
+ # else
625
+ # p [name, value]
626
+ # end
627
+ # end
628
+ # Output:
629
+ # [:integer, Proc]
630
+ # [:float, Proc]
631
+ # [:numeric, [:integer, :float]]
632
+ # [:date, Proc]
633
+ # [:date_time, Proc]
634
+ # [:all, [:date_time, :numeric]]
635
+ #
636
+ # Each of these converters transcodes values to UTF-8 before attempting conversion.
637
+ # If a value cannot be transcoded to UTF-8 the conversion will
638
+ # fail and the value will remain unconverted.
639
+ #
640
+ # Converter +:integer+ converts each field that Integer() accepts:
333
641
  # data = '0,1,2,x'
334
642
  # # Without the converter
335
643
  # csv = CSV.parse_line(data)
@@ -338,7 +646,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
338
646
  # csv = CSV.parse_line(data, converters: :integer)
339
647
  # csv # => [0, 1, 2, "x"]
340
648
  #
341
- # Converter +:float+ converts each field that +Float()+ accepts:
649
+ # Converter +:float+ converts each field that Float() accepts:
342
650
  # data = '1.0,3.14159,x'
343
651
  # # Without the converter
344
652
  # csv = CSV.parse_line(data)
@@ -349,7 +657,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
349
657
  #
350
658
  # Converter +:numeric+ converts with both +:integer+ and +:float+.
351
659
  #
352
- # Converter +:date+ converts each field that +Date::parse()+ accepts:
660
+ # Converter +:date+ converts each field that Date::parse accepts:
353
661
  # data = '2001-02-03,x'
354
662
  # # Without the converter
355
663
  # csv = CSV.parse_line(data)
@@ -358,7 +666,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
358
666
  # csv = CSV.parse_line(data, converters: :date)
359
667
  # csv # => [#<Date: 2001-02-03 ((2451944j,0s,0n),+0s,2299161j)>, "x"]
360
668
  #
361
- # Converter +:date_time+ converts each field that +DateTime::parse() accepts:
669
+ # Converter +:date_time+ converts each field that DateTime::parse accepts:
362
670
  # data = '2020-05-07T14:59:00-05:00,x'
363
671
  # # Without the converter
364
672
  # csv = CSV.parse_line(data)
@@ -378,63 +686,64 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
378
686
  # csv.convert(:date)
379
687
  # csv.converters # => [:integer, :date]
380
688
  #
381
- # You can add a custom field converter to \Hash CSV::Converters:
382
- # strip_converter = proc {|field| field.strip}
383
- # CSV::Converters[:strip] = strip_converter
384
- # CSV::Converters.keys # => [:integer, :float, :numeric, :date, :date_time, :all, :strip]
385
- #
386
- # Then use it to convert fields:
387
- # str = ' foo , 0 '
388
- # ary = CSV.parse_line(str, converters: :strip)
389
- # ary # => ["foo", "0"]
689
+ # ===== Custom Field \Converters
390
690
  #
391
- # See {Custom Converters}[#class-CSV-label-Custom+Converters].
691
+ # You can define a custom field converter:
692
+ # strip_converter = proc {|field| field.strip }
693
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
694
+ # array = CSV.parse(string, converters: strip_converter)
695
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
696
+ # You can register the converter in \Converters \Hash,
697
+ # which allows you to refer to it by name:
698
+ # CSV::Converters[:strip] = strip_converter
699
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
700
+ # array = CSV.parse(string, converters: :strip)
701
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
392
702
  #
393
- # === Header \Converters
703
+ # ==== Header \Converters
394
704
  #
395
705
  # Header converters operate only on headers (and not on other rows).
396
706
  #
397
707
  # There are three ways to use header \converters;
398
- # these examples use built-in header converter +:dowhcase+,
708
+ # these examples use built-in header converter +:downcase+,
399
709
  # which downcases each parsed header.
400
710
  #
401
- # Option +header_converters+ with a singleton parsing method:
402
- # str = "Name,Count\nFoo,0\n,Bar,1\nBaz,2"
403
- # tbl = CSV.parse(str, headers: true, header_converters: :downcase)
404
- # tbl.class # => CSV::Table
405
- # tbl.headers # => ["name", "count"]
406
- #
407
- # Option +header_converters+ with a new \CSV instance:
408
- # csv = CSV.new(str, header_converters: :downcase)
409
- # # Header converters in effect:
410
- # csv.header_converters # => [:downcase]
411
- # tbl = CSV.parse(str, headers: true)
412
- # tbl.headers # => ["Name", "Count"]
413
- #
414
- # Method #header_convert adds a header converter to a \CSV instance:
415
- # csv = CSV.new(str)
416
- # # Add a header converter.
417
- # csv.header_convert(:downcase)
418
- # csv.header_converters # => [:downcase]
419
- # tbl = CSV.parse(str, headers: true)
420
- # tbl.headers # => ["Name", "Count"]
421
- #
422
- # ---
423
- #
424
- # The built-in header \converters are in \Hash CSV::Converters.
425
- # The \Symbol keys there are the names of the \converters:
426
- #
711
+ # - Option +header_converters+ with a singleton parsing method:
712
+ # string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2"
713
+ # tbl = CSV.parse(string, headers: true, header_converters: :downcase)
714
+ # tbl.class # => CSV::Table
715
+ # tbl.headers # => ["name", "count"]
716
+ #
717
+ # - Option +header_converters+ with a new \CSV instance:
718
+ # csv = CSV.new(string, header_converters: :downcase)
719
+ # # Header converters in effect:
720
+ # csv.header_converters # => [:downcase]
721
+ # tbl = CSV.parse(string, headers: true)
722
+ # tbl.headers # => ["Name", "Count"]
723
+ #
724
+ # - Method #header_convert adds a header converter to a \CSV instance:
725
+ # csv = CSV.new(string)
726
+ # # Add a header converter.
727
+ # csv.header_convert(:downcase)
728
+ # csv.header_converters # => [:downcase]
729
+ # tbl = CSV.parse(string, headers: true)
730
+ # tbl.headers # => ["Name", "Count"]
731
+ #
732
+ # ===== Built-In Header \Converters
733
+ #
734
+ # The built-in header \converters are in \Hash CSV::HeaderConverters.
735
+ # The keys there are the names of the \converters:
427
736
  # CSV::HeaderConverters.keys # => [:downcase, :symbol]
428
737
  #
429
738
  # Converter +:downcase+ converts each header by downcasing it:
430
- # str = "Name,Count\nFoo,0\n,Bar,1\nBaz,2"
431
- # tbl = CSV.parse(str, headers: true, header_converters: :downcase)
739
+ # string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2"
740
+ # tbl = CSV.parse(string, headers: true, header_converters: :downcase)
432
741
  # tbl.class # => CSV::Table
433
742
  # tbl.headers # => ["name", "count"]
434
743
  #
435
- # Converter +:symbol+ by making it into a \Symbol:
436
- # str = "Name,Count\nFoo,0\n,Bar,1\nBaz,2"
437
- # tbl = CSV.parse(str, headers: true, header_converters: :symbol)
744
+ # Converter +:symbol+ converts each header by making it into a \Symbol:
745
+ # string = "Name,Count\nFoo,0\n,Bar,1\nBaz,2"
746
+ # tbl = CSV.parse(string, headers: true, header_converters: :symbol)
438
747
  # tbl.headers # => [:name, :count]
439
748
  # Details:
440
749
  # - Strips leading and trailing whitespace.
@@ -443,46 +752,47 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
443
752
  # - Removes non-word characters.
444
753
  # - Makes the string into a \Symbol.
445
754
  #
446
- # You can add a custom header converter to \Hash CSV::HeaderConverters:
447
- # strip_converter = proc {|field| field.strip}
448
- # CSV::HeaderConverters[:strip] = strip_converter
449
- # CSV::HeaderConverters.keys # => [:downcase, :symbol, :strip]
450
- #
451
- # Then use it to convert headers:
452
- # str = " Name , Value \nfoo,0\nbar,1\nbaz,2"
453
- # tbl = CSV.parse(str, headers: true, header_converters: :strip)
454
- # tbl.headers # => ["Name", "Value"]
455
- #
456
- # See {Custom Converters}[#class-CSV-label-Custom+Converters].
457
- #
458
- # === Custom \Converters
459
- #
460
- # You can define custom \converters.
461
- #
462
- # The \converter is a \Proc that is called with two arguments,
463
- # \String +field+ and CSV::FieldInfo +field_info+;
464
- # it returns a \String that will become the field value:
465
- # converter = proc {|field, field_info| <some_string> }
466
- #
467
- # To illustrate:
468
- # converter = proc {|field, field_info| p [field, field_info]; field}
469
- # ary = CSV.parse_line('foo,0', converters: converter)
470
- #
471
- # Produces:
472
- # ["foo", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
473
- # ["0", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
474
- #
475
- # In each of the output lines:
476
- # - The first \Array element is the passed \String field.
477
- # - The second is a \FieldInfo structure containing information about the field:
478
- # - The 0-based column index.
479
- # - The 1-based line number.
480
- # - The header for the column, if available.
755
+ # ===== Custom Header \Converters
756
+ #
757
+ # You can define a custom header converter:
758
+ # upcase_converter = proc {|header| header.upcase }
759
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
760
+ # table = CSV.parse(string, headers: true, header_converters: upcase_converter)
761
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
762
+ # table.headers # => ["NAME", "VALUE"]
763
+ # You can register the converter in \HeaderConverters \Hash,
764
+ # which allows you to refer to it by name:
765
+ # CSV::HeaderConverters[:upcase] = upcase_converter
766
+ # table = CSV.parse(string, headers: true, header_converters: :upcase)
767
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
768
+ # table.headers # => ["NAME", "VALUE"]
769
+ #
770
+ # ===== Write \Converters
771
+ #
772
+ # When you specify a write converter for generating \CSV,
773
+ # each field to be written is passed to the converter;
774
+ # its return value becomes the new value for the field.
775
+ # A converter might, for example, strip whitespace from a field.
481
776
  #
482
- # If the \converter does not need +field_info+, it can be omitted:
483
- # converter = proc {|field| ... }
777
+ # Using no write converter (all fields unmodified):
778
+ # output_string = CSV.generate do |csv|
779
+ # csv << [' foo ', 0]
780
+ # csv << [' bar ', 1]
781
+ # csv << [' baz ', 2]
782
+ # end
783
+ # output_string # => " foo ,0\n bar ,1\n baz ,2\n"
784
+ # Using option +write_converters+ with two custom write converters:
785
+ # strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field }
786
+ # upcase_converter = proc {|field| field.respond_to?(:upcase) ? field.upcase : field }
787
+ # write_converters = [strip_converter, upcase_converter]
788
+ # output_string = CSV.generate(write_converters: write_converters) do |csv|
789
+ # csv << [' foo ', 0]
790
+ # csv << [' bar ', 1]
791
+ # csv << [' baz ', 2]
792
+ # end
793
+ # output_string # => "FOO,0\nBAR,1\nBAZ,2\n"
484
794
  #
485
- # == CSV and Character Encodings (M17n or Multilingualization)
795
+ # === Character Encodings (M17n or Multilingualization)
486
796
  #
487
797
  # This new CSV parser is m17n savvy. The parser works in the Encoding of the IO
488
798
  # or String object being read from or written to. Your data is never transcoded
@@ -563,30 +873,12 @@ class CSV
563
873
  # The encoding used by all converters.
564
874
  ConverterEncoding = Encoding.find("UTF-8")
565
875
 
876
+ # A \Hash containing the names and \Procs for the built-in field converters.
877
+ # See {Built-In Field Converters}[#class-CSV-label-Built-In+Field+Converters].
566
878
  #
567
- # This Hash holds the built-in converters of CSV that can be accessed by name.
568
- # You can select Converters with CSV.convert() or through the +options+ Hash
569
- # passed to CSV::new().
570
- #
571
- # <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
572
- # <b><tt>:float</tt></b>:: Converts any field Float() accepts.
573
- # <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
574
- # and <tt>:float</tt>.
575
- # <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
576
- # <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
577
- # <b><tt>:all</tt></b>:: All built-in converters. A combination of
578
- # <tt>:date_time</tt> and <tt>:numeric</tt>.
579
- #
580
- # All built-in converters transcode field data to UTF-8 before attempting a
581
- # conversion. If your data cannot be transcoded to UTF-8 the conversion will
582
- # fail and the field will remain unchanged.
583
- #
584
- # This Hash is intentionally left unfrozen and users should feel free to add
585
- # values to it that can be accessed by all CSV objects.
586
- #
587
- # To add a combo field, the value should be an Array of names. Combo fields
588
- # can be nested with other combo fields.
589
- #
879
+ # This \Hash is intentionally left unfrozen, and may be extended with
880
+ # custom field converters.
881
+ # See {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters].
590
882
  Converters = {
591
883
  integer: lambda { |f|
592
884
  Integer(f.encode(ConverterEncoding)) rescue f
@@ -614,27 +906,12 @@ class CSV
614
906
  all: [:date_time, :numeric],
615
907
  }
616
908
 
909
+ # A \Hash containing the names and \Procs for the built-in header converters.
910
+ # See {Built-In Header Converters}[#class-CSV-label-Built-In+Header+Converters].
617
911
  #
618
- # This Hash holds the built-in header converters of CSV that can be accessed
619
- # by name. You can select HeaderConverters with CSV.header_convert() or
620
- # through the +options+ Hash passed to CSV::new().
621
- #
622
- # <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
623
- # <b><tt>:symbol</tt></b>:: Leading/trailing spaces are dropped, string is
624
- # downcased, remaining spaces are replaced with
625
- # underscores, non-word characters are dropped,
626
- # and finally to_sym() is called.
627
- #
628
- # All built-in header converters transcode header data to UTF-8 before
629
- # attempting a conversion. If your data cannot be transcoded to UTF-8 the
630
- # conversion will fail and the header will remain unchanged.
631
- #
632
- # This Hash is intentionally left unfrozen and users should feel free to add
633
- # values to it that can be accessed by all CSV objects.
634
- #
635
- # To add a combo field, the value should be an Array of names. Combo fields
636
- # can be nested with other combo fields.
637
- #
912
+ # This \Hash is intentionally left unfrozen, and may be extended with
913
+ # custom header converters.
914
+ # See {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters].
638
915
  HeaderConverters = {
639
916
  downcase: lambda { |h| h.encode(ConverterEncoding).downcase },
640
917
  symbol: lambda { |h|
@@ -671,18 +948,47 @@ class CSV
671
948
  }.freeze
672
949
 
673
950
  class << self
951
+ # :call-seq:
952
+ # instance(string, **options)
953
+ # instance(io = $stdout, **options)
954
+ # instance(string, **options) {|csv| ... }
955
+ # instance(io = $stdout, **options) {|csv| ... }
956
+ #
957
+ # Creates or retrieves cached \CSV objects.
958
+ # For arguments and options, see CSV.new.
959
+ #
960
+ # ---
961
+ #
962
+ # With no block given, returns a \CSV object.
674
963
  #
675
- # This method will return a CSV instance, just like CSV::new(), but the
676
- # instance will be cached and returned for all future calls to this method for
677
- # the same +data+ object (tested by Object#object_id()) with the same
678
- # +options+.
964
+ # The first call to +instance+ creates and caches a \CSV object:
965
+ # s0 = 's0'
966
+ # csv0 = CSV.instance(s0)
967
+ # csv0.class # => CSV
679
968
  #
680
- # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
681
- # and {Options for Generating}[#class-CSV-label-Options+for+Generating].
969
+ # Subsequent calls to +instance+ with that _same_ +string+ or +io+
970
+ # retrieve that same cached object:
971
+ # csv1 = CSV.instance(s0)
972
+ # csv1.class # => CSV
973
+ # csv1.equal?(csv0) # => true # Same CSV object
682
974
  #
683
- # If a block is given, the instance is passed to the block and the return
684
- # value becomes the return value of the block.
975
+ # A subsequent call to +instance+ with a _different_ +string+ or +io+
976
+ # creates and caches a _different_ \CSV object.
977
+ # s1 = 's1'
978
+ # csv2 = CSV.instance(s1)
979
+ # csv2.equal?(csv0) # => false # Different CSV object
685
980
  #
981
+ # All the cached objects remain available:
982
+ # csv3 = CSV.instance(s0)
983
+ # csv3.equal?(csv0) # => true # Same CSV object
984
+ # csv4 = CSV.instance(s1)
985
+ # csv4.equal?(csv2) # => true # Same CSV object
986
+ #
987
+ # ---
988
+ #
989
+ # When a block is given, calls the block with the created or retrieved
990
+ # \CSV object; returns the block's return value:
991
+ # CSV.instance(s0) {|csv| :foo } # => :foo
686
992
  def instance(data = $stdout, **options)
687
993
  # create a _signature_ for this method call, data object and options
688
994
  sig = [data.object_id] +
@@ -699,33 +1005,186 @@ class CSV
699
1005
  end
700
1006
  end
701
1007
 
702
- #
703
1008
  # :call-seq:
704
- # filter( **options ) { |row| ... }
705
- # filter( input, **options ) { |row| ... }
706
- # filter( input, output, **options ) { |row| ... }
1009
+ # filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
1010
+ # filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
1011
+ # filter(**options) {|row| ... } -> array_of_arrays or csv_table
1012
+ #
1013
+ # - Parses \CSV from a source (\String, \IO stream, or ARGF).
1014
+ # - Calls the given block with each parsed row:
1015
+ # - Without headers, each row is an \Array.
1016
+ # - With headers, each row is a CSV::Row.
1017
+ # - Generates \CSV to an output (\String, \IO stream, or STDOUT).
1018
+ # - Returns the parsed source:
1019
+ # - Without headers, an \Array of \Arrays.
1020
+ # - With headers, a CSV::Table.
1021
+ #
1022
+ # When +in_string_or_io+ is given, but not +out_string_or_io+,
1023
+ # parses from the given +in_string_or_io+
1024
+ # and generates to STDOUT.
1025
+ #
1026
+ # \String input without headers:
1027
+ #
1028
+ # in_string = "foo,0\nbar,1\nbaz,2"
1029
+ # CSV.filter(in_string) do |row|
1030
+ # row[0].upcase!
1031
+ # row[1] = - row[1].to_i
1032
+ # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
1033
+ #
1034
+ # Output (to STDOUT):
1035
+ #
1036
+ # FOO,0
1037
+ # BAR,-1
1038
+ # BAZ,-2
1039
+ #
1040
+ # \String input with headers:
1041
+ #
1042
+ # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
1043
+ # CSV.filter(in_string, headers: true) do |row|
1044
+ # row[0].upcase!
1045
+ # row[1] = - row[1].to_i
1046
+ # end # => #<CSV::Table mode:col_or_row row_count:4>
1047
+ #
1048
+ # Output (to STDOUT):
1049
+ #
1050
+ # Name,Value
1051
+ # FOO,0
1052
+ # BAR,-1
1053
+ # BAZ,-2
1054
+ #
1055
+ # \IO stream input without headers:
1056
+ #
1057
+ # File.write('t.csv', "foo,0\nbar,1\nbaz,2")
1058
+ # File.open('t.csv') do |in_io|
1059
+ # CSV.filter(in_io) do |row|
1060
+ # row[0].upcase!
1061
+ # row[1] = - row[1].to_i
1062
+ # end
1063
+ # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
1064
+ #
1065
+ # Output (to STDOUT):
1066
+ #
1067
+ # FOO,0
1068
+ # BAR,-1
1069
+ # BAZ,-2
1070
+ #
1071
+ # \IO stream input with headers:
1072
+ #
1073
+ # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
1074
+ # File.open('t.csv') do |in_io|
1075
+ # CSV.filter(in_io, headers: true) do |row|
1076
+ # row[0].upcase!
1077
+ # row[1] = - row[1].to_i
1078
+ # end
1079
+ # end # => #<CSV::Table mode:col_or_row row_count:4>
1080
+ #
1081
+ # Output (to STDOUT):
1082
+ #
1083
+ # Name,Value
1084
+ # FOO,0
1085
+ # BAR,-1
1086
+ # BAZ,-2
1087
+ #
1088
+ # When both +in_string_or_io+ and +out_string_or_io+ are given,
1089
+ # parses from +in_string_or_io+ and generates to +out_string_or_io+.
1090
+ #
1091
+ # \String output without headers:
1092
+ #
1093
+ # in_string = "foo,0\nbar,1\nbaz,2"
1094
+ # out_string = ''
1095
+ # CSV.filter(in_string, out_string) do |row|
1096
+ # row[0].upcase!
1097
+ # row[1] = - row[1].to_i
1098
+ # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
1099
+ # out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n"
1100
+ #
1101
+ # \String output with headers:
1102
+ #
1103
+ # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
1104
+ # out_string = ''
1105
+ # CSV.filter(in_string, out_string, headers: true) do |row|
1106
+ # row[0].upcase!
1107
+ # row[1] = - row[1].to_i
1108
+ # end # => #<CSV::Table mode:col_or_row row_count:4>
1109
+ # out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
1110
+ #
1111
+ # \IO stream output without headers:
1112
+ #
1113
+ # in_string = "foo,0\nbar,1\nbaz,2"
1114
+ # File.open('t.csv', 'w') do |out_io|
1115
+ # CSV.filter(in_string, out_io) do |row|
1116
+ # row[0].upcase!
1117
+ # row[1] = - row[1].to_i
1118
+ # end
1119
+ # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
1120
+ # File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n"
1121
+ #
1122
+ # \IO stream output with headers:
1123
+ #
1124
+ # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
1125
+ # File.open('t.csv', 'w') do |out_io|
1126
+ # CSV.filter(in_string, out_io, headers: true) do |row|
1127
+ # row[0].upcase!
1128
+ # row[1] = - row[1].to_i
1129
+ # end
1130
+ # end # => #<CSV::Table mode:col_or_row row_count:4>
1131
+ # File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
1132
+ #
1133
+ # When neither +in_string_or_io+ nor +out_string_or_io+ is given,
1134
+ # parses from {ARGF}[https://docs.ruby-lang.org/en/master/ARGF.html]
1135
+ # and generates to STDOUT.
1136
+ #
1137
+ # Without headers:
707
1138
  #
708
- # This method is a convenience for building Unix-like filters for CSV data.
709
- # Each row is yielded to the provided block which can alter it as needed.
710
- # After the block returns, the row is appended to +output+ altered or not.
1139
+ # # Put Ruby code into a file.
1140
+ # ruby = <<-EOT
1141
+ # require 'csv'
1142
+ # CSV.filter do |row|
1143
+ # row[0].upcase!
1144
+ # row[1] = - row[1].to_i
1145
+ # end
1146
+ # EOT
1147
+ # File.write('t.rb', ruby)
1148
+ # # Put some CSV into a file.
1149
+ # File.write('t.csv', "foo,0\nbar,1\nbaz,2")
1150
+ # # Run the Ruby code with CSV filename as argument.
1151
+ # system(Gem.ruby, "t.rb", "t.csv")
711
1152
  #
712
- # The +input+ and +output+ arguments can be anything CSV::new() accepts
713
- # (generally String or IO objects). If not given, they default to
714
- # <tt>ARGF</tt> and <tt>$stdout</tt>.
1153
+ # Output (to STDOUT):
715
1154
  #
716
- # The +options+ parameter is also filtered down to CSV::new() after some
717
- # clever key parsing. Any key beginning with <tt>:in_</tt> or
718
- # <tt>:input_</tt> will have that leading identifier stripped and will only
719
- # be used in the +options+ Hash for the +input+ object. Keys starting with
720
- # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
721
- # are assigned to both objects.
1155
+ # FOO,0
1156
+ # BAR,-1
1157
+ # BAZ,-2
722
1158
  #
723
- # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
724
- # and {Options for Generating}[#class-CSV-label-Options+for+Generating].
1159
+ # With headers:
725
1160
  #
726
- # The <tt>:output_row_sep</tt> +option+ defaults to
727
- # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1161
+ # # Put Ruby code into a file.
1162
+ # ruby = <<-EOT
1163
+ # require 'csv'
1164
+ # CSV.filter(headers: true) do |row|
1165
+ # row[0].upcase!
1166
+ # row[1] = - row[1].to_i
1167
+ # end
1168
+ # EOT
1169
+ # File.write('t.rb', ruby)
1170
+ # # Put some CSV into a file.
1171
+ # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
1172
+ # # Run the Ruby code with CSV filename as argument.
1173
+ # system(Gem.ruby, "t.rb", "t.csv")
728
1174
  #
1175
+ # Output (to STDOUT):
1176
+ #
1177
+ # Name,Value
1178
+ # FOO,0
1179
+ # BAR,-1
1180
+ # BAZ,-2
1181
+ #
1182
+ # Arguments:
1183
+ #
1184
+ # * Argument +in_string_or_io+ must be a \String or an \IO stream.
1185
+ # * Argument +out_string_or_io+ must be a \String or an \IO stream.
1186
+ # * Arguments <tt>**options</tt> must be keyword options.
1187
+ # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
729
1188
  def filter(input=nil, output=nil, **options)
730
1189
  # parse options for input, output, or both
731
1190
  in_options, out_options = Hash.new, {row_sep: $INPUT_RECORD_SEPARATOR}
@@ -740,10 +1199,29 @@ class CSV
740
1199
  out_options[key] = value
741
1200
  end
742
1201
  end
1202
+
743
1203
  # build input and output wrappers
744
- input = new(input || ARGF, **in_options)
1204
+ input = new(input || ARGF, **in_options)
745
1205
  output = new(output || $stdout, **out_options)
746
1206
 
1207
+ # process headers
1208
+ need_manual_header_output =
1209
+ (in_options[:headers] and
1210
+ out_options[:headers] == true and
1211
+ out_options[:write_headers])
1212
+ if need_manual_header_output
1213
+ first_row = input.shift
1214
+ if first_row
1215
+ if first_row.is_a?(Row)
1216
+ headers = first_row.headers
1217
+ yield headers
1218
+ output << headers
1219
+ end
1220
+ yield first_row
1221
+ output << first_row
1222
+ end
1223
+ end
1224
+
747
1225
  # read, yield, write
748
1226
  input.each do |row|
749
1227
  yield row
@@ -752,21 +1230,91 @@ class CSV
752
1230
  end
753
1231
 
754
1232
  #
755
- # This method is intended as the primary interface for reading CSV files. You
756
- # pass a +path+ and any +options+ you wish to set for the read. Each row of
757
- # file will be passed to the provided +block+ in turn.
1233
+ # :call-seq:
1234
+ # foreach(path_or_io, mode='r', **options) {|row| ... }
1235
+ # foreach(path_or_io, mode='r', **options) -> new_enumerator
1236
+ #
1237
+ # Calls the block with each row read from source +path_or_io+.
1238
+ #
1239
+ # \Path input without headers:
1240
+ #
1241
+ # string = "foo,0\nbar,1\nbaz,2\n"
1242
+ # in_path = 't.csv'
1243
+ # File.write(in_path, string)
1244
+ # CSV.foreach(in_path) {|row| p row }
1245
+ #
1246
+ # Output:
1247
+ #
1248
+ # ["foo", "0"]
1249
+ # ["bar", "1"]
1250
+ # ["baz", "2"]
1251
+ #
1252
+ # \Path input with headers:
1253
+ #
1254
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
1255
+ # in_path = 't.csv'
1256
+ # File.write(in_path, string)
1257
+ # CSV.foreach(in_path, headers: true) {|row| p row }
1258
+ #
1259
+ # Output:
1260
+ #
1261
+ # <CSV::Row "Name":"foo" "Value":"0">
1262
+ # <CSV::Row "Name":"bar" "Value":"1">
1263
+ # <CSV::Row "Name":"baz" "Value":"2">
1264
+ #
1265
+ # \IO stream input without headers:
1266
+ #
1267
+ # string = "foo,0\nbar,1\nbaz,2\n"
1268
+ # path = 't.csv'
1269
+ # File.write(path, string)
1270
+ # File.open('t.csv') do |in_io|
1271
+ # CSV.foreach(in_io) {|row| p row }
1272
+ # end
758
1273
  #
759
- # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1274
+ # Output:
760
1275
  #
761
- # The +options+ parameter can be anything CSV::new() understands. This method
762
- # also understands an additional <tt>:encoding</tt> parameter that you can use
763
- # to specify the Encoding of the data in the file to be read. You must provide
764
- # this unless your data is in Encoding::default_external(). CSV will use this
765
- # to determine how to parse the data. You may provide a second Encoding to
766
- # have the data transcoded as it is read. For example,
767
- # <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
768
- # but transcode it to UTF-8 before CSV parses it.
1276
+ # ["foo", "0"]
1277
+ # ["bar", "1"]
1278
+ # ["baz", "2"]
769
1279
  #
1280
+ # \IO stream input with headers:
1281
+ #
1282
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
1283
+ # path = 't.csv'
1284
+ # File.write(path, string)
1285
+ # File.open('t.csv') do |in_io|
1286
+ # CSV.foreach(in_io, headers: true) {|row| p row }
1287
+ # end
1288
+ #
1289
+ # Output:
1290
+ #
1291
+ # <CSV::Row "Name":"foo" "Value":"0">
1292
+ # <CSV::Row "Name":"bar" "Value":"1">
1293
+ # <CSV::Row "Name":"baz" "Value":"2">
1294
+ #
1295
+ # With no block given, returns an \Enumerator:
1296
+ #
1297
+ # string = "foo,0\nbar,1\nbaz,2\n"
1298
+ # path = 't.csv'
1299
+ # File.write(path, string)
1300
+ # CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
1301
+ #
1302
+ # Arguments:
1303
+ # * Argument +path_or_io+ must be a file path or an \IO stream.
1304
+ # * Argument +mode+, if given, must be a \File mode
1305
+ # See {Open Mode}[https://ruby-doc.org/core/IO.html#method-c-new-label-Open+Mode].
1306
+ # * Arguments <tt>**options</tt> must be keyword options.
1307
+ # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1308
+ # * This method optionally accepts an additional <tt>:encoding</tt> option
1309
+ # that you can use to specify the Encoding of the data read from +path+ or +io+.
1310
+ # You must provide this unless your data is in the encoding
1311
+ # given by <tt>Encoding::default_external</tt>.
1312
+ # Parsing will use this to determine how to parse the data.
1313
+ # You may provide a second Encoding to
1314
+ # have the data transcoded as it is read. For example,
1315
+ # encoding: 'UTF-32BE:UTF-8'
1316
+ # would read +UTF-32BE+ data from the file
1317
+ # but transcode it to +UTF-8+ before parsing.
770
1318
  def foreach(path, mode="r", **options, &block)
771
1319
  return to_enum(__method__, path, mode, **options) unless block_given?
772
1320
  open(path, mode, **options) do |csv|
@@ -776,23 +1324,63 @@ class CSV
776
1324
 
777
1325
  #
778
1326
  # :call-seq:
779
- # generate( str, **options ) { |csv| ... }
780
- # generate( **options ) { |csv| ... }
1327
+ # generate(csv_string, **options) {|csv| ... }
1328
+ # generate(**options) {|csv| ... }
1329
+ #
1330
+ # * Argument +csv_string+, if given, must be a \String object;
1331
+ # defaults to a new empty \String.
1332
+ # * Arguments +options+, if given, should be generating options.
1333
+ # See {Options for Generating}[#class-CSV-label-Options+for+Generating].
781
1334
  #
782
- # This method wraps a String you provide, or an empty default String, in a
783
- # CSV object which is passed to the provided block. You can use the block to
784
- # append CSV rows to the String and when the block exits, the final String
785
- # will be returned.
1335
+ # ---
786
1336
  #
787
- # Note that a passed String *is* modified by this method. Call dup() before
788
- # passing if you need a new String.
1337
+ # Creates a new \CSV object via <tt>CSV.new(csv_string, **options)</tt>;
1338
+ # calls the block with the \CSV object, which the block may modify;
1339
+ # returns the \String generated from the \CSV object.
789
1340
  #
790
- # See {Options for Generating}[#class-CSV-label-Options+for+Generating].
1341
+ # Note that a passed \String *is* modified by this method.
1342
+ # Pass <tt>csv_string</tt>.dup if the \String must be preserved.
791
1343
  #
792
1344
  # This method has one additional option: <tt>:encoding</tt>,
793
1345
  # which sets the base Encoding for the output if no +str+ is specified.
794
1346
  # CSV needs this hint if you plan to output non-ASCII compatible data.
795
1347
  #
1348
+ # ---
1349
+ #
1350
+ # Add lines:
1351
+ # input_string = "foo,0\nbar,1\nbaz,2\n"
1352
+ # output_string = CSV.generate(input_string) do |csv|
1353
+ # csv << ['bat', 3]
1354
+ # csv << ['bam', 4]
1355
+ # end
1356
+ # output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
1357
+ # input_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
1358
+ # output_string.equal?(input_string) # => true # Same string, modified
1359
+ #
1360
+ # Add lines into new string, preserving old string:
1361
+ # input_string = "foo,0\nbar,1\nbaz,2\n"
1362
+ # output_string = CSV.generate(input_string.dup) do |csv|
1363
+ # csv << ['bat', 3]
1364
+ # csv << ['bam', 4]
1365
+ # end
1366
+ # output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
1367
+ # input_string # => "foo,0\nbar,1\nbaz,2\n"
1368
+ # output_string.equal?(input_string) # => false # Different strings
1369
+ #
1370
+ # Create lines from nothing:
1371
+ # output_string = CSV.generate do |csv|
1372
+ # csv << ['foo', 0]
1373
+ # csv << ['bar', 1]
1374
+ # csv << ['baz', 2]
1375
+ # end
1376
+ # output_string # => "foo,0\nbar,1\nbaz,2\n"
1377
+ #
1378
+ # ---
1379
+ #
1380
+ # Raises an exception if +csv_string+ is not a \String object:
1381
+ # # Raises TypeError (no implicit conversion of Integer into String)
1382
+ # CSV.generate(0)
1383
+ #
796
1384
  def generate(str=nil, **options)
797
1385
  encoding = options[:encoding]
798
1386
  # add a default empty String, if none was given
@@ -846,80 +1434,107 @@ class CSV
846
1434
  str = +""
847
1435
  if options[:encoding]
848
1436
  str.force_encoding(options[:encoding])
849
- elsif field = row.find {|f| f.is_a?(String)}
850
- str.force_encoding(field.encoding)
1437
+ else
1438
+ fallback_encoding = nil
1439
+ output_encoding = nil
1440
+ row.each do |field|
1441
+ next unless field.is_a?(String)
1442
+ fallback_encoding ||= field.encoding
1443
+ next if field.ascii_only?
1444
+ output_encoding = field.encoding
1445
+ break
1446
+ end
1447
+ output_encoding ||= fallback_encoding
1448
+ if output_encoding
1449
+ str.force_encoding(output_encoding)
1450
+ end
851
1451
  end
852
1452
  (new(str, **options) << row).string
853
1453
  end
854
1454
 
855
1455
  #
856
1456
  # :call-seq:
857
- # open( filename, mode = "rb", **options ) { |faster_csv| ... }
858
- # open( filename, **options ) { |faster_csv| ... }
859
- # open( filename, mode = "rb", **options )
860
- # open( filename, **options )
861
- #
862
- # This method opens an IO object, and wraps that with CSV. This is intended
863
- # as the primary interface for writing a CSV file.
864
- #
865
- # You must pass a +filename+ and may optionally add a +mode+ for Ruby's
866
- # open().
867
- #
868
- # See {Options for Generating}[#class-CSV-label-Options+for+Generating].
869
- #
870
- # This method works like Ruby's open() call, in that it will pass a CSV object
871
- # to a provided block and close it when the block terminates, or it will
872
- # return the CSV object when no block is provided. (*Note*: This is different
873
- # from the Ruby 1.8 CSV library which passed rows to the block. Use
874
- # CSV::foreach() for that behavior.)
875
- #
876
- # You must provide a +mode+ with an embedded Encoding designator unless your
877
- # data is in Encoding::default_external(). CSV will check the Encoding of the
878
- # underlying IO object (set by the +mode+ you pass) to determine how to parse
879
- # the data. You may provide a second Encoding to have the data transcoded as
880
- # it is read just as you can with a normal call to IO::open(). For example,
881
- # <tt>"rb:UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file but
882
- # transcode it to UTF-8 before CSV parses it.
883
- #
884
- # An opened CSV object will delegate to many IO methods for convenience. You
885
- # may call:
886
- #
887
- # * binmode()
888
- # * binmode?()
889
- # * close()
890
- # * close_read()
891
- # * close_write()
892
- # * closed?()
893
- # * eof()
894
- # * eof?()
895
- # * external_encoding()
896
- # * fcntl()
897
- # * fileno()
898
- # * flock()
899
- # * flush()
900
- # * fsync()
901
- # * internal_encoding()
902
- # * ioctl()
903
- # * isatty()
904
- # * path()
905
- # * pid()
906
- # * pos()
907
- # * pos=()
908
- # * reopen()
909
- # * seek()
910
- # * stat()
911
- # * sync()
912
- # * sync=()
913
- # * tell()
914
- # * to_i()
915
- # * to_io()
916
- # * truncate()
917
- # * tty?()
1457
+ # open(file_path, mode = "r", **options ) -> new_csv
1458
+ # open(io, mode = "r", **options ) -> new_csv
1459
+ # open(file_path, mode = "r", **options ) { |csv| ... } -> object
1460
+ # open(io, mode = "r", **options ) { |csv| ... } -> object
1461
+ #
1462
+ # possible options elements:
1463
+ # keyword form:
1464
+ # :invalid => nil # raise error on invalid byte sequence (default)
1465
+ # :invalid => :replace # replace invalid byte sequence
1466
+ # :undef => :replace # replace undefined conversion
1467
+ # :replace => string # replacement string ("?" or "\uFFFD" if not specified)
1468
+ #
1469
+ # * Argument +path+, if given, must be the path to a file.
1470
+ # :include: ../doc/csv/arguments/io.rdoc
1471
+ # * Argument +mode+, if given, must be a \File mode.
1472
+ # See {Open Mode}[IO.html#method-c-new-label-Open+Mode].
1473
+ # * Arguments <tt>**options</tt> must be keyword options.
1474
+ # See {Options for Generating}[#class-CSV-label-Options+for+Generating].
1475
+ # * This method optionally accepts an additional <tt>:encoding</tt> option
1476
+ # that you can use to specify the Encoding of the data read from +path+ or +io+.
1477
+ # You must provide this unless your data is in the encoding
1478
+ # given by <tt>Encoding::default_external</tt>.
1479
+ # Parsing will use this to determine how to parse the data.
1480
+ # You may provide a second Encoding to
1481
+ # have the data transcoded as it is read. For example,
1482
+ # encoding: 'UTF-32BE:UTF-8'
1483
+ # would read +UTF-32BE+ data from the file
1484
+ # but transcode it to +UTF-8+ before parsing.
1485
+ #
1486
+ # ---
1487
+ #
1488
+ # These examples assume prior execution of:
1489
+ # string = "foo,0\nbar,1\nbaz,2\n"
1490
+ # path = 't.csv'
1491
+ # File.write(path, string)
1492
+ #
1493
+ # ---
1494
+ #
1495
+ # With no block given, returns a new \CSV object.
1496
+ #
1497
+ # Create a \CSV object using a file path:
1498
+ # csv = CSV.open(path)
1499
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
918
1500
  #
1501
+ # Create a \CSV object using an open \File:
1502
+ # csv = CSV.open(File.open(path))
1503
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1504
+ #
1505
+ # ---
1506
+ #
1507
+ # With a block given, calls the block with the created \CSV object;
1508
+ # returns the block's return value:
1509
+ #
1510
+ # Using a file path:
1511
+ # csv = CSV.open(path) {|csv| p csv}
1512
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1513
+ # Output:
1514
+ # #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1515
+ #
1516
+ # Using an open \File:
1517
+ # csv = CSV.open(File.open(path)) {|csv| p csv}
1518
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1519
+ # Output:
1520
+ # #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1521
+ #
1522
+ # ---
1523
+ #
1524
+ # Raises an exception if the argument is not a \String object or \IO object:
1525
+ # # Raises TypeError (no implicit conversion of Symbol into String)
1526
+ # CSV.open(:foo)
919
1527
  def open(filename, mode="r", **options)
920
1528
  # wrap a File opened with the remaining +args+ with no newline
921
1529
  # decorator
922
- file_opts = {universal_newline: false}.merge(options)
1530
+ file_opts = options.dup
1531
+ unless file_opts.key?(:newline)
1532
+ file_opts[:universal_newline] ||= false
1533
+ end
1534
+ options.delete(:invalid)
1535
+ options.delete(:undef)
1536
+ options.delete(:replace)
1537
+ options.delete_if {|k, _| /newline\z/.match?(k)}
923
1538
 
924
1539
  begin
925
1540
  f = File.open(filename, mode, **file_opts)
@@ -950,16 +1565,116 @@ class CSV
950
1565
 
951
1566
  #
952
1567
  # :call-seq:
953
- # parse( str, **options ) { |row| ... }
954
- # parse( str, **options )
1568
+ # parse(string) -> array_of_arrays
1569
+ # parse(io) -> array_of_arrays
1570
+ # parse(string, headers: ..., **options) -> csv_table
1571
+ # parse(io, headers: ..., **options) -> csv_table
1572
+ # parse(string, **options) {|row| ... }
1573
+ # parse(io, **options) {|row| ... }
955
1574
  #
956
- # This method can be used to easily parse CSV out of a String. You may either
957
- # provide a +block+ which will be called with each row of the String in turn,
958
- # or just use the returned Array of Arrays (when no +block+ is given).
1575
+ # Parses +string+ or +io+ using the specified +options+.
959
1576
  #
960
- # You pass your +str+ to read from, and an optional +options+.
961
- # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1577
+ # - Argument +string+ should be a \String object;
1578
+ # it will be put into a new StringIO object positioned at the beginning.
1579
+ # :include: ../doc/csv/arguments/io.rdoc
1580
+ # - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
962
1581
  #
1582
+ # ====== Without Option +headers+
1583
+ #
1584
+ # Without {option +headers+}[#class-CSV-label-Option+headers] case.
1585
+ #
1586
+ # These examples assume prior execution of:
1587
+ # string = "foo,0\nbar,1\nbaz,2\n"
1588
+ # path = 't.csv'
1589
+ # File.write(path, string)
1590
+ #
1591
+ # ---
1592
+ #
1593
+ # With no block given, returns an \Array of Arrays formed from the source.
1594
+ #
1595
+ # Parse a \String:
1596
+ # a_of_a = CSV.parse(string)
1597
+ # a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
1598
+ #
1599
+ # Parse an open \File:
1600
+ # a_of_a = File.open(path) do |file|
1601
+ # CSV.parse(file)
1602
+ # end
1603
+ # a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
1604
+ #
1605
+ # ---
1606
+ #
1607
+ # With a block given, calls the block with each parsed row:
1608
+ #
1609
+ # Parse a \String:
1610
+ # CSV.parse(string) {|row| p row }
1611
+ #
1612
+ # Output:
1613
+ # ["foo", "0"]
1614
+ # ["bar", "1"]
1615
+ # ["baz", "2"]
1616
+ #
1617
+ # Parse an open \File:
1618
+ # File.open(path) do |file|
1619
+ # CSV.parse(file) {|row| p row }
1620
+ # end
1621
+ #
1622
+ # Output:
1623
+ # ["foo", "0"]
1624
+ # ["bar", "1"]
1625
+ # ["baz", "2"]
1626
+ #
1627
+ # ====== With Option +headers+
1628
+ #
1629
+ # With {option +headers+}[#class-CSV-label-Option+headers] case.
1630
+ #
1631
+ # These examples assume prior execution of:
1632
+ # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
1633
+ # path = 't.csv'
1634
+ # File.write(path, string)
1635
+ #
1636
+ # ---
1637
+ #
1638
+ # With no block given, returns a CSV::Table object formed from the source.
1639
+ #
1640
+ # Parse a \String:
1641
+ # csv_table = CSV.parse(string, headers: ['Name', 'Count'])
1642
+ # csv_table # => #<CSV::Table mode:col_or_row row_count:5>
1643
+ #
1644
+ # Parse an open \File:
1645
+ # csv_table = File.open(path) do |file|
1646
+ # CSV.parse(file, headers: ['Name', 'Count'])
1647
+ # end
1648
+ # csv_table # => #<CSV::Table mode:col_or_row row_count:5>
1649
+ #
1650
+ # ---
1651
+ #
1652
+ # With a block given, calls the block with each parsed row,
1653
+ # which has been formed into a CSV::Row object:
1654
+ #
1655
+ # Parse a \String:
1656
+ # CSV.parse(string, headers: ['Name', 'Count']) {|row| p row }
1657
+ #
1658
+ # Output:
1659
+ # # <CSV::Row "Name":"foo" "Count":"0">
1660
+ # # <CSV::Row "Name":"bar" "Count":"1">
1661
+ # # <CSV::Row "Name":"baz" "Count":"2">
1662
+ #
1663
+ # Parse an open \File:
1664
+ # File.open(path) do |file|
1665
+ # CSV.parse(file, headers: ['Name', 'Count']) {|row| p row }
1666
+ # end
1667
+ #
1668
+ # Output:
1669
+ # # <CSV::Row "Name":"foo" "Count":"0">
1670
+ # # <CSV::Row "Name":"bar" "Count":"1">
1671
+ # # <CSV::Row "Name":"baz" "Count":"2">
1672
+ #
1673
+ # ---
1674
+ #
1675
+ # Raises an exception if the argument is not a \String object or \IO object:
1676
+ # # Raises NoMethodError (undefined method `close' for :foo:Symbol)
1677
+ # CSV.parse(:foo)
963
1678
  def parse(str, **options, &block)
964
1679
  csv = new(str, **options)
965
1680
 
@@ -974,35 +1689,59 @@ class CSV
974
1689
  end
975
1690
 
976
1691
  # :call-seq:
977
- # CSV.parse_line(string)
978
- # CSV.parse_line(io)
979
- # CSV.parse_line(string, **options)
980
- # CSV.parse_line(io, **options)
1692
+ # CSV.parse_line(string) -> new_array or nil
1693
+ # CSV.parse_line(io) -> new_array or nil
1694
+ # CSV.parse_line(string, **options) -> new_array or nil
1695
+ # CSV.parse_line(io, **options) -> new_array or nil
1696
+ # CSV.parse_line(string, headers: true, **options) -> csv_row or nil
1697
+ # CSV.parse_line(io, headers: true, **options) -> csv_row or nil
981
1698
  #
982
- # Returns the new \Array created by parsing the first line of +string+ or +io+
1699
+ # Returns the data created by parsing the first line of +string+ or +io+
983
1700
  # using the specified +options+.
984
1701
  #
985
- # Argument +string+ should be a \String object;
986
- # it will be put into a new \StringIO object positioned at the beginning.
1702
+ # - Argument +string+ should be a \String object;
1703
+ # it will be put into a new StringIO object positioned at the beginning.
1704
+ # :include: ../doc/csv/arguments/io.rdoc
1705
+ # - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
987
1706
  #
988
- # Argument +io+ should be an \IO object; it will be positioned at the beginning.
1707
+ # ====== Without Option +headers+
989
1708
  #
990
- # For +options+, see {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1709
+ # Without option +headers+, returns the first row as a new \Array.
991
1710
  #
992
- # ---
993
- # Returns data from the first line from a String object:
994
- # CSV.parse_line('foo,0') # => ["foo", "0"]
1711
+ # These examples assume prior execution of:
1712
+ # string = "foo,0\nbar,1\nbaz,2\n"
1713
+ # path = 't.csv'
1714
+ # File.write(path, string)
995
1715
  #
996
- # Returns data from the first line from a File object:
997
- # File.write('t.csv', 'foo,0')
998
- # CSV.parse_line(File.open('t.csv')) # => ["foo", "0"]
1716
+ # Parse the first line from a \String object:
1717
+ # CSV.parse_line(string) # => ["foo", "0"]
999
1718
  #
1000
- # Ignores lines after the first:
1001
- # CSV.parse_line("foo,0\nbar,1\nbaz,2") # => ["foo", "0"]
1719
+ # Parse the first line from a File object:
1720
+ # File.open(path) do |file|
1721
+ # CSV.parse_line(file) # => ["foo", "0"]
1722
+ # end # => ["foo", "0"]
1002
1723
  #
1003
1724
  # Returns +nil+ if the argument is an empty \String:
1004
1725
  # CSV.parse_line('') # => nil
1005
1726
  #
1727
+ # ====== With Option +headers+
1728
+ #
1729
+ # With {option +headers+}[#class-CSV-label-Option+headers],
1730
+ # returns the first row as a CSV::Row object.
1731
+ #
1732
+ # These examples assume prior execution of:
1733
+ # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
1734
+ # path = 't.csv'
1735
+ # File.write(path, string)
1736
+ #
1737
+ # Parse the first line from a \String object:
1738
+ # CSV.parse_line(string, headers: true) # => #<CSV::Row "Name":"foo" "Count":"0">
1739
+ #
1740
+ # Parse the first line from a File object:
1741
+ # File.open(path) do |file|
1742
+ # CSV.parse_line(file, headers: true)
1743
+ # end # => #<CSV::Row "Name":"foo" "Count":"0">
1744
+ #
1006
1745
  # ---
1007
1746
  #
1008
1747
  # Raises an exception if the argument is +nil+:
@@ -1014,36 +1753,52 @@ class CSV
1014
1753
  end
1015
1754
 
1016
1755
  #
1017
- # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
1018
- # file and +options+.
1019
- # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1756
+ # :call-seq:
1757
+ # read(source, **options) -> array_of_arrays
1758
+ # read(source, headers: true, **options) -> csv_table
1759
+ #
1760
+ # Opens the given +source+ with the given +options+ (see CSV.open),
1761
+ # reads the source (see CSV#read), and returns the result,
1762
+ # which will be either an \Array of Arrays or a CSV::Table.
1020
1763
  #
1021
- # This method also understands
1022
- # an additional <tt>:encoding</tt> parameter that you can use to specify the
1023
- # Encoding of the data in the file to be read. You must provide this unless
1024
- # your data is in Encoding::default_external(). CSV will use this to determine
1025
- # how to parse the data. You may provide a second Encoding to have the data
1026
- # transcoded as it is read. For example,
1027
- # <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
1028
- # but transcode it to UTF-8 before CSV parses it.
1764
+ # Without headers:
1765
+ # string = "foo,0\nbar,1\nbaz,2\n"
1766
+ # path = 't.csv'
1767
+ # File.write(path, string)
1768
+ # CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
1029
1769
  #
1770
+ # With headers:
1771
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
1772
+ # path = 't.csv'
1773
+ # File.write(path, string)
1774
+ # CSV.read(path, headers: true) # => #<CSV::Table mode:col_or_row row_count:4>
1030
1775
  def read(path, **options)
1031
1776
  open(path, **options) { |csv| csv.read }
1032
1777
  end
1033
1778
 
1034
- # Alias for CSV::read().
1779
+ # :call-seq:
1780
+ # CSV.readlines(source, **options)
1781
+ #
1782
+ # Alias for CSV.read.
1035
1783
  def readlines(path, **options)
1036
1784
  read(path, **options)
1037
1785
  end
1038
1786
 
1787
+ # :call-seq:
1788
+ # CSV.table(source, **options)
1039
1789
  #
1040
- # A shortcut for:
1790
+ # Calls CSV.read with +source+, +options+, and certain default options:
1791
+ # - +headers+: +true+
1792
+ # - +converters+: +:numeric+
1793
+ # - +header_converters+: +:symbol+
1041
1794
  #
1042
- # CSV.read( path, { headers: true,
1043
- # converters: :numeric,
1044
- # header_converters: :symbol }.merge(options) )
1795
+ # Returns a CSV::Table object.
1045
1796
  #
1046
- # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1797
+ # Example:
1798
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
1799
+ # path = 't.csv'
1800
+ # File.write(path, string)
1801
+ # CSV.table(path) # => #<CSV::Table mode:col_or_row row_count:4>
1047
1802
  def table(path, **options)
1048
1803
  default_options = {
1049
1804
  headers: true,
@@ -1064,23 +1819,17 @@ class CSV
1064
1819
  # Returns the new \CSV object created using +string+ or +io+
1065
1820
  # and the specified +options+.
1066
1821
  #
1067
- # Argument +string+ should be a \String object;
1068
- # it will be put into a new \StringIO object positioned at the beginning.
1069
- #
1070
- # Argument +io+ should be an \IO object; it will be positioned at the beginning.
1071
- #
1072
- # To position at the end, for appending, use method CSV.generate.
1073
- # For any other positioning, pass a preset StringIO object instead.
1074
- #
1075
- # In addition to the \CSV instance methods, several \IO
1076
- # methods are delegated. See CSV::open for a complete list.
1077
- #
1078
- # For +options+, see:
1079
- # * {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
1080
- # * {Options for Generating}[#class-CSV-label-Options+for+Generating]
1822
+ # - Argument +string+ should be a \String object;
1823
+ # it will be put into a new StringIO object positioned at the beginning.
1824
+ # :include: ../doc/csv/arguments/io.rdoc
1825
+ # - Argument +options+: See:
1826
+ # * {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
1827
+ # * {Options for Generating}[#class-CSV-label-Options+for+Generating]
1828
+ # For performance reasons, the options cannot be overridden
1829
+ # in a \CSV object, so those specified here will endure.
1081
1830
  #
1082
- # For performance reasons, the options cannot be overridden
1083
- # in a \CSV object, so the options specified here will endure.
1831
+ # In addition to the \CSV instance methods, several \IO methods are delegated.
1832
+ # See {Delegated Methods}[#class-CSV-label-Delegated+Methods].
1084
1833
  #
1085
1834
  # ---
1086
1835
  #
@@ -1182,51 +1931,67 @@ class CSV
1182
1931
  writer if @writer_options[:write_headers]
1183
1932
  end
1184
1933
 
1934
+ # :call-seq:
1935
+ # csv.col_sep -> string
1185
1936
  #
1186
- # The encoded <tt>:col_sep</tt> used in parsing and writing.
1187
- # See CSV::new for details.
1188
- #
1937
+ # Returns the encoded column separator; used for parsing and writing;
1938
+ # see {Option +col_sep+}[#class-CSV-label-Option+col_sep]:
1939
+ # CSV.new('').col_sep # => ","
1189
1940
  def col_sep
1190
1941
  parser.column_separator
1191
1942
  end
1192
1943
 
1944
+ # :call-seq:
1945
+ # csv.row_sep -> string
1193
1946
  #
1194
- # The encoded <tt>:row_sep</tt> used in parsing and writing.
1195
- # See CSV::new for details.
1196
- #
1947
+ # Returns the encoded row separator; used for parsing and writing;
1948
+ # see {Option +row_sep+}[#class-CSV-label-Option+row_sep]:
1949
+ # CSV.new('').row_sep # => "\n"
1197
1950
  def row_sep
1198
1951
  parser.row_separator
1199
1952
  end
1200
1953
 
1954
+ # :call-seq:
1955
+ # csv.quote_char -> character
1201
1956
  #
1202
- # The encoded <tt>:quote_char</tt> used in parsing and writing.
1203
- # See CSV::new for details.
1204
- #
1957
+ # Returns the encoded quote character; used for parsing and writing;
1958
+ # see {Option +quote_char+}[#class-CSV-label-Option+quote_char]:
1959
+ # CSV.new('').quote_char # => "\""
1205
1960
  def quote_char
1206
1961
  parser.quote_character
1207
1962
  end
1208
1963
 
1964
+ # :call-seq:
1965
+ # csv.field_size_limit -> integer or nil
1209
1966
  #
1210
- # The limit for field size, if any.
1211
- # See CSV::new for details.
1212
- #
1967
+ # Returns the limit for field size; used for parsing;
1968
+ # see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
1969
+ # CSV.new('').field_size_limit # => nil
1213
1970
  def field_size_limit
1214
1971
  parser.field_size_limit
1215
1972
  end
1216
1973
 
1974
+ # :call-seq:
1975
+ # csv.skip_lines -> regexp or nil
1217
1976
  #
1218
- # The regex marking a line as a comment.
1219
- # See CSV::new for details.
1220
- #
1977
+ # Returns the \Regexp used to identify comment lines; used for parsing;
1978
+ # see {Option +skip_lines+}[#class-CSV-label-Option+skip_lines]:
1979
+ # CSV.new('').skip_lines # => nil
1221
1980
  def skip_lines
1222
1981
  parser.skip_lines
1223
1982
  end
1224
1983
 
1225
- #
1226
- # Returns the current list of converters in effect. See CSV::new for details.
1227
- # Built-in converters will be returned by name, while others will be returned
1228
- # as is.
1229
- #
1984
+ # :call-seq:
1985
+ # csv.converters -> array
1986
+ #
1987
+ # Returns an \Array containing field converters;
1988
+ # see {Field Converters}[#class-CSV-label-Field+Converters]:
1989
+ # csv = CSV.new('')
1990
+ # csv.converters # => []
1991
+ # csv.convert(:integer)
1992
+ # csv.converters # => [:integer]
1993
+ # csv.convert(proc {|x| x.to_s })
1994
+ # csv.converters
1230
1995
  def converters
1231
1996
  parser_fields_converter.map do |converter|
1232
1997
  name = Converters.rassoc(converter)
@@ -1234,19 +1999,23 @@ class CSV
1234
1999
  end
1235
2000
  end
1236
2001
 
2002
+ # :call-seq:
2003
+ # csv.unconverted_fields? -> object
1237
2004
  #
1238
- # Returns +true+ if unconverted_fields() to parsed results.
1239
- # See CSV::new for details.
1240
- #
2005
+ # Returns the value that determines whether unconverted fields are to be
2006
+ # available; used for parsing;
2007
+ # see {Option +unconverted_fields+}[#class-CSV-label-Option+unconverted_fields]:
2008
+ # CSV.new('').unconverted_fields? # => nil
1241
2009
  def unconverted_fields?
1242
2010
  parser.unconverted_fields?
1243
2011
  end
1244
2012
 
2013
+ # :call-seq:
2014
+ # csv.headers -> object
1245
2015
  #
1246
- # Returns +nil+ if headers will not be used, +true+ if they will but have not
1247
- # yet been read, or the actual headers after they have been read.
1248
- # See CSV::new for details.
1249
- #
2016
+ # Returns the value that determines whether headers are used; used for parsing;
2017
+ # see {Option +headers+}[#class-CSV-label-Option+headers]:
2018
+ # CSV.new('').headers # => nil
1250
2019
  def headers
1251
2020
  if @writer
1252
2021
  @writer.headers
@@ -1258,27 +2027,33 @@ class CSV
1258
2027
  raw_headers
1259
2028
  end
1260
2029
  end
2030
+
2031
+ # :call-seq:
2032
+ # csv.return_headers? -> true or false
1261
2033
  #
1262
- # Returns +true+ if headers will be returned as a row of results.
1263
- # See CSV::new for details.
1264
- #
2034
+ # Returns the value that determines whether headers are to be returned; used for parsing;
2035
+ # see {Option +return_headers+}[#class-CSV-label-Option+return_headers]:
2036
+ # CSV.new('').return_headers? # => false
1265
2037
  def return_headers?
1266
2038
  parser.return_headers?
1267
2039
  end
1268
2040
 
2041
+ # :call-seq:
2042
+ # csv.write_headers? -> true, false, or nil
1269
2043
  #
1270
- # Returns +true+ if headers are written in output.
1271
- # See CSV::new for details.
1272
- #
2044
+ # Returns the value that determines whether headers are to be written; used for generating;
2045
+ # see {Option +write_headers+}[#class-CSV-label-Option+write_headers]:
2046
+ # CSV.new('').write_headers? # => nil
1273
2047
  def write_headers?
1274
2048
  @writer_options[:write_headers]
1275
2049
  end
1276
2050
 
2051
+ # :call-seq:
2052
+ # csv.header_converters -> array
1277
2053
  #
1278
- # Returns the current list of converters in effect for headers. See CSV::new
1279
- # for details. Built-in converters will be returned by name, while others
1280
- # will be returned as is.
1281
- #
2054
+ # Returns an \Array containing header converters; used for parsing;
2055
+ # see {Header Converters}[#class-CSV-label-Header+Converters]:
2056
+ # CSV.new('').header_converters # => []
1282
2057
  def header_converters
1283
2058
  header_fields_converter.map do |converter|
1284
2059
  name = HeaderConverters.rassoc(converter)
@@ -1286,34 +2061,74 @@ class CSV
1286
2061
  end
1287
2062
  end
1288
2063
 
2064
+ # :call-seq:
2065
+ # csv.skip_blanks? -> true or false
1289
2066
  #
1290
- # Returns +true+ blank lines are skipped by the parser. See CSV::new
1291
- # for details.
1292
- #
2067
+ # Returns the value that determines whether blank lines are to be ignored; used for parsing;
2068
+ # see {Option +skip_blanks+}[#class-CSV-label-Option+skip_blanks]:
2069
+ # CSV.new('').skip_blanks? # => false
1293
2070
  def skip_blanks?
1294
2071
  parser.skip_blanks?
1295
2072
  end
1296
2073
 
1297
- # Returns +true+ if all output fields are quoted. See CSV::new for details.
2074
+ # :call-seq:
2075
+ # csv.force_quotes? -> true or false
2076
+ #
2077
+ # Returns the value that determines whether all output fields are to be quoted;
2078
+ # used for generating;
2079
+ # see {Option +force_quotes+}[#class-CSV-label-Option+force_quotes]:
2080
+ # CSV.new('').force_quotes? # => false
1298
2081
  def force_quotes?
1299
2082
  @writer_options[:force_quotes]
1300
2083
  end
1301
2084
 
1302
- # Returns +true+ if illegal input is handled. See CSV::new for details.
2085
+ # :call-seq:
2086
+ # csv.liberal_parsing? -> true or false
2087
+ #
2088
+ # Returns the value that determines whether illegal input is to be handled; used for parsing;
2089
+ # see {Option +liberal_parsing+}[#class-CSV-label-Option+liberal_parsing]:
2090
+ # CSV.new('').liberal_parsing? # => false
1303
2091
  def liberal_parsing?
1304
2092
  parser.liberal_parsing?
1305
2093
  end
1306
2094
 
2095
+ # :call-seq:
2096
+ # csv.encoding -> encoding
1307
2097
  #
1308
- # The Encoding CSV is parsing or writing in. This will be the Encoding you
1309
- # receive parsed data in and/or the Encoding data will be written in.
1310
- #
2098
+ # Returns the encoding used for parsing and generating;
2099
+ # see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]:
2100
+ # CSV.new('').encoding # => #<Encoding:UTF-8>
1311
2101
  attr_reader :encoding
1312
2102
 
1313
- #
1314
- # The line number of the last row read from this file. Fields with nested
1315
- # line-end characters will not affect this count.
1316
- #
2103
+ # :call-seq:
2104
+ # csv.lineno -> integer
2105
+ #
2106
+ # Returns the count of the rows parsed or generated.
2107
+ #
2108
+ # Parsing:
2109
+ # string = "foo,0\nbar,1\nbaz,2\n"
2110
+ # path = 't.csv'
2111
+ # File.write(path, string)
2112
+ # CSV.open(path) do |csv|
2113
+ # csv.each do |row|
2114
+ # p [csv.lineno, row]
2115
+ # end
2116
+ # end
2117
+ # Output:
2118
+ # [1, ["foo", "0"]]
2119
+ # [2, ["bar", "1"]]
2120
+ # [3, ["baz", "2"]]
2121
+ #
2122
+ # Generating:
2123
+ # CSV.generate do |csv|
2124
+ # p csv.lineno; csv << ['foo', 0]
2125
+ # p csv.lineno; csv << ['bar', 1]
2126
+ # p csv.lineno; csv << ['baz', 2]
2127
+ # end
2128
+ # Output:
2129
+ # 0
2130
+ # 1
2131
+ # 2
1317
2132
  def lineno
1318
2133
  if @writer
1319
2134
  @writer.lineno
@@ -1322,9 +2137,22 @@ class CSV
1322
2137
  end
1323
2138
  end
1324
2139
 
1325
- #
1326
- # The last row read from this file.
1327
- #
2140
+ # :call-seq:
2141
+ # csv.line -> array
2142
+ #
2143
+ # Returns the line most recently read:
2144
+ # string = "foo,0\nbar,1\nbaz,2\n"
2145
+ # path = 't.csv'
2146
+ # File.write(path, string)
2147
+ # CSV.open(path) do |csv|
2148
+ # csv.each do |row|
2149
+ # p [csv.lineno, csv.line]
2150
+ # end
2151
+ # end
2152
+ # Output:
2153
+ # [1, "foo,0\n"]
2154
+ # [2, "bar,1\n"]
2155
+ # [3, "baz,2\n"]
1328
2156
  def line
1329
2157
  parser.line
1330
2158
  end
@@ -1400,13 +2228,56 @@ class CSV
1400
2228
 
1401
2229
  ### End Delegation ###
1402
2230
 
2231
+ # :call-seq:
2232
+ # csv << row -> self
2233
+ #
2234
+ # Appends a row to +self+.
1403
2235
  #
1404
- # The primary write method for wrapped Strings and IOs, +row+ (an Array or
1405
- # CSV::Row) is converted to CSV and appended to the data source. When a
1406
- # CSV::Row is passed, only the row's fields() are appended to the output.
2236
+ # - Argument +row+ must be an \Array object or a CSV::Row object.
2237
+ # - The output stream must be open for writing.
1407
2238
  #
1408
- # The data source must be open for writing.
2239
+ # ---
1409
2240
  #
2241
+ # Append Arrays:
2242
+ # CSV.generate do |csv|
2243
+ # csv << ['foo', 0]
2244
+ # csv << ['bar', 1]
2245
+ # csv << ['baz', 2]
2246
+ # end # => "foo,0\nbar,1\nbaz,2\n"
2247
+ #
2248
+ # Append CSV::Rows:
2249
+ # headers = []
2250
+ # CSV.generate do |csv|
2251
+ # csv << CSV::Row.new(headers, ['foo', 0])
2252
+ # csv << CSV::Row.new(headers, ['bar', 1])
2253
+ # csv << CSV::Row.new(headers, ['baz', 2])
2254
+ # end # => "foo,0\nbar,1\nbaz,2\n"
2255
+ #
2256
+ # Headers in CSV::Row objects are not appended:
2257
+ # headers = ['Name', 'Count']
2258
+ # CSV.generate do |csv|
2259
+ # csv << CSV::Row.new(headers, ['foo', 0])
2260
+ # csv << CSV::Row.new(headers, ['bar', 1])
2261
+ # csv << CSV::Row.new(headers, ['baz', 2])
2262
+ # end # => "foo,0\nbar,1\nbaz,2\n"
2263
+ #
2264
+ # ---
2265
+ #
2266
+ # Raises an exception if +row+ is not an \Array or \CSV::Row:
2267
+ # CSV.generate do |csv|
2268
+ # # Raises NoMethodError (undefined method `collect' for :foo:Symbol)
2269
+ # csv << :foo
2270
+ # end
2271
+ #
2272
+ # Raises an exception if the output stream is not opened for writing:
2273
+ # path = 't.csv'
2274
+ # File.write(path, '')
2275
+ # File.open(path) do |file|
2276
+ # CSV.open(file) do |csv|
2277
+ # # Raises IOError (not opened for writing)
2278
+ # csv << ['foo', 0]
2279
+ # end
2280
+ # end
1410
2281
  def <<(row)
1411
2282
  writer << row
1412
2283
  self
@@ -1414,58 +2285,216 @@ class CSV
1414
2285
  alias_method :add_row, :<<
1415
2286
  alias_method :puts, :<<
1416
2287
 
1417
- #
1418
2288
  # :call-seq:
1419
- # convert( name )
1420
- # convert { |field| ... }
1421
- # convert { |field, field_info| ... }
2289
+ # convert(converter_name) -> array_of_procs
2290
+ # convert {|field, field_info| ... } -> array_of_procs
1422
2291
  #
1423
- # You can use this method to install a CSV::Converters built-in, or provide a
1424
- # block that handles a custom conversion.
2292
+ # - With no block, installs a field converter (a \Proc).
2293
+ # - With a block, defines and installs a custom field converter.
2294
+ # - Returns the \Array of installed field converters.
1425
2295
  #
1426
- # If you provide a block that takes one argument, it will be passed the field
1427
- # and is expected to return the converted value or the field itself. If your
1428
- # block takes two arguments, it will also be passed a CSV::FieldInfo Struct,
1429
- # containing details about the field. Again, the block should return a
1430
- # converted field or the field itself.
2296
+ # - Argument +converter_name+, if given, should be the name
2297
+ # of an existing field converter.
2298
+ #
2299
+ # See {Field Converters}[#class-CSV-label-Field+Converters].
2300
+ # ---
1431
2301
  #
2302
+ # With no block, installs a field converter:
2303
+ # csv = CSV.new('')
2304
+ # csv.convert(:integer)
2305
+ # csv.convert(:float)
2306
+ # csv.convert(:date)
2307
+ # csv.converters # => [:integer, :float, :date]
2308
+ #
2309
+ # ---
2310
+ #
2311
+ # The block, if given, is called for each field:
2312
+ # - Argument +field+ is the field value.
2313
+ # - Argument +field_info+ is a CSV::FieldInfo object
2314
+ # containing details about the field.
2315
+ #
2316
+ # The examples here assume the prior execution of:
2317
+ # string = "foo,0\nbar,1\nbaz,2\n"
2318
+ # path = 't.csv'
2319
+ # File.write(path, string)
2320
+ #
2321
+ # Example giving a block:
2322
+ # csv = CSV.open(path)
2323
+ # csv.convert {|field, field_info| p [field, field_info]; field.upcase }
2324
+ # csv.read # => [["FOO", "0"], ["BAR", "1"], ["BAZ", "2"]]
2325
+ #
2326
+ # Output:
2327
+ # ["foo", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
2328
+ # ["0", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
2329
+ # ["bar", #<struct CSV::FieldInfo index=0, line=2, header=nil>]
2330
+ # ["1", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
2331
+ # ["baz", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
2332
+ # ["2", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
2333
+ #
2334
+ # The block need not return a \String object:
2335
+ # csv = CSV.open(path)
2336
+ # csv.convert {|field, field_info| field.to_sym }
2337
+ # csv.read # => [[:foo, :"0"], [:bar, :"1"], [:baz, :"2"]]
2338
+ #
2339
+ # If +converter_name+ is given, the block is not called:
2340
+ # csv = CSV.open(path)
2341
+ # csv.convert(:integer) {|field, field_info| fail 'Cannot happen' }
2342
+ # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
2343
+ #
2344
+ # ---
2345
+ #
2346
+ # Raises a parse-time exception if +converter_name+ is not the name of a built-in
2347
+ # field converter:
2348
+ # csv = CSV.open(path)
2349
+ # csv.convert(:nosuch) # => [nil]
2350
+ # # Raises NoMethodError (undefined method `arity' for nil:NilClass)
2351
+ # csv.read
1432
2352
  def convert(name = nil, &converter)
1433
2353
  parser_fields_converter.add_converter(name, &converter)
1434
2354
  end
1435
2355
 
1436
- #
1437
2356
  # :call-seq:
1438
- # header_convert( name )
1439
- # header_convert { |field| ... }
1440
- # header_convert { |field, field_info| ... }
2357
+ # header_convert(converter_name) -> array_of_procs
2358
+ # header_convert {|header, field_info| ... } -> array_of_procs
2359
+ #
2360
+ # - With no block, installs a header converter (a \Proc).
2361
+ # - With a block, defines and installs a custom header converter.
2362
+ # - Returns the \Array of installed header converters.
1441
2363
  #
1442
- # Identical to CSV#convert(), but for header rows.
2364
+ # - Argument +converter_name+, if given, should be the name
2365
+ # of an existing header converter.
2366
+ #
2367
+ # See {Header Converters}[#class-CSV-label-Header+Converters].
2368
+ # ---
1443
2369
  #
1444
- # Note that this method must be called before header rows are read to have any
1445
- # effect.
2370
+ # With no block, installs a header converter:
2371
+ # csv = CSV.new('')
2372
+ # csv.header_convert(:symbol)
2373
+ # csv.header_convert(:downcase)
2374
+ # csv.header_converters # => [:symbol, :downcase]
1446
2375
  #
2376
+ # ---
2377
+ #
2378
+ # The block, if given, is called for each header:
2379
+ # - Argument +header+ is the header value.
2380
+ # - Argument +field_info+ is a CSV::FieldInfo object
2381
+ # containing details about the header.
2382
+ #
2383
+ # The examples here assume the prior execution of:
2384
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2385
+ # path = 't.csv'
2386
+ # File.write(path, string)
2387
+ #
2388
+ # Example giving a block:
2389
+ # csv = CSV.open(path, headers: true)
2390
+ # csv.header_convert {|header, field_info| p [header, field_info]; header.upcase }
2391
+ # table = csv.read
2392
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
2393
+ # table.headers # => ["NAME", "VALUE"]
2394
+ #
2395
+ # Output:
2396
+ # ["Name", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
2397
+ # ["Value", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
2398
+ #
2399
+ # The block need not return a \String object:
2400
+ # csv = CSV.open(path, headers: true)
2401
+ # csv.header_convert {|header, field_info| header.to_sym }
2402
+ # table = csv.read
2403
+ # table.headers # => [:Name, :Value]
2404
+ #
2405
+ # If +converter_name+ is given, the block is not called:
2406
+ # csv = CSV.open(path, headers: true)
2407
+ # csv.header_convert(:downcase) {|header, field_info| fail 'Cannot happen' }
2408
+ # table = csv.read
2409
+ # table.headers # => ["name", "value"]
2410
+ # ---
2411
+ #
2412
+ # Raises a parse-time exception if +converter_name+ is not the name of a built-in
2413
+ # header converter:
2414
+ # csv = CSV.open(path, headers: true)
2415
+ # csv.header_convert(:nosuch)
2416
+ # # Raises NoMethodError (undefined method `arity' for nil:NilClass)
2417
+ # csv.read
1447
2418
  def header_convert(name = nil, &converter)
1448
2419
  header_fields_converter.add_converter(name, &converter)
1449
2420
  end
1450
2421
 
1451
2422
  include Enumerable
1452
2423
 
2424
+ # :call-seq:
2425
+ # csv.each -> enumerator
2426
+ # csv.each {|row| ...}
2427
+ #
2428
+ # Calls the block with each successive row.
2429
+ # The data source must be opened for reading.
2430
+ #
2431
+ # Without headers:
2432
+ # string = "foo,0\nbar,1\nbaz,2\n"
2433
+ # csv = CSV.new(string)
2434
+ # csv.each do |row|
2435
+ # p row
2436
+ # end
2437
+ # Output:
2438
+ # ["foo", "0"]
2439
+ # ["bar", "1"]
2440
+ # ["baz", "2"]
2441
+ #
2442
+ # With headers:
2443
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2444
+ # csv = CSV.new(string, headers: true)
2445
+ # csv.each do |row|
2446
+ # p row
2447
+ # end
2448
+ # Output:
2449
+ # #<CSV::Row "Name":"foo" "Value":"0">
2450
+ # #<CSV::Row "Name":"bar" "Value":"1">
2451
+ # #<CSV::Row "Name":"baz" "Value":"2">
1453
2452
  #
1454
- # Yields each row of the data source in turn.
1455
- #
1456
- # Support for Enumerable.
1457
- #
1458
- # The data source must be open for reading.
2453
+ # ---
1459
2454
  #
2455
+ # Raises an exception if the source is not opened for reading:
2456
+ # string = "foo,0\nbar,1\nbaz,2\n"
2457
+ # csv = CSV.new(string)
2458
+ # csv.close
2459
+ # # Raises IOError (not opened for reading)
2460
+ # csv.each do |row|
2461
+ # p row
2462
+ # end
1460
2463
  def each(&block)
1461
2464
  parser_enumerator.each(&block)
1462
2465
  end
1463
2466
 
2467
+ # :call-seq:
2468
+ # csv.read -> array or csv_table
1464
2469
  #
1465
- # Slurps the remaining rows and returns an Array of Arrays.
2470
+ # Forms the remaining rows from +self+ into:
2471
+ # - A CSV::Table object, if headers are in use.
2472
+ # - An \Array of Arrays, otherwise.
1466
2473
  #
1467
- # The data source must be open for reading.
2474
+ # The data source must be opened for reading.
1468
2475
  #
2476
+ # Without headers:
2477
+ # string = "foo,0\nbar,1\nbaz,2\n"
2478
+ # path = 't.csv'
2479
+ # File.write(path, string)
2480
+ # csv = CSV.open(path)
2481
+ # csv.read # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
2482
+ #
2483
+ # With headers:
2484
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2485
+ # path = 't.csv'
2486
+ # File.write(path, string)
2487
+ # csv = CSV.open(path, headers: true)
2488
+ # csv.read # => #<CSV::Table mode:col_or_row row_count:4>
2489
+ #
2490
+ # ---
2491
+ #
2492
+ # Raises an exception if the source is not opened for reading:
2493
+ # string = "foo,0\nbar,1\nbaz,2\n"
2494
+ # csv = CSV.new(string)
2495
+ # csv.close
2496
+ # # Raises IOError (not opened for reading)
2497
+ # csv.read
1469
2498
  def read
1470
2499
  rows = to_a
1471
2500
  if parser.use_headers?
@@ -1476,18 +2505,69 @@ class CSV
1476
2505
  end
1477
2506
  alias_method :readlines, :read
1478
2507
 
1479
- # Returns +true+ if the next row read will be a header row.
2508
+ # :call-seq:
2509
+ # csv.header_row? -> true or false
2510
+ #
2511
+ # Returns +true+ if the next row to be read is a header row\;
2512
+ # +false+ otherwise.
2513
+ #
2514
+ # Without headers:
2515
+ # string = "foo,0\nbar,1\nbaz,2\n"
2516
+ # csv = CSV.new(string)
2517
+ # csv.header_row? # => false
2518
+ #
2519
+ # With headers:
2520
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2521
+ # csv = CSV.new(string, headers: true)
2522
+ # csv.header_row? # => true
2523
+ # csv.shift # => #<CSV::Row "Name":"foo" "Value":"0">
2524
+ # csv.header_row? # => false
2525
+ #
2526
+ # ---
2527
+ #
2528
+ # Raises an exception if the source is not opened for reading:
2529
+ # string = "foo,0\nbar,1\nbaz,2\n"
2530
+ # csv = CSV.new(string)
2531
+ # csv.close
2532
+ # # Raises IOError (not opened for reading)
2533
+ # csv.header_row?
1480
2534
  def header_row?
1481
2535
  parser.header_row?
1482
2536
  end
1483
2537
 
2538
+ # :call-seq:
2539
+ # csv.shift -> array, csv_row, or nil
2540
+ #
2541
+ # Returns the next row of data as:
2542
+ # - An \Array if no headers are used.
2543
+ # - A CSV::Row object if headers are used.
2544
+ #
2545
+ # The data source must be opened for reading.
2546
+ #
2547
+ # Without headers:
2548
+ # string = "foo,0\nbar,1\nbaz,2\n"
2549
+ # csv = CSV.new(string)
2550
+ # csv.shift # => ["foo", "0"]
2551
+ # csv.shift # => ["bar", "1"]
2552
+ # csv.shift # => ["baz", "2"]
2553
+ # csv.shift # => nil
2554
+ #
2555
+ # With headers:
2556
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2557
+ # csv = CSV.new(string, headers: true)
2558
+ # csv.shift # => #<CSV::Row "Name":"foo" "Value":"0">
2559
+ # csv.shift # => #<CSV::Row "Name":"bar" "Value":"1">
2560
+ # csv.shift # => #<CSV::Row "Name":"baz" "Value":"2">
2561
+ # csv.shift # => nil
1484
2562
  #
1485
- # The primary read method for wrapped Strings and IOs, a single row is pulled
1486
- # from the data source, parsed and returned as an Array of fields (if header
1487
- # rows are not used) or a CSV::Row (when header rows are used).
1488
- #
1489
- # The data source must be open for reading.
2563
+ # ---
1490
2564
  #
2565
+ # Raises an exception if the source is not opened for reading:
2566
+ # string = "foo,0\nbar,1\nbaz,2\n"
2567
+ # csv = CSV.new(string)
2568
+ # csv.close
2569
+ # # Raises IOError (not opened for reading)
2570
+ # csv.shift
1491
2571
  def shift
1492
2572
  if @eof_error
1493
2573
  eof_error, @eof_error = @eof_error, nil
@@ -1502,10 +2582,14 @@ class CSV
1502
2582
  alias_method :gets, :shift
1503
2583
  alias_method :readline, :shift
1504
2584
 
2585
+ # :call-seq:
2586
+ # csv.inspect -> string
1505
2587
  #
1506
- # Returns a simplified description of the key CSV attributes in an
1507
- # ASCII compatible String.
1508
- #
2588
+ # Returns a \String showing certain properties of +self+:
2589
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2590
+ # csv = CSV.new(string, headers: true)
2591
+ # s = csv.inspect
2592
+ # s # => "#<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:\",\" row_sep:\"\\n\" quote_char:\"\\\"\" headers:true>"
1509
2593
  def inspect
1510
2594
  str = ["#<", self.class.to_s, " io_type:"]
1511
2595
  # show type of wrapped IO
@@ -1685,8 +2769,13 @@ end
1685
2769
  # c.read.any? { |a| a.include?("zombies") }
1686
2770
  # } #=> false
1687
2771
  #
1688
- def CSV(*args, &block)
1689
- CSV.instance(*args, &block)
2772
+ # CSV options may also be given.
2773
+ #
2774
+ # io = StringIO.new
2775
+ # CSV(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] }
2776
+ #
2777
+ def CSV(*args, **options, &block)
2778
+ CSV.instance(*args, **options, &block)
1690
2779
  end
1691
2780
 
1692
2781
  require_relative "csv/version"