csv 3.1.3 → 3.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/NEWS.md +110 -0
  3. data/README.md +5 -0
  4. data/doc/csv/arguments/io.rdoc +5 -0
  5. data/doc/csv/options/common/col_sep.rdoc +57 -0
  6. data/doc/csv/options/common/quote_char.rdoc +42 -0
  7. data/doc/csv/options/common/row_sep.rdoc +91 -0
  8. data/doc/csv/options/generating/force_quotes.rdoc +17 -0
  9. data/doc/csv/options/generating/quote_empty.rdoc +12 -0
  10. data/doc/csv/options/generating/write_converters.rdoc +25 -0
  11. data/doc/csv/options/generating/write_empty_value.rdoc +15 -0
  12. data/doc/csv/options/generating/write_headers.rdoc +29 -0
  13. data/doc/csv/options/generating/write_nil_value.rdoc +14 -0
  14. data/doc/csv/options/parsing/converters.rdoc +46 -0
  15. data/doc/csv/options/parsing/empty_value.rdoc +13 -0
  16. data/doc/csv/options/parsing/field_size_limit.rdoc +39 -0
  17. data/doc/csv/options/parsing/header_converters.rdoc +43 -0
  18. data/doc/csv/options/parsing/headers.rdoc +63 -0
  19. data/doc/csv/options/parsing/liberal_parsing.rdoc +19 -0
  20. data/doc/csv/options/parsing/nil_value.rdoc +12 -0
  21. data/doc/csv/options/parsing/return_headers.rdoc +22 -0
  22. data/doc/csv/options/parsing/skip_blanks.rdoc +31 -0
  23. data/doc/csv/options/parsing/skip_lines.rdoc +37 -0
  24. data/doc/csv/options/parsing/strip.rdoc +15 -0
  25. data/doc/csv/options/parsing/unconverted_fields.rdoc +27 -0
  26. data/doc/csv/recipes/filtering.rdoc +158 -0
  27. data/doc/csv/recipes/generating.rdoc +298 -0
  28. data/doc/csv/recipes/parsing.rdoc +545 -0
  29. data/doc/csv/recipes/recipes.rdoc +6 -0
  30. data/lib/csv.rb +1724 -568
  31. data/lib/csv/fields_converter.rb +1 -1
  32. data/lib/csv/parser.rb +1 -1
  33. data/lib/csv/row.rb +477 -132
  34. data/lib/csv/table.rb +750 -108
  35. data/lib/csv/version.rb +1 -1
  36. data/lib/csv/writer.rb +45 -4
  37. metadata +41 -6
@@ -0,0 +1,6 @@
1
+ == Recipes for \CSV
2
+
3
+ The recipes are specific code examples for specific tasks. See:
4
+ - {Recipes for Parsing CSV}[./parsing_rdoc.html]
5
+ - {Recipes for Generating CSV}[./generating_rdoc.html]
6
+ - {Recipes for Filtering CSV}[./filtering_rdoc.html]
data/lib/csv.rb CHANGED
@@ -34,7 +34,7 @@
34
34
  # I'm sure I'll miss something, but I'll try to mention most of the major
35
35
  # differences I am aware of, to help others quickly get up to speed:
36
36
  #
37
- # === CSV Parsing
37
+ # === \CSV Parsing
38
38
  #
39
39
  # * This parser is m17n aware. See CSV for full details.
40
40
  # * This library has a stricter parser and will throw MalformedCSVErrors on
@@ -103,85 +103,338 @@ require_relative "csv/writer"
103
103
 
104
104
  using CSV::MatchP if CSV.const_defined?(:MatchP)
105
105
 
106
+ # == \CSV
106
107
  #
107
- # This class provides a complete interface to CSV files and data. It offers
108
- # tools to enable you to read and write to and from Strings or IO objects, as
109
- # needed.
108
+ # === In a Hurry?
110
109
  #
111
- # The most generic interface of the library is:
110
+ # If you are familiar with \CSV data and have a particular task in mind,
111
+ # you may want to go directly to the:
112
+ # - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html].
112
113
  #
113
- # csv = CSV.new(string_or_io, **options)
114
+ # Otherwise, read on here, about the API: classes, methods, and constants.
114
115
  #
115
- # # Reading: IO object should be open for read
116
- # csv.read # => array of rows
117
- # # or
118
- # csv.each do |row|
119
- # # ...
120
- # end
121
- # # or
122
- # row = csv.shift
116
+ # === \CSV Data
123
117
  #
124
- # # Writing: IO object should be open for write
125
- # csv << row
118
+ # \CSV (comma-separated values) data is a text representation of a table:
119
+ # - A _row_ _separator_ delimits table rows.
120
+ # A common row separator is the newline character <tt>"\n"</tt>.
121
+ # - A _column_ _separator_ delimits fields in a row.
122
+ # A common column separator is the comma character <tt>","</tt>.
126
123
  #
127
- # There are several specialized class methods for one-statement reading or writing,
128
- # described in the Specialized Methods section.
124
+ # This \CSV \String, with row separator <tt>"\n"</tt>
125
+ # and column separator <tt>","</tt>,
126
+ # has three rows and two columns:
127
+ # "foo,0\nbar,1\nbaz,2\n"
129
128
  #
130
- # If a String is passed into ::new, it is internally wrapped into a StringIO object.
129
+ # Despite the name \CSV, a \CSV representation can use different separators.
131
130
  #
132
- # +options+ can be used for specifying the particular CSV flavor (column
133
- # separators, row separators, value quoting and so on), and for data conversion,
134
- # see Data Conversion section for the description of the latter.
131
+ # For more about tables, see the Wikipedia article
132
+ # "{Table (information)}[https://en.wikipedia.org/wiki/Table_(information)]",
133
+ # especially its section
134
+ # "{Simple table}[https://en.wikipedia.org/wiki/Table_(information)#Simple_table]"
135
135
  #
136
- # == Specialized Methods
136
+ # == \Class \CSV
137
137
  #
138
- # === Reading
138
+ # Class \CSV provides methods for:
139
+ # - Parsing \CSV data from a \String object, a \File (via its file path), or an \IO object.
140
+ # - Generating \CSV data to a \String object.
139
141
  #
140
- # # From a file: all at once
141
- # arr_of_rows = CSV.read("path/to/file.csv", **options)
142
- # # iterator-style:
143
- # CSV.foreach("path/to/file.csv", **options) do |row|
144
- # # ...
145
- # end
142
+ # To make \CSV available:
143
+ # require 'csv'
146
144
  #
147
- # # From a string
148
- # arr_of_rows = CSV.parse("CSV,data,String", **options)
149
- # # or
150
- # CSV.parse("CSV,data,String", **options) do |row|
151
- # # ...
152
- # end
145
+ # All examples here assume that this has been done.
146
+ #
147
+ # == Keeping It Simple
148
+ #
149
+ # A \CSV object has dozens of instance methods that offer fine-grained control
150
+ # of parsing and generating \CSV data.
151
+ # For many needs, though, simpler approaches will do.
152
+ #
153
+ # This section summarizes the singleton methods in \CSV
154
+ # that allow you to parse and generate without explicitly
155
+ # creating \CSV objects.
156
+ # For details, follow the links.
157
+ #
158
+ # === Simple Parsing
159
+ #
160
+ # Parsing methods commonly return either of:
161
+ # - An \Array of Arrays of Strings:
162
+ # - The outer \Array is the entire "table".
163
+ # - Each inner \Array is a row.
164
+ # - Each \String is a field.
165
+ # - A CSV::Table object. For details, see
166
+ # {\CSV with Headers}[#class-CSV-label-CSV+with+Headers].
167
+ #
168
+ # ==== Parsing a \String
169
+ #
170
+ # The input to be parsed can be a string:
171
+ # string = "foo,0\nbar,1\nbaz,2\n"
172
+ #
173
+ # \Method CSV.parse returns the entire \CSV data:
174
+ # CSV.parse(string) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
175
+ #
176
+ # \Method CSV.parse_line returns only the first row:
177
+ # CSV.parse_line(string) # => ["foo", "0"]
178
+ #
179
+ # \CSV extends class \String with instance method String#parse_csv,
180
+ # which also returns only the first row:
181
+ # string.parse_csv # => ["foo", "0"]
182
+ #
183
+ # ==== Parsing Via a \File Path
184
+ #
185
+ # The input to be parsed can be in a file:
186
+ # string = "foo,0\nbar,1\nbaz,2\n"
187
+ # path = 't.csv'
188
+ # File.write(path, string)
189
+ #
190
+ # \Method CSV.read returns the entire \CSV data:
191
+ # CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
153
192
  #
154
- # === Writing
193
+ # \Method CSV.foreach iterates, passing each row to the given block:
194
+ # CSV.foreach(path) do |row|
195
+ # p row
196
+ # end
197
+ # Output:
198
+ # ["foo", "0"]
199
+ # ["bar", "1"]
200
+ # ["baz", "2"]
155
201
  #
156
- # # To a file
157
- # CSV.open("path/to/file.csv", "wb") do |csv|
158
- # csv << ["row", "of", "CSV", "data"]
159
- # csv << ["another", "row"]
160
- # # ...
202
+ # \Method CSV.table returns the entire \CSV data as a CSV::Table object:
203
+ # CSV.table(path) # => #<CSV::Table mode:col_or_row row_count:3>
204
+ #
205
+ # ==== Parsing from an Open \IO Stream
206
+ #
207
+ # The input to be parsed can be in an open \IO stream:
208
+ #
209
+ # \Method CSV.read returns the entire \CSV data:
210
+ # File.open(path) do |file|
211
+ # CSV.read(file)
212
+ # end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
213
+ #
214
+ # As does method CSV.parse:
215
+ # File.open(path) do |file|
216
+ # CSV.parse(file)
217
+ # end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
218
+ #
219
+ # \Method CSV.parse_line returns only the first row:
220
+ # File.open(path) do |file|
221
+ # CSV.parse_line(file)
222
+ # end # => ["foo", "0"]
223
+ #
224
+ # \Method CSV.foreach iterates, passing each row to the given block:
225
+ # File.open(path) do |file|
226
+ # CSV.foreach(file) do |row|
227
+ # p row
228
+ # end
161
229
  # end
230
+ # Output:
231
+ # ["foo", "0"]
232
+ # ["bar", "1"]
233
+ # ["baz", "2"]
234
+ #
235
+ # \Method CSV.table returns the entire \CSV data as a CSV::Table object:
236
+ # File.open(path) do |file|
237
+ # CSV.table(file)
238
+ # end # => #<CSV::Table mode:col_or_row row_count:3>
239
+ #
240
+ # === Simple Generating
241
+ #
242
+ # \Method CSV.generate returns a \String;
243
+ # this example uses method CSV#<< to append the rows
244
+ # that are to be generated:
245
+ # output_string = CSV.generate do |csv|
246
+ # csv << ['foo', 0]
247
+ # csv << ['bar', 1]
248
+ # csv << ['baz', 2]
249
+ # end
250
+ # output_string # => "foo,0\nbar,1\nbaz,2\n"
251
+ #
252
+ # \Method CSV.generate_line returns a \String containing the single row
253
+ # constructed from an \Array:
254
+ # CSV.generate_line(['foo', '0']) # => "foo,0\n"
162
255
  #
163
- # # To a String
164
- # csv_string = CSV.generate do |csv|
165
- # csv << ["row", "of", "CSV", "data"]
166
- # csv << ["another", "row"]
167
- # # ...
256
+ # \CSV extends class \Array with instance method <tt>Array#to_csv</tt>,
257
+ # which forms an \Array into a \String:
258
+ # ['foo', '0'].to_csv # => "foo,0\n"
259
+ #
260
+ # === "Filtering" \CSV
261
+ #
262
+ # \Method CSV.filter provides a Unix-style filter for \CSV data.
263
+ # The input data is processed to form the output data:
264
+ # in_string = "foo,0\nbar,1\nbaz,2\n"
265
+ # out_string = ''
266
+ # CSV.filter(in_string, out_string) do |row|
267
+ # row[0] = row[0].upcase
268
+ # row[1] *= 4
168
269
  # end
270
+ # out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
271
+ #
272
+ # == \CSV Objects
273
+ #
274
+ # There are three ways to create a \CSV object:
275
+ # - \Method CSV.new returns a new \CSV object.
276
+ # - \Method CSV.instance returns a new or cached \CSV object.
277
+ # - \Method \CSV() also returns a new or cached \CSV object.
278
+ #
279
+ # === Instance Methods
280
+ #
281
+ # \CSV has three groups of instance methods:
282
+ # - Its own internally defined instance methods.
283
+ # - Methods included by module Enumerable.
284
+ # - Methods delegated to class IO. See below.
285
+ #
286
+ # ==== Delegated Methods
287
+ #
288
+ # For convenience, a CSV object will delegate to many methods in class IO.
289
+ # (A few have wrapper "guard code" in \CSV.) You may call:
290
+ # * IO#binmode
291
+ # * #binmode?
292
+ # * IO#close
293
+ # * IO#close_read
294
+ # * IO#close_write
295
+ # * IO#closed?
296
+ # * #eof
297
+ # * #eof?
298
+ # * IO#external_encoding
299
+ # * IO#fcntl
300
+ # * IO#fileno
301
+ # * #flock
302
+ # * IO#flush
303
+ # * IO#fsync
304
+ # * IO#internal_encoding
305
+ # * #ioctl
306
+ # * IO#isatty
307
+ # * #path
308
+ # * IO#pid
309
+ # * IO#pos
310
+ # * IO#pos=
311
+ # * IO#reopen
312
+ # * #rewind
313
+ # * IO#seek
314
+ # * #stat
315
+ # * IO#string
316
+ # * IO#sync
317
+ # * IO#sync=
318
+ # * IO#tell
319
+ # * #to_i
320
+ # * #to_io
321
+ # * IO#truncate
322
+ # * IO#tty?
323
+ #
324
+ # === Options
325
+ #
326
+ # The default values for options are:
327
+ # DEFAULT_OPTIONS = {
328
+ # # For both parsing and generating.
329
+ # col_sep: ",",
330
+ # row_sep: :auto,
331
+ # quote_char: '"',
332
+ # # For parsing.
333
+ # field_size_limit: nil,
334
+ # converters: nil,
335
+ # unconverted_fields: nil,
336
+ # headers: false,
337
+ # return_headers: false,
338
+ # header_converters: nil,
339
+ # skip_blanks: false,
340
+ # skip_lines: nil,
341
+ # liberal_parsing: false,
342
+ # nil_value: nil,
343
+ # empty_value: "",
344
+ # # For generating.
345
+ # write_headers: nil,
346
+ # quote_empty: true,
347
+ # force_quotes: false,
348
+ # write_converters: nil,
349
+ # write_nil_value: nil,
350
+ # write_empty_value: "",
351
+ # strip: false,
352
+ # }
353
+ #
354
+ # ==== Options for Parsing
355
+ #
356
+ # Options for parsing, described in detail below, include:
357
+ # - +row_sep+: Specifies the row separator; used to delimit rows.
358
+ # - +col_sep+: Specifies the column separator; used to delimit fields.
359
+ # - +quote_char+: Specifies the quote character; used to quote fields.
360
+ # - +field_size_limit+: Specifies the maximum field size allowed.
361
+ # - +converters+: Specifies the field converters to be used.
362
+ # - +unconverted_fields+: Specifies whether unconverted fields are to be available.
363
+ # - +headers+: Specifies whether data contains headers,
364
+ # or specifies the headers themselves.
365
+ # - +return_headers+: Specifies whether headers are to be returned.
366
+ # - +header_converters+: Specifies the header converters to be used.
367
+ # - +skip_blanks+: Specifies whether blank lines are to be ignored.
368
+ # - +skip_lines+: Specifies how comment lines are to be recognized.
369
+ # - +strip+: Specifies whether leading and trailing whitespace are
370
+ # to be stripped from fields.
371
+ # - +liberal_parsing+: Specifies whether \CSV should attempt to parse
372
+ # non-compliant data.
373
+ # - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field.
374
+ # - +empty_value+: Specifies the object that is to be substituted for each empty field.
375
+ #
376
+ # :include: ../doc/csv/options/common/row_sep.rdoc
377
+ #
378
+ # :include: ../doc/csv/options/common/col_sep.rdoc
379
+ #
380
+ # :include: ../doc/csv/options/common/quote_char.rdoc
381
+ #
382
+ # :include: ../doc/csv/options/parsing/field_size_limit.rdoc
383
+ #
384
+ # :include: ../doc/csv/options/parsing/converters.rdoc
385
+ #
386
+ # :include: ../doc/csv/options/parsing/unconverted_fields.rdoc
387
+ #
388
+ # :include: ../doc/csv/options/parsing/headers.rdoc
389
+ #
390
+ # :include: ../doc/csv/options/parsing/return_headers.rdoc
391
+ #
392
+ # :include: ../doc/csv/options/parsing/header_converters.rdoc
393
+ #
394
+ # :include: ../doc/csv/options/parsing/skip_blanks.rdoc
395
+ #
396
+ # :include: ../doc/csv/options/parsing/skip_lines.rdoc
397
+ #
398
+ # :include: ../doc/csv/options/parsing/strip.rdoc
399
+ #
400
+ # :include: ../doc/csv/options/parsing/liberal_parsing.rdoc
401
+ #
402
+ # :include: ../doc/csv/options/parsing/nil_value.rdoc
403
+ #
404
+ # :include: ../doc/csv/options/parsing/empty_value.rdoc
405
+ #
406
+ # ==== Options for Generating
407
+ #
408
+ # Options for generating, described in detail below, include:
409
+ # - +row_sep+: Specifies the row separator; used to delimit rows.
410
+ # - +col_sep+: Specifies the column separator; used to delimit fields.
411
+ # - +quote_char+: Specifies the quote character; used to quote fields.
412
+ # - +write_headers+: Specifies whether headers are to be written.
413
+ # - +force_quotes+: Specifies whether each output field is to be quoted.
414
+ # - +quote_empty+: Specifies whether each empty output field is to be quoted.
415
+ # - +write_converters+: Specifies the field converters to be used in writing.
416
+ # - +write_nil_value+: Specifies the object that is to be substituted for each +nil+-valued field.
417
+ # - +write_empty_value+: Specifies the object that is to be substituted for each empty field.
418
+ #
419
+ # :include: ../doc/csv/options/common/row_sep.rdoc
420
+ #
421
+ # :include: ../doc/csv/options/common/col_sep.rdoc
422
+ #
423
+ # :include: ../doc/csv/options/common/quote_char.rdoc
424
+ #
425
+ # :include: ../doc/csv/options/generating/write_headers.rdoc
426
+ #
427
+ # :include: ../doc/csv/options/generating/force_quotes.rdoc
169
428
  #
170
- # === Shortcuts
429
+ # :include: ../doc/csv/options/generating/quote_empty.rdoc
171
430
  #
172
- # # Core extensions for converting one line
173
- # csv_string = ["CSV", "data"].to_csv # to CSV
174
- # csv_array = "CSV,String".parse_csv # from CSV
431
+ # :include: ../doc/csv/options/generating/write_converters.rdoc
175
432
  #
176
- # # CSV() method
177
- # CSV { |csv_out| csv_out << %w{my data here} } # to $stdout
178
- # CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
179
- # CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
180
- # CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
433
+ # :include: ../doc/csv/options/generating/write_nil_value.rdoc
181
434
  #
182
- # == Data Conversion
435
+ # :include: ../doc/csv/options/generating/write_empty_value.rdoc
183
436
  #
184
- # === CSV with headers
437
+ # === \CSV with Headers
185
438
  #
186
439
  # CSV allows to specify column names of CSV file, whether they are in data, or
187
440
  # provided separately. If headers are specified, reading methods return an instance
@@ -203,24 +456,343 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
203
456
  # data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
204
457
  # data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
205
458
  #
206
- # === Typed data reading
207
- #
208
- # CSV allows to provide a set of data _converters_ e.g. transformations to try on input
209
- # data. Converter could be a symbol from CSV::Converters constant's keys, or lambda.
210
- #
211
- # # Without any converters:
212
- # CSV.parse('Bob,2018-03-01,100')
213
- # #=> [["Bob", "2018-03-01", "100"]]
214
- #
215
- # # With built-in converters:
216
- # CSV.parse('Bob,2018-03-01,100', converters: %i[numeric date])
217
- # #=> [["Bob", #<Date: 2018-03-01>, 100]]
218
- #
219
- # # With custom converters:
220
- # CSV.parse('Bob,2018-03-01,100', converters: [->(v) { Time.parse(v) rescue v }])
221
- # #=> [["Bob", 2018-03-01 00:00:00 +0200, "100"]]
459
+ # === \Converters
460
+ #
461
+ # By default, each value (field or header) parsed by \CSV is formed into a \String.
462
+ # You can use a _field_ _converter_ or _header_ _converter_
463
+ # to intercept and modify the parsed values:
464
+ # - See {Field Converters}[#class-CSV-label-Field+Converters].
465
+ # - See {Header Converters}[#class-CSV-label-Header+Converters].
466
+ #
467
+ # Also by default, each value to be written during generation is written 'as-is'.
468
+ # You can use a _write_ _converter_ to modify values before writing.
469
+ # - See {Write Converters}[#class-CSV-label-Write+Converters].
470
+ #
471
+ # ==== Specifying \Converters
472
+ #
473
+ # You can specify converters for parsing or generating in the +options+
474
+ # argument to various \CSV methods:
475
+ # - Option +converters+ for converting parsed field values.
476
+ # - Option +header_converters+ for converting parsed header values.
477
+ # - Option +write_converters+ for converting values to be written (generated).
478
+ #
479
+ # There are three forms for specifying converters:
480
+ # - A converter proc: executable code to be used for conversion.
481
+ # - A converter name: the name of a stored converter.
482
+ # - A converter list: an array of converter procs, converter names, and converter lists.
483
+ #
484
+ # ===== Converter Procs
485
+ #
486
+ # This converter proc, +strip_converter+, accepts a value +field+
487
+ # and returns <tt>field.strip</tt>:
488
+ # strip_converter = proc {|field| field.strip }
489
+ # In this call to <tt>CSV.parse</tt>,
490
+ # the keyword argument <tt>converters: strip_converter</tt>
491
+ # specifies that:
492
+ # - \Proc +strip_converter+ is to be called for each parsed field.
493
+ # - The converter's return value is to replace the +field+ value.
494
+ # Example:
495
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
496
+ # array = CSV.parse(string, converters: strip_converter)
497
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
498
+ #
499
+ # A converter proc can receive a second argument, +field_info+,
500
+ # that contains details about the field.
501
+ # This modified +strip_converter+ displays its arguments:
502
+ # strip_converter = proc do |field, field_info|
503
+ # p [field, field_info]
504
+ # field.strip
505
+ # end
506
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
507
+ # array = CSV.parse(string, converters: strip_converter)
508
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
509
+ # Output:
510
+ # [" foo ", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
511
+ # [" 0 ", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
512
+ # [" bar ", #<struct CSV::FieldInfo index=0, line=2, header=nil>]
513
+ # [" 1 ", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
514
+ # [" baz ", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
515
+ # [" 2 ", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
516
+ # Each CSV::FieldInfo object shows:
517
+ # - The 0-based field index.
518
+ # - The 1-based line index.
519
+ # - The field header, if any.
520
+ #
521
+ # ===== Stored \Converters
522
+ #
523
+ # A converter may be given a name and stored in a structure where
524
+ # the parsing methods can find it by name.
525
+ #
526
+ # The storage structure for field converters is the \Hash CSV::Converters.
527
+ # It has several built-in converter procs:
528
+ # - <tt>:integer</tt>: converts each \String-embedded integer into a true \Integer.
529
+ # - <tt>:float</tt>: converts each \String-embedded float into a true \Float.
530
+ # - <tt>:date</tt>: converts each \String-embedded date into a true \Date.
531
+ # - <tt>:date_time</tt>: converts each \String-embedded date-time into a true \DateTime.
532
+ #
533
+ # This example creates a converter proc, then stores it:
534
+ # strip_converter = proc {|field| field.strip }
535
+ # CSV::Converters[:strip] = strip_converter
536
+ # Then the parsing method call can refer to the converter
537
+ # by its name, <tt>:strip</tt>:
538
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
539
+ # array = CSV.parse(string, converters: :strip)
540
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
541
+ #
542
+ # The storage structure for header converters is the \Hash CSV::HeaderConverters,
543
+ # which works in the same way.
544
+ # It also has built-in converter procs:
545
+ # - <tt>:downcase</tt>: Downcases each header.
546
+ # - <tt>:symbol</tt>: Converts each header to a \Symbol.
547
+ #
548
+ # There is no such storage structure for write converters.
549
+ #
550
+ # ===== Converter Lists
551
+ #
552
+ # A _converter_ _list_ is an \Array that may include any assortment of:
553
+ # - Converter procs.
554
+ # - Names of stored converters.
555
+ # - Nested converter lists.
556
+ #
557
+ # Examples:
558
+ # numeric_converters = [:integer, :float]
559
+ # date_converters = [:date, :date_time]
560
+ # [numeric_converters, strip_converter]
561
+ # [strip_converter, date_converters, :float]
562
+ #
563
+ # Like a converter proc, a converter list may be named and stored in either
564
+ # \CSV::Converters or CSV::HeaderConverters:
565
+ # CSV::Converters[:custom] = [strip_converter, date_converters, :float]
566
+ # CSV::HeaderConverters[:custom] = [:downcase, :symbol]
567
+ #
568
+ # There are two built-in converter lists:
569
+ # CSV::Converters[:numeric] # => [:integer, :float]
570
+ # CSV::Converters[:all] # => [:date_time, :numeric]
571
+ #
572
+ # ==== Field \Converters
573
+ #
574
+ # With no conversion, all parsed fields in all rows become Strings:
575
+ # string = "foo,0\nbar,1\nbaz,2\n"
576
+ # ary = CSV.parse(string)
577
+ # ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
578
+ #
579
+ # When you specify a field converter, each parsed field is passed to the converter;
580
+ # its return value becomes the stored value for the field.
581
+ # A converter might, for example, convert an integer embedded in a \String
582
+ # into a true \Integer.
583
+ # (In fact, that's what built-in field converter +:integer+ does.)
584
+ #
585
+ # There are three ways to use field \converters.
586
+ #
587
+ # - Using option {converters}[#class-CSV-label-Option+converters] with a parsing method:
588
+ # ary = CSV.parse(string, converters: :integer)
589
+ # ary # => [["foo", 0], ["bar", 1], ["baz", 2]]
590
+ # - Using option {converters}[#class-CSV-label-Option+converters] with a new \CSV instance:
591
+ # csv = CSV.new(string, converters: :integer)
592
+ # # Field converters in effect:
593
+ # csv.converters # => [:integer]
594
+ # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
595
+ # - Using method #convert to add a field converter to a \CSV instance:
596
+ # csv = CSV.new(string)
597
+ # # Add a converter.
598
+ # csv.convert(:integer)
599
+ # csv.converters # => [:integer]
600
+ # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
601
+ #
602
+ # Installing a field converter does not affect already-read rows:
603
+ # csv = CSV.new(string)
604
+ # csv.shift # => ["foo", "0"]
605
+ # # Add a converter.
606
+ # csv.convert(:integer)
607
+ # csv.converters # => [:integer]
608
+ # csv.read # => [["bar", 1], ["baz", 2]]
609
+ #
610
+ # There are additional built-in \converters, and custom \converters are also supported.
611
+ #
612
+ # ===== Built-In Field \Converters
613
+ #
614
+ # The built-in field converters are in \Hash CSV::Converters:
615
+ # - Each key is a field converter name.
616
+ # - Each value is one of:
617
+ # - A \Proc field converter.
618
+ # - An \Array of field converter names.
619
+ #
620
+ # Display:
621
+ # CSV::Converters.each_pair do |name, value|
622
+ # if value.kind_of?(Proc)
623
+ # p [name, value.class]
624
+ # else
625
+ # p [name, value]
626
+ # end
627
+ # end
628
+ # Output:
629
+ # [:integer, Proc]
630
+ # [:float, Proc]
631
+ # [:numeric, [:integer, :float]]
632
+ # [:date, Proc]
633
+ # [:date_time, Proc]
634
+ # [:all, [:date_time, :numeric]]
635
+ #
636
+ # Each of these converters transcodes values to UTF-8 before attempting conversion.
637
+ # If a value cannot be transcoded to UTF-8 the conversion will
638
+ # fail and the value will remain unconverted.
639
+ #
640
+ # Converter +:integer+ converts each field that Integer() accepts:
641
+ # data = '0,1,2,x'
642
+ # # Without the converter
643
+ # csv = CSV.parse_line(data)
644
+ # csv # => ["0", "1", "2", "x"]
645
+ # # With the converter
646
+ # csv = CSV.parse_line(data, converters: :integer)
647
+ # csv # => [0, 1, 2, "x"]
648
+ #
649
+ # Converter +:float+ converts each field that Float() accepts:
650
+ # data = '1.0,3.14159,x'
651
+ # # Without the converter
652
+ # csv = CSV.parse_line(data)
653
+ # csv # => ["1.0", "3.14159", "x"]
654
+ # # With the converter
655
+ # csv = CSV.parse_line(data, converters: :float)
656
+ # csv # => [1.0, 3.14159, "x"]
657
+ #
658
+ # Converter +:numeric+ converts with both +:integer+ and +:float+.
659
+ #
660
+ # Converter +:date+ converts each field that Date::parse accepts:
661
+ # data = '2001-02-03,x'
662
+ # # Without the converter
663
+ # csv = CSV.parse_line(data)
664
+ # csv # => ["2001-02-03", "x"]
665
+ # # With the converter
666
+ # csv = CSV.parse_line(data, converters: :date)
667
+ # csv # => [#<Date: 2001-02-03 ((2451944j,0s,0n),+0s,2299161j)>, "x"]
668
+ #
669
+ # Converter +:date_time+ converts each field that DateTime::parse accepts:
670
+ # data = '2020-05-07T14:59:00-05:00,x'
671
+ # # Without the converter
672
+ # csv = CSV.parse_line(data)
673
+ # csv # => ["2020-05-07T14:59:00-05:00", "x"]
674
+ # # With the converter
675
+ # csv = CSV.parse_line(data, converters: :date_time)
676
+ # csv # => [#<DateTime: 2020-05-07T14:59:00-05:00 ((2458977j,71940s,0n),-18000s,2299161j)>, "x"]
677
+ #
678
+ # Converter +:all+ converts with both +:date_time+ and +:numeric+.
679
+ #
680
+ # As seen above, method #convert adds \converters to a \CSV instance,
681
+ # and method #converters returns an \Array of the \converters in effect:
682
+ # csv = CSV.new('0,1,2')
683
+ # csv.converters # => []
684
+ # csv.convert(:integer)
685
+ # csv.converters # => [:integer]
686
+ # csv.convert(:date)
687
+ # csv.converters # => [:integer, :date]
688
+ #
689
+ # ===== Custom Field \Converters
690
+ #
691
+ # You can define a custom field converter:
692
+ # strip_converter = proc {|field| field.strip }
693
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
694
+ # array = CSV.parse(string, converters: strip_converter)
695
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
696
+ # You can register the converter in \Converters \Hash,
697
+ # which allows you to refer to it by name:
698
+ # CSV::Converters[:strip] = strip_converter
699
+ # string = " foo , 0 \n bar , 1 \n baz , 2 \n"
700
+ # array = CSV.parse(string, converters: :strip)
701
+ # array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
702
+ #
703
+ # ==== Header \Converters
704
+ #
705
+ # Header converters operate only on headers (and not on other rows).
706
+ #
707
+ # There are three ways to use header \converters;
708
+ # these examples use built-in header converter +:downcase+,
709
+ # which downcases each parsed header.
710
+ #
711
+ # - Option +header_converters+ with a singleton parsing method:
712
+ # string = "Name,Count\nFoo,0\nBar,1\nBaz,2"
713
+ # tbl = CSV.parse(string, headers: true, header_converters: :downcase)
714
+ # tbl.class # => CSV::Table
715
+ # tbl.headers # => ["name", "count"]
716
+ #
717
+ # - Option +header_converters+ with a new \CSV instance:
718
+ # csv = CSV.new(string, headers: true, header_converters: :downcase)
719
+ # # Header converters in effect:
720
+ # csv.header_converters # => [:downcase]
721
+ # tbl = csv.read
722
+ # tbl.headers # => ["name", "count"]
723
+ #
724
+ # - Method #header_convert adds a header converter to a \CSV instance:
725
+ # csv = CSV.new(string, headers: true)
726
+ # # Add a header converter.
727
+ # csv.header_convert(:downcase)
728
+ # csv.header_converters # => [:downcase]
729
+ # tbl = csv.read
730
+ # tbl.headers # => ["name", "count"]
731
+ #
732
+ # ===== Built-In Header \Converters
733
+ #
734
+ # The built-in header \converters are in \Hash CSV::HeaderConverters.
735
+ # The keys there are the names of the \converters:
736
+ # CSV::HeaderConverters.keys # => [:downcase, :symbol]
737
+ #
738
+ # Converter +:downcase+ converts each header by downcasing it:
739
+ # string = "Name,Count\nFoo,0\nBar,1\nBaz,2"
740
+ # tbl = CSV.parse(string, headers: true, header_converters: :downcase)
741
+ # tbl.class # => CSV::Table
742
+ # tbl.headers # => ["name", "count"]
743
+ #
744
+ # Converter +:symbol+ converts each header by making it into a \Symbol:
745
+ # string = "Name,Count\nFoo,0\nBar,1\nBaz,2"
746
+ # tbl = CSV.parse(string, headers: true, header_converters: :symbol)
747
+ # tbl.headers # => [:name, :count]
748
+ # Details:
749
+ # - Strips leading and trailing whitespace.
750
+ # - Downcases the header.
751
+ # - Replaces embedded spaces with underscores.
752
+ # - Removes non-word characters.
753
+ # - Makes the string into a \Symbol.
754
+ #
755
+ # ===== Custom Header \Converters
756
+ #
757
+ # You can define a custom header converter:
758
+ # upcase_converter = proc {|header| header.upcase }
759
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
760
+ # table = CSV.parse(string, headers: true, header_converters: upcase_converter)
761
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
762
+ # table.headers # => ["NAME", "VALUE"]
763
+ # You can register the converter in \HeaderConverters \Hash,
764
+ # which allows you to refer to it by name:
765
+ # CSV::HeaderConverters[:upcase] = upcase_converter
766
+ # table = CSV.parse(string, headers: true, header_converters: :upcase)
767
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
768
+ # table.headers # => ["NAME", "VALUE"]
769
+ #
770
+ # ==== Write \Converters
771
+ #
772
+ # When you specify a write converter for generating \CSV,
773
+ # each field to be written is passed to the converter;
774
+ # its return value becomes the new value for the field.
775
+ # A converter might, for example, strip whitespace from a field.
776
+ #
777
+ # Using no write converter (all fields unmodified):
778
+ # output_string = CSV.generate do |csv|
779
+ # csv << [' foo ', 0]
780
+ # csv << [' bar ', 1]
781
+ # csv << [' baz ', 2]
782
+ # end
783
+ # output_string # => " foo ,0\n bar ,1\n baz ,2\n"
784
+ # Using option +write_converters+ with two custom write converters:
785
+ # strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field }
786
+ # upcase_converter = proc {|field| field.respond_to?(:upcase) ? field.upcase : field }
787
+ # write_converters = [strip_converter, upcase_converter]
788
+ # output_string = CSV.generate(write_converters: write_converters) do |csv|
789
+ # csv << [' foo ', 0]
790
+ # csv << [' bar ', 1]
791
+ # csv << [' baz ', 2]
792
+ # end
793
+ # output_string # => "FOO,0\nBAR,1\nBAZ,2\n"
222
794
  #
223
- # == CSV and Character Encodings (M17n or Multilingualization)
795
+ # === Character Encodings (M17n or Multilingualization)
224
796
  #
225
797
  # This new CSV parser is m17n savvy. The parser works in the Encoding of the IO
226
798
  # or String object being read from or written to. Your data is never transcoded
@@ -301,30 +873,12 @@ class CSV
301
873
  # The encoding used by all converters.
302
874
  ConverterEncoding = Encoding.find("UTF-8")
303
875
 
876
+ # A \Hash containing the names and \Procs for the built-in field converters.
877
+ # See {Built-In Field Converters}[#class-CSV-label-Built-In+Field+Converters].
304
878
  #
305
- # This Hash holds the built-in converters of CSV that can be accessed by name.
306
- # You can select Converters with CSV.convert() or through the +options+ Hash
307
- # passed to CSV::new().
308
- #
309
- # <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
310
- # <b><tt>:float</tt></b>:: Converts any field Float() accepts.
311
- # <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
312
- # and <tt>:float</tt>.
313
- # <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
314
- # <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
315
- # <b><tt>:all</tt></b>:: All built-in converters. A combination of
316
- # <tt>:date_time</tt> and <tt>:numeric</tt>.
317
- #
318
- # All built-in converters transcode field data to UTF-8 before attempting a
319
- # conversion. If your data cannot be transcoded to UTF-8 the conversion will
320
- # fail and the field will remain unchanged.
321
- #
322
- # This Hash is intentionally left unfrozen and users should feel free to add
323
- # values to it that can be accessed by all CSV objects.
324
- #
325
- # To add a combo field, the value should be an Array of names. Combo fields
326
- # can be nested with other combo fields.
327
- #
879
+ # This \Hash is intentionally left unfrozen, and may be extended with
880
+ # custom field converters.
881
+ # See {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters].
328
882
  Converters = {
329
883
  integer: lambda { |f|
330
884
  Integer(f.encode(ConverterEncoding)) rescue f
@@ -352,27 +906,12 @@ class CSV
352
906
  all: [:date_time, :numeric],
353
907
  }
354
908
 
909
+ # A \Hash containing the names and \Procs for the built-in header converters.
910
+ # See {Built-In Header Converters}[#class-CSV-label-Built-In+Header+Converters].
355
911
  #
356
- # This Hash holds the built-in header converters of CSV that can be accessed
357
- # by name. You can select HeaderConverters with CSV.header_convert() or
358
- # through the +options+ Hash passed to CSV::new().
359
- #
360
- # <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
361
- # <b><tt>:symbol</tt></b>:: Leading/trailing spaces are dropped, string is
362
- # downcased, remaining spaces are replaced with
363
- # underscores, non-word characters are dropped,
364
- # and finally to_sym() is called.
365
- #
366
- # All built-in header converters transcode header data to UTF-8 before
367
- # attempting a conversion. If your data cannot be transcoded to UTF-8 the
368
- # conversion will fail and the header will remain unchanged.
369
- #
370
- # This Hash is intentionally left unfrozen and users should feel free to add
371
- # values to it that can be accessed by all CSV objects.
372
- #
373
- # To add a combo field, the value should be an Array of names. Combo fields
374
- # can be nested with other combo fields.
375
- #
912
+ # This \Hash is intentionally left unfrozen, and may be extended with
913
+ # custom header converters.
914
+ # See {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters].
376
915
  HeaderConverters = {
377
916
  downcase: lambda { |h| h.encode(ConverterEncoding).downcase },
378
917
  symbol: lambda { |h|
@@ -380,29 +919,13 @@ class CSV
380
919
  gsub(/\s+/, "_").to_sym
381
920
  }
382
921
  }
383
-
384
- #
385
- # The options used when no overrides are given by calling code. They are:
386
- #
387
- # <b><tt>:col_sep</tt></b>:: <tt>","</tt>
388
- # <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
389
- # <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
390
- # <b><tt>:field_size_limit</tt></b>:: +nil+
391
- # <b><tt>:converters</tt></b>:: +nil+
392
- # <b><tt>:unconverted_fields</tt></b>:: +nil+
393
- # <b><tt>:headers</tt></b>:: +false+
394
- # <b><tt>:return_headers</tt></b>:: +false+
395
- # <b><tt>:header_converters</tt></b>:: +nil+
396
- # <b><tt>:skip_blanks</tt></b>:: +false+
397
- # <b><tt>:force_quotes</tt></b>:: +false+
398
- # <b><tt>:skip_lines</tt></b>:: +nil+
399
- # <b><tt>:liberal_parsing</tt></b>:: +false+
400
- # <b><tt>:quote_empty</tt></b>:: +true+
401
- #
922
+ # Default values for method options.
402
923
  DEFAULT_OPTIONS = {
924
+ # For both parsing and generating.
403
925
  col_sep: ",",
404
926
  row_sep: :auto,
405
927
  quote_char: '"',
928
+ # For parsing.
406
929
  field_size_limit: nil,
407
930
  converters: nil,
408
931
  unconverted_fields: nil,
@@ -410,22 +933,62 @@ class CSV
410
933
  return_headers: false,
411
934
  header_converters: nil,
412
935
  skip_blanks: false,
413
- force_quotes: false,
414
936
  skip_lines: nil,
415
937
  liberal_parsing: false,
938
+ nil_value: nil,
939
+ empty_value: "",
940
+ # For generating.
941
+ write_headers: nil,
416
942
  quote_empty: true,
943
+ force_quotes: false,
944
+ write_converters: nil,
945
+ write_nil_value: nil,
946
+ write_empty_value: "",
947
+ strip: false,
417
948
  }.freeze
418
949
 
419
950
  class << self
951
+ # :call-seq:
952
+ # instance(string, **options)
953
+ # instance(io = $stdout, **options)
954
+ # instance(string, **options) {|csv| ... }
955
+ # instance(io = $stdout, **options) {|csv| ... }
956
+ #
957
+ # Creates or retrieves cached \CSV objects.
958
+ # For arguments and options, see CSV.new.
959
+ #
960
+ # ---
961
+ #
962
+ # With no block given, returns a \CSV object.
963
+ #
964
+ # The first call to +instance+ creates and caches a \CSV object:
965
+ # s0 = 's0'
966
+ # csv0 = CSV.instance(s0)
967
+ # csv0.class # => CSV
968
+ #
969
+ # Subsequent calls to +instance+ with that _same_ +string+ or +io+
970
+ # retrieve that same cached object:
971
+ # csv1 = CSV.instance(s0)
972
+ # csv1.class # => CSV
973
+ # csv1.equal?(csv0) # => true # Same CSV object
420
974
  #
421
- # This method will return a CSV instance, just like CSV::new(), but the
422
- # instance will be cached and returned for all future calls to this method for
423
- # the same +data+ object (tested by Object#object_id()) with the same
424
- # +options+.
975
+ # A subsequent call to +instance+ with a _different_ +string+ or +io+
976
+ # creates and caches a _different_ \CSV object.
977
+ # s1 = 's1'
978
+ # csv2 = CSV.instance(s1)
979
+ # csv2.equal?(csv0) # => false # Different CSV object
425
980
  #
426
- # If a block is given, the instance is passed to the block and the return
427
- # value becomes the return value of the block.
981
+ # All the cached objects remain available:
982
+ # csv3 = CSV.instance(s0)
983
+ # csv3.equal?(csv0) # true # Same CSV object
984
+ # csv4 = CSV.instance(s1)
985
+ # csv4.equal?(csv2) # true # Same CSV object
428
986
  #
987
+ # ---
988
+ #
989
+ # When a block is given, calls the block with the created or retrieved
990
+ # \CSV object; returns the block's return value:
991
+ # CSV.instance(s0) {|csv| :foo } # => :foo
429
992
  def instance(data = $stdout, **options)
430
993
  # create a _signature_ for this method call, data object and options
431
994
  sig = [data.object_id] +
@@ -442,30 +1005,61 @@ class CSV
442
1005
  end
443
1006
  end
444
1007
 
445
- #
446
1008
  # :call-seq:
447
- # filter( **options ) { |row| ... }
448
- # filter( input, **options ) { |row| ... }
449
- # filter( input, output, **options ) { |row| ... }
1009
+ # filter(**options) {|row| ... }
1010
+ # filter(in_string, **options) {|row| ... }
1011
+ # filter(in_io, **options) {|row| ... }
1012
+ # filter(in_string, out_string, **options) {|row| ... }
1013
+ # filter(in_string, out_io, **options) {|row| ... }
1014
+ # filter(in_io, out_string, **options) {|row| ... }
1015
+ # filter(in_io, out_io, **options) {|row| ... }
450
1016
  #
451
- # This method is a convenience for building Unix-like filters for CSV data.
452
- # Each row is yielded to the provided block which can alter it as needed.
453
- # After the block returns, the row is appended to +output+ altered or not.
1017
+ # Reads \CSV input and writes \CSV output.
454
1018
  #
455
- # The +input+ and +output+ arguments can be anything CSV::new() accepts
456
- # (generally String or IO objects). If not given, they default to
457
- # <tt>ARGF</tt> and <tt>$stdout</tt>.
1019
+ # For each input row:
1020
+ # - Forms the data into:
1021
+ # - A CSV::Row object, if headers are in use.
1022
+ # - An \Array, otherwise.
1023
+ # - Calls the block with that object.
1024
+ # - Appends the block's return value to the output.
458
1025
  #
459
- # The +options+ parameter is also filtered down to CSV::new() after some
460
- # clever key parsing. Any key beginning with <tt>:in_</tt> or
461
- # <tt>:input_</tt> will have that leading identifier stripped and will only
462
- # be used in the +options+ Hash for the +input+ object. Keys starting with
463
- # <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
464
- # are assigned to both objects.
465
- #
466
- # The <tt>:output_row_sep</tt> +option+ defaults to
467
- # <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
1026
+ # Arguments:
1027
+ # * \CSV source:
1028
+ # * Argument +in_string+, if given, should be a \String object;
1029
+ # it will be put into a new StringIO object positioned at the beginning.
1030
+ # * Argument +in_io+, if given, should be an IO object that is
1031
+ # open for reading; on return, the IO object will be closed.
1032
+ # * If neither +in_string+ nor +in_io+ is given,
1033
+ # the input stream defaults to {ARGF}[https://ruby-doc.org/core/ARGF.html].
1034
+ # * \CSV output:
1035
+ # * Argument +out_string+, if given, should be a \String object;
1036
+ # it will be put into a new StringIO object positioned at the beginning.
1037
+ # * Argument +out_io+, if given, should be an IO object that is
1038
+ # open for writing; on return, the IO object will be closed.
1039
+ # * If neither +out_string+ nor +out_io+ is given,
1040
+ # the output stream defaults to <tt>$stdout</tt>.
1041
+ # * Argument +options+ should be keyword arguments.
1042
+ # - Each argument name that is prefixed with +in_+ or +input_+
1043
+ # is stripped of its prefix and is treated as an option
1044
+ # for parsing the input.
1045
+ # Option +input_row_sep+ has no special default here; it uses the usual +row_sep+ default (<tt>:auto</tt>).
1046
+ # - Each argument name that is prefixed with +out_+ or +output_+
1047
+ # is stripped of its prefix and is treated as an option
1048
+ # for generating the output.
1049
+ # Option +output_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
1050
+ # - Each argument not prefixed as above is treated as an option
1051
+ # both for parsing the input and for generating the output.
1052
+ # - See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
1053
+ # and {Options for Generating}[#class-CSV-label-Options+for+Generating].
468
1054
  #
1055
+ # Example:
1056
+ # in_string = "foo,0\nbar,1\nbaz,2\n"
1057
+ # out_string = ''
1058
+ # CSV.filter(in_string, out_string) do |row|
1059
+ # row[0] = row[0].upcase
1060
+ # row[1] *= 4
1061
+ # end
1062
+ # out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
469
1063
  def filter(input=nil, output=nil, **options)
470
1064
  # parse options for input, output, or both
471
1065
  in_options, out_options = Hash.new, {row_sep: $INPUT_RECORD_SEPARATOR}
@@ -480,10 +1074,29 @@ class CSV
480
1074
  out_options[key] = value
481
1075
  end
482
1076
  end
1077
+
483
1078
  # build input and output wrappers
484
- input = new(input || ARGF, **in_options)
1079
+ input = new(input || ARGF, **in_options)
485
1080
  output = new(output || $stdout, **out_options)
486
1081
 
1082
+ # process headers
1083
+ need_manual_header_output =
1084
+ (in_options[:headers] and
1085
+ out_options[:headers] == true and
1086
+ out_options[:write_headers])
1087
+ if need_manual_header_output
1088
+ first_row = input.shift
1089
+ if first_row
1090
+ if first_row.is_a?(Row)
1091
+ headers = first_row.headers
1092
+ yield headers
1093
+ output << headers
1094
+ end
1095
+ yield first_row
1096
+ output << first_row
1097
+ end
1098
+ end
1099
+
487
1100
  # read, yield, write
488
1101
  input.each do |row|
489
1102
  yield row
@@ -492,18 +1105,111 @@ class CSV
492
1105
  end
493
1106
 
494
1107
  #
495
- # This method is intended as the primary interface for reading CSV files. You
496
- # pass a +path+ and any +options+ you wish to set for the read. Each row of
497
- # file will be passed to the provided +block+ in turn.
1108
+ # :call-seq:
1109
+ # foreach(path, mode='r', **options) {|row| ... }
1110
+ # foreach(io, mode='r', **options) {|row| ... }
1111
+ # foreach(path, mode='r', headers: ..., **options) {|row| ... }
1112
+ # foreach(io, mode='r', headers: ..., **options) {|row| ... }
1113
+ # foreach(path, mode='r', **options) -> new_enumerator
1114
+ # foreach(io, mode='r', **options) -> new_enumerator
1115
+ #
1116
+ # Calls the block with each row read from source +path+ or +io+.
1117
+ #
1118
+ # * Argument +path+, if given, must be the path to a file.
1119
+ # :include: ../doc/csv/arguments/io.rdoc
1120
+ # * Argument +mode+, if given, must be a \File mode.
1121
+ # See {Open Mode}[IO.html#method-c-new-label-Open+Mode].
1122
+ # * Arguments <tt>**options</tt> must be keyword options.
1123
+ # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1124
+ # * This method optionally accepts an additional <tt>:encoding</tt> option
1125
+ # that you can use to specify the Encoding of the data read from +path+ or +io+.
1126
+ # You must provide this unless your data is in the encoding
1127
+ # given by <tt>Encoding::default_external</tt>.
1128
+ # Parsing will use this to determine how to parse the data.
1129
+ # You may provide a second Encoding to
1130
+ # have the data transcoded as it is read. For example,
1131
+ # encoding: 'UTF-32BE:UTF-8'
1132
+ # would read +UTF-32BE+ data from the file
1133
+ # but transcode it to +UTF-8+ before parsing.
1134
+ #
1135
+ # ====== Without Option +headers+
1136
+ #
1137
+ # Without option +headers+, returns each row as an \Array object.
1138
+ #
1139
+ # These examples assume prior execution of:
1140
+ # string = "foo,0\nbar,1\nbaz,2\n"
1141
+ # path = 't.csv'
1142
+ # File.write(path, string)
1143
+ #
1144
+ # Read rows from a file at +path+:
1145
+ # CSV.foreach(path) {|row| p row }
1146
+ # Output:
1147
+ # ["foo", "0"]
1148
+ # ["bar", "1"]
1149
+ # ["baz", "2"]
1150
+ #
1151
+ # Read rows from an \IO object:
1152
+ # File.open(path) do |file|
1153
+ # CSV.foreach(file) {|row| p row }
1154
+ # end
498
1155
  #
499
- # The +options+ parameter can be anything CSV::new() understands. This method
500
- # also understands an additional <tt>:encoding</tt> parameter that you can use
501
- # to specify the Encoding of the data in the file to be read. You must provide
502
- # this unless your data is in Encoding::default_external(). CSV will use this
503
- # to determine how to parse the data. You may provide a second Encoding to
504
- # have the data transcoded as it is read. For example,
505
- # <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
506
- # but transcode it to UTF-8 before CSV parses it.
1156
+ # Output:
1157
+ # ["foo", "0"]
1158
+ # ["bar", "1"]
1159
+ # ["baz", "2"]
1160
+ #
1161
+ # Returns a new \Enumerator if no block given:
1162
+ # CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
1163
+ # CSV.foreach(File.open(path)) # => #<Enumerator: CSV:foreach(#<File:t.csv>, "r")>
1164
+ #
1165
+ # Issues a warning if an encoding is unsupported:
1166
+ # CSV.foreach(File.open(path), encoding: 'foo:bar') {|row| }
1167
+ # Output:
1168
+ # warning: Unsupported encoding foo ignored
1169
+ # warning: Unsupported encoding bar ignored
1170
+ #
1171
+ # ====== With Option +headers+
1172
+ #
1173
+ # With {option +headers+}[#class-CSV-label-Option+headers],
1174
+ # returns each row as a CSV::Row object.
1175
+ #
1176
+ # These examples assume prior execution of:
1177
+ # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
1178
+ # path = 't.csv'
1179
+ # File.write(path, string)
1180
+ #
1181
+ # Read rows from a file at +path+:
1182
+ # CSV.foreach(path, headers: true) {|row| p row }
1183
+ #
1184
+ # Output:
1185
+ # #<CSV::Row "Name":"foo" "Count":"0">
1186
+ # #<CSV::Row "Name":"bar" "Count":"1">
1187
+ # #<CSV::Row "Name":"baz" "Count":"2">
1188
+ #
1189
+ # Read rows from an \IO object:
1190
+ # File.open(path) do |file|
1191
+ # CSV.foreach(file, headers: true) {|row| p row }
1192
+ # end
1193
+ #
1194
+ # Output:
1195
+ # #<CSV::Row "Name":"foo" "Count":"0">
1196
+ # #<CSV::Row "Name":"bar" "Count":"1">
1197
+ # #<CSV::Row "Name":"baz" "Count":"2">
1198
+ #
1199
+ # ---
1200
+ #
1201
+ # Raises an exception if +path+ is a \String, but not the path to a readable file:
1202
+ # # Raises Errno::ENOENT (No such file or directory @ rb_sysopen - nosuch.csv):
1203
+ # CSV.foreach('nosuch.csv') {|row| }
1204
+ #
1205
+ # Raises an exception if +io+ is an \IO object, but not open for reading:
1206
+ # io = File.open(path, 'w') {|row| }
1207
+ # # Raises TypeError (no implicit conversion of nil into String):
1208
+ # CSV.foreach(io) {|row| }
1209
+ #
1210
+ # Raises an exception if +mode+ is invalid:
1211
+ # # Raises ArgumentError (invalid access mode nosuch):
1212
+ # CSV.foreach(path, 'nosuch') {|row| }
507
1213
  #
508
1214
  def foreach(path, mode="r", **options, &block)
509
1215
  return to_enum(__method__, path, mode, **options) unless block_given?
@@ -514,21 +1220,62 @@ class CSV
514
1220
 
515
1221
  #
516
1222
  # :call-seq:
517
- # generate( str, **options ) { |csv| ... }
518
- # generate( **options ) { |csv| ... }
1223
+ # generate(csv_string, **options) {|csv| ... }
1224
+ # generate(**options) {|csv| ... }
1225
+ #
1226
+ # * Argument +csv_string+, if given, must be a \String object;
1227
+ # defaults to a new empty \String.
1228
+ # * Arguments +options+, if given, should be generating options.
1229
+ # See {Options for Generating}[#class-CSV-label-Options+for+Generating].
1230
+ #
1231
+ # ---
519
1232
  #
520
- # This method wraps a String you provide, or an empty default String, in a
521
- # CSV object which is passed to the provided block. You can use the block to
522
- # append CSV rows to the String and when the block exits, the final String
523
- # will be returned.
1233
+ # Creates a new \CSV object via <tt>CSV.new(csv_string, **options)</tt>;
1234
+ # calls the block with the \CSV object, which the block may modify;
1235
+ # returns the \String generated from the \CSV object.
524
1236
  #
525
- # Note that a passed String *is* modified by this method. Call dup() before
526
- # passing if you need a new String.
1237
+ # Note that a passed \String *is* modified by this method.
1238
+ # Pass <tt>csv_string</tt>.dup if the \String must be preserved.
527
1239
  #
528
- # The +options+ parameter can be anything CSV::new() understands. This method
529
- # understands an additional <tt>:encoding</tt> parameter when not passed a
530
- # String to set the base Encoding for the output. CSV needs this hint if you
531
- # plan to output non-ASCII compatible data.
1240
+ # This method has one additional option: <tt>:encoding</tt>,
1241
+ # which sets the base Encoding for the output if no +str+ is specified.
1242
+ # CSV needs this hint if you plan to output non-ASCII compatible data.
1243
+ #
1244
+ # ---
1245
+ #
1246
+ # Add lines:
1247
+ # input_string = "foo,0\nbar,1\nbaz,2\n"
1248
+ # output_string = CSV.generate(input_string) do |csv|
1249
+ # csv << ['bat', 3]
1250
+ # csv << ['bam', 4]
1251
+ # end
1252
+ # output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
1253
+ # input_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
1254
+ # output_string.equal?(input_string) # => true # Same string, modified
1255
+ #
1256
+ # Add lines into new string, preserving old string:
1257
+ # input_string = "foo,0\nbar,1\nbaz,2\n"
1258
+ # output_string = CSV.generate(input_string.dup) do |csv|
1259
+ # csv << ['bat', 3]
1260
+ # csv << ['bam', 4]
1261
+ # end
1262
+ # output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
1263
+ # input_string # => "foo,0\nbar,1\nbaz,2\n"
1264
+ # output_string.equal?(input_string) # => false # Different strings
1265
+ #
1266
+ # Create lines from nothing:
1267
+ # output_string = CSV.generate do |csv|
1268
+ # csv << ['foo', 0]
1269
+ # csv << ['bar', 1]
1270
+ # csv << ['baz', 2]
1271
+ # end
1272
+ # output_string # => "foo,0\nbar,1\nbaz,2\n"
1273
+ #
1274
+ # ---
1275
+ #
1276
+ # Raises an exception if +csv_string+ is not a \String object:
1277
+ # # Raises TypeError (no implicit conversion of Integer into String)
1278
+ # CSV.generate(0)
532
1279
  #
533
1280
  def generate(str=nil, **options)
534
1281
  encoding = options[:encoding]
@@ -546,97 +1293,140 @@ class CSV
546
1293
  csv.string # return final String
547
1294
  end
548
1295
 
1296
+ # :call-seq:
1297
+ # CSV.generate_line(ary)
1298
+ # CSV.generate_line(ary, **options)
1299
+ #
1300
+ # Returns the \String created by generating \CSV from +ary+
1301
+ # using the specified +options+.
549
1302
  #
550
- # This method is a shortcut for converting a single row (Array) into a CSV
551
- # String.
1303
+ # Argument +ary+ must be an \Array.
552
1304
  #
553
- # The +options+ parameter can be anything CSV::new() understands. This method
554
- # understands an additional <tt>:encoding</tt> parameter to set the base
555
- # Encoding for the output. This method will try to guess your Encoding from
556
- # the first non-+nil+ field in +row+, if possible, but you may need to use
557
- # this parameter as a backup plan.
1305
+ # Special options:
1306
+ # * Option <tt>:row_sep</tt> defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
1307
+ # (<tt>$/</tt>):
1308
+ # $INPUT_RECORD_SEPARATOR # => "\n"
1309
+ # * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
1310
+ # Encoding for the output. This method will try to guess your Encoding from
1311
+ # the first non-+nil+ field in +row+, if possible, but you may need to use
1312
+ # this parameter as a backup plan.
558
1313
  #
559
- # The <tt>:row_sep</tt> +option+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
560
- # (<tt>$/</tt>) when calling this method.
1314
+ # For other +options+,
1315
+ # see {Options for Generating}[#class-CSV-label-Options+for+Generating].
1316
+ #
1317
+ # ---
1318
+ #
1319
+ # Returns the \String generated from an \Array:
1320
+ # CSV.generate_line(['foo', '0']) # => "foo,0\n"
1321
+ #
1322
+ # ---
1323
+ #
1324
+ # Raises an exception if +ary+ is not an \Array:
1325
+ # # Raises NoMethodError (undefined method `find' for :foo:Symbol)
1326
+ # CSV.generate_line(:foo)
561
1327
  #
562
1328
  def generate_line(row, **options)
563
1329
  options = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options)
564
1330
  str = +""
565
1331
  if options[:encoding]
566
1332
  str.force_encoding(options[:encoding])
567
- elsif field = row.find {|f| f.is_a?(String)}
568
- str.force_encoding(field.encoding)
1333
+ else
1334
+ fallback_encoding = nil
1335
+ output_encoding = nil
1336
+ row.each do |field|
1337
+ next unless field.is_a?(String)
1338
+ fallback_encoding ||= field.encoding
1339
+ next if field.ascii_only?
1340
+ output_encoding = field.encoding
1341
+ break
1342
+ end
1343
+ output_encoding ||= fallback_encoding
1344
+ if output_encoding
1345
+ str.force_encoding(output_encoding)
1346
+ end
569
1347
  end
570
1348
  (new(str, **options) << row).string
571
1349
  end
572
1350
 
573
1351
  #
574
1352
  # :call-seq:
575
- # open( filename, mode = "rb", **options ) { |faster_csv| ... }
576
- # open( filename, **options ) { |faster_csv| ... }
577
- # open( filename, mode = "rb", **options )
578
- # open( filename, **options )
579
- #
580
- # This method opens an IO object, and wraps that with CSV. This is intended
581
- # as the primary interface for writing a CSV file.
582
- #
583
- # You must pass a +filename+ and may optionally add a +mode+ for Ruby's
584
- # open(). You may also pass an optional Hash containing any +options+
585
- # CSV::new() understands as the final argument.
586
- #
587
- # This method works like Ruby's open() call, in that it will pass a CSV object
588
- # to a provided block and close it when the block terminates, or it will
589
- # return the CSV object when no block is provided. (*Note*: This is different
590
- # from the Ruby 1.8 CSV library which passed rows to the block. Use
591
- # CSV::foreach() for that behavior.)
592
- #
593
- # You must provide a +mode+ with an embedded Encoding designator unless your
594
- # data is in Encoding::default_external(). CSV will check the Encoding of the
595
- # underlying IO object (set by the +mode+ you pass) to determine how to parse
596
- # the data. You may provide a second Encoding to have the data transcoded as
597
- # it is read just as you can with a normal call to IO::open(). For example,
598
- # <tt>"rb:UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file but
599
- # transcode it to UTF-8 before CSV parses it.
600
- #
601
- # An opened CSV object will delegate to many IO methods for convenience. You
602
- # may call:
603
- #
604
- # * binmode()
605
- # * binmode?()
606
- # * close()
607
- # * close_read()
608
- # * close_write()
609
- # * closed?()
610
- # * eof()
611
- # * eof?()
612
- # * external_encoding()
613
- # * fcntl()
614
- # * fileno()
615
- # * flock()
616
- # * flush()
617
- # * fsync()
618
- # * internal_encoding()
619
- # * ioctl()
620
- # * isatty()
621
- # * path()
622
- # * pid()
623
- # * pos()
624
- # * pos=()
625
- # * reopen()
626
- # * seek()
627
- # * stat()
628
- # * sync()
629
- # * sync=()
630
- # * tell()
631
- # * to_i()
632
- # * to_io()
633
- # * truncate()
634
- # * tty?()
1353
+ # open(file_path, mode = "rb", **options ) -> new_csv
1354
+ # open(io, mode = "rb", **options ) -> new_csv
1355
+ # open(file_path, mode = "rb", **options ) { |csv| ... } -> object
1356
+ # open(io, mode = "rb", **options ) { |csv| ... } -> object
635
1357
  #
1358
+ # possible options elements:
1359
+ # hash form:
1360
+ # :invalid => nil # raise error on invalid byte sequence (default)
1361
+ # :invalid => :replace # replace invalid byte sequence
1362
+ # :undef => :replace # replace undefined conversion
1363
+ # :replace => string # replacement string ("?" or "\uFFFD" if not specified)
1364
+ #
1365
+ # * Argument +path+, if given, must be the path to a file.
1366
+ # :include: ../doc/csv/arguments/io.rdoc
1367
+ # * Argument +mode+, if given, must be a \File mode.
1368
+ # See {Open Mode}[IO.html#method-c-new-label-Open+Mode].
1369
+ # * Arguments <tt>**options</tt> must be keyword options.
1370
+ # See {Options for Generating}[#class-CSV-label-Options+for+Generating].
1371
+ # * This method optionally accepts an additional <tt>:encoding</tt> option
1372
+ # that you can use to specify the Encoding of the data read from +path+ or +io+.
1373
+ # You must provide this unless your data is in the encoding
1374
+ # given by <tt>Encoding::default_external</tt>.
1375
+ # Parsing will use this to determine how to parse the data.
1376
+ # You may provide a second Encoding to
1377
+ # have the data transcoded as it is read. For example,
1378
+ # encoding: 'UTF-32BE:UTF-8'
1379
+ # would read +UTF-32BE+ data from the file
1380
+ # but transcode it to +UTF-8+ before parsing.
1381
+ #
1382
+ # ---
1383
+ #
1384
+ # These examples assume prior execution of:
1385
+ # string = "foo,0\nbar,1\nbaz,2\n"
1386
+ # path = 't.csv'
1387
+ # File.write(path, string)
1388
+ #
1389
+ # ---
1390
+ #
1391
+ # With no block given, returns a new \CSV object.
1392
+ #
1393
+ # Create a \CSV object using a file path:
1394
+ # csv = CSV.open(path)
1395
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1396
+ #
1397
+ # Create a \CSV object using an open \File:
1398
+ # csv = CSV.open(File.open(path))
1399
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1400
+ #
1401
+ # ---
1402
+ #
1403
+ # With a block given, calls the block with the created \CSV object;
1404
+ # returns the block's return value:
1405
+ #
1406
+ # Using a file path:
1407
+ # csv = CSV.open(path) {|csv| p csv}
1408
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1409
+ # Output:
1410
+ # #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1411
+ #
1412
+ # Using an open \File:
1413
+ # csv = CSV.open(File.open(path)) {|csv| p csv}
1414
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1415
+ # Output:
1416
+ # #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1417
+ #
1418
+ # ---
1419
+ #
1420
+ # Raises an exception if the argument is not a \String object or \IO object:
1421
+ # # Raises TypeError (no implicit conversion of Symbol into String)
1422
+ # CSV.open(:foo)
636
1423
  def open(filename, mode="r", **options)
637
1424
  # wrap a File opened with the remaining +args+ with no newline
638
1425
  # decorator
639
1426
  file_opts = {universal_newline: false}.merge(options)
1427
+ options.delete(:invalid)
1428
+ options.delete(:undef)
1429
+ options.delete(:replace)
640
1430
 
641
1431
  begin
642
1432
  f = File.open(filename, mode, **file_opts)
@@ -667,16 +1457,116 @@ class CSV
667
1457
 
668
1458
  #
669
1459
  # :call-seq:
670
- # parse( str, **options ) { |row| ... }
671
- # parse( str, **options )
1460
+ # parse(string) -> array_of_arrays
1461
+ # parse(io) -> array_of_arrays
1462
+ # parse(string, headers: ..., **options) -> csv_table
1463
+ # parse(io, headers: ..., **options) -> csv_table
1464
+ # parse(string, **options) {|row| ... }
1465
+ # parse(io, **options) {|row| ... }
1466
+ #
1467
+ # Parses +string+ or +io+ using the specified +options+.
1468
+ #
1469
+ # - Argument +string+ should be a \String object;
1470
+ # it will be put into a new StringIO object positioned at the beginning.
1471
+ # :include: ../doc/csv/arguments/io.rdoc
1472
+ # - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
672
1473
  #
673
- # This method can be used to easily parse CSV out of a String. You may either
674
- # provide a +block+ which will be called with each row of the String in turn,
675
- # or just use the returned Array of Arrays (when no +block+ is given).
1474
+ # ====== Without Option +headers+
676
1475
  #
677
- # You pass your +str+ to read from, and an optional +options+ containing
678
- # anything CSV::new() understands.
1476
+ # Without {option +headers+}[#class-CSV-label-Option+headers] case.
679
1477
  #
1478
+ # These examples assume prior execution of:
1479
+ # string = "foo,0\nbar,1\nbaz,2\n"
1480
+ # path = 't.csv'
1481
+ # File.write(path, string)
1482
+ #
1483
+ # ---
1484
+ #
1485
+ # With no block given, returns an \Array of Arrays formed from the source.
1486
+ #
1487
+ # Parse a \String:
1488
+ # a_of_a = CSV.parse(string)
1489
+ # a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
1490
+ #
1491
+ # Parse an open \File:
1492
+ # a_of_a = File.open(path) do |file|
1493
+ # CSV.parse(file)
1494
+ # end
1495
+ # a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
1496
+ #
1497
+ # ---
1498
+ #
1499
+ # With a block given, calls the block with each parsed row:
1500
+ #
1501
+ # Parse a \String:
1502
+ # CSV.parse(string) {|row| p row }
1503
+ #
1504
+ # Output:
1505
+ # ["foo", "0"]
1506
+ # ["bar", "1"]
1507
+ # ["baz", "2"]
1508
+ #
1509
+ # Parse an open \File:
1510
+ # File.open(path) do |file|
1511
+ # CSV.parse(file) {|row| p row }
1512
+ # end
1513
+ #
1514
+ # Output:
1515
+ # ["foo", "0"]
1516
+ # ["bar", "1"]
1517
+ # ["baz", "2"]
1518
+ #
1519
+ # ====== With Option +headers+
1520
+ #
1521
+ # With {option +headers+}[#class-CSV-label-Option+headers] case.
1522
+ #
1523
+ # These examples assume prior execution of:
1524
+ # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
1525
+ # path = 't.csv'
1526
+ # File.write(path, string)
1527
+ #
1528
+ # ---
1529
+ #
1530
+ # With no block given, returns a CSV::Table object formed from the source.
1531
+ #
1532
+ # Parse a \String:
1533
+ # csv_table = CSV.parse(string, headers: ['Name', 'Count'])
1534
+ # csv_table # => #<CSV::Table mode:col_or_row row_count:5>
1535
+ #
1536
+ # Parse an open \File:
1537
+ # csv_table = File.open(path) do |file|
1538
+ # CSV.parse(file, headers: ['Name', 'Count'])
1539
+ # end
1540
+ # csv_table # => #<CSV::Table mode:col_or_row row_count:5>
1541
+ #
1542
+ # ---
1543
+ #
1544
+ # With a block given, calls the block with each parsed row,
1545
+ # which has been formed into a CSV::Row object:
1546
+ #
1547
+ # Parse a \String:
1548
+ # CSV.parse(string, headers: ['Name', 'Count']) {|row| p row }
1549
+ #
1550
+ # Output:
1551
+ # # <CSV::Row "Name":"foo" "Count":"0">
1552
+ # # <CSV::Row "Name":"bar" "Count":"1">
1553
+ # # <CSV::Row "Name":"baz" "Count":"2">
1554
+ #
1555
+ # Parse an open \File:
1556
+ # File.open(path) do |file|
1557
+ # CSV.parse(file, headers: ['Name', 'Count']) {|row| p row }
1558
+ # end
1559
+ #
1560
+ # Output:
1561
+ # # <CSV::Row "Name":"foo" "Count":"0">
1562
+ # # <CSV::Row "Name":"bar" "Count":"1">
1563
+ # # <CSV::Row "Name":"baz" "Count":"2">
1564
+ #
1565
+ # ---
1566
+ #
1567
+ # Raises an exception if the argument is not a \String object or \IO object:
1568
+ # # Raises NoMethodError (undefined method `close' for :foo:Symbol)
1569
+ # CSV.parse(:foo)
680
1570
  def parse(str, **options, &block)
681
1571
  csv = new(str, **options)
682
1572
 
@@ -690,44 +1580,117 @@ class CSV
690
1580
  end
691
1581
  end
692
1582
 
1583
+ # :call-seq:
1584
+ # CSV.parse_line(string) -> new_array or nil
1585
+ # CSV.parse_line(io) -> new_array or nil
1586
+ # CSV.parse_line(string, **options) -> new_array or nil
1587
+ # CSV.parse_line(io, **options) -> new_array or nil
1588
+ # CSV.parse_line(string, headers: true, **options) -> csv_row or nil
1589
+ # CSV.parse_line(io, headers: true, **options) -> csv_row or nil
1590
+ #
1591
+ # Returns the data created by parsing the first line of +string+ or +io+
1592
+ # using the specified +options+.
1593
+ #
1594
+ # - Argument +string+ should be a \String object;
1595
+ # it will be put into a new StringIO object positioned at the beginning.
1596
+ # :include: ../doc/csv/arguments/io.rdoc
1597
+ # - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
1598
+ #
1599
+ # ====== Without Option +headers+
1600
+ #
1601
+ # Without option +headers+, returns the first row as a new \Array.
693
1602
  #
694
- # This method is a shortcut for converting a single line of a CSV String into
695
- # an Array. Note that if +line+ contains multiple rows, anything beyond the
696
- # first row is ignored.
1603
+ # These examples assume prior execution of:
1604
+ # string = "foo,0\nbar,1\nbaz,2\n"
1605
+ # path = 't.csv'
1606
+ # File.write(path, string)
697
1607
  #
698
- # The +options+ parameter can be anything CSV::new() understands.
1608
+ # Parse the first line from a \String object:
1609
+ # CSV.parse_line(string) # => ["foo", "0"]
1610
+ #
1611
+ # Parse the first line from a File object:
1612
+ # File.open(path) do |file|
1613
+ # CSV.parse_line(file) # => ["foo", "0"]
1614
+ # end # => ["foo", "0"]
1615
+ #
1616
+ # Returns +nil+ if the argument is an empty \String:
1617
+ # CSV.parse_line('') # => nil
1618
+ #
1619
+ # ====== With Option +headers+
1620
+ #
1621
+ # With {option +headers+}[#class-CSV-label-Option+headers],
1622
+ # returns the first row as a CSV::Row object.
1623
+ #
1624
+ # These examples assume prior execution of:
1625
+ # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
1626
+ # path = 't.csv'
1627
+ # File.write(path, string)
1628
+ #
1629
+ # Parse the first line from a \String object:
1630
+ # CSV.parse_line(string, headers: true) # => #<CSV::Row "Name":"foo" "Count":"0">
1631
+ #
1632
+ # Parse the first line from a File object:
1633
+ # File.open(path) do |file|
1634
+ # CSV.parse_line(file, headers: true)
1635
+ # end # => #<CSV::Row "Name":"foo" "Count":"0">
1636
+ #
1637
+ # ---
1638
+ #
1639
+ # Raises an exception if the argument is +nil+:
1640
+ # # Raises ArgumentError (Cannot parse nil as CSV):
1641
+ # CSV.parse_line(nil)
699
1642
  #
700
1643
  def parse_line(line, **options)
701
- new(line, **options).shift
1644
+ new(line, **options).each.first
702
1645
  end
703
1646
 
704
1647
  #
705
- # Use to slurp a CSV file into an Array of Arrays. Pass the +path+ to the
706
- # file and any +options+ CSV::new() understands. This method also understands
707
- # an additional <tt>:encoding</tt> parameter that you can use to specify the
708
- # Encoding of the data in the file to be read. You must provide this unless
709
- # your data is in Encoding::default_external(). CSV will use this to determine
710
- # how to parse the data. You may provide a second Encoding to have the data
711
- # transcoded as it is read. For example,
712
- # <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
713
- # but transcode it to UTF-8 before CSV parses it.
1648
+ # :call-seq:
1649
+ # read(source, **options) -> array_of_arrays
1650
+ # read(source, headers: true, **options) -> csv_table
1651
+ #
1652
+ # Opens the given +source+ with the given +options+ (see CSV.open),
1653
+ # reads the source (see CSV#read), and returns the result,
1654
+ # which will be either an \Array of Arrays or a CSV::Table.
1655
+ #
1656
+ # Without headers:
1657
+ # string = "foo,0\nbar,1\nbaz,2\n"
1658
+ # path = 't.csv'
1659
+ # File.write(path, string)
1660
+ # CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
714
1661
  #
1662
+ # With headers:
1663
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
1664
+ # path = 't.csv'
1665
+ # File.write(path, string)
1666
+ # CSV.read(path, headers: true) # => #<CSV::Table mode:col_or_row row_count:4>
715
1667
  def read(path, **options)
716
1668
  open(path, **options) { |csv| csv.read }
717
1669
  end
718
1670
 
719
- # Alias for CSV::read().
1671
+ # :call-seq:
1672
+ # CSV.readlines(source, **options)
1673
+ #
1674
+ # Alias for CSV.read.
720
1675
  def readlines(path, **options)
721
1676
  read(path, **options)
722
1677
  end
723
1678
 
1679
+ # :call-seq:
1680
+ # CSV.table(source, **options)
724
1681
  #
725
- # A shortcut for:
1682
+ # Calls CSV.read with +source+, +options+, and certain default options:
1683
+ # - +headers+: +true+
1684
+ # - +converters+: +:numeric+
1685
+ # - +header_converters+: +:symbol+
726
1686
  #
727
- # CSV.read( path, { headers: true,
728
- # converters: :numeric,
729
- # header_converters: :symbol }.merge(options) )
1687
+ # Returns a CSV::Table object.
730
1688
  #
1689
+ # Example:
1690
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
1691
+ # path = 't.csv'
1692
+ # File.write(path, string)
1693
+ # CSV.table(path) # => #<CSV::Table mode:col_or_row row_count:4>
731
1694
  def table(path, **options)
732
1695
  default_options = {
733
1696
  headers: true,
@@ -739,185 +1702,43 @@ class CSV
739
1702
  end
740
1703
  end
741
1704
 
1705
+ # :call-seq:
1706
+ # CSV.new(string)
1707
+ # CSV.new(io)
1708
+ # CSV.new(string, **options)
1709
+ # CSV.new(io, **options)
1710
+ #
1711
+ # Returns the new \CSV object created using +string+ or +io+
1712
+ # and the specified +options+.
1713
+ #
1714
+ # - Argument +string+ should be a \String object;
1715
+ # it will be put into a new StringIO object positioned at the beginning.
1716
+ # :include: ../doc/csv/arguments/io.rdoc
1717
+ # - Argument +options+: See:
1718
+ # * {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
1719
+ # * {Options for Generating}[#class-CSV-label-Options+for+Generating]
1720
+ # For performance reasons, the options cannot be overridden
1721
+ # in a \CSV object, so those specified here will endure.
1722
+ #
1723
+ # In addition to the \CSV instance methods, several \IO methods are delegated.
1724
+ # See {Delegated Methods}[#class-CSV-label-Delegated+Methods].
1725
+ #
1726
+ # ---
1727
+ #
1728
+ # Create a \CSV object from a \String object:
1729
+ # csv = CSV.new('foo,0')
1730
+ # csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1731
+ #
1732
+ # Create a \CSV object from a \File object:
1733
+ # File.write('t.csv', 'foo,0')
1734
+ # csv = CSV.new(File.open('t.csv'))
1735
+ # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1736
+ #
1737
+ # ---
742
1738
  #
743
- # This constructor will wrap either a String or IO object passed in +data+ for
744
- # reading and/or writing. In addition to the CSV instance methods, several IO
745
- # methods are delegated. (See CSV::open() for a complete list.) If you pass
746
- # a String for +data+, you can later retrieve it (after writing to it, for
747
- # example) with CSV.string().
748
- #
749
- # Note that a wrapped String will be positioned at the beginning (for
750
- # reading). If you want it at the end (for writing), use CSV::generate().
751
- # If you want any other positioning, pass a preset StringIO object instead.
752
- #
753
- # You may set any reading and/or writing preferences in the +options+ Hash.
754
- # Available options are:
755
- #
756
- # <b><tt>:col_sep</tt></b>:: The String placed between each field.
757
- # This String will be transcoded into
758
- # the data's Encoding before parsing.
759
- # <b><tt>:row_sep</tt></b>:: The String appended to the end of each
760
- # row. This can be set to the special
761
- # <tt>:auto</tt> setting, which requests
762
- # that CSV automatically discover this
763
- # from the data. Auto-discovery reads
764
- # ahead in the data looking for the next
765
- # <tt>"\r\n"</tt>, <tt>"\n"</tt>, or
766
- # <tt>"\r"</tt> sequence. A sequence
767
- # will be selected even if it occurs in
768
- # a quoted field, assuming that you
769
- # would have the same line endings
770
- # there. If none of those sequences is
771
- # found, +data+ is <tt>ARGF</tt>,
772
- # <tt>STDIN</tt>, <tt>STDOUT</tt>, or
773
- # <tt>STDERR</tt>, or the stream is only
774
- # available for output, the default
775
- # <tt>$INPUT_RECORD_SEPARATOR</tt>
776
- # (<tt>$/</tt>) is used. Obviously,
777
- # discovery takes a little time. Set
778
- # manually if speed is important. Also
779
- # note that IO objects should be opened
780
- # in binary mode on Windows if this
781
- # feature will be used as the
782
- # line-ending translation can cause
783
- # problems with resetting the document
784
- # position to where it was before the
785
- # read ahead. This String will be
786
- # transcoded into the data's Encoding
787
- # before parsing.
788
- # <b><tt>:quote_char</tt></b>:: The character used to quote fields.
789
- # This has to be a single character
790
- # String. This is useful for
791
- # application that incorrectly use
792
- # <tt>'</tt> as the quote character
793
- # instead of the correct <tt>"</tt>.
794
- # CSV will always consider a double
795
- # sequence of this character to be an
796
- # escaped quote. This String will be
797
- # transcoded into the data's Encoding
798
- # before parsing.
799
- # <b><tt>:field_size_limit</tt></b>:: This is a maximum size CSV will read
800
- # ahead looking for the closing quote
801
- # for a field. (In truth, it reads to
802
- # the first line ending beyond this
803
- # size.) If a quote cannot be found
804
- # within the limit CSV will raise a
805
- # MalformedCSVError, assuming the data
806
- # is faulty. You can use this limit to
807
- # prevent what are effectively DoS
808
- # attacks on the parser. However, this
809
- # limit can cause a legitimate parse to
810
- # fail and thus is set to +nil+, or off,
811
- # by default.
812
- # <b><tt>:converters</tt></b>:: An Array of names from the Converters
813
- # Hash and/or lambdas that handle custom
814
- # conversion. A single converter
815
- # doesn't have to be in an Array. All
816
- # built-in converters try to transcode
817
- # fields to UTF-8 before converting.
818
- # The conversion will fail if the data
819
- # cannot be transcoded, leaving the
820
- # field unchanged.
821
- # <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
822
- # unconverted_fields() method will be
823
- # added to all returned rows (Array or
824
- # CSV::Row) that will return the fields
825
- # as they were before conversion. Note
826
- # that <tt>:headers</tt> supplied by
827
- # Array or String were not fields of the
828
- # document and thus will have an empty
829
- # Array attached.
830
- # <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
831
- # +true+, the initial row of the CSV
832
- # file will be treated as a row of
833
- # headers. If set to an Array, the
834
- # contents will be used as the headers.
835
- # If set to a String, the String is run
836
- # through a call of CSV::parse_line()
837
- # with the same <tt>:col_sep</tt>,
838
- # <tt>:row_sep</tt>, and
839
- # <tt>:quote_char</tt> as this instance
840
- # to produce an Array of headers. This
841
- # setting causes CSV#shift() to return
842
- # rows as CSV::Row objects instead of
843
- # Arrays and CSV#read() to return
844
- # CSV::Table objects instead of an Array
845
- # of Arrays.
846
- # <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently
847
- # swallowed. If set to +true+, header
848
- # rows are returned in a CSV::Row object
849
- # with identical headers and
850
- # fields (save that the fields do not go
851
- # through the converters).
852
- # <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
853
- # set, a header row will be added to the
854
- # output.
855
- # <b><tt>:header_converters</tt></b>:: Identical in functionality to
856
- # <tt>:converters</tt> save that the
857
- # conversions are only made to header
858
- # rows. All built-in converters try to
859
- # transcode headers to UTF-8 before
860
- # converting. The conversion will fail
861
- # if the data cannot be transcoded,
862
- # leaving the header unchanged.
863
- # <b><tt>:skip_blanks</tt></b>:: When setting a +true+ value, CSV will
864
- # skip over any empty rows. Note that
865
- # this setting will not skip rows that
866
- # contain column separators, even if
867
- # the rows contain no actual data. If
868
- # you want to skip rows that contain
869
- # separators but no content, consider
870
- # using <tt>:skip_lines</tt>, or
871
- # inspecting fields.compact.empty? on
872
- # each row.
873
- # <b><tt>:force_quotes</tt></b>:: When setting a +true+ value, CSV will
874
- # quote all CSV fields it creates.
875
- # <b><tt>:skip_lines</tt></b>:: When setting an object responding to
876
- # <tt>match</tt>, every line matching
877
- # it is considered a comment and ignored
878
- # during parsing. When set to a String,
879
- # it is first converted to a Regexp.
880
- # When set to +nil+ no line is considered
881
- # a comment. If the passed object does
882
- # not respond to <tt>match</tt>,
883
- # <tt>ArgumentError</tt> is thrown.
884
- # <b><tt>:liberal_parsing</tt></b>:: When setting a +true+ value, CSV will
885
- # attempt to parse input not conformant
886
- # with RFC 4180, such as double quotes
887
- # in unquoted fields.
888
- # <b><tt>:nil_value</tt></b>:: When set an object, any values of an
889
- # empty field is replaced by the set
890
- # object, not nil.
891
- # <b><tt>:empty_value</tt></b>:: When setting an object, any values of a
892
- # blank string field is replaced by
893
- # the set object.
894
- # <b><tt>:quote_empty</tt></b>:: When setting a +true+ value, CSV will
895
- # quote empty values with double quotes.
896
- # When +false+, CSV will emit an
897
- # empty string for an empty field value.
898
- # <b><tt>:write_converters</tt></b>:: Converts values on each line with the
899
- # specified <tt>Proc</tt> object(s),
900
- # which receive a <tt>String</tt> value
901
- # and return a <tt>String</tt> or +nil+
902
- # value.
903
- # When an array is specified, each
904
- # converter will be applied in order.
905
- # <b><tt>:write_nil_value</tt></b>:: When a <tt>String</tt> value, +nil+
906
- # value(s) on each line will be replaced
907
- # with the specified value.
908
- # <b><tt>:write_empty_value</tt></b>:: When a <tt>String</tt> or +nil+ value,
909
- # empty value(s) on each line will be
910
- # replaced with the specified value.
911
- # <b><tt>:strip</tt></b>:: When setting a +true+ value, CSV will
912
- # strip " \t\f\v" around the values.
913
- # If you specify a string instead of
914
- # +true+, CSV will strip string. The
915
- # length of the string must be 1.
916
- #
917
- # See CSV::DEFAULT_OPTIONS for the default settings.
918
- #
919
- # Options cannot be overridden in the instance methods for performance reasons,
920
- # so be sure to set what you want here.
1739
+ # Raises an exception if the argument is +nil+:
1740
+ # # Raises ArgumentError (Cannot parse nil as CSV):
1741
+ # CSV.new(nil)
921
1742
  #
922
1743
  def initialize(data,
923
1744
  col_sep: ",",
@@ -1002,51 +1823,67 @@ class CSV
1002
1823
  writer if @writer_options[:write_headers]
1003
1824
  end
1004
1825
 
1826
+ # :call-seq:
1827
+ # csv.col_sep -> string
1005
1828
  #
1006
- # The encoded <tt>:col_sep</tt> used in parsing and writing.
1007
- # See CSV::new for details.
1008
- #
1829
+ # Returns the encoded column separator; used for parsing and writing;
1830
+ # see {Option +col_sep+}[#class-CSV-label-Option+col_sep]:
1831
+ # CSV.new('').col_sep # => ","
1009
1832
  def col_sep
1010
1833
  parser.column_separator
1011
1834
  end
1012
1835
 
1836
+ # :call-seq:
1837
+ # csv.row_sep -> string
1013
1838
  #
1014
- # The encoded <tt>:row_sep</tt> used in parsing and writing.
1015
- # See CSV::new for details.
1016
- #
1839
+ # Returns the encoded row separator; used for parsing and writing;
1840
+ # see {Option +row_sep+}[#class-CSV-label-Option+row_sep]:
1841
+ # CSV.new('').row_sep # => "\n"
1017
1842
  def row_sep
1018
1843
  parser.row_separator
1019
1844
  end
1020
1845
 
1846
+ # :call-seq:
1847
+ # csv.quote_char -> character
1021
1848
  #
1022
- # The encoded <tt>:quote_char</tt> used in parsing and writing.
1023
- # See CSV::new for details.
1024
- #
1849
+ # Returns the encoded quote character; used for parsing and writing;
1850
+ # see {Option +quote_char+}[#class-CSV-label-Option+quote_char]:
1851
+ # CSV.new('').quote_char # => "\""
1025
1852
  def quote_char
1026
1853
  parser.quote_character
1027
1854
  end
1028
1855
 
1856
+ # :call-seq:
1857
+ # csv.field_size_limit -> integer or nil
1029
1858
  #
1030
- # The limit for field size, if any.
1031
- # See CSV::new for details.
1032
- #
1859
+ # Returns the limit for field size; used for parsing;
1860
+ # see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
1861
+ # CSV.new('').field_size_limit # => nil
1033
1862
  def field_size_limit
1034
1863
  parser.field_size_limit
1035
1864
  end
1036
1865
 
1866
+ # :call-seq:
1867
+ # csv.skip_lines -> regexp or nil
1037
1868
  #
1038
- # The regex marking a line as a comment.
1039
- # See CSV::new for details.
1040
- #
1869
+ # Returns the \Regexp used to identify comment lines; used for parsing;
1870
+ # see {Option +skip_lines+}[#class-CSV-label-Option+skip_lines]:
1871
+ # CSV.new('').skip_lines # => nil
1041
1872
  def skip_lines
1042
1873
  parser.skip_lines
1043
1874
  end
1044
1875
 
1045
- #
1046
- # Returns the current list of converters in effect. See CSV::new for details.
1047
- # Built-in converters will be returned by name, while others will be returned
1048
- # as is.
1049
- #
1876
+ # :call-seq:
1877
+ # csv.converters -> array
1878
+ #
1879
+ # Returns an \Array containing field converters;
1880
+ # see {Field Converters}[#class-CSV-label-Field+Converters]:
1881
+ # csv = CSV.new('')
1882
+ # csv.converters # => []
1883
+ # csv.convert(:integer)
1884
+ # csv.converters # => [:integer]
1885
+ # csv.convert(proc {|x| x.to_s })
1886
+ # csv.converters
1050
1887
  def converters
1051
1888
  parser_fields_converter.map do |converter|
1052
1889
  name = Converters.rassoc(converter)
@@ -1054,19 +1891,23 @@ class CSV
1054
1891
  end
1055
1892
  end
1056
1893
 
1894
+ # :call-seq:
1895
+ # csv.unconverted_fields? -> object
1057
1896
  #
1058
- # Returns +true+ if unconverted_fields() to parsed results.
1059
- # See CSV::new for details.
1060
- #
1897
+ # Returns the value that determines whether unconverted fields are to be
1898
+ # available; used for parsing;
1899
+ # see {Option +unconverted_fields+}[#class-CSV-label-Option+unconverted_fields]:
1900
+ # CSV.new('').unconverted_fields? # => nil
1061
1901
  def unconverted_fields?
1062
1902
  parser.unconverted_fields?
1063
1903
  end
1064
1904
 
1905
+ # :call-seq:
1906
+ # csv.headers -> object
1065
1907
  #
1066
- # Returns +nil+ if headers will not be used, +true+ if they will but have not
1067
- # yet been read, or the actual headers after they have been read.
1068
- # See CSV::new for details.
1069
- #
1908
+ # Returns the value that determines whether headers are used; used for parsing;
1909
+ # see {Option +headers+}[#class-CSV-label-Option+headers]:
1910
+ # CSV.new('').headers # => nil
1070
1911
  def headers
1071
1912
  if @writer
1072
1913
  @writer.headers
@@ -1078,27 +1919,33 @@ class CSV
1078
1919
  raw_headers
1079
1920
  end
1080
1921
  end
1922
+
1923
+ # :call-seq:
1924
+ # csv.return_headers? -> true or false
1081
1925
  #
1082
- # Returns +true+ if headers will be returned as a row of results.
1083
- # See CSV::new for details.
1084
- #
1926
+ # Returns the value that determines whether headers are to be returned; used for parsing;
1927
+ # see {Option +return_headers+}[#class-CSV-label-Option+return_headers]:
1928
+ # CSV.new('').return_headers? # => false
1085
1929
  def return_headers?
1086
1930
  parser.return_headers?
1087
1931
  end
1088
1932
 
1933
+ # :call-seq:
1934
+ # csv.write_headers? -> true or false
1089
1935
  #
1090
- # Returns +true+ if headers are written in output.
1091
- # See CSV::new for details.
1092
- #
1936
+ # Returns the value that determines whether headers are to be written; used for generating;
1937
+ # see {Option +write_headers+}[#class-CSV-label-Option+write_headers]:
1938
+ # CSV.new('').write_headers? # => nil
1093
1939
  def write_headers?
1094
1940
  @writer_options[:write_headers]
1095
1941
  end
1096
1942
 
1943
+ # :call-seq:
1944
+ # csv.header_converters -> array
1097
1945
  #
1098
- # Returns the current list of converters in effect for headers. See CSV::new
1099
- # for details. Built-in converters will be returned by name, while others
1100
- # will be returned as is.
1101
- #
1946
+ # Returns an \Array containing header converters; used for parsing;
1947
+ # see {Header Converters}[#class-CSV-label-Header+Converters]:
1948
+ # CSV.new('').header_converters # => []
1102
1949
  def header_converters
1103
1950
  header_fields_converter.map do |converter|
1104
1951
  name = HeaderConverters.rassoc(converter)
@@ -1106,34 +1953,74 @@ class CSV
1106
1953
  end
1107
1954
  end
1108
1955
 
1956
+ # :call-seq:
1957
+ # csv.skip_blanks? -> true or false
1109
1958
  #
1110
- # Returns +true+ blank lines are skipped by the parser. See CSV::new
1111
- # for details.
1112
- #
1959
+ # Returns the value that determines whether blank lines are to be ignored; used for parsing;
1960
+ # see {Option +skip_blanks+}[#class-CSV-label-Option+skip_blanks]:
1961
+ # CSV.new('').skip_blanks? # => false
1113
1962
  def skip_blanks?
1114
1963
  parser.skip_blanks?
1115
1964
  end
1116
1965
 
1117
- # Returns +true+ if all output fields are quoted. See CSV::new for details.
1966
+ # :call-seq:
1967
+ # csv.force_quotes? -> true or false
1968
+ #
1969
+ # Returns the value that determines whether all output fields are to be quoted;
1970
+ # used for generating;
1971
+ # see {Option +force_quotes+}[#class-CSV-label-Option+force_quotes]:
1972
+ # CSV.new('').force_quotes? # => false
1118
1973
  def force_quotes?
1119
1974
  @writer_options[:force_quotes]
1120
1975
  end
1121
1976
 
1122
- # Returns +true+ if illegal input is handled. See CSV::new for details.
1977
+ # :call-seq:
1978
+ # csv.liberal_parsing? -> true or false
1979
+ #
1980
+ # Returns the value that determines whether illegal input is to be handled; used for parsing;
1981
+ # see {Option +liberal_parsing+}[#class-CSV-label-Option+liberal_parsing]:
1982
+ # CSV.new('').liberal_parsing? # => false
1123
1983
  def liberal_parsing?
1124
1984
  parser.liberal_parsing?
1125
1985
  end
1126
1986
 
1987
+ # :call-seq:
1988
+ # csv.encoding -> encoding
1127
1989
  #
1128
- # The Encoding CSV is parsing or writing in. This will be the Encoding you
1129
- # receive parsed data in and/or the Encoding data will be written in.
1130
- #
1990
+ # Returns the encoding used for parsing and generating;
1991
+ # see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]:
1992
+ # CSV.new('').encoding # => #<Encoding:UTF-8>
1131
1993
  attr_reader :encoding
1132
1994
 
1133
- #
1134
- # The line number of the last row read from this file. Fields with nested
1135
- # line-end characters will not affect this count.
1136
- #
1995
+ # :call-seq:
1996
+ # csv.lineno -> integer
1997
+ #
1998
+ # Returns the count of the rows parsed or generated.
1999
+ #
2000
+ # Parsing:
2001
+ # string = "foo,0\nbar,1\nbaz,2\n"
2002
+ # path = 't.csv'
2003
+ # File.write(path, string)
2004
+ # CSV.open(path) do |csv|
2005
+ # csv.each do |row|
2006
+ # p [csv.lineno, row]
2007
+ # end
2008
+ # end
2009
+ # Output:
2010
+ # [1, ["foo", "0"]]
2011
+ # [2, ["bar", "1"]]
2012
+ # [3, ["baz", "2"]]
2013
+ #
2014
+ # Generating:
2015
+ # CSV.generate do |csv|
2016
+ # p csv.lineno; csv << ['foo', 0]
2017
+ # p csv.lineno; csv << ['bar', 1]
2018
+ # p csv.lineno; csv << ['baz', 2]
2019
+ # end
2020
+ # Output:
2021
+ # 0
2022
+ # 1
2023
+ # 2
1137
2024
  def lineno
1138
2025
  if @writer
1139
2026
  @writer.lineno
@@ -1142,9 +2029,22 @@ class CSV
1142
2029
  end
1143
2030
  end
1144
2031
 
1145
- #
1146
- # The last row read from this file.
1147
- #
2032
+ # :call-seq:
2033
+ # csv.line -> string
2034
+ #
2035
+ # Returns the line most recently read:
2036
+ # string = "foo,0\nbar,1\nbaz,2\n"
2037
+ # path = 't.csv'
2038
+ # File.write(path, string)
2039
+ # CSV.open(path) do |csv|
2040
+ # csv.each do |row|
2041
+ # p [csv.lineno, csv.line]
2042
+ # end
2043
+ # end
2044
+ # Output:
2045
+ # [1, "foo,0\n"]
2046
+ # [2, "bar,1\n"]
2047
+ # [3, "baz,2\n"]
1148
2048
  def line
1149
2049
  parser.line
1150
2050
  end
@@ -1220,13 +2120,56 @@ class CSV
1220
2120
 
1221
2121
  ### End Delegation ###
1222
2122
 
1223
- #
1224
- # The primary write method for wrapped Strings and IOs, +row+ (an Array or
1225
- # CSV::Row) is converted to CSV and appended to the data source. When a
1226
- # CSV::Row is passed, only the row's fields() are appended to the output.
1227
- #
1228
- # The data source must be open for writing.
1229
- #
2123
+ # :call-seq:
2124
+ # csv << row -> self
2125
+ #
2126
+ # Appends a row to +self+.
2127
+ #
2128
+ # - Argument +row+ must be an \Array object or a CSV::Row object.
2129
+ # - The output stream must be open for writing.
2130
+ #
2131
+ # ---
2132
+ #
2133
+ # Append Arrays:
2134
+ # CSV.generate do |csv|
2135
+ # csv << ['foo', 0]
2136
+ # csv << ['bar', 1]
2137
+ # csv << ['baz', 2]
2138
+ # end # => "foo,0\nbar,1\nbaz,2\n"
2139
+ #
2140
+ # Append CSV::Rows:
2141
+ # headers = []
2142
+ # CSV.generate do |csv|
2143
+ # csv << CSV::Row.new(headers, ['foo', 0])
2144
+ # csv << CSV::Row.new(headers, ['bar', 1])
2145
+ # csv << CSV::Row.new(headers, ['baz', 2])
2146
+ # end # => "foo,0\nbar,1\nbaz,2\n"
2147
+ #
2148
+ # Headers in CSV::Row objects are not appended:
2149
+ # headers = ['Name', 'Count']
2150
+ # CSV.generate do |csv|
2151
+ # csv << CSV::Row.new(headers, ['foo', 0])
2152
+ # csv << CSV::Row.new(headers, ['bar', 1])
2153
+ # csv << CSV::Row.new(headers, ['baz', 2])
2154
+ # end # => "foo,0\nbar,1\nbaz,2\n"
2155
+ #
2156
+ # ---
2157
+ #
2158
+ # Raises an exception if +row+ is not an \Array or \CSV::Row:
2159
+ # CSV.generate do |csv|
2160
+ # # Raises NoMethodError (undefined method `collect' for :foo:Symbol)
2161
+ # csv << :foo
2162
+ # end
2163
+ #
2164
+ # Raises an exception if the output stream is not opened for writing:
2165
+ # path = 't.csv'
2166
+ # File.write(path, '')
2167
+ # File.open(path) do |file|
2168
+ # CSV.open(file) do |csv|
2169
+ # # Raises IOError (not opened for writing)
2170
+ # csv << ['foo', 0]
2171
+ # end
2172
+ # end
1230
2173
  def <<(row)
1231
2174
  writer << row
1232
2175
  self
@@ -1234,58 +2177,216 @@ class CSV
1234
2177
  alias_method :add_row, :<<
1235
2178
  alias_method :puts, :<<
1236
2179
 
1237
- #
1238
2180
  # :call-seq:
1239
- # convert( name )
1240
- # convert { |field| ... }
1241
- # convert { |field, field_info| ... }
1242
- #
1243
- # You can use this method to install a CSV::Converters built-in, or provide a
1244
- # block that handles a custom conversion.
1245
- #
1246
- # If you provide a block that takes one argument, it will be passed the field
1247
- # and is expected to return the converted value or the field itself. If your
1248
- # block takes two arguments, it will also be passed a CSV::FieldInfo Struct,
1249
- # containing details about the field. Again, the block should return a
1250
- # converted field or the field itself.
1251
- #
2181
+ # convert(converter_name) -> array_of_procs
2182
+ # convert {|field, field_info| ... } -> array_of_procs
2183
+ #
2184
+ # - With no block, installs a field converter (a \Proc).
2185
+ # - With a block, defines and installs a custom field converter.
2186
+ # - Returns the \Array of installed field converters.
2187
+ #
2188
+ # - Argument +converter_name+, if given, should be the name
2189
+ # of an existing field converter.
2190
+ #
2191
+ # See {Field Converters}[#class-CSV-label-Field+Converters].
2192
+ # ---
2193
+ #
2194
+ # With no block, installs a field converter:
2195
+ # csv = CSV.new('')
2196
+ # csv.convert(:integer)
2197
+ # csv.convert(:float)
2198
+ # csv.convert(:date)
2199
+ # csv.converters # => [:integer, :float, :date]
2200
+ #
2201
+ # ---
2202
+ #
2203
+ # The block, if given, is called for each field:
2204
+ # - Argument +field+ is the field value.
2205
+ # - Argument +field_info+ is a CSV::FieldInfo object
2206
+ # containing details about the field.
2207
+ #
2208
+ # The examples here assume the prior execution of:
2209
+ # string = "foo,0\nbar,1\nbaz,2\n"
2210
+ # path = 't.csv'
2211
+ # File.write(path, string)
2212
+ #
2213
+ # Example giving a block:
2214
+ # csv = CSV.open(path)
2215
+ # csv.convert {|field, field_info| p [field, field_info]; field.upcase }
2216
+ # csv.read # => [["FOO", "0"], ["BAR", "1"], ["BAZ", "2"]]
2217
+ #
2218
+ # Output:
2219
+ # ["foo", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
2220
+ # ["0", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
2221
+ # ["bar", #<struct CSV::FieldInfo index=0, line=2, header=nil>]
2222
+ # ["1", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
2223
+ # ["baz", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
2224
+ # ["2", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
2225
+ #
2226
+ # The block need not return a \String object:
2227
+ # csv = CSV.open(path)
2228
+ # csv.convert {|field, field_info| field.to_sym }
2229
+ # csv.read # => [[:foo, :"0"], [:bar, :"1"], [:baz, :"2"]]
2230
+ #
2231
+ # If +converter_name+ is given, the block is not called:
2232
+ # csv = CSV.open(path)
2233
+ # csv.convert(:integer) {|field, field_info| fail 'Cannot happen' }
2234
+ # csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
2235
+ #
2236
+ # ---
2237
+ #
2238
+ # Raises a parse-time exception if +converter_name+ is not the name of a built-in
2239
+ # field converter:
2240
+ # csv = CSV.open(path)
2241
+ # csv.convert(:nosuch) # => [nil]
2242
+ # # Raises NoMethodError (undefined method `arity' for nil:NilClass)
2243
+ # csv.read
1252
2244
  def convert(name = nil, &converter)
1253
2245
  parser_fields_converter.add_converter(name, &converter)
1254
2246
  end
1255
2247
 
1256
- #
1257
2248
  # :call-seq:
1258
- # header_convert( name )
1259
- # header_convert { |field| ... }
1260
- # header_convert { |field, field_info| ... }
1261
- #
1262
- # Identical to CSV#convert(), but for header rows.
1263
- #
1264
- # Note that this method must be called before header rows are read to have any
1265
- # effect.
1266
- #
2249
+ # header_convert(converter_name) -> array_of_procs
2250
+ # header_convert {|header, field_info| ... } -> array_of_procs
2251
+ #
2252
+ # - With no block, installs a header converter (a \Proc).
2253
+ # - With a block, defines and installs a custom header converter.
2254
+ # - Returns the \Array of installed header converters.
2255
+ #
2256
+ # - Argument +converter_name+, if given, should be the name
2257
+ # of an existing header converter.
2258
+ #
2259
+ # See {Header Converters}[#class-CSV-label-Header+Converters].
2260
+ # ---
2261
+ #
2262
+ # With no block, installs a header converter:
2263
+ # csv = CSV.new('')
2264
+ # csv.header_convert(:symbol)
2265
+ # csv.header_convert(:downcase)
2266
+ # csv.header_converters # => [:symbol, :downcase]
2267
+ #
2268
+ # ---
2269
+ #
2270
+ # The block, if given, is called for each header:
2271
+ # - Argument +header+ is the header value.
2272
+ # - Argument +field_info+ is a CSV::FieldInfo object
2273
+ # containing details about the header.
2274
+ #
2275
+ # The examples here assume the prior execution of:
2276
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2277
+ # path = 't.csv'
2278
+ # File.write(path, string)
2279
+ #
2280
+ # Example giving a block:
2281
+ # csv = CSV.open(path, headers: true)
2282
+ # csv.header_convert {|header, field_info| p [header, field_info]; header.upcase }
2283
+ # table = csv.read
2284
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
2285
+ # table.headers # => ["NAME", "VALUE"]
2286
+ #
2287
+ # Output:
2288
+ # ["Name", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
2289
+ # ["Value", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
2290
+ #
2291
+ # The block need not return a \String object:
2292
+ # csv = CSV.open(path, headers: true)
2293
+ # csv.header_convert {|header, field_info| header.to_sym }
2294
+ # table = csv.read
2295
+ # table.headers # => [:Name, :Value]
2296
+ #
2297
+ # If +converter_name+ is given, the block is not called:
2298
+ # csv = CSV.open(path, headers: true)
2299
+ # csv.header_convert(:downcase) {|header, field_info| fail 'Cannot happen' }
2300
+ # table = csv.read
2301
+ # table.headers # => ["name", "value"]
2302
+ # ---
2303
+ #
2304
+ # Raises a parse-time exception if +converter_name+ is not the name of a built-in
2305
+ # header converter:
2306
+ # csv = CSV.open(path, headers: true)
2307
+ # csv.header_convert(:nosuch)
2308
+ # # Raises NoMethodError (undefined method `arity' for nil:NilClass)
2309
+ # csv.read
1267
2310
  def header_convert(name = nil, &converter)
1268
2311
  header_fields_converter.add_converter(name, &converter)
1269
2312
  end
1270
2313
 
1271
2314
  include Enumerable
1272
2315
 
1273
- #
1274
- # Yields each row of the data source in turn.
1275
- #
1276
- # Support for Enumerable.
1277
- #
1278
- # The data source must be open for reading.
1279
- #
2316
+ # :call-seq:
2317
+ # csv.each -> enumerator
2318
+ # csv.each {|row| ...}
2319
+ #
2320
+ # Calls the block with each successive row.
2321
+ # The data source must be opened for reading.
2322
+ #
2323
+ # Without headers:
2324
+ # string = "foo,0\nbar,1\nbaz,2\n"
2325
+ # csv = CSV.new(string)
2326
+ # csv.each do |row|
2327
+ # p row
2328
+ # end
2329
+ # Output:
2330
+ # ["foo", "0"]
2331
+ # ["bar", "1"]
2332
+ # ["baz", "2"]
2333
+ #
2334
+ # With headers:
2335
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2336
+ # csv = CSV.new(string, headers: true)
2337
+ # csv.each do |row|
2338
+ # p row
2339
+ # end
2340
+ # Output:
2341
+ # #<CSV::Row "Name":"foo" "Value":"0">
2342
+ # #<CSV::Row "Name":"bar" "Value":"1">
2343
+ # #<CSV::Row "Name":"baz" "Value":"2">
2344
+ #
2345
+ # ---
2346
+ #
2347
+ # Raises an exception if the source is not opened for reading:
2348
+ # string = "foo,0\nbar,1\nbaz,2\n"
2349
+ # csv = CSV.new(string)
2350
+ # csv.close
2351
+ # # Raises IOError (not opened for reading)
2352
+ # csv.each do |row|
2353
+ # p row
2354
+ # end
1280
2355
  def each(&block)
1281
2356
  parser_enumerator.each(&block)
1282
2357
  end
1283
2358
 
1284
- #
1285
- # Slurps the remaining rows and returns an Array of Arrays.
1286
- #
1287
- # The data source must be open for reading.
1288
- #
2359
+ # :call-seq:
2360
+ # csv.read -> array or csv_table
2361
+ #
2362
+ # Forms the remaining rows from +self+ into:
2363
+ # - A CSV::Table object, if headers are in use.
2364
+ # - An \Array of Arrays, otherwise.
2365
+ #
2366
+ # The data source must be opened for reading.
2367
+ #
2368
+ # Without headers:
2369
+ # string = "foo,0\nbar,1\nbaz,2\n"
2370
+ # path = 't.csv'
2371
+ # File.write(path, string)
2372
+ # csv = CSV.open(path)
2373
+ # csv.read # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
2374
+ #
2375
+ # With headers:
2376
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2377
+ # path = 't.csv'
2378
+ # File.write(path, string)
2379
+ # csv = CSV.open(path, headers: true)
2380
+ # csv.read # => #<CSV::Table mode:col_or_row row_count:4>
2381
+ #
2382
+ # ---
2383
+ #
2384
+ # Raises an exception if the source is not opened for reading:
2385
+ # string = "foo,0\nbar,1\nbaz,2\n"
2386
+ # csv = CSV.new(string)
2387
+ # csv.close
2388
+ # # Raises IOError (not opened for reading)
2389
+ # csv.read
1289
2390
  def read
1290
2391
  rows = to_a
1291
2392
  if parser.use_headers?
@@ -1296,18 +2397,69 @@ class CSV
1296
2397
  end
1297
2398
  alias_method :readlines, :read
1298
2399
 
1299
- # Returns +true+ if the next row read will be a header row.
2400
+ # :call-seq:
2401
+ # csv.header_row? -> true or false
2402
+ #
2403
+ # Returns +true+ if the next row to be read is a header row\;
2404
+ # +false+ otherwise.
2405
+ #
2406
+ # Without headers:
2407
+ # string = "foo,0\nbar,1\nbaz,2\n"
2408
+ # csv = CSV.new(string)
2409
+ # csv.header_row? # => false
2410
+ #
2411
+ # With headers:
2412
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2413
+ # csv = CSV.new(string, headers: true)
2414
+ # csv.header_row? # => true
2415
+ # csv.shift # => #<CSV::Row "Name":"foo" "Value":"0">
2416
+ # csv.header_row? # => false
2417
+ #
2418
+ # ---
2419
+ #
2420
+ # Raises an exception if the source is not opened for reading:
2421
+ # string = "foo,0\nbar,1\nbaz,2\n"
2422
+ # csv = CSV.new(string)
2423
+ # csv.close
2424
+ # # Raises IOError (not opened for reading)
2425
+ # csv.header_row?
1300
2426
  def header_row?
1301
2427
  parser.header_row?
1302
2428
  end
1303
2429
 
1304
- #
1305
- # The primary read method for wrapped Strings and IOs, a single row is pulled
1306
- # from the data source, parsed and returned as an Array of fields (if header
1307
- # rows are not used) or a CSV::Row (when header rows are used).
1308
- #
1309
- # The data source must be open for reading.
1310
- #
2430
+ # :call-seq:
2431
+ # csv.shift -> array, csv_row, or nil
2432
+ #
2433
+ # Returns the next row of data as:
2434
+ # - An \Array if no headers are used.
2435
+ # - A CSV::Row object if headers are used.
2436
+ #
2437
+ # The data source must be opened for reading.
2438
+ #
2439
+ # Without headers:
2440
+ # string = "foo,0\nbar,1\nbaz,2\n"
2441
+ # csv = CSV.new(string)
2442
+ # csv.shift # => ["foo", "0"]
2443
+ # csv.shift # => ["bar", "1"]
2444
+ # csv.shift # => ["baz", "2"]
2445
+ # csv.shift # => nil
2446
+ #
2447
+ # With headers:
2448
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2449
+ # csv = CSV.new(string, headers: true)
2450
+ # csv.shift # => #<CSV::Row "Name":"foo" "Value":"0">
2451
+ # csv.shift # => #<CSV::Row "Name":"bar" "Value":"1">
2452
+ # csv.shift # => #<CSV::Row "Name":"baz" "Value":"2">
2453
+ # csv.shift # => nil
2454
+ #
2455
+ # ---
2456
+ #
2457
+ # Raises an exception if the source is not opened for reading:
2458
+ # string = "foo,0\nbar,1\nbaz,2\n"
2459
+ # csv = CSV.new(string)
2460
+ # csv.close
2461
+ # # Raises IOError (not opened for reading)
2462
+ # csv.shift
1311
2463
  def shift
1312
2464
  if @eof_error
1313
2465
  eof_error, @eof_error = @eof_error, nil
@@ -1322,10 +2474,14 @@ class CSV
1322
2474
  alias_method :gets, :shift
1323
2475
  alias_method :readline, :shift
1324
2476
 
2477
+ # :call-seq:
2478
+ # csv.inspect -> string
1325
2479
  #
1326
- # Returns a simplified description of the key CSV attributes in an
1327
- # ASCII compatible String.
1328
- #
2480
+ # Returns a \String showing certain properties of +self+:
2481
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
2482
+ # csv = CSV.new(string, headers: true)
2483
+ # s = csv.inspect
2484
+ # s # => "#<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:\",\" row_sep:\"\\n\" quote_char:\"\\\"\" headers:true>"
1329
2485
  def inspect
1330
2486
  str = ["#<", self.class.to_s, " io_type:"]
1331
2487
  # show type of wrapped IO