csv 3.1.9 → 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/csv.rb CHANGED
@@ -48,7 +48,7 @@
48
48
  #
49
49
  # === Interface
50
50
  #
51
- # * CSV now uses Hash-style parameters to set options.
51
+ # * CSV now uses keyword parameters to set options.
52
52
  # * CSV no longer has generate_row() or parse_row().
53
53
  # * The old CSV's Reader and Writer classes have been dropped.
54
54
  # * CSV::open() is now more like Ruby's open().
@@ -70,7 +70,7 @@
70
70
  # == What is CSV, really?
71
71
  #
72
72
  # CSV maintains a pretty strict definition of CSV taken directly from
73
- # {the RFC}[http://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one
73
+ # {the RFC}[https://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one
74
74
  # place and that is to make using this library easier. CSV will parse all valid
75
75
  # CSV.
76
76
  #
@@ -90,29 +90,19 @@
90
90
  # with any questions.
91
91
 
92
92
  require "forwardable"
93
- require "English"
94
93
  require "date"
94
+ require "time"
95
95
  require "stringio"
96
96
 
97
97
  require_relative "csv/fields_converter"
98
- require_relative "csv/match_p"
98
+ require_relative "csv/input_record_separator"
99
99
  require_relative "csv/parser"
100
100
  require_relative "csv/row"
101
101
  require_relative "csv/table"
102
102
  require_relative "csv/writer"
103
103
 
104
- using CSV::MatchP if CSV.const_defined?(:MatchP)
105
-
106
104
  # == \CSV
107
105
  #
108
- # === In a Hurry?
109
- #
110
- # If you are familiar with \CSV data and have a particular task in mind,
111
- # you may want to go directly to the:
112
- # - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html].
113
- #
114
- # Otherwise, read on here, about the API: classes, methods, and constants.
115
- #
116
106
  # === \CSV Data
117
107
  #
118
108
  # \CSV (comma-separated values) data is a text representation of a table:
@@ -341,6 +331,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
341
331
  # liberal_parsing: false,
342
332
  # nil_value: nil,
343
333
  # empty_value: "",
334
+ # strip: false,
344
335
  # # For generating.
345
336
  # write_headers: nil,
346
337
  # quote_empty: true,
@@ -348,7 +339,6 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
348
339
  # write_converters: nil,
349
340
  # write_nil_value: nil,
350
341
  # write_empty_value: "",
351
- # strip: false,
352
342
  # }
353
343
  #
354
344
  # ==== Options for Parsing
@@ -357,7 +347,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
357
347
  # - +row_sep+: Specifies the row separator; used to delimit rows.
358
348
  # - +col_sep+: Specifies the column separator; used to delimit fields.
359
349
  # - +quote_char+: Specifies the quote character; used to quote fields.
360
- # - +field_size_limit+: Specifies the maximum field size allowed.
350
+ # - +field_size_limit+: Specifies the maximum field size + 1 allowed.
351
+ # Deprecated since 3.2.3. Use +max_field_size+ instead.
352
+ # - +max_field_size+: Specifies the maximum field size allowed.
361
353
  # - +converters+: Specifies the field converters to be used.
362
354
  # - +unconverted_fields+: Specifies whether unconverted fields are to be available.
363
355
  # - +headers+: Specifies whether data contains headers,
@@ -366,8 +358,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
366
358
  # - +header_converters+: Specifies the header converters to be used.
367
359
  # - +skip_blanks+: Specifies whether blanks lines are to be ignored.
368
360
  # - +skip_lines+: Specifies how comments lines are to be recognized.
369
- # - +strip+: Specifies whether leading and trailing whitespace are
370
- # to be stripped from fields..
361
+ # - +strip+: Specifies whether leading and trailing whitespace are to be
362
+ # stripped from fields. This must be compatible with +col_sep+; if it is not,
363
+ # then an +ArgumentError+ exception will be raised.
371
364
  # - +liberal_parsing+: Specifies whether \CSV should attempt to parse
372
365
  # non-compliant data.
373
366
  # - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field.
@@ -513,7 +506,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
513
506
  # [" 1 ", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
514
507
  # [" baz ", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
515
508
  # [" 2 ", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
516
- # Each CSV::Info object shows:
509
+ # Each CSV::FieldInfo object shows:
517
510
  # - The 0-based field index.
518
511
  # - The 1-based line index.
519
512
  # - The field header, if any.
@@ -529,6 +522,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
529
522
  # - <tt>:float</tt>: converts each \String-embedded float into a true \Float.
530
523
  # - <tt>:date</tt>: converts each \String-embedded date into a true \Date.
531
524
  # - <tt>:date_time</tt>: converts each \String-embedded date-time into a true \DateTime
525
+ # - <tt>:time</tt>: converts each \String-embedded time into a true \Time
532
526
  # .
533
527
  # This example creates a converter proc, then stores it:
534
528
  # strip_converter = proc {|field| field.strip }
@@ -547,6 +541,14 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
547
541
  #
548
542
  # There is no such storage structure for write headers.
549
543
  #
544
+ # In order for the parsing methods to access stored converters in non-main-Ractors, the
545
+ # storage structure must be made shareable first.
546
+ # Therefore, <tt>Ractor.make_shareable(CSV::Converters)</tt> and
547
+ # <tt>Ractor.make_shareable(CSV::HeaderConverters)</tt> must be called before the creation
548
+ # of Ractors that use the converters stored in these structures. (Since making the storage
549
+ # structures shareable involves freezing them, any custom converters that are to be used
550
+ # must be added first.)
551
+ #
550
552
  # ===== Converter Lists
551
553
  #
552
554
  # A _converter_ _list_ is an \Array that may include any assortment of:
@@ -631,6 +633,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
631
633
  # [:numeric, [:integer, :float]]
632
634
  # [:date, Proc]
633
635
  # [:date_time, Proc]
636
+ # [:time, Proc]
634
637
  # [:all, [:date_time, :numeric]]
635
638
  #
636
639
  # Each of these converters transcodes values to UTF-8 before attempting conversion.
@@ -675,6 +678,15 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
675
678
  # csv = CSV.parse_line(data, converters: :date_time)
676
679
  # csv # => [#<DateTime: 2020-05-07T14:59:00-05:00 ((2458977j,71940s,0n),-18000s,2299161j)>, "x"]
677
680
  #
681
+ # Converter +time+ converts each field that Time::parse accepts:
682
+ # data = '2020-05-07T14:59:00-05:00,x'
683
+ # # Without the converter
684
+ # csv = CSV.parse_line(data)
685
+ # csv # => ["2020-05-07T14:59:00-05:00", "x"]
686
+ # # With the converter
687
+ # csv = CSV.parse_line(data, converters: :time)
688
+ # csv # => [2020-05-07 14:59:00 -0500, "x"]
689
+ #
678
690
  # Converter +:numeric+ converts with both +:date_time+ and +:numeric+..
679
691
  #
680
692
  # As seen above, method #convert adds \converters to a \CSV instance,
@@ -705,7 +717,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
705
717
  # Header converters operate only on headers (and not on other rows).
706
718
  #
707
719
  # There are three ways to use header \converters;
708
- # these examples use built-in header converter +:dowhcase+,
720
+ # these examples use built-in header converter +:downcase+,
709
721
  # which downcases each parsed header.
710
722
  #
711
723
  # - Option +header_converters+ with a singleton parsing method:
@@ -846,6 +858,15 @@ class CSV
846
858
  end
847
859
  end
848
860
 
861
+ # The error thrown when the parser encounters invalid encoding in CSV.
862
+ class InvalidEncodingError < MalformedCSVError
863
+ attr_reader :encoding
864
+ def initialize(encoding, line_number)
865
+ @encoding = encoding
866
+ super("Invalid byte sequence in #{encoding}", line_number)
867
+ end
868
+ end
869
+
849
870
  #
850
871
  # A FieldInfo Struct contains details about a field's position in the data
851
872
  # source it was read from. CSV will pass this Struct to some blocks that make
@@ -855,19 +876,19 @@ class CSV
855
876
  # <b><tt>index</tt></b>:: The zero-based index of the field in its row.
856
877
  # <b><tt>line</tt></b>:: The line of the data source this row is from.
857
878
  # <b><tt>header</tt></b>:: The header for the column, when available.
879
+ # <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not.
858
880
  #
859
- FieldInfo = Struct.new(:index, :line, :header)
881
+ FieldInfo = Struct.new(:index, :line, :header, :quoted?)
860
882
 
861
883
  # A Regexp used to find and convert some common Date formats.
862
884
  DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
863
885
  \d{4}-\d{2}-\d{2} )\z /x
864
- # A Regexp used to find and convert some common DateTime formats.
886
+ # A Regexp used to find and convert some common (Date)Time formats.
865
887
  DateTimeMatcher =
866
888
  / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
867
- \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} |
868
- # ISO-8601
889
+ # ISO-8601 and RFC-3339 (space instead of T) recognized by (Date)Time.parse
869
890
  \d{4}-\d{2}-\d{2}
870
- (?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
891
+ (?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
871
892
  )\z /x
872
893
 
873
894
  # The encoding used by all converters.
@@ -903,6 +924,14 @@ class CSV
903
924
  f
904
925
  end
905
926
  },
927
+ time: lambda { |f|
928
+ begin
929
+ e = f.encode(ConverterEncoding)
930
+ e.match?(DateTimeMatcher) ? Time.parse(e) : f
931
+ rescue # encoding conversion or parse errors
932
+ f
933
+ end
934
+ },
906
935
  all: [:date_time, :numeric],
907
936
  }
908
937
 
@@ -917,8 +946,10 @@ class CSV
917
946
  symbol: lambda { |h|
918
947
  h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip.
919
948
  gsub(/\s+/, "_").to_sym
920
- }
949
+ },
950
+ symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym }
921
951
  }
952
+
922
953
  # Default values for method options.
923
954
  DEFAULT_OPTIONS = {
924
955
  # For both parsing and generating.
@@ -927,6 +958,7 @@ class CSV
927
958
  quote_char: '"',
928
959
  # For parsing.
929
960
  field_size_limit: nil,
961
+ max_field_size: nil,
930
962
  converters: nil,
931
963
  unconverted_fields: nil,
932
964
  headers: false,
@@ -937,6 +969,7 @@ class CSV
937
969
  liberal_parsing: false,
938
970
  nil_value: nil,
939
971
  empty_value: "",
972
+ strip: false,
940
973
  # For generating.
941
974
  write_headers: nil,
942
975
  quote_empty: true,
@@ -944,7 +977,6 @@ class CSV
944
977
  write_converters: nil,
945
978
  write_nil_value: nil,
946
979
  write_empty_value: "",
947
- strip: false,
948
980
  }.freeze
949
981
 
950
982
  class << self
@@ -957,6 +989,8 @@ class CSV
957
989
  # Creates or retrieves cached \CSV objects.
958
990
  # For arguments and options, see CSV.new.
959
991
  #
992
+ # This API is not Ractor-safe.
993
+ #
960
994
  # ---
961
995
  #
962
996
  # With no block given, returns a \CSV object.
@@ -992,7 +1026,7 @@ class CSV
992
1026
  def instance(data = $stdout, **options)
993
1027
  # create a _signature_ for this method call, data object and options
994
1028
  sig = [data.object_id] +
995
- options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s })
1029
+ options.values_at(*DEFAULT_OPTIONS.keys)
996
1030
 
997
1031
  # fetch or create the instance for this signature
998
1032
  @@instances ||= Hash.new
@@ -1006,65 +1040,227 @@ class CSV
1006
1040
  end
1007
1041
 
1008
1042
  # :call-seq:
1009
- # filter(**options) {|row| ... }
1010
- # filter(in_string, **options) {|row| ... }
1011
- # filter(in_io, **options) {|row| ... }
1012
- # filter(in_string, out_string, **options) {|row| ... }
1013
- # filter(in_string, out_io, **options) {|row| ... }
1014
- # filter(in_io, out_string, **options) {|row| ... }
1015
- # filter(in_io, out_io, **options) {|row| ... }
1016
- #
1017
- # Reads \CSV input and writes \CSV output.
1018
- #
1019
- # For each input row:
1020
- # - Forms the data into:
1021
- # - A CSV::Row object, if headers are in use.
1022
- # - An \Array of Arrays, otherwise.
1023
- # - Calls the block with that object.
1024
- # - Appends the block's return value to the output.
1043
+ # filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
1044
+ # filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
1045
+ # filter(**options) {|row| ... } -> array_of_arrays or csv_table
1025
1046
  #
1026
- # Arguments:
1027
- # * \CSV source:
1028
- # * Argument +in_string+, if given, should be a \String object;
1029
- # it will be put into a new StringIO object positioned at the beginning.
1030
- # * Argument +in_io+, if given, should be an IO object that is
1031
- # open for reading; on return, the IO object will be closed.
1032
- # * If neither +in_string+ nor +in_io+ is given,
1033
- # the input stream defaults to {ARGF}[https://ruby-doc.org/core/ARGF.html].
1034
- # * \CSV output:
1035
- # * Argument +out_string+, if given, should be a \String object;
1036
- # it will be put into a new StringIO object positioned at the beginning.
1037
- # * Argument +out_io+, if given, should be an IO object that is
1038
- # ppen for writing; on return, the IO object will be closed.
1039
- # * If neither +out_string+ nor +out_io+ is given,
1040
- # the output stream defaults to <tt>$stdout</tt>.
1041
- # * Argument +options+ should be keyword arguments.
1042
- # - Each argument name that is prefixed with +in_+ or +input_+
1043
- # is stripped of its prefix and is treated as an option
1044
- # for parsing the input.
1045
- # Option +input_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
1046
- # - Each argument name that is prefixed with +out_+ or +output_+
1047
- # is stripped of its prefix and is treated as an option
1048
- # for generating the output.
1049
- # Option +output_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
1050
- # - Each argument not prefixed as above is treated as an option
1051
- # both for parsing the input and for generating the output.
1052
- # - See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
1053
- # and {Options for Generating}[#class-CSV-label-Options+for+Generating].
1047
+ # - Parses \CSV from a source (\String, \IO stream, or ARGF).
1048
+ # - Calls the given block with each parsed row:
1049
+ # - Without headers, each row is an \Array.
1050
+ # - With headers, each row is a CSV::Row.
1051
+ # - Generates \CSV to an output (\String, \IO stream, or STDOUT).
1052
+ # - Returns the parsed source:
1053
+ # - Without headers, an \Array of \Arrays.
1054
+ # - With headers, a CSV::Table.
1054
1055
  #
1055
- # Example:
1056
- # in_string = "foo,0\nbar,1\nbaz,2\n"
1056
+ # When +in_string_or_io+ is given, but not +out_string_or_io+,
1057
+ # parses from the given +in_string_or_io+
1058
+ # and generates to STDOUT.
1059
+ #
1060
+ # \String input without headers:
1061
+ #
1062
+ # in_string = "foo,0\nbar,1\nbaz,2"
1063
+ # CSV.filter(in_string) do |row|
1064
+ # row[0].upcase!
1065
+ # row[1] = - row[1].to_i
1066
+ # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
1067
+ #
1068
+ # Output (to STDOUT):
1069
+ #
1070
+ # FOO,0
1071
+ # BAR,-1
1072
+ # BAZ,-2
1073
+ #
1074
+ # \String input with headers:
1075
+ #
1076
+ # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
1077
+ # CSV.filter(in_string, headers: true) do |row|
1078
+ # row[0].upcase!
1079
+ # row[1] = - row[1].to_i
1080
+ # end # => #<CSV::Table mode:col_or_row row_count:4>
1081
+ #
1082
+ # Output (to STDOUT):
1083
+ #
1084
+ # Name,Value
1085
+ # FOO,0
1086
+ # BAR,-1
1087
+ # BAZ,-2
1088
+ #
1089
+ # \IO stream input without headers:
1090
+ #
1091
+ # File.write('t.csv', "foo,0\nbar,1\nbaz,2")
1092
+ # File.open('t.csv') do |in_io|
1093
+ # CSV.filter(in_io) do |row|
1094
+ # row[0].upcase!
1095
+ # row[1] = - row[1].to_i
1096
+ # end
1097
+ # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
1098
+ #
1099
+ # Output (to STDOUT):
1100
+ #
1101
+ # FOO,0
1102
+ # BAR,-1
1103
+ # BAZ,-2
1104
+ #
1105
+ # \IO stream input with headers:
1106
+ #
1107
+ # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
1108
+ # File.open('t.csv') do |in_io|
1109
+ # CSV.filter(in_io, headers: true) do |row|
1110
+ # row[0].upcase!
1111
+ # row[1] = - row[1].to_i
1112
+ # end
1113
+ # end # => #<CSV::Table mode:col_or_row row_count:4>
1114
+ #
1115
+ # Output (to STDOUT):
1116
+ #
1117
+ # Name,Value
1118
+ # FOO,0
1119
+ # BAR,-1
1120
+ # BAZ,-2
1121
+ #
1122
+ # When both +in_string_or_io+ and +out_string_or_io+ are given,
1123
+ # parses from +in_string_or_io+ and generates to +out_string_or_io+.
1124
+ #
1125
+ # \String output without headers:
1126
+ #
1127
+ # in_string = "foo,0\nbar,1\nbaz,2"
1057
1128
  # out_string = ''
1058
1129
  # CSV.filter(in_string, out_string) do |row|
1059
- # row[0] = row[0].upcase
1060
- # row[1] *= 4
1061
- # end
1062
- # out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
1130
+ # row[0].upcase!
1131
+ # row[1] = - row[1].to_i
1132
+ # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
1133
+ # out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n"
1134
+ #
1135
+ # \String output with headers:
1136
+ #
1137
+ # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
1138
+ # out_string = ''
1139
+ # CSV.filter(in_string, out_string, headers: true) do |row|
1140
+ # row[0].upcase!
1141
+ # row[1] = - row[1].to_i
1142
+ # end # => #<CSV::Table mode:col_or_row row_count:4>
1143
+ # out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
1144
+ #
1145
+ # \IO stream output without headers:
1146
+ #
1147
+ # in_string = "foo,0\nbar,1\nbaz,2"
1148
+ # File.open('t.csv', 'w') do |out_io|
1149
+ # CSV.filter(in_string, out_io) do |row|
1150
+ # row[0].upcase!
1151
+ # row[1] = - row[1].to_i
1152
+ # end
1153
+ # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
1154
+ # File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n"
1155
+ #
1156
+ # \IO stream output with headers:
1157
+ #
1158
+ # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
1159
+ # File.open('t.csv', 'w') do |out_io|
1160
+ # CSV.filter(in_string, out_io, headers: true) do |row|
1161
+ # row[0].upcase!
1162
+ # row[1] = - row[1].to_i
1163
+ # end
1164
+ # end # => #<CSV::Table mode:col_or_row row_count:4>
1165
+ # File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
1166
+ #
1167
+ # When neither +in_string_or_io+ nor +out_string_or_io+ is given,
1168
+ # parses from {ARGF}[rdoc-ref:ARGF]
1169
+ # and generates to STDOUT.
1170
+ #
1171
+ # Without headers:
1172
+ #
1173
+ # # Put Ruby code into a file.
1174
+ # ruby = <<-EOT
1175
+ # require 'csv'
1176
+ # CSV.filter do |row|
1177
+ # row[0].upcase!
1178
+ # row[1] = - row[1].to_i
1179
+ # end
1180
+ # EOT
1181
+ # File.write('t.rb', ruby)
1182
+ # # Put some CSV into a file.
1183
+ # File.write('t.csv', "foo,0\nbar,1\nbaz,2")
1184
+ # # Run the Ruby code with CSV filename as argument.
1185
+ # system(Gem.ruby, "t.rb", "t.csv")
1186
+ #
1187
+ # Output (to STDOUT):
1188
+ #
1189
+ # FOO,0
1190
+ # BAR,-1
1191
+ # BAZ,-2
1192
+ #
1193
+ # With headers:
1194
+ #
1195
+ # # Put Ruby code into a file.
1196
+ # ruby = <<-EOT
1197
+ # require 'csv'
1198
+ # CSV.filter(headers: true) do |row|
1199
+ # row[0].upcase!
1200
+ # row[1] = - row[1].to_i
1201
+ # end
1202
+ # EOT
1203
+ # File.write('t.rb', ruby)
1204
+ # # Put some CSV into a file.
1205
+ # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
1206
+ # # Run the Ruby code with CSV filename as argument.
1207
+ # system(Gem.ruby, "t.rb", "t.csv")
1208
+ #
1209
+ # Output (to STDOUT):
1210
+ #
1211
+ # Name,Value
1212
+ # FOO,0
1213
+ # BAR,-1
1214
+ # BAZ,-2
1215
+ #
1216
+ # Arguments:
1217
+ #
1218
+ # * Argument +in_string_or_io+ must be a \String or an \IO stream.
1219
+ # * Argument +out_string_or_io+ must be a \String or an \IO stream.
1220
+ # * Arguments <tt>**options</tt> must be keyword options.
1221
+ #
1222
+ # - Each option defined as an {option for parsing}[#class-CSV-label-Options+for+Parsing]
1223
+ # is used for parsing the filter input.
1224
+ # - Each option defined as an {option for generating}[#class-CSV-label-Options+for+Generating]
1225
+ # is used for generating the filter output.
1226
+ #
1227
+ # However, there are three options that may be used for both parsing and generating:
1228
+ # +col_sep+, +quote_char+, and +row_sep+.
1229
+ #
1230
+ # Therefore for method +filter+ (and method +filter+ only),
1231
+ # there are special options that allow these parsing and generating options
1232
+ # to be specified separately:
1233
+ #
1234
+ # - Options +input_col_sep+ and +output_col_sep+
1235
+ # (and their aliases +in_col_sep+ and +out_col_sep+)
1236
+ # specify the column separators for parsing and generating.
1237
+ # - Options +input_quote_char+ and +output_quote_char+
1238
+ # (and their aliases +in_quote_char+ and +out_quote_char+)
1239
+ # specify the quote characters for parsing and generating.
1240
+ # - Options +input_row_sep+ and +output_row_sep+
1241
+ # (and their aliases +in_row_sep+ and +out_row_sep+)
1242
+ # specify the row separators for parsing and generating.
1243
+ #
1244
+ # Example options (for column separators):
1245
+ #
1246
+ # CSV.filter # Default for both parsing and generating.
1247
+ # CSV.filter(in_col_sep: ';') # ';' for parsing, default for generating.
1248
+ # CSV.filter(out_col_sep: '|') # Default for parsing, '|' for generating.
1249
+ # CSV.filter(in_col_sep: ';', out_col_sep: '|') # ';' for parsing, '|' for generating.
1250
+ #
1251
+ # Note that for a special option (e.g., +input_col_sep+)
1252
+ # and its corresponding "regular" option (e.g., +col_sep+),
1253
+ # the two are mutually overriding.
1254
+ #
1255
+ # Another example (possibly surprising):
1256
+ #
1257
+ # CSV.filter(in_col_sep: ';', col_sep: '|') # '|' for both parsing(!) and generating.
1258
+ #
1063
1259
  def filter(input=nil, output=nil, **options)
1064
1260
  # parse options for input, output, or both
1065
- in_options, out_options = Hash.new, {row_sep: $INPUT_RECORD_SEPARATOR}
1261
+ in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
1066
1262
  options.each do |key, value|
1067
- case key.to_s
1263
+ case key
1068
1264
  when /\Ain(?:put)?_(.+)\Z/
1069
1265
  in_options[$1.to_sym] = value
1070
1266
  when /\Aout(?:put)?_(.+)\Z/
@@ -1106,111 +1302,90 @@ class CSV
1106
1302
 
1107
1303
  #
1108
1304
  # :call-seq:
1109
- # foreach(path, mode='r', **options) {|row| ... )
1110
- # foreach(io, mode='r', **options {|row| ... )
1111
- # foreach(path, mode='r', headers: ..., **options) {|row| ... )
1112
- # foreach(io, mode='r', headers: ..., **options {|row| ... )
1113
- # foreach(path, mode='r', **options) -> new_enumerator
1114
- # foreach(io, mode='r', **options -> new_enumerator
1115
- #
1116
- # Calls the block with each row read from source +path+ or +io+.
1117
- #
1118
- # * Argument +path+, if given, must be the path to a file.
1119
- # :include: ../doc/csv/arguments/io.rdoc
1120
- # * Argument +mode+, if given, must be a \File mode
1121
- # See {Open Mode}[IO.html#method-c-new-label-Open+Mode].
1122
- # * Arguments <tt>**options</tt> must be keyword options.
1123
- # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1124
- # * This method optionally accepts an additional <tt>:encoding</tt> option
1125
- # that you can use to specify the Encoding of the data read from +path+ or +io+.
1126
- # You must provide this unless your data is in the encoding
1127
- # given by <tt>Encoding::default_external</tt>.
1128
- # Parsing will use this to determine how to parse the data.
1129
- # You may provide a second Encoding to
1130
- # have the data transcoded as it is read. For example,
1131
- # encoding: 'UTF-32BE:UTF-8'
1132
- # would read +UTF-32BE+ data from the file
1133
- # but transcode it to +UTF-8+ before parsing.
1305
+ # foreach(path_or_io, mode='r', **options) {|row| ... }
1306
+ # foreach(path_or_io, mode='r', **options) -> new_enumerator
1134
1307
  #
1135
- # ====== Without Option +headers+
1308
+ # Calls the block with each row read from source +path_or_io+.
1136
1309
  #
1137
- # Without option +headers+, returns each row as an \Array object.
1310
+ # \Path input without headers:
1138
1311
  #
1139
- # These examples assume prior execution of:
1140
1312
  # string = "foo,0\nbar,1\nbaz,2\n"
1141
- # path = 't.csv'
1142
- # File.write(path, string)
1313
+ # in_path = 't.csv'
1314
+ # File.write(in_path, string)
1315
+ # CSV.foreach(in_path) {|row| p row }
1143
1316
  #
1144
- # Read rows from a file at +path+:
1145
- # CSV.foreach(path) {|row| p row }
1146
1317
  # Output:
1147
- # ["foo", "0"]
1148
- # ["bar", "1"]
1149
- # ["baz", "2"]
1150
1318
  #
1151
- # Read rows from an \IO object:
1152
- # File.open(path) do |file|
1153
- # CSV.foreach(file) {|row| p row }
1154
- # end
1155
- #
1156
- # Output:
1157
1319
  # ["foo", "0"]
1158
1320
  # ["bar", "1"]
1159
1321
  # ["baz", "2"]
1160
1322
  #
1161
- # Returns a new \Enumerator if no block given:
1162
- # CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
1163
- # CSV.foreach(File.open(path)) # => #<Enumerator: CSV:foreach(#<File:t.csv>, "r")>
1323
+ # \Path input with headers:
1324
+ #
1325
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
1326
+ # in_path = 't.csv'
1327
+ # File.write(in_path, string)
1328
+ # CSV.foreach(in_path, headers: true) {|row| p row }
1164
1329
  #
1165
- # Issues a warning if an encoding is unsupported:
1166
- # CSV.foreach(File.open(path), encoding: 'foo:bar') {|row| }
1167
1330
  # Output:
1168
- # warning: Unsupported encoding foo ignored
1169
- # warning: Unsupported encoding bar ignored
1170
1331
  #
1171
- # ====== With Option +headers+
1332
+ # <CSV::Row "Name":"foo" "Value":"0">
1333
+ # <CSV::Row "Name":"bar" "Value":"1">
1334
+ # <CSV::Row "Name":"baz" "Value":"2">
1172
1335
  #
1173
- # With {option +headers+}[#class-CSV-label-Option+headers],
1174
- # returns each row as a CSV::Row object.
1336
+ # \IO stream input without headers:
1175
1337
  #
1176
- # These examples assume prior execution of:
1177
- # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
1338
+ # string = "foo,0\nbar,1\nbaz,2\n"
1178
1339
  # path = 't.csv'
1179
1340
  # File.write(path, string)
1180
- #
1181
- # Read rows from a file at +path+:
1182
- # CSV.foreach(path, headers: true) {|row| p row }
1341
+ # File.open('t.csv') do |in_io|
1342
+ # CSV.foreach(in_io) {|row| p row }
1343
+ # end
1183
1344
  #
1184
1345
  # Output:
1185
- # #<CSV::Row "Name":"foo" "Count":"0">
1186
- # #<CSV::Row "Name":"bar" "Count":"1">
1187
- # #<CSV::Row "Name":"baz" "Count":"2">
1188
1346
  #
1189
- # Read rows from an \IO object:
1190
- # File.open(path) do |file|
1191
- # CSV.foreach(file, headers: true) {|row| p row }
1347
+ # ["foo", "0"]
1348
+ # ["bar", "1"]
1349
+ # ["baz", "2"]
1350
+ #
1351
+ # \IO stream input with headers:
1352
+ #
1353
+ # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
1354
+ # path = 't.csv'
1355
+ # File.write(path, string)
1356
+ # File.open('t.csv') do |in_io|
1357
+ # CSV.foreach(in_io, headers: true) {|row| p row }
1192
1358
  # end
1193
1359
  #
1194
1360
  # Output:
1195
- # #<CSV::Row "Name":"foo" "Count":"0">
1196
- # #<CSV::Row "Name":"bar" "Count":"1">
1197
- # #<CSV::Row "Name":"baz" "Count":"2">
1198
1361
  #
1199
- # ---
1200
- #
1201
- # Raises an exception if +path+ is a \String, but not the path to a readable file:
1202
- # # Raises Errno::ENOENT (No such file or directory @ rb_sysopen - nosuch.csv):
1203
- # CSV.foreach('nosuch.csv') {|row| }
1362
+ # <CSV::Row "Name":"foo" "Value":"0">
1363
+ # <CSV::Row "Name":"bar" "Value":"1">
1364
+ # <CSV::Row "Name":"baz" "Value":"2">
1204
1365
  #
1205
- # Raises an exception if +io+ is an \IO object, but not open for reading:
1206
- # io = File.open(path, 'w') {|row| }
1207
- # # Raises TypeError (no implicit conversion of nil into String):
1208
- # CSV.foreach(io) {|row| }
1366
+ # With no block given, returns an \Enumerator:
1209
1367
  #
1210
- # Raises an exception if +mode+ is invalid:
1211
- # # Raises ArgumentError (invalid access mode nosuch):
1212
- # CSV.foreach(path, 'nosuch') {|row| }
1368
+ # string = "foo,0\nbar,1\nbaz,2\n"
1369
+ # path = 't.csv'
1370
+ # File.write(path, string)
1371
+ # CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
1213
1372
  #
1373
+ # Arguments:
1374
+ # * Argument +path_or_io+ must be a file path or an \IO stream.
1375
+ # * Argument +mode+, if given, must be a \File mode.
1376
+ # See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
1377
+ # * Arguments <tt>**options</tt> must be keyword options.
1378
+ # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
1379
+ # * This method optionally accepts an additional <tt>:encoding</tt> option
1380
+ # that you can use to specify the Encoding of the data read from +path_or_io+.
1381
+ # You must provide this unless your data is in the encoding
1382
+ # given by <tt>Encoding::default_external</tt>.
1383
+ # Parsing will use this to determine how to parse the data.
1384
+ # You may provide a second Encoding to
1385
+ # have the data transcoded as it is read. For example,
1386
+ # encoding: 'UTF-32BE:UTF-8'
1387
+ # would read +UTF-32BE+ data from the file
1388
+ # but transcode it to +UTF-8+ before parsing.
1214
1389
  def foreach(path, mode="r", **options, &block)
1215
1390
  return to_enum(__method__, path, mode, **options) unless block_given?
1216
1391
  open(path, mode, **options) do |csv|
@@ -1303,8 +1478,8 @@ class CSV
1303
1478
  # Argument +ary+ must be an \Array.
1304
1479
  #
1305
1480
  # Special options:
1306
- # * Option <tt>:row_sep</tt> defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>
1307
- # (<tt>$/</tt>).:
1481
+ # * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
1482
+ # and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise.:
1308
1483
  # $INPUT_RECORD_SEPARATOR # => "\n"
1309
1484
  # * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
1310
1485
  # Encoding for the output. This method will try to guess your Encoding from
@@ -1326,7 +1501,7 @@ class CSV
1326
1501
  # CSV.generate_line(:foo)
1327
1502
  #
1328
1503
  def generate_line(row, **options)
1329
- options = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options)
1504
+ options = {row_sep: InputRecordSeparator.value}.merge(options)
1330
1505
  str = +""
1331
1506
  if options[:encoding]
1332
1507
  str.force_encoding(options[:encoding])
@@ -1348,24 +1523,62 @@ class CSV
1348
1523
  (new(str, **options) << row).string
1349
1524
  end
1350
1525
 
1526
+ # :call-seq:
1527
+ # CSV.generate_lines(rows)
1528
+ # CSV.generate_lines(rows, **options)
1529
+ #
1530
+ # Returns the \String created by generating \CSV from +rows+
1531
+ # using the specified +options+.
1532
+ #
1533
+ # Argument +rows+ must be an \Array of rows. Each row must be an \Array of \String or a \CSV::Row.
1534
+ #
1535
+ # Special options:
1536
+ # * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
1537
+ # and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise.:
1538
+ # $INPUT_RECORD_SEPARATOR # => "\n"
1539
+ # * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
1540
+ # Encoding for the output. This method will try to guess your Encoding from
1541
+ # the first non-+nil+ field in +row+, if possible, but you may need to use
1542
+ # this parameter as a backup plan.
1543
+ #
1544
+ # For other +options+,
1545
+ # see {Options for Generating}[#class-CSV-label-Options+for+Generating].
1546
+ #
1547
+ # ---
1548
+ #
1549
+ # Returns the \String generated from an \Array of rows:
1550
+ # CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n"
1551
+ #
1552
+ # ---
1553
+ #
1554
+ # Raises an exception if +rows+ does not respond to +each+:
1555
+ # # Raises NoMethodError (undefined method `each' for :foo:Symbol)
1556
+ # CSV.generate_lines(:foo)
1557
+ #
1558
+ def generate_lines(rows, **options)
1559
+ self.generate(**options) do |csv|
1560
+ rows.each do |row|
1561
+ csv << row
1562
+ end
1563
+ end
1564
+ end
1565
+
1351
1566
  #
1352
1567
  # :call-seq:
1353
- # open(file_path, mode = "rb", **options ) -> new_csv
1354
- # open(io, mode = "rb", **options ) -> new_csv
1355
- # open(file_path, mode = "rb", **options ) { |csv| ... } -> object
1356
- # open(io, mode = "rb", **options ) { |csv| ... } -> object
1568
+ # open(path_or_io, mode = "rb", **options ) -> new_csv
1569
+ # open(path_or_io, mode = "rb", **options ) { |csv| ... } -> object
1357
1570
  #
1358
1571
  # possible options elements:
1359
- # hash form:
1572
+ # keyword form:
1360
1573
  # :invalid => nil # raise error on invalid byte sequence (default)
1361
1574
  # :invalid => :replace # replace invalid byte sequence
1362
1575
  # :undef => :replace # replace undefined conversion
1363
1576
  # :replace => string # replacement string ("?" or "\uFFFD" if not specified)
1364
1577
  #
1365
- # * Argument +path+, if given, must be the path to a file.
1578
+ # * Argument +path_or_io+ must be a file path or an \IO stream.
1366
1579
  # :include: ../doc/csv/arguments/io.rdoc
1367
- # * Argument +mode+, if given, must be a \File mode
1368
- # See {Open Mode}[IO.html#method-c-new-label-Open+Mode].
1580
+ # * Argument +mode+, if given, must be a \File mode.
1581
+ # See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
1369
1582
  # * Arguments <tt>**options</tt> must be keyword options.
1370
1583
  # See {Options for Generating}[#class-CSV-label-Options+for+Generating].
1371
1584
  # * This method optionally accepts an additional <tt>:encoding</tt> option
@@ -1386,6 +1599,9 @@ class CSV
1386
1599
  # path = 't.csv'
1387
1600
  # File.write(path, string)
1388
1601
  #
1602
+ # string_io = StringIO.new
1603
+ # string_io << "foo,0\nbar,1\nbaz,2\n"
1604
+ #
1389
1605
  # ---
1390
1606
  #
1391
1607
  # With no block given, returns a new \CSV object.
@@ -1398,6 +1614,9 @@ class CSV
1398
1614
  # csv = CSV.open(File.open(path))
1399
1615
  # csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1400
1616
  #
1617
+ # Create a \CSV object using a \StringIO:
1618
+ # csv = CSV.open(string_io)
1619
+ # csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1401
1620
  # ---
1402
1621
  #
1403
1622
  # With a block given, calls the block with the created \CSV object;
@@ -1415,27 +1634,46 @@ class CSV
1415
1634
  # Output:
1416
1635
  # #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1417
1636
  #
1637
+ # Using a \StringIO:
1638
+ # csv = CSV.open(string_io) {|csv| p csv}
1639
+ # csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1640
+ # Output:
1641
+ # #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
1418
1642
  # ---
1419
1643
  #
1420
1644
  # Raises an exception if the argument is not a \String object or \IO object:
1421
1645
  # # Raises TypeError (no implicit conversion of Symbol into String)
1422
1646
  # CSV.open(:foo)
1423
- def open(filename, mode="r", **options)
1647
+ def open(filename_or_io, mode="r", **options)
1424
1648
  # wrap a File opened with the remaining +args+ with no newline
1425
1649
  # decorator
1426
- file_opts = {universal_newline: false}.merge(options)
1650
+ file_opts = {}
1651
+ may_enable_bom_detection_automatically(filename_or_io,
1652
+ mode,
1653
+ options,
1654
+ file_opts)
1655
+ file_opts.merge!(options)
1656
+ unless file_opts.key?(:newline)
1657
+ file_opts[:universal_newline] ||= false
1658
+ end
1427
1659
  options.delete(:invalid)
1428
1660
  options.delete(:undef)
1429
1661
  options.delete(:replace)
1662
+ options.delete_if {|k, _| /newline\z/.match?(k)}
1430
1663
 
1431
- begin
1432
- f = File.open(filename, mode, **file_opts)
1433
- rescue ArgumentError => e
1434
- raise unless /needs binmode/.match?(e.message) and mode == "r"
1435
- mode = "rb"
1436
- file_opts = {encoding: Encoding.default_external}.merge(file_opts)
1437
- retry
1664
+ if filename_or_io.is_a?(StringIO)
1665
+ f = create_stringio(filename_or_io.string, mode, **file_opts)
1666
+ else
1667
+ begin
1668
+ f = File.open(filename_or_io, mode, **file_opts)
1669
+ rescue ArgumentError => e
1670
+ raise unless /needs binmode/.match?(e.message) and mode == "r"
1671
+ mode = "rb"
1672
+ file_opts = {encoding: Encoding.default_external}.merge(file_opts)
1673
+ retry
1674
+ end
1438
1675
  end
1676
+
1439
1677
  begin
1440
1678
  csv = new(f, **options)
1441
1679
  rescue Exception
@@ -1567,6 +1805,23 @@ class CSV
1567
1805
  # Raises an exception if the argument is not a \String object or \IO object:
1568
1806
  # # Raises NoMethodError (undefined method `close' for :foo:Symbol)
1569
1807
  # CSV.parse(:foo)
1808
+ #
1809
+ # ---
1810
+ #
1811
+ # Please check whether your text contains a \BOM. CSV.parse will not remove
1812
+ # \BOM automatically. You might want to remove \BOM before calling CSV.parse:
1813
+ # # remove BOM on calling File.open
1814
+ # File.open(path, encoding: 'bom|utf-8') do |file|
1815
+ # CSV.parse(file, headers: true) do |row|
1816
+ # # you can get value by column name because BOM is removed
1817
+ # p row['Name']
1818
+ # end
1819
+ # end
1820
+ #
1821
+ # Output:
1822
+ # # "foo"
1823
+ # # "bar"
1824
+ # # "baz"
1570
1825
  def parse(str, **options, &block)
1571
1826
  csv = new(str, **options)
1572
1827
 
@@ -1681,7 +1936,7 @@ class CSV
1681
1936
  #
1682
1937
  # Calls CSV.read with +source+, +options+, and certain default options:
1683
1938
  # - +headers+: +true+
1684
- # - +converbers+: +:numeric+
1939
+ # - +converters+: +:numeric+
1685
1940
  # - +header_converters+: +:symbol+
1686
1941
  #
1687
1942
  # Returns a CSV::Table object.
@@ -1700,6 +1955,42 @@ class CSV
1700
1955
  options = default_options.merge(options)
1701
1956
  read(path, **options)
1702
1957
  end
1958
+
1959
+ ON_WINDOWS = /mingw|mswin/.match?(RUBY_PLATFORM)
1960
+ private_constant :ON_WINDOWS
1961
+
1962
+ private
1963
+ def may_enable_bom_detection_automatically(filename_or_io,
1964
+ mode,
1965
+ options,
1966
+ file_opts)
1967
+ if filename_or_io.is_a?(StringIO)
1968
+ # StringIO does not support BOM detection on Ruby 2.6 and earlier:
1969
+ # https://github.com/ruby/stringio/pull/47
1970
+ return if RUBY_VERSION < "2.7"
1971
+ else
1972
+ # "bom|utf-8" may be buggy on Windows:
1973
+ # https://bugs.ruby-lang.org/issues/20526
1974
+ return if ON_WINDOWS
1975
+ end
1976
+ return unless Encoding.default_external == Encoding::UTF_8
1977
+ return if options.key?(:encoding)
1978
+ return if options.key?(:external_encoding)
1979
+ return if mode.include?(":")
1980
+ file_opts[:encoding] = "bom|utf-8"
1981
+ end
1982
+
1983
+ if RUBY_VERSION < "2.7"
1984
+ def create_stringio(str, mode, opts)
1985
+ opts.delete_if {|k, _| k == :universal_newline or DEFAULT_OPTIONS.key?(k)}
1986
+ raise ArgumentError, "Unsupported options parsing StringIO: #{opts.keys}" unless opts.empty?
1987
+ StringIO.new(str, mode)
1988
+ end
1989
+ else
1990
+ def create_stringio(str, mode, opts)
1991
+ StringIO.new(str, mode, **opts)
1992
+ end
1993
+ end
1703
1994
  end
1704
1995
 
1705
1996
  # :call-seq:
@@ -1745,6 +2036,7 @@ class CSV
1745
2036
  row_sep: :auto,
1746
2037
  quote_char: '"',
1747
2038
  field_size_limit: nil,
2039
+ max_field_size: nil,
1748
2040
  converters: nil,
1749
2041
  unconverted_fields: nil,
1750
2042
  headers: false,
@@ -1760,16 +2052,27 @@ class CSV
1760
2052
  encoding: nil,
1761
2053
  nil_value: nil,
1762
2054
  empty_value: "",
2055
+ strip: false,
1763
2056
  quote_empty: true,
1764
2057
  write_converters: nil,
1765
2058
  write_nil_value: nil,
1766
- write_empty_value: "",
1767
- strip: false)
2059
+ write_empty_value: "")
1768
2060
  raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
1769
2061
 
1770
2062
  if data.is_a?(String)
2063
+ if encoding
2064
+ if encoding.is_a?(String)
2065
+ data_external_encoding, data_internal_encoding = encoding.split(":", 2)
2066
+ if data_internal_encoding
2067
+ data = data.encode(data_internal_encoding, data_external_encoding)
2068
+ else
2069
+ data = data.dup.force_encoding(data_external_encoding)
2070
+ end
2071
+ else
2072
+ data = data.dup.force_encoding(encoding)
2073
+ end
2074
+ end
1771
2075
  @io = StringIO.new(data)
1772
- @io.set_encoding(encoding || data.encoding)
1773
2076
  else
1774
2077
  @io = data
1775
2078
  end
@@ -1787,11 +2090,14 @@ class CSV
1787
2090
  @initial_header_converters = header_converters
1788
2091
  @initial_write_converters = write_converters
1789
2092
 
2093
+ if max_field_size.nil? and field_size_limit
2094
+ max_field_size = field_size_limit - 1
2095
+ end
1790
2096
  @parser_options = {
1791
2097
  column_separator: col_sep,
1792
2098
  row_separator: row_sep,
1793
2099
  quote_character: quote_char,
1794
- field_size_limit: field_size_limit,
2100
+ max_field_size: max_field_size,
1795
2101
  unconverted_fields: unconverted_fields,
1796
2102
  headers: headers,
1797
2103
  return_headers: return_headers,
@@ -1823,6 +2129,12 @@ class CSV
1823
2129
  writer if @writer_options[:write_headers]
1824
2130
  end
1825
2131
 
2132
+ class TSV < CSV
2133
+ def initialize(data, **options)
2134
+ super(data, **({col_sep: "\t"}.merge(options)))
2135
+ end
2136
+ end
2137
+
1826
2138
  # :call-seq:
1827
2139
  # csv.col_sep -> string
1828
2140
  #
@@ -1859,10 +2171,24 @@ class CSV
1859
2171
  # Returns the limit for field size; used for parsing;
1860
2172
  # see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
1861
2173
  # CSV.new('').field_size_limit # => nil
2174
+ #
2175
+ # Deprecated since 3.2.3. Use +max_field_size+ instead.
1862
2176
  def field_size_limit
1863
2177
  parser.field_size_limit
1864
2178
  end
1865
2179
 
2180
+ # :call-seq:
2181
+ # csv.max_field_size -> integer or nil
2182
+ #
2183
+ # Returns the limit for field size; used for parsing;
2184
+ # see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]:
2185
+ # CSV.new('').max_field_size # => nil
2186
+ #
2187
+ # Since 3.2.3.
2188
+ def max_field_size
2189
+ parser.max_field_size
2190
+ end
2191
+
1866
2192
  # :call-seq:
1867
2193
  # csv.skip_lines -> regexp or nil
1868
2194
  #
@@ -1884,6 +2210,10 @@ class CSV
1884
2210
  # csv.converters # => [:integer]
1885
2211
  # csv.convert(proc {|x| x.to_s })
1886
2212
  # csv.converters
2213
+ #
2214
+ # Note that you need to call
2215
+ # +Ractor.make_shareable(CSV::Converters)+ on the main Ractor to use
2216
+ # this method.
1887
2217
  def converters
1888
2218
  parser_fields_converter.map do |converter|
1889
2219
  name = Converters.rassoc(converter)
@@ -1946,6 +2276,10 @@ class CSV
1946
2276
  # Returns an \Array containing header converters; used for parsing;
1947
2277
  # see {Header Converters}[#class-CSV-label-Header+Converters]:
1948
2278
  # CSV.new('').header_converters # => []
2279
+ #
2280
+ # Note that you need to call
2281
+ # +Ractor.make_shareable(CSV::HeaderConverters)+ on the main Ractor
2282
+ # to use this method.
1949
2283
  def header_converters
1950
2284
  header_fields_converter.map do |converter|
1951
2285
  name = HeaderConverters.rassoc(converter)
@@ -1985,7 +2319,7 @@ class CSV
1985
2319
  end
1986
2320
 
1987
2321
  # :call-seq:
1988
- # csv.encoding -> endcoding
2322
+ # csv.encoding -> encoding
1989
2323
  #
1990
2324
  # Returns the encoding used for parsing and generating;
1991
2325
  # see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]:
@@ -2353,7 +2687,13 @@ class CSV
2353
2687
  # p row
2354
2688
  # end
2355
2689
  def each(&block)
2356
- parser_enumerator.each(&block)
2690
+ return to_enum(__method__) unless block_given?
2691
+ begin
2692
+ while true
2693
+ yield(parser_enumerator.next)
2694
+ end
2695
+ rescue StopIteration
2696
+ end
2357
2697
  end
2358
2698
 
2359
2699
  # :call-seq:
@@ -2586,7 +2926,7 @@ class CSV
2586
2926
 
2587
2927
  def build_parser_fields_converter
2588
2928
  specific_options = {
2589
- builtin_converters: Converters,
2929
+ builtin_converters_name: :Converters,
2590
2930
  }
2591
2931
  options = @base_fields_converter_options.merge(specific_options)
2592
2932
  build_fields_converter(@initial_converters, options)
@@ -2598,7 +2938,7 @@ class CSV
2598
2938
 
2599
2939
  def build_header_fields_converter
2600
2940
  specific_options = {
2601
- builtin_converters: HeaderConverters,
2941
+ builtin_converters_name: :HeaderConverters,
2602
2942
  accept_nil: true,
2603
2943
  }
2604
2944
  options = @base_fields_converter_options.merge(specific_options)
@@ -2661,8 +3001,15 @@ end
2661
3001
  # c.read.any? { |a| a.include?("zombies") }
2662
3002
  # } #=> false
2663
3003
  #
2664
- def CSV(*args, &block)
2665
- CSV.instance(*args, &block)
3004
+ # CSV options may also be given.
3005
+ #
3006
+ # io = StringIO.new
3007
+ # CSV(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] }
3008
+ #
3009
+ # This API is not Ractor-safe.
3010
+ #
3011
+ def CSV(*args, **options, &block)
3012
+ CSV.instance(*args, **options, &block)
2666
3013
  end
2667
3014
 
2668
3015
  require_relative "csv/version"