csv 3.1.9 → 3.2.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +130 -0
- data/README.md +3 -6
- data/doc/csv/options/generating/write_headers.rdoc +1 -1
- data/doc/csv/recipes/generating.rdoc +1 -1
- data/doc/csv/recipes/parsing.rdoc +3 -3
- data/lib/csv/fields_converter.rb +6 -2
- data/lib/csv/input_record_separator.rb +18 -0
- data/lib/csv/parser.rb +202 -65
- data/lib/csv/row.rb +22 -0
- data/lib/csv/table.rb +17 -5
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +2 -1
- data/lib/csv.rb +309 -152
- metadata +7 -6
data/lib/csv.rb
CHANGED
@@ -48,7 +48,7 @@
|
|
48
48
|
#
|
49
49
|
# === Interface
|
50
50
|
#
|
51
|
-
# * CSV now uses
|
51
|
+
# * CSV now uses keyword parameters to set options.
|
52
52
|
# * CSV no longer has generate_row() or parse_row().
|
53
53
|
# * The old CSV's Reader and Writer classes have been dropped.
|
54
54
|
# * CSV::open() is now more like Ruby's open().
|
@@ -90,11 +90,11 @@
|
|
90
90
|
# with any questions.
|
91
91
|
|
92
92
|
require "forwardable"
|
93
|
-
require "English"
|
94
93
|
require "date"
|
95
94
|
require "stringio"
|
96
95
|
|
97
96
|
require_relative "csv/fields_converter"
|
97
|
+
require_relative "csv/input_record_separator"
|
98
98
|
require_relative "csv/match_p"
|
99
99
|
require_relative "csv/parser"
|
100
100
|
require_relative "csv/row"
|
@@ -341,6 +341,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
341
341
|
# liberal_parsing: false,
|
342
342
|
# nil_value: nil,
|
343
343
|
# empty_value: "",
|
344
|
+
# strip: false,
|
344
345
|
# # For generating.
|
345
346
|
# write_headers: nil,
|
346
347
|
# quote_empty: true,
|
@@ -348,7 +349,6 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
348
349
|
# write_converters: nil,
|
349
350
|
# write_nil_value: nil,
|
350
351
|
# write_empty_value: "",
|
351
|
-
# strip: false,
|
352
352
|
# }
|
353
353
|
#
|
354
354
|
# ==== Options for Parsing
|
@@ -357,7 +357,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
357
357
|
# - +row_sep+: Specifies the row separator; used to delimit rows.
|
358
358
|
# - +col_sep+: Specifies the column separator; used to delimit fields.
|
359
359
|
# - +quote_char+: Specifies the quote character; used to quote fields.
|
360
|
-
# - +field_size_limit+: Specifies the maximum field size allowed.
|
360
|
+
# - +field_size_limit+: Specifies the maximum field size + 1 allowed.
|
361
|
+
# Deprecated since 3.2.3. Use +max_field_size+ instead.
|
362
|
+
# - +max_field_size+: Specifies the maximum field size allowed.
|
361
363
|
# - +converters+: Specifies the field converters to be used.
|
362
364
|
# - +unconverted_fields+: Specifies whether unconverted fields are to be available.
|
363
365
|
# - +headers+: Specifies whether data contains headers,
|
@@ -366,8 +368,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
366
368
|
# - +header_converters+: Specifies the header converters to be used.
|
367
369
|
# - +skip_blanks+: Specifies whether blank lines are to be ignored.
|
368
370
|
# - +skip_lines+: Specifies how comment lines are to be recognized.
|
369
|
-
# - +strip+: Specifies whether leading and trailing whitespace are
|
370
|
-
#
|
371
|
+
# - +strip+: Specifies whether leading and trailing whitespace are to be
|
372
|
+
# stripped from fields. This must be compatible with +col_sep+; if it is not,
|
373
|
+
# then an +ArgumentError+ exception will be raised.
|
371
374
|
# - +liberal_parsing+: Specifies whether \CSV should attempt to parse
|
372
375
|
# non-compliant data.
|
373
376
|
# - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field.
|
@@ -513,7 +516,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
513
516
|
# [" 1 ", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
|
514
517
|
# [" baz ", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
|
515
518
|
# [" 2 ", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
|
516
|
-
# Each CSV::
|
519
|
+
# Each CSV::FieldInfo object shows:
|
517
520
|
# - The 0-based field index.
|
518
521
|
# - The 1-based line index.
|
519
522
|
# - The field header, if any.
|
@@ -547,6 +550,14 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
547
550
|
#
|
548
551
|
# There is no such storage structure for write headers.
|
549
552
|
#
|
553
|
+
# In order for the parsing methods to access stored converters in non-main-Ractors, the
|
554
|
+
# storage structure must be made shareable first.
|
555
|
+
# Therefore, <tt>Ractor.make_shareable(CSV::Converters)</tt> and
|
556
|
+
# <tt>Ractor.make_shareable(CSV::HeaderConverters)</tt> must be called before the creation
|
557
|
+
# of Ractors that use the converters stored in these structures. (Since making the storage
|
558
|
+
# structures shareable involves freezing them, any custom converters that are to be used
|
559
|
+
# must be added first.)
|
560
|
+
#
|
550
561
|
# ===== Converter Lists
|
551
562
|
#
|
552
563
|
# A _converter_ _list_ is an \Array that may include any assortment of:
|
@@ -705,7 +716,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
705
716
|
# Header converters operate only on headers (and not on other rows).
|
706
717
|
#
|
707
718
|
# There are three ways to use header \converters;
|
708
|
-
# these examples use built-in header converter +:
|
719
|
+
# these examples use built-in header converter +:downcase+,
|
709
720
|
# which downcases each parsed header.
|
710
721
|
#
|
711
722
|
# - Option +header_converters+ with a singleton parsing method:
|
@@ -917,8 +928,10 @@ class CSV
|
|
917
928
|
symbol: lambda { |h|
|
918
929
|
h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip.
|
919
930
|
gsub(/\s+/, "_").to_sym
|
920
|
-
}
|
931
|
+
},
|
932
|
+
symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym }
|
921
933
|
}
|
934
|
+
|
922
935
|
# Default values for method options.
|
923
936
|
DEFAULT_OPTIONS = {
|
924
937
|
# For both parsing and generating.
|
@@ -927,6 +940,7 @@ class CSV
|
|
927
940
|
quote_char: '"',
|
928
941
|
# For parsing.
|
929
942
|
field_size_limit: nil,
|
943
|
+
max_field_size: nil,
|
930
944
|
converters: nil,
|
931
945
|
unconverted_fields: nil,
|
932
946
|
headers: false,
|
@@ -937,6 +951,7 @@ class CSV
|
|
937
951
|
liberal_parsing: false,
|
938
952
|
nil_value: nil,
|
939
953
|
empty_value: "",
|
954
|
+
strip: false,
|
940
955
|
# For generating.
|
941
956
|
write_headers: nil,
|
942
957
|
quote_empty: true,
|
@@ -944,7 +959,6 @@ class CSV
|
|
944
959
|
write_converters: nil,
|
945
960
|
write_nil_value: nil,
|
946
961
|
write_empty_value: "",
|
947
|
-
strip: false,
|
948
962
|
}.freeze
|
949
963
|
|
950
964
|
class << self
|
@@ -957,6 +971,8 @@ class CSV
|
|
957
971
|
# Creates or retrieves cached \CSV objects.
|
958
972
|
# For arguments and options, see CSV.new.
|
959
973
|
#
|
974
|
+
# This API is not Ractor-safe.
|
975
|
+
#
|
960
976
|
# ---
|
961
977
|
#
|
962
978
|
# With no block given, returns a \CSV object.
|
@@ -1006,63 +1022,188 @@ class CSV
|
|
1006
1022
|
end
|
1007
1023
|
|
1008
1024
|
# :call-seq:
|
1009
|
-
# filter(**options) {|row| ... }
|
1010
|
-
# filter(
|
1011
|
-
# filter(
|
1012
|
-
# filter(in_string, out_string, **options) {|row| ... }
|
1013
|
-
# filter(in_string, out_io, **options) {|row| ... }
|
1014
|
-
# filter(in_io, out_string, **options) {|row| ... }
|
1015
|
-
# filter(in_io, out_io, **options) {|row| ... }
|
1016
|
-
#
|
1017
|
-
# Reads \CSV input and writes \CSV output.
|
1018
|
-
#
|
1019
|
-
# For each input row:
|
1020
|
-
# - Forms the data into:
|
1021
|
-
# - A CSV::Row object, if headers are in use.
|
1022
|
-
# - An \Array of Arrays, otherwise.
|
1023
|
-
# - Calls the block with that object.
|
1024
|
-
# - Appends the block's return value to the output.
|
1025
|
+
# filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
|
1026
|
+
# filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
|
1027
|
+
# filter(**options) {|row| ... } -> array_of_arrays or csv_table
|
1025
1028
|
#
|
1026
|
-
#
|
1027
|
-
#
|
1028
|
-
#
|
1029
|
-
#
|
1030
|
-
#
|
1031
|
-
#
|
1032
|
-
#
|
1033
|
-
#
|
1034
|
-
# * \CSV output:
|
1035
|
-
# * Argument +out_string+, if given, should be a \String object;
|
1036
|
-
# it will be put into a new StringIO object positioned at the beginning.
|
1037
|
-
# * Argument +out_io+, if given, should be an IO object that is
|
1038
|
-
# open for writing; on return, the IO object will be closed.
|
1039
|
-
# * If neither +out_string+ nor +out_io+ is given,
|
1040
|
-
# the output stream defaults to <tt>$stdout</tt>.
|
1041
|
-
# * Argument +options+ should be keyword arguments.
|
1042
|
-
# - Each argument name that is prefixed with +in_+ or +input_+
|
1043
|
-
# is stripped of its prefix and is treated as an option
|
1044
|
-
# for parsing the input.
|
1045
|
-
# Option +input_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
|
1046
|
-
# - Each argument name that is prefixed with +out_+ or +output_+
|
1047
|
-
# is stripped of its prefix and is treated as an option
|
1048
|
-
# for generating the output.
|
1049
|
-
# Option +output_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
|
1050
|
-
# - Each argument not prefixed as above is treated as an option
|
1051
|
-
# both for parsing the input and for generating the output.
|
1052
|
-
# - See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
|
1053
|
-
# and {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1029
|
+
# - Parses \CSV from a source (\String, \IO stream, or ARGF).
|
1030
|
+
# - Calls the given block with each parsed row:
|
1031
|
+
# - Without headers, each row is an \Array.
|
1032
|
+
# - With headers, each row is a CSV::Row.
|
1033
|
+
# - Generates \CSV to an output (\String, \IO stream, or STDOUT).
|
1034
|
+
# - Returns the parsed source:
|
1035
|
+
# - Without headers, an \Array of \Arrays.
|
1036
|
+
# - With headers, a CSV::Table.
|
1054
1037
|
#
|
1055
|
-
#
|
1056
|
-
#
|
1038
|
+
# When +in_string_or_io+ is given, but not +out_string_or_io+,
|
1039
|
+
# parses from the given +in_string_or_io+
|
1040
|
+
# and generates to STDOUT.
|
1041
|
+
#
|
1042
|
+
# \String input without headers:
|
1043
|
+
#
|
1044
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
1045
|
+
# CSV.filter(in_string) do |row|
|
1046
|
+
# row[0].upcase!
|
1047
|
+
# row[1] = - row[1].to_i
|
1048
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
1049
|
+
#
|
1050
|
+
# Output (to STDOUT):
|
1051
|
+
#
|
1052
|
+
# FOO,0
|
1053
|
+
# BAR,-1
|
1054
|
+
# BAZ,-2
|
1055
|
+
#
|
1056
|
+
# \String input with headers:
|
1057
|
+
#
|
1058
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
1059
|
+
# CSV.filter(in_string, headers: true) do |row|
|
1060
|
+
# row[0].upcase!
|
1061
|
+
# row[1] = - row[1].to_i
|
1062
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
1063
|
+
#
|
1064
|
+
# Output (to STDOUT):
|
1065
|
+
#
|
1066
|
+
# Name,Value
|
1067
|
+
# FOO,0
|
1068
|
+
# BAR,-1
|
1069
|
+
# BAZ,-2
|
1070
|
+
#
|
1071
|
+
# \IO stream input without headers:
|
1072
|
+
#
|
1073
|
+
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
|
1074
|
+
# File.open('t.csv') do |in_io|
|
1075
|
+
# CSV.filter(in_io) do |row|
|
1076
|
+
# row[0].upcase!
|
1077
|
+
# row[1] = - row[1].to_i
|
1078
|
+
# end
|
1079
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
1080
|
+
#
|
1081
|
+
# Output (to STDOUT):
|
1082
|
+
#
|
1083
|
+
# FOO,0
|
1084
|
+
# BAR,-1
|
1085
|
+
# BAZ,-2
|
1086
|
+
#
|
1087
|
+
# \IO stream input with headers:
|
1088
|
+
#
|
1089
|
+
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
|
1090
|
+
# File.open('t.csv') do |in_io|
|
1091
|
+
# CSV.filter(in_io, headers: true) do |row|
|
1092
|
+
# row[0].upcase!
|
1093
|
+
# row[1] = - row[1].to_i
|
1094
|
+
# end
|
1095
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
1096
|
+
#
|
1097
|
+
# Output (to STDOUT):
|
1098
|
+
#
|
1099
|
+
# Name,Value
|
1100
|
+
# FOO,0
|
1101
|
+
# BAR,-1
|
1102
|
+
# BAZ,-2
|
1103
|
+
#
|
1104
|
+
# When both +in_string_or_io+ and +out_string_or_io+ are given,
|
1105
|
+
# parses from +in_string_or_io+ and generates to +out_string_or_io+.
|
1106
|
+
#
|
1107
|
+
# \String output without headers:
|
1108
|
+
#
|
1109
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
1057
1110
|
# out_string = ''
|
1058
1111
|
# CSV.filter(in_string, out_string) do |row|
|
1059
|
-
# row[0]
|
1060
|
-
# row[1]
|
1061
|
-
# end
|
1062
|
-
# out_string # => "FOO,
|
1112
|
+
# row[0].upcase!
|
1113
|
+
# row[1] = - row[1].to_i
|
1114
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
1115
|
+
# out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n"
|
1116
|
+
#
|
1117
|
+
# \String output with headers:
|
1118
|
+
#
|
1119
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
1120
|
+
# out_string = ''
|
1121
|
+
# CSV.filter(in_string, out_string, headers: true) do |row|
|
1122
|
+
# row[0].upcase!
|
1123
|
+
# row[1] = - row[1].to_i
|
1124
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
1125
|
+
# out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
|
1126
|
+
#
|
1127
|
+
# \IO stream output without headers:
|
1128
|
+
#
|
1129
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
1130
|
+
# File.open('t.csv', 'w') do |out_io|
|
1131
|
+
# CSV.filter(in_string, out_io) do |row|
|
1132
|
+
# row[0].upcase!
|
1133
|
+
# row[1] = - row[1].to_i
|
1134
|
+
# end
|
1135
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
1136
|
+
# File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n"
|
1137
|
+
#
|
1138
|
+
# \IO stream output with headers:
|
1139
|
+
#
|
1140
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
1141
|
+
# File.open('t.csv', 'w') do |out_io|
|
1142
|
+
# CSV.filter(in_string, out_io, headers: true) do |row|
|
1143
|
+
# row[0].upcase!
|
1144
|
+
# row[1] = - row[1].to_i
|
1145
|
+
# end
|
1146
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
1147
|
+
# File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
|
1148
|
+
#
|
1149
|
+
# When neither +in_string_or_io+ nor +out_string_or_io+ is given,
|
1150
|
+
# parses from {ARGF}[https://docs.ruby-lang.org/en/master/ARGF.html]
|
1151
|
+
# and generates to STDOUT.
|
1152
|
+
#
|
1153
|
+
# Without headers:
|
1154
|
+
#
|
1155
|
+
# # Put Ruby code into a file.
|
1156
|
+
# ruby = <<-EOT
|
1157
|
+
# require 'csv'
|
1158
|
+
# CSV.filter do |row|
|
1159
|
+
# row[0].upcase!
|
1160
|
+
# row[1] = - row[1].to_i
|
1161
|
+
# end
|
1162
|
+
# EOT
|
1163
|
+
# File.write('t.rb', ruby)
|
1164
|
+
# # Put some CSV into a file.
|
1165
|
+
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
|
1166
|
+
# # Run the Ruby code with CSV filename as argument.
|
1167
|
+
# system(Gem.ruby, "t.rb", "t.csv")
|
1168
|
+
#
|
1169
|
+
# Output (to STDOUT):
|
1170
|
+
#
|
1171
|
+
# FOO,0
|
1172
|
+
# BAR,-1
|
1173
|
+
# BAZ,-2
|
1174
|
+
#
|
1175
|
+
# With headers:
|
1176
|
+
#
|
1177
|
+
# # Put Ruby code into a file.
|
1178
|
+
# ruby = <<-EOT
|
1179
|
+
# require 'csv'
|
1180
|
+
# CSV.filter(headers: true) do |row|
|
1181
|
+
# row[0].upcase!
|
1182
|
+
# row[1] = - row[1].to_i
|
1183
|
+
# end
|
1184
|
+
# EOT
|
1185
|
+
# File.write('t.rb', ruby)
|
1186
|
+
# # Put some CSV into a file.
|
1187
|
+
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
|
1188
|
+
# # Run the Ruby code with CSV filename as argument.
|
1189
|
+
# system(Gem.ruby, "t.rb", "t.csv")
|
1190
|
+
#
|
1191
|
+
# Output (to STDOUT):
|
1192
|
+
#
|
1193
|
+
# Name,Value
|
1194
|
+
# FOO,0
|
1195
|
+
# BAR,-1
|
1196
|
+
# BAZ,-2
|
1197
|
+
#
|
1198
|
+
# Arguments:
|
1199
|
+
#
|
1200
|
+
# * Argument +in_string_or_io+ must be a \String or an \IO stream.
|
1201
|
+
# * Argument +out_string_or_io+ must be a \String or an \IO stream.
|
1202
|
+
# * Arguments <tt>**options</tt> must be keyword options.
|
1203
|
+
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
1063
1204
|
def filter(input=nil, output=nil, **options)
|
1064
1205
|
# parse options for input, output, or both
|
1065
|
-
in_options, out_options = Hash.new, {row_sep:
|
1206
|
+
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
|
1066
1207
|
options.each do |key, value|
|
1067
1208
|
case key.to_s
|
1068
1209
|
when /\Ain(?:put)?_(.+)\Z/
|
@@ -1106,111 +1247,90 @@ class CSV
|
|
1106
1247
|
|
1107
1248
|
#
|
1108
1249
|
# :call-seq:
|
1109
|
-
# foreach(
|
1110
|
-
# foreach(
|
1111
|
-
# foreach(path, mode='r', headers: ..., **options) {|row| ... )
|
1112
|
-
# foreach(io, mode='r', headers: ..., **options {|row| ... )
|
1113
|
-
# foreach(path, mode='r', **options) -> new_enumerator
|
1114
|
-
# foreach(io, mode='r', **options -> new_enumerator
|
1115
|
-
#
|
1116
|
-
# Calls the block with each row read from source +path+ or +io+.
|
1117
|
-
#
|
1118
|
-
# * Argument +path+, if given, must be the path to a file.
|
1119
|
-
# :include: ../doc/csv/arguments/io.rdoc
|
1120
|
-
# * Argument +mode+, if given, must be a \File mode
|
1121
|
-
# See {Open Mode}[IO.html#method-c-new-label-Open+Mode].
|
1122
|
-
# * Arguments <tt>**options</tt> must be keyword options.
|
1123
|
-
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
1124
|
-
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
1125
|
-
# that you can use to specify the Encoding of the data read from +path+ or +io+.
|
1126
|
-
# You must provide this unless your data is in the encoding
|
1127
|
-
# given by <tt>Encoding::default_external</tt>.
|
1128
|
-
# Parsing will use this to determine how to parse the data.
|
1129
|
-
# You may provide a second Encoding to
|
1130
|
-
# have the data transcoded as it is read. For example,
|
1131
|
-
# encoding: 'UTF-32BE:UTF-8'
|
1132
|
-
# would read +UTF-32BE+ data from the file
|
1133
|
-
# but transcode it to +UTF-8+ before parsing.
|
1250
|
+
# foreach(path_or_io, mode='r', **options) {|row| ... }
|
1251
|
+
# foreach(path_or_io, mode='r', **options) -> new_enumerator
|
1134
1252
|
#
|
1135
|
-
#
|
1253
|
+
# Calls the block with each row read from source +path_or_io+.
|
1136
1254
|
#
|
1137
|
-
#
|
1255
|
+
# \Path input without headers:
|
1138
1256
|
#
|
1139
|
-
# These examples assume prior execution of:
|
1140
1257
|
# string = "foo,0\nbar,1\nbaz,2\n"
|
1141
|
-
#
|
1142
|
-
# File.write(
|
1258
|
+
# in_path = 't.csv'
|
1259
|
+
# File.write(in_path, string)
|
1260
|
+
# CSV.foreach(in_path) {|row| p row }
|
1143
1261
|
#
|
1144
|
-
# Read rows from a file at +path+:
|
1145
|
-
# CSV.foreach(path) {|row| p row }
|
1146
1262
|
# Output:
|
1147
|
-
# ["foo", "0"]
|
1148
|
-
# ["bar", "1"]
|
1149
|
-
# ["baz", "2"]
|
1150
|
-
#
|
1151
|
-
# Read rows from an \IO object:
|
1152
|
-
# File.open(path) do |file|
|
1153
|
-
# CSV.foreach(file) {|row| p row }
|
1154
|
-
# end
|
1155
1263
|
#
|
1156
|
-
# Output:
|
1157
1264
|
# ["foo", "0"]
|
1158
1265
|
# ["bar", "1"]
|
1159
1266
|
# ["baz", "2"]
|
1160
1267
|
#
|
1161
|
-
#
|
1162
|
-
#
|
1163
|
-
#
|
1268
|
+
# \Path input with headers:
|
1269
|
+
#
|
1270
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
1271
|
+
# in_path = 't.csv'
|
1272
|
+
# File.write(in_path, string)
|
1273
|
+
# CSV.foreach(in_path, headers: true) {|row| p row }
|
1164
1274
|
#
|
1165
|
-
# Issues a warning if an encoding is unsupported:
|
1166
|
-
# CSV.foreach(File.open(path), encoding: 'foo:bar') {|row| }
|
1167
1275
|
# Output:
|
1168
|
-
# warning: Unsupported encoding foo ignored
|
1169
|
-
# warning: Unsupported encoding bar ignored
|
1170
1276
|
#
|
1171
|
-
#
|
1277
|
+
# <CSV::Row "Name":"foo" "Value":"0">
|
1278
|
+
# <CSV::Row "Name":"bar" "Value":"1">
|
1279
|
+
# <CSV::Row "Name":"baz" "Value":"2">
|
1172
1280
|
#
|
1173
|
-
#
|
1174
|
-
# returns each row as a CSV::Row object.
|
1281
|
+
# \IO stream input without headers:
|
1175
1282
|
#
|
1176
|
-
#
|
1177
|
-
# string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
|
1283
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1178
1284
|
# path = 't.csv'
|
1179
1285
|
# File.write(path, string)
|
1180
|
-
#
|
1181
|
-
#
|
1182
|
-
#
|
1286
|
+
# File.open('t.csv') do |in_io|
|
1287
|
+
# CSV.foreach(in_io) {|row| p row }
|
1288
|
+
# end
|
1183
1289
|
#
|
1184
1290
|
# Output:
|
1185
|
-
# #<CSV::Row "Name":"foo" "Count":"0">
|
1186
|
-
# #<CSV::Row "Name":"bar" "Count":"1">
|
1187
|
-
# #<CSV::Row "Name":"baz" "Count":"2">
|
1188
1291
|
#
|
1189
|
-
#
|
1190
|
-
#
|
1191
|
-
#
|
1292
|
+
# ["foo", "0"]
|
1293
|
+
# ["bar", "1"]
|
1294
|
+
# ["baz", "2"]
|
1295
|
+
#
|
1296
|
+
# \IO stream input with headers:
|
1297
|
+
#
|
1298
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
1299
|
+
# path = 't.csv'
|
1300
|
+
# File.write(path, string)
|
1301
|
+
# File.open('t.csv') do |in_io|
|
1302
|
+
# CSV.foreach(in_io, headers: true) {|row| p row }
|
1192
1303
|
# end
|
1193
1304
|
#
|
1194
1305
|
# Output:
|
1195
|
-
# #<CSV::Row "Name":"foo" "Count":"0">
|
1196
|
-
# #<CSV::Row "Name":"bar" "Count":"1">
|
1197
|
-
# #<CSV::Row "Name":"baz" "Count":"2">
|
1198
|
-
#
|
1199
|
-
# ---
|
1200
1306
|
#
|
1201
|
-
#
|
1202
|
-
#
|
1203
|
-
# CSV
|
1307
|
+
# <CSV::Row "Name":"foo" "Value":"0">
|
1308
|
+
# <CSV::Row "Name":"bar" "Value":"1">
|
1309
|
+
# <CSV::Row "Name":"baz" "Value":"2">
|
1204
1310
|
#
|
1205
|
-
#
|
1206
|
-
# io = File.open(path, 'w') {|row| }
|
1207
|
-
# # Raises TypeError (no implicit conversion of nil into String):
|
1208
|
-
# CSV.foreach(io) {|row| }
|
1311
|
+
# With no block given, returns an \Enumerator:
|
1209
1312
|
#
|
1210
|
-
#
|
1211
|
-
#
|
1212
|
-
#
|
1313
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1314
|
+
# path = 't.csv'
|
1315
|
+
# File.write(path, string)
|
1316
|
+
# CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
|
1213
1317
|
#
|
1318
|
+
# Arguments:
|
1319
|
+
# * Argument +path_or_io+ must be a file path or an \IO stream.
|
1320
|
+
# * Argument +mode+, if given, must be a \File mode.
|
1321
|
+
# See {Open Mode}[https://ruby-doc.org/core/IO.html#method-c-new-label-Open+Mode].
|
1322
|
+
# * Arguments <tt>**options</tt> must be keyword options.
|
1323
|
+
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
1324
|
+
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
1325
|
+
# that you can use to specify the Encoding of the data read from +path_or_io+.
|
1326
|
+
# You must provide this unless your data is in the encoding
|
1327
|
+
# given by <tt>Encoding::default_external</tt>.
|
1328
|
+
# Parsing will use this to determine how to parse the data.
|
1329
|
+
# You may provide a second Encoding to
|
1330
|
+
# have the data transcoded as it is read. For example,
|
1331
|
+
# encoding: 'UTF-32BE:UTF-8'
|
1332
|
+
# would read +UTF-32BE+ data from the file
|
1333
|
+
# but transcode it to +UTF-8+ before parsing.
|
1214
1334
|
def foreach(path, mode="r", **options, &block)
|
1215
1335
|
return to_enum(__method__, path, mode, **options) unless block_given?
|
1216
1336
|
open(path, mode, **options) do |csv|
|
@@ -1303,8 +1423,8 @@ class CSV
|
|
1303
1423
|
# Argument +ary+ must be an \Array.
|
1304
1424
|
#
|
1305
1425
|
# Special options:
|
1306
|
-
# * Option <tt>:row_sep</tt> defaults to <tt
|
1307
|
-
# (<tt>$/</tt>).:
|
1426
|
+
# * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
|
1427
|
+
# and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise:
|
1308
1428
|
# $INPUT_RECORD_SEPARATOR # => "\n"
|
1309
1429
|
# * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
|
1310
1430
|
# Encoding for the output. This method will try to guess your Encoding from
|
@@ -1326,7 +1446,7 @@ class CSV
|
|
1326
1446
|
# CSV.generate_line(:foo)
|
1327
1447
|
#
|
1328
1448
|
def generate_line(row, **options)
|
1329
|
-
options = {row_sep:
|
1449
|
+
options = {row_sep: InputRecordSeparator.value}.merge(options)
|
1330
1450
|
str = +""
|
1331
1451
|
if options[:encoding]
|
1332
1452
|
str.force_encoding(options[:encoding])
|
@@ -1356,7 +1476,7 @@ class CSV
|
|
1356
1476
|
# open(io, mode = "rb", **options ) { |csv| ... } -> object
|
1357
1477
|
#
|
1358
1478
|
# possible options elements:
|
1359
|
-
#
|
1479
|
+
# keyword form:
|
1360
1480
|
# :invalid => nil # raise error on invalid byte sequence (default)
|
1361
1481
|
# :invalid => :replace # replace invalid byte sequence
|
1362
1482
|
# :undef => :replace # replace undefined conversion
|
@@ -1423,10 +1543,14 @@ class CSV
|
|
1423
1543
|
def open(filename, mode="r", **options)
|
1424
1544
|
# wrap a File opened with the remaining +args+ with no newline
|
1425
1545
|
# decorator
|
1426
|
-
file_opts =
|
1546
|
+
file_opts = options.dup
|
1547
|
+
unless file_opts.key?(:newline)
|
1548
|
+
file_opts[:universal_newline] ||= false
|
1549
|
+
end
|
1427
1550
|
options.delete(:invalid)
|
1428
1551
|
options.delete(:undef)
|
1429
1552
|
options.delete(:replace)
|
1553
|
+
options.delete_if {|k, _| /newline\z/.match?(k)}
|
1430
1554
|
|
1431
1555
|
begin
|
1432
1556
|
f = File.open(filename, mode, **file_opts)
|
@@ -1681,7 +1805,7 @@ class CSV
|
|
1681
1805
|
#
|
1682
1806
|
# Calls CSV.read with +source+, +options+, and certain default options:
|
1683
1807
|
# - +headers+: +true+
|
1684
|
-
# - +
|
1808
|
+
# - +converters+: +:numeric+
|
1685
1809
|
# - +header_converters+: +:symbol+
|
1686
1810
|
#
|
1687
1811
|
# Returns a CSV::Table object.
|
@@ -1745,6 +1869,7 @@ class CSV
|
|
1745
1869
|
row_sep: :auto,
|
1746
1870
|
quote_char: '"',
|
1747
1871
|
field_size_limit: nil,
|
1872
|
+
max_field_size: nil,
|
1748
1873
|
converters: nil,
|
1749
1874
|
unconverted_fields: nil,
|
1750
1875
|
headers: false,
|
@@ -1760,11 +1885,11 @@ class CSV
|
|
1760
1885
|
encoding: nil,
|
1761
1886
|
nil_value: nil,
|
1762
1887
|
empty_value: "",
|
1888
|
+
strip: false,
|
1763
1889
|
quote_empty: true,
|
1764
1890
|
write_converters: nil,
|
1765
1891
|
write_nil_value: nil,
|
1766
|
-
write_empty_value: ""
|
1767
|
-
strip: false)
|
1892
|
+
write_empty_value: "")
|
1768
1893
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
1769
1894
|
|
1770
1895
|
if data.is_a?(String)
|
@@ -1787,11 +1912,14 @@ class CSV
|
|
1787
1912
|
@initial_header_converters = header_converters
|
1788
1913
|
@initial_write_converters = write_converters
|
1789
1914
|
|
1915
|
+
if max_field_size.nil? and field_size_limit
|
1916
|
+
max_field_size = field_size_limit - 1
|
1917
|
+
end
|
1790
1918
|
@parser_options = {
|
1791
1919
|
column_separator: col_sep,
|
1792
1920
|
row_separator: row_sep,
|
1793
1921
|
quote_character: quote_char,
|
1794
|
-
|
1922
|
+
max_field_size: max_field_size,
|
1795
1923
|
unconverted_fields: unconverted_fields,
|
1796
1924
|
headers: headers,
|
1797
1925
|
return_headers: return_headers,
|
@@ -1859,10 +1987,24 @@ class CSV
|
|
1859
1987
|
# Returns the limit for field size; used for parsing;
|
1860
1988
|
# see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
|
1861
1989
|
# CSV.new('').field_size_limit # => nil
|
1990
|
+
#
|
1991
|
+
# Deprecated since 3.2.3. Use +max_field_size+ instead.
|
1862
1992
|
def field_size_limit
|
1863
1993
|
parser.field_size_limit
|
1864
1994
|
end
|
1865
1995
|
|
1996
|
+
# :call-seq:
|
1997
|
+
# csv.max_field_size -> integer or nil
|
1998
|
+
#
|
1999
|
+
# Returns the limit for field size; used for parsing;
|
2000
|
+
# see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]:
|
2001
|
+
# CSV.new('').max_field_size # => nil
|
2002
|
+
#
|
2003
|
+
# Since 3.2.3.
|
2004
|
+
def max_field_size
|
2005
|
+
parser.max_field_size
|
2006
|
+
end
|
2007
|
+
|
1866
2008
|
# :call-seq:
|
1867
2009
|
# csv.skip_lines -> regexp or nil
|
1868
2010
|
#
|
@@ -1884,6 +2026,10 @@ class CSV
|
|
1884
2026
|
# csv.converters # => [:integer]
|
1885
2027
|
# csv.convert(proc {|x| x.to_s })
|
1886
2028
|
# csv.converters
|
2029
|
+
#
|
2030
|
+
# Note that you need to call
|
2031
|
+
# +Ractor.make_shareable(CSV::Converters)+ on the main Ractor to use
|
2032
|
+
# this method.
|
1887
2033
|
def converters
|
1888
2034
|
parser_fields_converter.map do |converter|
|
1889
2035
|
name = Converters.rassoc(converter)
|
@@ -1946,6 +2092,10 @@ class CSV
|
|
1946
2092
|
# Returns an \Array containing header converters; used for parsing;
|
1947
2093
|
# see {Header Converters}[#class-CSV-label-Header+Converters]:
|
1948
2094
|
# CSV.new('').header_converters # => []
|
2095
|
+
#
|
2096
|
+
# Note that you need to call
|
2097
|
+
# +Ractor.make_shareable(CSV::HeaderConverters)+ on the main Ractor
|
2098
|
+
# to use this method.
|
1949
2099
|
def header_converters
|
1950
2100
|
header_fields_converter.map do |converter|
|
1951
2101
|
name = HeaderConverters.rassoc(converter)
|
@@ -1985,7 +2135,7 @@ class CSV
|
|
1985
2135
|
end
|
1986
2136
|
|
1987
2137
|
# :call-seq:
|
1988
|
-
# csv.encoding ->
|
2138
|
+
# csv.encoding -> encoding
|
1989
2139
|
#
|
1990
2140
|
# Returns the encoding used for parsing and generating;
|
1991
2141
|
# see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]:
|
@@ -2586,7 +2736,7 @@ class CSV
|
|
2586
2736
|
|
2587
2737
|
def build_parser_fields_converter
|
2588
2738
|
specific_options = {
|
2589
|
-
|
2739
|
+
builtin_converters_name: :Converters,
|
2590
2740
|
}
|
2591
2741
|
options = @base_fields_converter_options.merge(specific_options)
|
2592
2742
|
build_fields_converter(@initial_converters, options)
|
@@ -2598,7 +2748,7 @@ class CSV
|
|
2598
2748
|
|
2599
2749
|
def build_header_fields_converter
|
2600
2750
|
specific_options = {
|
2601
|
-
|
2751
|
+
builtin_converters_name: :HeaderConverters,
|
2602
2752
|
accept_nil: true,
|
2603
2753
|
}
|
2604
2754
|
options = @base_fields_converter_options.merge(specific_options)
|
@@ -2661,8 +2811,15 @@ end
|
|
2661
2811
|
# c.read.any? { |a| a.include?("zombies") }
|
2662
2812
|
# } #=> false
|
2663
2813
|
#
|
2664
|
-
|
2665
|
-
|
2814
|
+
# CSV options may also be given.
|
2815
|
+
#
|
2816
|
+
# io = StringIO.new
|
2817
|
+
# CSV(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] }
|
2818
|
+
#
|
2819
|
+
# This API is not Ractor-safe.
|
2820
|
+
#
|
2821
|
+
def CSV(*args, **options, &block)
|
2822
|
+
CSV.instance(*args, **options, &block)
|
2666
2823
|
end
|
2667
2824
|
|
2668
2825
|
require_relative "csv/version"
|