csv 3.1.9 → 3.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +361 -0
- data/README.md +3 -6
- data/doc/csv/options/generating/write_headers.rdoc +1 -1
- data/doc/csv/options/parsing/liberal_parsing.rdoc +21 -2
- data/doc/csv/recipes/filtering.rdoc +85 -17
- data/doc/csv/recipes/generating.rdoc +2 -2
- data/doc/csv/recipes/parsing.rdoc +16 -7
- data/lib/csv/core_ext/array.rb +1 -1
- data/lib/csv/core_ext/string.rb +1 -1
- data/lib/csv/fields_converter.rb +16 -4
- data/lib/csv/input_record_separator.rb +18 -0
- data/lib/csv/parser.rb +263 -113
- data/lib/csv/row.rb +23 -1
- data/lib/csv/table.rb +18 -7
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +6 -6
- data/lib/csv.rb +535 -188
- metadata +9 -66
- data/lib/csv/delete_suffix.rb +0 -18
- data/lib/csv/match_p.rb +0 -20
data/lib/csv.rb
CHANGED
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
#
|
|
49
49
|
# === Interface
|
|
50
50
|
#
|
|
51
|
-
# * CSV now uses
|
|
51
|
+
# * CSV now uses keyword parameters to set options.
|
|
52
52
|
# * CSV no longer has generate_row() or parse_row().
|
|
53
53
|
# * The old CSV's Reader and Writer classes have been dropped.
|
|
54
54
|
# * CSV::open() is now more like Ruby's open().
|
|
@@ -70,7 +70,7 @@
|
|
|
70
70
|
# == What is CSV, really?
|
|
71
71
|
#
|
|
72
72
|
# CSV maintains a pretty strict definition of CSV taken directly from
|
|
73
|
-
# {the RFC}[
|
|
73
|
+
# {the RFC}[https://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one
|
|
74
74
|
# place and that is to make using this library easier. CSV will parse all valid
|
|
75
75
|
# CSV.
|
|
76
76
|
#
|
|
@@ -90,29 +90,19 @@
|
|
|
90
90
|
# with any questions.
|
|
91
91
|
|
|
92
92
|
require "forwardable"
|
|
93
|
-
require "English"
|
|
94
93
|
require "date"
|
|
94
|
+
require "time"
|
|
95
95
|
require "stringio"
|
|
96
96
|
|
|
97
97
|
require_relative "csv/fields_converter"
|
|
98
|
-
require_relative "csv/
|
|
98
|
+
require_relative "csv/input_record_separator"
|
|
99
99
|
require_relative "csv/parser"
|
|
100
100
|
require_relative "csv/row"
|
|
101
101
|
require_relative "csv/table"
|
|
102
102
|
require_relative "csv/writer"
|
|
103
103
|
|
|
104
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
105
|
-
|
|
106
104
|
# == \CSV
|
|
107
105
|
#
|
|
108
|
-
# === In a Hurry?
|
|
109
|
-
#
|
|
110
|
-
# If you are familiar with \CSV data and have a particular task in mind,
|
|
111
|
-
# you may want to go directly to the:
|
|
112
|
-
# - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html].
|
|
113
|
-
#
|
|
114
|
-
# Otherwise, read on here, about the API: classes, methods, and constants.
|
|
115
|
-
#
|
|
116
106
|
# === \CSV Data
|
|
117
107
|
#
|
|
118
108
|
# \CSV (comma-separated values) data is a text representation of a table:
|
|
@@ -341,6 +331,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
341
331
|
# liberal_parsing: false,
|
|
342
332
|
# nil_value: nil,
|
|
343
333
|
# empty_value: "",
|
|
334
|
+
# strip: false,
|
|
344
335
|
# # For generating.
|
|
345
336
|
# write_headers: nil,
|
|
346
337
|
# quote_empty: true,
|
|
@@ -348,7 +339,6 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
348
339
|
# write_converters: nil,
|
|
349
340
|
# write_nil_value: nil,
|
|
350
341
|
# write_empty_value: "",
|
|
351
|
-
# strip: false,
|
|
352
342
|
# }
|
|
353
343
|
#
|
|
354
344
|
# ==== Options for Parsing
|
|
@@ -357,7 +347,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
357
347
|
# - +row_sep+: Specifies the row separator; used to delimit rows.
|
|
358
348
|
# - +col_sep+: Specifies the column separator; used to delimit fields.
|
|
359
349
|
# - +quote_char+: Specifies the quote character; used to quote fields.
|
|
360
|
-
# - +field_size_limit+: Specifies the maximum field size allowed.
|
|
350
|
+
# - +field_size_limit+: Specifies the maximum field size + 1 allowed.
|
|
351
|
+
# Deprecated since 3.2.3. Use +max_field_size+ instead.
|
|
352
|
+
# - +max_field_size+: Specifies the maximum field size allowed.
|
|
361
353
|
# - +converters+: Specifies the field converters to be used.
|
|
362
354
|
# - +unconverted_fields+: Specifies whether unconverted fields are to be available.
|
|
363
355
|
# - +headers+: Specifies whether data contains headers,
|
|
@@ -366,8 +358,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
366
358
|
# - +header_converters+: Specifies the header converters to be used.
|
|
367
359
|
# - +skip_blanks+: Specifies whether blank lines are to be ignored.
|
|
368
360
|
# - +skip_lines+: Specifies how comment lines are to be recognized.
|
|
369
|
-
# - +strip+: Specifies whether leading and trailing whitespace are
|
|
370
|
-
#
|
|
361
|
+
# - +strip+: Specifies whether leading and trailing whitespace are to be
|
|
362
|
+
# stripped from fields. This must be compatible with +col_sep+; if it is not,
|
|
363
|
+
# then an +ArgumentError+ exception will be raised.
|
|
371
364
|
# - +liberal_parsing+: Specifies whether \CSV should attempt to parse
|
|
372
365
|
# non-compliant data.
|
|
373
366
|
# - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field.
|
|
@@ -513,7 +506,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
513
506
|
# [" 1 ", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
|
|
514
507
|
# [" baz ", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
|
|
515
508
|
# [" 2 ", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
|
|
516
|
-
# Each CSV::
|
|
509
|
+
# Each CSV::FieldInfo object shows:
|
|
517
510
|
# - The 0-based field index.
|
|
518
511
|
# - The 1-based line index.
|
|
519
512
|
# - The field header, if any.
|
|
@@ -529,6 +522,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
529
522
|
# - <tt>:float</tt>: converts each \String-embedded float into a true \Float.
|
|
530
523
|
# - <tt>:date</tt>: converts each \String-embedded date into a true \Date.
|
|
531
524
|
# - <tt>:date_time</tt>: converts each \String-embedded date-time into a true \DateTime
|
|
525
|
+
# - <tt>:time</tt>: converts each \String-embedded time into a true \Time
|
|
532
526
|
# .
|
|
533
527
|
# This example creates a converter proc, then stores it:
|
|
534
528
|
# strip_converter = proc {|field| field.strip }
|
|
@@ -547,6 +541,14 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
547
541
|
#
|
|
548
542
|
# There is no such storage structure for write headers.
|
|
549
543
|
#
|
|
544
|
+
# In order for the parsing methods to access stored converters in non-main-Ractors, the
|
|
545
|
+
# storage structure must be made shareable first.
|
|
546
|
+
# Therefore, <tt>Ractor.make_shareable(CSV::Converters)</tt> and
|
|
547
|
+
# <tt>Ractor.make_shareable(CSV::HeaderConverters)</tt> must be called before the creation
|
|
548
|
+
# of Ractors that use the converters stored in these structures. (Since making the storage
|
|
549
|
+
# structures shareable involves freezing them, any custom converters that are to be used
|
|
550
|
+
# must be added first.)
|
|
551
|
+
#
|
|
550
552
|
# ===== Converter Lists
|
|
551
553
|
#
|
|
552
554
|
# A _converter_ _list_ is an \Array that may include any assortment of:
|
|
@@ -631,6 +633,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
631
633
|
# [:numeric, [:integer, :float]]
|
|
632
634
|
# [:date, Proc]
|
|
633
635
|
# [:date_time, Proc]
|
|
636
|
+
# [:time, Proc]
|
|
634
637
|
# [:all, [:date_time, :numeric]]
|
|
635
638
|
#
|
|
636
639
|
# Each of these converters transcodes values to UTF-8 before attempting conversion.
|
|
@@ -675,6 +678,15 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
675
678
|
# csv = CSV.parse_line(data, converters: :date_time)
|
|
676
679
|
# csv # => [#<DateTime: 2020-05-07T14:59:00-05:00 ((2458977j,71940s,0n),-18000s,2299161j)>, "x"]
|
|
677
680
|
#
|
|
681
|
+
# Converter +:time+ converts each field that Time::parse accepts:
|
|
682
|
+
# data = '2020-05-07T14:59:00-05:00,x'
|
|
683
|
+
# # Without the converter
|
|
684
|
+
# csv = CSV.parse_line(data)
|
|
685
|
+
# csv # => ["2020-05-07T14:59:00-05:00", "x"]
|
|
686
|
+
# # With the converter
|
|
687
|
+
# csv = CSV.parse_line(data, converters: :time)
|
|
688
|
+
# csv # => [2020-05-07 14:59:00 -0500, "x"]
|
|
689
|
+
#
|
|
678
690
|
# Converter +:all+ converts with both +:date_time+ and +:numeric+.
|
|
679
691
|
#
|
|
680
692
|
# As seen above, method #convert adds \converters to a \CSV instance,
|
|
@@ -705,7 +717,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
|
705
717
|
# Header converters operate only on headers (and not on other rows).
|
|
706
718
|
#
|
|
707
719
|
# There are three ways to use header \converters;
|
|
708
|
-
# these examples use built-in header converter +:
|
|
720
|
+
# these examples use built-in header converter +:downcase+,
|
|
709
721
|
# which downcases each parsed header.
|
|
710
722
|
#
|
|
711
723
|
# - Option +header_converters+ with a singleton parsing method:
|
|
@@ -846,6 +858,15 @@ class CSV
|
|
|
846
858
|
end
|
|
847
859
|
end
|
|
848
860
|
|
|
861
|
+
# The error thrown when the parser encounters invalid encoding in CSV.
|
|
862
|
+
class InvalidEncodingError < MalformedCSVError
|
|
863
|
+
attr_reader :encoding
|
|
864
|
+
def initialize(encoding, line_number)
|
|
865
|
+
@encoding = encoding
|
|
866
|
+
super("Invalid byte sequence in #{encoding}", line_number)
|
|
867
|
+
end
|
|
868
|
+
end
|
|
869
|
+
|
|
849
870
|
#
|
|
850
871
|
# A FieldInfo Struct contains details about a field's position in the data
|
|
851
872
|
# source it was read from. CSV will pass this Struct to some blocks that make
|
|
@@ -855,19 +876,19 @@ class CSV
|
|
|
855
876
|
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
|
856
877
|
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
|
857
878
|
# <b><tt>header</tt></b>:: The header for the column, when available.
|
|
879
|
+
# <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not.
|
|
858
880
|
#
|
|
859
|
-
FieldInfo = Struct.new(:index, :line, :header)
|
|
881
|
+
FieldInfo = Struct.new(:index, :line, :header, :quoted?)
|
|
860
882
|
|
|
861
883
|
# A Regexp used to find and convert some common Date formats.
|
|
862
884
|
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
|
863
885
|
\d{4}-\d{2}-\d{2} )\z /x
|
|
864
|
-
# A Regexp used to find and convert some common
|
|
886
|
+
# A Regexp used to find and convert some common (Date)Time formats.
|
|
865
887
|
DateTimeMatcher =
|
|
866
888
|
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
|
867
|
-
|
|
868
|
-
# ISO-8601
|
|
889
|
+
# ISO-8601 and RFC-3339 (space instead of T) recognized by (Date)Time.parse
|
|
869
890
|
\d{4}-\d{2}-\d{2}
|
|
870
|
-
(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
|
891
|
+
(?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
|
871
892
|
)\z /x
|
|
872
893
|
|
|
873
894
|
# The encoding used by all converters.
|
|
@@ -903,6 +924,14 @@ class CSV
|
|
|
903
924
|
f
|
|
904
925
|
end
|
|
905
926
|
},
|
|
927
|
+
time: lambda { |f|
|
|
928
|
+
begin
|
|
929
|
+
e = f.encode(ConverterEncoding)
|
|
930
|
+
e.match?(DateTimeMatcher) ? Time.parse(e) : f
|
|
931
|
+
rescue # encoding conversion or parse errors
|
|
932
|
+
f
|
|
933
|
+
end
|
|
934
|
+
},
|
|
906
935
|
all: [:date_time, :numeric],
|
|
907
936
|
}
|
|
908
937
|
|
|
@@ -917,8 +946,10 @@ class CSV
|
|
|
917
946
|
symbol: lambda { |h|
|
|
918
947
|
h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip.
|
|
919
948
|
gsub(/\s+/, "_").to_sym
|
|
920
|
-
}
|
|
949
|
+
},
|
|
950
|
+
symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym }
|
|
921
951
|
}
|
|
952
|
+
|
|
922
953
|
# Default values for method options.
|
|
923
954
|
DEFAULT_OPTIONS = {
|
|
924
955
|
# For both parsing and generating.
|
|
@@ -927,6 +958,7 @@ class CSV
|
|
|
927
958
|
quote_char: '"',
|
|
928
959
|
# For parsing.
|
|
929
960
|
field_size_limit: nil,
|
|
961
|
+
max_field_size: nil,
|
|
930
962
|
converters: nil,
|
|
931
963
|
unconverted_fields: nil,
|
|
932
964
|
headers: false,
|
|
@@ -937,6 +969,7 @@ class CSV
|
|
|
937
969
|
liberal_parsing: false,
|
|
938
970
|
nil_value: nil,
|
|
939
971
|
empty_value: "",
|
|
972
|
+
strip: false,
|
|
940
973
|
# For generating.
|
|
941
974
|
write_headers: nil,
|
|
942
975
|
quote_empty: true,
|
|
@@ -944,7 +977,6 @@ class CSV
|
|
|
944
977
|
write_converters: nil,
|
|
945
978
|
write_nil_value: nil,
|
|
946
979
|
write_empty_value: "",
|
|
947
|
-
strip: false,
|
|
948
980
|
}.freeze
|
|
949
981
|
|
|
950
982
|
class << self
|
|
@@ -957,6 +989,8 @@ class CSV
|
|
|
957
989
|
# Creates or retrieves cached \CSV objects.
|
|
958
990
|
# For arguments and options, see CSV.new.
|
|
959
991
|
#
|
|
992
|
+
# This API is not Ractor-safe.
|
|
993
|
+
#
|
|
960
994
|
# ---
|
|
961
995
|
#
|
|
962
996
|
# With no block given, returns a \CSV object.
|
|
@@ -992,7 +1026,7 @@ class CSV
|
|
|
992
1026
|
def instance(data = $stdout, **options)
|
|
993
1027
|
# create a _signature_ for this method call, data object and options
|
|
994
1028
|
sig = [data.object_id] +
|
|
995
|
-
options.values_at(*DEFAULT_OPTIONS.keys
|
|
1029
|
+
options.values_at(*DEFAULT_OPTIONS.keys)
|
|
996
1030
|
|
|
997
1031
|
# fetch or create the instance for this signature
|
|
998
1032
|
@@instances ||= Hash.new
|
|
@@ -1006,65 +1040,227 @@ class CSV
|
|
|
1006
1040
|
end
|
|
1007
1041
|
|
|
1008
1042
|
# :call-seq:
|
|
1009
|
-
# filter(**options) {|row| ... }
|
|
1010
|
-
# filter(
|
|
1011
|
-
# filter(
|
|
1012
|
-
# filter(in_string, out_string, **options) {|row| ... }
|
|
1013
|
-
# filter(in_string, out_io, **options) {|row| ... }
|
|
1014
|
-
# filter(in_io, out_string, **options) {|row| ... }
|
|
1015
|
-
# filter(in_io, out_io, **options) {|row| ... }
|
|
1016
|
-
#
|
|
1017
|
-
# Reads \CSV input and writes \CSV output.
|
|
1018
|
-
#
|
|
1019
|
-
# For each input row:
|
|
1020
|
-
# - Forms the data into:
|
|
1021
|
-
# - A CSV::Row object, if headers are in use.
|
|
1022
|
-
# - An \Array of Arrays, otherwise.
|
|
1023
|
-
# - Calls the block with that object.
|
|
1024
|
-
# - Appends the block's return value to the output.
|
|
1043
|
+
# filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
|
|
1044
|
+
# filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
|
|
1045
|
+
# filter(**options) {|row| ... } -> array_of_arrays or csv_table
|
|
1025
1046
|
#
|
|
1026
|
-
#
|
|
1027
|
-
#
|
|
1028
|
-
#
|
|
1029
|
-
#
|
|
1030
|
-
#
|
|
1031
|
-
#
|
|
1032
|
-
#
|
|
1033
|
-
#
|
|
1034
|
-
# * \CSV output:
|
|
1035
|
-
# * Argument +out_string+, if given, should be a \String object;
|
|
1036
|
-
# it will be put into a new StringIO object positioned at the beginning.
|
|
1037
|
-
# * Argument +out_io+, if given, should be an IO object that is
|
|
1038
|
-
# ppen for writing; on return, the IO object will be closed.
|
|
1039
|
-
# * If neither +out_string+ nor +out_io+ is given,
|
|
1040
|
-
# the output stream defaults to <tt>$stdout</tt>.
|
|
1041
|
-
# * Argument +options+ should be keyword arguments.
|
|
1042
|
-
# - Each argument name that is prefixed with +in_+ or +input_+
|
|
1043
|
-
# is stripped of its prefix and is treated as an option
|
|
1044
|
-
# for parsing the input.
|
|
1045
|
-
# Option +input_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
|
|
1046
|
-
# - Each argument name that is prefixed with +out_+ or +output_+
|
|
1047
|
-
# is stripped of its prefix and is treated as an option
|
|
1048
|
-
# for generating the output.
|
|
1049
|
-
# Option +output_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>.
|
|
1050
|
-
# - Each argument not prefixed as above is treated as an option
|
|
1051
|
-
# both for parsing the input and for generating the output.
|
|
1052
|
-
# - See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
|
|
1053
|
-
# and {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
|
1047
|
+
# - Parses \CSV from a source (\String, \IO stream, or ARGF).
|
|
1048
|
+
# - Calls the given block with each parsed row:
|
|
1049
|
+
# - Without headers, each row is an \Array.
|
|
1050
|
+
# - With headers, each row is a CSV::Row.
|
|
1051
|
+
# - Generates \CSV to an output (\String, \IO stream, or STDOUT).
|
|
1052
|
+
# - Returns the parsed source:
|
|
1053
|
+
# - Without headers, an \Array of \Arrays.
|
|
1054
|
+
# - With headers, a CSV::Table.
|
|
1054
1055
|
#
|
|
1055
|
-
#
|
|
1056
|
-
#
|
|
1056
|
+
# When +in_string_or_io+ is given, but not +out_string_or_io+,
|
|
1057
|
+
# parses from the given +in_string_or_io+
|
|
1058
|
+
# and generates to STDOUT.
|
|
1059
|
+
#
|
|
1060
|
+
# \String input without headers:
|
|
1061
|
+
#
|
|
1062
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
|
1063
|
+
# CSV.filter(in_string) do |row|
|
|
1064
|
+
# row[0].upcase!
|
|
1065
|
+
# row[1] = - row[1].to_i
|
|
1066
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
|
1067
|
+
#
|
|
1068
|
+
# Output (to STDOUT):
|
|
1069
|
+
#
|
|
1070
|
+
# FOO,0
|
|
1071
|
+
# BAR,-1
|
|
1072
|
+
# BAZ,-2
|
|
1073
|
+
#
|
|
1074
|
+
# \String input with headers:
|
|
1075
|
+
#
|
|
1076
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
|
1077
|
+
# CSV.filter(in_string, headers: true) do |row|
|
|
1078
|
+
# row[0].upcase!
|
|
1079
|
+
# row[1] = - row[1].to_i
|
|
1080
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
|
1081
|
+
#
|
|
1082
|
+
# Output (to STDOUT):
|
|
1083
|
+
#
|
|
1084
|
+
# Name,Value
|
|
1085
|
+
# FOO,0
|
|
1086
|
+
# BAR,-1
|
|
1087
|
+
# BAZ,-2
|
|
1088
|
+
#
|
|
1089
|
+
# \IO stream input without headers:
|
|
1090
|
+
#
|
|
1091
|
+
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
|
|
1092
|
+
# File.open('t.csv') do |in_io|
|
|
1093
|
+
# CSV.filter(in_io) do |row|
|
|
1094
|
+
# row[0].upcase!
|
|
1095
|
+
# row[1] = - row[1].to_i
|
|
1096
|
+
# end
|
|
1097
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
|
1098
|
+
#
|
|
1099
|
+
# Output (to STDOUT):
|
|
1100
|
+
#
|
|
1101
|
+
# FOO,0
|
|
1102
|
+
# BAR,-1
|
|
1103
|
+
# BAZ,-2
|
|
1104
|
+
#
|
|
1105
|
+
# \IO stream input with headers:
|
|
1106
|
+
#
|
|
1107
|
+
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
|
|
1108
|
+
# File.open('t.csv') do |in_io|
|
|
1109
|
+
# CSV.filter(in_io, headers: true) do |row|
|
|
1110
|
+
# row[0].upcase!
|
|
1111
|
+
# row[1] = - row[1].to_i
|
|
1112
|
+
# end
|
|
1113
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
|
1114
|
+
#
|
|
1115
|
+
# Output (to STDOUT):
|
|
1116
|
+
#
|
|
1117
|
+
# Name,Value
|
|
1118
|
+
# FOO,0
|
|
1119
|
+
# BAR,-1
|
|
1120
|
+
# BAZ,-2
|
|
1121
|
+
#
|
|
1122
|
+
# When both +in_string_or_io+ and +out_string_or_io+ are given,
|
|
1123
|
+
# parses from +in_string_or_io+ and generates to +out_string_or_io+.
|
|
1124
|
+
#
|
|
1125
|
+
# \String output without headers:
|
|
1126
|
+
#
|
|
1127
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
|
1057
1128
|
# out_string = ''
|
|
1058
1129
|
# CSV.filter(in_string, out_string) do |row|
|
|
1059
|
-
# row[0]
|
|
1060
|
-
# row[1]
|
|
1061
|
-
# end
|
|
1062
|
-
# out_string # => "FOO,
|
|
1130
|
+
# row[0].upcase!
|
|
1131
|
+
# row[1] = - row[1].to_i
|
|
1132
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
|
1133
|
+
# out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n"
|
|
1134
|
+
#
|
|
1135
|
+
# \String output with headers:
|
|
1136
|
+
#
|
|
1137
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
|
1138
|
+
# out_string = ''
|
|
1139
|
+
# CSV.filter(in_string, out_string, headers: true) do |row|
|
|
1140
|
+
# row[0].upcase!
|
|
1141
|
+
# row[1] = - row[1].to_i
|
|
1142
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
|
1143
|
+
# out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
|
|
1144
|
+
#
|
|
1145
|
+
# \IO stream output without headers:
|
|
1146
|
+
#
|
|
1147
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
|
1148
|
+
# File.open('t.csv', 'w') do |out_io|
|
|
1149
|
+
# CSV.filter(in_string, out_io) do |row|
|
|
1150
|
+
# row[0].upcase!
|
|
1151
|
+
# row[1] = - row[1].to_i
|
|
1152
|
+
# end
|
|
1153
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
|
1154
|
+
# File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n"
|
|
1155
|
+
#
|
|
1156
|
+
# \IO stream output with headers:
|
|
1157
|
+
#
|
|
1158
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
|
1159
|
+
# File.open('t.csv', 'w') do |out_io|
|
|
1160
|
+
# CSV.filter(in_string, out_io, headers: true) do |row|
|
|
1161
|
+
# row[0].upcase!
|
|
1162
|
+
# row[1] = - row[1].to_i
|
|
1163
|
+
# end
|
|
1164
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
|
1165
|
+
# File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
|
|
1166
|
+
#
|
|
1167
|
+
# When neither +in_string_or_io+ nor +out_string_or_io+ given,
|
|
1168
|
+
# parses from {ARGF}[rdoc-ref:ARGF]
|
|
1169
|
+
# and generates to STDOUT.
|
|
1170
|
+
#
|
|
1171
|
+
# Without headers:
|
|
1172
|
+
#
|
|
1173
|
+
# # Put Ruby code into a file.
|
|
1174
|
+
# ruby = <<-EOT
|
|
1175
|
+
# require 'csv'
|
|
1176
|
+
# CSV.filter do |row|
|
|
1177
|
+
# row[0].upcase!
|
|
1178
|
+
# row[1] = - row[1].to_i
|
|
1179
|
+
# end
|
|
1180
|
+
# EOT
|
|
1181
|
+
# File.write('t.rb', ruby)
|
|
1182
|
+
# # Put some CSV into a file.
|
|
1183
|
+
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
|
|
1184
|
+
# # Run the Ruby code with CSV filename as argument.
|
|
1185
|
+
# system(Gem.ruby, "t.rb", "t.csv")
|
|
1186
|
+
#
|
|
1187
|
+
# Output (to STDOUT):
|
|
1188
|
+
#
|
|
1189
|
+
# FOO,0
|
|
1190
|
+
# BAR,-1
|
|
1191
|
+
# BAZ,-2
|
|
1192
|
+
#
|
|
1193
|
+
# With headers:
|
|
1194
|
+
#
|
|
1195
|
+
# # Put Ruby code into a file.
|
|
1196
|
+
# ruby = <<-EOT
|
|
1197
|
+
# require 'csv'
|
|
1198
|
+
# CSV.filter(headers: true) do |row|
|
|
1199
|
+
# row[0].upcase!
|
|
1200
|
+
# row[1] = - row[1].to_i
|
|
1201
|
+
# end
|
|
1202
|
+
# EOT
|
|
1203
|
+
# File.write('t.rb', ruby)
|
|
1204
|
+
# # Put some CSV into a file.
|
|
1205
|
+
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
|
|
1206
|
+
# # Run the Ruby code with CSV filename as argument.
|
|
1207
|
+
# system(Gem.ruby, "t.rb", "t.csv")
|
|
1208
|
+
#
|
|
1209
|
+
# Output (to STDOUT):
|
|
1210
|
+
#
|
|
1211
|
+
# Name,Value
|
|
1212
|
+
# FOO,0
|
|
1213
|
+
# BAR,-1
|
|
1214
|
+
# BAZ,-2
|
|
1215
|
+
#
|
|
1216
|
+
# Arguments:
|
|
1217
|
+
#
|
|
1218
|
+
# * Argument +in_string_or_io+ must be a \String or an \IO stream.
|
|
1219
|
+
# * Argument +out_string_or_io+ must be a \String or an \IO stream.
|
|
1220
|
+
# * Arguments <tt>**options</tt> must be keyword options.
|
|
1221
|
+
#
|
|
1222
|
+
# - Each option defined as an {option for parsing}[#class-CSV-label-Options+for+Parsing]
|
|
1223
|
+
# is used for parsing the filter input.
|
|
1224
|
+
# - Each option defined as an {option for generating}[#class-CSV-label-Options+for+Generating]
|
|
1225
|
+
# is used for generating the filter output.
|
|
1226
|
+
#
|
|
1227
|
+
# However, there are three options that may be used for both parsing and generating:
|
|
1228
|
+
# +col_sep+, +quote_char+, and +row_sep+.
|
|
1229
|
+
#
|
|
1230
|
+
# Therefore for method +filter+ (and method +filter+ only),
|
|
1231
|
+
# there are special options that allow these parsing and generating options
|
|
1232
|
+
# to be specified separately:
|
|
1233
|
+
#
|
|
1234
|
+
# - Options +input_col_sep+ and +output_col_sep+
|
|
1235
|
+
# (and their aliases +in_col_sep+ and +out_col_sep+)
|
|
1236
|
+
# specify the column separators for parsing and generating.
|
|
1237
|
+
# - Options +input_quote_char+ and +output_quote_char+
|
|
1238
|
+
# (and their aliases +in_quote_char+ and +out_quote_char+)
|
|
1239
|
+
# specify the quote characters for parsing and generating.
|
|
1240
|
+
# - Options +input_row_sep+ and +output_row_sep+
|
|
1241
|
+
# (and their aliases +in_row_sep+ and +out_row_sep+)
|
|
1242
|
+
# specify the row separators for parsing and generating.
|
|
1243
|
+
#
|
|
1244
|
+
# Example options (for column separators):
|
|
1245
|
+
#
|
|
1246
|
+
# CSV.filter # Default for both parsing and generating.
|
|
1247
|
+
# CSV.filter(in_col_sep: ';') # ';' for parsing, default for generating.
|
|
1248
|
+
# CSV.filter(out_col_sep: '|') # Default for parsing, '|' for generating.
|
|
1249
|
+
# CSV.filter(in_col_sep: ';', out_col_sep: '|') # ';' for parsing, '|' for generating.
|
|
1250
|
+
#
|
|
1251
|
+
# Note that for a special option (e.g., +input_col_sep+)
|
|
1252
|
+
# and its corresponding "regular" option (e.g., +col_sep+),
|
|
1253
|
+
# the two are mutually overriding.
|
|
1254
|
+
#
|
|
1255
|
+
# Another example (possibly surprising):
|
|
1256
|
+
#
|
|
1257
|
+
# CSV.filter(in_col_sep: ';', col_sep: '|') # '|' for both parsing(!) and generating.
|
|
1258
|
+
#
|
|
1063
1259
|
def filter(input=nil, output=nil, **options)
|
|
1064
1260
|
# parse options for input, output, or both
|
|
1065
|
-
in_options, out_options = Hash.new, {row_sep:
|
|
1261
|
+
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
|
|
1066
1262
|
options.each do |key, value|
|
|
1067
|
-
case key
|
|
1263
|
+
case key
|
|
1068
1264
|
when /\Ain(?:put)?_(.+)\Z/
|
|
1069
1265
|
in_options[$1.to_sym] = value
|
|
1070
1266
|
when /\Aout(?:put)?_(.+)\Z/
|
|
@@ -1106,111 +1302,90 @@ class CSV
|
|
|
1106
1302
|
|
|
1107
1303
|
#
|
|
1108
1304
|
# :call-seq:
|
|
1109
|
-
# foreach(
|
|
1110
|
-
# foreach(
|
|
1111
|
-
# foreach(path, mode='r', headers: ..., **options) {|row| ... )
|
|
1112
|
-
# foreach(io, mode='r', headers: ..., **options {|row| ... )
|
|
1113
|
-
# foreach(path, mode='r', **options) -> new_enumerator
|
|
1114
|
-
# foreach(io, mode='r', **options -> new_enumerator
|
|
1115
|
-
#
|
|
1116
|
-
# Calls the block with each row read from source +path+ or +io+.
|
|
1117
|
-
#
|
|
1118
|
-
# * Argument +path+, if given, must be the path to a file.
|
|
1119
|
-
# :include: ../doc/csv/arguments/io.rdoc
|
|
1120
|
-
# * Argument +mode+, if given, must be a \File mode
|
|
1121
|
-
# See {Open Mode}[IO.html#method-c-new-label-Open+Mode].
|
|
1122
|
-
# * Arguments <tt>**options</tt> must be keyword options.
|
|
1123
|
-
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
|
1124
|
-
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
|
1125
|
-
# that you can use to specify the Encoding of the data read from +path+ or +io+.
|
|
1126
|
-
# You must provide this unless your data is in the encoding
|
|
1127
|
-
# given by <tt>Encoding::default_external</tt>.
|
|
1128
|
-
# Parsing will use this to determine how to parse the data.
|
|
1129
|
-
# You may provide a second Encoding to
|
|
1130
|
-
# have the data transcoded as it is read. For example,
|
|
1131
|
-
# encoding: 'UTF-32BE:UTF-8'
|
|
1132
|
-
# would read +UTF-32BE+ data from the file
|
|
1133
|
-
# but transcode it to +UTF-8+ before parsing.
|
|
1305
|
+
# foreach(path_or_io, mode='r', **options) {|row| ... }
|
|
1306
|
+
# foreach(path_or_io, mode='r', **options) -> new_enumerator
|
|
1134
1307
|
#
|
|
1135
|
-
#
|
|
1308
|
+
# Calls the block with each row read from source +path_or_io+.
|
|
1136
1309
|
#
|
|
1137
|
-
#
|
|
1310
|
+
# \Path input without headers:
|
|
1138
1311
|
#
|
|
1139
|
-
# These examples assume prior execution of:
|
|
1140
1312
|
# string = "foo,0\nbar,1\nbaz,2\n"
|
|
1141
|
-
#
|
|
1142
|
-
# File.write(
|
|
1313
|
+
# in_path = 't.csv'
|
|
1314
|
+
# File.write(in_path, string)
|
|
1315
|
+
# CSV.foreach(in_path) {|row| p row }
|
|
1143
1316
|
#
|
|
1144
|
-
# Read rows from a file at +path+:
|
|
1145
|
-
# CSV.foreach(path) {|row| p row }
|
|
1146
1317
|
# Output:
|
|
1147
|
-
# ["foo", "0"]
|
|
1148
|
-
# ["bar", "1"]
|
|
1149
|
-
# ["baz", "2"]
|
|
1150
1318
|
#
|
|
1151
|
-
# Read rows from an \IO object:
|
|
1152
|
-
# File.open(path) do |file|
|
|
1153
|
-
# CSV.foreach(file) {|row| p row }
|
|
1154
|
-
# end
|
|
1155
|
-
#
|
|
1156
|
-
# Output:
|
|
1157
1319
|
# ["foo", "0"]
|
|
1158
1320
|
# ["bar", "1"]
|
|
1159
1321
|
# ["baz", "2"]
|
|
1160
1322
|
#
|
|
1161
|
-
#
|
|
1162
|
-
#
|
|
1163
|
-
#
|
|
1323
|
+
# \Path input with headers:
|
|
1324
|
+
#
|
|
1325
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
|
1326
|
+
# in_path = 't.csv'
|
|
1327
|
+
# File.write(in_path, string)
|
|
1328
|
+
# CSV.foreach(in_path, headers: true) {|row| p row }
|
|
1164
1329
|
#
|
|
1165
|
-
# Issues a warning if an encoding is unsupported:
|
|
1166
|
-
# CSV.foreach(File.open(path), encoding: 'foo:bar') {|row| }
|
|
1167
1330
|
# Output:
|
|
1168
|
-
# warning: Unsupported encoding foo ignored
|
|
1169
|
-
# warning: Unsupported encoding bar ignored
|
|
1170
1331
|
#
|
|
1171
|
-
#
|
|
1332
|
+
# <CSV::Row "Name":"foo" "Value":"0">
|
|
1333
|
+
# <CSV::Row "Name":"bar" "Value":"1">
|
|
1334
|
+
# <CSV::Row "Name":"baz" "Value":"2">
|
|
1172
1335
|
#
|
|
1173
|
-
#
|
|
1174
|
-
# returns each row as a CSV::Row object.
|
|
1336
|
+
# \IO stream input without headers:
|
|
1175
1337
|
#
|
|
1176
|
-
#
|
|
1177
|
-
# string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
|
|
1338
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
|
1178
1339
|
# path = 't.csv'
|
|
1179
1340
|
# File.write(path, string)
|
|
1180
|
-
#
|
|
1181
|
-
#
|
|
1182
|
-
#
|
|
1341
|
+
# File.open('t.csv') do |in_io|
|
|
1342
|
+
# CSV.foreach(in_io) {|row| p row }
|
|
1343
|
+
# end
|
|
1183
1344
|
#
|
|
1184
1345
|
# Output:
|
|
1185
|
-
# #<CSV::Row "Name":"foo" "Count":"0">
|
|
1186
|
-
# #<CSV::Row "Name":"bar" "Count":"1">
|
|
1187
|
-
# #<CSV::Row "Name":"baz" "Count":"2">
|
|
1188
1346
|
#
|
|
1189
|
-
#
|
|
1190
|
-
#
|
|
1191
|
-
#
|
|
1347
|
+
# ["foo", "0"]
|
|
1348
|
+
# ["bar", "1"]
|
|
1349
|
+
# ["baz", "2"]
|
|
1350
|
+
#
|
|
1351
|
+
# \IO stream input with headers:
|
|
1352
|
+
#
|
|
1353
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
|
1354
|
+
# path = 't.csv'
|
|
1355
|
+
# File.write(path, string)
|
|
1356
|
+
# File.open('t.csv') do |in_io|
|
|
1357
|
+
# CSV.foreach(in_io, headers: true) {|row| p row }
|
|
1192
1358
|
# end
|
|
1193
1359
|
#
|
|
1194
1360
|
# Output:
|
|
1195
|
-
# #<CSV::Row "Name":"foo" "Count":"0">
|
|
1196
|
-
# #<CSV::Row "Name":"bar" "Count":"1">
|
|
1197
|
-
# #<CSV::Row "Name":"baz" "Count":"2">
|
|
1198
1361
|
#
|
|
1199
|
-
#
|
|
1200
|
-
#
|
|
1201
|
-
#
|
|
1202
|
-
# # Raises Errno::ENOENT (No such file or directory @ rb_sysopen - nosuch.csv):
|
|
1203
|
-
# CSV.foreach('nosuch.csv') {|row| }
|
|
1362
|
+
# <CSV::Row "Name":"foo" "Value":"0">
|
|
1363
|
+
# <CSV::Row "Name":"bar" "Value":"1">
|
|
1364
|
+
# <CSV::Row "Name":"baz" "Value":"2">
|
|
1204
1365
|
#
|
|
1205
|
-
#
|
|
1206
|
-
# io = File.open(path, 'w') {|row| }
|
|
1207
|
-
# # Raises TypeError (no implicit conversion of nil into String):
|
|
1208
|
-
# CSV.foreach(io) {|row| }
|
|
1366
|
+
# With no block given, returns an \Enumerator:
|
|
1209
1367
|
#
|
|
1210
|
-
#
|
|
1211
|
-
#
|
|
1212
|
-
#
|
|
1368
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
|
1369
|
+
# path = 't.csv'
|
|
1370
|
+
# File.write(path, string)
|
|
1371
|
+
# CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
|
|
1213
1372
|
#
|
|
1373
|
+
# Arguments:
|
|
1374
|
+
# * Argument +path_or_io+ must be a file path or an \IO stream.
|
|
1375
|
+
# * Argument +mode+, if given, must be a \File mode.
|
|
1376
|
+
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
|
1377
|
+
# * Arguments <tt>**options</tt> must be keyword options.
|
|
1378
|
+
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
|
1379
|
+
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
|
1380
|
+
# that you can use to specify the Encoding of the data read from +path_or_io+.
|
|
1381
|
+
# You must provide this unless your data is in the encoding
|
|
1382
|
+
# given by <tt>Encoding::default_external</tt>.
|
|
1383
|
+
# Parsing will use this to determine how to parse the data.
|
|
1384
|
+
# You may provide a second Encoding to
|
|
1385
|
+
# have the data transcoded as it is read. For example,
|
|
1386
|
+
# encoding: 'UTF-32BE:UTF-8'
|
|
1387
|
+
# would read +UTF-32BE+ data from the file
|
|
1388
|
+
# but transcode it to +UTF-8+ before parsing.
|
|
1214
1389
|
def foreach(path, mode="r", **options, &block)
|
|
1215
1390
|
return to_enum(__method__, path, mode, **options) unless block_given?
|
|
1216
1391
|
open(path, mode, **options) do |csv|
|
|
@@ -1303,8 +1478,8 @@ class CSV
|
|
|
1303
1478
|
# Argument +ary+ must be an \Array.
|
|
1304
1479
|
#
|
|
1305
1480
|
# Special options:
|
|
1306
|
-
# * Option <tt>:row_sep</tt> defaults to <tt
|
|
1307
|
-
# (<tt>$/</tt>).:
|
|
1481
|
+
# * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
|
|
1482
|
+
# and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise:
|
|
1308
1483
|
# $INPUT_RECORD_SEPARATOR # => "\n"
|
|
1309
1484
|
# * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
|
|
1310
1485
|
# Encoding for the output. This method will try to guess your Encoding from
|
|
@@ -1326,7 +1501,7 @@ class CSV
|
|
|
1326
1501
|
# CSV.generate_line(:foo)
|
|
1327
1502
|
#
|
|
1328
1503
|
def generate_line(row, **options)
|
|
1329
|
-
options = {row_sep:
|
|
1504
|
+
options = {row_sep: InputRecordSeparator.value}.merge(options)
|
|
1330
1505
|
str = +""
|
|
1331
1506
|
if options[:encoding]
|
|
1332
1507
|
str.force_encoding(options[:encoding])
|
|
@@ -1348,24 +1523,62 @@ class CSV
|
|
|
1348
1523
|
(new(str, **options) << row).string
|
|
1349
1524
|
end
|
|
1350
1525
|
|
|
1526
|
+
# :call-seq:
|
|
1527
|
+
# CSV.generate_lines(rows)
|
|
1528
|
+
# CSV.generate_lines(rows, **options)
|
|
1529
|
+
#
|
|
1530
|
+
# Returns the \String created by generating \CSV from +rows+
|
|
1531
|
+
# using the specified +options+.
|
|
1532
|
+
#
|
|
1533
|
+
# Argument +rows+ must be an \Array of rows. Each row is an \Array of \String or a \CSV::Row.
|
|
1534
|
+
#
|
|
1535
|
+
# Special options:
|
|
1536
|
+
# * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
|
|
1537
|
+
# and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise:
|
|
1538
|
+
# $INPUT_RECORD_SEPARATOR # => "\n"
|
|
1539
|
+
# * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
|
|
1540
|
+
# Encoding for the output. This method will try to guess your Encoding from
|
|
1541
|
+
# the first non-+nil+ field in +rows+, if possible, but you may need to use
|
|
1542
|
+
# this parameter as a backup plan.
|
|
1543
|
+
#
|
|
1544
|
+
# For other +options+,
|
|
1545
|
+
# see {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
|
1546
|
+
#
|
|
1547
|
+
# ---
|
|
1548
|
+
#
|
|
1549
|
+
# Returns the \String generated from an \Array of rows:
|
|
1550
|
+
# CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n"
|
|
1551
|
+
#
|
|
1552
|
+
# ---
|
|
1553
|
+
#
|
|
1554
|
+
# Raises an exception if +rows+ does not respond to +each+:
|
|
1555
|
+
# # Raises NoMethodError (undefined method `each' for :foo:Symbol)
|
|
1556
|
+
# CSV.generate_lines(:foo)
|
|
1557
|
+
#
|
|
1558
|
+
def generate_lines(rows, **options)
|
|
1559
|
+
self.generate(**options) do |csv|
|
|
1560
|
+
rows.each do |row|
|
|
1561
|
+
csv << row
|
|
1562
|
+
end
|
|
1563
|
+
end
|
|
1564
|
+
end
|
|
1565
|
+
|
|
1351
1566
|
#
|
|
1352
1567
|
# :call-seq:
|
|
1353
|
-
# open(
|
|
1354
|
-
# open(
|
|
1355
|
-
# open(file_path, mode = "rb", **options ) { |csv| ... } -> object
|
|
1356
|
-
# open(io, mode = "rb", **options ) { |csv| ... } -> object
|
|
1568
|
+
# open(path_or_io, mode = "rb", **options ) -> new_csv
|
|
1569
|
+
# open(path_or_io, mode = "rb", **options ) { |csv| ... } -> object
|
|
1357
1570
|
#
|
|
1358
1571
|
# possible options elements:
|
|
1359
|
-
#
|
|
1572
|
+
# keyword form:
|
|
1360
1573
|
# :invalid => nil # raise error on invalid byte sequence (default)
|
|
1361
1574
|
# :invalid => :replace # replace invalid byte sequence
|
|
1362
1575
|
# :undef => :replace # replace undefined conversion
|
|
1363
1576
|
# :replace => string # replacement string ("?" or "\uFFFD" if not specified)
|
|
1364
1577
|
#
|
|
1365
|
-
# * Argument +
|
|
1578
|
+
# * Argument +path_or_io+ must be a file path or an \IO stream.
|
|
1366
1579
|
# :include: ../doc/csv/arguments/io.rdoc
|
|
1367
|
-
# * Argument +mode+, if given, must be a \File mode
|
|
1368
|
-
# See {
|
|
1580
|
+
# * Argument +mode+, if given, must be a \File mode.
|
|
1581
|
+
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
|
1369
1582
|
# * Arguments <tt>**options</tt> must be keyword options.
|
|
1370
1583
|
# See {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
|
1371
1584
|
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
|
@@ -1386,6 +1599,9 @@ class CSV
|
|
|
1386
1599
|
# path = 't.csv'
|
|
1387
1600
|
# File.write(path, string)
|
|
1388
1601
|
#
|
|
1602
|
+
# string_io = StringIO.new
|
|
1603
|
+
# string_io << "foo,0\nbar,1\nbaz,2\n"
|
|
1604
|
+
#
|
|
1389
1605
|
# ---
|
|
1390
1606
|
#
|
|
1391
1607
|
# With no block given, returns a new \CSV object.
|
|
@@ -1398,6 +1614,9 @@ class CSV
|
|
|
1398
1614
|
# csv = CSV.open(File.open(path))
|
|
1399
1615
|
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
|
1400
1616
|
#
|
|
1617
|
+
# Create a \CSV object using a \StringIO:
|
|
1618
|
+
# csv = CSV.open(string_io)
|
|
1619
|
+
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
|
1401
1620
|
# ---
|
|
1402
1621
|
#
|
|
1403
1622
|
# With a block given, calls the block with the created \CSV object;
|
|
@@ -1415,27 +1634,46 @@ class CSV
|
|
|
1415
1634
|
# Output:
|
|
1416
1635
|
# #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
|
1417
1636
|
#
|
|
1637
|
+
# Using a \StringIO:
|
|
1638
|
+
# csv = CSV.open(string_io) {|csv| p csv}
|
|
1639
|
+
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
|
1640
|
+
# Output:
|
|
1641
|
+
# #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
|
1418
1642
|
# ---
|
|
1419
1643
|
#
|
|
1420
1644
|
# Raises an exception if the argument is not a \String object or \IO object:
|
|
1421
1645
|
# # Raises TypeError (no implicit conversion of Symbol into String)
|
|
1422
1646
|
# CSV.open(:foo)
|
|
1423
|
-
def open(
|
|
1647
|
+
def open(filename_or_io, mode="r", **options)
|
|
1424
1648
|
# wrap a File opened with the remaining +args+ with no newline
|
|
1425
1649
|
# decorator
|
|
1426
|
-
file_opts = {
|
|
1650
|
+
file_opts = {}
|
|
1651
|
+
may_enable_bom_detection_automatically(filename_or_io,
|
|
1652
|
+
mode,
|
|
1653
|
+
options,
|
|
1654
|
+
file_opts)
|
|
1655
|
+
file_opts.merge!(options)
|
|
1656
|
+
unless file_opts.key?(:newline)
|
|
1657
|
+
file_opts[:universal_newline] ||= false
|
|
1658
|
+
end
|
|
1427
1659
|
options.delete(:invalid)
|
|
1428
1660
|
options.delete(:undef)
|
|
1429
1661
|
options.delete(:replace)
|
|
1662
|
+
options.delete_if {|k, _| /newline\z/.match?(k)}
|
|
1430
1663
|
|
|
1431
|
-
|
|
1432
|
-
f =
|
|
1433
|
-
|
|
1434
|
-
|
|
1435
|
-
|
|
1436
|
-
|
|
1437
|
-
|
|
1664
|
+
if filename_or_io.is_a?(StringIO)
|
|
1665
|
+
f = create_stringio(filename_or_io.string, mode, **file_opts)
|
|
1666
|
+
else
|
|
1667
|
+
begin
|
|
1668
|
+
f = File.open(filename_or_io, mode, **file_opts)
|
|
1669
|
+
rescue ArgumentError => e
|
|
1670
|
+
raise unless /needs binmode/.match?(e.message) and mode == "r"
|
|
1671
|
+
mode = "rb"
|
|
1672
|
+
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
|
|
1673
|
+
retry
|
|
1674
|
+
end
|
|
1438
1675
|
end
|
|
1676
|
+
|
|
1439
1677
|
begin
|
|
1440
1678
|
csv = new(f, **options)
|
|
1441
1679
|
rescue Exception
|
|
@@ -1567,6 +1805,23 @@ class CSV
|
|
|
1567
1805
|
# Raises an exception if the argument is not a \String object or \IO object:
|
|
1568
1806
|
# # Raises NoMethodError (undefined method `close' for :foo:Symbol)
|
|
1569
1807
|
# CSV.parse(:foo)
|
|
1808
|
+
#
|
|
1809
|
+
# ---
|
|
1810
|
+
#
|
|
1811
|
+
# Please check whether your text contains a \BOM. CSV.parse will not remove
|
|
1812
|
+
# \BOM automatically. You might want to remove the \BOM before calling CSV.parse:
|
|
1813
|
+
# # remove BOM on calling File.open
|
|
1814
|
+
# File.open(path, encoding: 'bom|utf-8') do |file|
|
|
1815
|
+
# CSV.parse(file, headers: true) do |row|
|
|
1816
|
+
# # you can get value by column name because BOM is removed
|
|
1817
|
+
# p row['Name']
|
|
1818
|
+
# end
|
|
1819
|
+
# end
|
|
1820
|
+
#
|
|
1821
|
+
# Output:
|
|
1822
|
+
# # "foo"
|
|
1823
|
+
# # "bar"
|
|
1824
|
+
# # "baz"
|
|
1570
1825
|
def parse(str, **options, &block)
|
|
1571
1826
|
csv = new(str, **options)
|
|
1572
1827
|
|
|
@@ -1681,7 +1936,7 @@ class CSV
|
|
|
1681
1936
|
#
|
|
1682
1937
|
# Calls CSV.read with +source+, +options+, and certain default options:
|
|
1683
1938
|
# - +headers+: +true+
|
|
1684
|
-
# - +
|
|
1939
|
+
# - +converters+: +:numeric+
|
|
1685
1940
|
# - +header_converters+: +:symbol+
|
|
1686
1941
|
#
|
|
1687
1942
|
# Returns a CSV::Table object.
|
|
@@ -1700,6 +1955,42 @@ class CSV
|
|
|
1700
1955
|
options = default_options.merge(options)
|
|
1701
1956
|
read(path, **options)
|
|
1702
1957
|
end
|
|
1958
|
+
|
|
1959
|
+
ON_WINDOWS = /mingw|mswin/.match?(RUBY_PLATFORM)
|
|
1960
|
+
private_constant :ON_WINDOWS
|
|
1961
|
+
|
|
1962
|
+
private
|
|
1963
|
+
def may_enable_bom_detection_automatically(filename_or_io,
|
|
1964
|
+
mode,
|
|
1965
|
+
options,
|
|
1966
|
+
file_opts)
|
|
1967
|
+
if filename_or_io.is_a?(StringIO)
|
|
1968
|
+
# BOM detection for StringIO is skipped on Ruby 2.6 and earlier, whose StringIO lacks BOM support:
|
|
1969
|
+
# https://github.com/ruby/stringio/pull/47
|
|
1970
|
+
return if RUBY_VERSION < "2.7"
|
|
1971
|
+
else
|
|
1972
|
+
# "bom|utf-8" may be buggy on Windows:
|
|
1973
|
+
# https://bugs.ruby-lang.org/issues/20526
|
|
1974
|
+
return if ON_WINDOWS
|
|
1975
|
+
end
|
|
1976
|
+
return unless Encoding.default_external == Encoding::UTF_8
|
|
1977
|
+
return if options.key?(:encoding)
|
|
1978
|
+
return if options.key?(:external_encoding)
|
|
1979
|
+
return if mode.include?(":")
|
|
1980
|
+
file_opts[:encoding] = "bom|utf-8"
|
|
1981
|
+
end
|
|
1982
|
+
|
|
1983
|
+
if RUBY_VERSION < "2.7"
|
|
1984
|
+
def create_stringio(str, mode, opts)
|
|
1985
|
+
opts.delete_if {|k, _| k == :universal_newline or DEFAULT_OPTIONS.key?(k)}
|
|
1986
|
+
raise ArgumentError, "Unsupported options parsing StringIO: #{opts.keys}" unless opts.empty?
|
|
1987
|
+
StringIO.new(str, mode)
|
|
1988
|
+
end
|
|
1989
|
+
else
|
|
1990
|
+
def create_stringio(str, mode, opts)
|
|
1991
|
+
StringIO.new(str, mode, **opts)
|
|
1992
|
+
end
|
|
1993
|
+
end
|
|
1703
1994
|
end
|
|
1704
1995
|
|
|
1705
1996
|
# :call-seq:
|
|
@@ -1745,6 +2036,7 @@ class CSV
|
|
|
1745
2036
|
row_sep: :auto,
|
|
1746
2037
|
quote_char: '"',
|
|
1747
2038
|
field_size_limit: nil,
|
|
2039
|
+
max_field_size: nil,
|
|
1748
2040
|
converters: nil,
|
|
1749
2041
|
unconverted_fields: nil,
|
|
1750
2042
|
headers: false,
|
|
@@ -1760,16 +2052,27 @@ class CSV
|
|
|
1760
2052
|
encoding: nil,
|
|
1761
2053
|
nil_value: nil,
|
|
1762
2054
|
empty_value: "",
|
|
2055
|
+
strip: false,
|
|
1763
2056
|
quote_empty: true,
|
|
1764
2057
|
write_converters: nil,
|
|
1765
2058
|
write_nil_value: nil,
|
|
1766
|
-
write_empty_value: ""
|
|
1767
|
-
strip: false)
|
|
2059
|
+
write_empty_value: "")
|
|
1768
2060
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
|
1769
2061
|
|
|
1770
2062
|
if data.is_a?(String)
|
|
2063
|
+
if encoding
|
|
2064
|
+
if encoding.is_a?(String)
|
|
2065
|
+
data_external_encoding, data_internal_encoding = encoding.split(":", 2)
|
|
2066
|
+
if data_internal_encoding
|
|
2067
|
+
data = data.encode(data_internal_encoding, data_external_encoding)
|
|
2068
|
+
else
|
|
2069
|
+
data = data.dup.force_encoding(data_external_encoding)
|
|
2070
|
+
end
|
|
2071
|
+
else
|
|
2072
|
+
data = data.dup.force_encoding(encoding)
|
|
2073
|
+
end
|
|
2074
|
+
end
|
|
1771
2075
|
@io = StringIO.new(data)
|
|
1772
|
-
@io.set_encoding(encoding || data.encoding)
|
|
1773
2076
|
else
|
|
1774
2077
|
@io = data
|
|
1775
2078
|
end
|
|
@@ -1787,11 +2090,14 @@ class CSV
|
|
|
1787
2090
|
@initial_header_converters = header_converters
|
|
1788
2091
|
@initial_write_converters = write_converters
|
|
1789
2092
|
|
|
2093
|
+
if max_field_size.nil? and field_size_limit
|
|
2094
|
+
max_field_size = field_size_limit - 1
|
|
2095
|
+
end
|
|
1790
2096
|
@parser_options = {
|
|
1791
2097
|
column_separator: col_sep,
|
|
1792
2098
|
row_separator: row_sep,
|
|
1793
2099
|
quote_character: quote_char,
|
|
1794
|
-
|
|
2100
|
+
max_field_size: max_field_size,
|
|
1795
2101
|
unconverted_fields: unconverted_fields,
|
|
1796
2102
|
headers: headers,
|
|
1797
2103
|
return_headers: return_headers,
|
|
@@ -1823,6 +2129,12 @@ class CSV
|
|
|
1823
2129
|
writer if @writer_options[:write_headers]
|
|
1824
2130
|
end
|
|
1825
2131
|
|
|
2132
|
+
class TSV < CSV
|
|
2133
|
+
def initialize(data, **options)
|
|
2134
|
+
super(data, **({col_sep: "\t"}.merge(options)))
|
|
2135
|
+
end
|
|
2136
|
+
end
|
|
2137
|
+
|
|
1826
2138
|
# :call-seq:
|
|
1827
2139
|
# csv.col_sep -> string
|
|
1828
2140
|
#
|
|
@@ -1859,10 +2171,24 @@ class CSV
|
|
|
1859
2171
|
# Returns the limit for field size; used for parsing;
|
|
1860
2172
|
# see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
|
|
1861
2173
|
# CSV.new('').field_size_limit # => nil
|
|
2174
|
+
#
|
|
2175
|
+
# Deprecated since 3.2.3. Use +max_field_size+ instead.
|
|
1862
2176
|
def field_size_limit
|
|
1863
2177
|
parser.field_size_limit
|
|
1864
2178
|
end
|
|
1865
2179
|
|
|
2180
|
+
# :call-seq:
|
|
2181
|
+
# csv.max_field_size -> integer or nil
|
|
2182
|
+
#
|
|
2183
|
+
# Returns the limit for field size; used for parsing;
|
|
2184
|
+
# see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]:
|
|
2185
|
+
# CSV.new('').max_field_size # => nil
|
|
2186
|
+
#
|
|
2187
|
+
# Since 3.2.3.
|
|
2188
|
+
def max_field_size
|
|
2189
|
+
parser.max_field_size
|
|
2190
|
+
end
|
|
2191
|
+
|
|
1866
2192
|
# :call-seq:
|
|
1867
2193
|
# csv.skip_lines -> regexp or nil
|
|
1868
2194
|
#
|
|
@@ -1884,6 +2210,10 @@ class CSV
|
|
|
1884
2210
|
# csv.converters # => [:integer]
|
|
1885
2211
|
# csv.convert(proc {|x| x.to_s })
|
|
1886
2212
|
# csv.converters
|
|
2213
|
+
#
|
|
2214
|
+
# Note that you need to call
|
|
2215
|
+
# +Ractor.make_shareable(CSV::Converters)+ on the main Ractor to use
|
|
2216
|
+
# this method.
|
|
1887
2217
|
def converters
|
|
1888
2218
|
parser_fields_converter.map do |converter|
|
|
1889
2219
|
name = Converters.rassoc(converter)
|
|
@@ -1946,6 +2276,10 @@ class CSV
|
|
|
1946
2276
|
# Returns an \Array containing header converters; used for parsing;
|
|
1947
2277
|
# see {Header Converters}[#class-CSV-label-Header+Converters]:
|
|
1948
2278
|
# CSV.new('').header_converters # => []
|
|
2279
|
+
#
|
|
2280
|
+
# Note that you need to call
|
|
2281
|
+
# +Ractor.make_shareable(CSV::HeaderConverters)+ on the main Ractor
|
|
2282
|
+
# to use this method.
|
|
1949
2283
|
def header_converters
|
|
1950
2284
|
header_fields_converter.map do |converter|
|
|
1951
2285
|
name = HeaderConverters.rassoc(converter)
|
|
@@ -1985,7 +2319,7 @@ class CSV
|
|
|
1985
2319
|
end
|
|
1986
2320
|
|
|
1987
2321
|
# :call-seq:
|
|
1988
|
-
# csv.encoding ->
|
|
2322
|
+
# csv.encoding -> encoding
|
|
1989
2323
|
#
|
|
1990
2324
|
# Returns the encoding used for parsing and generating;
|
|
1991
2325
|
# see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]:
|
|
@@ -2353,7 +2687,13 @@ class CSV
|
|
|
2353
2687
|
# p row
|
|
2354
2688
|
# end
|
|
2355
2689
|
def each(&block)
|
|
2356
|
-
|
|
2690
|
+
return to_enum(__method__) unless block_given?
|
|
2691
|
+
begin
|
|
2692
|
+
while true
|
|
2693
|
+
yield(parser_enumerator.next)
|
|
2694
|
+
end
|
|
2695
|
+
rescue StopIteration
|
|
2696
|
+
end
|
|
2357
2697
|
end
|
|
2358
2698
|
|
|
2359
2699
|
# :call-seq:
|
|
@@ -2586,7 +2926,7 @@ class CSV
|
|
|
2586
2926
|
|
|
2587
2927
|
def build_parser_fields_converter
|
|
2588
2928
|
specific_options = {
|
|
2589
|
-
|
|
2929
|
+
builtin_converters_name: :Converters,
|
|
2590
2930
|
}
|
|
2591
2931
|
options = @base_fields_converter_options.merge(specific_options)
|
|
2592
2932
|
build_fields_converter(@initial_converters, options)
|
|
@@ -2598,7 +2938,7 @@ class CSV
|
|
|
2598
2938
|
|
|
2599
2939
|
def build_header_fields_converter
|
|
2600
2940
|
specific_options = {
|
|
2601
|
-
|
|
2941
|
+
builtin_converters_name: :HeaderConverters,
|
|
2602
2942
|
accept_nil: true,
|
|
2603
2943
|
}
|
|
2604
2944
|
options = @base_fields_converter_options.merge(specific_options)
|
|
@@ -2661,8 +3001,15 @@ end
|
|
|
2661
3001
|
# c.read.any? { |a| a.include?("zombies") }
|
|
2662
3002
|
# } #=> false
|
|
2663
3003
|
#
|
|
2664
|
-
|
|
2665
|
-
|
|
3004
|
+
# CSV options may also be given.
|
|
3005
|
+
#
|
|
3006
|
+
# io = StringIO.new
|
|
3007
|
+
# CSV(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] }
|
|
3008
|
+
#
|
|
3009
|
+
# This API is not Ractor-safe.
|
|
3010
|
+
#
|
|
3011
|
+
def CSV(*args, **options, &block)
|
|
3012
|
+
CSV.instance(*args, **options, &block)
|
|
2666
3013
|
end
|
|
2667
3014
|
|
|
2668
3015
|
require_relative "csv/version"
|