csv 3.2.3 → 3.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +148 -0
- data/doc/csv/options/parsing/liberal_parsing.rdoc +21 -2
- data/doc/csv/recipes/parsing.rdoc +1 -1
- data/lib/csv/fields_converter.rb +3 -2
- data/lib/csv/parser.rb +50 -41
- data/lib/csv/row.rb +1 -1
- data/lib/csv/table.rb +1 -2
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +5 -5
- data/lib/csv.rb +80 -25
- metadata +7 -9
- data/lib/csv/delete_suffix.rb +0 -18
- data/lib/csv/match_p.rb +0 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c64817c16c8991fc2596875101449b5452326fe91bd05e4bb6a66213113525d6
|
4
|
+
data.tar.gz: 19d6d80d6959f6cde0ac651774ea795dbd0f949135cae021fef3983d94248f9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 556f6582468d4a3c2994c12c25dba73b8db65e1a10f7306b9b5bc1fa345f47bf7872db1c603ddcd1a0eb359e7857c51a9874be2231dc821730ae62d15604c3b7
|
7
|
+
data.tar.gz: 348a25f4c1bb8e4fe0d71dc944e0a26165627803cb2528fc067642827fd3c253bda48aba179d3575950a7244bd4e8edf2eed9a99101952a07256a3f4f9d1e7fe
|
data/NEWS.md
CHANGED
@@ -1,5 +1,153 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.2.8 - 2023-11-08
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added `CSV::InvalidEncodingError`.
|
8
|
+
|
9
|
+
Patch by Kosuke Shibata.
|
10
|
+
|
11
|
+
GH-287
|
12
|
+
|
13
|
+
### Thanks
|
14
|
+
|
15
|
+
* Kosuke Shibata
|
16
|
+
|
17
|
+
## 3.2.7 - 2023-06-26
|
18
|
+
|
19
|
+
### Improvements
|
20
|
+
|
21
|
+
* Removed an unused internal variable.
|
22
|
+
[GH-273](https://github.com/ruby/csv/issues/273)
|
23
|
+
[Patch by Mau Magnaguagno]
|
24
|
+
|
25
|
+
* Changed to use `https://` instead of `http://` in documents.
|
26
|
+
[GH-274](https://github.com/ruby/csv/issues/274)
|
27
|
+
[Patch by Vivek Bharath Akupatni]
|
28
|
+
|
29
|
+
* Added prefix to a helper module in test.
|
30
|
+
[GH-278](https://github.com/ruby/csv/issues/278)
|
31
|
+
[Patch by Luke Gruber]
|
32
|
+
|
33
|
+
* Added documentation for `liberal_parsing: {backslash_quote: true}`.
|
34
|
+
[GH-280](https://github.com/ruby/csv/issues/280)
|
35
|
+
[Patch by Mark Schneider]
|
36
|
+
|
37
|
+
### Fixes
|
38
|
+
|
39
|
+
* Fixed a wrong execution result in documents.
|
40
|
+
[GH-276](https://github.com/ruby/csv/issues/276)
|
41
|
+
[Patch by Yuki Tsujimoto]
|
42
|
+
|
43
|
+
* Fixed a bug that the same line is used multiple times.
|
44
|
+
[GH-279](https://github.com/ruby/csv/issues/279)
|
45
|
+
[Reported by Gabriel Nagy]
|
46
|
+
|
47
|
+
### Thanks
|
48
|
+
|
49
|
+
* Mau Magnaguagno
|
50
|
+
|
51
|
+
* Vivek Bharath Akupatni
|
52
|
+
|
53
|
+
* Yuki Tsujimoto
|
54
|
+
|
55
|
+
* Luke Gruber
|
56
|
+
|
57
|
+
* Mark Schneider
|
58
|
+
|
59
|
+
* Gabriel Nagy
|
60
|
+
|
61
|
+
## 3.2.6 - 2022-12-08
|
62
|
+
|
63
|
+
### Improvements
|
64
|
+
|
65
|
+
* `CSV#read` consumes the same lines as other methods like
|
66
|
+
`CSV#shift`.
|
67
|
+
[[GitHub#258](https://github.com/ruby/csv/issues/258)]
|
68
|
+
[Reported by Lhoussaine Ghallou]
|
69
|
+
|
70
|
+
* All `Enumerable`-based methods consume the same lines as other
|
71
|
+
methods. This may have a performance penalty.
|
72
|
+
[[GitHub#260](https://github.com/ruby/csv/issues/260)]
|
73
|
+
[Reported by Lhoussaine Ghallou]
|
74
|
+
|
75
|
+
* Simplify some implementations.
|
76
|
+
[[GitHub#262](https://github.com/ruby/csv/pull/262)]
|
77
|
+
[[GitHub#263](https://github.com/ruby/csv/pull/263)]
|
78
|
+
[Patch by Mau Magnaguagno]
|
79
|
+
|
80
|
+
### Fixes
|
81
|
+
|
82
|
+
* Fixed `CSV.generate_lines` document.
|
83
|
+
[[GitHub#257](https://github.com/ruby/csv/pull/257)]
|
84
|
+
[Patch by Sampat Badhe]
|
85
|
+
|
86
|
+
### Thanks
|
87
|
+
|
88
|
+
* Sampat Badhe
|
89
|
+
|
90
|
+
* Lhoussaine Ghallou
|
91
|
+
|
92
|
+
* Mau Magnaguagno
|
93
|
+
|
94
|
+
## 3.2.5 - 2022-08-26
|
95
|
+
|
96
|
+
### Improvements
|
97
|
+
|
98
|
+
* Added `CSV.generate_lines`.
|
99
|
+
[[GitHub#255](https://github.com/ruby/csv/issues/255)]
|
100
|
+
[Reported by OKURA Masafumi]
|
101
|
+
[[GitHub#256](https://github.com/ruby/csv/pull/256)]
|
102
|
+
[Patch by Eriko Sugiyama]
|
103
|
+
|
104
|
+
### Thanks
|
105
|
+
|
106
|
+
* OKURA Masafumi
|
107
|
+
|
108
|
+
* Eriko Sugiyama
|
109
|
+
|
110
|
+
## 3.2.4 - 2022-08-22
|
111
|
+
|
112
|
+
### Improvements
|
113
|
+
|
114
|
+
* Cleaned up internal implementations.
|
115
|
+
[[GitHub#249](https://github.com/ruby/csv/pull/249)]
|
116
|
+
[[GitHub#250](https://github.com/ruby/csv/pull/250)]
|
117
|
+
[[GitHub#251](https://github.com/ruby/csv/pull/251)]
|
118
|
+
[Patch by Mau Magnaguagno]
|
119
|
+
|
120
|
+
* Added support for RFC 3339 style time.
|
121
|
+
[[GitHub#248](https://github.com/ruby/csv/pull/248)]
|
122
|
+
[Patch by Thierry Lambert]
|
123
|
+
|
124
|
+
* Added support for transcoding String CSV. Syntax is
|
125
|
+
`from-encoding:to-encoding`.
|
126
|
+
[[GitHub#254](https://github.com/ruby/csv/issues/254)]
|
127
|
+
[Reported by Richard Stueven]
|
128
|
+
|
129
|
+
* Added quoted information to `CSV::FieldInfo`.
|
130
|
+
[[GitHub#253](https://github.com/ruby/csv/pull/253)]
|
131
|
+
[Reported by Hirokazu SUZUKI]
|
132
|
+
|
133
|
+
### Fixes
|
134
|
+
|
135
|
+
* Fixed a link in documents.
|
136
|
+
[[GitHub#244](https://github.com/ruby/csv/pull/244)]
|
137
|
+
[Patch by Peter Zhu]
|
138
|
+
|
139
|
+
### Thanks
|
140
|
+
|
141
|
+
* Peter Zhu
|
142
|
+
|
143
|
+
* Mau Magnaguagno
|
144
|
+
|
145
|
+
* Thierry Lambert
|
146
|
+
|
147
|
+
* Richard Stueven
|
148
|
+
|
149
|
+
* Hirokazu SUZUKI
|
150
|
+
|
3
151
|
## 3.2.3 - 2022-04-09
|
4
152
|
|
5
153
|
### Improvements
|
@@ -1,13 +1,13 @@
|
|
1
1
|
====== Option +liberal_parsing+
|
2
2
|
|
3
|
-
Specifies the boolean value that determines whether
|
3
|
+
Specifies the boolean or hash value that determines whether
|
4
4
|
CSV will attempt to parse input not conformant with RFC 4180,
|
5
5
|
such as double quotes in unquoted fields.
|
6
6
|
|
7
7
|
Default value:
|
8
8
|
CSV::DEFAULT_OPTIONS.fetch(:liberal_parsing) # => false
|
9
9
|
|
10
|
-
For
|
10
|
+
For the next two examples:
|
11
11
|
str = 'is,this "three, or four",fields'
|
12
12
|
|
13
13
|
Without +liberal_parsing+:
|
@@ -17,3 +17,22 @@ Without +liberal_parsing+:
|
|
17
17
|
With +liberal_parsing+:
|
18
18
|
ary = CSV.parse_line(str, liberal_parsing: true)
|
19
19
|
ary # => ["is", "this \"three", " or four\"", "fields"]
|
20
|
+
|
21
|
+
Use the +backslash_quote+ sub-option to parse values that use
|
22
|
+
a backslash to escape a double-quote character. This
|
23
|
+
causes the parser to treat <code>\"</code> as if it were
|
24
|
+
<code>""</code>.
|
25
|
+
|
26
|
+
For the next two examples:
|
27
|
+
str = 'Show,"Harry \"Handcuff\" Houdini, the one and only","Tampa Theater"'
|
28
|
+
|
29
|
+
With +liberal_parsing+, but without the +backslash_quote+ sub-option:
|
30
|
+
# Incorrect interpretation of backslash; incorrectly interprets the quoted comma as a field separator.
|
31
|
+
ary = CSV.parse_line(str, liberal_parsing: true)
|
32
|
+
ary # => ["Show", "\"Harry \\\"Handcuff\\\" Houdini", " the one and only\"", "Tampa Theater"]
|
33
|
+
puts ary[1] # => "Harry \"Handcuff\" Houdini
|
34
|
+
|
35
|
+
With +liberal_parsing+ and its +backslash_quote+ sub-option:
|
36
|
+
ary = CSV.parse_line(str, liberal_parsing: { backslash_quote: true })
|
37
|
+
ary # => ["Show", "Harry \"Handcuff\" Houdini, the one and only", "Tampa Theater"]
|
38
|
+
puts ary[1] # => Harry "Handcuff" Houdini, the one and only
|
@@ -520,7 +520,7 @@ Apply multiple header converters by defining and registering a custom header con
|
|
520
520
|
To capture unconverted field values, use option +:unconverted_fields+:
|
521
521
|
source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
522
522
|
parsed = CSV.parse(source, converters: :integer, unconverted_fields: true)
|
523
|
-
parsed # => [["
|
523
|
+
parsed # => [["Name", "Value"], ["foo", 0], ["bar", 1], ["baz", 2]]
|
524
524
|
parsed.each {|row| p row.unconverted_fields }
|
525
525
|
Output:
|
526
526
|
["Name", "Value"]
|
data/lib/csv/fields_converter.rb
CHANGED
@@ -44,7 +44,7 @@ class CSV
|
|
44
44
|
@converters.empty?
|
45
45
|
end
|
46
46
|
|
47
|
-
def convert(fields, headers, lineno)
|
47
|
+
def convert(fields, headers, lineno, quoted_fields)
|
48
48
|
return fields unless need_convert?
|
49
49
|
|
50
50
|
fields.collect.with_index do |field, index|
|
@@ -63,7 +63,8 @@ class CSV
|
|
63
63
|
else
|
64
64
|
header = nil
|
65
65
|
end
|
66
|
-
|
66
|
+
quoted = quoted_fields[index]
|
67
|
+
field = converter[field, FieldInfo.new(index, lineno, header, quoted)]
|
67
68
|
end
|
68
69
|
break unless field.is_a?(String) # short-circuit pipeline for speed
|
69
70
|
end
|
data/lib/csv/parser.rb
CHANGED
@@ -2,15 +2,10 @@
|
|
2
2
|
|
3
3
|
require "strscan"
|
4
4
|
|
5
|
-
require_relative "delete_suffix"
|
6
5
|
require_relative "input_record_separator"
|
7
|
-
require_relative "match_p"
|
8
6
|
require_relative "row"
|
9
7
|
require_relative "table"
|
10
8
|
|
11
|
-
using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
12
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
13
|
-
|
14
9
|
class CSV
|
15
10
|
# Note: Don't use this class directly. This is an internal class.
|
16
11
|
class Parser
|
@@ -106,7 +101,7 @@ class CSV
|
|
106
101
|
position = @scanner.pos
|
107
102
|
offset = 0
|
108
103
|
n_row_separator_chars = row_separator.size
|
109
|
-
# trace(__method__, :start,
|
104
|
+
# trace(__method__, :start, input)
|
110
105
|
while true
|
111
106
|
input.each_line(row_separator) do |line|
|
112
107
|
@scanner.pos += line.bytesize
|
@@ -162,6 +157,7 @@ class CSV
|
|
162
157
|
# trace(__method__, pattern, :done, :last, value) if @last_scanner
|
163
158
|
return value if @last_scanner
|
164
159
|
|
160
|
+
# trace(__method__, pattern, :done, :nil) if value.nil?
|
165
161
|
return nil if value.nil?
|
166
162
|
while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
|
167
163
|
# trace(__method__, pattern, :sub, sub_value)
|
@@ -205,7 +201,8 @@ class CSV
|
|
205
201
|
# trace(__method__, :rescan, start, buffer)
|
206
202
|
string = @scanner.string
|
207
203
|
if scanner == @scanner
|
208
|
-
keep = string.byteslice(start,
|
204
|
+
keep = string.byteslice(start,
|
205
|
+
string.bytesize - @scanner.pos - start)
|
209
206
|
else
|
210
207
|
keep = string
|
211
208
|
end
|
@@ -417,8 +414,7 @@ class CSV
|
|
417
414
|
else
|
418
415
|
lineno = @lineno + 1
|
419
416
|
end
|
420
|
-
|
421
|
-
raise MalformedCSVError.new(message, lineno)
|
417
|
+
raise InvalidEncodingError.new(@encoding, lineno)
|
422
418
|
rescue UnexpectedError => error
|
423
419
|
if @scanner
|
424
420
|
ignore_broken_line
|
@@ -490,7 +486,6 @@ class CSV
|
|
490
486
|
message = ":quote_char has to be nil or a single character String"
|
491
487
|
raise ArgumentError, message
|
492
488
|
end
|
493
|
-
@double_quote_character = @quote_character * 2
|
494
489
|
@escaped_quote_character = Regexp.escape(@quote_character)
|
495
490
|
@escaped_quote = Regexp.new(@escaped_quote_character)
|
496
491
|
end
|
@@ -763,9 +758,10 @@ class CSV
|
|
763
758
|
case headers
|
764
759
|
when Array
|
765
760
|
@raw_headers = headers
|
761
|
+
quoted_fields = [false] * @raw_headers.size
|
766
762
|
@use_headers = true
|
767
763
|
when String
|
768
|
-
@raw_headers = parse_headers(headers)
|
764
|
+
@raw_headers, quoted_fields = parse_headers(headers)
|
769
765
|
@use_headers = true
|
770
766
|
when nil, false
|
771
767
|
@raw_headers = nil
|
@@ -775,21 +771,28 @@ class CSV
|
|
775
771
|
@use_headers = true
|
776
772
|
end
|
777
773
|
if @raw_headers
|
778
|
-
@headers = adjust_headers(@raw_headers)
|
774
|
+
@headers = adjust_headers(@raw_headers, quoted_fields)
|
779
775
|
else
|
780
776
|
@headers = nil
|
781
777
|
end
|
782
778
|
end
|
783
779
|
|
784
780
|
def parse_headers(row)
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
781
|
+
quoted_fields = []
|
782
|
+
converter = lambda do |field, info|
|
783
|
+
quoted_fields << info.quoted?
|
784
|
+
field
|
785
|
+
end
|
786
|
+
headers = CSV.parse_line(row,
|
787
|
+
col_sep: @column_separator,
|
788
|
+
row_sep: @row_separator,
|
789
|
+
quote_char: @quote_character,
|
790
|
+
converters: [converter])
|
791
|
+
[headers, quoted_fields]
|
789
792
|
end
|
790
793
|
|
791
|
-
def adjust_headers(headers)
|
792
|
-
adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno)
|
794
|
+
def adjust_headers(headers, quoted_fields)
|
795
|
+
adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields)
|
793
796
|
adjusted_headers.each {|h| h.freeze if h.is_a? String}
|
794
797
|
adjusted_headers
|
795
798
|
end
|
@@ -872,8 +875,7 @@ class CSV
|
|
872
875
|
!line.valid_encoding?
|
873
876
|
end
|
874
877
|
if index
|
875
|
-
|
876
|
-
raise MalformedCSVError.new(message, @lineno + index + 1)
|
878
|
+
raise InvalidEncodingError.new(@encoding, @lineno + index + 1)
|
877
879
|
end
|
878
880
|
end
|
879
881
|
Scanner.new(string)
|
@@ -933,9 +935,11 @@ class CSV
|
|
933
935
|
if line.empty?
|
934
936
|
next if @skip_blanks
|
935
937
|
row = []
|
938
|
+
quoted_fields = []
|
936
939
|
else
|
937
940
|
line = strip_value(line)
|
938
941
|
row = line.split(@split_column_separator, -1)
|
942
|
+
quoted_fields = [false] * row.size
|
939
943
|
if @max_field_size
|
940
944
|
row.each do |column|
|
941
945
|
validate_field_size(column)
|
@@ -949,7 +953,7 @@ class CSV
|
|
949
953
|
end
|
950
954
|
end
|
951
955
|
@last_line = original_line
|
952
|
-
emit_row(row, &block)
|
956
|
+
emit_row(row, quoted_fields, &block)
|
953
957
|
end
|
954
958
|
end
|
955
959
|
|
@@ -971,25 +975,30 @@ class CSV
|
|
971
975
|
next
|
972
976
|
end
|
973
977
|
row = []
|
978
|
+
quoted_fields = []
|
974
979
|
elsif line.include?(@cr) or line.include?(@lf)
|
975
980
|
@scanner.keep_back
|
976
981
|
@need_robust_parsing = true
|
977
982
|
return parse_quotable_robust(&block)
|
978
983
|
else
|
979
984
|
row = line.split(@split_column_separator, -1)
|
985
|
+
quoted_fields = []
|
980
986
|
n_columns = row.size
|
981
987
|
i = 0
|
982
988
|
while i < n_columns
|
983
989
|
column = row[i]
|
984
990
|
if column.empty?
|
991
|
+
quoted_fields << false
|
985
992
|
row[i] = nil
|
986
993
|
else
|
987
994
|
n_quotes = column.count(@quote_character)
|
988
995
|
if n_quotes.zero?
|
996
|
+
quoted_fields << false
|
989
997
|
# no quote
|
990
998
|
elsif n_quotes == 2 and
|
991
999
|
column.start_with?(@quote_character) and
|
992
1000
|
column.end_with?(@quote_character)
|
1001
|
+
quoted_fields << true
|
993
1002
|
row[i] = column[1..-2]
|
994
1003
|
else
|
995
1004
|
@scanner.keep_back
|
@@ -1004,13 +1013,14 @@ class CSV
|
|
1004
1013
|
@scanner.keep_drop
|
1005
1014
|
@scanner.keep_start
|
1006
1015
|
@last_line = original_line
|
1007
|
-
emit_row(row, &block)
|
1016
|
+
emit_row(row, quoted_fields, &block)
|
1008
1017
|
end
|
1009
1018
|
@scanner.keep_drop
|
1010
1019
|
end
|
1011
1020
|
|
1012
1021
|
def parse_quotable_robust(&block)
|
1013
1022
|
row = []
|
1023
|
+
quoted_fields = []
|
1014
1024
|
skip_needless_lines
|
1015
1025
|
start_row
|
1016
1026
|
while true
|
@@ -1024,20 +1034,24 @@ class CSV
|
|
1024
1034
|
end
|
1025
1035
|
if parse_column_end
|
1026
1036
|
row << value
|
1037
|
+
quoted_fields << @quoted_column_value
|
1027
1038
|
elsif parse_row_end
|
1028
1039
|
if row.empty? and value.nil?
|
1029
|
-
emit_row([], &block) unless @skip_blanks
|
1040
|
+
emit_row([], [], &block) unless @skip_blanks
|
1030
1041
|
else
|
1031
1042
|
row << value
|
1032
|
-
|
1043
|
+
quoted_fields << @quoted_column_value
|
1044
|
+
emit_row(row, quoted_fields, &block)
|
1033
1045
|
row = []
|
1046
|
+
quoted_fields = []
|
1034
1047
|
end
|
1035
1048
|
skip_needless_lines
|
1036
1049
|
start_row
|
1037
1050
|
elsif @scanner.eos?
|
1038
1051
|
break if row.empty? and value.nil?
|
1039
1052
|
row << value
|
1040
|
-
|
1053
|
+
quoted_fields << @quoted_column_value
|
1054
|
+
emit_row(row, quoted_fields, &block)
|
1041
1055
|
break
|
1042
1056
|
else
|
1043
1057
|
if @quoted_column_value
|
@@ -1141,7 +1155,7 @@ class CSV
|
|
1141
1155
|
if (n_quotes % 2).zero?
|
1142
1156
|
quotes[0, (n_quotes - 2) / 2]
|
1143
1157
|
else
|
1144
|
-
value = quotes[0,
|
1158
|
+
value = quotes[0, n_quotes / 2]
|
1145
1159
|
while true
|
1146
1160
|
quoted_value = @scanner.scan_all(@quoted_value)
|
1147
1161
|
value << quoted_value if quoted_value
|
@@ -1165,11 +1179,9 @@ class CSV
|
|
1165
1179
|
n_quotes = quotes.size
|
1166
1180
|
if n_quotes == 1
|
1167
1181
|
break
|
1168
|
-
elsif (n_quotes % 2) == 1
|
1169
|
-
value << quotes[0, (n_quotes - 1) / 2]
|
1170
|
-
break
|
1171
1182
|
else
|
1172
1183
|
value << quotes[0, n_quotes / 2]
|
1184
|
+
break if (n_quotes % 2) == 1
|
1173
1185
|
end
|
1174
1186
|
end
|
1175
1187
|
value
|
@@ -1205,18 +1217,15 @@ class CSV
|
|
1205
1217
|
|
1206
1218
|
def strip_value(value)
|
1207
1219
|
return value unless @strip
|
1208
|
-
return
|
1220
|
+
return value if value.nil?
|
1209
1221
|
|
1210
1222
|
case @strip
|
1211
1223
|
when String
|
1212
|
-
|
1213
|
-
|
1214
|
-
size -= 1
|
1215
|
-
value = value[1, size]
|
1224
|
+
while value.delete_prefix!(@strip)
|
1225
|
+
# do nothing
|
1216
1226
|
end
|
1217
|
-
while value.
|
1218
|
-
|
1219
|
-
value = value[0, size]
|
1227
|
+
while value.delete_suffix!(@strip)
|
1228
|
+
# do nothing
|
1220
1229
|
end
|
1221
1230
|
else
|
1222
1231
|
value.strip!
|
@@ -1239,22 +1248,22 @@ class CSV
|
|
1239
1248
|
@scanner.keep_start
|
1240
1249
|
end
|
1241
1250
|
|
1242
|
-
def emit_row(row, &block)
|
1251
|
+
def emit_row(row, quoted_fields, &block)
|
1243
1252
|
@lineno += 1
|
1244
1253
|
|
1245
1254
|
raw_row = row
|
1246
1255
|
if @use_headers
|
1247
1256
|
if @headers.nil?
|
1248
|
-
@headers = adjust_headers(row)
|
1257
|
+
@headers = adjust_headers(row, quoted_fields)
|
1249
1258
|
return unless @return_headers
|
1250
1259
|
row = Row.new(@headers, row, true)
|
1251
1260
|
else
|
1252
1261
|
row = Row.new(@headers,
|
1253
|
-
@fields_converter.convert(raw_row, @headers, @lineno))
|
1262
|
+
@fields_converter.convert(raw_row, @headers, @lineno, quoted_fields))
|
1254
1263
|
end
|
1255
1264
|
else
|
1256
1265
|
# convert fields, if needed...
|
1257
|
-
row = @fields_converter.convert(raw_row, nil, @lineno)
|
1266
|
+
row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields)
|
1258
1267
|
end
|
1259
1268
|
|
1260
1269
|
# inject unconverted fields and accessor, if requested...
|
data/lib/csv/row.rb
CHANGED
@@ -703,7 +703,7 @@ class CSV
|
|
703
703
|
# by +index_or_header+ and +specifiers+.
|
704
704
|
#
|
705
705
|
# The nested objects may be instances of various classes.
|
706
|
-
# See {Dig Methods}[
|
706
|
+
# See {Dig Methods}[rdoc-ref:dig_methods.rdoc].
|
707
707
|
#
|
708
708
|
# Examples:
|
709
709
|
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
data/lib/csv/table.rb
CHANGED
@@ -890,9 +890,8 @@ class CSV
|
|
890
890
|
if @mode == :row or @mode == :col_or_row # by index
|
891
891
|
@table.delete_if(&block)
|
892
892
|
else # by header
|
893
|
-
deleted = []
|
894
893
|
headers.each do |header|
|
895
|
-
|
894
|
+
delete(header) if yield([header, self[header]])
|
896
895
|
end
|
897
896
|
end
|
898
897
|
|
data/lib/csv/version.rb
CHANGED
data/lib/csv/writer.rb
CHANGED
@@ -1,11 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "input_record_separator"
|
4
|
-
require_relative "match_p"
|
5
4
|
require_relative "row"
|
6
5
|
|
7
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
8
|
-
|
9
6
|
class CSV
|
10
7
|
# Note: Don't use this class directly. This is an internal class.
|
11
8
|
class Writer
|
@@ -42,7 +39,10 @@ class CSV
|
|
42
39
|
@headers ||= row if @use_headers
|
43
40
|
@lineno += 1
|
44
41
|
|
45
|
-
|
42
|
+
if @fields_converter
|
43
|
+
quoted_fields = [false] * row.size
|
44
|
+
row = @fields_converter.convert(row, nil, lineno, quoted_fields)
|
45
|
+
end
|
46
46
|
|
47
47
|
i = -1
|
48
48
|
converted_row = row.collect do |field|
|
@@ -97,7 +97,7 @@ class CSV
|
|
97
97
|
return unless @headers
|
98
98
|
|
99
99
|
converter = @options[:header_fields_converter]
|
100
|
-
@headers = converter.convert(@headers, nil, 0)
|
100
|
+
@headers = converter.convert(@headers, nil, 0, [])
|
101
101
|
@headers.each do |header|
|
102
102
|
header.freeze if header.is_a?(String)
|
103
103
|
end
|
data/lib/csv.rb
CHANGED
@@ -70,7 +70,7 @@
|
|
70
70
|
# == What is CSV, really?
|
71
71
|
#
|
72
72
|
# CSV maintains a pretty strict definition of CSV taken directly from
|
73
|
-
# {the RFC}[
|
73
|
+
# {the RFC}[https://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one
|
74
74
|
# place and that is to make using this library easier. CSV will parse all valid
|
75
75
|
# CSV.
|
76
76
|
#
|
@@ -95,24 +95,13 @@ require "stringio"
|
|
95
95
|
|
96
96
|
require_relative "csv/fields_converter"
|
97
97
|
require_relative "csv/input_record_separator"
|
98
|
-
require_relative "csv/match_p"
|
99
98
|
require_relative "csv/parser"
|
100
99
|
require_relative "csv/row"
|
101
100
|
require_relative "csv/table"
|
102
101
|
require_relative "csv/writer"
|
103
102
|
|
104
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
105
|
-
|
106
103
|
# == \CSV
|
107
104
|
#
|
108
|
-
# === In a Hurry?
|
109
|
-
#
|
110
|
-
# If you are familiar with \CSV data and have a particular task in mind,
|
111
|
-
# you may want to go directly to the:
|
112
|
-
# - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html].
|
113
|
-
#
|
114
|
-
# Otherwise, read on here, about the API: classes, methods, and constants.
|
115
|
-
#
|
116
105
|
# === \CSV Data
|
117
106
|
#
|
118
107
|
# \CSV (comma-separated values) data is a text representation of a table:
|
@@ -857,6 +846,15 @@ class CSV
|
|
857
846
|
end
|
858
847
|
end
|
859
848
|
|
849
|
+
# The error thrown when the parser encounters invalid encoding in CSV.
|
850
|
+
class InvalidEncodingError < MalformedCSVError
|
851
|
+
attr_reader :encoding
|
852
|
+
def initialize(encoding, line_number)
|
853
|
+
@encoding = encoding
|
854
|
+
super("Invalid byte sequence in #{encoding}", line_number)
|
855
|
+
end
|
856
|
+
end
|
857
|
+
|
860
858
|
#
|
861
859
|
# A FieldInfo Struct contains details about a field's position in the data
|
862
860
|
# source it was read from. CSV will pass this Struct to some blocks that make
|
@@ -866,8 +864,9 @@ class CSV
|
|
866
864
|
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
867
865
|
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
868
866
|
# <b><tt>header</tt></b>:: The header for the column, when available.
|
867
|
+
# <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not.
|
869
868
|
#
|
870
|
-
FieldInfo = Struct.new(:index, :line, :header)
|
869
|
+
FieldInfo = Struct.new(:index, :line, :header, :quoted?)
|
871
870
|
|
872
871
|
# A Regexp used to find and convert some common Date formats.
|
873
872
|
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
@@ -875,10 +874,9 @@ class CSV
|
|
875
874
|
# A Regexp used to find and convert some common DateTime formats.
|
876
875
|
DateTimeMatcher =
|
877
876
|
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
878
|
-
|
879
|
-
# ISO-8601
|
877
|
+
# ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse
|
880
878
|
\d{4}-\d{2}-\d{2}
|
881
|
-
(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
879
|
+
(?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
882
880
|
)\z /x
|
883
881
|
|
884
882
|
# The encoding used by all converters.
|
@@ -1008,7 +1006,7 @@ class CSV
|
|
1008
1006
|
def instance(data = $stdout, **options)
|
1009
1007
|
# create a _signature_ for this method call, data object and options
|
1010
1008
|
sig = [data.object_id] +
|
1011
|
-
options.values_at(*DEFAULT_OPTIONS.keys
|
1009
|
+
options.values_at(*DEFAULT_OPTIONS.keys)
|
1012
1010
|
|
1013
1011
|
# fetch or create the instance for this signature
|
1014
1012
|
@@instances ||= Hash.new
|
@@ -1147,7 +1145,7 @@ class CSV
|
|
1147
1145
|
# File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
|
1148
1146
|
#
|
1149
1147
|
# When neither +in_string_or_io+ nor +out_string_or_io+ given,
|
1150
|
-
# parses from {ARGF}[
|
1148
|
+
# parses from {ARGF}[rdoc-ref:ARGF]
|
1151
1149
|
# and generates to STDOUT.
|
1152
1150
|
#
|
1153
1151
|
# Without headers:
|
@@ -1205,7 +1203,7 @@ class CSV
|
|
1205
1203
|
# parse options for input, output, or both
|
1206
1204
|
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
|
1207
1205
|
options.each do |key, value|
|
1208
|
-
case key
|
1206
|
+
case key
|
1209
1207
|
when /\Ain(?:put)?_(.+)\Z/
|
1210
1208
|
in_options[$1.to_sym] = value
|
1211
1209
|
when /\Aout(?:put)?_(.+)\Z/
|
@@ -1317,8 +1315,8 @@ class CSV
|
|
1317
1315
|
#
|
1318
1316
|
# Arguments:
|
1319
1317
|
# * Argument +path_or_io+ must be a file path or an \IO stream.
|
1320
|
-
# * Argument +mode+, if given, must be a \File mode
|
1321
|
-
# See {
|
1318
|
+
# * Argument +mode+, if given, must be a \File mode.
|
1319
|
+
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
1322
1320
|
# * Arguments <tt>**options</tt> must be keyword options.
|
1323
1321
|
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
1324
1322
|
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
@@ -1468,6 +1466,46 @@ class CSV
|
|
1468
1466
|
(new(str, **options) << row).string
|
1469
1467
|
end
|
1470
1468
|
|
1469
|
+
# :call-seq:
|
1470
|
+
# CSV.generate_lines(rows)
|
1471
|
+
# CSV.generate_lines(rows, **options)
|
1472
|
+
#
|
1473
|
+
# Returns the \String created by generating \CSV from +rows+
|
1474
|
+
# using the specified +options+.
|
1475
|
+
#
|
1476
|
+
# Argument +rows+ must be an \Array of rows. Each row is an \Array of \String or a \CSV::Row.
|
1477
|
+
#
|
1478
|
+
# Special options:
|
1479
|
+
# * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
|
1480
|
+
# and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise:
|
1481
|
+
# $INPUT_RECORD_SEPARATOR # => "\n"
|
1482
|
+
# * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
|
1483
|
+
# Encoding for the output. This method will try to guess your Encoding from
|
1484
|
+
# the first non-+nil+ field in +row+, if possible, but you may need to use
|
1485
|
+
# this parameter as a backup plan.
|
1486
|
+
#
|
1487
|
+
# For other +options+,
|
1488
|
+
# see {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1489
|
+
#
|
1490
|
+
# ---
|
1491
|
+
#
|
1492
|
+
# Returns the \String generated from an \Array of rows:
|
1493
|
+
# CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n"
|
1494
|
+
#
|
1495
|
+
# ---
|
1496
|
+
#
|
1497
|
+
# Raises an exception if +rows+ does not respond to +each+:
|
1498
|
+
# # Raises NoMethodError (undefined method `each' for :foo:Symbol)
|
1499
|
+
# CSV.generate_lines(:foo)
|
1500
|
+
#
|
1501
|
+
def generate_lines(rows, **options)
|
1502
|
+
self.generate(**options) do |csv|
|
1503
|
+
rows.each do |row|
|
1504
|
+
csv << row
|
1505
|
+
end
|
1506
|
+
end
|
1507
|
+
end
|
1508
|
+
|
1471
1509
|
#
|
1472
1510
|
# :call-seq:
|
1473
1511
|
# open(file_path, mode = "rb", **options ) -> new_csv
|
@@ -1484,8 +1522,8 @@ class CSV
|
|
1484
1522
|
#
|
1485
1523
|
# * Argument +path+, if given, must be the path to a file.
|
1486
1524
|
# :include: ../doc/csv/arguments/io.rdoc
|
1487
|
-
# * Argument +mode+, if given, must be a \File mode
|
1488
|
-
# See {
|
1525
|
+
# * Argument +mode+, if given, must be a \File mode.
|
1526
|
+
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
1489
1527
|
# * Arguments <tt>**options</tt> must be keyword options.
|
1490
1528
|
# See {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1491
1529
|
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
@@ -1893,8 +1931,19 @@ class CSV
|
|
1893
1931
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
1894
1932
|
|
1895
1933
|
if data.is_a?(String)
|
1934
|
+
if encoding
|
1935
|
+
if encoding.is_a?(String)
|
1936
|
+
data_external_encoding, data_internal_encoding = encoding.split(":", 2)
|
1937
|
+
if data_internal_encoding
|
1938
|
+
data = data.encode(data_internal_encoding, data_external_encoding)
|
1939
|
+
else
|
1940
|
+
data = data.dup.force_encoding(data_external_encoding)
|
1941
|
+
end
|
1942
|
+
else
|
1943
|
+
data = data.dup.force_encoding(encoding)
|
1944
|
+
end
|
1945
|
+
end
|
1896
1946
|
@io = StringIO.new(data)
|
1897
|
-
@io.set_encoding(encoding || data.encoding)
|
1898
1947
|
else
|
1899
1948
|
@io = data
|
1900
1949
|
end
|
@@ -2503,7 +2552,13 @@ class CSV
|
|
2503
2552
|
# p row
|
2504
2553
|
# end
|
2505
2554
|
def each(&block)
|
2506
|
-
|
2555
|
+
return to_enum(__method__) unless block_given?
|
2556
|
+
begin
|
2557
|
+
while true
|
2558
|
+
yield(parser_enumerator.next)
|
2559
|
+
end
|
2560
|
+
rescue StopIteration
|
2561
|
+
end
|
2507
2562
|
end
|
2508
2563
|
|
2509
2564
|
# :call-seq:
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.3
|
4
|
+
version: 3.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
8
8
|
- Kouhei Sutou
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-11-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -71,7 +71,7 @@ description: The CSV library provides a complete interface to CSV files and data
|
|
71
71
|
It offers tools to enable you to read and write to and from Strings or IO objects,
|
72
72
|
as needed.
|
73
73
|
email:
|
74
|
-
-
|
74
|
+
-
|
75
75
|
- kou@cozmixng.org
|
76
76
|
executables: []
|
77
77
|
extensions: []
|
@@ -116,10 +116,8 @@ files:
|
|
116
116
|
- lib/csv.rb
|
117
117
|
- lib/csv/core_ext/array.rb
|
118
118
|
- lib/csv/core_ext/string.rb
|
119
|
-
- lib/csv/delete_suffix.rb
|
120
119
|
- lib/csv/fields_converter.rb
|
121
120
|
- lib/csv/input_record_separator.rb
|
122
|
-
- lib/csv/match_p.rb
|
123
121
|
- lib/csv/parser.rb
|
124
122
|
- lib/csv/row.rb
|
125
123
|
- lib/csv/table.rb
|
@@ -130,7 +128,7 @@ licenses:
|
|
130
128
|
- Ruby
|
131
129
|
- BSD-2-Clause
|
132
130
|
metadata: {}
|
133
|
-
post_install_message:
|
131
|
+
post_install_message:
|
134
132
|
rdoc_options:
|
135
133
|
- "--main"
|
136
134
|
- README.md
|
@@ -147,8 +145,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
145
|
- !ruby/object:Gem::Version
|
148
146
|
version: '0'
|
149
147
|
requirements: []
|
150
|
-
rubygems_version: 3.
|
151
|
-
signing_key:
|
148
|
+
rubygems_version: 3.5.0.dev
|
149
|
+
signing_key:
|
152
150
|
specification_version: 4
|
153
151
|
summary: CSV Reading and Writing
|
154
152
|
test_files: []
|
data/lib/csv/delete_suffix.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# This provides String#delete_suffix for Ruby 2.4.
|
4
|
-
unless String.method_defined?(:delete_suffix)
|
5
|
-
class CSV
|
6
|
-
module DeleteSuffix
|
7
|
-
refine String do
|
8
|
-
def delete_suffix(suffix)
|
9
|
-
if end_with?(suffix)
|
10
|
-
self[0...-suffix.size]
|
11
|
-
else
|
12
|
-
self
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/csv/match_p.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# This provides String#match? and Regexp#match? for Ruby 2.3.
|
4
|
-
unless String.method_defined?(:match?)
|
5
|
-
class CSV
|
6
|
-
module MatchP
|
7
|
-
refine String do
|
8
|
-
def match?(pattern)
|
9
|
-
self =~ pattern
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
refine Regexp do
|
14
|
-
def match?(string)
|
15
|
-
self =~ string
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|