csv 3.2.3 → 3.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +148 -0
- data/doc/csv/options/parsing/liberal_parsing.rdoc +21 -2
- data/doc/csv/recipes/parsing.rdoc +1 -1
- data/lib/csv/fields_converter.rb +3 -2
- data/lib/csv/parser.rb +50 -41
- data/lib/csv/row.rb +1 -1
- data/lib/csv/table.rb +1 -2
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +5 -5
- data/lib/csv.rb +80 -25
- metadata +7 -9
- data/lib/csv/delete_suffix.rb +0 -18
- data/lib/csv/match_p.rb +0 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c64817c16c8991fc2596875101449b5452326fe91bd05e4bb6a66213113525d6
|
4
|
+
data.tar.gz: 19d6d80d6959f6cde0ac651774ea795dbd0f949135cae021fef3983d94248f9c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 556f6582468d4a3c2994c12c25dba73b8db65e1a10f7306b9b5bc1fa345f47bf7872db1c603ddcd1a0eb359e7857c51a9874be2231dc821730ae62d15604c3b7
|
7
|
+
data.tar.gz: 348a25f4c1bb8e4fe0d71dc944e0a26165627803cb2528fc067642827fd3c253bda48aba179d3575950a7244bd4e8edf2eed9a99101952a07256a3f4f9d1e7fe
|
data/NEWS.md
CHANGED
@@ -1,5 +1,153 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.2.8 - 2023-11-08
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added `CSV::InvalidEncodingError`.
|
8
|
+
|
9
|
+
Patch by Kosuke Shibata.
|
10
|
+
|
11
|
+
GH-287
|
12
|
+
|
13
|
+
### Thanks
|
14
|
+
|
15
|
+
* Kosuke Shibata
|
16
|
+
|
17
|
+
## 3.2.7 - 2023-06-26
|
18
|
+
|
19
|
+
### Improvements
|
20
|
+
|
21
|
+
* Removed an unused internal variable.
|
22
|
+
[GH-273](https://github.com/ruby/csv/issues/273)
|
23
|
+
[Patch by Mau Magnaguagno]
|
24
|
+
|
25
|
+
* Changed to use `https://` instead of `http://` in documents.
|
26
|
+
[GH-274](https://github.com/ruby/csv/issues/274)
|
27
|
+
[Patch by Vivek Bharath Akupatni]
|
28
|
+
|
29
|
+
* Added prefix to a helper module in test.
|
30
|
+
[GH-278](https://github.com/ruby/csv/issues/278)
|
31
|
+
[Patch by Luke Gruber]
|
32
|
+
|
33
|
+
* Added documentation for `liberal_parsing: {backslash_quote: true}`.
|
34
|
+
[GH-280](https://github.com/ruby/csv/issues/280)
|
35
|
+
[Patch by Mark Schneider]
|
36
|
+
|
37
|
+
### Fixes
|
38
|
+
|
39
|
+
* Fixed a wrong execution result in documents.
|
40
|
+
[GH-276](https://github.com/ruby/csv/issues/276)
|
41
|
+
[Patch by Yuki Tsujimoto]
|
42
|
+
|
43
|
+
* Fixed a bug that the same line is used multiple times.
|
44
|
+
[GH-279](https://github.com/ruby/csv/issues/279)
|
45
|
+
[Reported by Gabriel Nagy]
|
46
|
+
|
47
|
+
### Thanks
|
48
|
+
|
49
|
+
* Mau Magnaguagno
|
50
|
+
|
51
|
+
* Vivek Bharath Akupatni
|
52
|
+
|
53
|
+
* Yuki Tsujimoto
|
54
|
+
|
55
|
+
* Luke Gruber
|
56
|
+
|
57
|
+
* Mark Schneider
|
58
|
+
|
59
|
+
* Gabriel Nagy
|
60
|
+
|
61
|
+
## 3.2.6 - 2022-12-08
|
62
|
+
|
63
|
+
### Improvements
|
64
|
+
|
65
|
+
* `CSV#read` consumes the same lines with other methods like
|
66
|
+
`CSV#shift`.
|
67
|
+
[[GitHub#258](https://github.com/ruby/csv/issues/258)]
|
68
|
+
[Reported by Lhoussaine Ghallou]
|
69
|
+
|
70
|
+
* All `Enumerable` based methods consume the same lines with other
|
71
|
+
methods. This may have a performance penalty.
|
72
|
+
[[GitHub#260](https://github.com/ruby/csv/issues/260)]
|
73
|
+
[Reported by Lhoussaine Ghallou]
|
74
|
+
|
75
|
+
* Simplify some implementations.
|
76
|
+
[[GitHub#262](https://github.com/ruby/csv/pull/262)]
|
77
|
+
[[GitHub#263](https://github.com/ruby/csv/pull/263)]
|
78
|
+
[Patch by Mau Magnaguagno]
|
79
|
+
|
80
|
+
### Fixes
|
81
|
+
|
82
|
+
* Fixed `CSV.generate_lines` document.
|
83
|
+
[[GitHub#257](https://github.com/ruby/csv/pull/257)]
|
84
|
+
[Patch by Sampat Badhe]
|
85
|
+
|
86
|
+
### Thanks
|
87
|
+
|
88
|
+
* Sampat Badhe
|
89
|
+
|
90
|
+
* Lhoussaine Ghallou
|
91
|
+
|
92
|
+
* Mau Magnaguagno
|
93
|
+
|
94
|
+
## 3.2.5 - 2022-08-26
|
95
|
+
|
96
|
+
### Improvements
|
97
|
+
|
98
|
+
* Added `CSV.generate_lines`.
|
99
|
+
[[GitHub#255](https://github.com/ruby/csv/issues/255)]
|
100
|
+
[Reported by OKURA Masafumi]
|
101
|
+
[[GitHub#256](https://github.com/ruby/csv/pull/256)]
|
102
|
+
[Patch by Eriko Sugiyama]
|
103
|
+
|
104
|
+
### Thanks
|
105
|
+
|
106
|
+
* OKURA Masafumi
|
107
|
+
|
108
|
+
* Eriko Sugiyama
|
109
|
+
|
110
|
+
## 3.2.4 - 2022-08-22
|
111
|
+
|
112
|
+
### Improvements
|
113
|
+
|
114
|
+
* Cleaned up internal implementations.
|
115
|
+
[[GitHub#249](https://github.com/ruby/csv/pull/249)]
|
116
|
+
[[GitHub#250](https://github.com/ruby/csv/pull/250)]
|
117
|
+
[[GitHub#251](https://github.com/ruby/csv/pull/251)]
|
118
|
+
[Patch by Mau Magnaguagno]
|
119
|
+
|
120
|
+
* Added support for RFC 3339 style time.
|
121
|
+
[[GitHub#248](https://github.com/ruby/csv/pull/248)]
|
122
|
+
[Patch by Thierry Lambert]
|
123
|
+
|
124
|
+
* Added support for transcoding String CSV. Syntax is
|
125
|
+
`from-encoding:to-encoding`.
|
126
|
+
[[GitHub#254](https://github.com/ruby/csv/issues/254)]
|
127
|
+
[Reported by Richard Stueven]
|
128
|
+
|
129
|
+
* Added quoted information to `CSV::FieldInfo`.
|
130
|
+
[[GitHub#253](https://github.com/ruby/csv/pull/253)]
|
131
|
+
[Reported by Hirokazu SUZUKI]
|
132
|
+
|
133
|
+
### Fixes
|
134
|
+
|
135
|
+
* Fixed a link in documents.
|
136
|
+
[[GitHub#244](https://github.com/ruby/csv/pull/244)]
|
137
|
+
[Patch by Peter Zhu]
|
138
|
+
|
139
|
+
### Thanks
|
140
|
+
|
141
|
+
* Peter Zhu
|
142
|
+
|
143
|
+
* Mau Magnaguagno
|
144
|
+
|
145
|
+
* Thierry Lambert
|
146
|
+
|
147
|
+
* Richard Stueven
|
148
|
+
|
149
|
+
* Hirokazu SUZUKI
|
150
|
+
|
3
151
|
## 3.2.3 - 2022-04-09
|
4
152
|
|
5
153
|
### Improvements
|
@@ -1,13 +1,13 @@
|
|
1
1
|
====== Option +liberal_parsing+
|
2
2
|
|
3
|
-
Specifies the boolean value that determines whether
|
3
|
+
Specifies the boolean or hash value that determines whether
|
4
4
|
CSV will attempt to parse input not conformant with RFC 4180,
|
5
5
|
such as double quotes in unquoted fields.
|
6
6
|
|
7
7
|
Default value:
|
8
8
|
CSV::DEFAULT_OPTIONS.fetch(:liberal_parsing) # => false
|
9
9
|
|
10
|
-
For
|
10
|
+
For the next two examples:
|
11
11
|
str = 'is,this "three, or four",fields'
|
12
12
|
|
13
13
|
Without +liberal_parsing+:
|
@@ -17,3 +17,22 @@ Without +liberal_parsing+:
|
|
17
17
|
With +liberal_parsing+:
|
18
18
|
ary = CSV.parse_line(str, liberal_parsing: true)
|
19
19
|
ary # => ["is", "this \"three", " or four\"", "fields"]
|
20
|
+
|
21
|
+
Use the +backslash_quote+ sub-option to parse values that use
|
22
|
+
a backslash to escape a double-quote character. This
|
23
|
+
causes the parser to treat <code>\"</code> as if it were
|
24
|
+
<code>""</code>.
|
25
|
+
|
26
|
+
For the next two examples:
|
27
|
+
str = 'Show,"Harry \"Handcuff\" Houdini, the one and only","Tampa Theater"'
|
28
|
+
|
29
|
+
With +liberal_parsing+, but without the +backslash_quote+ sub-option:
|
30
|
+
# Incorrect interpretation of backslash; incorrectly interprets the quoted comma as a field separator.
|
31
|
+
ary = CSV.parse_line(str, liberal_parsing: true)
|
32
|
+
ary # => ["Show", "\"Harry \\\"Handcuff\\\" Houdini", " the one and only\"", "Tampa Theater"]
|
33
|
+
puts ary[1] # => "Harry \"Handcuff\" Houdini
|
34
|
+
|
35
|
+
With +liberal_parsing+ and its +backslash_quote+ sub-option:
|
36
|
+
ary = CSV.parse_line(str, liberal_parsing: { backslash_quote: true })
|
37
|
+
ary # => ["Show", "Harry \"Handcuff\" Houdini, the one and only", "Tampa Theater"]
|
38
|
+
puts ary[1] # => Harry "Handcuff" Houdini, the one and only
|
@@ -520,7 +520,7 @@ Apply multiple header converters by defining and registering a custom header con
|
|
520
520
|
To capture unconverted field values, use option +:unconverted_fields+:
|
521
521
|
source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
522
522
|
parsed = CSV.parse(source, converters: :integer, unconverted_fields: true)
|
523
|
-
parsed # => [["
|
523
|
+
parsed # => [["Name", "Value"], ["foo", 0], ["bar", 1], ["baz", 2]]
|
524
524
|
parsed.each {|row| p row.unconverted_fields }
|
525
525
|
Output:
|
526
526
|
["Name", "Value"]
|
data/lib/csv/fields_converter.rb
CHANGED
@@ -44,7 +44,7 @@ class CSV
|
|
44
44
|
@converters.empty?
|
45
45
|
end
|
46
46
|
|
47
|
-
def convert(fields, headers, lineno)
|
47
|
+
def convert(fields, headers, lineno, quoted_fields)
|
48
48
|
return fields unless need_convert?
|
49
49
|
|
50
50
|
fields.collect.with_index do |field, index|
|
@@ -63,7 +63,8 @@ class CSV
|
|
63
63
|
else
|
64
64
|
header = nil
|
65
65
|
end
|
66
|
-
|
66
|
+
quoted = quoted_fields[index]
|
67
|
+
field = converter[field, FieldInfo.new(index, lineno, header, quoted)]
|
67
68
|
end
|
68
69
|
break unless field.is_a?(String) # short-circuit pipeline for speed
|
69
70
|
end
|
data/lib/csv/parser.rb
CHANGED
@@ -2,15 +2,10 @@
|
|
2
2
|
|
3
3
|
require "strscan"
|
4
4
|
|
5
|
-
require_relative "delete_suffix"
|
6
5
|
require_relative "input_record_separator"
|
7
|
-
require_relative "match_p"
|
8
6
|
require_relative "row"
|
9
7
|
require_relative "table"
|
10
8
|
|
11
|
-
using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
12
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
13
|
-
|
14
9
|
class CSV
|
15
10
|
# Note: Don't use this class directly. This is an internal class.
|
16
11
|
class Parser
|
@@ -106,7 +101,7 @@ class CSV
|
|
106
101
|
position = @scanner.pos
|
107
102
|
offset = 0
|
108
103
|
n_row_separator_chars = row_separator.size
|
109
|
-
# trace(__method__, :start,
|
104
|
+
# trace(__method__, :start, input)
|
110
105
|
while true
|
111
106
|
input.each_line(row_separator) do |line|
|
112
107
|
@scanner.pos += line.bytesize
|
@@ -162,6 +157,7 @@ class CSV
|
|
162
157
|
# trace(__method__, pattern, :done, :last, value) if @last_scanner
|
163
158
|
return value if @last_scanner
|
164
159
|
|
160
|
+
# trace(__method__, pattern, :done, :nil) if value.nil?
|
165
161
|
return nil if value.nil?
|
166
162
|
while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
|
167
163
|
# trace(__method__, pattern, :sub, sub_value)
|
@@ -205,7 +201,8 @@ class CSV
|
|
205
201
|
# trace(__method__, :rescan, start, buffer)
|
206
202
|
string = @scanner.string
|
207
203
|
if scanner == @scanner
|
208
|
-
keep = string.byteslice(start,
|
204
|
+
keep = string.byteslice(start,
|
205
|
+
string.bytesize - @scanner.pos - start)
|
209
206
|
else
|
210
207
|
keep = string
|
211
208
|
end
|
@@ -417,8 +414,7 @@ class CSV
|
|
417
414
|
else
|
418
415
|
lineno = @lineno + 1
|
419
416
|
end
|
420
|
-
|
421
|
-
raise MalformedCSVError.new(message, lineno)
|
417
|
+
raise InvalidEncodingError.new(@encoding, lineno)
|
422
418
|
rescue UnexpectedError => error
|
423
419
|
if @scanner
|
424
420
|
ignore_broken_line
|
@@ -490,7 +486,6 @@ class CSV
|
|
490
486
|
message = ":quote_char has to be nil or a single character String"
|
491
487
|
raise ArgumentError, message
|
492
488
|
end
|
493
|
-
@double_quote_character = @quote_character * 2
|
494
489
|
@escaped_quote_character = Regexp.escape(@quote_character)
|
495
490
|
@escaped_quote = Regexp.new(@escaped_quote_character)
|
496
491
|
end
|
@@ -763,9 +758,10 @@ class CSV
|
|
763
758
|
case headers
|
764
759
|
when Array
|
765
760
|
@raw_headers = headers
|
761
|
+
quoted_fields = [false] * @raw_headers.size
|
766
762
|
@use_headers = true
|
767
763
|
when String
|
768
|
-
@raw_headers = parse_headers(headers)
|
764
|
+
@raw_headers, quoted_fields = parse_headers(headers)
|
769
765
|
@use_headers = true
|
770
766
|
when nil, false
|
771
767
|
@raw_headers = nil
|
@@ -775,21 +771,28 @@ class CSV
|
|
775
771
|
@use_headers = true
|
776
772
|
end
|
777
773
|
if @raw_headers
|
778
|
-
@headers = adjust_headers(@raw_headers)
|
774
|
+
@headers = adjust_headers(@raw_headers, quoted_fields)
|
779
775
|
else
|
780
776
|
@headers = nil
|
781
777
|
end
|
782
778
|
end
|
783
779
|
|
784
780
|
def parse_headers(row)
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
781
|
+
quoted_fields = []
|
782
|
+
converter = lambda do |field, info|
|
783
|
+
quoted_fields << info.quoted?
|
784
|
+
field
|
785
|
+
end
|
786
|
+
headers = CSV.parse_line(row,
|
787
|
+
col_sep: @column_separator,
|
788
|
+
row_sep: @row_separator,
|
789
|
+
quote_char: @quote_character,
|
790
|
+
converters: [converter])
|
791
|
+
[headers, quoted_fields]
|
789
792
|
end
|
790
793
|
|
791
|
-
def adjust_headers(headers)
|
792
|
-
adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno)
|
794
|
+
def adjust_headers(headers, quoted_fields)
|
795
|
+
adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields)
|
793
796
|
adjusted_headers.each {|h| h.freeze if h.is_a? String}
|
794
797
|
adjusted_headers
|
795
798
|
end
|
@@ -872,8 +875,7 @@ class CSV
|
|
872
875
|
!line.valid_encoding?
|
873
876
|
end
|
874
877
|
if index
|
875
|
-
|
876
|
-
raise MalformedCSVError.new(message, @lineno + index + 1)
|
878
|
+
raise InvalidEncodingError.new(@encoding, @lineno + index + 1)
|
877
879
|
end
|
878
880
|
end
|
879
881
|
Scanner.new(string)
|
@@ -933,9 +935,11 @@ class CSV
|
|
933
935
|
if line.empty?
|
934
936
|
next if @skip_blanks
|
935
937
|
row = []
|
938
|
+
quoted_fields = []
|
936
939
|
else
|
937
940
|
line = strip_value(line)
|
938
941
|
row = line.split(@split_column_separator, -1)
|
942
|
+
quoted_fields = [false] * row.size
|
939
943
|
if @max_field_size
|
940
944
|
row.each do |column|
|
941
945
|
validate_field_size(column)
|
@@ -949,7 +953,7 @@ class CSV
|
|
949
953
|
end
|
950
954
|
end
|
951
955
|
@last_line = original_line
|
952
|
-
emit_row(row, &block)
|
956
|
+
emit_row(row, quoted_fields, &block)
|
953
957
|
end
|
954
958
|
end
|
955
959
|
|
@@ -971,25 +975,30 @@ class CSV
|
|
971
975
|
next
|
972
976
|
end
|
973
977
|
row = []
|
978
|
+
quoted_fields = []
|
974
979
|
elsif line.include?(@cr) or line.include?(@lf)
|
975
980
|
@scanner.keep_back
|
976
981
|
@need_robust_parsing = true
|
977
982
|
return parse_quotable_robust(&block)
|
978
983
|
else
|
979
984
|
row = line.split(@split_column_separator, -1)
|
985
|
+
quoted_fields = []
|
980
986
|
n_columns = row.size
|
981
987
|
i = 0
|
982
988
|
while i < n_columns
|
983
989
|
column = row[i]
|
984
990
|
if column.empty?
|
991
|
+
quoted_fields << false
|
985
992
|
row[i] = nil
|
986
993
|
else
|
987
994
|
n_quotes = column.count(@quote_character)
|
988
995
|
if n_quotes.zero?
|
996
|
+
quoted_fields << false
|
989
997
|
# no quote
|
990
998
|
elsif n_quotes == 2 and
|
991
999
|
column.start_with?(@quote_character) and
|
992
1000
|
column.end_with?(@quote_character)
|
1001
|
+
quoted_fields << true
|
993
1002
|
row[i] = column[1..-2]
|
994
1003
|
else
|
995
1004
|
@scanner.keep_back
|
@@ -1004,13 +1013,14 @@ class CSV
|
|
1004
1013
|
@scanner.keep_drop
|
1005
1014
|
@scanner.keep_start
|
1006
1015
|
@last_line = original_line
|
1007
|
-
emit_row(row, &block)
|
1016
|
+
emit_row(row, quoted_fields, &block)
|
1008
1017
|
end
|
1009
1018
|
@scanner.keep_drop
|
1010
1019
|
end
|
1011
1020
|
|
1012
1021
|
def parse_quotable_robust(&block)
|
1013
1022
|
row = []
|
1023
|
+
quoted_fields = []
|
1014
1024
|
skip_needless_lines
|
1015
1025
|
start_row
|
1016
1026
|
while true
|
@@ -1024,20 +1034,24 @@ class CSV
|
|
1024
1034
|
end
|
1025
1035
|
if parse_column_end
|
1026
1036
|
row << value
|
1037
|
+
quoted_fields << @quoted_column_value
|
1027
1038
|
elsif parse_row_end
|
1028
1039
|
if row.empty? and value.nil?
|
1029
|
-
emit_row([], &block) unless @skip_blanks
|
1040
|
+
emit_row([], [], &block) unless @skip_blanks
|
1030
1041
|
else
|
1031
1042
|
row << value
|
1032
|
-
|
1043
|
+
quoted_fields << @quoted_column_value
|
1044
|
+
emit_row(row, quoted_fields, &block)
|
1033
1045
|
row = []
|
1046
|
+
quoted_fields = []
|
1034
1047
|
end
|
1035
1048
|
skip_needless_lines
|
1036
1049
|
start_row
|
1037
1050
|
elsif @scanner.eos?
|
1038
1051
|
break if row.empty? and value.nil?
|
1039
1052
|
row << value
|
1040
|
-
|
1053
|
+
quoted_fields << @quoted_column_value
|
1054
|
+
emit_row(row, quoted_fields, &block)
|
1041
1055
|
break
|
1042
1056
|
else
|
1043
1057
|
if @quoted_column_value
|
@@ -1141,7 +1155,7 @@ class CSV
|
|
1141
1155
|
if (n_quotes % 2).zero?
|
1142
1156
|
quotes[0, (n_quotes - 2) / 2]
|
1143
1157
|
else
|
1144
|
-
value = quotes[0,
|
1158
|
+
value = quotes[0, n_quotes / 2]
|
1145
1159
|
while true
|
1146
1160
|
quoted_value = @scanner.scan_all(@quoted_value)
|
1147
1161
|
value << quoted_value if quoted_value
|
@@ -1165,11 +1179,9 @@ class CSV
|
|
1165
1179
|
n_quotes = quotes.size
|
1166
1180
|
if n_quotes == 1
|
1167
1181
|
break
|
1168
|
-
elsif (n_quotes % 2) == 1
|
1169
|
-
value << quotes[0, (n_quotes - 1) / 2]
|
1170
|
-
break
|
1171
1182
|
else
|
1172
1183
|
value << quotes[0, n_quotes / 2]
|
1184
|
+
break if (n_quotes % 2) == 1
|
1173
1185
|
end
|
1174
1186
|
end
|
1175
1187
|
value
|
@@ -1205,18 +1217,15 @@ class CSV
|
|
1205
1217
|
|
1206
1218
|
def strip_value(value)
|
1207
1219
|
return value unless @strip
|
1208
|
-
return
|
1220
|
+
return value if value.nil?
|
1209
1221
|
|
1210
1222
|
case @strip
|
1211
1223
|
when String
|
1212
|
-
|
1213
|
-
|
1214
|
-
size -= 1
|
1215
|
-
value = value[1, size]
|
1224
|
+
while value.delete_prefix!(@strip)
|
1225
|
+
# do nothing
|
1216
1226
|
end
|
1217
|
-
while value.
|
1218
|
-
|
1219
|
-
value = value[0, size]
|
1227
|
+
while value.delete_suffix!(@strip)
|
1228
|
+
# do nothing
|
1220
1229
|
end
|
1221
1230
|
else
|
1222
1231
|
value.strip!
|
@@ -1239,22 +1248,22 @@ class CSV
|
|
1239
1248
|
@scanner.keep_start
|
1240
1249
|
end
|
1241
1250
|
|
1242
|
-
def emit_row(row, &block)
|
1251
|
+
def emit_row(row, quoted_fields, &block)
|
1243
1252
|
@lineno += 1
|
1244
1253
|
|
1245
1254
|
raw_row = row
|
1246
1255
|
if @use_headers
|
1247
1256
|
if @headers.nil?
|
1248
|
-
@headers = adjust_headers(row)
|
1257
|
+
@headers = adjust_headers(row, quoted_fields)
|
1249
1258
|
return unless @return_headers
|
1250
1259
|
row = Row.new(@headers, row, true)
|
1251
1260
|
else
|
1252
1261
|
row = Row.new(@headers,
|
1253
|
-
@fields_converter.convert(raw_row, @headers, @lineno))
|
1262
|
+
@fields_converter.convert(raw_row, @headers, @lineno, quoted_fields))
|
1254
1263
|
end
|
1255
1264
|
else
|
1256
1265
|
# convert fields, if needed...
|
1257
|
-
row = @fields_converter.convert(raw_row, nil, @lineno)
|
1266
|
+
row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields)
|
1258
1267
|
end
|
1259
1268
|
|
1260
1269
|
# inject unconverted fields and accessor, if requested...
|
data/lib/csv/row.rb
CHANGED
@@ -703,7 +703,7 @@ class CSV
|
|
703
703
|
# by +index_or_header+ and +specifiers+.
|
704
704
|
#
|
705
705
|
# The nested objects may be instances of various classes.
|
706
|
-
# See {Dig Methods}[
|
706
|
+
# See {Dig Methods}[rdoc-ref:dig_methods.rdoc].
|
707
707
|
#
|
708
708
|
# Examples:
|
709
709
|
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
data/lib/csv/table.rb
CHANGED
@@ -890,9 +890,8 @@ class CSV
|
|
890
890
|
if @mode == :row or @mode == :col_or_row # by index
|
891
891
|
@table.delete_if(&block)
|
892
892
|
else # by header
|
893
|
-
deleted = []
|
894
893
|
headers.each do |header|
|
895
|
-
|
894
|
+
delete(header) if yield([header, self[header]])
|
896
895
|
end
|
897
896
|
end
|
898
897
|
|
data/lib/csv/version.rb
CHANGED
data/lib/csv/writer.rb
CHANGED
@@ -1,11 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "input_record_separator"
|
4
|
-
require_relative "match_p"
|
5
4
|
require_relative "row"
|
6
5
|
|
7
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
8
|
-
|
9
6
|
class CSV
|
10
7
|
# Note: Don't use this class directly. This is an internal class.
|
11
8
|
class Writer
|
@@ -42,7 +39,10 @@ class CSV
|
|
42
39
|
@headers ||= row if @use_headers
|
43
40
|
@lineno += 1
|
44
41
|
|
45
|
-
|
42
|
+
if @fields_converter
|
43
|
+
quoted_fields = [false] * row.size
|
44
|
+
row = @fields_converter.convert(row, nil, lineno, quoted_fields)
|
45
|
+
end
|
46
46
|
|
47
47
|
i = -1
|
48
48
|
converted_row = row.collect do |field|
|
@@ -97,7 +97,7 @@ class CSV
|
|
97
97
|
return unless @headers
|
98
98
|
|
99
99
|
converter = @options[:header_fields_converter]
|
100
|
-
@headers = converter.convert(@headers, nil, 0)
|
100
|
+
@headers = converter.convert(@headers, nil, 0, [])
|
101
101
|
@headers.each do |header|
|
102
102
|
header.freeze if header.is_a?(String)
|
103
103
|
end
|
data/lib/csv.rb
CHANGED
@@ -70,7 +70,7 @@
|
|
70
70
|
# == What is CSV, really?
|
71
71
|
#
|
72
72
|
# CSV maintains a pretty strict definition of CSV taken directly from
|
73
|
-
# {the RFC}[
|
73
|
+
# {the RFC}[https://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one
|
74
74
|
# place and that is to make using this library easier. CSV will parse all valid
|
75
75
|
# CSV.
|
76
76
|
#
|
@@ -95,24 +95,13 @@ require "stringio"
|
|
95
95
|
|
96
96
|
require_relative "csv/fields_converter"
|
97
97
|
require_relative "csv/input_record_separator"
|
98
|
-
require_relative "csv/match_p"
|
99
98
|
require_relative "csv/parser"
|
100
99
|
require_relative "csv/row"
|
101
100
|
require_relative "csv/table"
|
102
101
|
require_relative "csv/writer"
|
103
102
|
|
104
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
105
|
-
|
106
103
|
# == \CSV
|
107
104
|
#
|
108
|
-
# === In a Hurry?
|
109
|
-
#
|
110
|
-
# If you are familiar with \CSV data and have a particular task in mind,
|
111
|
-
# you may want to go directly to the:
|
112
|
-
# - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html].
|
113
|
-
#
|
114
|
-
# Otherwise, read on here, about the API: classes, methods, and constants.
|
115
|
-
#
|
116
105
|
# === \CSV Data
|
117
106
|
#
|
118
107
|
# \CSV (comma-separated values) data is a text representation of a table:
|
@@ -857,6 +846,15 @@ class CSV
|
|
857
846
|
end
|
858
847
|
end
|
859
848
|
|
849
|
+
# The error thrown when the parser encounters invalid encoding in CSV.
|
850
|
+
class InvalidEncodingError < MalformedCSVError
|
851
|
+
attr_reader :encoding
|
852
|
+
def initialize(encoding, line_number)
|
853
|
+
@encoding = encoding
|
854
|
+
super("Invalid byte sequence in #{encoding}", line_number)
|
855
|
+
end
|
856
|
+
end
|
857
|
+
|
860
858
|
#
|
861
859
|
# A FieldInfo Struct contains details about a field's position in the data
|
862
860
|
# source it was read from. CSV will pass this Struct to some blocks that make
|
@@ -866,8 +864,9 @@ class CSV
|
|
866
864
|
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
867
865
|
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
868
866
|
# <b><tt>header</tt></b>:: The header for the column, when available.
|
867
|
+
# <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not.
|
869
868
|
#
|
870
|
-
FieldInfo = Struct.new(:index, :line, :header)
|
869
|
+
FieldInfo = Struct.new(:index, :line, :header, :quoted?)
|
871
870
|
|
872
871
|
# A Regexp used to find and convert some common Date formats.
|
873
872
|
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
@@ -875,10 +874,9 @@ class CSV
|
|
875
874
|
# A Regexp used to find and convert some common DateTime formats.
|
876
875
|
DateTimeMatcher =
|
877
876
|
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
878
|
-
|
879
|
-
# ISO-8601
|
877
|
+
# ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse
|
880
878
|
\d{4}-\d{2}-\d{2}
|
881
|
-
(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
879
|
+
(?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
882
880
|
)\z /x
|
883
881
|
|
884
882
|
# The encoding used by all converters.
|
@@ -1008,7 +1006,7 @@ class CSV
|
|
1008
1006
|
def instance(data = $stdout, **options)
|
1009
1007
|
# create a _signature_ for this method call, data object and options
|
1010
1008
|
sig = [data.object_id] +
|
1011
|
-
options.values_at(*DEFAULT_OPTIONS.keys
|
1009
|
+
options.values_at(*DEFAULT_OPTIONS.keys)
|
1012
1010
|
|
1013
1011
|
# fetch or create the instance for this signature
|
1014
1012
|
@@instances ||= Hash.new
|
@@ -1147,7 +1145,7 @@ class CSV
|
|
1147
1145
|
# File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
|
1148
1146
|
#
|
1149
1147
|
# When neither +in_string_or_io+ nor +out_string_or_io+ given,
|
1150
|
-
# parses from {ARGF}[
|
1148
|
+
# parses from {ARGF}[rdoc-ref:ARGF]
|
1151
1149
|
# and generates to STDOUT.
|
1152
1150
|
#
|
1153
1151
|
# Without headers:
|
@@ -1205,7 +1203,7 @@ class CSV
|
|
1205
1203
|
# parse options for input, output, or both
|
1206
1204
|
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
|
1207
1205
|
options.each do |key, value|
|
1208
|
-
case key
|
1206
|
+
case key
|
1209
1207
|
when /\Ain(?:put)?_(.+)\Z/
|
1210
1208
|
in_options[$1.to_sym] = value
|
1211
1209
|
when /\Aout(?:put)?_(.+)\Z/
|
@@ -1317,8 +1315,8 @@ class CSV
|
|
1317
1315
|
#
|
1318
1316
|
# Arguments:
|
1319
1317
|
# * Argument +path_or_io+ must be a file path or an \IO stream.
|
1320
|
-
# * Argument +mode+, if given, must be a \File mode
|
1321
|
-
# See {
|
1318
|
+
# * Argument +mode+, if given, must be a \File mode.
|
1319
|
+
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
1322
1320
|
# * Arguments <tt>**options</tt> must be keyword options.
|
1323
1321
|
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
1324
1322
|
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
@@ -1468,6 +1466,46 @@ class CSV
|
|
1468
1466
|
(new(str, **options) << row).string
|
1469
1467
|
end
|
1470
1468
|
|
1469
|
+
# :call-seq:
|
1470
|
+
# CSV.generate_lines(rows)
|
1471
|
+
# CSV.generate_lines(rows, **options)
|
1472
|
+
#
|
1473
|
+
# Returns the \String created by generating \CSV from +rows+
|
1474
|
+
# using the specified +options+.
|
1475
|
+
#
|
1476
|
+
# Argument +rows+ must be an \Array of row. Row is \Array of \String or \CSV::Row.
|
1477
|
+
#
|
1478
|
+
# Special options:
|
1479
|
+
# * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
|
1480
|
+
# and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise:
|
1481
|
+
# $INPUT_RECORD_SEPARATOR # => "\n"
|
1482
|
+
# * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
|
1483
|
+
# Encoding for the output. This method will try to guess your Encoding from
|
1484
|
+
# the first non-+nil+ field in +rows+, if possible, but you may need to use
|
1485
|
+
# this parameter as a backup plan.
|
1486
|
+
#
|
1487
|
+
# For other +options+,
|
1488
|
+
# see {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1489
|
+
#
|
1490
|
+
# ---
|
1491
|
+
#
|
1492
|
+
# Returns the \String generated from an \Array of rows:
|
1493
|
+
# CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n"
|
1494
|
+
#
|
1495
|
+
# ---
|
1496
|
+
#
|
1497
|
+
# Raises an exception if +rows+ does not respond to +each+:
|
1498
|
+
# # Raises NoMethodError (undefined method `each' for :foo:Symbol)
|
1499
|
+
# CSV.generate_lines(:foo)
|
1500
|
+
#
|
1501
|
+
def generate_lines(rows, **options)
|
1502
|
+
self.generate(**options) do |csv|
|
1503
|
+
rows.each do |row|
|
1504
|
+
csv << row
|
1505
|
+
end
|
1506
|
+
end
|
1507
|
+
end
|
1508
|
+
|
1471
1509
|
#
|
1472
1510
|
# :call-seq:
|
1473
1511
|
# open(file_path, mode = "rb", **options ) -> new_csv
|
@@ -1484,8 +1522,8 @@ class CSV
|
|
1484
1522
|
#
|
1485
1523
|
# * Argument +path+, if given, must be the path to a file.
|
1486
1524
|
# :include: ../doc/csv/arguments/io.rdoc
|
1487
|
-
# * Argument +mode+, if given, must be a \File mode
|
1488
|
-
# See {
|
1525
|
+
# * Argument +mode+, if given, must be a \File mode.
|
1526
|
+
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
1489
1527
|
# * Arguments <tt>**options</tt> must be keyword options.
|
1490
1528
|
# See {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1491
1529
|
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
@@ -1893,8 +1931,19 @@ class CSV
|
|
1893
1931
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
1894
1932
|
|
1895
1933
|
if data.is_a?(String)
|
1934
|
+
if encoding
|
1935
|
+
if encoding.is_a?(String)
|
1936
|
+
data_external_encoding, data_internal_encoding = encoding.split(":", 2)
|
1937
|
+
if data_internal_encoding
|
1938
|
+
data = data.encode(data_internal_encoding, data_external_encoding)
|
1939
|
+
else
|
1940
|
+
data = data.dup.force_encoding(data_external_encoding)
|
1941
|
+
end
|
1942
|
+
else
|
1943
|
+
data = data.dup.force_encoding(encoding)
|
1944
|
+
end
|
1945
|
+
end
|
1896
1946
|
@io = StringIO.new(data)
|
1897
|
-
@io.set_encoding(encoding || data.encoding)
|
1898
1947
|
else
|
1899
1948
|
@io = data
|
1900
1949
|
end
|
@@ -2503,7 +2552,13 @@ class CSV
|
|
2503
2552
|
# p row
|
2504
2553
|
# end
|
2505
2554
|
def each(&block)
|
2506
|
-
|
2555
|
+
return to_enum(__method__) unless block_given?
|
2556
|
+
begin
|
2557
|
+
while true
|
2558
|
+
yield(parser_enumerator.next)
|
2559
|
+
end
|
2560
|
+
rescue StopIteration
|
2561
|
+
end
|
2507
2562
|
end
|
2508
2563
|
|
2509
2564
|
# :call-seq:
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.3
|
4
|
+
version: 3.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
8
8
|
- Kouhei Sutou
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-11-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -71,7 +71,7 @@ description: The CSV library provides a complete interface to CSV files and data
|
|
71
71
|
It offers tools to enable you to read and write to and from Strings or IO objects,
|
72
72
|
as needed.
|
73
73
|
email:
|
74
|
-
-
|
74
|
+
-
|
75
75
|
- kou@cozmixng.org
|
76
76
|
executables: []
|
77
77
|
extensions: []
|
@@ -116,10 +116,8 @@ files:
|
|
116
116
|
- lib/csv.rb
|
117
117
|
- lib/csv/core_ext/array.rb
|
118
118
|
- lib/csv/core_ext/string.rb
|
119
|
-
- lib/csv/delete_suffix.rb
|
120
119
|
- lib/csv/fields_converter.rb
|
121
120
|
- lib/csv/input_record_separator.rb
|
122
|
-
- lib/csv/match_p.rb
|
123
121
|
- lib/csv/parser.rb
|
124
122
|
- lib/csv/row.rb
|
125
123
|
- lib/csv/table.rb
|
@@ -130,7 +128,7 @@ licenses:
|
|
130
128
|
- Ruby
|
131
129
|
- BSD-2-Clause
|
132
130
|
metadata: {}
|
133
|
-
post_install_message:
|
131
|
+
post_install_message:
|
134
132
|
rdoc_options:
|
135
133
|
- "--main"
|
136
134
|
- README.md
|
@@ -147,8 +145,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
145
|
- !ruby/object:Gem::Version
|
148
146
|
version: '0'
|
149
147
|
requirements: []
|
150
|
-
rubygems_version: 3.
|
151
|
-
signing_key:
|
148
|
+
rubygems_version: 3.5.0.dev
|
149
|
+
signing_key:
|
152
150
|
specification_version: 4
|
153
151
|
summary: CSV Reading and Writing
|
154
152
|
test_files: []
|
data/lib/csv/delete_suffix.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# This provides String#delete_suffix? for Ruby 2.4.
|
4
|
-
unless String.method_defined?(:delete_suffix)
|
5
|
-
class CSV
|
6
|
-
module DeleteSuffix
|
7
|
-
refine String do
|
8
|
-
def delete_suffix(suffix)
|
9
|
-
if end_with?(suffix)
|
10
|
-
self[0...-suffix.size]
|
11
|
-
else
|
12
|
-
self
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/csv/match_p.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# This provides String#match? and Regexp#match? for Ruby 2.3.
|
4
|
-
unless String.method_defined?(:match?)
|
5
|
-
class CSV
|
6
|
-
module MatchP
|
7
|
-
refine String do
|
8
|
-
def match?(pattern)
|
9
|
-
self =~ pattern
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
refine Regexp do
|
14
|
-
def match?(string)
|
15
|
-
self =~ string
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|