csv 3.2.2 → 3.2.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +139 -5
- data/doc/csv/options/generating/write_headers.rdoc +1 -1
- data/doc/csv/recipes/generating.rdoc +1 -1
- data/doc/csv/recipes/parsing.rdoc +1 -1
- data/lib/csv/fields_converter.rb +3 -2
- data/lib/csv/input_record_separator.rb +1 -14
- data/lib/csv/parser.rb +205 -88
- data/lib/csv/row.rb +1 -1
- data/lib/csv/table.rb +15 -6
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +5 -5
- data/lib/csv.rb +90 -14
- metadata +7 -9
- data/lib/csv/delete_suffix.rb +0 -18
- data/lib/csv/match_p.rb +0 -20
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48581eef7d2903fa52d36b48a8c1596396a5957b166c99ea8dcfd14ffb0dc221
|
4
|
+
data.tar.gz: fa6a5cdd9ade30c0a45f7974dcb43128d5e99cc7bf344e9e5b012993ad081ffe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 00f55443919b4ece138c025818a440ed4a6d0bed64917bcb6f6e810e318f4738f9e129371434c8711173e818b5b58e1d4b2ac2987612617922fda74f03bd3a4b
|
7
|
+
data.tar.gz: 0b83b3b64bf5287653054fa8ecfd4e345f5c41a2a0daedbdba355c4d034bc85c917cf87bd9ee1ede54475ec4e31caadaee54cc2553d529e9ba5b0bc1d5806ff0
|
data/NEWS.md
CHANGED
@@ -1,5 +1,144 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.2.6 - 2022-12-08
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* `CSV#read` consumes the same lines as other methods like
|
8
|
+
`CSV#shift`.
|
9
|
+
[[GitHub#258](https://github.com/ruby/csv/issues/258)]
|
10
|
+
[Reported by Lhoussaine Ghallou]
|
11
|
+
|
12
|
+
* All `Enumerable` based methods consume the same lines as other
|
13
|
+
methods. This may have a performance penalty.
|
14
|
+
[[GitHub#260](https://github.com/ruby/csv/issues/260)]
|
15
|
+
[Reported by Lhoussaine Ghallou]
|
16
|
+
|
17
|
+
* Simplify some implementations.
|
18
|
+
[[GitHub#262](https://github.com/ruby/csv/pull/262)]
|
19
|
+
[[GitHub#263](https://github.com/ruby/csv/pull/263)]
|
20
|
+
[Patch by Mau Magnaguagno]
|
21
|
+
|
22
|
+
### Fixes
|
23
|
+
|
24
|
+
* Fixed `CSV.generate_lines` document.
|
25
|
+
[[GitHub#257](https://github.com/ruby/csv/pull/257)]
|
26
|
+
[Patch by Sampat Badhe]
|
27
|
+
|
28
|
+
### Thanks
|
29
|
+
|
30
|
+
* Sampat Badhe
|
31
|
+
|
32
|
+
* Lhoussaine Ghallou
|
33
|
+
|
34
|
+
* Mau Magnaguagno
|
35
|
+
|
36
|
+
## 3.2.5 - 2022-08-26
|
37
|
+
|
38
|
+
### Improvements
|
39
|
+
|
40
|
+
* Added `CSV.generate_lines`.
|
41
|
+
[[GitHub#255](https://github.com/ruby/csv/issues/255)]
|
42
|
+
[Reported by OKURA Masafumi]
|
43
|
+
[[GitHub#256](https://github.com/ruby/csv/pull/256)]
|
44
|
+
[Patch by Eriko Sugiyama]
|
45
|
+
|
46
|
+
### Thanks
|
47
|
+
|
48
|
+
* OKURA Masafumi
|
49
|
+
|
50
|
+
* Eriko Sugiyama
|
51
|
+
|
52
|
+
## 3.2.4 - 2022-08-22
|
53
|
+
|
54
|
+
### Improvements
|
55
|
+
|
56
|
+
* Cleaned up internal implementations.
|
57
|
+
[[GitHub#249](https://github.com/ruby/csv/pull/249)]
|
58
|
+
[[GitHub#250](https://github.com/ruby/csv/pull/250)]
|
59
|
+
[[GitHub#251](https://github.com/ruby/csv/pull/251)]
|
60
|
+
[Patch by Mau Magnaguagno]
|
61
|
+
|
62
|
+
* Added support for RFC 3339 style time.
|
63
|
+
[[GitHub#248](https://github.com/ruby/csv/pull/248)]
|
64
|
+
[Patch by Thierry Lambert]
|
65
|
+
|
66
|
+
* Added support for transcoding String CSV. Syntax is
|
67
|
+
`from-encoding:to-encoding`.
|
68
|
+
[[GitHub#254](https://github.com/ruby/csv/issues/254)]
|
69
|
+
[Reported by Richard Stueven]
|
70
|
+
|
71
|
+
* Added quoted information to `CSV::FieldInfo`.
|
72
|
+
[[GitHub#253](https://github.com/ruby/csv/pull/253)]
|
73
|
+
[Reported by Hirokazu SUZUKI]
|
74
|
+
|
75
|
+
### Fixes
|
76
|
+
|
77
|
+
* Fixed a link in documents.
|
78
|
+
[[GitHub#244](https://github.com/ruby/csv/pull/244)]
|
79
|
+
[Patch by Peter Zhu]
|
80
|
+
|
81
|
+
### Thanks
|
82
|
+
|
83
|
+
* Peter Zhu
|
84
|
+
|
85
|
+
* Mau Magnaguagno
|
86
|
+
|
87
|
+
* Thierry Lambert
|
88
|
+
|
89
|
+
* Richard Stueven
|
90
|
+
|
91
|
+
* Hirokazu SUZUKI
|
92
|
+
|
93
|
+
## 3.2.3 - 2022-04-09
|
94
|
+
|
95
|
+
### Improvements
|
96
|
+
|
97
|
+
* Added contents summary to `CSV::Table#inspect`.
|
98
|
+
[GitHub#229][Patch by Eriko Sugiyama]
|
99
|
+
[GitHub#235][Patch by Sampat Badhe]
|
100
|
+
|
101
|
+
* Suppressed `$INPUT_RECORD_SEPARATOR` deprecation warning by
|
102
|
+
`Warning.warn`.
|
103
|
+
[GitHub#233][Reported by Jean byroot Boussier]
|
104
|
+
|
105
|
+
* Improved error message for liberal parsing with quoted values.
|
106
|
+
[GitHub#231][Patch by Nikolay Rys]
|
107
|
+
|
108
|
+
* Fixed typos in documentation.
|
109
|
+
[GitHub#236][Patch by Sampat Badhe]
|
110
|
+
|
111
|
+
* Added `:max_field_size` option and deprecated `:field_size_limit` option.
|
112
|
+
[GitHub#238][Reported by Dan Buettner]
|
113
|
+
|
114
|
+
* Added `:symbol_raw` to built-in header converters.
|
115
|
+
[GitHub#237][Reported by taki]
|
116
|
+
[GitHub#239][Patch by Eriko Sugiyama]
|
117
|
+
|
118
|
+
### Fixes
|
119
|
+
|
120
|
+
* Fixed a bug that some texts may be dropped unexpectedly.
|
121
|
+
[Bug #18245][ruby-core:105587][Reported by Hassan Abdul Rehman]
|
122
|
+
|
123
|
+
* Fixed a bug that `:field_size_limit` didn't work with non-complex rows.
|
124
|
+
[GitHub#238][Reported by Dan Buettner]
|
125
|
+
|
126
|
+
### Thanks
|
127
|
+
|
128
|
+
* Hassan Abdul Rehman
|
129
|
+
|
130
|
+
* Eriko Sugiyama
|
131
|
+
|
132
|
+
* Jean byroot Boussier
|
133
|
+
|
134
|
+
* Nikolay Rys
|
135
|
+
|
136
|
+
* Sampat Badhe
|
137
|
+
|
138
|
+
* Dan Buettner
|
139
|
+
|
140
|
+
* taki
|
141
|
+
|
3
142
|
## 3.2.2 - 2021-12-24
|
4
143
|
|
5
144
|
### Improvements
|
@@ -15,9 +154,6 @@
|
|
15
154
|
* Fixed a bug that all of `ARGF` contents may not be consumed.
|
16
155
|
[GitHub#228][Reported by Rafael Navaza]
|
17
156
|
|
18
|
-
* Fixed a bug that some texts may be dropped unexpectedly.
|
19
|
-
[Bug #18245][ruby-core:105587][Reported by Hassan Abdul Rehman]
|
20
|
-
|
21
157
|
### Thanks
|
22
158
|
|
23
159
|
* adamroyjones
|
@@ -26,8 +162,6 @@
|
|
26
162
|
|
27
163
|
* Rafael Navaza
|
28
164
|
|
29
|
-
* Hassan Abdul Rehman
|
30
|
-
|
31
165
|
## 3.2.1 - 2021-10-23
|
32
166
|
|
33
167
|
### Improvements
|
@@ -148,7 +148,7 @@ This example defines and uses a custom write converter to strip whitespace from
|
|
148
148
|
|
149
149
|
==== Recipe: Specify Multiple Write Converters
|
150
150
|
|
151
|
-
Use option <tt>:write_converters</tt> and multiple custom
|
151
|
+
Use option <tt>:write_converters</tt> and multiple custom converters
|
152
152
|
to convert field values when generating \CSV.
|
153
153
|
|
154
154
|
This example defines and uses two custom write converters to strip and upcase generated fields:
|
@@ -83,7 +83,7 @@ Use instance method CSV#each with option +headers+ to read a source \String one
|
|
83
83
|
CSV.new(string, headers: true).each do |row|
|
84
84
|
p row
|
85
85
|
end
|
86
|
-
|
86
|
+
Output:
|
87
87
|
#<CSV::Row "Name":"foo" "Value":"0">
|
88
88
|
#<CSV::Row "Name":"bar" "Value":"1">
|
89
89
|
#<CSV::Row "Name":"baz" "Value":"2">
|
data/lib/csv/fields_converter.rb
CHANGED
@@ -44,7 +44,7 @@ class CSV
|
|
44
44
|
@converters.empty?
|
45
45
|
end
|
46
46
|
|
47
|
-
def convert(fields, headers, lineno)
|
47
|
+
def convert(fields, headers, lineno, quoted_fields)
|
48
48
|
return fields unless need_convert?
|
49
49
|
|
50
50
|
fields.collect.with_index do |field, index|
|
@@ -63,7 +63,8 @@ class CSV
|
|
63
63
|
else
|
64
64
|
header = nil
|
65
65
|
end
|
66
|
-
|
66
|
+
quoted = quoted_fields[index]
|
67
|
+
field = converter[field, FieldInfo.new(index, lineno, header, quoted)]
|
67
68
|
end
|
68
69
|
break unless field.is_a?(String) # short-circuit pipeline for speed
|
69
70
|
end
|
@@ -4,20 +4,7 @@ require "stringio"
|
|
4
4
|
class CSV
|
5
5
|
module InputRecordSeparator
|
6
6
|
class << self
|
7
|
-
|
8
|
-
verbose, $VERBOSE = $VERBOSE, true
|
9
|
-
stderr, $stderr = $stderr, StringIO.new
|
10
|
-
input_record_separator = $INPUT_RECORD_SEPARATOR
|
11
|
-
begin
|
12
|
-
$INPUT_RECORD_SEPARATOR = "\r\n"
|
13
|
-
is_input_record_separator_deprecated = (not $stderr.string.empty?)
|
14
|
-
ensure
|
15
|
-
$INPUT_RECORD_SEPARATOR = input_record_separator
|
16
|
-
$stderr = stderr
|
17
|
-
$VERBOSE = verbose
|
18
|
-
end
|
19
|
-
|
20
|
-
if is_input_record_separator_deprecated
|
7
|
+
if RUBY_VERSION >= "3.0.0"
|
21
8
|
def value
|
22
9
|
"\n"
|
23
10
|
end
|
data/lib/csv/parser.rb
CHANGED
@@ -2,15 +2,10 @@
|
|
2
2
|
|
3
3
|
require "strscan"
|
4
4
|
|
5
|
-
require_relative "delete_suffix"
|
6
5
|
require_relative "input_record_separator"
|
7
|
-
require_relative "match_p"
|
8
6
|
require_relative "row"
|
9
7
|
require_relative "table"
|
10
8
|
|
11
|
-
using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
12
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
13
|
-
|
14
9
|
class CSV
|
15
10
|
# Note: Don't use this class directly. This is an internal class.
|
16
11
|
class Parser
|
@@ -27,6 +22,10 @@ class CSV
|
|
27
22
|
class InvalidEncoding < StandardError
|
28
23
|
end
|
29
24
|
|
25
|
+
# Raised when an unexpected case happens.
|
26
|
+
class UnexpectedError < StandardError
|
27
|
+
end
|
28
|
+
|
30
29
|
#
|
31
30
|
# CSV::Scanner receives a CSV output, scans it and returns the content.
|
32
31
|
# It also controls the life cycle of the object with its methods +keep_start+,
|
@@ -78,10 +77,10 @@ class CSV
|
|
78
77
|
# +keep_end+, +keep_back+, +keep_drop+.
|
79
78
|
#
|
80
79
|
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
|
81
|
-
# If there's a match, the scanner advances the
|
80
|
+
# If there's a match, the scanner advances the "scan pointer" and returns the matched string.
|
82
81
|
# Otherwise, the scanner returns nil.
|
83
82
|
#
|
84
|
-
# CSV::InputsScanner.rest() returns the
|
83
|
+
# CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer).
|
85
84
|
# If there is no more data (eos? = true), it returns "".
|
86
85
|
#
|
87
86
|
class InputsScanner
|
@@ -96,11 +95,13 @@ class CSV
|
|
96
95
|
end
|
97
96
|
|
98
97
|
def each_line(row_separator)
|
98
|
+
return enum_for(__method__, row_separator) unless block_given?
|
99
99
|
buffer = nil
|
100
100
|
input = @scanner.rest
|
101
101
|
position = @scanner.pos
|
102
102
|
offset = 0
|
103
103
|
n_row_separator_chars = row_separator.size
|
104
|
+
# trace(__method__, :start, line, input)
|
104
105
|
while true
|
105
106
|
input.each_line(row_separator) do |line|
|
106
107
|
@scanner.pos += line.bytesize
|
@@ -140,25 +141,28 @@ class CSV
|
|
140
141
|
end
|
141
142
|
|
142
143
|
def scan(pattern)
|
144
|
+
# trace(__method__, pattern, :start)
|
143
145
|
value = @scanner.scan(pattern)
|
146
|
+
# trace(__method__, pattern, :done, :last, value) if @last_scanner
|
144
147
|
return value if @last_scanner
|
145
148
|
|
146
|
-
if value
|
147
|
-
|
148
|
-
|
149
|
-
else
|
150
|
-
nil
|
151
|
-
end
|
149
|
+
read_chunk if value and @scanner.eos?
|
150
|
+
# trace(__method__, pattern, :done, value)
|
151
|
+
value
|
152
152
|
end
|
153
153
|
|
154
154
|
def scan_all(pattern)
|
155
|
+
# trace(__method__, pattern, :start)
|
155
156
|
value = @scanner.scan(pattern)
|
157
|
+
# trace(__method__, pattern, :done, :last, value) if @last_scanner
|
156
158
|
return value if @last_scanner
|
157
159
|
|
158
160
|
return nil if value.nil?
|
159
161
|
while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern))
|
162
|
+
# trace(__method__, pattern, :sub, sub_value)
|
160
163
|
value << sub_value
|
161
164
|
end
|
165
|
+
# trace(__method__, pattern, :done, value)
|
162
166
|
value
|
163
167
|
end
|
164
168
|
|
@@ -167,68 +171,126 @@ class CSV
|
|
167
171
|
end
|
168
172
|
|
169
173
|
def keep_start
|
170
|
-
|
174
|
+
# trace(__method__, :start)
|
175
|
+
adjust_last_keep
|
176
|
+
@keeps.push([@scanner, @scanner.pos, nil])
|
177
|
+
# trace(__method__, :done)
|
171
178
|
end
|
172
179
|
|
173
180
|
def keep_end
|
174
|
-
|
175
|
-
|
181
|
+
# trace(__method__, :start)
|
182
|
+
scanner, start, buffer = @keeps.pop
|
183
|
+
if scanner == @scanner
|
184
|
+
keep = @scanner.string.byteslice(start, @scanner.pos - start)
|
185
|
+
else
|
186
|
+
keep = @scanner.string.byteslice(0, @scanner.pos)
|
187
|
+
end
|
176
188
|
if buffer
|
177
189
|
buffer << keep
|
178
190
|
keep = buffer
|
179
191
|
end
|
192
|
+
# trace(__method__, :done, keep)
|
180
193
|
keep
|
181
194
|
end
|
182
195
|
|
183
196
|
def keep_back
|
184
|
-
|
197
|
+
# trace(__method__, :start)
|
198
|
+
scanner, start, buffer = @keeps.pop
|
185
199
|
if buffer
|
200
|
+
# trace(__method__, :rescan, start, buffer)
|
186
201
|
string = @scanner.string
|
187
|
-
|
202
|
+
if scanner == @scanner
|
203
|
+
keep = string.byteslice(start, string.bytesize - start)
|
204
|
+
else
|
205
|
+
keep = string
|
206
|
+
end
|
188
207
|
if keep and not keep.empty?
|
189
208
|
@inputs.unshift(StringIO.new(keep))
|
190
209
|
@last_scanner = false
|
191
210
|
end
|
192
211
|
@scanner = StringScanner.new(buffer)
|
193
212
|
else
|
213
|
+
if @scanner != scanner
|
214
|
+
message = "scanners are different but no buffer: "
|
215
|
+
message += "#{@scanner.inspect}(#{@scanner.object_id}): "
|
216
|
+
message += "#{scanner.inspect}(#{scanner.object_id})"
|
217
|
+
raise UnexpectedError, message
|
218
|
+
end
|
219
|
+
# trace(__method__, :repos, start, buffer)
|
194
220
|
@scanner.pos = start
|
195
221
|
end
|
196
222
|
read_chunk if @scanner.eos?
|
197
223
|
end
|
198
224
|
|
199
225
|
def keep_drop
|
200
|
-
@keeps.pop
|
226
|
+
_, _, buffer = @keeps.pop
|
227
|
+
# trace(__method__, :done, :empty) unless buffer
|
228
|
+
return unless buffer
|
229
|
+
|
230
|
+
last_keep = @keeps.last
|
231
|
+
# trace(__method__, :done, :no_last_keep) unless last_keep
|
232
|
+
return unless last_keep
|
233
|
+
|
234
|
+
if last_keep[2]
|
235
|
+
last_keep[2] << buffer
|
236
|
+
else
|
237
|
+
last_keep[2] = buffer
|
238
|
+
end
|
239
|
+
# trace(__method__, :done)
|
201
240
|
end
|
202
241
|
|
203
242
|
def rest
|
204
243
|
@scanner.rest
|
205
244
|
end
|
206
245
|
|
246
|
+
def check(pattern)
|
247
|
+
@scanner.check(pattern)
|
248
|
+
end
|
249
|
+
|
207
250
|
private
|
208
|
-
def
|
209
|
-
|
251
|
+
def trace(*args)
|
252
|
+
pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps])
|
253
|
+
end
|
210
254
|
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
255
|
+
def adjust_last_keep
|
256
|
+
# trace(__method__, :start)
|
257
|
+
|
258
|
+
keep = @keeps.last
|
259
|
+
# trace(__method__, :done, :empty) if keep.nil?
|
260
|
+
return if keep.nil?
|
261
|
+
|
262
|
+
scanner, start, buffer = keep
|
263
|
+
string = @scanner.string
|
264
|
+
if @scanner != scanner
|
265
|
+
start = 0
|
266
|
+
end
|
267
|
+
if start == 0 and @scanner.eos?
|
268
|
+
keep_data = string
|
269
|
+
else
|
270
|
+
keep_data = string.byteslice(start, @scanner.pos - start)
|
271
|
+
end
|
272
|
+
if keep_data
|
273
|
+
if buffer
|
274
|
+
buffer << keep_data
|
275
|
+
else
|
276
|
+
keep[2] = keep_data.dup
|
223
277
|
end
|
224
|
-
keep[0] = 0
|
225
278
|
end
|
226
279
|
|
280
|
+
# trace(__method__, :done)
|
281
|
+
end
|
282
|
+
|
283
|
+
def read_chunk
|
284
|
+
return false if @last_scanner
|
285
|
+
|
286
|
+
adjust_last_keep
|
287
|
+
|
227
288
|
input = @inputs.first
|
228
289
|
case input
|
229
290
|
when StringIO
|
230
291
|
string = input.read
|
231
292
|
raise InvalidEncoding unless string.valid_encoding?
|
293
|
+
# trace(__method__, :stringio, string)
|
232
294
|
@scanner = StringScanner.new(string)
|
233
295
|
@inputs.shift
|
234
296
|
@last_scanner = @inputs.empty?
|
@@ -237,6 +299,7 @@ class CSV
|
|
237
299
|
chunk = input.gets(@row_separator, @chunk_size)
|
238
300
|
if chunk
|
239
301
|
raise InvalidEncoding unless chunk.valid_encoding?
|
302
|
+
# trace(__method__, :chunk, chunk)
|
240
303
|
@scanner = StringScanner.new(chunk)
|
241
304
|
if input.respond_to?(:eof?) and input.eof?
|
242
305
|
@inputs.shift
|
@@ -244,6 +307,7 @@ class CSV
|
|
244
307
|
end
|
245
308
|
true
|
246
309
|
else
|
310
|
+
# trace(__method__, :no_chunk)
|
247
311
|
@scanner = StringScanner.new("".encode(@encoding))
|
248
312
|
@inputs.shift
|
249
313
|
@last_scanner = @inputs.empty?
|
@@ -278,7 +342,11 @@ class CSV
|
|
278
342
|
end
|
279
343
|
|
280
344
|
def field_size_limit
|
281
|
-
@
|
345
|
+
@max_field_size&.succ
|
346
|
+
end
|
347
|
+
|
348
|
+
def max_field_size
|
349
|
+
@max_field_size
|
282
350
|
end
|
283
351
|
|
284
352
|
def skip_lines
|
@@ -346,6 +414,16 @@ class CSV
|
|
346
414
|
end
|
347
415
|
message = "Invalid byte sequence in #{@encoding}"
|
348
416
|
raise MalformedCSVError.new(message, lineno)
|
417
|
+
rescue UnexpectedError => error
|
418
|
+
if @scanner
|
419
|
+
ignore_broken_line
|
420
|
+
lineno = @lineno
|
421
|
+
else
|
422
|
+
lineno = @lineno + 1
|
423
|
+
end
|
424
|
+
message = "This should not be happen: #{error.message}: "
|
425
|
+
message += "Please report this to https://github.com/ruby/csv/issues"
|
426
|
+
raise MalformedCSVError.new(message, lineno)
|
349
427
|
end
|
350
428
|
end
|
351
429
|
|
@@ -390,7 +468,7 @@ class CSV
|
|
390
468
|
@backslash_quote = false
|
391
469
|
end
|
392
470
|
@unconverted_fields = @options[:unconverted_fields]
|
393
|
-
@
|
471
|
+
@max_field_size = @options[:max_field_size]
|
394
472
|
@skip_blanks = @options[:skip_blanks]
|
395
473
|
@fields_converter = @options[:fields_converter]
|
396
474
|
@header_fields_converter = @options[:header_fields_converter]
|
@@ -680,9 +758,10 @@ class CSV
|
|
680
758
|
case headers
|
681
759
|
when Array
|
682
760
|
@raw_headers = headers
|
761
|
+
quoted_fields = [false] * @raw_headers.size
|
683
762
|
@use_headers = true
|
684
763
|
when String
|
685
|
-
@raw_headers = parse_headers(headers)
|
764
|
+
@raw_headers, quoted_fields = parse_headers(headers)
|
686
765
|
@use_headers = true
|
687
766
|
when nil, false
|
688
767
|
@raw_headers = nil
|
@@ -692,21 +771,28 @@ class CSV
|
|
692
771
|
@use_headers = true
|
693
772
|
end
|
694
773
|
if @raw_headers
|
695
|
-
@headers = adjust_headers(@raw_headers)
|
774
|
+
@headers = adjust_headers(@raw_headers, quoted_fields)
|
696
775
|
else
|
697
776
|
@headers = nil
|
698
777
|
end
|
699
778
|
end
|
700
779
|
|
701
780
|
def parse_headers(row)
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
781
|
+
quoted_fields = []
|
782
|
+
converter = lambda do |field, info|
|
783
|
+
quoted_fields << info.quoted?
|
784
|
+
field
|
785
|
+
end
|
786
|
+
headers = CSV.parse_line(row,
|
787
|
+
col_sep: @column_separator,
|
788
|
+
row_sep: @row_separator,
|
789
|
+
quote_char: @quote_character,
|
790
|
+
converters: [converter])
|
791
|
+
[headers, quoted_fields]
|
706
792
|
end
|
707
793
|
|
708
|
-
def adjust_headers(headers)
|
709
|
-
adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno)
|
794
|
+
def adjust_headers(headers, quoted_fields)
|
795
|
+
adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields)
|
710
796
|
adjusted_headers.each {|h| h.freeze if h.is_a? String}
|
711
797
|
adjusted_headers
|
712
798
|
end
|
@@ -729,28 +815,28 @@ class CSV
|
|
729
815
|
sample[0, 128].index(@quote_character)
|
730
816
|
end
|
731
817
|
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
|
736
|
-
@io = StringIO.new(string, "rb:#{string.encoding}")
|
737
|
-
end
|
818
|
+
class UnoptimizedStringIO # :nodoc:
|
819
|
+
def initialize(string)
|
820
|
+
@io = StringIO.new(string, "rb:#{string.encoding}")
|
821
|
+
end
|
738
822
|
|
739
|
-
|
740
|
-
|
741
|
-
|
823
|
+
def gets(*args)
|
824
|
+
@io.gets(*args)
|
825
|
+
end
|
742
826
|
|
743
|
-
|
744
|
-
|
745
|
-
|
827
|
+
def each_line(*args, &block)
|
828
|
+
@io.each_line(*args, &block)
|
829
|
+
end
|
746
830
|
|
747
|
-
|
748
|
-
|
749
|
-
end
|
831
|
+
def eof?
|
832
|
+
@io.eof?
|
750
833
|
end
|
834
|
+
end
|
751
835
|
|
752
|
-
|
753
|
-
|
836
|
+
SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes")
|
837
|
+
if SCANNER_TEST
|
838
|
+
SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"
|
839
|
+
SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
|
754
840
|
def build_scanner
|
755
841
|
inputs = @samples.collect do |sample|
|
756
842
|
UnoptimizedStringIO.new(sample)
|
@@ -760,10 +846,17 @@ class CSV
|
|
760
846
|
else
|
761
847
|
inputs << @input
|
762
848
|
end
|
849
|
+
begin
|
850
|
+
chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME]
|
851
|
+
rescue # Ractor::IsolationError
|
852
|
+
# Ractor on Ruby 3.0 can't read ENV value.
|
853
|
+
chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE
|
854
|
+
end
|
855
|
+
chunk_size = Integer((chunk_size_value || "1"), 10)
|
763
856
|
InputsScanner.new(inputs,
|
764
857
|
@encoding,
|
765
858
|
@row_separator,
|
766
|
-
chunk_size:
|
859
|
+
chunk_size: chunk_size)
|
767
860
|
end
|
768
861
|
else
|
769
862
|
def build_scanner
|
@@ -826,6 +919,14 @@ class CSV
|
|
826
919
|
end
|
827
920
|
end
|
828
921
|
|
922
|
+
def validate_field_size(field)
|
923
|
+
return unless @max_field_size
|
924
|
+
return if field.size <= @max_field_size
|
925
|
+
ignore_broken_line
|
926
|
+
message = "Field size exceeded: #{field.size} > #{@max_field_size}"
|
927
|
+
raise MalformedCSVError.new(message, @lineno)
|
928
|
+
end
|
929
|
+
|
829
930
|
def parse_no_quote(&block)
|
830
931
|
@scanner.each_line(@row_separator) do |line|
|
831
932
|
next if @skip_lines and skip_line?(line)
|
@@ -835,9 +936,16 @@ class CSV
|
|
835
936
|
if line.empty?
|
836
937
|
next if @skip_blanks
|
837
938
|
row = []
|
939
|
+
quoted_fields = []
|
838
940
|
else
|
839
941
|
line = strip_value(line)
|
840
942
|
row = line.split(@split_column_separator, -1)
|
943
|
+
quoted_fields = [false] * row.size
|
944
|
+
if @max_field_size
|
945
|
+
row.each do |column|
|
946
|
+
validate_field_size(column)
|
947
|
+
end
|
948
|
+
end
|
841
949
|
n_columns = row.size
|
842
950
|
i = 0
|
843
951
|
while i < n_columns
|
@@ -846,7 +954,7 @@ class CSV
|
|
846
954
|
end
|
847
955
|
end
|
848
956
|
@last_line = original_line
|
849
|
-
emit_row(row, &block)
|
957
|
+
emit_row(row, quoted_fields, &block)
|
850
958
|
end
|
851
959
|
end
|
852
960
|
|
@@ -868,31 +976,37 @@ class CSV
|
|
868
976
|
next
|
869
977
|
end
|
870
978
|
row = []
|
979
|
+
quoted_fields = []
|
871
980
|
elsif line.include?(@cr) or line.include?(@lf)
|
872
981
|
@scanner.keep_back
|
873
982
|
@need_robust_parsing = true
|
874
983
|
return parse_quotable_robust(&block)
|
875
984
|
else
|
876
985
|
row = line.split(@split_column_separator, -1)
|
986
|
+
quoted_fields = []
|
877
987
|
n_columns = row.size
|
878
988
|
i = 0
|
879
989
|
while i < n_columns
|
880
990
|
column = row[i]
|
881
991
|
if column.empty?
|
992
|
+
quoted_fields << false
|
882
993
|
row[i] = nil
|
883
994
|
else
|
884
995
|
n_quotes = column.count(@quote_character)
|
885
996
|
if n_quotes.zero?
|
997
|
+
quoted_fields << false
|
886
998
|
# no quote
|
887
999
|
elsif n_quotes == 2 and
|
888
1000
|
column.start_with?(@quote_character) and
|
889
1001
|
column.end_with?(@quote_character)
|
1002
|
+
quoted_fields << true
|
890
1003
|
row[i] = column[1..-2]
|
891
1004
|
else
|
892
1005
|
@scanner.keep_back
|
893
1006
|
@need_robust_parsing = true
|
894
1007
|
return parse_quotable_robust(&block)
|
895
1008
|
end
|
1009
|
+
validate_field_size(row[i])
|
896
1010
|
end
|
897
1011
|
i += 1
|
898
1012
|
end
|
@@ -900,13 +1014,14 @@ class CSV
|
|
900
1014
|
@scanner.keep_drop
|
901
1015
|
@scanner.keep_start
|
902
1016
|
@last_line = original_line
|
903
|
-
emit_row(row, &block)
|
1017
|
+
emit_row(row, quoted_fields, &block)
|
904
1018
|
end
|
905
1019
|
@scanner.keep_drop
|
906
1020
|
end
|
907
1021
|
|
908
1022
|
def parse_quotable_robust(&block)
|
909
1023
|
row = []
|
1024
|
+
quoted_fields = []
|
910
1025
|
skip_needless_lines
|
911
1026
|
start_row
|
912
1027
|
while true
|
@@ -916,32 +1031,39 @@ class CSV
|
|
916
1031
|
value = parse_column_value
|
917
1032
|
if value
|
918
1033
|
@scanner.scan_all(@strip_value) if @strip_value
|
919
|
-
|
920
|
-
ignore_broken_line
|
921
|
-
raise MalformedCSVError.new("Field size exceeded", @lineno)
|
922
|
-
end
|
1034
|
+
validate_field_size(value)
|
923
1035
|
end
|
924
1036
|
if parse_column_end
|
925
1037
|
row << value
|
1038
|
+
quoted_fields << @quoted_column_value
|
926
1039
|
elsif parse_row_end
|
927
1040
|
if row.empty? and value.nil?
|
928
|
-
emit_row([], &block) unless @skip_blanks
|
1041
|
+
emit_row([], [], &block) unless @skip_blanks
|
929
1042
|
else
|
930
1043
|
row << value
|
931
|
-
|
1044
|
+
quoted_fields << @quoted_column_value
|
1045
|
+
emit_row(row, quoted_fields, &block)
|
932
1046
|
row = []
|
1047
|
+
quoted_fields = []
|
933
1048
|
end
|
934
1049
|
skip_needless_lines
|
935
1050
|
start_row
|
936
1051
|
elsif @scanner.eos?
|
937
1052
|
break if row.empty? and value.nil?
|
938
1053
|
row << value
|
939
|
-
|
1054
|
+
quoted_fields << @quoted_column_value
|
1055
|
+
emit_row(row, quoted_fields, &block)
|
940
1056
|
break
|
941
1057
|
else
|
942
1058
|
if @quoted_column_value
|
1059
|
+
if liberal_parsing? and (new_line = @scanner.check(@line_end))
|
1060
|
+
message =
|
1061
|
+
"Illegal end-of-line sequence outside of a quoted field " +
|
1062
|
+
"<#{new_line.inspect}>"
|
1063
|
+
else
|
1064
|
+
message = "Any value after quoted field isn't allowed"
|
1065
|
+
end
|
943
1066
|
ignore_broken_line
|
944
|
-
message = "Any value after quoted field isn't allowed"
|
945
1067
|
raise MalformedCSVError.new(message, @lineno)
|
946
1068
|
elsif @unquoted_column_value and
|
947
1069
|
(new_line = @scanner.scan(@line_end))
|
@@ -1034,7 +1156,7 @@ class CSV
|
|
1034
1156
|
if (n_quotes % 2).zero?
|
1035
1157
|
quotes[0, (n_quotes - 2) / 2]
|
1036
1158
|
else
|
1037
|
-
value = quotes[0,
|
1159
|
+
value = quotes[0, n_quotes / 2]
|
1038
1160
|
while true
|
1039
1161
|
quoted_value = @scanner.scan_all(@quoted_value)
|
1040
1162
|
value << quoted_value if quoted_value
|
@@ -1058,11 +1180,9 @@ class CSV
|
|
1058
1180
|
n_quotes = quotes.size
|
1059
1181
|
if n_quotes == 1
|
1060
1182
|
break
|
1061
|
-
elsif (n_quotes % 2) == 1
|
1062
|
-
value << quotes[0, (n_quotes - 1) / 2]
|
1063
|
-
break
|
1064
1183
|
else
|
1065
1184
|
value << quotes[0, n_quotes / 2]
|
1185
|
+
break if (n_quotes % 2) == 1
|
1066
1186
|
end
|
1067
1187
|
end
|
1068
1188
|
value
|
@@ -1098,18 +1218,15 @@ class CSV
|
|
1098
1218
|
|
1099
1219
|
def strip_value(value)
|
1100
1220
|
return value unless @strip
|
1101
|
-
return
|
1221
|
+
return value if value.nil?
|
1102
1222
|
|
1103
1223
|
case @strip
|
1104
1224
|
when String
|
1105
|
-
|
1106
|
-
|
1107
|
-
size -= 1
|
1108
|
-
value = value[1, size]
|
1225
|
+
while value.delete_prefix!(@strip)
|
1226
|
+
# do nothing
|
1109
1227
|
end
|
1110
|
-
while value.
|
1111
|
-
|
1112
|
-
value = value[0, size]
|
1228
|
+
while value.delete_suffix!(@strip)
|
1229
|
+
# do nothing
|
1113
1230
|
end
|
1114
1231
|
else
|
1115
1232
|
value.strip!
|
@@ -1132,22 +1249,22 @@ class CSV
|
|
1132
1249
|
@scanner.keep_start
|
1133
1250
|
end
|
1134
1251
|
|
1135
|
-
def emit_row(row, &block)
|
1252
|
+
def emit_row(row, quoted_fields, &block)
|
1136
1253
|
@lineno += 1
|
1137
1254
|
|
1138
1255
|
raw_row = row
|
1139
1256
|
if @use_headers
|
1140
1257
|
if @headers.nil?
|
1141
|
-
@headers = adjust_headers(row)
|
1258
|
+
@headers = adjust_headers(row, quoted_fields)
|
1142
1259
|
return unless @return_headers
|
1143
1260
|
row = Row.new(@headers, row, true)
|
1144
1261
|
else
|
1145
1262
|
row = Row.new(@headers,
|
1146
|
-
@fields_converter.convert(raw_row, @headers, @lineno))
|
1263
|
+
@fields_converter.convert(raw_row, @headers, @lineno, quoted_fields))
|
1147
1264
|
end
|
1148
1265
|
else
|
1149
1266
|
# convert fields, if needed...
|
1150
|
-
row = @fields_converter.convert(raw_row, nil, @lineno)
|
1267
|
+
row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields)
|
1151
1268
|
end
|
1152
1269
|
|
1153
1270
|
# inject unconverted fields and accessor, if requested...
|
data/lib/csv/row.rb
CHANGED
@@ -703,7 +703,7 @@ class CSV
|
|
703
703
|
# by +index_or_header+ and +specifiers+.
|
704
704
|
#
|
705
705
|
# The nested objects may be instances of various classes.
|
706
|
-
# See {Dig Methods}[https://docs.ruby-lang.org/en/master/
|
706
|
+
# See {Dig Methods}[https://docs.ruby-lang.org/en/master/dig_methods_rdoc.html].
|
707
707
|
#
|
708
708
|
# Examples:
|
709
709
|
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
data/lib/csv/table.rb
CHANGED
@@ -890,9 +890,8 @@ class CSV
|
|
890
890
|
if @mode == :row or @mode == :col_or_row # by index
|
891
891
|
@table.delete_if(&block)
|
892
892
|
else # by header
|
893
|
-
deleted = []
|
894
893
|
headers.each do |header|
|
895
|
-
|
894
|
+
delete(header) if yield([header, self[header]])
|
896
895
|
end
|
897
896
|
end
|
898
897
|
|
@@ -999,9 +998,15 @@ class CSV
|
|
999
998
|
# Omits the headers if option +write_headers+ is given as +false+
|
1000
999
|
# (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]):
|
1001
1000
|
# table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n"
|
1002
|
-
|
1001
|
+
#
|
1002
|
+
# Limit rows if option +limit+ is given like +2+:
|
1003
|
+
# table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n"
|
1004
|
+
def to_csv(write_headers: true, limit: nil, **options)
|
1003
1005
|
array = write_headers ? [headers.to_csv(**options)] : []
|
1004
|
-
@table.
|
1006
|
+
limit ||= @table.size
|
1007
|
+
limit = @table.size + 1 + limit if limit < 0
|
1008
|
+
limit = 0 if limit < 0
|
1009
|
+
@table.first(limit).each do |row|
|
1005
1010
|
array.push(row.fields.to_csv(**options)) unless row.header_row?
|
1006
1011
|
end
|
1007
1012
|
|
@@ -1038,9 +1043,13 @@ class CSV
|
|
1038
1043
|
# Example:
|
1039
1044
|
# source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
1040
1045
|
# table = CSV.parse(source, headers: true)
|
1041
|
-
# table.inspect # => "#<CSV::Table mode:col_or_row row_count:4
|
1046
|
+
# table.inspect # => "#<CSV::Table mode:col_or_row row_count:4>\nName,Value\nfoo,0\nbar,1\nbaz,2\n"
|
1047
|
+
#
|
1042
1048
|
def inspect
|
1043
|
-
"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
|
1049
|
+
inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>"
|
1050
|
+
summary = to_csv(limit: 5)
|
1051
|
+
inspected << "\n" << summary if summary.encoding.ascii_compatible?
|
1052
|
+
inspected
|
1044
1053
|
end
|
1045
1054
|
end
|
1046
1055
|
end
|
data/lib/csv/version.rb
CHANGED
data/lib/csv/writer.rb
CHANGED
@@ -1,11 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative "input_record_separator"
|
4
|
-
require_relative "match_p"
|
5
4
|
require_relative "row"
|
6
5
|
|
7
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
8
|
-
|
9
6
|
class CSV
|
10
7
|
# Note: Don't use this class directly. This is an internal class.
|
11
8
|
class Writer
|
@@ -42,7 +39,10 @@ class CSV
|
|
42
39
|
@headers ||= row if @use_headers
|
43
40
|
@lineno += 1
|
44
41
|
|
45
|
-
|
42
|
+
if @fields_converter
|
43
|
+
quoted_fields = [false] * row.size
|
44
|
+
row = @fields_converter.convert(row, nil, lineno, quoted_fields)
|
45
|
+
end
|
46
46
|
|
47
47
|
i = -1
|
48
48
|
converted_row = row.collect do |field|
|
@@ -97,7 +97,7 @@ class CSV
|
|
97
97
|
return unless @headers
|
98
98
|
|
99
99
|
converter = @options[:header_fields_converter]
|
100
|
-
@headers = converter.convert(@headers, nil, 0)
|
100
|
+
@headers = converter.convert(@headers, nil, 0, [])
|
101
101
|
@headers.each do |header|
|
102
102
|
header.freeze if header.is_a?(String)
|
103
103
|
end
|
data/lib/csv.rb
CHANGED
@@ -95,14 +95,11 @@ require "stringio"
|
|
95
95
|
|
96
96
|
require_relative "csv/fields_converter"
|
97
97
|
require_relative "csv/input_record_separator"
|
98
|
-
require_relative "csv/match_p"
|
99
98
|
require_relative "csv/parser"
|
100
99
|
require_relative "csv/row"
|
101
100
|
require_relative "csv/table"
|
102
101
|
require_relative "csv/writer"
|
103
102
|
|
104
|
-
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
105
|
-
|
106
103
|
# == \CSV
|
107
104
|
#
|
108
105
|
# === In a Hurry?
|
@@ -357,7 +354,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP)
|
|
357
354
|
# - +row_sep+: Specifies the row separator; used to delimit rows.
|
358
355
|
# - +col_sep+: Specifies the column separator; used to delimit fields.
|
359
356
|
# - +quote_char+: Specifies the quote character; used to quote fields.
|
360
|
-
# - +field_size_limit+: Specifies the maximum field size allowed.
|
357
|
+
# - +field_size_limit+: Specifies the maximum field size + 1 allowed.
|
358
|
+
# Deprecated since 3.2.3. Use +max_field_size+ instead.
|
359
|
+
# - +max_field_size+: Specifies the maximum field size allowed.
|
361
360
|
# - +converters+: Specifies the field converters to be used.
|
362
361
|
# - +unconverted_fields+: Specifies whether unconverted fields are to be available.
|
363
362
|
# - +headers+: Specifies whether data contains headers,
|
@@ -864,8 +863,9 @@ class CSV
|
|
864
863
|
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
865
864
|
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
866
865
|
# <b><tt>header</tt></b>:: The header for the column, when available.
|
866
|
+
# <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not.
|
867
867
|
#
|
868
|
-
FieldInfo = Struct.new(:index, :line, :header)
|
868
|
+
FieldInfo = Struct.new(:index, :line, :header, :quoted?)
|
869
869
|
|
870
870
|
# A Regexp used to find and convert some common Date formats.
|
871
871
|
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
@@ -873,10 +873,9 @@ class CSV
|
|
873
873
|
# A Regexp used to find and convert some common DateTime formats.
|
874
874
|
DateTimeMatcher =
|
875
875
|
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
876
|
-
|
877
|
-
# ISO-8601
|
876
|
+
# ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse
|
878
877
|
\d{4}-\d{2}-\d{2}
|
879
|
-
(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
878
|
+
(?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
880
879
|
)\z /x
|
881
880
|
|
882
881
|
# The encoding used by all converters.
|
@@ -926,7 +925,8 @@ class CSV
|
|
926
925
|
symbol: lambda { |h|
|
927
926
|
h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip.
|
928
927
|
gsub(/\s+/, "_").to_sym
|
929
|
-
}
|
928
|
+
},
|
929
|
+
symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym }
|
930
930
|
}
|
931
931
|
|
932
932
|
# Default values for method options.
|
@@ -937,6 +937,7 @@ class CSV
|
|
937
937
|
quote_char: '"',
|
938
938
|
# For parsing.
|
939
939
|
field_size_limit: nil,
|
940
|
+
max_field_size: nil,
|
940
941
|
converters: nil,
|
941
942
|
unconverted_fields: nil,
|
942
943
|
headers: false,
|
@@ -1004,7 +1005,7 @@ class CSV
|
|
1004
1005
|
def instance(data = $stdout, **options)
|
1005
1006
|
# create a _signature_ for this method call, data object and options
|
1006
1007
|
sig = [data.object_id] +
|
1007
|
-
options.values_at(*DEFAULT_OPTIONS.keys
|
1008
|
+
options.values_at(*DEFAULT_OPTIONS.keys)
|
1008
1009
|
|
1009
1010
|
# fetch or create the instance for this signature
|
1010
1011
|
@@instances ||= Hash.new
|
@@ -1201,7 +1202,7 @@ class CSV
|
|
1201
1202
|
# parse options for input, output, or both
|
1202
1203
|
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
|
1203
1204
|
options.each do |key, value|
|
1204
|
-
case key
|
1205
|
+
case key
|
1205
1206
|
when /\Ain(?:put)?_(.+)\Z/
|
1206
1207
|
in_options[$1.to_sym] = value
|
1207
1208
|
when /\Aout(?:put)?_(.+)\Z/
|
@@ -1464,6 +1465,46 @@ class CSV
|
|
1464
1465
|
(new(str, **options) << row).string
|
1465
1466
|
end
|
1466
1467
|
|
1468
|
+
# :call-seq:
|
1469
|
+
# CSV.generate_lines(rows)
|
1470
|
+
# CSV.generate_lines(rows, **options)
|
1471
|
+
#
|
1472
|
+
# Returns the \String created by generating \CSV from +rows+
|
1473
|
+
# using the specified +options+.
|
1474
|
+
#
|
1475
|
+
# Argument +rows+ must be an \Array of rows. Each row is an \Array of \Strings or a \CSV::Row.
|
1476
|
+
#
|
1477
|
+
# Special options:
|
1478
|
+
# * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
|
1479
|
+
# and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise:
|
1480
|
+
# $INPUT_RECORD_SEPARATOR # => "\n"
|
1481
|
+
# * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
|
1482
|
+
# Encoding for the output. This method will try to guess your Encoding from
|
1483
|
+
# the first non-+nil+ field in +rows+, if possible, but you may need to use
|
1484
|
+
# this parameter as a backup plan.
|
1485
|
+
#
|
1486
|
+
# For other +options+,
|
1487
|
+
# see {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1488
|
+
#
|
1489
|
+
# ---
|
1490
|
+
#
|
1491
|
+
# Returns the \String generated from an \Array of rows:
|
1492
|
+
# CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n"
|
1493
|
+
#
|
1494
|
+
# ---
|
1495
|
+
#
|
1496
|
+
# Raises an exception if +rows+ does not respond to +each+:
|
1497
|
+
# # Raises NoMethodError (undefined method `each' for :foo:Symbol)
|
1498
|
+
# CSV.generate_lines(:foo)
|
1499
|
+
#
|
1500
|
+
def generate_lines(rows, **options)
|
1501
|
+
self.generate(**options) do |csv|
|
1502
|
+
rows.each do |row|
|
1503
|
+
csv << row
|
1504
|
+
end
|
1505
|
+
end
|
1506
|
+
end
|
1507
|
+
|
1467
1508
|
#
|
1468
1509
|
# :call-seq:
|
1469
1510
|
# open(file_path, mode = "rb", **options ) -> new_csv
|
@@ -1865,6 +1906,7 @@ class CSV
|
|
1865
1906
|
row_sep: :auto,
|
1866
1907
|
quote_char: '"',
|
1867
1908
|
field_size_limit: nil,
|
1909
|
+
max_field_size: nil,
|
1868
1910
|
converters: nil,
|
1869
1911
|
unconverted_fields: nil,
|
1870
1912
|
headers: false,
|
@@ -1888,8 +1930,19 @@ class CSV
|
|
1888
1930
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
1889
1931
|
|
1890
1932
|
if data.is_a?(String)
|
1933
|
+
if encoding
|
1934
|
+
if encoding.is_a?(String)
|
1935
|
+
data_external_encoding, data_internal_encoding = encoding.split(":", 2)
|
1936
|
+
if data_internal_encoding
|
1937
|
+
data = data.encode(data_internal_encoding, data_external_encoding)
|
1938
|
+
else
|
1939
|
+
data = data.dup.force_encoding(data_external_encoding)
|
1940
|
+
end
|
1941
|
+
else
|
1942
|
+
data = data.dup.force_encoding(encoding)
|
1943
|
+
end
|
1944
|
+
end
|
1891
1945
|
@io = StringIO.new(data)
|
1892
|
-
@io.set_encoding(encoding || data.encoding)
|
1893
1946
|
else
|
1894
1947
|
@io = data
|
1895
1948
|
end
|
@@ -1907,11 +1960,14 @@ class CSV
|
|
1907
1960
|
@initial_header_converters = header_converters
|
1908
1961
|
@initial_write_converters = write_converters
|
1909
1962
|
|
1963
|
+
if max_field_size.nil? and field_size_limit
|
1964
|
+
max_field_size = field_size_limit - 1
|
1965
|
+
end
|
1910
1966
|
@parser_options = {
|
1911
1967
|
column_separator: col_sep,
|
1912
1968
|
row_separator: row_sep,
|
1913
1969
|
quote_character: quote_char,
|
1914
|
-
|
1970
|
+
max_field_size: max_field_size,
|
1915
1971
|
unconverted_fields: unconverted_fields,
|
1916
1972
|
headers: headers,
|
1917
1973
|
return_headers: return_headers,
|
@@ -1979,10 +2035,24 @@ class CSV
|
|
1979
2035
|
# Returns the limit for field size; used for parsing;
|
1980
2036
|
# see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
|
1981
2037
|
# CSV.new('').field_size_limit # => nil
|
2038
|
+
#
|
2039
|
+
# Deprecated since 3.2.3. Use +max_field_size+ instead.
|
1982
2040
|
def field_size_limit
|
1983
2041
|
parser.field_size_limit
|
1984
2042
|
end
|
1985
2043
|
|
2044
|
+
# :call-seq:
|
2045
|
+
# csv.max_field_size -> integer or nil
|
2046
|
+
#
|
2047
|
+
# Returns the limit for field size; used for parsing;
|
2048
|
+
# see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]:
|
2049
|
+
# CSV.new('').max_field_size # => nil
|
2050
|
+
#
|
2051
|
+
# Since 3.2.3.
|
2052
|
+
def max_field_size
|
2053
|
+
parser.max_field_size
|
2054
|
+
end
|
2055
|
+
|
1986
2056
|
# :call-seq:
|
1987
2057
|
# csv.skip_lines -> regexp or nil
|
1988
2058
|
#
|
@@ -2481,7 +2551,13 @@ class CSV
|
|
2481
2551
|
# p row
|
2482
2552
|
# end
|
2483
2553
|
def each(&block)
|
2484
|
-
|
2554
|
+
return to_enum(__method__) unless block_given?
|
2555
|
+
begin
|
2556
|
+
while true
|
2557
|
+
yield(parser_enumerator.next)
|
2558
|
+
end
|
2559
|
+
rescue StopIteration
|
2560
|
+
end
|
2485
2561
|
end
|
2486
2562
|
|
2487
2563
|
# :call-seq:
|
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.
|
4
|
+
version: 3.2.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
8
8
|
- Kouhei Sutou
|
9
|
-
autorequire:
|
9
|
+
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2022-12-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -71,7 +71,7 @@ description: The CSV library provides a complete interface to CSV files and data
|
|
71
71
|
It offers tools to enable you to read and write to and from Strings or IO objects,
|
72
72
|
as needed.
|
73
73
|
email:
|
74
|
-
-
|
74
|
+
-
|
75
75
|
- kou@cozmixng.org
|
76
76
|
executables: []
|
77
77
|
extensions: []
|
@@ -116,10 +116,8 @@ files:
|
|
116
116
|
- lib/csv.rb
|
117
117
|
- lib/csv/core_ext/array.rb
|
118
118
|
- lib/csv/core_ext/string.rb
|
119
|
-
- lib/csv/delete_suffix.rb
|
120
119
|
- lib/csv/fields_converter.rb
|
121
120
|
- lib/csv/input_record_separator.rb
|
122
|
-
- lib/csv/match_p.rb
|
123
121
|
- lib/csv/parser.rb
|
124
122
|
- lib/csv/row.rb
|
125
123
|
- lib/csv/table.rb
|
@@ -130,7 +128,7 @@ licenses:
|
|
130
128
|
- Ruby
|
131
129
|
- BSD-2-Clause
|
132
130
|
metadata: {}
|
133
|
-
post_install_message:
|
131
|
+
post_install_message:
|
134
132
|
rdoc_options:
|
135
133
|
- "--main"
|
136
134
|
- README.md
|
@@ -147,8 +145,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
147
145
|
- !ruby/object:Gem::Version
|
148
146
|
version: '0'
|
149
147
|
requirements: []
|
150
|
-
rubygems_version: 3.
|
151
|
-
signing_key:
|
148
|
+
rubygems_version: 3.4.0.dev
|
149
|
+
signing_key:
|
152
150
|
specification_version: 4
|
153
151
|
summary: CSV Reading and Writing
|
154
152
|
test_files: []
|
data/lib/csv/delete_suffix.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# This provides String#delete_suffix? for Ruby 2.4.
|
4
|
-
unless String.method_defined?(:delete_suffix)
|
5
|
-
class CSV
|
6
|
-
module DeleteSuffix
|
7
|
-
refine String do
|
8
|
-
def delete_suffix(suffix)
|
9
|
-
if end_with?(suffix)
|
10
|
-
self[0...-suffix.size]
|
11
|
-
else
|
12
|
-
self
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
data/lib/csv/match_p.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
# This provides String#match? and Regexp#match? for Ruby 2.3.
|
4
|
-
unless String.method_defined?(:match?)
|
5
|
-
class CSV
|
6
|
-
module MatchP
|
7
|
-
refine String do
|
8
|
-
def match?(pattern)
|
9
|
-
self =~ pattern
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
refine Regexp do
|
14
|
-
def match?(string)
|
15
|
-
self =~ string
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|