csv 3.0.4 → 3.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +62 -0
- data/lib/csv.rb +107 -26
- data/lib/csv/parser.rb +306 -87
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +3 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 277fe614e5fc3f2f8ad100a70c6021aea0b8c9f989a46aad6618f9c3e81e5baf
|
4
|
+
data.tar.gz: cb8aafb272a93788371dcc6992f61ea26a7a19844f3aabacb0e16fc9a3e54058
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 063bf8125079ad1c9f42f26990ab9a6a7a07100fecc2db71a792e5b89aa757decc571e1c39a70c8c8a2eb50475749bbc103985a0fa66a1e35762f7568d7a7f85
|
7
|
+
data.tar.gz: 7daef31a3902cba8dbc139a1d828d15493def81c95011f4da6ee69369dbdfdd3fb97a20a7d9a9f0cd1a8129669e27403f6426644e3b2f48e7fef592c22967dc0
|
data/NEWS.md
CHANGED
@@ -1,5 +1,67 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.0.5 - 2019-03-24
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Added `:liberal_parsing => {backslash_quote: true}` option.
|
8
|
+
[GitHub#74][Patch by 284km]
|
9
|
+
|
10
|
+
* Added `:write_converters` option.
|
11
|
+
[GitHub#73][Patch by Danillo Souza]
|
12
|
+
|
13
|
+
* Added `:write_nil_value` option.
|
14
|
+
|
15
|
+
* Added `:write_empty_value` option.
|
16
|
+
|
17
|
+
* Improved invalid byte line number detection.
|
18
|
+
[GitHub#78][Patch by Alyssa Ross]
|
19
|
+
|
20
|
+
* Added `quote_char: nil` optimization.
|
21
|
+
[GitHub#79][Patch by 284km]
|
22
|
+
|
23
|
+
* Improved error message.
|
24
|
+
[GitHub#81][Patch by Andrés Torres]
|
25
|
+
|
26
|
+
* Improved IO-like implementation for `StringIO` data.
|
27
|
+
[GitHub#80][Patch by Genadi Samokovarov]
|
28
|
+
|
29
|
+
* Added `:strip` option.
|
30
|
+
[GitHub#58]
|
31
|
+
|
32
|
+
### Fixes
|
33
|
+
|
34
|
+
* Fixed a compatibility bug where `CSV#each` didn't take `CSV#shift` into account.
|
35
|
+
[GitHub#76][Patch by Alyssa Ross]
|
36
|
+
|
37
|
+
* Fixed a compatibility bug where `CSV#eof?` didn't take `CSV#each`
|
38
|
+
and `CSV#shift` into account.
|
39
|
+
[GitHub#77][Reported by Chi Leung]
|
40
|
+
|
41
|
+
* Fixed a compatibility bug where an invalid line wasn't ignored.
|
42
|
+
[GitHub#82][Reported by krororo]
|
43
|
+
|
44
|
+
* Fixed a bug where `:skip_lines` didn't work with data containing multibyte characters.
|
45
|
+
[GitHub#83][Reported by ff2248]
|
46
|
+
|
47
|
+
### Thanks
|
48
|
+
|
49
|
+
* Alyssa Ross
|
50
|
+
|
51
|
+
* 284km
|
52
|
+
|
53
|
+
* Chi Leung
|
54
|
+
|
55
|
+
* Danillo Souza
|
56
|
+
|
57
|
+
* Andrés Torres
|
58
|
+
|
59
|
+
* Genadi Samokovarov
|
60
|
+
|
61
|
+
* krororo
|
62
|
+
|
63
|
+
* ff2248
|
64
|
+
|
3
65
|
## 3.0.4 - 2019-01-25
|
4
66
|
|
5
67
|
### Improvements
|
data/lib/csv.rb
CHANGED
@@ -885,6 +885,10 @@ class CSV
|
|
885
885
|
# blank string field is replaced by
|
886
886
|
# the set object.
|
887
887
|
# <b><tt>:quote_empty</tt></b>:: TODO
|
888
|
+
# <b><tt>:write_converters</tt></b>:: Converters applied to the fields of each row before the row is written.
|
889
|
+
# <b><tt>:write_nil_value</tt></b>:: When writing, a +nil+ field is replaced by the set object.
|
890
|
+
# <b><tt>:write_empty_value</tt></b>:: When writing, an empty string field is replaced by the set object.
|
891
|
+
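# <b><tt>:strip</tt></b>:: When +true+, whitespace around each parsed field is stripped; when set to a single-character String, that character is stripped instead.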
|
888
892
|
#
|
889
893
|
# See CSV::DEFAULT_OPTIONS for the default settings.
|
890
894
|
#
|
@@ -911,7 +915,11 @@ class CSV
|
|
911
915
|
encoding: nil,
|
912
916
|
nil_value: nil,
|
913
917
|
empty_value: "",
|
914
|
-
quote_empty: true
|
918
|
+
quote_empty: true,
|
919
|
+
write_converters: nil,
|
920
|
+
write_nil_value: nil,
|
921
|
+
write_empty_value: "",
|
922
|
+
strip: false)
|
915
923
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
916
924
|
|
917
925
|
# create the IO object we will read from
|
@@ -922,8 +930,13 @@ class CSV
|
|
922
930
|
nil_value: nil_value,
|
923
931
|
empty_value: empty_value,
|
924
932
|
}
|
933
|
+
@write_fields_converter_options = {
|
934
|
+
nil_value: write_nil_value,
|
935
|
+
empty_value: write_empty_value,
|
936
|
+
}
|
925
937
|
@initial_converters = converters
|
926
938
|
@initial_header_converters = header_converters
|
939
|
+
@initial_write_converters = write_converters
|
927
940
|
|
928
941
|
@parser_options = {
|
929
942
|
column_separator: col_sep,
|
@@ -939,6 +952,7 @@ class CSV
|
|
939
952
|
encoding: @encoding,
|
940
953
|
nil_value: nil_value,
|
941
954
|
empty_value: empty_value,
|
955
|
+
strip: strip,
|
942
956
|
}
|
943
957
|
@parser = nil
|
944
958
|
|
@@ -998,7 +1012,7 @@ class CSV
|
|
998
1012
|
# as is.
|
999
1013
|
#
|
1000
1014
|
def converters
|
1001
|
-
|
1015
|
+
parser_fields_converter.map do |converter|
|
1002
1016
|
name = Converters.rassoc(converter)
|
1003
1017
|
name ? name.first : converter
|
1004
1018
|
end
|
@@ -1098,12 +1112,58 @@ class CSV
|
|
1098
1112
|
### IO and StringIO Delegation ###
|
1099
1113
|
|
1100
1114
|
extend Forwardable
|
1101
|
-
def_delegators :@io, :binmode, :
|
1102
|
-
:closed?, :
|
1103
|
-
:fileno, :
|
1104
|
-
:
|
1105
|
-
:seek, :
|
1106
|
-
:
|
1115
|
+
def_delegators :@io, :binmode, :close, :close_read, :close_write,
|
1116
|
+
:closed?, :external_encoding, :fcntl,
|
1117
|
+
:fileno, :flush, :fsync, :internal_encoding,
|
1118
|
+
:isatty, :pid, :pos, :pos=, :reopen,
|
1119
|
+
:seek, :string, :sync, :sync=, :tell,
|
1120
|
+
:truncate, :tty?
|
1121
|
+
|
1122
|
+
def binmode?
|
1123
|
+
if @io.respond_to?(:binmode?)
|
1124
|
+
@io.binmode?
|
1125
|
+
else
|
1126
|
+
false
|
1127
|
+
end
|
1128
|
+
end
|
1129
|
+
|
1130
|
+
def flock(*args)
|
1131
|
+
raise NotImplementedError unless @io.respond_to?(:flock)
|
1132
|
+
@io.flock(*args)
|
1133
|
+
end
|
1134
|
+
|
1135
|
+
def ioctl(*args)
|
1136
|
+
raise NotImplementedError unless @io.respond_to?(:ioctl)
|
1137
|
+
@io.ioctl(*args)
|
1138
|
+
end
|
1139
|
+
|
1140
|
+
def path
|
1141
|
+
@io.path if @io.respond_to?(:path)
|
1142
|
+
end
|
1143
|
+
|
1144
|
+
def stat(*args)
|
1145
|
+
raise NotImplementedError unless @io.respond_to?(:stat)
|
1146
|
+
@io.stat(*args)
|
1147
|
+
end
|
1148
|
+
|
1149
|
+
def to_i
|
1150
|
+
raise NotImplementedError unless @io.respond_to?(:to_i)
|
1151
|
+
@io.to_i
|
1152
|
+
end
|
1153
|
+
|
1154
|
+
def to_io
|
1155
|
+
@io.respond_to?(:to_io) ? @io.to_io : @io
|
1156
|
+
end
|
1157
|
+
|
1158
|
+
def eof?
|
1159
|
+
begin
|
1160
|
+
parser_enumerator.peek
|
1161
|
+
false
|
1162
|
+
rescue StopIteration
|
1163
|
+
true
|
1164
|
+
end
|
1165
|
+
end
|
1166
|
+
alias_method :eof, :eof?
|
1107
1167
|
|
1108
1168
|
# Rewinds the underlying IO object and resets CSV's lineno() counter.
|
1109
1169
|
def rewind
|
@@ -1145,7 +1205,7 @@ class CSV
|
|
1145
1205
|
# converted field or the field itself.
|
1146
1206
|
#
|
1147
1207
|
def convert(name = nil, &converter)
|
1148
|
-
|
1208
|
+
parser_fields_converter.add_converter(name, &converter)
|
1149
1209
|
end
|
1150
1210
|
|
1151
1211
|
#
|
@@ -1172,8 +1232,16 @@ class CSV
|
|
1172
1232
|
#
|
1173
1233
|
# The data source must be open for reading.
|
1174
1234
|
#
|
1175
|
-
def each
|
1176
|
-
|
1235
|
+
def each
|
1236
|
+
return to_enum(__method__) unless block_given?
|
1237
|
+
enumerator = parser_enumerator
|
1238
|
+
begin
|
1239
|
+
while true
|
1240
|
+
yield enumerator.next
|
1241
|
+
end
|
1242
|
+
rescue StopIteration
|
1243
|
+
end
|
1244
|
+
self
|
1177
1245
|
end
|
1178
1246
|
|
1179
1247
|
#
|
@@ -1204,9 +1272,8 @@ class CSV
|
|
1204
1272
|
# The data source must be open for reading.
|
1205
1273
|
#
|
1206
1274
|
def shift
|
1207
|
-
@parser_enumerator ||= parser.parse
|
1208
1275
|
begin
|
1209
|
-
|
1276
|
+
parser_enumerator.next
|
1210
1277
|
rescue StopIteration
|
1211
1278
|
nil
|
1212
1279
|
end
|
@@ -1299,7 +1366,7 @@ class CSV
|
|
1299
1366
|
if headers
|
1300
1367
|
header_fields_converter.convert(fields, nil, 0)
|
1301
1368
|
else
|
1302
|
-
|
1369
|
+
parser_fields_converter.convert(fields, @headers, lineno)
|
1303
1370
|
end
|
1304
1371
|
end
|
1305
1372
|
|
@@ -1316,20 +1383,16 @@ class CSV
|
|
1316
1383
|
end
|
1317
1384
|
end
|
1318
1385
|
|
1319
|
-
def
|
1320
|
-
@
|
1386
|
+
def parser_fields_converter
|
1387
|
+
@parser_fields_converter ||= build_parser_fields_converter
|
1321
1388
|
end
|
1322
1389
|
|
1323
|
-
def
|
1390
|
+
def build_parser_fields_converter
|
1324
1391
|
specific_options = {
|
1325
1392
|
builtin_converters: Converters,
|
1326
1393
|
}
|
1327
1394
|
options = @base_fields_converter_options.merge(specific_options)
|
1328
|
-
|
1329
|
-
normalize_converters(@initial_converters).each do |name, converter|
|
1330
|
-
fields_converter.add_converter(name, &converter)
|
1331
|
-
end
|
1332
|
-
fields_converter
|
1395
|
+
build_fields_converter(@initial_converters, options)
|
1333
1396
|
end
|
1334
1397
|
|
1335
1398
|
def header_fields_converter
|
@@ -1342,8 +1405,21 @@ class CSV
|
|
1342
1405
|
accept_nil: true,
|
1343
1406
|
}
|
1344
1407
|
options = @base_fields_converter_options.merge(specific_options)
|
1408
|
+
build_fields_converter(@initial_header_converters, options)
|
1409
|
+
end
|
1410
|
+
|
1411
|
+
def writer_fields_converter
|
1412
|
+
@writer_fields_converter ||= build_writer_fields_converter
|
1413
|
+
end
|
1414
|
+
|
1415
|
+
def build_writer_fields_converter
|
1416
|
+
build_fields_converter(@initial_write_converters,
|
1417
|
+
@write_fields_converter_options)
|
1418
|
+
end
|
1419
|
+
|
1420
|
+
def build_fields_converter(initial_converters, options)
|
1345
1421
|
fields_converter = FieldsConverter.new(options)
|
1346
|
-
normalize_converters(
|
1422
|
+
normalize_converters(initial_converters).each do |name, converter|
|
1347
1423
|
fields_converter.add_converter(name, &converter)
|
1348
1424
|
end
|
1349
1425
|
fields_converter
|
@@ -1354,8 +1430,12 @@ class CSV
|
|
1354
1430
|
end
|
1355
1431
|
|
1356
1432
|
def parser_options
|
1357
|
-
@parser_options.merge(
|
1358
|
-
|
1433
|
+
@parser_options.merge(header_fields_converter: header_fields_converter,
|
1434
|
+
fields_converter: parser_fields_converter)
|
1435
|
+
end
|
1436
|
+
|
1437
|
+
def parser_enumerator
|
1438
|
+
@parser_enumerator ||= parser.parse
|
1359
1439
|
end
|
1360
1440
|
|
1361
1441
|
def writer
|
@@ -1363,7 +1443,8 @@ class CSV
|
|
1363
1443
|
end
|
1364
1444
|
|
1365
1445
|
def writer_options
|
1366
|
-
@writer_options.merge(header_fields_converter: header_fields_converter
|
1446
|
+
@writer_options.merge(header_fields_converter: header_fields_converter,
|
1447
|
+
fields_converter: writer_fields_converter)
|
1367
1448
|
end
|
1368
1449
|
end
|
1369
1450
|
|
data/lib/csv/parser.rb
CHANGED
@@ -49,6 +49,28 @@ class CSV
|
|
49
49
|
read_chunk
|
50
50
|
end
|
51
51
|
|
52
|
+
def each_line(row_separator)
|
53
|
+
buffer = nil
|
54
|
+
input = @scanner.rest
|
55
|
+
@scanner.terminate
|
56
|
+
while input
|
57
|
+
input.each_line(row_separator) do |line|
|
58
|
+
if buffer
|
59
|
+
buffer << line
|
60
|
+
line = buffer
|
61
|
+
buffer = nil
|
62
|
+
end
|
63
|
+
if line.end_with?(row_separator)
|
64
|
+
yield(line)
|
65
|
+
else
|
66
|
+
buffer = line
|
67
|
+
end
|
68
|
+
end
|
69
|
+
input = @inputs.shift
|
70
|
+
end
|
71
|
+
yield(buffer) if buffer
|
72
|
+
end
|
73
|
+
|
52
74
|
def scan(pattern)
|
53
75
|
value = @scanner.scan(pattern)
|
54
76
|
return value if @last_scanner
|
@@ -94,7 +116,7 @@ class CSV
|
|
94
116
|
start, buffer = @keeps.pop
|
95
117
|
if buffer
|
96
118
|
string = @scanner.string
|
97
|
-
keep = string
|
119
|
+
keep = string.byteslice(start, string.bytesize - start)
|
98
120
|
if keep and not keep.empty?
|
99
121
|
@inputs.unshift(StringIO.new(keep))
|
100
122
|
@last_scanner = false
|
@@ -121,7 +143,7 @@ class CSV
|
|
121
143
|
keep = @keeps.last
|
122
144
|
keep_start = keep[0]
|
123
145
|
string = @scanner.string
|
124
|
-
keep_data = string
|
146
|
+
keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
|
125
147
|
if keep_data
|
126
148
|
keep_buffer = keep[1]
|
127
149
|
if keep_buffer
|
@@ -170,7 +192,6 @@ class CSV
|
|
170
192
|
@input = input
|
171
193
|
@options = options
|
172
194
|
@samples = []
|
173
|
-
@parsed = false
|
174
195
|
|
175
196
|
prepare
|
176
197
|
end
|
@@ -230,9 +251,7 @@ class CSV
|
|
230
251
|
def parse(&block)
|
231
252
|
return to_enum(__method__) unless block_given?
|
232
253
|
|
233
|
-
|
234
|
-
|
235
|
-
if @return_headers and @headers
|
254
|
+
if @return_headers and @headers and @raw_headers
|
236
255
|
headers = Row.new(@headers, @raw_headers, true)
|
237
256
|
if @unconverted_fields
|
238
257
|
headers = add_unconverted_fields(headers, [])
|
@@ -240,58 +259,23 @@ class CSV
|
|
240
259
|
yield headers
|
241
260
|
end
|
242
261
|
|
243
|
-
row = []
|
244
262
|
begin
|
245
|
-
@scanner
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
@unquoted_column_value = false
|
251
|
-
value = parse_column_value
|
252
|
-
if value and @field_size_limit and value.size >= @field_size_limit
|
253
|
-
raise MalformedCSVError.new("Field size exceeded", @lineno + 1)
|
254
|
-
end
|
255
|
-
if parse_column_end
|
256
|
-
row << value
|
257
|
-
elsif parse_row_end
|
258
|
-
if row.empty? and value.nil?
|
259
|
-
emit_row([], &block) unless @skip_blanks
|
260
|
-
else
|
261
|
-
row << value
|
262
|
-
emit_row(row, &block)
|
263
|
-
row = []
|
264
|
-
end
|
265
|
-
skip_needless_lines
|
266
|
-
start_row
|
267
|
-
elsif @scanner.eos?
|
268
|
-
break if row.empty? and value.nil?
|
269
|
-
row << value
|
270
|
-
emit_row(row, &block)
|
271
|
-
break
|
272
|
-
else
|
273
|
-
if @quoted_column_value
|
274
|
-
message = "Do not allow except col_sep_split_separator " +
|
275
|
-
"after quoted fields"
|
276
|
-
raise MalformedCSVError.new(message, @lineno + 1)
|
277
|
-
elsif @unquoted_column_value and @scanner.scan(@cr_or_lf)
|
278
|
-
message = "Unquoted fields do not allow \\r or \\n"
|
279
|
-
raise MalformedCSVError.new(message, @lineno + 1)
|
280
|
-
elsif @scanner.rest.start_with?(@quote_character)
|
281
|
-
message = "Illegal quoting"
|
282
|
-
raise MalformedCSVError.new(message, @lineno + 1)
|
283
|
-
else
|
284
|
-
raise MalformedCSVError.new("TODO: Meaningful message",
|
285
|
-
@lineno + 1)
|
286
|
-
end
|
287
|
-
end
|
263
|
+
@scanner ||= build_scanner
|
264
|
+
if quote_character.nil?
|
265
|
+
parse_no_quote(&block)
|
266
|
+
else
|
267
|
+
parse_quotable(&block)
|
288
268
|
end
|
289
269
|
rescue InvalidEncoding
|
270
|
+
if @scanner
|
271
|
+
ignore_broken_line
|
272
|
+
lineno = @lineno
|
273
|
+
else
|
274
|
+
lineno = @lineno + 1
|
275
|
+
end
|
290
276
|
message = "Invalid byte sequence in #{@encoding}"
|
291
|
-
raise MalformedCSVError.new(message,
|
277
|
+
raise MalformedCSVError.new(message, lineno)
|
292
278
|
end
|
293
|
-
|
294
|
-
@parsed = true
|
295
279
|
end
|
296
280
|
|
297
281
|
def use_headers?
|
@@ -301,7 +285,13 @@ class CSV
|
|
301
285
|
private
|
302
286
|
def prepare
|
303
287
|
prepare_variable
|
304
|
-
|
288
|
+
prepare_backslash
|
289
|
+
prepare_quote_character
|
290
|
+
prepare_skip_lines
|
291
|
+
prepare_strip
|
292
|
+
prepare_separators
|
293
|
+
prepare_quoted
|
294
|
+
prepare_unquoted
|
305
295
|
prepare_line
|
306
296
|
prepare_header
|
307
297
|
prepare_parser
|
@@ -315,11 +305,14 @@ class CSV
|
|
315
305
|
if liberal_parsing.is_a?(Hash)
|
316
306
|
@double_quote_outside_quote =
|
317
307
|
liberal_parsing[:double_quote_outside_quote]
|
308
|
+
@backslash_quote = liberal_parsing[:backslash_quote]
|
318
309
|
else
|
319
310
|
@double_quote_outside_quote = false
|
311
|
+
@backslash_quote = false
|
320
312
|
end
|
321
313
|
else
|
322
314
|
@liberal_parsing = false
|
315
|
+
@backslash_quote = false
|
323
316
|
end
|
324
317
|
@unconverted_fields = @options[:unconverted_fields]
|
325
318
|
@field_size_limit = @options[:field_size_limit]
|
@@ -328,20 +321,33 @@ class CSV
|
|
328
321
|
@header_fields_converter = @options[:header_fields_converter]
|
329
322
|
end
|
330
323
|
|
331
|
-
def
|
332
|
-
@
|
333
|
-
@row_separator =
|
334
|
-
resolve_row_separator(@options[:row_separator]).encode(@encoding)
|
335
|
-
@quote_character = @options[:quote_character].to_s.encode(@encoding)
|
336
|
-
if @quote_character.length != 1
|
337
|
-
raise ArgumentError, ":quote_char has to be a single character String"
|
338
|
-
end
|
324
|
+
def prepare_backslash
|
325
|
+
@backslash_character = "\\".encode(@encoding)
|
339
326
|
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
|
327
|
+
@escaped_backslash_character = Regexp.escape(@backslash_character)
|
328
|
+
@escaped_backslash = Regexp.new(@escaped_backslash_character)
|
329
|
+
end
|
330
|
+
|
331
|
+
def prepare_quote_character
|
332
|
+
@quote_character = @options[:quote_character]
|
333
|
+
if @quote_character.nil?
|
334
|
+
@escaped_quote_character = nil
|
335
|
+
@escaped_quote = nil
|
336
|
+
@backslash_quote_character = nil
|
337
|
+
else
|
338
|
+
@quote_character = @quote_character.to_s.encode(@encoding)
|
339
|
+
if @quote_character.length != 1
|
340
|
+
message = ":quote_char has to be nil or a single character String"
|
341
|
+
raise ArgumentError, message
|
342
|
+
end
|
343
|
+
@escaped_quote_character = Regexp.escape(@quote_character)
|
344
|
+
@escaped_quote = Regexp.new(@escaped_quote_character)
|
345
|
+
@backslash_quote_character =
|
346
|
+
@backslash_character + @escaped_quote_character
|
347
|
+
end
|
348
|
+
end
|
344
349
|
|
350
|
+
def prepare_skip_lines
|
345
351
|
skip_lines = @options[:skip_lines]
|
346
352
|
case skip_lines
|
347
353
|
when String
|
@@ -356,18 +362,56 @@ class CSV
|
|
356
362
|
end
|
357
363
|
@skip_lines = skip_lines
|
358
364
|
end
|
365
|
+
end
|
359
366
|
|
360
|
-
|
367
|
+
def prepare_strip
|
368
|
+
@strip = @options[:strip]
|
369
|
+
@escaped_strip = nil
|
370
|
+
@strip_value = nil
|
371
|
+
if @strip.is_a?(String)
|
372
|
+
case @strip.length
|
373
|
+
when 0
|
374
|
+
raise ArgumentError, ":strip must not be an empty String"
|
375
|
+
when 1
|
376
|
+
# ok
|
377
|
+
else
|
378
|
+
raise ArgumentError, ":strip doesn't support 2 or more characters yet"
|
379
|
+
end
|
380
|
+
@strip = @strip.encode(@encoding)
|
381
|
+
@escaped_strip = Regexp.escape(@strip)
|
382
|
+
if @quote_character
|
383
|
+
@strip_value = Regexp.new(@escaped_strip +
|
384
|
+
"+".encode(@encoding))
|
385
|
+
end
|
386
|
+
elsif @strip
|
387
|
+
strip_values = " \t\r\n\f\v"
|
388
|
+
@escaped_strip = strip_values.encode(@encoding)
|
389
|
+
if @quote_character
|
390
|
+
@strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
|
391
|
+
end
|
392
|
+
end
|
393
|
+
end
|
394
|
+
|
395
|
+
def prepare_separators
|
396
|
+
@column_separator = @options[:column_separator].to_s.encode(@encoding)
|
397
|
+
@row_separator =
|
398
|
+
resolve_row_separator(@options[:row_separator]).encode(@encoding)
|
399
|
+
|
400
|
+
@escaped_column_separator = Regexp.escape(@column_separator)
|
401
|
+
@escaped_first_column_separator = Regexp.escape(@column_separator[0])
|
402
|
+
@column_end = Regexp.new(@escaped_column_separator)
|
361
403
|
if @column_separator.size > 1
|
362
404
|
@column_ends = @column_separator.each_char.collect do |char|
|
363
405
|
Regexp.new(Regexp.escape(char))
|
364
406
|
end
|
365
|
-
@first_column_separators = Regexp.new(escaped_first_column_separator +
|
407
|
+
@first_column_separators = Regexp.new(@escaped_first_column_separator +
|
366
408
|
"+".encode(@encoding))
|
367
409
|
else
|
368
410
|
@column_ends = nil
|
369
411
|
@first_column_separators = nil
|
370
412
|
end
|
413
|
+
|
414
|
+
escaped_row_separator = Regexp.escape(@row_separator)
|
371
415
|
@row_end = Regexp.new(escaped_row_separator)
|
372
416
|
if @row_separator.size > 1
|
373
417
|
@row_ends = @row_separator.each_char.collect do |char|
|
@@ -376,25 +420,55 @@ class CSV
|
|
376
420
|
else
|
377
421
|
@row_ends = nil
|
378
422
|
end
|
379
|
-
|
380
|
-
"+".encode(@encoding))
|
381
|
-
@quoted_value = Regexp.new("[^".encode(@encoding) +
|
382
|
-
escaped_quote_character +
|
383
|
-
"]+".encode(@encoding))
|
384
|
-
if @liberal_parsing
|
385
|
-
@unquoted_value = Regexp.new("[^".encode(@encoding) +
|
386
|
-
escaped_first_column_separator +
|
387
|
-
"\r\n]+".encode(@encoding))
|
388
|
-
else
|
389
|
-
@unquoted_value = Regexp.new("[^".encode(@encoding) +
|
390
|
-
escaped_quote_character +
|
391
|
-
escaped_first_column_separator +
|
392
|
-
"\r\n]+".encode(@encoding))
|
393
|
-
end
|
423
|
+
|
394
424
|
@cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
|
395
425
|
@not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
|
396
426
|
end
|
397
427
|
|
428
|
+
def prepare_quoted
|
429
|
+
if @quote_character
|
430
|
+
@quotes = Regexp.new(@escaped_quote_character +
|
431
|
+
"+".encode(@encoding))
|
432
|
+
no_quoted_values = @escaped_quote_character.dup
|
433
|
+
if @backslash_quote
|
434
|
+
no_quoted_values << @escaped_backslash_character
|
435
|
+
end
|
436
|
+
@quoted_value = Regexp.new("[^".encode(@encoding) +
|
437
|
+
no_quoted_values +
|
438
|
+
"]+".encode(@encoding))
|
439
|
+
else
|
440
|
+
if @escaped_strip
|
441
|
+
@split_column_separator = Regexp.new(@escaped_strip +
|
442
|
+
"*".encode(@encoding) +
|
443
|
+
@escaped_column_separator +
|
444
|
+
@escaped_strip +
|
445
|
+
"*".encode(@encoding))
|
446
|
+
else
|
447
|
+
if @column_separator == " ".encode(@encoding)
|
448
|
+
@split_column_separator = @column_end
|
449
|
+
else
|
450
|
+
@split_column_separator = @column_separator
|
451
|
+
end
|
452
|
+
end
|
453
|
+
end
|
454
|
+
end
|
455
|
+
|
456
|
+
def prepare_unquoted
|
457
|
+
return if @quote_character.nil?
|
458
|
+
|
459
|
+
no_unquoted_values = "\r\n".encode(@encoding)
|
460
|
+
no_unquoted_values << @escaped_first_column_separator
|
461
|
+
unless @liberal_parsing
|
462
|
+
no_unquoted_values << @escaped_quote_character
|
463
|
+
end
|
464
|
+
if @escaped_strip
|
465
|
+
no_unquoted_values << @escaped_strip
|
466
|
+
end
|
467
|
+
@unquoted_value = Regexp.new("[^".encode(@encoding) +
|
468
|
+
no_unquoted_values +
|
469
|
+
"]+".encode(@encoding))
|
470
|
+
end
|
471
|
+
|
398
472
|
def resolve_row_separator(separator)
|
399
473
|
if separator == :auto
|
400
474
|
cr = "\r".encode(@encoding)
|
@@ -514,6 +588,8 @@ class CSV
|
|
514
588
|
end
|
515
589
|
|
516
590
|
def may_quoted?
|
591
|
+
return false if @quote_character.nil?
|
592
|
+
|
517
593
|
if @input.is_a?(StringIO)
|
518
594
|
sample = @input.string
|
519
595
|
else
|
@@ -534,6 +610,10 @@ class CSV
|
|
534
610
|
@io.gets(*args)
|
535
611
|
end
|
536
612
|
|
613
|
+
def each_line(*args, &block)
|
614
|
+
@io.each_line(*args, &block)
|
615
|
+
end
|
616
|
+
|
537
617
|
def eof?
|
538
618
|
@io.eof?
|
539
619
|
end
|
@@ -548,7 +628,10 @@ class CSV
|
|
548
628
|
else
|
549
629
|
inputs << @input
|
550
630
|
end
|
551
|
-
|
631
|
+
chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
|
632
|
+
InputsScanner.new(inputs,
|
633
|
+
@encoding,
|
634
|
+
chunk_size: Integer(chunk_size, 10))
|
552
635
|
end
|
553
636
|
else
|
554
637
|
def build_scanner
|
@@ -560,8 +643,13 @@ class CSV
|
|
560
643
|
end
|
561
644
|
if string
|
562
645
|
unless string.valid_encoding?
|
563
|
-
|
564
|
-
|
646
|
+
index = string.lines(@row_separator).index do |line|
|
647
|
+
!line.valid_encoding?
|
648
|
+
end
|
649
|
+
if index
|
650
|
+
message = "Invalid byte sequence in #{@encoding}"
|
651
|
+
raise MalformedCSVError.new(message, @lineno + index + 1)
|
652
|
+
end
|
565
653
|
end
|
566
654
|
Scanner.new(string)
|
567
655
|
else
|
@@ -582,6 +670,7 @@ class CSV
|
|
582
670
|
line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
|
583
671
|
line << @row_separator if parse_row_end
|
584
672
|
if skip_line?(line)
|
673
|
+
@lineno += 1
|
585
674
|
@scanner.keep_drop
|
586
675
|
else
|
587
676
|
@scanner.keep_back
|
@@ -601,6 +690,96 @@ class CSV
|
|
601
690
|
end
|
602
691
|
end
|
603
692
|
|
693
|
+
def parse_no_quote(&block)
|
694
|
+
if @scanner.respond_to?(:string)
|
695
|
+
scanner = @scanner.string
|
696
|
+
else
|
697
|
+
scanner = @scanner
|
698
|
+
end
|
699
|
+
scanner.each_line(@row_separator) do |value|
|
700
|
+
next if @skip_lines and skip_line?(value)
|
701
|
+
value.chomp!
|
702
|
+
|
703
|
+
if value.empty?
|
704
|
+
next if @skip_blanks
|
705
|
+
row = []
|
706
|
+
else
|
707
|
+
value = strip_value(value)
|
708
|
+
row = value.split(@split_column_separator, -1)
|
709
|
+
n_columns = row.size
|
710
|
+
i = 0
|
711
|
+
while i < n_columns
|
712
|
+
row[i] = nil if row[i].empty?
|
713
|
+
i += 1
|
714
|
+
end
|
715
|
+
end
|
716
|
+
@last_line = value
|
717
|
+
emit_row(row, &block)
|
718
|
+
end
|
719
|
+
end
|
720
|
+
|
721
|
+
def parse_quotable(&block)
|
722
|
+
row = []
|
723
|
+
skip_needless_lines
|
724
|
+
start_row
|
725
|
+
while true
|
726
|
+
@quoted_column_value = false
|
727
|
+
@unquoted_column_value = false
|
728
|
+
@scanner.scan_all(@strip_value) if @strip_value
|
729
|
+
value = parse_column_value
|
730
|
+
if value
|
731
|
+
@scanner.scan_all(@strip_value) if @strip_value
|
732
|
+
if @field_size_limit and value.size >= @field_size_limit
|
733
|
+
ignore_broken_line
|
734
|
+
raise MalformedCSVError.new("Field size exceeded", @lineno)
|
735
|
+
end
|
736
|
+
end
|
737
|
+
if parse_column_end
|
738
|
+
row << value
|
739
|
+
elsif parse_row_end
|
740
|
+
if row.empty? and value.nil?
|
741
|
+
emit_row([], &block) unless @skip_blanks
|
742
|
+
else
|
743
|
+
row << value
|
744
|
+
emit_row(row, &block)
|
745
|
+
row = []
|
746
|
+
end
|
747
|
+
skip_needless_lines
|
748
|
+
start_row
|
749
|
+
elsif @scanner.eos?
|
750
|
+
break if row.empty? and value.nil?
|
751
|
+
row << value
|
752
|
+
emit_row(row, &block)
|
753
|
+
break
|
754
|
+
else
|
755
|
+
if @quoted_column_value
|
756
|
+
ignore_broken_line
|
757
|
+
message = "Any value after quoted field isn't allowed"
|
758
|
+
raise MalformedCSVError.new(message, @lineno)
|
759
|
+
elsif @unquoted_column_value and
|
760
|
+
(new_line = @scanner.scan(@cr_or_lf))
|
761
|
+
ignore_broken_line
|
762
|
+
message = "Unquoted fields do not allow new line " +
|
763
|
+
"<#{new_line.inspect}>"
|
764
|
+
raise MalformedCSVError.new(message, @lineno)
|
765
|
+
elsif @scanner.rest.start_with?(@quote_character)
|
766
|
+
ignore_broken_line
|
767
|
+
message = "Illegal quoting"
|
768
|
+
raise MalformedCSVError.new(message, @lineno)
|
769
|
+
elsif (new_line = @scanner.scan(@cr_or_lf))
|
770
|
+
ignore_broken_line
|
771
|
+
message = "New line must be <#{@row_separator.inspect}> " +
|
772
|
+
"not <#{new_line.inspect}>"
|
773
|
+
raise MalformedCSVError.new(message, @lineno)
|
774
|
+
else
|
775
|
+
ignore_broken_line
|
776
|
+
raise MalformedCSVError.new("TODO: Meaningful message",
|
777
|
+
@lineno)
|
778
|
+
end
|
779
|
+
end
|
780
|
+
end
|
781
|
+
end
|
782
|
+
|
604
783
|
def parse_column_value
|
605
784
|
if @liberal_parsing
|
606
785
|
quoted_value = parse_quoted_column_value
|
@@ -651,6 +830,7 @@ class CSV
|
|
651
830
|
value << sub_value
|
652
831
|
end
|
653
832
|
end
|
833
|
+
value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
|
654
834
|
value
|
655
835
|
end
|
656
836
|
|
@@ -667,10 +847,22 @@ class CSV
|
|
667
847
|
while true
|
668
848
|
quoted_value = @scanner.scan_all(@quoted_value)
|
669
849
|
value << quoted_value if quoted_value
|
850
|
+
if @backslash_quote
|
851
|
+
if @scanner.scan(@escaped_backslash)
|
852
|
+
if @scanner.scan(@escaped_quote)
|
853
|
+
value << @quote_character
|
854
|
+
else
|
855
|
+
value << @backslash_character
|
856
|
+
end
|
857
|
+
next
|
858
|
+
end
|
859
|
+
end
|
860
|
+
|
670
861
|
quotes = @scanner.scan_all(@quotes)
|
671
862
|
unless quotes
|
863
|
+
ignore_broken_line
|
672
864
|
message = "Unclosed quoted field"
|
673
|
-
raise MalformedCSVError.new(message, @lineno
|
865
|
+
raise MalformedCSVError.new(message, @lineno)
|
674
866
|
end
|
675
867
|
n_quotes = quotes.size
|
676
868
|
if n_quotes == 1
|
@@ -713,6 +905,33 @@ class CSV
|
|
713
905
|
end
|
714
906
|
end
|
715
907
|
|
908
|
+
def strip_value(value)
|
909
|
+
return value unless @strip
|
910
|
+
return nil if value.nil?
|
911
|
+
|
912
|
+
case @strip
|
913
|
+
when String
|
914
|
+
size = value.size
|
915
|
+
while value.start_with?(@strip)
|
916
|
+
size -= 1
|
917
|
+
value = value[1, size]
|
918
|
+
end
|
919
|
+
while value.end_with?(@strip)
|
920
|
+
size -= 1
|
921
|
+
value = value[0, size]
|
922
|
+
end
|
923
|
+
else
|
924
|
+
value.strip!
|
925
|
+
end
|
926
|
+
value
|
927
|
+
end
|
928
|
+
|
929
|
+
def ignore_broken_line
|
930
|
+
@scanner.scan_all(@not_line_end)
|
931
|
+
@scanner.scan_all(@cr_or_lf)
|
932
|
+
@lineno += 1
|
933
|
+
end
|
934
|
+
|
716
935
|
def start_row
|
717
936
|
if @last_line
|
718
937
|
@last_line = nil
|
data/lib/csv/version.rb
CHANGED
data/lib/csv/writer.rb
CHANGED
@@ -18,6 +18,7 @@ class CSV
|
|
18
18
|
if @options[:write_headers] and @headers
|
19
19
|
self << @headers
|
20
20
|
end
|
21
|
+
@fields_converter = @options[:fields_converter]
|
21
22
|
end
|
22
23
|
|
23
24
|
def <<(row)
|
@@ -31,6 +32,8 @@ class CSV
|
|
31
32
|
@headers ||= row if @use_headers
|
32
33
|
@lineno += 1
|
33
34
|
|
35
|
+
row = @fields_converter.convert(row, nil, lineno) if @fields_converter
|
36
|
+
|
34
37
|
converted_row = row.collect do |field|
|
35
38
|
quote(field)
|
36
39
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-
|
12
|
+
date: 2019-03-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|