csv 3.0.4 → 3.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b0c308f4cca10d6b48a2268e992fa545918c6cbe99837b83ec048b55eb76b702
4
- data.tar.gz: 8661eb5e276abdeb5e939e3c2359d6ee991199550cfaa8e623496946fad17b8f
3
+ metadata.gz: 277fe614e5fc3f2f8ad100a70c6021aea0b8c9f989a46aad6618f9c3e81e5baf
4
+ data.tar.gz: cb8aafb272a93788371dcc6992f61ea26a7a19844f3aabacb0e16fc9a3e54058
5
5
  SHA512:
6
- metadata.gz: 5875a4b446bd22fbdb0ab1a829576e219b64aa80a66f27e73ae487ed7fa6fb09429f8f3185ab8b08c07ee30a49204b98acd1d7a72a97c9ec89234d79dc597553
7
- data.tar.gz: bda1a607b00312b0e4bc0e2d444670d5f9661251ad73cb2061baeb695cb264af0a06f3bf44fc4d00d6406a1b61f0d4bbdfb8e6fd036d276f51fd30fec4f179e3
6
+ metadata.gz: 063bf8125079ad1c9f42f26990ab9a6a7a07100fecc2db71a792e5b89aa757decc571e1c39a70c8c8a2eb50475749bbc103985a0fa66a1e35762f7568d7a7f85
7
+ data.tar.gz: 7daef31a3902cba8dbc139a1d828d15493def81c95011f4da6ee69369dbdfdd3fb97a20a7d9a9f0cd1a8129669e27403f6426644e3b2f48e7fef592c22967dc0
data/NEWS.md CHANGED
@@ -1,5 +1,67 @@
1
1
  # News
2
2
 
3
+ ## 3.0.5 - 2019-03-24
4
+
5
+ ### Improvements
6
+
7
+ * Added `:liberal_parsing => {backslash_quote: true}` option.
8
+ [GitHub#74][Patch by 284km]
9
+
10
+ * Added `:write_converters` option.
11
+ [GitHub#73][Patch by Danillo Souza]
12
+
13
+ * Added `:write_nil_value` option.
14
+
15
+ * Added `:write_empty_value` option.
16
+
17
+ * Improved invalid byte line number detection.
18
+ [GitHub#78][Patch by Alyssa Ross]
19
+
20
+ * Added `quote_char: nil` optimization.
21
+ [GitHub#79][Patch by 284km]
22
+
23
+ * Improved error message.
24
+ [GitHub#81][Patch by Andrés Torres]
25
+
26
+ * Improved IO-like implementation for `StringIO` data.
27
+ [GitHub#80][Patch by Genadi Samokovarov]
28
+
29
+ * Added `:strip` option.
30
+ [GitHub#58]
31
+
32
+ ### Fixes
33
+
34
+ * Fixed a compatibility bug where `CSV#each` didn't account for `CSV#shift`.
35
+ [GitHub#76][Patch by Alyssa Ross]
36
+
37
+ * Fixed a compatibility bug where `CSV#eof?` didn't account for `CSV#each`
38
+ and `CSV#shift`.
39
+ [GitHub#77][Reported by Chi Leung]
40
+
41
+ * Fixed a compatibility bug where an invalid line wasn't ignored.
42
+ [GitHub#82][Reported by krororo]
43
+
44
+ * Fixed a bug where `:skip_lines` didn't work with multibyte character data.
45
+ [GitHub#83][Reported by ff2248]
46
+
47
+ ### Thanks
48
+
49
+ * Alyssa Ross
50
+
51
+ * 284km
52
+
53
+ * Chi Leung
54
+
55
+ * Danillo Souza
56
+
57
+ * Andrés Torres
58
+
59
+ * Genadi Samokovarov
60
+
61
+ * krororo
62
+
63
+ * ff2248
64
+
3
65
  ## 3.0.4 - 2019-01-25
4
66
 
5
67
  ### Improvements
data/lib/csv.rb CHANGED
@@ -885,6 +885,10 @@ class CSV
885
885
  # blank string field is replaced by
886
886
  # the set object.
887
887
  # <b><tt>:quote_empty</tt></b>:: TODO
888
+ # <b><tt>:write_converters</tt></b>:: Converters applied to the fields of each row before the row is written.
889
+ # <b><tt>:write_nil_value</tt></b>:: The object written in place of each +nil+ field.
890
+ # <b><tt>:write_empty_value</tt></b>:: The object written in place of each empty String field.
891
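+ # <b><tt>:strip</tt></b>:: When +true+, whitespace around each parsed field is stripped; when a one-character String, that character is stripped instead.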
888
892
  #
889
893
  # See CSV::DEFAULT_OPTIONS for the default settings.
890
894
  #
@@ -911,7 +915,11 @@ class CSV
911
915
  encoding: nil,
912
916
  nil_value: nil,
913
917
  empty_value: "",
914
- quote_empty: true)
918
+ quote_empty: true,
919
+ write_converters: nil,
920
+ write_nil_value: nil,
921
+ write_empty_value: "",
922
+ strip: false)
915
923
  raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
916
924
 
917
925
  # create the IO object we will read from
@@ -922,8 +930,13 @@ class CSV
922
930
  nil_value: nil_value,
923
931
  empty_value: empty_value,
924
932
  }
933
+ @write_fields_converter_options = {
934
+ nil_value: write_nil_value,
935
+ empty_value: write_empty_value,
936
+ }
925
937
  @initial_converters = converters
926
938
  @initial_header_converters = header_converters
939
+ @initial_write_converters = write_converters
927
940
 
928
941
  @parser_options = {
929
942
  column_separator: col_sep,
@@ -939,6 +952,7 @@ class CSV
939
952
  encoding: @encoding,
940
953
  nil_value: nil_value,
941
954
  empty_value: empty_value,
955
+ strip: strip,
942
956
  }
943
957
  @parser = nil
944
958
 
@@ -998,7 +1012,7 @@ class CSV
998
1012
  # as is.
999
1013
  #
1000
1014
  def converters
1001
- fields_converter.map do |converter|
1015
+ parser_fields_converter.map do |converter|
1002
1016
  name = Converters.rassoc(converter)
1003
1017
  name ? name.first : converter
1004
1018
  end
@@ -1098,12 +1112,58 @@ class CSV
1098
1112
  ### IO and StringIO Delegation ###
1099
1113
 
1100
1114
  extend Forwardable
1101
- def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
1102
- :closed?, :eof, :eof?, :external_encoding, :fcntl,
1103
- :fileno, :flock, :flush, :fsync, :internal_encoding,
1104
- :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
1105
- :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
1106
- :to_io, :truncate, :tty?
1115
+ def_delegators :@io, :binmode, :close, :close_read, :close_write,
1116
+ :closed?, :external_encoding, :fcntl,
1117
+ :fileno, :flush, :fsync, :internal_encoding,
1118
+ :isatty, :pid, :pos, :pos=, :reopen,
1119
+ :seek, :string, :sync, :sync=, :tell,
1120
+ :truncate, :tty?
1121
+
1122
+ def binmode?
1123
+ if @io.respond_to?(:binmode?)
1124
+ @io.binmode?
1125
+ else
1126
+ false
1127
+ end
1128
+ end
1129
+
1130
+ def flock(*args)
1131
+ raise NotImplementedError unless @io.respond_to?(:flock)
1132
+ @io.flock(*args)
1133
+ end
1134
+
1135
+ def ioctl(*args)
1136
+ raise NotImplementedError unless @io.respond_to?(:ioctl)
1137
+ @io.ioctl(*args)
1138
+ end
1139
+
1140
+ def path
1141
+ @io.path if @io.respond_to?(:path)
1142
+ end
1143
+
1144
+ def stat(*args)
1145
+ raise NotImplementedError unless @io.respond_to?(:stat)
1146
+ @io.stat(*args)
1147
+ end
1148
+
1149
+ def to_i
1150
+ raise NotImplementedError unless @io.respond_to?(:to_i)
1151
+ @io.to_i
1152
+ end
1153
+
1154
+ def to_io
1155
+ @io.respond_to?(:to_io) ? @io.to_io : @io
1156
+ end
1157
+
1158
+ def eof?
1159
+ begin
1160
+ parser_enumerator.peek
1161
+ false
1162
+ rescue StopIteration
1163
+ true
1164
+ end
1165
+ end
1166
+ alias_method :eof, :eof?
1107
1167
 
1108
1168
  # Rewinds the underlying IO object and resets CSV's lineno() counter.
1109
1169
  def rewind
@@ -1145,7 +1205,7 @@ class CSV
1145
1205
  # converted field or the field itself.
1146
1206
  #
1147
1207
  def convert(name = nil, &converter)
1148
- fields_converter.add_converter(name, &converter)
1208
+ parser_fields_converter.add_converter(name, &converter)
1149
1209
  end
1150
1210
 
1151
1211
  #
@@ -1172,8 +1232,16 @@ class CSV
1172
1232
  #
1173
1233
  # The data source must be open for reading.
1174
1234
  #
1175
- def each(&block)
1176
- parser.parse(&block)
1235
+ def each
1236
+ return to_enum(__method__) unless block_given?
1237
+ enumerator = parser_enumerator
1238
+ begin
1239
+ while true
1240
+ yield enumerator.next
1241
+ end
1242
+ rescue StopIteration
1243
+ end
1244
+ self
1177
1245
  end
1178
1246
 
1179
1247
  #
@@ -1204,9 +1272,8 @@ class CSV
1204
1272
  # The data source must be open for reading.
1205
1273
  #
1206
1274
  def shift
1207
- @parser_enumerator ||= parser.parse
1208
1275
  begin
1209
- @parser_enumerator.next
1276
+ parser_enumerator.next
1210
1277
  rescue StopIteration
1211
1278
  nil
1212
1279
  end
@@ -1299,7 +1366,7 @@ class CSV
1299
1366
  if headers
1300
1367
  header_fields_converter.convert(fields, nil, 0)
1301
1368
  else
1302
- fields_converter.convert(fields, @headers, lineno)
1369
+ parser_fields_converter.convert(fields, @headers, lineno)
1303
1370
  end
1304
1371
  end
1305
1372
 
@@ -1316,20 +1383,16 @@ class CSV
1316
1383
  end
1317
1384
  end
1318
1385
 
1319
- def fields_converter
1320
- @fields_converter ||= build_fields_converter
1386
+ def parser_fields_converter
1387
+ @parser_fields_converter ||= build_parser_fields_converter
1321
1388
  end
1322
1389
 
1323
- def build_fields_converter
1390
+ def build_parser_fields_converter
1324
1391
  specific_options = {
1325
1392
  builtin_converters: Converters,
1326
1393
  }
1327
1394
  options = @base_fields_converter_options.merge(specific_options)
1328
- fields_converter = FieldsConverter.new(options)
1329
- normalize_converters(@initial_converters).each do |name, converter|
1330
- fields_converter.add_converter(name, &converter)
1331
- end
1332
- fields_converter
1395
+ build_fields_converter(@initial_converters, options)
1333
1396
  end
1334
1397
 
1335
1398
  def header_fields_converter
@@ -1342,8 +1405,21 @@ class CSV
1342
1405
  accept_nil: true,
1343
1406
  }
1344
1407
  options = @base_fields_converter_options.merge(specific_options)
1408
+ build_fields_converter(@initial_header_converters, options)
1409
+ end
1410
+
1411
+ def writer_fields_converter
1412
+ @writer_fields_converter ||= build_writer_fields_converter
1413
+ end
1414
+
1415
+ def build_writer_fields_converter
1416
+ build_fields_converter(@initial_write_converters,
1417
+ @write_fields_converter_options)
1418
+ end
1419
+
1420
+ def build_fields_converter(initial_converters, options)
1345
1421
  fields_converter = FieldsConverter.new(options)
1346
- normalize_converters(@initial_header_converters).each do |name, converter|
1422
+ normalize_converters(initial_converters).each do |name, converter|
1347
1423
  fields_converter.add_converter(name, &converter)
1348
1424
  end
1349
1425
  fields_converter
@@ -1354,8 +1430,12 @@ class CSV
1354
1430
  end
1355
1431
 
1356
1432
  def parser_options
1357
- @parser_options.merge(fields_converter: fields_converter,
1358
- header_fields_converter: header_fields_converter)
1433
+ @parser_options.merge(header_fields_converter: header_fields_converter,
1434
+ fields_converter: parser_fields_converter)
1435
+ end
1436
+
1437
+ def parser_enumerator
1438
+ @parser_enumerator ||= parser.parse
1359
1439
  end
1360
1440
 
1361
1441
  def writer
@@ -1363,7 +1443,8 @@ class CSV
1363
1443
  end
1364
1444
 
1365
1445
  def writer_options
1366
- @writer_options.merge(header_fields_converter: header_fields_converter)
1446
+ @writer_options.merge(header_fields_converter: header_fields_converter,
1447
+ fields_converter: writer_fields_converter)
1367
1448
  end
1368
1449
  end
1369
1450
 
@@ -49,6 +49,28 @@ class CSV
49
49
  read_chunk
50
50
  end
51
51
 
52
+ def each_line(row_separator)
53
+ buffer = nil
54
+ input = @scanner.rest
55
+ @scanner.terminate
56
+ while input
57
+ input.each_line(row_separator) do |line|
58
+ if buffer
59
+ buffer << line
60
+ line = buffer
61
+ buffer = nil
62
+ end
63
+ if line.end_with?(row_separator)
64
+ yield(line)
65
+ else
66
+ buffer = line
67
+ end
68
+ end
69
+ input = @inputs.shift
70
+ end
71
+ yield(buffer) if buffer
72
+ end
73
+
52
74
  def scan(pattern)
53
75
  value = @scanner.scan(pattern)
54
76
  return value if @last_scanner
@@ -94,7 +116,7 @@ class CSV
94
116
  start, buffer = @keeps.pop
95
117
  if buffer
96
118
  string = @scanner.string
97
- keep = string[start, string.size - start]
119
+ keep = string.byteslice(start, string.bytesize - start)
98
120
  if keep and not keep.empty?
99
121
  @inputs.unshift(StringIO.new(keep))
100
122
  @last_scanner = false
@@ -121,7 +143,7 @@ class CSV
121
143
  keep = @keeps.last
122
144
  keep_start = keep[0]
123
145
  string = @scanner.string
124
- keep_data = string[keep_start, @scanner.pos - keep_start]
146
+ keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
125
147
  if keep_data
126
148
  keep_buffer = keep[1]
127
149
  if keep_buffer
@@ -170,7 +192,6 @@ class CSV
170
192
  @input = input
171
193
  @options = options
172
194
  @samples = []
173
- @parsed = false
174
195
 
175
196
  prepare
176
197
  end
@@ -230,9 +251,7 @@ class CSV
230
251
  def parse(&block)
231
252
  return to_enum(__method__) unless block_given?
232
253
 
233
- return if @parsed
234
-
235
- if @return_headers and @headers
254
+ if @return_headers and @headers and @raw_headers
236
255
  headers = Row.new(@headers, @raw_headers, true)
237
256
  if @unconverted_fields
238
257
  headers = add_unconverted_fields(headers, [])
@@ -240,58 +259,23 @@ class CSV
240
259
  yield headers
241
260
  end
242
261
 
243
- row = []
244
262
  begin
245
- @scanner = build_scanner
246
- skip_needless_lines
247
- start_row
248
- while true
249
- @quoted_column_value = false
250
- @unquoted_column_value = false
251
- value = parse_column_value
252
- if value and @field_size_limit and value.size >= @field_size_limit
253
- raise MalformedCSVError.new("Field size exceeded", @lineno + 1)
254
- end
255
- if parse_column_end
256
- row << value
257
- elsif parse_row_end
258
- if row.empty? and value.nil?
259
- emit_row([], &block) unless @skip_blanks
260
- else
261
- row << value
262
- emit_row(row, &block)
263
- row = []
264
- end
265
- skip_needless_lines
266
- start_row
267
- elsif @scanner.eos?
268
- break if row.empty? and value.nil?
269
- row << value
270
- emit_row(row, &block)
271
- break
272
- else
273
- if @quoted_column_value
274
- message = "Do not allow except col_sep_split_separator " +
275
- "after quoted fields"
276
- raise MalformedCSVError.new(message, @lineno + 1)
277
- elsif @unquoted_column_value and @scanner.scan(@cr_or_lf)
278
- message = "Unquoted fields do not allow \\r or \\n"
279
- raise MalformedCSVError.new(message, @lineno + 1)
280
- elsif @scanner.rest.start_with?(@quote_character)
281
- message = "Illegal quoting"
282
- raise MalformedCSVError.new(message, @lineno + 1)
283
- else
284
- raise MalformedCSVError.new("TODO: Meaningful message",
285
- @lineno + 1)
286
- end
287
- end
263
+ @scanner ||= build_scanner
264
+ if quote_character.nil?
265
+ parse_no_quote(&block)
266
+ else
267
+ parse_quotable(&block)
288
268
  end
289
269
  rescue InvalidEncoding
270
+ if @scanner
271
+ ignore_broken_line
272
+ lineno = @lineno
273
+ else
274
+ lineno = @lineno + 1
275
+ end
290
276
  message = "Invalid byte sequence in #{@encoding}"
291
- raise MalformedCSVError.new(message, @lineno + 1)
277
+ raise MalformedCSVError.new(message, lineno)
292
278
  end
293
-
294
- @parsed = true
295
279
  end
296
280
 
297
281
  def use_headers?
@@ -301,7 +285,13 @@ class CSV
301
285
  private
302
286
  def prepare
303
287
  prepare_variable
304
- prepare_regexp
288
+ prepare_backslash
289
+ prepare_quote_character
290
+ prepare_skip_lines
291
+ prepare_strip
292
+ prepare_separators
293
+ prepare_quoted
294
+ prepare_unquoted
305
295
  prepare_line
306
296
  prepare_header
307
297
  prepare_parser
@@ -315,11 +305,14 @@ class CSV
315
305
  if liberal_parsing.is_a?(Hash)
316
306
  @double_quote_outside_quote =
317
307
  liberal_parsing[:double_quote_outside_quote]
308
+ @backslash_quote = liberal_parsing[:backslash_quote]
318
309
  else
319
310
  @double_quote_outside_quote = false
311
+ @backslash_quote = false
320
312
  end
321
313
  else
322
314
  @liberal_parsing = false
315
+ @backslash_quote = false
323
316
  end
324
317
  @unconverted_fields = @options[:unconverted_fields]
325
318
  @field_size_limit = @options[:field_size_limit]
@@ -328,20 +321,33 @@ class CSV
328
321
  @header_fields_converter = @options[:header_fields_converter]
329
322
  end
330
323
 
331
- def prepare_regexp
332
- @column_separator = @options[:column_separator].to_s.encode(@encoding)
333
- @row_separator =
334
- resolve_row_separator(@options[:row_separator]).encode(@encoding)
335
- @quote_character = @options[:quote_character].to_s.encode(@encoding)
336
- if @quote_character.length != 1
337
- raise ArgumentError, ":quote_char has to be a single character String"
338
- end
324
+ def prepare_backslash
325
+ @backslash_character = "\\".encode(@encoding)
339
326
 
340
- escaped_column_separator = Regexp.escape(@column_separator)
341
- escaped_first_column_separator = Regexp.escape(@column_separator[0])
342
- escaped_row_separator = Regexp.escape(@row_separator)
343
- escaped_quote_character = Regexp.escape(@quote_character)
327
+ @escaped_backslash_character = Regexp.escape(@backslash_character)
328
+ @escaped_backslash = Regexp.new(@escaped_backslash_character)
329
+ end
330
+
331
+ def prepare_quote_character
332
+ @quote_character = @options[:quote_character]
333
+ if @quote_character.nil?
334
+ @escaped_quote_character = nil
335
+ @escaped_quote = nil
336
+ @backslash_quote_character = nil
337
+ else
338
+ @quote_character = @quote_character.to_s.encode(@encoding)
339
+ if @quote_character.length != 1
340
+ message = ":quote_char has to be nil or a single character String"
341
+ raise ArgumentError, message
342
+ end
343
+ @escaped_quote_character = Regexp.escape(@quote_character)
344
+ @escaped_quote = Regexp.new(@escaped_quote_character)
345
+ @backslash_quote_character =
346
+ @backslash_character + @escaped_quote_character
347
+ end
348
+ end
344
349
 
350
+ def prepare_skip_lines
345
351
  skip_lines = @options[:skip_lines]
346
352
  case skip_lines
347
353
  when String
@@ -356,18 +362,56 @@ class CSV
356
362
  end
357
363
  @skip_lines = skip_lines
358
364
  end
365
+ end
359
366
 
360
- @column_end = Regexp.new(escaped_column_separator)
367
+ def prepare_strip
368
+ @strip = @options[:strip]
369
+ @escaped_strip = nil
370
+ @strip_value = nil
371
+ if @strip.is_a?(String)
372
+ case @strip.length
373
+ when 0
374
+ raise ArgumentError, ":strip must not be an empty String"
375
+ when 1
376
+ # ok
377
+ else
378
+ raise ArgumentError, ":strip doesn't support 2 or more characters yet"
379
+ end
380
+ @strip = @strip.encode(@encoding)
381
+ @escaped_strip = Regexp.escape(@strip)
382
+ if @quote_character
383
+ @strip_value = Regexp.new(@escaped_strip +
384
+ "+".encode(@encoding))
385
+ end
386
+ elsif @strip
387
+ strip_values = " \t\r\n\f\v"
388
+ @escaped_strip = strip_values.encode(@encoding)
389
+ if @quote_character
390
+ @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
391
+ end
392
+ end
393
+ end
394
+
395
+ def prepare_separators
396
+ @column_separator = @options[:column_separator].to_s.encode(@encoding)
397
+ @row_separator =
398
+ resolve_row_separator(@options[:row_separator]).encode(@encoding)
399
+
400
+ @escaped_column_separator = Regexp.escape(@column_separator)
401
+ @escaped_first_column_separator = Regexp.escape(@column_separator[0])
402
+ @column_end = Regexp.new(@escaped_column_separator)
361
403
  if @column_separator.size > 1
362
404
  @column_ends = @column_separator.each_char.collect do |char|
363
405
  Regexp.new(Regexp.escape(char))
364
406
  end
365
- @first_column_separators = Regexp.new(escaped_first_column_separator +
407
+ @first_column_separators = Regexp.new(@escaped_first_column_separator +
366
408
  "+".encode(@encoding))
367
409
  else
368
410
  @column_ends = nil
369
411
  @first_column_separators = nil
370
412
  end
413
+
414
+ escaped_row_separator = Regexp.escape(@row_separator)
371
415
  @row_end = Regexp.new(escaped_row_separator)
372
416
  if @row_separator.size > 1
373
417
  @row_ends = @row_separator.each_char.collect do |char|
@@ -376,25 +420,55 @@ class CSV
376
420
  else
377
421
  @row_ends = nil
378
422
  end
379
- @quotes = Regexp.new(escaped_quote_character +
380
- "+".encode(@encoding))
381
- @quoted_value = Regexp.new("[^".encode(@encoding) +
382
- escaped_quote_character +
383
- "]+".encode(@encoding))
384
- if @liberal_parsing
385
- @unquoted_value = Regexp.new("[^".encode(@encoding) +
386
- escaped_first_column_separator +
387
- "\r\n]+".encode(@encoding))
388
- else
389
- @unquoted_value = Regexp.new("[^".encode(@encoding) +
390
- escaped_quote_character +
391
- escaped_first_column_separator +
392
- "\r\n]+".encode(@encoding))
393
- end
423
+
394
424
  @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
395
425
  @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
396
426
  end
397
427
 
428
+ def prepare_quoted
429
+ if @quote_character
430
+ @quotes = Regexp.new(@escaped_quote_character +
431
+ "+".encode(@encoding))
432
+ no_quoted_values = @escaped_quote_character.dup
433
+ if @backslash_quote
434
+ no_quoted_values << @escaped_backslash_character
435
+ end
436
+ @quoted_value = Regexp.new("[^".encode(@encoding) +
437
+ no_quoted_values +
438
+ "]+".encode(@encoding))
439
+ else
440
+ if @escaped_strip
441
+ @split_column_separator = Regexp.new(@escaped_strip +
442
+ "*".encode(@encoding) +
443
+ @escaped_column_separator +
444
+ @escaped_strip +
445
+ "*".encode(@encoding))
446
+ else
447
+ if @column_separator == " ".encode(@encoding)
448
+ @split_column_separator = @column_end
449
+ else
450
+ @split_column_separator = @column_separator
451
+ end
452
+ end
453
+ end
454
+ end
455
+
456
+ def prepare_unquoted
457
+ return if @quote_character.nil?
458
+
459
+ no_unquoted_values = "\r\n".encode(@encoding)
460
+ no_unquoted_values << @escaped_first_column_separator
461
+ unless @liberal_parsing
462
+ no_unquoted_values << @escaped_quote_character
463
+ end
464
+ if @escaped_strip
465
+ no_unquoted_values << @escaped_strip
466
+ end
467
+ @unquoted_value = Regexp.new("[^".encode(@encoding) +
468
+ no_unquoted_values +
469
+ "]+".encode(@encoding))
470
+ end
471
+
398
472
  def resolve_row_separator(separator)
399
473
  if separator == :auto
400
474
  cr = "\r".encode(@encoding)
@@ -514,6 +588,8 @@ class CSV
514
588
  end
515
589
 
516
590
  def may_quoted?
591
+ return false if @quote_character.nil?
592
+
517
593
  if @input.is_a?(StringIO)
518
594
  sample = @input.string
519
595
  else
@@ -534,6 +610,10 @@ class CSV
534
610
  @io.gets(*args)
535
611
  end
536
612
 
613
+ def each_line(*args, &block)
614
+ @io.each_line(*args, &block)
615
+ end
616
+
537
617
  def eof?
538
618
  @io.eof?
539
619
  end
@@ -548,7 +628,10 @@ class CSV
548
628
  else
549
629
  inputs << @input
550
630
  end
551
- InputsScanner.new(inputs, @encoding, chunk_size: 1)
631
+ chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
632
+ InputsScanner.new(inputs,
633
+ @encoding,
634
+ chunk_size: Integer(chunk_size, 10))
552
635
  end
553
636
  else
554
637
  def build_scanner
@@ -560,8 +643,13 @@ class CSV
560
643
  end
561
644
  if string
562
645
  unless string.valid_encoding?
563
- message = "Invalid byte sequence in #{@encoding}"
564
- raise MalformedCSVError.new(message, @lineno + 1)
646
+ index = string.lines(@row_separator).index do |line|
647
+ !line.valid_encoding?
648
+ end
649
+ if index
650
+ message = "Invalid byte sequence in #{@encoding}"
651
+ raise MalformedCSVError.new(message, @lineno + index + 1)
652
+ end
565
653
  end
566
654
  Scanner.new(string)
567
655
  else
@@ -582,6 +670,7 @@ class CSV
582
670
  line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
583
671
  line << @row_separator if parse_row_end
584
672
  if skip_line?(line)
673
+ @lineno += 1
585
674
  @scanner.keep_drop
586
675
  else
587
676
  @scanner.keep_back
@@ -601,6 +690,96 @@ class CSV
601
690
  end
602
691
  end
603
692
 
693
+ def parse_no_quote(&block)
694
+ if @scanner.respond_to?(:string)
695
+ scanner = @scanner.string
696
+ else
697
+ scanner = @scanner
698
+ end
699
+ scanner.each_line(@row_separator) do |value|
700
+ next if @skip_lines and skip_line?(value)
701
+ value.chomp!
702
+
703
+ if value.empty?
704
+ next if @skip_blanks
705
+ row = []
706
+ else
707
+ value = strip_value(value)
708
+ row = value.split(@split_column_separator, -1)
709
+ n_columns = row.size
710
+ i = 0
711
+ while i < n_columns
712
+ row[i] = nil if row[i].empty?
713
+ i += 1
714
+ end
715
+ end
716
+ @last_line = value
717
+ emit_row(row, &block)
718
+ end
719
+ end
720
+
721
+ def parse_quotable(&block)
722
+ row = []
723
+ skip_needless_lines
724
+ start_row
725
+ while true
726
+ @quoted_column_value = false
727
+ @unquoted_column_value = false
728
+ @scanner.scan_all(@strip_value) if @strip_value
729
+ value = parse_column_value
730
+ if value
731
+ @scanner.scan_all(@strip_value) if @strip_value
732
+ if @field_size_limit and value.size >= @field_size_limit
733
+ ignore_broken_line
734
+ raise MalformedCSVError.new("Field size exceeded", @lineno)
735
+ end
736
+ end
737
+ if parse_column_end
738
+ row << value
739
+ elsif parse_row_end
740
+ if row.empty? and value.nil?
741
+ emit_row([], &block) unless @skip_blanks
742
+ else
743
+ row << value
744
+ emit_row(row, &block)
745
+ row = []
746
+ end
747
+ skip_needless_lines
748
+ start_row
749
+ elsif @scanner.eos?
750
+ break if row.empty? and value.nil?
751
+ row << value
752
+ emit_row(row, &block)
753
+ break
754
+ else
755
+ if @quoted_column_value
756
+ ignore_broken_line
757
+ message = "Any value after quoted field isn't allowed"
758
+ raise MalformedCSVError.new(message, @lineno)
759
+ elsif @unquoted_column_value and
760
+ (new_line = @scanner.scan(@cr_or_lf))
761
+ ignore_broken_line
762
+ message = "Unquoted fields do not allow new line " +
763
+ "<#{new_line.inspect}>"
764
+ raise MalformedCSVError.new(message, @lineno)
765
+ elsif @scanner.rest.start_with?(@quote_character)
766
+ ignore_broken_line
767
+ message = "Illegal quoting"
768
+ raise MalformedCSVError.new(message, @lineno)
769
+ elsif (new_line = @scanner.scan(@cr_or_lf))
770
+ ignore_broken_line
771
+ message = "New line must be <#{@row_separator.inspect}> " +
772
+ "not <#{new_line.inspect}>"
773
+ raise MalformedCSVError.new(message, @lineno)
774
+ else
775
+ ignore_broken_line
776
+ raise MalformedCSVError.new("TODO: Meaningful message",
777
+ @lineno)
778
+ end
779
+ end
780
+ end
781
+ end
782
+
604
783
  def parse_column_value
605
784
  if @liberal_parsing
606
785
  quoted_value = parse_quoted_column_value
@@ -651,6 +830,7 @@ class CSV
651
830
  value << sub_value
652
831
  end
653
832
  end
833
+ value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
654
834
  value
655
835
  end
656
836
 
@@ -667,10 +847,22 @@ class CSV
667
847
  while true
668
848
  quoted_value = @scanner.scan_all(@quoted_value)
669
849
  value << quoted_value if quoted_value
850
+ if @backslash_quote
851
+ if @scanner.scan(@escaped_backslash)
852
+ if @scanner.scan(@escaped_quote)
853
+ value << @quote_character
854
+ else
855
+ value << @backslash_character
856
+ end
857
+ next
858
+ end
859
+ end
860
+
670
861
  quotes = @scanner.scan_all(@quotes)
671
862
  unless quotes
863
+ ignore_broken_line
672
864
  message = "Unclosed quoted field"
673
- raise MalformedCSVError.new(message, @lineno + 1)
865
+ raise MalformedCSVError.new(message, @lineno)
674
866
  end
675
867
  n_quotes = quotes.size
676
868
  if n_quotes == 1
@@ -713,6 +905,33 @@ class CSV
713
905
  end
714
906
  end
715
907
 
908
+ def strip_value(value)
909
+ return value unless @strip
910
+ return nil if value.nil?
911
+
912
+ case @strip
913
+ when String
914
+ size = value.size
915
+ while value.start_with?(@strip)
916
+ size -= 1
917
+ value = value[1, size]
918
+ end
919
+ while value.end_with?(@strip)
920
+ size -= 1
921
+ value = value[0, size]
922
+ end
923
+ else
924
+ value.strip!
925
+ end
926
+ value
927
+ end
928
+
929
+ def ignore_broken_line
930
+ @scanner.scan_all(@not_line_end)
931
+ @scanner.scan_all(@cr_or_lf)
932
+ @lineno += 1
933
+ end
934
+
716
935
  def start_row
717
936
  if @last_line
718
937
  @last_line = nil
@@ -2,5 +2,5 @@
2
2
 
3
3
  class CSV
4
4
  # The version of the installed library.
5
- VERSION = "3.0.4"
5
+ VERSION = "3.0.5"
6
6
  end
@@ -18,6 +18,7 @@ class CSV
18
18
  if @options[:write_headers] and @headers
19
19
  self << @headers
20
20
  end
21
+ @fields_converter = @options[:fields_converter]
21
22
  end
22
23
 
23
24
  def <<(row)
@@ -31,6 +32,8 @@ class CSV
31
32
  @headers ||= row if @use_headers
32
33
  @lineno += 1
33
34
 
35
+ row = @fields_converter.convert(row, nil, lineno) if @fields_converter
36
+
34
37
  converted_row = row.collect do |field|
35
38
  quote(field)
36
39
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.4
4
+ version: 3.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Edward Gray II
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-01-25 00:00:00.000000000 Z
12
+ date: 2019-03-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler