csv 3.0.4 → 3.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b0c308f4cca10d6b48a2268e992fa545918c6cbe99837b83ec048b55eb76b702
4
- data.tar.gz: 8661eb5e276abdeb5e939e3c2359d6ee991199550cfaa8e623496946fad17b8f
3
+ metadata.gz: 277fe614e5fc3f2f8ad100a70c6021aea0b8c9f989a46aad6618f9c3e81e5baf
4
+ data.tar.gz: cb8aafb272a93788371dcc6992f61ea26a7a19844f3aabacb0e16fc9a3e54058
5
5
  SHA512:
6
- metadata.gz: 5875a4b446bd22fbdb0ab1a829576e219b64aa80a66f27e73ae487ed7fa6fb09429f8f3185ab8b08c07ee30a49204b98acd1d7a72a97c9ec89234d79dc597553
7
- data.tar.gz: bda1a607b00312b0e4bc0e2d444670d5f9661251ad73cb2061baeb695cb264af0a06f3bf44fc4d00d6406a1b61f0d4bbdfb8e6fd036d276f51fd30fec4f179e3
6
+ metadata.gz: 063bf8125079ad1c9f42f26990ab9a6a7a07100fecc2db71a792e5b89aa757decc571e1c39a70c8c8a2eb50475749bbc103985a0fa66a1e35762f7568d7a7f85
7
+ data.tar.gz: 7daef31a3902cba8dbc139a1d828d15493def81c95011f4da6ee69369dbdfdd3fb97a20a7d9a9f0cd1a8129669e27403f6426644e3b2f48e7fef592c22967dc0
data/NEWS.md CHANGED
@@ -1,5 +1,67 @@
1
1
  # News
2
2
 
3
+ ## 3.0.5 - 2019-03-24
4
+
5
+ ### Improvements
6
+
7
+ * Added `:liberal_parsing => {backslash_quote: true}` option.
8
+ [GitHub#74][Patch by 284km]
9
+
10
+ * Added `:write_converters` option.
11
+ [GitHub#73][Patch by Danillo Souza]
12
+
13
+ * Added `:write_nil_value` option.
14
+
15
+ * Added `:write_empty_value` option.
16
+
17
+ * Improved invalid byte line number detection.
18
+ [GitHub#78][Patch by Alyssa Ross]
19
+
20
+ * Added `quote_char: nil` optimization.
21
+ [GitHub#79][Patch by 284km]
22
+
23
+ * Improved error message.
24
+ [GitHub#81][Patch by Andrés Torres]
25
+
26
+ * Improved IO-like implementation for `StringIO` data.
27
+ [GitHub#80][Patch by Genadi Samokovarov]
28
+
29
+ * Added `:strip` option.
30
+ [GitHub#58]
31
+
32
+ ### Fixes
33
+
34
+ * Fixed a compatibility bug that `CSV#each` doesn't take `CSV#shift` into account.
35
+ [GitHub#76][Patch by Alyssa Ross]
36
+
37
+ * Fixed a compatibility bug that `CSV#eof?` doesn't take `CSV#each`
38
+ and `CSV#shift` into account.
39
+ [GitHub#77][Reported by Chi Leung]
40
+
41
+ * Fixed a compatibility bug that an invalid line isn't ignored.
42
+ [GitHub#82][Reported by krororo]
43
+
44
+ * Fixed a bug that `:skip_lines` doesn't work with multibyte character data.
45
+ [GitHub#83][Reported by ff2248]
46
+
47
+ ### Thanks
48
+
49
+ * Alyssa Ross
50
+
51
+ * 284km
52
+
53
+ * Chi Leung
54
+
55
+ * Danillo Souza
56
+
57
+ * Andrés Torres
58
+
59
+ * Genadi Samokovarov
60
+
61
+ * krororo
62
+
63
+ * ff2248
64
+
3
65
  ## 3.0.4 - 2019-01-25
4
66
 
5
67
  ### Improvements
data/lib/csv.rb CHANGED
@@ -885,6 +885,10 @@ class CSV
885
885
  # blank string field is replaced by
886
886
  # the set object.
887
887
  # <b><tt>:quote_empty</tt></b>:: TODO
888
+ # <b><tt>:write_converters</tt></b>:: TODO
889
+ # <b><tt>:write_nil_value</tt></b>:: TODO
890
+ # <b><tt>:write_empty_value</tt></b>:: TODO
891
+ # <b><tt>:strip</tt></b>:: TODO
888
892
  #
889
893
  # See CSV::DEFAULT_OPTIONS for the default settings.
890
894
  #
@@ -911,7 +915,11 @@ class CSV
911
915
  encoding: nil,
912
916
  nil_value: nil,
913
917
  empty_value: "",
914
- quote_empty: true)
918
+ quote_empty: true,
919
+ write_converters: nil,
920
+ write_nil_value: nil,
921
+ write_empty_value: "",
922
+ strip: false)
915
923
  raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
916
924
 
917
925
  # create the IO object we will read from
@@ -922,8 +930,13 @@ class CSV
922
930
  nil_value: nil_value,
923
931
  empty_value: empty_value,
924
932
  }
933
+ @write_fields_converter_options = {
934
+ nil_value: write_nil_value,
935
+ empty_value: write_empty_value,
936
+ }
925
937
  @initial_converters = converters
926
938
  @initial_header_converters = header_converters
939
+ @initial_write_converters = write_converters
927
940
 
928
941
  @parser_options = {
929
942
  column_separator: col_sep,
@@ -939,6 +952,7 @@ class CSV
939
952
  encoding: @encoding,
940
953
  nil_value: nil_value,
941
954
  empty_value: empty_value,
955
+ strip: strip,
942
956
  }
943
957
  @parser = nil
944
958
 
@@ -998,7 +1012,7 @@ class CSV
998
1012
  # as is.
999
1013
  #
1000
1014
  def converters
1001
- fields_converter.map do |converter|
1015
+ parser_fields_converter.map do |converter|
1002
1016
  name = Converters.rassoc(converter)
1003
1017
  name ? name.first : converter
1004
1018
  end
@@ -1098,12 +1112,58 @@ class CSV
1098
1112
  ### IO and StringIO Delegation ###
1099
1113
 
1100
1114
  extend Forwardable
1101
- def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
1102
- :closed?, :eof, :eof?, :external_encoding, :fcntl,
1103
- :fileno, :flock, :flush, :fsync, :internal_encoding,
1104
- :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
1105
- :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
1106
- :to_io, :truncate, :tty?
1115
+ def_delegators :@io, :binmode, :close, :close_read, :close_write,
1116
+ :closed?, :external_encoding, :fcntl,
1117
+ :fileno, :flush, :fsync, :internal_encoding,
1118
+ :isatty, :pid, :pos, :pos=, :reopen,
1119
+ :seek, :string, :sync, :sync=, :tell,
1120
+ :truncate, :tty?
1121
+
1122
+ def binmode?
1123
+ if @io.respond_to?(:binmode?)
1124
+ @io.binmode?
1125
+ else
1126
+ false
1127
+ end
1128
+ end
1129
+
1130
+ def flock(*args)
1131
+ raise NotImplementedError unless @io.respond_to?(:flock)
1132
+ @io.flock(*args)
1133
+ end
1134
+
1135
+ def ioctl(*args)
1136
+ raise NotImplementedError unless @io.respond_to?(:ioctl)
1137
+ @io.ioctl(*args)
1138
+ end
1139
+
1140
+ def path
1141
+ @io.path if @io.respond_to?(:path)
1142
+ end
1143
+
1144
+ def stat(*args)
1145
+ raise NotImplementedError unless @io.respond_to?(:stat)
1146
+ @io.stat(*args)
1147
+ end
1148
+
1149
+ def to_i
1150
+ raise NotImplementedError unless @io.respond_to?(:to_i)
1151
+ @io.to_i
1152
+ end
1153
+
1154
+ def to_io
1155
+ @io.respond_to?(:to_io) ? @io.to_io : @io
1156
+ end
1157
+
1158
+ def eof?
1159
+ begin
1160
+ parser_enumerator.peek
1161
+ false
1162
+ rescue StopIteration
1163
+ true
1164
+ end
1165
+ end
1166
+ alias_method :eof, :eof?
1107
1167
 
1108
1168
  # Rewinds the underlying IO object and resets CSV's lineno() counter.
1109
1169
  def rewind
@@ -1145,7 +1205,7 @@ class CSV
1145
1205
  # converted field or the field itself.
1146
1206
  #
1147
1207
  def convert(name = nil, &converter)
1148
- fields_converter.add_converter(name, &converter)
1208
+ parser_fields_converter.add_converter(name, &converter)
1149
1209
  end
1150
1210
 
1151
1211
  #
@@ -1172,8 +1232,16 @@ class CSV
1172
1232
  #
1173
1233
  # The data source must be open for reading.
1174
1234
  #
1175
- def each(&block)
1176
- parser.parse(&block)
1235
+ def each
1236
+ return to_enum(__method__) unless block_given?
1237
+ enumerator = parser_enumerator
1238
+ begin
1239
+ while true
1240
+ yield enumerator.next
1241
+ end
1242
+ rescue StopIteration
1243
+ end
1244
+ self
1177
1245
  end
1178
1246
 
1179
1247
  #
@@ -1204,9 +1272,8 @@ class CSV
1204
1272
  # The data source must be open for reading.
1205
1273
  #
1206
1274
  def shift
1207
- @parser_enumerator ||= parser.parse
1208
1275
  begin
1209
- @parser_enumerator.next
1276
+ parser_enumerator.next
1210
1277
  rescue StopIteration
1211
1278
  nil
1212
1279
  end
@@ -1299,7 +1366,7 @@ class CSV
1299
1366
  if headers
1300
1367
  header_fields_converter.convert(fields, nil, 0)
1301
1368
  else
1302
- fields_converter.convert(fields, @headers, lineno)
1369
+ parser_fields_converter.convert(fields, @headers, lineno)
1303
1370
  end
1304
1371
  end
1305
1372
 
@@ -1316,20 +1383,16 @@ class CSV
1316
1383
  end
1317
1384
  end
1318
1385
 
1319
- def fields_converter
1320
- @fields_converter ||= build_fields_converter
1386
+ def parser_fields_converter
1387
+ @parser_fields_converter ||= build_parser_fields_converter
1321
1388
  end
1322
1389
 
1323
- def build_fields_converter
1390
+ def build_parser_fields_converter
1324
1391
  specific_options = {
1325
1392
  builtin_converters: Converters,
1326
1393
  }
1327
1394
  options = @base_fields_converter_options.merge(specific_options)
1328
- fields_converter = FieldsConverter.new(options)
1329
- normalize_converters(@initial_converters).each do |name, converter|
1330
- fields_converter.add_converter(name, &converter)
1331
- end
1332
- fields_converter
1395
+ build_fields_converter(@initial_converters, options)
1333
1396
  end
1334
1397
 
1335
1398
  def header_fields_converter
@@ -1342,8 +1405,21 @@ class CSV
1342
1405
  accept_nil: true,
1343
1406
  }
1344
1407
  options = @base_fields_converter_options.merge(specific_options)
1408
+ build_fields_converter(@initial_header_converters, options)
1409
+ end
1410
+
1411
+ def writer_fields_converter
1412
+ @writer_fields_converter ||= build_writer_fields_converter
1413
+ end
1414
+
1415
+ def build_writer_fields_converter
1416
+ build_fields_converter(@initial_write_converters,
1417
+ @write_fields_converter_options)
1418
+ end
1419
+
1420
+ def build_fields_converter(initial_converters, options)
1345
1421
  fields_converter = FieldsConverter.new(options)
1346
- normalize_converters(@initial_header_converters).each do |name, converter|
1422
+ normalize_converters(initial_converters).each do |name, converter|
1347
1423
  fields_converter.add_converter(name, &converter)
1348
1424
  end
1349
1425
  fields_converter
@@ -1354,8 +1430,12 @@ class CSV
1354
1430
  end
1355
1431
 
1356
1432
  def parser_options
1357
- @parser_options.merge(fields_converter: fields_converter,
1358
- header_fields_converter: header_fields_converter)
1433
+ @parser_options.merge(header_fields_converter: header_fields_converter,
1434
+ fields_converter: parser_fields_converter)
1435
+ end
1436
+
1437
+ def parser_enumerator
1438
+ @parser_enumerator ||= parser.parse
1359
1439
  end
1360
1440
 
1361
1441
  def writer
@@ -1363,7 +1443,8 @@ class CSV
1363
1443
  end
1364
1444
 
1365
1445
  def writer_options
1366
- @writer_options.merge(header_fields_converter: header_fields_converter)
1446
+ @writer_options.merge(header_fields_converter: header_fields_converter,
1447
+ fields_converter: writer_fields_converter)
1367
1448
  end
1368
1449
  end
1369
1450
 
@@ -49,6 +49,28 @@ class CSV
49
49
  read_chunk
50
50
  end
51
51
 
52
+ def each_line(row_separator)
53
+ buffer = nil
54
+ input = @scanner.rest
55
+ @scanner.terminate
56
+ while input
57
+ input.each_line(row_separator) do |line|
58
+ if buffer
59
+ buffer << line
60
+ line = buffer
61
+ buffer = nil
62
+ end
63
+ if line.end_with?(row_separator)
64
+ yield(line)
65
+ else
66
+ buffer = line
67
+ end
68
+ end
69
+ input = @inputs.shift
70
+ end
71
+ yield(buffer) if buffer
72
+ end
73
+
52
74
  def scan(pattern)
53
75
  value = @scanner.scan(pattern)
54
76
  return value if @last_scanner
@@ -94,7 +116,7 @@ class CSV
94
116
  start, buffer = @keeps.pop
95
117
  if buffer
96
118
  string = @scanner.string
97
- keep = string[start, string.size - start]
119
+ keep = string.byteslice(start, string.bytesize - start)
98
120
  if keep and not keep.empty?
99
121
  @inputs.unshift(StringIO.new(keep))
100
122
  @last_scanner = false
@@ -121,7 +143,7 @@ class CSV
121
143
  keep = @keeps.last
122
144
  keep_start = keep[0]
123
145
  string = @scanner.string
124
- keep_data = string[keep_start, @scanner.pos - keep_start]
146
+ keep_data = string.byteslice(keep_start, @scanner.pos - keep_start)
125
147
  if keep_data
126
148
  keep_buffer = keep[1]
127
149
  if keep_buffer
@@ -170,7 +192,6 @@ class CSV
170
192
  @input = input
171
193
  @options = options
172
194
  @samples = []
173
- @parsed = false
174
195
 
175
196
  prepare
176
197
  end
@@ -230,9 +251,7 @@ class CSV
230
251
  def parse(&block)
231
252
  return to_enum(__method__) unless block_given?
232
253
 
233
- return if @parsed
234
-
235
- if @return_headers and @headers
254
+ if @return_headers and @headers and @raw_headers
236
255
  headers = Row.new(@headers, @raw_headers, true)
237
256
  if @unconverted_fields
238
257
  headers = add_unconverted_fields(headers, [])
@@ -240,58 +259,23 @@ class CSV
240
259
  yield headers
241
260
  end
242
261
 
243
- row = []
244
262
  begin
245
- @scanner = build_scanner
246
- skip_needless_lines
247
- start_row
248
- while true
249
- @quoted_column_value = false
250
- @unquoted_column_value = false
251
- value = parse_column_value
252
- if value and @field_size_limit and value.size >= @field_size_limit
253
- raise MalformedCSVError.new("Field size exceeded", @lineno + 1)
254
- end
255
- if parse_column_end
256
- row << value
257
- elsif parse_row_end
258
- if row.empty? and value.nil?
259
- emit_row([], &block) unless @skip_blanks
260
- else
261
- row << value
262
- emit_row(row, &block)
263
- row = []
264
- end
265
- skip_needless_lines
266
- start_row
267
- elsif @scanner.eos?
268
- break if row.empty? and value.nil?
269
- row << value
270
- emit_row(row, &block)
271
- break
272
- else
273
- if @quoted_column_value
274
- message = "Do not allow except col_sep_split_separator " +
275
- "after quoted fields"
276
- raise MalformedCSVError.new(message, @lineno + 1)
277
- elsif @unquoted_column_value and @scanner.scan(@cr_or_lf)
278
- message = "Unquoted fields do not allow \\r or \\n"
279
- raise MalformedCSVError.new(message, @lineno + 1)
280
- elsif @scanner.rest.start_with?(@quote_character)
281
- message = "Illegal quoting"
282
- raise MalformedCSVError.new(message, @lineno + 1)
283
- else
284
- raise MalformedCSVError.new("TODO: Meaningful message",
285
- @lineno + 1)
286
- end
287
- end
263
+ @scanner ||= build_scanner
264
+ if quote_character.nil?
265
+ parse_no_quote(&block)
266
+ else
267
+ parse_quotable(&block)
288
268
  end
289
269
  rescue InvalidEncoding
270
+ if @scanner
271
+ ignore_broken_line
272
+ lineno = @lineno
273
+ else
274
+ lineno = @lineno + 1
275
+ end
290
276
  message = "Invalid byte sequence in #{@encoding}"
291
- raise MalformedCSVError.new(message, @lineno + 1)
277
+ raise MalformedCSVError.new(message, lineno)
292
278
  end
293
-
294
- @parsed = true
295
279
  end
296
280
 
297
281
  def use_headers?
@@ -301,7 +285,13 @@ class CSV
301
285
  private
302
286
  def prepare
303
287
  prepare_variable
304
- prepare_regexp
288
+ prepare_backslash
289
+ prepare_quote_character
290
+ prepare_skip_lines
291
+ prepare_strip
292
+ prepare_separators
293
+ prepare_quoted
294
+ prepare_unquoted
305
295
  prepare_line
306
296
  prepare_header
307
297
  prepare_parser
@@ -315,11 +305,14 @@ class CSV
315
305
  if liberal_parsing.is_a?(Hash)
316
306
  @double_quote_outside_quote =
317
307
  liberal_parsing[:double_quote_outside_quote]
308
+ @backslash_quote = liberal_parsing[:backslash_quote]
318
309
  else
319
310
  @double_quote_outside_quote = false
311
+ @backslash_quote = false
320
312
  end
321
313
  else
322
314
  @liberal_parsing = false
315
+ @backslash_quote = false
323
316
  end
324
317
  @unconverted_fields = @options[:unconverted_fields]
325
318
  @field_size_limit = @options[:field_size_limit]
@@ -328,20 +321,33 @@ class CSV
328
321
  @header_fields_converter = @options[:header_fields_converter]
329
322
  end
330
323
 
331
- def prepare_regexp
332
- @column_separator = @options[:column_separator].to_s.encode(@encoding)
333
- @row_separator =
334
- resolve_row_separator(@options[:row_separator]).encode(@encoding)
335
- @quote_character = @options[:quote_character].to_s.encode(@encoding)
336
- if @quote_character.length != 1
337
- raise ArgumentError, ":quote_char has to be a single character String"
338
- end
324
+ def prepare_backslash
325
+ @backslash_character = "\\".encode(@encoding)
339
326
 
340
- escaped_column_separator = Regexp.escape(@column_separator)
341
- escaped_first_column_separator = Regexp.escape(@column_separator[0])
342
- escaped_row_separator = Regexp.escape(@row_separator)
343
- escaped_quote_character = Regexp.escape(@quote_character)
327
+ @escaped_backslash_character = Regexp.escape(@backslash_character)
328
+ @escaped_backslash = Regexp.new(@escaped_backslash_character)
329
+ end
330
+
331
+ def prepare_quote_character
332
+ @quote_character = @options[:quote_character]
333
+ if @quote_character.nil?
334
+ @escaped_quote_character = nil
335
+ @escaped_quote = nil
336
+ @backslash_quote_character = nil
337
+ else
338
+ @quote_character = @quote_character.to_s.encode(@encoding)
339
+ if @quote_character.length != 1
340
+ message = ":quote_char has to be nil or a single character String"
341
+ raise ArgumentError, message
342
+ end
343
+ @escaped_quote_character = Regexp.escape(@quote_character)
344
+ @escaped_quote = Regexp.new(@escaped_quote_character)
345
+ @backslash_quote_character =
346
+ @backslash_character + @escaped_quote_character
347
+ end
348
+ end
344
349
 
350
+ def prepare_skip_lines
345
351
  skip_lines = @options[:skip_lines]
346
352
  case skip_lines
347
353
  when String
@@ -356,18 +362,56 @@ class CSV
356
362
  end
357
363
  @skip_lines = skip_lines
358
364
  end
365
+ end
359
366
 
360
- @column_end = Regexp.new(escaped_column_separator)
367
+ def prepare_strip
368
+ @strip = @options[:strip]
369
+ @escaped_strip = nil
370
+ @strip_value = nil
371
+ if @strip.is_a?(String)
372
+ case @strip.length
373
+ when 0
374
+ raise ArgumentError, ":strip must not be an empty String"
375
+ when 1
376
+ # ok
377
+ else
378
+ raise ArgumentError, ":strip doesn't support 2 or more characters yet"
379
+ end
380
+ @strip = @strip.encode(@encoding)
381
+ @escaped_strip = Regexp.escape(@strip)
382
+ if @quote_character
383
+ @strip_value = Regexp.new(@escaped_strip +
384
+ "+".encode(@encoding))
385
+ end
386
+ elsif @strip
387
+ strip_values = " \t\r\n\f\v"
388
+ @escaped_strip = strip_values.encode(@encoding)
389
+ if @quote_character
390
+ @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
391
+ end
392
+ end
393
+ end
394
+
395
+ def prepare_separators
396
+ @column_separator = @options[:column_separator].to_s.encode(@encoding)
397
+ @row_separator =
398
+ resolve_row_separator(@options[:row_separator]).encode(@encoding)
399
+
400
+ @escaped_column_separator = Regexp.escape(@column_separator)
401
+ @escaped_first_column_separator = Regexp.escape(@column_separator[0])
402
+ @column_end = Regexp.new(@escaped_column_separator)
361
403
  if @column_separator.size > 1
362
404
  @column_ends = @column_separator.each_char.collect do |char|
363
405
  Regexp.new(Regexp.escape(char))
364
406
  end
365
- @first_column_separators = Regexp.new(escaped_first_column_separator +
407
+ @first_column_separators = Regexp.new(@escaped_first_column_separator +
366
408
  "+".encode(@encoding))
367
409
  else
368
410
  @column_ends = nil
369
411
  @first_column_separators = nil
370
412
  end
413
+
414
+ escaped_row_separator = Regexp.escape(@row_separator)
371
415
  @row_end = Regexp.new(escaped_row_separator)
372
416
  if @row_separator.size > 1
373
417
  @row_ends = @row_separator.each_char.collect do |char|
@@ -376,25 +420,55 @@ class CSV
376
420
  else
377
421
  @row_ends = nil
378
422
  end
379
- @quotes = Regexp.new(escaped_quote_character +
380
- "+".encode(@encoding))
381
- @quoted_value = Regexp.new("[^".encode(@encoding) +
382
- escaped_quote_character +
383
- "]+".encode(@encoding))
384
- if @liberal_parsing
385
- @unquoted_value = Regexp.new("[^".encode(@encoding) +
386
- escaped_first_column_separator +
387
- "\r\n]+".encode(@encoding))
388
- else
389
- @unquoted_value = Regexp.new("[^".encode(@encoding) +
390
- escaped_quote_character +
391
- escaped_first_column_separator +
392
- "\r\n]+".encode(@encoding))
393
- end
423
+
394
424
  @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
395
425
  @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
396
426
  end
397
427
 
428
+ def prepare_quoted
429
+ if @quote_character
430
+ @quotes = Regexp.new(@escaped_quote_character +
431
+ "+".encode(@encoding))
432
+ no_quoted_values = @escaped_quote_character.dup
433
+ if @backslash_quote
434
+ no_quoted_values << @escaped_backslash_character
435
+ end
436
+ @quoted_value = Regexp.new("[^".encode(@encoding) +
437
+ no_quoted_values +
438
+ "]+".encode(@encoding))
439
+ else
440
+ if @escaped_strip
441
+ @split_column_separator = Regexp.new(@escaped_strip +
442
+ "*".encode(@encoding) +
443
+ @escaped_column_separator +
444
+ @escaped_strip +
445
+ "*".encode(@encoding))
446
+ else
447
+ if @column_separator == " ".encode(@encoding)
448
+ @split_column_separator = @column_end
449
+ else
450
+ @split_column_separator = @column_separator
451
+ end
452
+ end
453
+ end
454
+ end
455
+
456
+ def prepare_unquoted
457
+ return if @quote_character.nil?
458
+
459
+ no_unquoted_values = "\r\n".encode(@encoding)
460
+ no_unquoted_values << @escaped_first_column_separator
461
+ unless @liberal_parsing
462
+ no_unquoted_values << @escaped_quote_character
463
+ end
464
+ if @escaped_strip
465
+ no_unquoted_values << @escaped_strip
466
+ end
467
+ @unquoted_value = Regexp.new("[^".encode(@encoding) +
468
+ no_unquoted_values +
469
+ "]+".encode(@encoding))
470
+ end
471
+
398
472
  def resolve_row_separator(separator)
399
473
  if separator == :auto
400
474
  cr = "\r".encode(@encoding)
@@ -514,6 +588,8 @@ class CSV
514
588
  end
515
589
 
516
590
  def may_quoted?
591
+ return false if @quote_character.nil?
592
+
517
593
  if @input.is_a?(StringIO)
518
594
  sample = @input.string
519
595
  else
@@ -534,6 +610,10 @@ class CSV
534
610
  @io.gets(*args)
535
611
  end
536
612
 
613
+ def each_line(*args, &block)
614
+ @io.each_line(*args, &block)
615
+ end
616
+
537
617
  def eof?
538
618
  @io.eof?
539
619
  end
@@ -548,7 +628,10 @@ class CSV
548
628
  else
549
629
  inputs << @input
550
630
  end
551
- InputsScanner.new(inputs, @encoding, chunk_size: 1)
631
+ chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
632
+ InputsScanner.new(inputs,
633
+ @encoding,
634
+ chunk_size: Integer(chunk_size, 10))
552
635
  end
553
636
  else
554
637
  def build_scanner
@@ -560,8 +643,13 @@ class CSV
560
643
  end
561
644
  if string
562
645
  unless string.valid_encoding?
563
- message = "Invalid byte sequence in #{@encoding}"
564
- raise MalformedCSVError.new(message, @lineno + 1)
646
+ index = string.lines(@row_separator).index do |line|
647
+ !line.valid_encoding?
648
+ end
649
+ if index
650
+ message = "Invalid byte sequence in #{@encoding}"
651
+ raise MalformedCSVError.new(message, @lineno + index + 1)
652
+ end
565
653
  end
566
654
  Scanner.new(string)
567
655
  else
@@ -582,6 +670,7 @@ class CSV
582
670
  line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
583
671
  line << @row_separator if parse_row_end
584
672
  if skip_line?(line)
673
+ @lineno += 1
585
674
  @scanner.keep_drop
586
675
  else
587
676
  @scanner.keep_back
@@ -601,6 +690,96 @@ class CSV
601
690
  end
602
691
  end
603
692
 
693
+ def parse_no_quote(&block)
694
+ if @scanner.respond_to?(:string)
695
+ scanner = @scanner.string
696
+ else
697
+ scanner = @scanner
698
+ end
699
+ scanner.each_line(@row_separator) do |value|
700
+ next if @skip_lines and skip_line?(value)
701
+ value.chomp!
702
+
703
+ if value.empty?
704
+ next if @skip_blanks
705
+ row = []
706
+ else
707
+ value = strip_value(value)
708
+ row = value.split(@split_column_separator, -1)
709
+ n_columns = row.size
710
+ i = 0
711
+ while i < n_columns
712
+ row[i] = nil if row[i].empty?
713
+ i += 1
714
+ end
715
+ end
716
+ @last_line = value
717
+ emit_row(row, &block)
718
+ end
719
+ end
720
+
721
+ def parse_quotable(&block)
722
+ row = []
723
+ skip_needless_lines
724
+ start_row
725
+ while true
726
+ @quoted_column_value = false
727
+ @unquoted_column_value = false
728
+ @scanner.scan_all(@strip_value) if @strip_value
729
+ value = parse_column_value
730
+ if value
731
+ @scanner.scan_all(@strip_value) if @strip_value
732
+ if @field_size_limit and value.size >= @field_size_limit
733
+ ignore_broken_line
734
+ raise MalformedCSVError.new("Field size exceeded", @lineno)
735
+ end
736
+ end
737
+ if parse_column_end
738
+ row << value
739
+ elsif parse_row_end
740
+ if row.empty? and value.nil?
741
+ emit_row([], &block) unless @skip_blanks
742
+ else
743
+ row << value
744
+ emit_row(row, &block)
745
+ row = []
746
+ end
747
+ skip_needless_lines
748
+ start_row
749
+ elsif @scanner.eos?
750
+ break if row.empty? and value.nil?
751
+ row << value
752
+ emit_row(row, &block)
753
+ break
754
+ else
755
+ if @quoted_column_value
756
+ ignore_broken_line
757
+ message = "Any value after quoted field isn't allowed"
758
+ raise MalformedCSVError.new(message, @lineno)
759
+ elsif @unquoted_column_value and
760
+ (new_line = @scanner.scan(@cr_or_lf))
761
+ ignore_broken_line
762
+ message = "Unquoted fields do not allow new line " +
763
+ "<#{new_line.inspect}>"
764
+ raise MalformedCSVError.new(message, @lineno)
765
+ elsif @scanner.rest.start_with?(@quote_character)
766
+ ignore_broken_line
767
+ message = "Illegal quoting"
768
+ raise MalformedCSVError.new(message, @lineno)
769
+ elsif (new_line = @scanner.scan(@cr_or_lf))
770
+ ignore_broken_line
771
+ message = "New line must be <#{@row_separator.inspect}> " +
772
+ "not <#{new_line.inspect}>"
773
+ raise MalformedCSVError.new(message, @lineno)
774
+ else
775
+ ignore_broken_line
776
+ raise MalformedCSVError.new("TODO: Meaningful message",
777
+ @lineno)
778
+ end
779
+ end
780
+ end
781
+ end
782
+
604
783
  def parse_column_value
605
784
  if @liberal_parsing
606
785
  quoted_value = parse_quoted_column_value
@@ -651,6 +830,7 @@ class CSV
651
830
  value << sub_value
652
831
  end
653
832
  end
833
+ value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
654
834
  value
655
835
  end
656
836
 
@@ -667,10 +847,22 @@ class CSV
667
847
  while true
668
848
  quoted_value = @scanner.scan_all(@quoted_value)
669
849
  value << quoted_value if quoted_value
850
+ if @backslash_quote
851
+ if @scanner.scan(@escaped_backslash)
852
+ if @scanner.scan(@escaped_quote)
853
+ value << @quote_character
854
+ else
855
+ value << @backslash_character
856
+ end
857
+ next
858
+ end
859
+ end
860
+
670
861
  quotes = @scanner.scan_all(@quotes)
671
862
  unless quotes
863
+ ignore_broken_line
672
864
  message = "Unclosed quoted field"
673
- raise MalformedCSVError.new(message, @lineno + 1)
865
+ raise MalformedCSVError.new(message, @lineno)
674
866
  end
675
867
  n_quotes = quotes.size
676
868
  if n_quotes == 1
@@ -713,6 +905,33 @@ class CSV
713
905
  end
714
906
  end
715
907
 
908
+ def strip_value(value)
909
+ return value unless @strip
910
+ return nil if value.nil?
911
+
912
+ case @strip
913
+ when String
914
+ size = value.size
915
+ while value.start_with?(@strip)
916
+ size -= 1
917
+ value = value[1, size]
918
+ end
919
+ while value.end_with?(@strip)
920
+ size -= 1
921
+ value = value[0, size]
922
+ end
923
+ else
924
+ value.strip!
925
+ end
926
+ value
927
+ end
928
+
929
+ def ignore_broken_line
930
+ @scanner.scan_all(@not_line_end)
931
+ @scanner.scan_all(@cr_or_lf)
932
+ @lineno += 1
933
+ end
934
+
716
935
  def start_row
717
936
  if @last_line
718
937
  @last_line = nil
@@ -2,5 +2,5 @@
2
2
 
3
3
  class CSV
4
4
  # The version of the installed library.
5
- VERSION = "3.0.4"
5
+ VERSION = "3.0.5"
6
6
  end
@@ -18,6 +18,7 @@ class CSV
18
18
  if @options[:write_headers] and @headers
19
19
  self << @headers
20
20
  end
21
+ @fields_converter = @options[:fields_converter]
21
22
  end
22
23
 
23
24
  def <<(row)
@@ -31,6 +32,8 @@ class CSV
31
32
  @headers ||= row if @use_headers
32
33
  @lineno += 1
33
34
 
35
+ row = @fields_converter.convert(row, nil, lineno) if @fields_converter
36
+
34
37
  converted_row = row.collect do |field|
35
38
  quote(field)
36
39
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.4
4
+ version: 3.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Edward Gray II
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-01-25 00:00:00.000000000 Z
12
+ date: 2019-03-24 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler