csv 3.0.6 → 3.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b0dfdbc61b4f75c06e3ea3ffd26980443aebe03d9a2fbdb8dc46a48d986d0447
4
- data.tar.gz: 06cae607c0c77df4aa9f206d8d6d776a4c9207e359506558b9b07885c6f18c24
3
+ metadata.gz: 4a0aa4d6e8819de8616255194ac7fb7acf8669fb7a6f7580bd07e23e6ee798b0
4
+ data.tar.gz: 79044828a9b7232a6b671767541a6a860da3229e86e1101304b4a210490b867c
5
5
  SHA512:
6
- metadata.gz: 0ec7e66bf0feb2f6dc9cea0cf72748e74e5a9ac817cd98254f17e2e42dca39265b067a8415c4edefd1879c878f4e1e96f68caa3289a97b26eafcd2e835d0160e
7
- data.tar.gz: cbe775cdbf43c7eb86af541d2dbafc8359aee4e108cb62f34fa73adbb401ac96c2c3b6839f972f0d3285068d8fa7c96120ef4de1b10f14acd9457fe9384cc822
6
+ metadata.gz: f6626726217b3e967847f93a0a751b640f60d2aaca8b09e34a103b3b377c4623930e3264170fdd44b901a7e421c4953d0d05238e4c274de3e1eec9b97efad4b4
7
+ data.tar.gz: b722012b844524e1fd94ba7403dfa21aff33087789440f578b6f21f8b235e8f8cf8ef83dd1ebf98d37b64e958255120bd674c4b86715a2a37912bb0e9bf35fe3
data/NEWS.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # News
2
2
 
3
+ ## 3.0.7 - 2019-04-08
4
+
5
+ ### Improvements
6
+
7
+ * Improve parse performance 1.5x by introducing a loose parser.
8
+
9
+ ### Fixes
10
+
11
+ * Fix performance regression in 3.0.5.
12
+
13
+ * Fix a bug where `CSV#line` returns the wrong value when you
14
+ use `quote_char: nil`.
15
+
3
16
  ## 3.0.6 - 2019-03-30
4
17
 
5
18
  ### Improvements
data/lib/csv.rb CHANGED
@@ -1232,16 +1232,8 @@ class CSV
1232
1232
  #
1233
1233
  # The data source must be open for reading.
1234
1234
  #
1235
- def each
1236
- return to_enum(__method__) unless block_given?
1237
- enumerator = parser_enumerator
1238
- begin
1239
- while true
1240
- yield enumerator.next
1241
- end
1242
- rescue StopIteration
1243
- end
1244
- self
1235
+ def each(&block)
1236
+ parser_enumerator.each(&block)
1245
1237
  end
1246
1238
 
1247
1239
  #
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This provides String#delete_suffix for Ruby 2.4.
4
+ unless String.method_defined?(:delete_suffix)
5
+ class CSV
6
+ module DeleteSuffix
7
+ refine String do
8
+ def delete_suffix(suffix)
9
+ if end_with?(suffix)
10
+ self[0..(-(suffix.size + 1))]
11
+ else
12
+ self
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
@@ -2,10 +2,12 @@
2
2
 
3
3
  require "strscan"
4
4
 
5
+ require_relative "delete_suffix"
5
6
  require_relative "match_p"
6
7
  require_relative "row"
7
8
  require_relative "table"
8
9
 
10
+ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
9
11
  using CSV::MatchP if CSV.const_defined?(:MatchP)
10
12
 
11
13
  class CSV
@@ -21,6 +23,15 @@ class CSV
21
23
  @keeps = []
22
24
  end
23
25
 
26
+ def each_line(row_separator)
27
+ position = pos
28
+ rest.each_line(row_separator) do |line|
29
+ position += line.bytesize
30
+ self.pos = position
31
+ yield(line)
32
+ end
33
+ end
34
+
24
35
  def keep_start
25
36
  @keeps.push(pos)
26
37
  end
@@ -52,21 +63,43 @@ class CSV
52
63
  def each_line(row_separator)
53
64
  buffer = nil
54
65
  input = @scanner.rest
55
- @scanner.terminate
56
- while input
66
+ position = @scanner.pos
67
+ offset = 0
68
+ n_row_separator_chars = row_separator.size
69
+ while true
57
70
  input.each_line(row_separator) do |line|
71
+ @scanner.pos += line.bytesize
58
72
  if buffer
59
- buffer << line
60
- line = buffer
61
- buffer = nil
73
+ if n_row_separator_chars == 2 and
74
+ buffer.end_with?(row_separator[0]) and
75
+ line.start_with?(row_separator[1])
76
+ buffer << line[0]
77
+ line = line[1..-1]
78
+ position += buffer.bytesize + offset
79
+ @scanner.pos = position
80
+ offset = 0
81
+ yield(buffer)
82
+ buffer = nil
83
+ next if line.empty?
84
+ else
85
+ buffer << line
86
+ line = buffer
87
+ buffer = nil
88
+ end
62
89
  end
63
90
  if line.end_with?(row_separator)
91
+ position += line.bytesize + offset
92
+ @scanner.pos = position
93
+ offset = 0
64
94
  yield(line)
65
95
  else
66
96
  buffer = line
67
97
  end
68
98
  end
69
- input = @inputs.shift
99
+ break unless read_chunk
100
+ input = @scanner.rest
101
+ position = @scanner.pos
102
+ offset = -buffer.bytesize if buffer
70
103
  end
71
104
  yield(buffer) if buffer
72
105
  end
@@ -125,6 +158,7 @@ class CSV
125
158
  else
126
159
  @scanner.pos = start
127
160
  end
161
+ read_chunk if @scanner.eos?
128
162
  end
129
163
 
130
164
  def keep_drop
@@ -263,8 +297,10 @@ class CSV
263
297
  @scanner ||= build_scanner
264
298
  if quote_character.nil?
265
299
  parse_no_quote(&block)
300
+ elsif @need_robust_parsing
301
+ parse_quotable_robust(&block)
266
302
  else
267
- parse_quotable(&block)
303
+ parse_quotable_loose(&block)
268
304
  end
269
305
  rescue InvalidEncoding
270
306
  if @scanner
@@ -285,8 +321,8 @@ class CSV
285
321
  private
286
322
  def prepare
287
323
  prepare_variable
288
- prepare_backslash
289
324
  prepare_quote_character
325
+ prepare_backslash
290
326
  prepare_skip_lines
291
327
  prepare_strip
292
328
  prepare_separators
@@ -298,6 +334,7 @@ class CSV
298
334
  end
299
335
 
300
336
  def prepare_variable
337
+ @need_robust_parsing = false
301
338
  @encoding = @options[:encoding]
302
339
  liberal_parsing = @options[:liberal_parsing]
303
340
  if liberal_parsing
@@ -310,6 +347,7 @@ class CSV
310
347
  @double_quote_outside_quote = false
311
348
  @backslash_quote = false
312
349
  end
350
+ @need_robust_parsing = true
313
351
  else
314
352
  @liberal_parsing = false
315
353
  @backslash_quote = false
@@ -321,27 +359,33 @@ class CSV
321
359
  @header_fields_converter = @options[:header_fields_converter]
322
360
  end
323
361
 
324
- def prepare_backslash
325
- @backslash_character = "\\".encode(@encoding)
326
-
327
- @escaped_backslash_character = Regexp.escape(@backslash_character)
328
- @escaped_backslash = Regexp.new(@escaped_backslash_character)
329
- end
330
-
331
362
  def prepare_quote_character
332
363
  @quote_character = @options[:quote_character]
333
364
  if @quote_character.nil?
334
365
  @escaped_quote_character = nil
335
366
  @escaped_quote = nil
336
- @backslash_quote_character = nil
337
367
  else
338
368
  @quote_character = @quote_character.to_s.encode(@encoding)
339
369
  if @quote_character.length != 1
340
370
  message = ":quote_char has to be nil or a single character String"
341
371
  raise ArgumentError, message
342
372
  end
373
+ @double_quote_character = @quote_character * 2
343
374
  @escaped_quote_character = Regexp.escape(@quote_character)
344
375
  @escaped_quote = Regexp.new(@escaped_quote_character)
376
+ end
377
+ end
378
+
379
+ def prepare_backslash
380
+ return unless @backslash_quote
381
+
382
+ @backslash_character = "\\".encode(@encoding)
383
+
384
+ @escaped_backslash_character = Regexp.escape(@backslash_character)
385
+ @escaped_backslash = Regexp.new(@escaped_backslash_character)
386
+ if @quote_character.nil?
387
+ @backslash_quote_character = nil
388
+ else
345
389
  @backslash_quote_character =
346
390
  @backslash_character + @escaped_quote_character
347
391
  end
@@ -389,9 +433,18 @@ class CSV
389
433
  if @quote_character
390
434
  @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
391
435
  end
436
+ @need_robust_parsing = true
392
437
  end
393
438
  end
394
439
 
440
+ begin
441
+ StringScanner.new("x").scan("x")
442
+ rescue TypeError
443
+ @@string_scanner_scan_accept_string = false
444
+ else
445
+ @@string_scanner_scan_accept_string = true
446
+ end
447
+
395
448
  def prepare_separators
396
449
  @column_separator = @options[:column_separator].to_s.encode(@encoding)
397
450
  @row_separator =
@@ -399,14 +452,19 @@ class CSV
399
452
 
400
453
  @escaped_column_separator = Regexp.escape(@column_separator)
401
454
  @escaped_first_column_separator = Regexp.escape(@column_separator[0])
402
- @column_end = Regexp.new(@escaped_column_separator)
403
455
  if @column_separator.size > 1
456
+ @column_end = Regexp.new(@escaped_column_separator)
404
457
  @column_ends = @column_separator.each_char.collect do |char|
405
458
  Regexp.new(Regexp.escape(char))
406
459
  end
407
460
  @first_column_separators = Regexp.new(@escaped_first_column_separator +
408
461
  "+".encode(@encoding))
409
462
  else
463
+ if @@string_scanner_scan_accept_string
464
+ @column_end = @column_separator
465
+ else
466
+ @column_end = Regexp.new(@escaped_column_separator)
467
+ end
410
468
  @column_ends = nil
411
469
  @first_column_separators = nil
412
470
  end
@@ -421,6 +479,8 @@ class CSV
421
479
  @row_ends = nil
422
480
  end
423
481
 
482
+ @cr = "\r".encode(@encoding)
483
+ @lf = "\n".encode(@encoding)
424
484
  @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
425
485
  @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
426
486
  end
@@ -436,19 +496,18 @@ class CSV
436
496
  @quoted_value = Regexp.new("[^".encode(@encoding) +
437
497
  no_quoted_values +
438
498
  "]+".encode(@encoding))
499
+ end
500
+ if @escaped_strip
501
+ @split_column_separator = Regexp.new(@escaped_strip +
502
+ "*".encode(@encoding) +
503
+ @escaped_column_separator +
504
+ @escaped_strip +
505
+ "*".encode(@encoding))
439
506
  else
440
- if @escaped_strip
441
- @split_column_separator = Regexp.new(@escaped_strip +
442
- "*".encode(@encoding) +
443
- @escaped_column_separator +
444
- @escaped_strip +
445
- "*".encode(@encoding))
507
+ if @column_separator == " ".encode(@encoding)
508
+ @split_column_separator = Regexp.new(@escaped_column_separator)
446
509
  else
447
- if @column_separator == " ".encode(@encoding)
448
- @split_column_separator = @column_end
449
- else
450
- @split_column_separator = @column_separator
451
- end
510
+ @split_column_separator = @column_separator
452
511
  end
453
512
  end
454
513
  end
@@ -691,14 +750,10 @@ class CSV
691
750
  end
692
751
 
693
752
  def parse_no_quote(&block)
694
- if @scanner.respond_to?(:string)
695
- scanner = @scanner.string
696
- else
697
- scanner = @scanner
698
- end
699
- scanner.each_line(@row_separator) do |line|
753
+ @scanner.each_line(@row_separator) do |line|
700
754
  next if @skip_lines and skip_line?(line)
701
- line.chomp!
755
+ original_line = line
756
+ line = line.delete_suffix(@row_separator)
702
757
 
703
758
  if line.empty?
704
759
  next if @skip_blanks
@@ -713,12 +768,67 @@ class CSV
713
768
  i += 1
714
769
  end
715
770
  end
716
- @last_line = line
771
+ @last_line = original_line
772
+ emit_row(row, &block)
773
+ end
774
+ end
775
+
776
+ def parse_quotable_loose(&block)
777
+ @scanner.keep_start
778
+ @scanner.each_line(@row_separator) do |line|
779
+ if @skip_lines and skip_line?(line)
780
+ @scanner.keep_drop
781
+ @scanner.keep_start
782
+ next
783
+ end
784
+ original_line = line
785
+ line = line.delete_suffix(@row_separator)
786
+
787
+ if line.empty?
788
+ if @skip_blanks
789
+ @scanner.keep_drop
790
+ @scanner.keep_start
791
+ next
792
+ end
793
+ row = []
794
+ elsif line.include?(@cr) or line.include?(@lf)
795
+ @scanner.keep_back
796
+ @need_robust_parsing = true
797
+ return parse_quotable_robust(&block)
798
+ else
799
+ row = line.split(@split_column_separator, -1)
800
+ n_columns = row.size
801
+ i = 0
802
+ while i < n_columns
803
+ column = row[i]
804
+ if column.empty?
805
+ row[i] = nil
806
+ else
807
+ n_quotes = column.count(@quote_character)
808
+ if n_quotes.zero?
809
+ # no quote
810
+ elsif n_quotes == 2 and
811
+ column.start_with?(@quote_character) and
812
+ column.end_with?(@quote_character)
813
+ row[i] = column[1..-2]
814
+ else
815
+ @scanner.keep_back
816
+ @need_robust_parsing = true
817
+ return parse_quotable_robust(&block)
818
+ end
819
+ end
820
+ i += 1
821
+ end
822
+ end
823
+ @scanner.keep_drop
824
+ @scanner.keep_start
825
+ @last_line = original_line
717
826
  emit_row(row, &block)
718
827
  end
828
+ @scanner.keep_drop
719
829
  end
720
830
 
721
- def parse_quotable(&block)
831
+ def parse_quotable_robust(&block)
722
832
  row = []
723
833
  skip_needless_lines
724
834
  start_row
@@ -2,5 +2,5 @@
2
2
 
3
3
  class CSV
4
4
  # The version of the installed library.
5
- VERSION = "3.0.6"
5
+ VERSION = "3.0.7"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.6
4
+ version: 3.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Edward Gray II
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-03-29 00:00:00.000000000 Z
12
+ date: 2019-04-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -83,6 +83,7 @@ files:
83
83
  - lib/csv.rb
84
84
  - lib/csv/core_ext/array.rb
85
85
  - lib/csv/core_ext/string.rb
86
+ - lib/csv/delete_suffix.rb
86
87
  - lib/csv/fields_converter.rb
87
88
  - lib/csv/match_p.rb
88
89
  - lib/csv/parser.rb