csv 3.0.5 → 3.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 277fe614e5fc3f2f8ad100a70c6021aea0b8c9f989a46aad6618f9c3e81e5baf
4
- data.tar.gz: cb8aafb272a93788371dcc6992f61ea26a7a19844f3aabacb0e16fc9a3e54058
3
+ metadata.gz: 4a0aa4d6e8819de8616255194ac7fb7acf8669fb7a6f7580bd07e23e6ee798b0
4
+ data.tar.gz: 79044828a9b7232a6b671767541a6a860da3229e86e1101304b4a210490b867c
5
5
  SHA512:
6
- metadata.gz: 063bf8125079ad1c9f42f26990ab9a6a7a07100fecc2db71a792e5b89aa757decc571e1c39a70c8c8a2eb50475749bbc103985a0fa66a1e35762f7568d7a7f85
7
- data.tar.gz: 7daef31a3902cba8dbc139a1d828d15493def81c95011f4da6ee69369dbdfdd3fb97a20a7d9a9f0cd1a8129669e27403f6426644e3b2f48e7fef592c22967dc0
6
+ metadata.gz: f6626726217b3e967847f93a0a751b640f60d2aaca8b09e34a103b3b377c4623930e3264170fdd44b901a7e421c4953d0d05238e4c274de3e1eec9b97efad4b4
7
+ data.tar.gz: b722012b844524e1fd94ba7403dfa21aff33087789440f578b6f21f8b235e8f8cf8ef83dd1ebf98d37b64e958255120bd674c4b86715a2a37912bb0e9bf35fe3
data/NEWS.md CHANGED
@@ -1,5 +1,24 @@
1
1
  # News
2
2
 
3
+ ## 3.0.7 - 2019-04-08
4
+
5
+ ### Improvements
6
+
7
+ * Improve parse performance 1.5x by introducing a loose parser.
8
+
9
+ ### Fixes
10
+
11
+ * Fix a performance regression introduced in 3.0.5.
12
+
13
+ * Fix a bug where `CSV#line` returns the wrong value when you
14
+ use `quote_char: nil`.
15
+
16
+ ## 3.0.6 - 2019-03-30
17
+
18
+ ### Improvements
19
+
20
+ * `CSV.foreach`: Added support for `mode`.
21
+
3
22
  ## 3.0.5 - 2019-03-24
4
23
 
5
24
  ### Improvements
data/lib/csv/delete_suffix.rb ADDED
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This provides String#delete_suffix for Ruby 2.4.
4
+ unless String.method_defined?(:delete_suffix)
5
+ class CSV
6
+ module DeleteSuffix
7
+ refine String do
8
+ def delete_suffix(suffix)
9
+ if end_with?(suffix)
10
+ self[0..(-(suffix.size + 1))]
11
+ else
12
+ self
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
18
+ end
data/lib/csv/parser.rb CHANGED
@@ -2,10 +2,12 @@
2
2
 
3
3
  require "strscan"
4
4
 
5
+ require_relative "delete_suffix"
5
6
  require_relative "match_p"
6
7
  require_relative "row"
7
8
  require_relative "table"
8
9
 
10
+ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
9
11
  using CSV::MatchP if CSV.const_defined?(:MatchP)
10
12
 
11
13
  class CSV
@@ -21,6 +23,15 @@ class CSV
21
23
  @keeps = []
22
24
  end
23
25
 
26
+ def each_line(row_separator)
27
+ position = pos
28
+ rest.each_line(row_separator) do |line|
29
+ position += line.bytesize
30
+ self.pos = position
31
+ yield(line)
32
+ end
33
+ end
34
+
24
35
  def keep_start
25
36
  @keeps.push(pos)
26
37
  end
@@ -52,21 +63,43 @@ class CSV
52
63
  def each_line(row_separator)
53
64
  buffer = nil
54
65
  input = @scanner.rest
55
- @scanner.terminate
56
- while input
66
+ position = @scanner.pos
67
+ offset = 0
68
+ n_row_separator_chars = row_separator.size
69
+ while true
57
70
  input.each_line(row_separator) do |line|
71
+ @scanner.pos += line.bytesize
58
72
  if buffer
59
- buffer << line
60
- line = buffer
61
- buffer = nil
73
+ if n_row_separator_chars == 2 and
74
+ buffer.end_with?(row_separator[0]) and
75
+ line.start_with?(row_separator[1])
76
+ buffer << line[0]
77
+ line = line[1..-1]
78
+ position += buffer.bytesize + offset
79
+ @scanner.pos = position
80
+ offset = 0
81
+ yield(buffer)
82
+ buffer = nil
83
+ next if line.empty?
84
+ else
85
+ buffer << line
86
+ line = buffer
87
+ buffer = nil
88
+ end
62
89
  end
63
90
  if line.end_with?(row_separator)
91
+ position += line.bytesize + offset
92
+ @scanner.pos = position
93
+ offset = 0
64
94
  yield(line)
65
95
  else
66
96
  buffer = line
67
97
  end
68
98
  end
69
- input = @inputs.shift
99
+ break unless read_chunk
100
+ input = @scanner.rest
101
+ position = @scanner.pos
102
+ offset = -buffer.bytesize if buffer
70
103
  end
71
104
  yield(buffer) if buffer
72
105
  end
@@ -125,6 +158,7 @@ class CSV
125
158
  else
126
159
  @scanner.pos = start
127
160
  end
161
+ read_chunk if @scanner.eos?
128
162
  end
129
163
 
130
164
  def keep_drop
@@ -263,8 +297,10 @@ class CSV
263
297
  @scanner ||= build_scanner
264
298
  if quote_character.nil?
265
299
  parse_no_quote(&block)
300
+ elsif @need_robust_parsing
301
+ parse_quotable_robust(&block)
266
302
  else
267
- parse_quotable(&block)
303
+ parse_quotable_loose(&block)
268
304
  end
269
305
  rescue InvalidEncoding
270
306
  if @scanner
@@ -285,8 +321,8 @@ class CSV
285
321
  private
286
322
  def prepare
287
323
  prepare_variable
288
- prepare_backslash
289
324
  prepare_quote_character
325
+ prepare_backslash
290
326
  prepare_skip_lines
291
327
  prepare_strip
292
328
  prepare_separators
@@ -298,6 +334,7 @@ class CSV
298
334
  end
299
335
 
300
336
  def prepare_variable
337
+ @need_robust_parsing = false
301
338
  @encoding = @options[:encoding]
302
339
  liberal_parsing = @options[:liberal_parsing]
303
340
  if liberal_parsing
@@ -310,6 +347,7 @@ class CSV
310
347
  @double_quote_outside_quote = false
311
348
  @backslash_quote = false
312
349
  end
350
+ @need_robust_parsing = true
313
351
  else
314
352
  @liberal_parsing = false
315
353
  @backslash_quote = false
@@ -321,27 +359,33 @@ class CSV
321
359
  @header_fields_converter = @options[:header_fields_converter]
322
360
  end
323
361
 
324
- def prepare_backslash
325
- @backslash_character = "\\".encode(@encoding)
326
-
327
- @escaped_backslash_character = Regexp.escape(@backslash_character)
328
- @escaped_backslash = Regexp.new(@escaped_backslash_character)
329
- end
330
-
331
362
  def prepare_quote_character
332
363
  @quote_character = @options[:quote_character]
333
364
  if @quote_character.nil?
334
365
  @escaped_quote_character = nil
335
366
  @escaped_quote = nil
336
- @backslash_quote_character = nil
337
367
  else
338
368
  @quote_character = @quote_character.to_s.encode(@encoding)
339
369
  if @quote_character.length != 1
340
370
  message = ":quote_char has to be nil or a single character String"
341
371
  raise ArgumentError, message
342
372
  end
373
+ @double_quote_character = @quote_character * 2
343
374
  @escaped_quote_character = Regexp.escape(@quote_character)
344
375
  @escaped_quote = Regexp.new(@escaped_quote_character)
376
+ end
377
+ end
378
+
379
+ def prepare_backslash
380
+ return unless @backslash_quote
381
+
382
+ @backslash_character = "\\".encode(@encoding)
383
+
384
+ @escaped_backslash_character = Regexp.escape(@backslash_character)
385
+ @escaped_backslash = Regexp.new(@escaped_backslash_character)
386
+ if @quote_character.nil?
387
+ @backslash_quote_character = nil
388
+ else
345
389
  @backslash_quote_character =
346
390
  @backslash_character + @escaped_quote_character
347
391
  end
@@ -389,9 +433,18 @@ class CSV
389
433
  if @quote_character
390
434
  @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
391
435
  end
436
+ @need_robust_parsing = true
392
437
  end
393
438
  end
394
439
 
440
+ begin
441
+ StringScanner.new("x").scan("x")
442
+ rescue TypeError
443
+ @@string_scanner_scan_accept_string = false
444
+ else
445
+ @@string_scanner_scan_accept_string = true
446
+ end
447
+
395
448
  def prepare_separators
396
449
  @column_separator = @options[:column_separator].to_s.encode(@encoding)
397
450
  @row_separator =
@@ -399,14 +452,19 @@ class CSV
399
452
 
400
453
  @escaped_column_separator = Regexp.escape(@column_separator)
401
454
  @escaped_first_column_separator = Regexp.escape(@column_separator[0])
402
- @column_end = Regexp.new(@escaped_column_separator)
403
455
  if @column_separator.size > 1
456
+ @column_end = Regexp.new(@escaped_column_separator)
404
457
  @column_ends = @column_separator.each_char.collect do |char|
405
458
  Regexp.new(Regexp.escape(char))
406
459
  end
407
460
  @first_column_separators = Regexp.new(@escaped_first_column_separator +
408
461
  "+".encode(@encoding))
409
462
  else
463
+ if @@string_scanner_scan_accept_string
464
+ @column_end = @column_separator
465
+ else
466
+ @column_end = Regexp.new(@escaped_column_separator)
467
+ end
410
468
  @column_ends = nil
411
469
  @first_column_separators = nil
412
470
  end
@@ -421,6 +479,8 @@ class CSV
421
479
  @row_ends = nil
422
480
  end
423
481
 
482
+ @cr = "\r".encode(@encoding)
483
+ @lf = "\n".encode(@encoding)
424
484
  @cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
425
485
  @not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
426
486
  end
@@ -436,19 +496,18 @@ class CSV
436
496
  @quoted_value = Regexp.new("[^".encode(@encoding) +
437
497
  no_quoted_values +
438
498
  "]+".encode(@encoding))
499
+ end
500
+ if @escaped_strip
501
+ @split_column_separator = Regexp.new(@escaped_strip +
502
+ "*".encode(@encoding) +
503
+ @escaped_column_separator +
504
+ @escaped_strip +
505
+ "*".encode(@encoding))
439
506
  else
440
- if @escaped_strip
441
- @split_column_separator = Regexp.new(@escaped_strip +
442
- "*".encode(@encoding) +
443
- @escaped_column_separator +
444
- @escaped_strip +
445
- "*".encode(@encoding))
507
+ if @column_separator == " ".encode(@encoding)
508
+ @split_column_separator = Regexp.new(@escaped_column_separator)
446
509
  else
447
- if @column_separator == " ".encode(@encoding)
448
- @split_column_separator = @column_end
449
- else
450
- @split_column_separator = @column_separator
451
- end
510
+ @split_column_separator = @column_separator
452
511
  end
453
512
  end
454
513
  end
@@ -691,21 +750,17 @@ class CSV
691
750
  end
692
751
 
693
752
  def parse_no_quote(&block)
694
- if @scanner.respond_to?(:string)
695
- scanner = @scanner.string
696
- else
697
- scanner = @scanner
698
- end
699
- scanner.each_line(@row_separator) do |value|
700
- next if @skip_lines and skip_line?(value)
701
- value.chomp!
753
+ @scanner.each_line(@row_separator) do |line|
754
+ next if @skip_lines and skip_line?(line)
755
+ original_line = line
756
+ line = line.delete_suffix(@row_separator)
702
757
 
703
- if value.empty?
758
+ if line.empty?
704
759
  next if @skip_blanks
705
760
  row = []
706
761
  else
707
- value = strip_value(value)
708
- row = value.split(@split_column_separator, -1)
762
+ line = strip_value(line)
763
+ row = line.split(@split_column_separator, -1)
709
764
  n_columns = row.size
710
765
  i = 0
711
766
  while i < n_columns
@@ -713,12 +768,67 @@ class CSV
713
768
  i += 1
714
769
  end
715
770
  end
716
- @last_line = value
771
+ @last_line = original_line
772
+ emit_row(row, &block)
773
+ end
774
+ end
775
+
776
+ def parse_quotable_loose(&block)
777
+ @scanner.keep_start
778
+ @scanner.each_line(@row_separator) do |line|
779
+ if @skip_lines and skip_line?(line)
780
+ @scanner.keep_drop
781
+ @scanner.keep_start
782
+ next
783
+ end
784
+ original_line = line
785
+ line = line.delete_suffix(@row_separator)
786
+
787
+ if line.empty?
788
+ if @skip_blanks
789
+ @scanner.keep_drop
790
+ @scanner.keep_start
791
+ next
792
+ end
793
+ row = []
794
+ elsif line.include?(@cr) or line.include?(@lf)
795
+ @scanner.keep_back
796
+ @need_robust_parsing = true
797
+ return parse_quotable_robust(&block)
798
+ else
799
+ row = line.split(@split_column_separator, -1)
800
+ n_columns = row.size
801
+ i = 0
802
+ while i < n_columns
803
+ column = row[i]
804
+ if column.empty?
805
+ row[i] = nil
806
+ else
807
+ n_quotes = column.count(@quote_character)
808
+ if n_quotes.zero?
809
+ # no quote
810
+ elsif n_quotes == 2 and
811
+ column.start_with?(@quote_character) and
812
+ column.end_with?(@quote_character)
813
+ row[i] = column[1..-2]
814
+ else
815
+ @scanner.keep_back
816
+ @need_robust_parsing = true
817
+ return parse_quotable_robust(&block)
818
+ end
819
+ end
820
+ i += 1
821
+ end
822
+ end
823
+ @scanner.keep_drop
824
+ @scanner.keep_start
825
+ @last_line = original_line
717
826
  emit_row(row, &block)
718
827
  end
828
+ @scanner.keep_drop
719
829
  end
720
830
 
721
- def parse_quotable(&block)
831
+ def parse_quotable_robust(&block)
722
832
  row = []
723
833
  skip_needless_lines
724
834
  start_row
data/lib/csv/version.rb CHANGED
@@ -2,5 +2,5 @@
2
2
 
3
3
  class CSV
4
4
  # The version of the installed library.
5
- VERSION = "3.0.5"
5
+ VERSION = "3.0.7"
6
6
  end
data/lib/csv.rb CHANGED
@@ -504,9 +504,9 @@ class CSV
504
504
  # <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
505
505
  # but transcode it to UTF-8 before CSV parses it.
506
506
  #
507
- def self.foreach(path, **options, &block)
508
- return to_enum(__method__, path, options) unless block_given?
509
- open(path, options) do |csv|
507
+ def self.foreach(path, mode="r", **options, &block)
508
+ return to_enum(__method__, path, mode, options) unless block_given?
509
+ open(path, mode, options) do |csv|
510
510
  csv.each(&block)
511
511
  end
512
512
  end
@@ -1232,16 +1232,8 @@ class CSV
1232
1232
  #
1233
1233
  # The data source must be open for reading.
1234
1234
  #
1235
- def each
1236
- return to_enum(__method__) unless block_given?
1237
- enumerator = parser_enumerator
1238
- begin
1239
- while true
1240
- yield enumerator.next
1241
- end
1242
- rescue StopIteration
1243
- end
1244
- self
1235
+ def each(&block)
1236
+ parser_enumerator.each(&block)
1245
1237
  end
1246
1238
 
1247
1239
  #
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Edward Gray II
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-03-24 00:00:00.000000000 Z
12
+ date: 2019-04-08 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -83,6 +83,7 @@ files:
83
83
  - lib/csv.rb
84
84
  - lib/csv/core_ext/array.rb
85
85
  - lib/csv/core_ext/string.rb
86
+ - lib/csv/delete_suffix.rb
86
87
  - lib/csv/fields_converter.rb
87
88
  - lib/csv/match_p.rb
88
89
  - lib/csv/parser.rb
@@ -110,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
110
111
  version: '0'
111
112
  requirements: []
112
113
  rubyforge_project:
113
- rubygems_version: 2.7.6
114
+ rubygems_version: 2.7.6.2
114
115
  signing_key:
115
116
  specification_version: 4
116
117
  summary: CSV Reading and Writing