csv 3.0.6 → 3.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +13 -0
- data/lib/csv.rb +2 -10
- data/lib/csv/delete_suffix.rb +18 -0
- data/lib/csv/parser.rb +147 -37
- data/lib/csv/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a0aa4d6e8819de8616255194ac7fb7acf8669fb7a6f7580bd07e23e6ee798b0
|
4
|
+
data.tar.gz: 79044828a9b7232a6b671767541a6a860da3229e86e1101304b4a210490b867c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f6626726217b3e967847f93a0a751b640f60d2aaca8b09e34a103b3b377c4623930e3264170fdd44b901a7e421c4953d0d05238e4c274de3e1eec9b97efad4b4
|
7
|
+
data.tar.gz: b722012b844524e1fd94ba7403dfa21aff33087789440f578b6f21f8b235e8f8cf8ef83dd1ebf98d37b64e958255120bd674c4b86715a2a37912bb0e9bf35fe3
|
data/NEWS.md
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.0.7 - 2019-04-08
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Improve parse performance 1.5x by introducing loose parser.
|
8
|
+
|
9
|
+
### Fixes
|
10
|
+
|
11
|
+
* Fix performance regression in 3.0.5.
|
12
|
+
|
13
|
+
* Fix a bug that `CSV#line` returns wrong value when you
|
14
|
+
use `quote_char: nil`.
|
15
|
+
|
3
16
|
## 3.0.6 - 2019-03-30
|
4
17
|
|
5
18
|
### Improvements
|
data/lib/csv.rb
CHANGED
@@ -1232,16 +1232,8 @@ class CSV
|
|
1232
1232
|
#
|
1233
1233
|
# The data source must be open for reading.
|
1234
1234
|
#
|
1235
|
-
def each
|
1236
|
-
|
1237
|
-
enumerator = parser_enumerator
|
1238
|
-
begin
|
1239
|
-
while true
|
1240
|
-
yield enumerator.next
|
1241
|
-
end
|
1242
|
-
rescue StopIteration
|
1243
|
-
end
|
1244
|
-
self
|
1235
|
+
def each(&block)
|
1236
|
+
parser_enumerator.each(&block)
|
1245
1237
|
end
|
1246
1238
|
|
1247
1239
|
#
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This provides String#delete_suffix for Ruby 2.4.
|
4
|
+
unless String.method_defined?(:delete_suffix)
|
5
|
+
class CSV
|
6
|
+
module DeleteSuffix
|
7
|
+
refine String do
|
8
|
+
def delete_suffix(suffix)
|
9
|
+
if end_with?(suffix)
|
10
|
+
self[0..(-(suffix.size + 1))]
|
11
|
+
else
|
12
|
+
self
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/csv/parser.rb
CHANGED
@@ -2,10 +2,12 @@
|
|
2
2
|
|
3
3
|
require "strscan"
|
4
4
|
|
5
|
+
require_relative "delete_suffix"
|
5
6
|
require_relative "match_p"
|
6
7
|
require_relative "row"
|
7
8
|
require_relative "table"
|
8
9
|
|
10
|
+
using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
9
11
|
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
10
12
|
|
11
13
|
class CSV
|
@@ -21,6 +23,15 @@ class CSV
|
|
21
23
|
@keeps = []
|
22
24
|
end
|
23
25
|
|
26
|
+
def each_line(row_separator)
|
27
|
+
position = pos
|
28
|
+
rest.each_line(row_separator) do |line|
|
29
|
+
position += line.bytesize
|
30
|
+
self.pos = position
|
31
|
+
yield(line)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
24
35
|
def keep_start
|
25
36
|
@keeps.push(pos)
|
26
37
|
end
|
@@ -52,21 +63,43 @@ class CSV
|
|
52
63
|
def each_line(row_separator)
|
53
64
|
buffer = nil
|
54
65
|
input = @scanner.rest
|
55
|
-
@scanner.
|
56
|
-
|
66
|
+
position = @scanner.pos
|
67
|
+
offset = 0
|
68
|
+
n_row_separator_chars = row_separator.size
|
69
|
+
while true
|
57
70
|
input.each_line(row_separator) do |line|
|
71
|
+
@scanner.pos += line.bytesize
|
58
72
|
if buffer
|
59
|
-
|
60
|
-
|
61
|
-
|
73
|
+
if n_row_separator_chars == 2 and
|
74
|
+
buffer.end_with?(row_separator[0]) and
|
75
|
+
line.start_with?(row_separator[1])
|
76
|
+
buffer << line[0]
|
77
|
+
line = line[1..-1]
|
78
|
+
position += buffer.bytesize + offset
|
79
|
+
@scanner.pos = position
|
80
|
+
offset = 0
|
81
|
+
yield(buffer)
|
82
|
+
buffer = nil
|
83
|
+
next if line.empty?
|
84
|
+
else
|
85
|
+
buffer << line
|
86
|
+
line = buffer
|
87
|
+
buffer = nil
|
88
|
+
end
|
62
89
|
end
|
63
90
|
if line.end_with?(row_separator)
|
91
|
+
position += line.bytesize + offset
|
92
|
+
@scanner.pos = position
|
93
|
+
offset = 0
|
64
94
|
yield(line)
|
65
95
|
else
|
66
96
|
buffer = line
|
67
97
|
end
|
68
98
|
end
|
69
|
-
|
99
|
+
break unless read_chunk
|
100
|
+
input = @scanner.rest
|
101
|
+
position = @scanner.pos
|
102
|
+
offset = -buffer.bytesize if buffer
|
70
103
|
end
|
71
104
|
yield(buffer) if buffer
|
72
105
|
end
|
@@ -125,6 +158,7 @@ class CSV
|
|
125
158
|
else
|
126
159
|
@scanner.pos = start
|
127
160
|
end
|
161
|
+
read_chunk if @scanner.eos?
|
128
162
|
end
|
129
163
|
|
130
164
|
def keep_drop
|
@@ -263,8 +297,10 @@ class CSV
|
|
263
297
|
@scanner ||= build_scanner
|
264
298
|
if quote_character.nil?
|
265
299
|
parse_no_quote(&block)
|
300
|
+
elsif @need_robust_parsing
|
301
|
+
parse_quotable_robust(&block)
|
266
302
|
else
|
267
|
-
|
303
|
+
parse_quotable_loose(&block)
|
268
304
|
end
|
269
305
|
rescue InvalidEncoding
|
270
306
|
if @scanner
|
@@ -285,8 +321,8 @@ class CSV
|
|
285
321
|
private
|
286
322
|
def prepare
|
287
323
|
prepare_variable
|
288
|
-
prepare_backslash
|
289
324
|
prepare_quote_character
|
325
|
+
prepare_backslash
|
290
326
|
prepare_skip_lines
|
291
327
|
prepare_strip
|
292
328
|
prepare_separators
|
@@ -298,6 +334,7 @@ class CSV
|
|
298
334
|
end
|
299
335
|
|
300
336
|
def prepare_variable
|
337
|
+
@need_robust_parsing = false
|
301
338
|
@encoding = @options[:encoding]
|
302
339
|
liberal_parsing = @options[:liberal_parsing]
|
303
340
|
if liberal_parsing
|
@@ -310,6 +347,7 @@ class CSV
|
|
310
347
|
@double_quote_outside_quote = false
|
311
348
|
@backslash_quote = false
|
312
349
|
end
|
350
|
+
@need_robust_parsing = true
|
313
351
|
else
|
314
352
|
@liberal_parsing = false
|
315
353
|
@backslash_quote = false
|
@@ -321,27 +359,33 @@ class CSV
|
|
321
359
|
@header_fields_converter = @options[:header_fields_converter]
|
322
360
|
end
|
323
361
|
|
324
|
-
def prepare_backslash
|
325
|
-
@backslash_character = "\\".encode(@encoding)
|
326
|
-
|
327
|
-
@escaped_backslash_character = Regexp.escape(@backslash_character)
|
328
|
-
@escaped_backslash = Regexp.new(@escaped_backslash_character)
|
329
|
-
end
|
330
|
-
|
331
362
|
def prepare_quote_character
|
332
363
|
@quote_character = @options[:quote_character]
|
333
364
|
if @quote_character.nil?
|
334
365
|
@escaped_quote_character = nil
|
335
366
|
@escaped_quote = nil
|
336
|
-
@backslash_quote_character = nil
|
337
367
|
else
|
338
368
|
@quote_character = @quote_character.to_s.encode(@encoding)
|
339
369
|
if @quote_character.length != 1
|
340
370
|
message = ":quote_char has to be nil or a single character String"
|
341
371
|
raise ArgumentError, message
|
342
372
|
end
|
373
|
+
@double_quote_character = @quote_character * 2
|
343
374
|
@escaped_quote_character = Regexp.escape(@quote_character)
|
344
375
|
@escaped_quote = Regexp.new(@escaped_quote_character)
|
376
|
+
end
|
377
|
+
end
|
378
|
+
|
379
|
+
def prepare_backslash
|
380
|
+
return unless @backslash_quote
|
381
|
+
|
382
|
+
@backslash_character = "\\".encode(@encoding)
|
383
|
+
|
384
|
+
@escaped_backslash_character = Regexp.escape(@backslash_character)
|
385
|
+
@escaped_backslash = Regexp.new(@escaped_backslash_character)
|
386
|
+
if @quote_character.nil?
|
387
|
+
@backslash_quote_character = nil
|
388
|
+
else
|
345
389
|
@backslash_quote_character =
|
346
390
|
@backslash_character + @escaped_quote_character
|
347
391
|
end
|
@@ -389,9 +433,18 @@ class CSV
|
|
389
433
|
if @quote_character
|
390
434
|
@strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
|
391
435
|
end
|
436
|
+
@need_robust_parsing = true
|
392
437
|
end
|
393
438
|
end
|
394
439
|
|
440
|
+
begin
|
441
|
+
StringScanner.new("x").scan("x")
|
442
|
+
rescue TypeError
|
443
|
+
@@string_scanner_scan_accept_string = false
|
444
|
+
else
|
445
|
+
@@string_scanner_scan_accept_string = true
|
446
|
+
end
|
447
|
+
|
395
448
|
def prepare_separators
|
396
449
|
@column_separator = @options[:column_separator].to_s.encode(@encoding)
|
397
450
|
@row_separator =
|
@@ -399,14 +452,19 @@ class CSV
|
|
399
452
|
|
400
453
|
@escaped_column_separator = Regexp.escape(@column_separator)
|
401
454
|
@escaped_first_column_separator = Regexp.escape(@column_separator[0])
|
402
|
-
@column_end = Regexp.new(@escaped_column_separator)
|
403
455
|
if @column_separator.size > 1
|
456
|
+
@column_end = Regexp.new(@escaped_column_separator)
|
404
457
|
@column_ends = @column_separator.each_char.collect do |char|
|
405
458
|
Regexp.new(Regexp.escape(char))
|
406
459
|
end
|
407
460
|
@first_column_separators = Regexp.new(@escaped_first_column_separator +
|
408
461
|
"+".encode(@encoding))
|
409
462
|
else
|
463
|
+
if @@string_scanner_scan_accept_string
|
464
|
+
@column_end = @column_separator
|
465
|
+
else
|
466
|
+
@column_end = Regexp.new(@escaped_column_separator)
|
467
|
+
end
|
410
468
|
@column_ends = nil
|
411
469
|
@first_column_separators = nil
|
412
470
|
end
|
@@ -421,6 +479,8 @@ class CSV
|
|
421
479
|
@row_ends = nil
|
422
480
|
end
|
423
481
|
|
482
|
+
@cr = "\r".encode(@encoding)
|
483
|
+
@lf = "\n".encode(@encoding)
|
424
484
|
@cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
|
425
485
|
@not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
|
426
486
|
end
|
@@ -436,19 +496,18 @@ class CSV
|
|
436
496
|
@quoted_value = Regexp.new("[^".encode(@encoding) +
|
437
497
|
no_quoted_values +
|
438
498
|
"]+".encode(@encoding))
|
499
|
+
end
|
500
|
+
if @escaped_strip
|
501
|
+
@split_column_separator = Regexp.new(@escaped_strip +
|
502
|
+
"*".encode(@encoding) +
|
503
|
+
@escaped_column_separator +
|
504
|
+
@escaped_strip +
|
505
|
+
"*".encode(@encoding))
|
439
506
|
else
|
440
|
-
if @
|
441
|
-
@split_column_separator = Regexp.new(@
|
442
|
-
"*".encode(@encoding) +
|
443
|
-
@escaped_column_separator +
|
444
|
-
@escaped_strip +
|
445
|
-
"*".encode(@encoding))
|
507
|
+
if @column_separator == " ".encode(@encoding)
|
508
|
+
@split_column_separator = Regexp.new(@escaped_column_separator)
|
446
509
|
else
|
447
|
-
|
448
|
-
@split_column_separator = @column_end
|
449
|
-
else
|
450
|
-
@split_column_separator = @column_separator
|
451
|
-
end
|
510
|
+
@split_column_separator = @column_separator
|
452
511
|
end
|
453
512
|
end
|
454
513
|
end
|
@@ -691,14 +750,10 @@ class CSV
|
|
691
750
|
end
|
692
751
|
|
693
752
|
def parse_no_quote(&block)
|
694
|
-
|
695
|
-
scanner = @scanner.string
|
696
|
-
else
|
697
|
-
scanner = @scanner
|
698
|
-
end
|
699
|
-
scanner.each_line(@row_separator) do |line|
|
753
|
+
@scanner.each_line(@row_separator) do |line|
|
700
754
|
next if @skip_lines and skip_line?(line)
|
701
|
-
line
|
755
|
+
original_line = line
|
756
|
+
line = line.delete_suffix(@row_separator)
|
702
757
|
|
703
758
|
if line.empty?
|
704
759
|
next if @skip_blanks
|
@@ -713,12 +768,67 @@ class CSV
|
|
713
768
|
i += 1
|
714
769
|
end
|
715
770
|
end
|
716
|
-
@last_line =
|
771
|
+
@last_line = original_line
|
772
|
+
emit_row(row, &block)
|
773
|
+
end
|
774
|
+
end
|
775
|
+
|
776
|
+
def parse_quotable_loose(&block)
|
777
|
+
@scanner.keep_start
|
778
|
+
@scanner.each_line(@row_separator) do |line|
|
779
|
+
if @skip_lines and skip_line?(line)
|
780
|
+
@scanner.keep_drop
|
781
|
+
@scanner.keep_start
|
782
|
+
next
|
783
|
+
end
|
784
|
+
original_line = line
|
785
|
+
line = line.delete_suffix(@row_separator)
|
786
|
+
|
787
|
+
if line.empty?
|
788
|
+
if @skip_blanks
|
789
|
+
@scanner.keep_drop
|
790
|
+
@scanner.keep_start
|
791
|
+
next
|
792
|
+
end
|
793
|
+
row = []
|
794
|
+
elsif line.include?(@cr) or line.include?(@lf)
|
795
|
+
@scanner.keep_back
|
796
|
+
@need_robust_parsing = true
|
797
|
+
return parse_quotable_robust(&block)
|
798
|
+
else
|
799
|
+
row = line.split(@split_column_separator, -1)
|
800
|
+
n_columns = row.size
|
801
|
+
i = 0
|
802
|
+
while i < n_columns
|
803
|
+
column = row[i]
|
804
|
+
if column.empty?
|
805
|
+
row[i] = nil
|
806
|
+
else
|
807
|
+
n_quotes = column.count(@quote_character)
|
808
|
+
if n_quotes.zero?
|
809
|
+
# no quote
|
810
|
+
elsif n_quotes == 2 and
|
811
|
+
column.start_with?(@quote_character) and
|
812
|
+
column.end_with?(@quote_character)
|
813
|
+
row[i] = column[1..-2]
|
814
|
+
else
|
815
|
+
@scanner.keep_back
|
816
|
+
@need_robust_parsing = true
|
817
|
+
return parse_quotable_robust(&block)
|
818
|
+
end
|
819
|
+
end
|
820
|
+
i += 1
|
821
|
+
end
|
822
|
+
end
|
823
|
+
@scanner.keep_drop
|
824
|
+
@scanner.keep_start
|
825
|
+
@last_line = original_line
|
717
826
|
emit_row(row, &block)
|
718
827
|
end
|
828
|
+
@scanner.keep_drop
|
719
829
|
end
|
720
830
|
|
721
|
-
def
|
831
|
+
def parse_quotable_robust(&block)
|
722
832
|
row = []
|
723
833
|
skip_needless_lines
|
724
834
|
start_row
|
data/lib/csv/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.6
|
4
|
+
version: 3.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-03-30 00:00:00.000000000 Z
|
12
|
+
date: 2019-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -83,6 +83,7 @@ files:
|
|
83
83
|
- lib/csv.rb
|
84
84
|
- lib/csv/core_ext/array.rb
|
85
85
|
- lib/csv/core_ext/string.rb
|
86
|
+
- lib/csv/delete_suffix.rb
|
86
87
|
- lib/csv/fields_converter.rb
|
87
88
|
- lib/csv/match_p.rb
|
88
89
|
- lib/csv/parser.rb
|