csv 3.0.5 → 3.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +19 -0
- data/lib/csv/delete_suffix.rb +18 -0
- data/lib/csv/parser.rb +151 -41
- data/lib/csv/version.rb +1 -1
- data/lib/csv.rb +5 -13
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4a0aa4d6e8819de8616255194ac7fb7acf8669fb7a6f7580bd07e23e6ee798b0
|
4
|
+
data.tar.gz: 79044828a9b7232a6b671767541a6a860da3229e86e1101304b4a210490b867c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f6626726217b3e967847f93a0a751b640f60d2aaca8b09e34a103b3b377c4623930e3264170fdd44b901a7e421c4953d0d05238e4c274de3e1eec9b97efad4b4
|
7
|
+
data.tar.gz: b722012b844524e1fd94ba7403dfa21aff33087789440f578b6f21f8b235e8f8cf8ef83dd1ebf98d37b64e958255120bd674c4b86715a2a37912bb0e9bf35fe3
|
data/NEWS.md
CHANGED
@@ -1,5 +1,24 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.0.7 - 2019-04-08
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Improve parse performance 1.5x by introducing loose parser.
|
8
|
+
|
9
|
+
### Fixes
|
10
|
+
|
11
|
+
* Fix performance regression in 3.0.5.
|
12
|
+
|
13
|
+
* Fix a bug that `CSV#line` returns wrong value when you
|
14
|
+
use `quote_char: nil`.
|
15
|
+
|
16
|
+
## 3.0.6 - 2019-03-30
|
17
|
+
|
18
|
+
### Improvements
|
19
|
+
|
20
|
+
* `CSV.foreach`: Added support for `mode`.
|
21
|
+
|
3
22
|
## 3.0.5 - 2019-03-24
|
4
23
|
|
5
24
|
### Improvements
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This provides String#delete_suffix? for Ruby 2.4.
|
4
|
+
unless String.method_defined?(:delete_suffix)
|
5
|
+
class CSV
|
6
|
+
module DeleteSuffix
|
7
|
+
refine String do
|
8
|
+
def delete_suffix(suffix)
|
9
|
+
if end_with?(suffix)
|
10
|
+
self[0..(-(suffix.size + 1))]
|
11
|
+
else
|
12
|
+
self
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
data/lib/csv/parser.rb
CHANGED
@@ -2,10 +2,12 @@
|
|
2
2
|
|
3
3
|
require "strscan"
|
4
4
|
|
5
|
+
require_relative "delete_suffix"
|
5
6
|
require_relative "match_p"
|
6
7
|
require_relative "row"
|
7
8
|
require_relative "table"
|
8
9
|
|
10
|
+
using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
9
11
|
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
10
12
|
|
11
13
|
class CSV
|
@@ -21,6 +23,15 @@ class CSV
|
|
21
23
|
@keeps = []
|
22
24
|
end
|
23
25
|
|
26
|
+
def each_line(row_separator)
|
27
|
+
position = pos
|
28
|
+
rest.each_line(row_separator) do |line|
|
29
|
+
position += line.bytesize
|
30
|
+
self.pos = position
|
31
|
+
yield(line)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
24
35
|
def keep_start
|
25
36
|
@keeps.push(pos)
|
26
37
|
end
|
@@ -52,21 +63,43 @@ class CSV
|
|
52
63
|
def each_line(row_separator)
|
53
64
|
buffer = nil
|
54
65
|
input = @scanner.rest
|
55
|
-
@scanner.
|
56
|
-
|
66
|
+
position = @scanner.pos
|
67
|
+
offset = 0
|
68
|
+
n_row_separator_chars = row_separator.size
|
69
|
+
while true
|
57
70
|
input.each_line(row_separator) do |line|
|
71
|
+
@scanner.pos += line.bytesize
|
58
72
|
if buffer
|
59
|
-
|
60
|
-
|
61
|
-
|
73
|
+
if n_row_separator_chars == 2 and
|
74
|
+
buffer.end_with?(row_separator[0]) and
|
75
|
+
line.start_with?(row_separator[1])
|
76
|
+
buffer << line[0]
|
77
|
+
line = line[1..-1]
|
78
|
+
position += buffer.bytesize + offset
|
79
|
+
@scanner.pos = position
|
80
|
+
offset = 0
|
81
|
+
yield(buffer)
|
82
|
+
buffer = nil
|
83
|
+
next if line.empty?
|
84
|
+
else
|
85
|
+
buffer << line
|
86
|
+
line = buffer
|
87
|
+
buffer = nil
|
88
|
+
end
|
62
89
|
end
|
63
90
|
if line.end_with?(row_separator)
|
91
|
+
position += line.bytesize + offset
|
92
|
+
@scanner.pos = position
|
93
|
+
offset = 0
|
64
94
|
yield(line)
|
65
95
|
else
|
66
96
|
buffer = line
|
67
97
|
end
|
68
98
|
end
|
69
|
-
|
99
|
+
break unless read_chunk
|
100
|
+
input = @scanner.rest
|
101
|
+
position = @scanner.pos
|
102
|
+
offset = -buffer.bytesize if buffer
|
70
103
|
end
|
71
104
|
yield(buffer) if buffer
|
72
105
|
end
|
@@ -125,6 +158,7 @@ class CSV
|
|
125
158
|
else
|
126
159
|
@scanner.pos = start
|
127
160
|
end
|
161
|
+
read_chunk if @scanner.eos?
|
128
162
|
end
|
129
163
|
|
130
164
|
def keep_drop
|
@@ -263,8 +297,10 @@ class CSV
|
|
263
297
|
@scanner ||= build_scanner
|
264
298
|
if quote_character.nil?
|
265
299
|
parse_no_quote(&block)
|
300
|
+
elsif @need_robust_parsing
|
301
|
+
parse_quotable_robust(&block)
|
266
302
|
else
|
267
|
-
|
303
|
+
parse_quotable_loose(&block)
|
268
304
|
end
|
269
305
|
rescue InvalidEncoding
|
270
306
|
if @scanner
|
@@ -285,8 +321,8 @@ class CSV
|
|
285
321
|
private
|
286
322
|
def prepare
|
287
323
|
prepare_variable
|
288
|
-
prepare_backslash
|
289
324
|
prepare_quote_character
|
325
|
+
prepare_backslash
|
290
326
|
prepare_skip_lines
|
291
327
|
prepare_strip
|
292
328
|
prepare_separators
|
@@ -298,6 +334,7 @@ class CSV
|
|
298
334
|
end
|
299
335
|
|
300
336
|
def prepare_variable
|
337
|
+
@need_robust_parsing = false
|
301
338
|
@encoding = @options[:encoding]
|
302
339
|
liberal_parsing = @options[:liberal_parsing]
|
303
340
|
if liberal_parsing
|
@@ -310,6 +347,7 @@ class CSV
|
|
310
347
|
@double_quote_outside_quote = false
|
311
348
|
@backslash_quote = false
|
312
349
|
end
|
350
|
+
@need_robust_parsing = true
|
313
351
|
else
|
314
352
|
@liberal_parsing = false
|
315
353
|
@backslash_quote = false
|
@@ -321,27 +359,33 @@ class CSV
|
|
321
359
|
@header_fields_converter = @options[:header_fields_converter]
|
322
360
|
end
|
323
361
|
|
324
|
-
def prepare_backslash
|
325
|
-
@backslash_character = "\\".encode(@encoding)
|
326
|
-
|
327
|
-
@escaped_backslash_character = Regexp.escape(@backslash_character)
|
328
|
-
@escaped_backslash = Regexp.new(@escaped_backslash_character)
|
329
|
-
end
|
330
|
-
|
331
362
|
def prepare_quote_character
|
332
363
|
@quote_character = @options[:quote_character]
|
333
364
|
if @quote_character.nil?
|
334
365
|
@escaped_quote_character = nil
|
335
366
|
@escaped_quote = nil
|
336
|
-
@backslash_quote_character = nil
|
337
367
|
else
|
338
368
|
@quote_character = @quote_character.to_s.encode(@encoding)
|
339
369
|
if @quote_character.length != 1
|
340
370
|
message = ":quote_char has to be nil or a single character String"
|
341
371
|
raise ArgumentError, message
|
342
372
|
end
|
373
|
+
@double_quote_character = @quote_character * 2
|
343
374
|
@escaped_quote_character = Regexp.escape(@quote_character)
|
344
375
|
@escaped_quote = Regexp.new(@escaped_quote_character)
|
376
|
+
end
|
377
|
+
end
|
378
|
+
|
379
|
+
def prepare_backslash
|
380
|
+
return unless @backslash_quote
|
381
|
+
|
382
|
+
@backslash_character = "\\".encode(@encoding)
|
383
|
+
|
384
|
+
@escaped_backslash_character = Regexp.escape(@backslash_character)
|
385
|
+
@escaped_backslash = Regexp.new(@escaped_backslash_character)
|
386
|
+
if @quote_character.nil?
|
387
|
+
@backslash_quote_character = nil
|
388
|
+
else
|
345
389
|
@backslash_quote_character =
|
346
390
|
@backslash_character + @escaped_quote_character
|
347
391
|
end
|
@@ -389,9 +433,18 @@ class CSV
|
|
389
433
|
if @quote_character
|
390
434
|
@strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
|
391
435
|
end
|
436
|
+
@need_robust_parsing = true
|
392
437
|
end
|
393
438
|
end
|
394
439
|
|
440
|
+
begin
|
441
|
+
StringScanner.new("x").scan("x")
|
442
|
+
rescue TypeError
|
443
|
+
@@string_scanner_scan_accept_string = false
|
444
|
+
else
|
445
|
+
@@string_scanner_scan_accept_string = true
|
446
|
+
end
|
447
|
+
|
395
448
|
def prepare_separators
|
396
449
|
@column_separator = @options[:column_separator].to_s.encode(@encoding)
|
397
450
|
@row_separator =
|
@@ -399,14 +452,19 @@ class CSV
|
|
399
452
|
|
400
453
|
@escaped_column_separator = Regexp.escape(@column_separator)
|
401
454
|
@escaped_first_column_separator = Regexp.escape(@column_separator[0])
|
402
|
-
@column_end = Regexp.new(@escaped_column_separator)
|
403
455
|
if @column_separator.size > 1
|
456
|
+
@column_end = Regexp.new(@escaped_column_separator)
|
404
457
|
@column_ends = @column_separator.each_char.collect do |char|
|
405
458
|
Regexp.new(Regexp.escape(char))
|
406
459
|
end
|
407
460
|
@first_column_separators = Regexp.new(@escaped_first_column_separator +
|
408
461
|
"+".encode(@encoding))
|
409
462
|
else
|
463
|
+
if @@string_scanner_scan_accept_string
|
464
|
+
@column_end = @column_separator
|
465
|
+
else
|
466
|
+
@column_end = Regexp.new(@escaped_column_separator)
|
467
|
+
end
|
410
468
|
@column_ends = nil
|
411
469
|
@first_column_separators = nil
|
412
470
|
end
|
@@ -421,6 +479,8 @@ class CSV
|
|
421
479
|
@row_ends = nil
|
422
480
|
end
|
423
481
|
|
482
|
+
@cr = "\r".encode(@encoding)
|
483
|
+
@lf = "\n".encode(@encoding)
|
424
484
|
@cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
|
425
485
|
@not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
|
426
486
|
end
|
@@ -436,19 +496,18 @@ class CSV
|
|
436
496
|
@quoted_value = Regexp.new("[^".encode(@encoding) +
|
437
497
|
no_quoted_values +
|
438
498
|
"]+".encode(@encoding))
|
499
|
+
end
|
500
|
+
if @escaped_strip
|
501
|
+
@split_column_separator = Regexp.new(@escaped_strip +
|
502
|
+
"*".encode(@encoding) +
|
503
|
+
@escaped_column_separator +
|
504
|
+
@escaped_strip +
|
505
|
+
"*".encode(@encoding))
|
439
506
|
else
|
440
|
-
if @
|
441
|
-
@split_column_separator = Regexp.new(@
|
442
|
-
"*".encode(@encoding) +
|
443
|
-
@escaped_column_separator +
|
444
|
-
@escaped_strip +
|
445
|
-
"*".encode(@encoding))
|
507
|
+
if @column_separator == " ".encode(@encoding)
|
508
|
+
@split_column_separator = Regexp.new(@escaped_column_separator)
|
446
509
|
else
|
447
|
-
|
448
|
-
@split_column_separator = @column_end
|
449
|
-
else
|
450
|
-
@split_column_separator = @column_separator
|
451
|
-
end
|
510
|
+
@split_column_separator = @column_separator
|
452
511
|
end
|
453
512
|
end
|
454
513
|
end
|
@@ -691,21 +750,17 @@ class CSV
|
|
691
750
|
end
|
692
751
|
|
693
752
|
def parse_no_quote(&block)
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
end
|
699
|
-
scanner.each_line(@row_separator) do |value|
|
700
|
-
next if @skip_lines and skip_line?(value)
|
701
|
-
value.chomp!
|
753
|
+
@scanner.each_line(@row_separator) do |line|
|
754
|
+
next if @skip_lines and skip_line?(line)
|
755
|
+
original_line = line
|
756
|
+
line = line.delete_suffix(@row_separator)
|
702
757
|
|
703
|
-
if
|
758
|
+
if line.empty?
|
704
759
|
next if @skip_blanks
|
705
760
|
row = []
|
706
761
|
else
|
707
|
-
|
708
|
-
row =
|
762
|
+
line = strip_value(line)
|
763
|
+
row = line.split(@split_column_separator, -1)
|
709
764
|
n_columns = row.size
|
710
765
|
i = 0
|
711
766
|
while i < n_columns
|
@@ -713,12 +768,67 @@ class CSV
|
|
713
768
|
i += 1
|
714
769
|
end
|
715
770
|
end
|
716
|
-
@last_line =
|
771
|
+
@last_line = original_line
|
772
|
+
emit_row(row, &block)
|
773
|
+
end
|
774
|
+
end
|
775
|
+
|
776
|
+
def parse_quotable_loose(&block)
|
777
|
+
@scanner.keep_start
|
778
|
+
@scanner.each_line(@row_separator) do |line|
|
779
|
+
if @skip_lines and skip_line?(line)
|
780
|
+
@scanner.keep_drop
|
781
|
+
@scanner.keep_start
|
782
|
+
next
|
783
|
+
end
|
784
|
+
original_line = line
|
785
|
+
line = line.delete_suffix(@row_separator)
|
786
|
+
|
787
|
+
if line.empty?
|
788
|
+
if @skip_blanks
|
789
|
+
@scanner.keep_drop
|
790
|
+
@scanner.keep_start
|
791
|
+
next
|
792
|
+
end
|
793
|
+
row = []
|
794
|
+
elsif line.include?(@cr) or line.include?(@lf)
|
795
|
+
@scanner.keep_back
|
796
|
+
@need_robust_parsing = true
|
797
|
+
return parse_quotable_robust(&block)
|
798
|
+
else
|
799
|
+
row = line.split(@split_column_separator, -1)
|
800
|
+
n_columns = row.size
|
801
|
+
i = 0
|
802
|
+
while i < n_columns
|
803
|
+
column = row[i]
|
804
|
+
if column.empty?
|
805
|
+
row[i] = nil
|
806
|
+
else
|
807
|
+
n_quotes = column.count(@quote_character)
|
808
|
+
if n_quotes.zero?
|
809
|
+
# no quote
|
810
|
+
elsif n_quotes == 2 and
|
811
|
+
column.start_with?(@quote_character) and
|
812
|
+
column.end_with?(@quote_character)
|
813
|
+
row[i] = column[1..-2]
|
814
|
+
else
|
815
|
+
@scanner.keep_back
|
816
|
+
@need_robust_parsing = true
|
817
|
+
return parse_quotable_robust(&block)
|
818
|
+
end
|
819
|
+
end
|
820
|
+
i += 1
|
821
|
+
end
|
822
|
+
end
|
823
|
+
@scanner.keep_drop
|
824
|
+
@scanner.keep_start
|
825
|
+
@last_line = original_line
|
717
826
|
emit_row(row, &block)
|
718
827
|
end
|
828
|
+
@scanner.keep_drop
|
719
829
|
end
|
720
830
|
|
721
|
-
def
|
831
|
+
def parse_quotable_robust(&block)
|
722
832
|
row = []
|
723
833
|
skip_needless_lines
|
724
834
|
start_row
|
data/lib/csv/version.rb
CHANGED
data/lib/csv.rb
CHANGED
@@ -504,9 +504,9 @@ class CSV
|
|
504
504
|
# <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
|
505
505
|
# but transcode it to UTF-8 before CSV parses it.
|
506
506
|
#
|
507
|
-
def self.foreach(path, **options, &block)
|
508
|
-
return to_enum(__method__, path, options) unless block_given?
|
509
|
-
open(path, options) do |csv|
|
507
|
+
def self.foreach(path, mode="r", **options, &block)
|
508
|
+
return to_enum(__method__, path, mode, options) unless block_given?
|
509
|
+
open(path, mode, options) do |csv|
|
510
510
|
csv.each(&block)
|
511
511
|
end
|
512
512
|
end
|
@@ -1232,16 +1232,8 @@ class CSV
|
|
1232
1232
|
#
|
1233
1233
|
# The data source must be open for reading.
|
1234
1234
|
#
|
1235
|
-
def each
|
1236
|
-
|
1237
|
-
enumerator = parser_enumerator
|
1238
|
-
begin
|
1239
|
-
while true
|
1240
|
-
yield enumerator.next
|
1241
|
-
end
|
1242
|
-
rescue StopIteration
|
1243
|
-
end
|
1244
|
-
self
|
1235
|
+
def each(&block)
|
1236
|
+
parser_enumerator.each(&block)
|
1245
1237
|
end
|
1246
1238
|
|
1247
1239
|
#
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2019-
|
12
|
+
date: 2019-04-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -83,6 +83,7 @@ files:
|
|
83
83
|
- lib/csv.rb
|
84
84
|
- lib/csv/core_ext/array.rb
|
85
85
|
- lib/csv/core_ext/string.rb
|
86
|
+
- lib/csv/delete_suffix.rb
|
86
87
|
- lib/csv/fields_converter.rb
|
87
88
|
- lib/csv/match_p.rb
|
88
89
|
- lib/csv/parser.rb
|
@@ -110,7 +111,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
110
111
|
version: '0'
|
111
112
|
requirements: []
|
112
113
|
rubyforge_project:
|
113
|
-
rubygems_version: 2.7.6
|
114
|
+
rubygems_version: 2.7.6.2
|
114
115
|
signing_key:
|
115
116
|
specification_version: 4
|
116
117
|
summary: CSV Reading and Writing
|