csv 3.1.7 → 3.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +81 -0
- data/README.md +5 -3
- data/doc/csv/options/common/col_sep.rdoc +1 -7
- data/doc/csv/options/common/row_sep.rdoc +0 -9
- data/doc/csv/options/generating/write_converters.rdoc +0 -8
- data/doc/csv/recipes/filtering.rdoc +158 -0
- data/doc/csv/recipes/generating.rdoc +298 -0
- data/doc/csv/recipes/parsing.rdoc +545 -0
- data/doc/csv/recipes/recipes.rdoc +6 -0
- data/lib/csv/fields_converter.rb +6 -2
- data/lib/csv/input_record_separator.rb +31 -0
- data/lib/csv/parser.rb +13 -10
- data/lib/csv/row.rb +499 -132
- data/lib/csv/table.rb +489 -66
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +2 -1
- data/lib/csv.rb +344 -169
- metadata +16 -6
data/lib/csv/parser.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require "strscan"
|
4
4
|
|
5
5
|
require_relative "delete_suffix"
|
6
|
+
require_relative "input_record_separator"
|
6
7
|
require_relative "match_p"
|
7
8
|
require_relative "row"
|
8
9
|
require_relative "table"
|
@@ -479,9 +480,9 @@ class CSV
|
|
479
480
|
begin
|
480
481
|
StringScanner.new("x").scan("x")
|
481
482
|
rescue TypeError
|
482
|
-
|
483
|
+
STRING_SCANNER_SCAN_ACCEPT_STRING = false
|
483
484
|
else
|
484
|
-
|
485
|
+
STRING_SCANNER_SCAN_ACCEPT_STRING = true
|
485
486
|
end
|
486
487
|
|
487
488
|
def prepare_separators
|
@@ -505,7 +506,7 @@ class CSV
|
|
505
506
|
@first_column_separators = Regexp.new(@escaped_first_column_separator +
|
506
507
|
"+".encode(@encoding))
|
507
508
|
else
|
508
|
-
if
|
509
|
+
if STRING_SCANNER_SCAN_ACCEPT_STRING
|
509
510
|
@column_end = @column_separator
|
510
511
|
else
|
511
512
|
@column_end = Regexp.new(@escaped_column_separator)
|
@@ -526,7 +527,7 @@ class CSV
|
|
526
527
|
|
527
528
|
@cr = "\r".encode(@encoding)
|
528
529
|
@lf = "\n".encode(@encoding)
|
529
|
-
@
|
530
|
+
@line_end = Regexp.new("\r\n|\n|\r".encode(@encoding))
|
530
531
|
@not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
|
531
532
|
end
|
532
533
|
|
@@ -605,7 +606,7 @@ class CSV
|
|
605
606
|
# do nothing: ensure will set default
|
606
607
|
end
|
607
608
|
end
|
608
|
-
separator =
|
609
|
+
separator = InputRecordSeparator.value if separator == :auto
|
609
610
|
end
|
610
611
|
separator.to_s.encode(@encoding)
|
611
612
|
end
|
@@ -724,6 +725,8 @@ class CSV
|
|
724
725
|
end
|
725
726
|
end
|
726
727
|
|
728
|
+
SCANNER_TEST_CHUNK_SIZE =
|
729
|
+
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
|
727
730
|
def build_scanner
|
728
731
|
inputs = @samples.collect do |sample|
|
729
732
|
UnoptimizedStringIO.new(sample)
|
@@ -733,10 +736,9 @@ class CSV
|
|
733
736
|
else
|
734
737
|
inputs << @input
|
735
738
|
end
|
736
|
-
chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
|
737
739
|
InputsScanner.new(inputs,
|
738
740
|
@encoding,
|
739
|
-
chunk_size:
|
741
|
+
chunk_size: SCANNER_TEST_CHUNK_SIZE)
|
740
742
|
end
|
741
743
|
else
|
742
744
|
def build_scanner
|
@@ -785,6 +787,7 @@ class CSV
|
|
785
787
|
end
|
786
788
|
|
787
789
|
def skip_line?(line)
|
790
|
+
line = line.delete_suffix(@row_separator)
|
788
791
|
case @skip_lines
|
789
792
|
when String
|
790
793
|
line.include?(@skip_lines)
|
@@ -913,7 +916,7 @@ class CSV
|
|
913
916
|
message = "Any value after quoted field isn't allowed"
|
914
917
|
raise MalformedCSVError.new(message, @lineno)
|
915
918
|
elsif @unquoted_column_value and
|
916
|
-
(new_line = @scanner.scan(@
|
919
|
+
(new_line = @scanner.scan(@line_end))
|
917
920
|
ignore_broken_line
|
918
921
|
message = "Unquoted fields do not allow new line " +
|
919
922
|
"<#{new_line.inspect}>"
|
@@ -922,7 +925,7 @@ class CSV
|
|
922
925
|
ignore_broken_line
|
923
926
|
message = "Illegal quoting"
|
924
927
|
raise MalformedCSVError.new(message, @lineno)
|
925
|
-
elsif (new_line = @scanner.scan(@
|
928
|
+
elsif (new_line = @scanner.scan(@line_end))
|
926
929
|
ignore_broken_line
|
927
930
|
message = "New line must be <#{@row_separator.inspect}> " +
|
928
931
|
"not <#{new_line.inspect}>"
|
@@ -1088,7 +1091,7 @@ class CSV
|
|
1088
1091
|
|
1089
1092
|
def ignore_broken_line
|
1090
1093
|
@scanner.scan_all(@not_line_end)
|
1091
|
-
@scanner.scan_all(@
|
1094
|
+
@scanner.scan_all(@line_end)
|
1092
1095
|
@lineno += 1
|
1093
1096
|
end
|
1094
1097
|
|