csv 3.1.7 → 3.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +81 -0
- data/README.md +5 -3
- data/doc/csv/options/common/col_sep.rdoc +1 -7
- data/doc/csv/options/common/row_sep.rdoc +0 -9
- data/doc/csv/options/generating/write_converters.rdoc +0 -8
- data/doc/csv/recipes/filtering.rdoc +158 -0
- data/doc/csv/recipes/generating.rdoc +298 -0
- data/doc/csv/recipes/parsing.rdoc +545 -0
- data/doc/csv/recipes/recipes.rdoc +6 -0
- data/lib/csv/fields_converter.rb +6 -2
- data/lib/csv/input_record_separator.rb +31 -0
- data/lib/csv/parser.rb +13 -10
- data/lib/csv/row.rb +499 -132
- data/lib/csv/table.rb +489 -66
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +2 -1
- data/lib/csv.rb +344 -169
- metadata +16 -6
data/lib/csv/parser.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
require "strscan"
|
4
4
|
|
5
5
|
require_relative "delete_suffix"
|
6
|
+
require_relative "input_record_separator"
|
6
7
|
require_relative "match_p"
|
7
8
|
require_relative "row"
|
8
9
|
require_relative "table"
|
@@ -479,9 +480,9 @@ class CSV
|
|
479
480
|
begin
|
480
481
|
StringScanner.new("x").scan("x")
|
481
482
|
rescue TypeError
|
482
|
-
|
483
|
+
STRING_SCANNER_SCAN_ACCEPT_STRING = false
|
483
484
|
else
|
484
|
-
|
485
|
+
STRING_SCANNER_SCAN_ACCEPT_STRING = true
|
485
486
|
end
|
486
487
|
|
487
488
|
def prepare_separators
|
@@ -505,7 +506,7 @@ class CSV
|
|
505
506
|
@first_column_separators = Regexp.new(@escaped_first_column_separator +
|
506
507
|
"+".encode(@encoding))
|
507
508
|
else
|
508
|
-
if
|
509
|
+
if STRING_SCANNER_SCAN_ACCEPT_STRING
|
509
510
|
@column_end = @column_separator
|
510
511
|
else
|
511
512
|
@column_end = Regexp.new(@escaped_column_separator)
|
@@ -526,7 +527,7 @@ class CSV
|
|
526
527
|
|
527
528
|
@cr = "\r".encode(@encoding)
|
528
529
|
@lf = "\n".encode(@encoding)
|
529
|
-
@
|
530
|
+
@line_end = Regexp.new("\r\n|\n|\r".encode(@encoding))
|
530
531
|
@not_line_end = Regexp.new("[^\r\n]+".encode(@encoding))
|
531
532
|
end
|
532
533
|
|
@@ -605,7 +606,7 @@ class CSV
|
|
605
606
|
# do nothing: ensure will set default
|
606
607
|
end
|
607
608
|
end
|
608
|
-
separator =
|
609
|
+
separator = InputRecordSeparator.value if separator == :auto
|
609
610
|
end
|
610
611
|
separator.to_s.encode(@encoding)
|
611
612
|
end
|
@@ -724,6 +725,8 @@ class CSV
|
|
724
725
|
end
|
725
726
|
end
|
726
727
|
|
728
|
+
SCANNER_TEST_CHUNK_SIZE =
|
729
|
+
Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10)
|
727
730
|
def build_scanner
|
728
731
|
inputs = @samples.collect do |sample|
|
729
732
|
UnoptimizedStringIO.new(sample)
|
@@ -733,10 +736,9 @@ class CSV
|
|
733
736
|
else
|
734
737
|
inputs << @input
|
735
738
|
end
|
736
|
-
chunk_size = ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"
|
737
739
|
InputsScanner.new(inputs,
|
738
740
|
@encoding,
|
739
|
-
chunk_size:
|
741
|
+
chunk_size: SCANNER_TEST_CHUNK_SIZE)
|
740
742
|
end
|
741
743
|
else
|
742
744
|
def build_scanner
|
@@ -785,6 +787,7 @@ class CSV
|
|
785
787
|
end
|
786
788
|
|
787
789
|
def skip_line?(line)
|
790
|
+
line = line.delete_suffix(@row_separator)
|
788
791
|
case @skip_lines
|
789
792
|
when String
|
790
793
|
line.include?(@skip_lines)
|
@@ -913,7 +916,7 @@ class CSV
|
|
913
916
|
message = "Any value after quoted field isn't allowed"
|
914
917
|
raise MalformedCSVError.new(message, @lineno)
|
915
918
|
elsif @unquoted_column_value and
|
916
|
-
(new_line = @scanner.scan(@
|
919
|
+
(new_line = @scanner.scan(@line_end))
|
917
920
|
ignore_broken_line
|
918
921
|
message = "Unquoted fields do not allow new line " +
|
919
922
|
"<#{new_line.inspect}>"
|
@@ -922,7 +925,7 @@ class CSV
|
|
922
925
|
ignore_broken_line
|
923
926
|
message = "Illegal quoting"
|
924
927
|
raise MalformedCSVError.new(message, @lineno)
|
925
|
-
elsif (new_line = @scanner.scan(@
|
928
|
+
elsif (new_line = @scanner.scan(@line_end))
|
926
929
|
ignore_broken_line
|
927
930
|
message = "New line must be <#{@row_separator.inspect}> " +
|
928
931
|
"not <#{new_line.inspect}>"
|
@@ -1088,7 +1091,7 @@ class CSV
|
|
1088
1091
|
|
1089
1092
|
def ignore_broken_line
|
1090
1093
|
@scanner.scan_all(@not_line_end)
|
1091
|
-
@scanner.scan_all(@
|
1094
|
+
@scanner.scan_all(@line_end)
|
1092
1095
|
@lineno += 1
|
1093
1096
|
end
|
1094
1097
|
|