csv 3.0.1 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/{news.md → NEWS.md} +29 -0
- data/lib/csv.rb +222 -614
- data/lib/csv/fields_converter.rb +78 -0
- data/lib/csv/match_p.rb +20 -0
- data/lib/csv/parser.rb +713 -0
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +144 -0
- metadata +8 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 25367f06751ab916228ddcffcbc857bc13ca1e7fcc65a908fcfc7c974e5473f6
|
4
|
+
data.tar.gz: 92ff4c8f3b96219b9d74fc849311afd0cb97f3d124c77250878402fc006ce2ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 25584f3c7ccf6ffa990dfa5a58cb4564092f51c3145fccb0b54252210096d8d322044c72647be36d7151362fe02e76c3ab103f99615ac42e9f1621ba6f2e9aa4
|
7
|
+
data.tar.gz: 2e0bb6973a005ae822b08bf6c13ff1681bf7c599e2c966f8bfd19ecffc6083fb09a03b74bbb47019a44bda729c0d173bed864815bffa670d19684092da0f128d
|
data/{news.md → NEWS.md}
RENAMED
@@ -1,5 +1,34 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.0.2 - 2018-12-23
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Changed to use strscan in parser.
|
8
|
+
[GitHub#52][Patch by 284km]
|
9
|
+
|
10
|
+
* Improves CSV write performance.
|
11
|
+
3.0.2 will be about 2 times faster than 3.0.1.
|
12
|
+
|
13
|
+
* Improves CSV parse performance for complex cases.
|
14
|
+
3.0.2 will be about 2 times faster than 3.0.1.
|
15
|
+
|
16
|
+
### Fixes
|
17
|
+
|
18
|
+
* Fixed a parse error bug for newline-only input with the `headers` option.
|
19
|
+
[GitHub#53][Reported by Chris Beer]
|
20
|
+
|
21
|
+
* Fixed some typos in document.
|
22
|
+
[GitHub#54][Patch by Victor Shepelev]
|
23
|
+
|
24
|
+
### Thanks
|
25
|
+
|
26
|
+
* 284km
|
27
|
+
|
28
|
+
* Chris Beer
|
29
|
+
|
30
|
+
* Victor Shepelev
|
31
|
+
|
3
32
|
## 3.0.1 - 2018-12-07
|
4
33
|
|
5
34
|
### Improvements
|
data/lib/csv.rb
CHANGED
@@ -93,36 +93,22 @@ require "forwardable"
|
|
93
93
|
require "English"
|
94
94
|
require "date"
|
95
95
|
require "stringio"
|
96
|
-
require_relative "csv/table"
|
97
|
-
require_relative "csv/row"
|
98
96
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
self =~ pattern
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
refine Regexp do
|
110
|
-
def match?(string)
|
111
|
-
self =~ string
|
112
|
-
end
|
113
|
-
end
|
114
|
-
end
|
115
|
-
end
|
97
|
+
require_relative "csv/fields_converter"
|
98
|
+
require_relative "csv/match_p"
|
99
|
+
require_relative "csv/parser"
|
100
|
+
require_relative "csv/row"
|
101
|
+
require_relative "csv/table"
|
102
|
+
require_relative "csv/writer"
|
116
103
|
|
117
|
-
|
118
|
-
end
|
104
|
+
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
119
105
|
|
120
106
|
#
|
121
107
|
# This class provides a complete interface to CSV files and data. It offers
|
122
108
|
# tools to enable you to read and write to and from Strings or IO objects, as
|
123
109
|
# needed.
|
124
110
|
#
|
125
|
-
# The most generic interface of
|
111
|
+
# The most generic interface of the library is:
|
126
112
|
#
|
127
113
|
# csv = CSV.new(string_or_io, **options)
|
128
114
|
#
|
@@ -204,18 +190,18 @@ end
|
|
204
190
|
# # Headers are part of data
|
205
191
|
# data = CSV.parse(<<~ROWS, headers: true)
|
206
192
|
# Name,Department,Salary
|
207
|
-
# Bob,
|
193
|
+
# Bob,Engineering,1000
|
208
194
|
# Jane,Sales,2000
|
209
195
|
# John,Management,5000
|
210
196
|
# ROWS
|
211
197
|
#
|
212
198
|
# data.class #=> CSV::Table
|
213
|
-
# data.first #=> #<CSV::Row "Name":"Bob" "Department":"
|
214
|
-
# data.first.to_h #=> {"Name"=>"Bob", "Department"=>"
|
199
|
+
# data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engineering" "Salary":"1000">
|
200
|
+
# data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"}
|
215
201
|
#
|
216
202
|
# # Headers provided by developer
|
217
203
|
# data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
|
218
|
-
# data.first #=> #<CSV::Row name:"Bob" department:"
|
204
|
+
# data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
|
219
205
|
#
|
220
206
|
# === Typed data reading
|
221
207
|
#
|
@@ -902,76 +888,104 @@ class CSV
|
|
902
888
|
# Options cannot be overridden in the instance methods for performance reasons,
|
903
889
|
# so be sure to set what you want here.
|
904
890
|
#
|
905
|
-
def initialize(data,
|
906
|
-
|
907
|
-
|
908
|
-
|
891
|
+
def initialize(data,
|
892
|
+
col_sep: ",",
|
893
|
+
row_sep: :auto,
|
894
|
+
quote_char: '"',
|
895
|
+
field_size_limit: nil,
|
896
|
+
converters: nil,
|
897
|
+
unconverted_fields: nil,
|
898
|
+
headers: false,
|
899
|
+
return_headers: false,
|
900
|
+
write_headers: nil,
|
901
|
+
header_converters: nil,
|
902
|
+
skip_blanks: false,
|
903
|
+
force_quotes: false,
|
904
|
+
skip_lines: nil,
|
905
|
+
liberal_parsing: false,
|
906
|
+
internal_encoding: nil,
|
907
|
+
external_encoding: nil,
|
908
|
+
encoding: nil,
|
909
909
|
nil_value: nil,
|
910
910
|
empty_value: "")
|
911
911
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
912
912
|
|
913
913
|
# create the IO object we will read from
|
914
914
|
@io = data.is_a?(String) ? StringIO.new(data) : data
|
915
|
-
@prefix_io = nil # cache for input data possibly read by init_separators
|
916
915
|
@encoding = determine_encoding(encoding, internal_encoding)
|
917
|
-
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
|
922
|
-
@
|
923
|
-
@
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
948
|
-
|
949
|
-
|
950
|
-
|
951
|
-
|
952
|
-
|
916
|
+
|
917
|
+
@base_fields_converter_options = {
|
918
|
+
nil_value: nil_value,
|
919
|
+
empty_value: empty_value,
|
920
|
+
}
|
921
|
+
@initial_converters = converters
|
922
|
+
@initial_header_converters = header_converters
|
923
|
+
|
924
|
+
@parser_options = {
|
925
|
+
column_separator: col_sep,
|
926
|
+
row_separator: row_sep,
|
927
|
+
quote_character: quote_char,
|
928
|
+
field_size_limit: field_size_limit,
|
929
|
+
unconverted_fields: unconverted_fields,
|
930
|
+
headers: headers,
|
931
|
+
return_headers: return_headers,
|
932
|
+
skip_blanks: skip_blanks,
|
933
|
+
skip_lines: skip_lines,
|
934
|
+
liberal_parsing: liberal_parsing,
|
935
|
+
encoding: @encoding,
|
936
|
+
nil_value: nil_value,
|
937
|
+
empty_value: empty_value,
|
938
|
+
}
|
939
|
+
@parser = nil
|
940
|
+
|
941
|
+
@writer_options = {
|
942
|
+
encoding: @encoding,
|
943
|
+
force_encoding: (not encoding.nil?),
|
944
|
+
force_quotes: force_quotes,
|
945
|
+
headers: headers,
|
946
|
+
write_headers: write_headers,
|
947
|
+
column_separator: col_sep,
|
948
|
+
row_separator: row_sep,
|
949
|
+
quote_character: quote_char,
|
950
|
+
}
|
951
|
+
|
952
|
+
@writer = nil
|
953
|
+
writer if @writer_options[:write_headers]
|
953
954
|
end
|
954
955
|
|
955
956
|
#
|
956
957
|
# The encoded <tt>:col_sep</tt> used in parsing and writing. See CSV::new
|
957
958
|
# for details.
|
958
959
|
#
|
959
|
-
|
960
|
+
def col_sep
|
961
|
+
parser.column_separator
|
962
|
+
end
|
963
|
+
|
960
964
|
#
|
961
965
|
# The encoded <tt>:row_sep</tt> used in parsing and writing. See CSV::new
|
962
966
|
# for details.
|
963
967
|
#
|
964
|
-
|
968
|
+
def row_sep
|
969
|
+
parser.row_separator
|
970
|
+
end
|
971
|
+
|
965
972
|
#
|
966
973
|
# The encoded <tt>:quote_char</tt> used in parsing and writing. See CSV::new
|
967
974
|
# for details.
|
968
975
|
#
|
969
|
-
|
976
|
+
def quote_char
|
977
|
+
parser.quote_character
|
978
|
+
end
|
979
|
+
|
970
980
|
# The limit for field size, if any. See CSV::new for details.
|
971
|
-
|
981
|
+
def field_size_limit
|
982
|
+
parser.field_size_limit
|
983
|
+
end
|
972
984
|
|
973
985
|
# The regex marking a line as a comment. See CSV::new for details.
|
974
|
-
|
986
|
+
def skip_lines
|
987
|
+
parser.skip_lines
|
988
|
+
end
|
975
989
|
|
976
990
|
#
|
977
991
|
# Returns the current list of converters in effect. See CSV::new for details.
|
@@ -979,7 +993,7 @@ class CSV
|
|
979
993
|
# as is.
|
980
994
|
#
|
981
995
|
def converters
|
982
|
-
|
996
|
+
fields_converter.map do |converter|
|
983
997
|
name = Converters.rassoc(converter)
|
984
998
|
name ? name.first : converter
|
985
999
|
end
|
@@ -988,42 +1002,68 @@ class CSV
|
|
988
1002
|
# Returns +true+ if unconverted_fields() will be added to parsed results. See CSV::new
|
989
1003
|
# for details.
|
990
1004
|
#
|
991
|
-
def unconverted_fields?
|
1005
|
+
def unconverted_fields?
|
1006
|
+
parser.unconverted_fields?
|
1007
|
+
end
|
1008
|
+
|
992
1009
|
#
|
993
1010
|
# Returns +nil+ if headers will not be used, +true+ if they will but have not
|
994
1011
|
# yet been read, or the actual headers after they have been read. See
|
995
1012
|
# CSV::new for details.
|
996
1013
|
#
|
997
1014
|
def headers
|
998
|
-
|
1015
|
+
if @writer
|
1016
|
+
@writer.headers
|
1017
|
+
else
|
1018
|
+
parsed_headers = parser.headers
|
1019
|
+
return parsed_headers if parsed_headers
|
1020
|
+
raw_headers = @parser_options[:headers]
|
1021
|
+
raw_headers = nil if raw_headers == false
|
1022
|
+
raw_headers
|
1023
|
+
end
|
999
1024
|
end
|
1000
1025
|
#
|
1001
1026
|
# Returns +true+ if headers will be returned as a row of results.
|
1002
1027
|
# See CSV::new for details.
|
1003
1028
|
#
|
1004
|
-
def return_headers?
|
1029
|
+
def return_headers?
|
1030
|
+
parser.return_headers?
|
1031
|
+
end
|
1032
|
+
|
1005
1033
|
# Returns +true+ if headers are written in output. See CSV::new for details.
|
1006
|
-
def write_headers?
|
1034
|
+
def write_headers?
|
1035
|
+
@writer_options[:write_headers]
|
1036
|
+
end
|
1037
|
+
|
1007
1038
|
#
|
1008
1039
|
# Returns the current list of converters in effect for headers. See CSV::new
|
1009
1040
|
# for details. Built-in converters will be returned by name, while others
|
1010
1041
|
# will be returned as is.
|
1011
1042
|
#
|
1012
1043
|
def header_converters
|
1013
|
-
|
1044
|
+
header_fields_converter.map do |converter|
|
1014
1045
|
name = HeaderConverters.rassoc(converter)
|
1015
1046
|
name ? name.first : converter
|
1016
1047
|
end
|
1017
1048
|
end
|
1049
|
+
|
1018
1050
|
#
|
1019
1051
|
# Returns +true+ if blank lines are skipped by the parser. See CSV::new
|
1020
1052
|
# for details.
|
1021
1053
|
#
|
1022
|
-
def skip_blanks?
|
1054
|
+
def skip_blanks?
|
1055
|
+
parser.skip_blanks?
|
1056
|
+
end
|
1057
|
+
|
1023
1058
|
# Returns +true+ if all output fields are quoted. See CSV::new for details.
|
1024
|
-
def force_quotes?
|
1059
|
+
def force_quotes?
|
1060
|
+
@writer_options[:force_quotes]
|
1061
|
+
end
|
1062
|
+
|
1025
1063
|
# Returns +true+ if illegal input is handled. See CSV::new for details.
|
1026
|
-
def liberal_parsing?
|
1064
|
+
def liberal_parsing?
|
1065
|
+
parser.liberal_parsing?
|
1066
|
+
end
|
1027
1067
|
|
1028
1068
|
#
|
1029
1069
|
# The Encoding CSV is parsing or writing in. This will be the Encoding you
|
@@ -1032,10 +1072,23 @@ class CSV
|
|
1032
1072
|
attr_reader :encoding
|
1033
1073
|
|
1034
1074
|
#
|
1035
|
-
# The line number of the last row read from this file.
|
1075
|
+
# The line number of the last row read from this file. Fields with nested
|
1036
1076
|
# line-end characters will not affect this count.
|
1037
1077
|
#
|
1038
|
-
|
1078
|
+
def lineno
|
1079
|
+
if @writer
|
1080
|
+
@writer.lineno
|
1081
|
+
else
|
1082
|
+
parser.lineno
|
1083
|
+
end
|
1084
|
+
end
|
1085
|
+
|
1086
|
+
#
|
1087
|
+
# The last row read from this file.
|
1088
|
+
#
|
1089
|
+
def line
|
1090
|
+
parser.line
|
1091
|
+
end
|
1039
1092
|
|
1040
1093
|
### IO and StringIO Delegation ###
|
1041
1094
|
|
@@ -1049,9 +1102,9 @@ class CSV
|
|
1049
1102
|
|
1050
1103
|
# Rewinds the underlying IO object and resets CSV's lineno() counter.
|
1051
1104
|
def rewind
|
1052
|
-
@
|
1053
|
-
@
|
1054
|
-
|
1105
|
+
@parser = nil
|
1106
|
+
@parser_enumerator = nil
|
1107
|
+
@writer.rewind if @writer
|
1055
1108
|
@io.rewind
|
1056
1109
|
end
|
1057
1110
|
|
@@ -1065,34 +1118,8 @@ class CSV
|
|
1065
1118
|
# The data source must be open for writing.
|
1066
1119
|
#
|
1067
1120
|
def <<(row)
|
1068
|
-
|
1069
|
-
|
1070
|
-
parse_headers # won't read data for Array or String
|
1071
|
-
end
|
1072
|
-
|
1073
|
-
# handle CSV::Row objects and Hashes
|
1074
|
-
row = case row
|
1075
|
-
when self.class::Row then row.fields
|
1076
|
-
when Hash then @headers.map { |header| row[header] }
|
1077
|
-
else row
|
1078
|
-
end
|
1079
|
-
|
1080
|
-
@headers = row if header_row?
|
1081
|
-
@lineno += 1
|
1082
|
-
|
1083
|
-
output = row.map(&@quote).join(@col_sep) + @row_sep # quote and separate
|
1084
|
-
if @io.is_a?(StringIO) and
|
1085
|
-
output.encoding != (encoding = raw_encoding)
|
1086
|
-
if @force_encoding
|
1087
|
-
output = output.encode(encoding)
|
1088
|
-
elsif (compatible_encoding = Encoding.compatible?(@io.string, output))
|
1089
|
-
@io.set_encoding(compatible_encoding)
|
1090
|
-
@io.seek(0, IO::SEEK_END)
|
1091
|
-
end
|
1092
|
-
end
|
1093
|
-
@io << output
|
1094
|
-
|
1095
|
-
self # for chaining
|
1121
|
+
writer << row
|
1122
|
+
self
|
1096
1123
|
end
|
1097
1124
|
alias_method :add_row, :<<
|
1098
1125
|
alias_method :puts, :<<
|
@@ -1113,7 +1140,7 @@ class CSV
|
|
1113
1140
|
# converted field or the field itself.
|
1114
1141
|
#
|
1115
1142
|
def convert(name = nil, &converter)
|
1116
|
-
add_converter(
|
1143
|
+
fields_converter.add_converter(name, &converter)
|
1117
1144
|
end
|
1118
1145
|
|
1119
1146
|
#
|
@@ -1128,10 +1155,7 @@ class CSV
|
|
1128
1155
|
# effect.
|
1129
1156
|
#
|
1130
1157
|
def header_convert(name = nil, &converter)
|
1131
|
-
add_converter(
|
1132
|
-
self.class::HeaderConverters,
|
1133
|
-
name,
|
1134
|
-
&converter )
|
1158
|
+
header_fields_converter.add_converter(name, &converter)
|
1135
1159
|
end
|
1136
1160
|
|
1137
1161
|
include Enumerable
|
@@ -1143,14 +1167,8 @@ class CSV
|
|
1143
1167
|
#
|
1144
1168
|
# The data source must be open for reading.
|
1145
1169
|
#
|
1146
|
-
def each
|
1147
|
-
|
1148
|
-
while row = shift
|
1149
|
-
yield row
|
1150
|
-
end
|
1151
|
-
else
|
1152
|
-
to_enum
|
1153
|
-
end
|
1170
|
+
def each(&block)
|
1171
|
+
parser.parse(&block)
|
1154
1172
|
end
|
1155
1173
|
|
1156
1174
|
#
|
@@ -1160,8 +1178,9 @@ class CSV
|
|
1160
1178
|
#
|
1161
1179
|
def read
|
1162
1180
|
rows = to_a
|
1163
|
-
|
1164
|
-
|
1181
|
+
headers = parser.headers
|
1182
|
+
if headers
|
1183
|
+
Table.new(rows, headers: headers)
|
1165
1184
|
else
|
1166
1185
|
rows
|
1167
1186
|
end
|
@@ -1170,7 +1189,7 @@ class CSV
|
|
1170
1189
|
|
1171
1190
|
# Returns +true+ if the next row read will be a header row.
|
1172
1191
|
def header_row?
|
1173
|
-
|
1192
|
+
parser.header_row?
|
1174
1193
|
end
|
1175
1194
|
|
1176
1195
|
#
|
@@ -1181,177 +1200,11 @@ class CSV
|
|
1181
1200
|
# The data source must be open for reading.
|
1182
1201
|
#
|
1183
1202
|
def shift
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
# handle headers not based on document content
|
1190
|
-
if header_row? and @return_headers and
|
1191
|
-
[Array, String].include? @use_headers.class
|
1192
|
-
if @unconverted_fields
|
1193
|
-
return add_unconverted_fields(parse_headers, Array.new)
|
1194
|
-
else
|
1195
|
-
return parse_headers
|
1196
|
-
end
|
1197
|
-
end
|
1198
|
-
|
1199
|
-
#
|
1200
|
-
# it can take multiple calls to <tt>@io.gets()</tt> to get a full line,
|
1201
|
-
# because of \r and/or \n characters embedded in quoted fields
|
1202
|
-
#
|
1203
|
-
in_extended_col = false
|
1204
|
-
csv = Array.new
|
1205
|
-
|
1206
|
-
loop do
|
1207
|
-
# add another read to the line
|
1208
|
-
if @prefix_io
|
1209
|
-
parse = @prefix_io.gets(@row_sep)
|
1210
|
-
if @prefix_io.eof?
|
1211
|
-
parse << (@io.gets(@row_sep) || "") unless parse.end_with?(@row_sep)
|
1212
|
-
@prefix_io = nil # avoid having to test @prefix_io.eof? in main code path
|
1213
|
-
end
|
1214
|
-
else
|
1215
|
-
return nil unless parse = @io.gets(@row_sep)
|
1216
|
-
end
|
1217
|
-
|
1218
|
-
if in_extended_col
|
1219
|
-
@line.concat(parse)
|
1220
|
-
else
|
1221
|
-
@line = parse.clone
|
1222
|
-
end
|
1223
|
-
|
1224
|
-
begin
|
1225
|
-
parse.sub!(@parsers[:line_end], "")
|
1226
|
-
rescue ArgumentError
|
1227
|
-
unless parse.valid_encoding?
|
1228
|
-
message = "Invalid byte sequence in #{parse.encoding}"
|
1229
|
-
raise MalformedCSVError.new(message, lineno + 1)
|
1230
|
-
end
|
1231
|
-
raise
|
1232
|
-
end
|
1233
|
-
|
1234
|
-
if csv.empty?
|
1235
|
-
#
|
1236
|
-
# I believe a blank line should be an <tt>Array.new</tt>, not Ruby 1.8
|
1237
|
-
# CSV's <tt>[nil]</tt>
|
1238
|
-
#
|
1239
|
-
if parse.empty?
|
1240
|
-
@lineno += 1
|
1241
|
-
if @skip_blanks
|
1242
|
-
next
|
1243
|
-
elsif @unconverted_fields
|
1244
|
-
return add_unconverted_fields(Array.new, Array.new)
|
1245
|
-
elsif @use_headers
|
1246
|
-
return self.class::Row.new(@headers, Array.new)
|
1247
|
-
else
|
1248
|
-
return Array.new
|
1249
|
-
end
|
1250
|
-
end
|
1251
|
-
end
|
1252
|
-
|
1253
|
-
next if @skip_lines and @skip_lines.match parse
|
1254
|
-
|
1255
|
-
parts = parse.split(@col_sep_split_separator, -1)
|
1256
|
-
if parts.empty?
|
1257
|
-
if in_extended_col
|
1258
|
-
csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
|
1259
|
-
else
|
1260
|
-
csv << nil
|
1261
|
-
end
|
1262
|
-
end
|
1263
|
-
|
1264
|
-
# This loop is the hot path of csv parsing. Some things may be non-dry
|
1265
|
-
# for a reason. Make sure to benchmark when refactoring.
|
1266
|
-
parts.each do |part|
|
1267
|
-
if in_extended_col
|
1268
|
-
# If we are continuing a previous column
|
1269
|
-
if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
|
1270
|
-
# extended column ends
|
1271
|
-
csv.last << part[0..-2]
|
1272
|
-
if csv.last.match?(@parsers[:stray_quote])
|
1273
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1274
|
-
lineno + 1)
|
1275
|
-
end
|
1276
|
-
csv.last.gsub!(@double_quote_char, @quote_char)
|
1277
|
-
in_extended_col = false
|
1278
|
-
else
|
1279
|
-
csv.last << part << @col_sep
|
1280
|
-
end
|
1281
|
-
elsif part.start_with?(@quote_char)
|
1282
|
-
# If we are starting a new quoted column
|
1283
|
-
if part.count(@quote_char) % 2 != 0
|
1284
|
-
# start an extended column
|
1285
|
-
csv << (part[1..-1] << @col_sep)
|
1286
|
-
in_extended_col = true
|
1287
|
-
elsif part.end_with?(@quote_char)
|
1288
|
-
# regular quoted column
|
1289
|
-
csv << part[1..-2]
|
1290
|
-
if csv.last.match?(@parsers[:stray_quote])
|
1291
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1292
|
-
lineno + 1)
|
1293
|
-
end
|
1294
|
-
csv.last.gsub!(@double_quote_char, @quote_char)
|
1295
|
-
elsif @liberal_parsing
|
1296
|
-
csv << part
|
1297
|
-
else
|
1298
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1299
|
-
lineno + 1)
|
1300
|
-
end
|
1301
|
-
elsif part.match?(@parsers[:quote_or_nl])
|
1302
|
-
# Unquoted field with bad characters.
|
1303
|
-
if part.match?(@parsers[:nl_or_lf])
|
1304
|
-
message = "Unquoted fields do not allow \\r or \\n"
|
1305
|
-
raise MalformedCSVError.new(message, lineno + 1)
|
1306
|
-
else
|
1307
|
-
if @liberal_parsing
|
1308
|
-
csv << part
|
1309
|
-
else
|
1310
|
-
raise MalformedCSVError.new("Illegal quoting", lineno + 1)
|
1311
|
-
end
|
1312
|
-
end
|
1313
|
-
else
|
1314
|
-
# Regular ole unquoted field.
|
1315
|
-
csv << (part.empty? ? nil : part)
|
1316
|
-
end
|
1317
|
-
end
|
1318
|
-
|
1319
|
-
# Replace tacked on @col_sep with @row_sep if we are still in an extended
|
1320
|
-
# column.
|
1321
|
-
csv[-1][-1] = @row_sep if in_extended_col
|
1322
|
-
|
1323
|
-
if in_extended_col
|
1324
|
-
# if we're at eof?(), a quoted field wasn't closed...
|
1325
|
-
if @io.eof? and !@prefix_io
|
1326
|
-
raise MalformedCSVError.new("Unclosed quoted field",
|
1327
|
-
lineno + 1)
|
1328
|
-
elsif @field_size_limit and csv.last.size >= @field_size_limit
|
1329
|
-
raise MalformedCSVError.new("Field size exceeded",
|
1330
|
-
lineno + 1)
|
1331
|
-
end
|
1332
|
-
# otherwise, we need to loop and pull some more data to complete the row
|
1333
|
-
else
|
1334
|
-
@lineno += 1
|
1335
|
-
|
1336
|
-
# save fields unconverted fields, if needed...
|
1337
|
-
unconverted = csv.dup if @unconverted_fields
|
1338
|
-
|
1339
|
-
if @use_headers
|
1340
|
-
# parse out header rows and handle CSV::Row conversions...
|
1341
|
-
csv = parse_headers(csv)
|
1342
|
-
else
|
1343
|
-
# convert fields, if needed...
|
1344
|
-
csv = convert_fields(csv)
|
1345
|
-
end
|
1346
|
-
|
1347
|
-
# inject unconverted fields and accessor, if requested...
|
1348
|
-
if @unconverted_fields and not csv.respond_to? :unconverted_fields
|
1349
|
-
add_unconverted_fields(csv, unconverted)
|
1350
|
-
end
|
1351
|
-
|
1352
|
-
# return the results
|
1353
|
-
break csv
|
1354
|
-
end
|
1203
|
+
@parser_enumerator ||= parser.parse
|
1204
|
+
begin
|
1205
|
+
@parser_enumerator.next
|
1206
|
+
rescue StopIteration
|
1207
|
+
nil
|
1355
1208
|
end
|
1356
1209
|
end
|
1357
1210
|
alias_method :gets, :shift
|
@@ -1376,15 +1229,19 @@ class CSV
|
|
1376
1229
|
# show encoding
|
1377
1230
|
str << " encoding:" << @encoding.name
|
1378
1231
|
# show other attributes
|
1379
|
-
|
1380
|
-
|
1381
|
-
if a = instance_variable_get("@#{attr_name}")
|
1232
|
+
["lineno", "col_sep", "row_sep", "quote_char"].each do |attr_name|
|
1233
|
+
if a = __send__(attr_name)
|
1382
1234
|
str << " " << attr_name << ":" << a.inspect
|
1383
1235
|
end
|
1384
1236
|
end
|
1385
|
-
|
1386
|
-
|
1237
|
+
["skip_blanks", "liberal_parsing"].each do |attr_name|
|
1238
|
+
if a = __send__("#{attr_name}?")
|
1239
|
+
str << " " << attr_name << ":" << a.inspect
|
1240
|
+
end
|
1387
1241
|
end
|
1242
|
+
_headers = headers
|
1243
|
+
_headers = headers
|
1244
|
+
str << " headers:" << _headers.inspect if _headers
|
1388
1245
|
str << ">"
|
1389
1246
|
begin
|
1390
1247
|
str.join('')
|
@@ -1400,7 +1257,7 @@ class CSV
|
|
1400
1257
|
|
1401
1258
|
def determine_encoding(encoding, internal_encoding)
|
1402
1259
|
# honor the IO encoding if we can, otherwise default to ASCII-8BIT
|
1403
|
-
io_encoding = raw_encoding
|
1260
|
+
io_encoding = raw_encoding
|
1404
1261
|
return io_encoding if io_encoding
|
1405
1262
|
|
1406
1263
|
return Encoding.find(internal_encoding) if internal_encoding
|
@@ -1413,210 +1270,17 @@ class CSV
|
|
1413
1270
|
Encoding.default_internal || Encoding.default_external
|
1414
1271
|
end
|
1415
1272
|
|
1416
|
-
|
1417
|
-
|
1418
|
-
|
1419
|
-
|
1420
|
-
# ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
|
1421
|
-
# +STDERR+ and any stream open for output only with a default
|
1422
|
-
# <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
|
1423
|
-
#
|
1424
|
-
# This method also establishes the quoting rules used for CSV output.
|
1425
|
-
#
|
1426
|
-
def init_separators(col_sep, row_sep, quote_char, force_quotes)
|
1427
|
-
# store the selected separators
|
1428
|
-
@col_sep = col_sep.to_s.encode(@encoding)
|
1429
|
-
if @col_sep == " "
|
1430
|
-
@col_sep_split_separator = Regexp.new(/#{Regexp.escape(@col_sep)}/)
|
1431
|
-
else
|
1432
|
-
@col_sep_split_separator = @col_sep
|
1433
|
-
end
|
1434
|
-
@row_sep = row_sep # encode after resolving :auto
|
1435
|
-
@quote_char = quote_char.to_s.encode(@encoding)
|
1436
|
-
@double_quote_char = @quote_char * 2
|
1437
|
-
|
1438
|
-
if @quote_char.length != 1
|
1439
|
-
raise ArgumentError, ":quote_char has to be a single character String"
|
1440
|
-
end
|
1441
|
-
|
1442
|
-
#
|
1443
|
-
# automatically discover row separator when requested
|
1444
|
-
# (not fully encoding safe)
|
1445
|
-
#
|
1446
|
-
if @row_sep == :auto
|
1447
|
-
saved_prefix = [] # sample chunks to be reprocessed later
|
1448
|
-
begin
|
1449
|
-
while @row_sep == :auto && @io.respond_to?(:gets)
|
1450
|
-
#
|
1451
|
-
# if we run out of data, it's probably a single line
|
1452
|
-
# (ensure will set default value)
|
1453
|
-
#
|
1454
|
-
break unless sample = @io.gets(nil, 1024)
|
1455
|
-
|
1456
|
-
cr = encode_str("\r")
|
1457
|
-
lf = encode_str("\n")
|
1458
|
-
# extend sample if we're unsure of the line ending
|
1459
|
-
if sample.end_with?(cr)
|
1460
|
-
sample << (@io.gets(nil, 1) || "")
|
1461
|
-
end
|
1462
|
-
|
1463
|
-
saved_prefix << sample
|
1464
|
-
|
1465
|
-
# try to find a standard separator
|
1466
|
-
last_char = nil
|
1467
|
-
sample.each_char.each_cons(2) do |char, next_char|
|
1468
|
-
last_char = next_char
|
1469
|
-
case char
|
1470
|
-
when cr
|
1471
|
-
if next_char == lf
|
1472
|
-
@row_sep = encode_str("\r\n")
|
1473
|
-
else
|
1474
|
-
@row_sep = cr
|
1475
|
-
end
|
1476
|
-
break
|
1477
|
-
when lf
|
1478
|
-
@row_sep = lf
|
1479
|
-
break
|
1480
|
-
end
|
1481
|
-
end
|
1482
|
-
if @row_sep == :auto
|
1483
|
-
case last_char
|
1484
|
-
when cr
|
1485
|
-
@row_sep = cr
|
1486
|
-
when lf
|
1487
|
-
@row_sep = lf
|
1488
|
-
end
|
1489
|
-
end
|
1490
|
-
end
|
1491
|
-
rescue IOError
|
1492
|
-
# do nothing: ensure will set default
|
1493
|
-
ensure
|
1494
|
-
#
|
1495
|
-
# set default if we failed to detect
|
1496
|
-
# (stream not opened for reading or a single line of data)
|
1497
|
-
#
|
1498
|
-
@row_sep = $INPUT_RECORD_SEPARATOR if @row_sep == :auto
|
1499
|
-
|
1500
|
-
# save sampled input for later parsing (but only if there is some!)
|
1501
|
-
saved_prefix = saved_prefix.join('')
|
1502
|
-
@prefix_io = StringIO.new(saved_prefix) unless saved_prefix.empty?
|
1503
|
-
end
|
1504
|
-
end
|
1505
|
-
@row_sep = @row_sep.to_s.encode(@encoding)
|
1506
|
-
|
1507
|
-
# establish quoting rules
|
1508
|
-
@force_quotes = force_quotes
|
1509
|
-
do_quote = lambda do |field|
|
1510
|
-
field = String(field)
|
1511
|
-
encoded_quote = @quote_char.encode(field.encoding)
|
1512
|
-
encoded_quote + field.gsub(encoded_quote, encoded_quote * 2) + encoded_quote
|
1273
|
+
def normalize_converters(converters)
|
1274
|
+
converters ||= []
|
1275
|
+
unless converters.is_a?(Array)
|
1276
|
+
converters = [converters]
|
1513
1277
|
end
|
1514
|
-
|
1515
|
-
|
1516
|
-
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
""
|
1521
|
-
else
|
1522
|
-
field = String(field) # Stringify fields
|
1523
|
-
# represent empty fields as empty quoted fields
|
1524
|
-
if field.empty? or
|
1525
|
-
field.count(quotable_chars).nonzero?
|
1526
|
-
do_quote.call(field)
|
1527
|
-
else
|
1528
|
-
field # unquoted field
|
1529
|
-
end
|
1530
|
-
end
|
1531
|
-
end
|
1532
|
-
end
|
1533
|
-
end
|
1534
|
-
|
1535
|
-
# Pre-compiles parsers and stores them by name for access during reads.
|
1536
|
-
def init_parsers(skip_blanks, field_size_limit, liberal_parsing)
|
1537
|
-
# store the parser behaviors
|
1538
|
-
@skip_blanks = skip_blanks
|
1539
|
-
@field_size_limit = field_size_limit
|
1540
|
-
@liberal_parsing = liberal_parsing
|
1541
|
-
|
1542
|
-
# prebuild Regexps for faster parsing
|
1543
|
-
esc_row_sep = escape_re(@row_sep)
|
1544
|
-
esc_quote = escape_re(@quote_char)
|
1545
|
-
@parsers = {
|
1546
|
-
# for detecting parse errors
|
1547
|
-
quote_or_nl: encode_re("[", esc_quote, "\r\n]"),
|
1548
|
-
nl_or_lf: encode_re("[\r\n]"),
|
1549
|
-
stray_quote: encode_re( "[^", esc_quote, "]", esc_quote,
|
1550
|
-
"[^", esc_quote, "]" ),
|
1551
|
-
# safer than chomp!()
|
1552
|
-
line_end: encode_re(esc_row_sep, "\\z"),
|
1553
|
-
# illegal unquoted characters
|
1554
|
-
return_newline: encode_str("\r\n")
|
1555
|
-
}
|
1556
|
-
end
|
1557
|
-
|
1558
|
-
#
|
1559
|
-
# Loads any converters requested during construction.
|
1560
|
-
#
|
1561
|
-
# If +field_name+ is set <tt>:converters</tt> (the default) field converters
|
1562
|
-
# are set. When +field_name+ is <tt>:header_converters</tt> header converters
|
1563
|
-
# are added instead.
|
1564
|
-
#
|
1565
|
-
# The <tt>:unconverted_fields</tt> option is also activated for
|
1566
|
-
# <tt>:converters</tt> calls, if requested.
|
1567
|
-
#
|
1568
|
-
def init_converters(converters, ivar_name, convert_method)
|
1569
|
-
converters = case converters
|
1570
|
-
when nil then []
|
1571
|
-
when Array then converters
|
1572
|
-
else [converters]
|
1573
|
-
end
|
1574
|
-
instance_variable_set(ivar_name, [])
|
1575
|
-
convert = method(convert_method)
|
1576
|
-
|
1577
|
-
# load converters
|
1578
|
-
converters.each do |converter|
|
1579
|
-
if converter.is_a? Proc # custom code block
|
1580
|
-
convert.call(&converter)
|
1581
|
-
else # by name
|
1582
|
-
convert.call(converter)
|
1583
|
-
end
|
1584
|
-
end
|
1585
|
-
end
|
1586
|
-
|
1587
|
-
# Stores the pattern of comments to skip from the provided options.
|
1588
|
-
#
|
1589
|
-
# The pattern must respond to +.match+, else ArgumentError is raised.
|
1590
|
-
# Strings are converted to a Regexp.
|
1591
|
-
#
|
1592
|
-
# See also CSV.new
|
1593
|
-
def init_comments(skip_lines)
|
1594
|
-
@skip_lines = skip_lines
|
1595
|
-
@skip_lines = Regexp.new(Regexp.escape(@skip_lines)) if @skip_lines.is_a? String
|
1596
|
-
if @skip_lines and not @skip_lines.respond_to?(:match)
|
1597
|
-
raise ArgumentError, ":skip_lines has to respond to matches"
|
1598
|
-
end
|
1599
|
-
end
|
1600
|
-
#
|
1601
|
-
# The actual work method for adding converters, used by both CSV.convert() and
|
1602
|
-
# CSV.header_convert().
|
1603
|
-
#
|
1604
|
-
# This method requires the +var_name+ of the instance variable to place the
|
1605
|
-
# converters in, the +const+ Hash to lookup named converters in, and the
|
1606
|
-
# normal parameters of the CSV.convert() and CSV.header_convert() methods.
|
1607
|
-
#
|
1608
|
-
def add_converter(var_name, const, name = nil, &converter)
|
1609
|
-
if name.nil? # custom converter
|
1610
|
-
instance_variable_get(var_name) << converter
|
1611
|
-
else # named converter
|
1612
|
-
combo = const[name]
|
1613
|
-
case combo
|
1614
|
-
when Array # combo converter
|
1615
|
-
combo.each do |converter_name|
|
1616
|
-
add_converter(var_name, const, converter_name)
|
1617
|
-
end
|
1618
|
-
else # individual named converter
|
1619
|
-
instance_variable_get(var_name) << combo
|
1278
|
+
converters.collect do |converter|
|
1279
|
+
case converter
|
1280
|
+
when Proc # custom code block
|
1281
|
+
[nil, converter]
|
1282
|
+
else # by name
|
1283
|
+
[converter, nil]
|
1620
1284
|
end
|
1621
1285
|
end
|
1622
1286
|
end
|
@@ -1630,129 +1294,73 @@ class CSV
|
|
1630
1294
|
#
|
1631
1295
|
def convert_fields(fields, headers = false)
|
1632
1296
|
if headers
|
1633
|
-
|
1297
|
+
header_fields_converter.convert(fields, nil, 0)
|
1634
1298
|
else
|
1635
|
-
|
1636
|
-
if !@use_headers and
|
1637
|
-
converters.empty? and
|
1638
|
-
@nil_value.nil? and
|
1639
|
-
@empty_value_is_empty_string
|
1640
|
-
return fields
|
1641
|
-
end
|
1299
|
+
fields_converter.convert(fields, @headers, lineno)
|
1642
1300
|
end
|
1301
|
+
end
|
1643
1302
|
|
1644
|
-
|
1645
|
-
|
1646
|
-
|
1647
|
-
|
1648
|
-
|
1649
|
-
|
1650
|
-
|
1651
|
-
|
1652
|
-
|
1653
|
-
|
1654
|
-
else # FieldInfo converter
|
1655
|
-
header = @use_headers && !headers ? @headers[index] : nil
|
1656
|
-
converter[field, FieldInfo.new(index, lineno, header)]
|
1657
|
-
end
|
1658
|
-
break unless field.is_a? String # short-circuit pipeline for speed
|
1659
|
-
end
|
1660
|
-
field # final state of each field, converted or original
|
1303
|
+
#
# Returns the encoding of the internal IO object, or +nil+ when the object
# exposes neither +internal_encoding+ nor +encoding+.
#
def raw_encoding
  if @io.respond_to?(:internal_encoding)
    # Fall back to the external encoding when no internal one is reported.
    @io.internal_encoding || @io.external_encoding
  elsif @io.respond_to?(:encoding)
    @io.encoding
  end
end
|
1663
1315
|
|
1664
|
-
|
1665
|
-
|
1666
|
-
|
1667
|
-
|
1668
|
-
|
1669
|
-
|
1670
|
-
|
1671
|
-
|
1672
|
-
|
1673
|
-
|
1674
|
-
|
1675
|
-
|
1676
|
-
@headers = case @use_headers # save headers
|
1677
|
-
# Array of headers
|
1678
|
-
when Array then @use_headers
|
1679
|
-
# CSV header String
|
1680
|
-
when String
|
1681
|
-
self.class.parse_line( @use_headers,
|
1682
|
-
col_sep: @col_sep,
|
1683
|
-
row_sep: @row_sep,
|
1684
|
-
quote_char: @quote_char )
|
1685
|
-
# first row is headers
|
1686
|
-
else row
|
1687
|
-
end
|
1688
|
-
|
1689
|
-
# prepare converted and unconverted copies
|
1690
|
-
row = @headers if row.nil?
|
1691
|
-
@headers = convert_fields(@headers, true)
|
1692
|
-
@headers.each { |h| h.freeze if h.is_a? String }
|
1693
|
-
|
1694
|
-
if @return_headers # return headers
|
1695
|
-
return self.class::Row.new(@headers, row, true)
|
1696
|
-
elsif not [Array, String].include? @use_headers.class # skip to field row
|
1697
|
-
return shift
|
1698
|
-
end
|
1316
|
+
# Returns the field converter pipeline, building it with
# +build_fields_converter+ on first use and caching it in
# <tt>@fields_converter</tt> for later calls.
def fields_converter
  @fields_converter ||= build_fields_converter
end
|
1319
|
+
|
1320
|
+
# Builds a new FieldsConverter for regular (non-header) fields.
#
# The options are <tt>@base_fields_converter_options</tt> merged with
# <tt>builtin_converters: Converters</tt>. Each (name, converter) pair
# produced by +normalize_converters+ for <tt>@initial_converters</tt> is
# then registered through +add_converter+, with the converter passed as the
# block.
#
# Returns the populated FieldsConverter.
def build_fields_converter
  options = @base_fields_converter_options.merge(builtin_converters: Converters)
  # The local is named +pipeline+ so it does not shadow #fields_converter.
  FieldsConverter.new(options).tap do |pipeline|
    normalize_converters(@initial_converters).each do |name, converter|
      pipeline.add_converter(name, &converter)
    end
  end
end
|
1700
1331
|
|
1701
|
-
|
1332
|
+
# Returns the header converter pipeline, building it with
# +build_header_fields_converter+ on first use and caching it in
# <tt>@header_fields_converter</tt> for later calls.
def header_fields_converter
  @header_fields_converter ||= build_header_fields_converter
end
|
1703
1335
|
|
1704
|
-
|
1705
|
-
|
1706
|
-
|
1707
|
-
|
1708
|
-
|
1709
|
-
|
1710
|
-
|
1711
|
-
|
1336
|
+
# Builds a new FieldsConverter for header fields.
#
# The options are <tt>@base_fields_converter_options</tt> merged with
# <tt>builtin_converters: HeaderConverters</tt> and <tt>accept_nil: true</tt>.
# Each (name, converter) pair produced by +normalize_converters+ for
# <tt>@initial_header_converters</tt> is then registered through
# +add_converter+, with the converter passed as the block.
#
# Returns the populated FieldsConverter.
def build_header_fields_converter
  options = @base_fields_converter_options.merge(
    builtin_converters: HeaderConverters,
    accept_nil: true
  )
  # The local is named +pipeline+ so it does not shadow #fields_converter.
  FieldsConverter.new(options).tap do |pipeline|
    normalize_converters(@initial_header_converters).each do |name, converter|
      pipeline.add_converter(name, &converter)
    end
  end
end
|
1716
1348
|
|
1717
|
-
|
1718
|
-
|
1719
|
-
# any characters that would change the meaning of a regular expression in the
|
1720
|
-
# encoding of +str+. Regular expression characters that cannot be transcoded
|
1721
|
-
# to the target encoding will be skipped and no escaping will be performed if
|
1722
|
-
# a backslash cannot be transcoded.
|
1723
|
-
#
|
1724
|
-
def escape_re(str)
|
1725
|
-
str.gsub(@re_chars) {|c| @re_esc + c}
|
1349
|
+
# Returns the Parser for <tt>@io</tt>, creating it from +parser_options+ on
# first use and caching it in <tt>@parser</tt> for later calls.
def parser
  @parser ||= Parser.new(@io, parser_options)
end
|
1727
1352
|
|
1728
|
-
|
1729
|
-
|
1730
|
-
|
1731
|
-
#
|
1732
|
-
def encode_re(*chunks)
|
1733
|
-
Regexp.new(encode_str(*chunks))
|
1353
|
+
# Returns a new Hash: <tt>@parser_options</tt> with the +:fields_converter+
# and +:header_fields_converter+ entries set to the current converter
# pipelines. <tt>@parser_options</tt> itself is not modified.
def parser_options
  converter_options = {
    fields_converter: fields_converter,
    header_fields_converter: header_fields_converter
  }
  @parser_options.merge(converter_options)
end
|
1735
1357
|
|
1736
|
-
|
1737
|
-
|
1738
|
-
# that encoding.
|
1739
|
-
#
|
1740
|
-
def encode_str(*chunks)
|
1741
|
-
chunks.map { |chunk| chunk.encode(@encoding.name) }.join('')
|
1358
|
+
# Returns the Writer for <tt>@io</tt>, creating it from +writer_options+ on
# first use and caching it in <tt>@writer</tt> for later calls.
def writer
  @writer ||= Writer.new(@io, writer_options)
end
|
1743
1361
|
|
1744
|
-
|
1745
|
-
|
1746
|
-
# encoding cannot be determined.
|
1747
|
-
#
|
1748
|
-
def raw_encoding(default = Encoding::ASCII_8BIT)
|
1749
|
-
if @io.respond_to? :internal_encoding
|
1750
|
-
@io.internal_encoding || @io.external_encoding
|
1751
|
-
elsif @io.respond_to? :encoding
|
1752
|
-
@io.encoding
|
1753
|
-
else
|
1754
|
-
default
|
1755
|
-
end
|
1362
|
+
# Returns a new Hash: <tt>@writer_options</tt> with the
# +:header_fields_converter+ entry set to the current header converter
# pipeline. <tt>@writer_options</tt> itself is not modified.
def writer_options
  @writer_options.merge(header_fields_converter: header_fields_converter)
end
|
1757
1365
|
end
|
1758
1366
|
|