csv 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +377 -0
- data/README.md +1 -0
- data/lib/csv/delete_suffix.rb +18 -0
- data/lib/csv/fields_converter.rb +78 -0
- data/lib/csv/match_p.rb +20 -0
- data/lib/csv/parser.rb +1092 -0
- data/lib/csv/row.rb +6 -6
- data/lib/csv/table.rb +28 -4
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +156 -0
- data/lib/csv.rb +330 -632
- metadata +24 -5
- data/news.md +0 -123
data/lib/csv.rb
CHANGED
@@ -93,36 +93,22 @@ require "forwardable"
|
|
93
93
|
require "English"
|
94
94
|
require "date"
|
95
95
|
require "stringio"
|
96
|
-
require_relative "csv/table"
|
97
|
-
require_relative "csv/row"
|
98
|
-
|
99
|
-
# This provides String#match? and Regexp#match? for Ruby 2.3.
|
100
|
-
unless String.method_defined?(:match?)
|
101
|
-
class CSV
|
102
|
-
module MatchP
|
103
|
-
refine String do
|
104
|
-
def match?(pattern)
|
105
|
-
self =~ pattern
|
106
|
-
end
|
107
|
-
end
|
108
96
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
end
|
97
|
+
require_relative "csv/fields_converter"
|
98
|
+
require_relative "csv/match_p"
|
99
|
+
require_relative "csv/parser"
|
100
|
+
require_relative "csv/row"
|
101
|
+
require_relative "csv/table"
|
102
|
+
require_relative "csv/writer"
|
116
103
|
|
117
|
-
|
118
|
-
end
|
104
|
+
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
119
105
|
|
120
106
|
#
|
121
107
|
# This class provides a complete interface to CSV files and data. It offers
|
122
108
|
# tools to enable you to read and write to and from Strings or IO objects, as
|
123
109
|
# needed.
|
124
110
|
#
|
125
|
-
# The most generic interface of
|
111
|
+
# The most generic interface of the library is:
|
126
112
|
#
|
127
113
|
# csv = CSV.new(string_or_io, **options)
|
128
114
|
#
|
@@ -141,7 +127,7 @@ end
|
|
141
127
|
# There are several specialized class methods for one-statement reading or writing,
|
142
128
|
# described in the Specialized Methods section.
|
143
129
|
#
|
144
|
-
# If a String passed into ::new, it is internally wrapped into a StringIO object.
|
130
|
+
# If a String is passed into ::new, it is internally wrapped into a StringIO object.
|
145
131
|
#
|
146
132
|
# +options+ can be used for specifying the particular CSV flavor (column
|
147
133
|
# separators, row separators, value quoting and so on), and for data conversion,
|
@@ -204,18 +190,18 @@ end
|
|
204
190
|
# # Headers are part of data
|
205
191
|
# data = CSV.parse(<<~ROWS, headers: true)
|
206
192
|
# Name,Department,Salary
|
207
|
-
# Bob,
|
193
|
+
# Bob,Engineering,1000
|
208
194
|
# Jane,Sales,2000
|
209
195
|
# John,Management,5000
|
210
196
|
# ROWS
|
211
197
|
#
|
212
198
|
# data.class #=> CSV::Table
|
213
|
-
# data.first #=> #<CSV::Row "Name":"Bob" "Department":"
|
214
|
-
# data.first.to_h #=> {"Name"=>"Bob", "Department"=>"
|
199
|
+
# data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engineering" "Salary":"1000">
|
200
|
+
# data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"}
|
215
201
|
#
|
216
202
|
# # Headers provided by developer
|
217
203
|
# data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
|
218
|
-
# data.first #=> #<CSV::Row name:"Bob" department:"
|
204
|
+
# data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
|
219
205
|
#
|
220
206
|
# === Typed data reading
|
221
207
|
#
|
@@ -411,6 +397,7 @@ class CSV
|
|
411
397
|
# <b><tt>:force_quotes</tt></b>:: +false+
|
412
398
|
# <b><tt>:skip_lines</tt></b>:: +nil+
|
413
399
|
# <b><tt>:liberal_parsing</tt></b>:: +false+
|
400
|
+
# <b><tt>:quote_empty</tt></b>:: +true+
|
414
401
|
#
|
415
402
|
DEFAULT_OPTIONS = {
|
416
403
|
col_sep: ",",
|
@@ -426,6 +413,7 @@ class CSV
|
|
426
413
|
force_quotes: false,
|
427
414
|
skip_lines: nil,
|
428
415
|
liberal_parsing: false,
|
416
|
+
quote_empty: true,
|
429
417
|
}.freeze
|
430
418
|
|
431
419
|
#
|
@@ -516,9 +504,9 @@ class CSV
|
|
516
504
|
# <tt>encoding: "UTF-32BE:UTF-8"</tt> would read UTF-32BE data from the file
|
517
505
|
# but transcode it to UTF-8 before CSV parses it.
|
518
506
|
#
|
519
|
-
def self.foreach(path, **options, &block)
|
520
|
-
return to_enum(__method__, path, options) unless block_given?
|
521
|
-
open(path, options) do |csv|
|
507
|
+
def self.foreach(path, mode="r", **options, &block)
|
508
|
+
return to_enum(__method__, path, mode, options) unless block_given?
|
509
|
+
open(path, mode, options) do |csv|
|
522
510
|
csv.each(&block)
|
523
511
|
end
|
524
512
|
end
|
@@ -548,7 +536,7 @@ class CSV
|
|
548
536
|
str.seek(0, IO::SEEK_END)
|
549
537
|
else
|
550
538
|
encoding = options[:encoding]
|
551
|
-
str
|
539
|
+
str = +""
|
552
540
|
str.force_encoding(encoding) if encoding
|
553
541
|
end
|
554
542
|
csv = new(str, options) # wrap
|
@@ -571,11 +559,11 @@ class CSV
|
|
571
559
|
#
|
572
560
|
def self.generate_line(row, **options)
|
573
561
|
options = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options)
|
574
|
-
str =
|
562
|
+
str = +""
|
575
563
|
if options[:encoding]
|
576
564
|
str.force_encoding(options[:encoding])
|
577
|
-
elsif field = row.find {
|
578
|
-
str.force_encoding(
|
565
|
+
elsif field = row.find {|f| f.is_a?(String)}
|
566
|
+
str.force_encoding(field.encoding)
|
579
567
|
end
|
580
568
|
(new(str, options) << row).string
|
581
569
|
end
|
@@ -890,83 +878,135 @@ class CSV
|
|
890
878
|
# attempt to parse input not conformant
|
891
879
|
# with RFC 4180, such as double quotes
|
892
880
|
# in unquoted fields.
|
893
|
-
# <b><tt>:nil_value</tt></b>::
|
894
|
-
#
|
881
|
+
# <b><tt>:nil_value</tt></b>:: When set to an object, any values of an
|
882
|
+
# empty field are replaced by the set
|
883
|
+
# object, not nil.
|
884
|
+
# <b><tt>:empty_value</tt></b>:: When set to an object, any values of a
|
885
|
+
# blank string field are replaced by
|
886
|
+
# the set object.
|
887
|
+
# <b><tt>:quote_empty</tt></b>:: TODO
|
888
|
+
# <b><tt>:write_converters</tt></b>:: TODO
|
889
|
+
# <b><tt>:write_nil_value</tt></b>:: TODO
|
890
|
+
# <b><tt>:write_empty_value</tt></b>:: TODO
|
891
|
+
# <b><tt>:strip</tt></b>:: TODO
|
895
892
|
#
|
896
893
|
# See CSV::DEFAULT_OPTIONS for the default settings.
|
897
894
|
#
|
898
895
|
# Options cannot be overridden in the instance methods for performance reasons,
|
899
896
|
# so be sure to set what you want here.
|
900
897
|
#
|
901
|
-
def initialize(data,
|
902
|
-
|
903
|
-
|
904
|
-
|
898
|
+
def initialize(data,
|
899
|
+
col_sep: ",",
|
900
|
+
row_sep: :auto,
|
901
|
+
quote_char: '"',
|
902
|
+
field_size_limit: nil,
|
903
|
+
converters: nil,
|
904
|
+
unconverted_fields: nil,
|
905
|
+
headers: false,
|
906
|
+
return_headers: false,
|
907
|
+
write_headers: nil,
|
908
|
+
header_converters: nil,
|
909
|
+
skip_blanks: false,
|
910
|
+
force_quotes: false,
|
911
|
+
skip_lines: nil,
|
912
|
+
liberal_parsing: false,
|
913
|
+
internal_encoding: nil,
|
914
|
+
external_encoding: nil,
|
915
|
+
encoding: nil,
|
905
916
|
nil_value: nil,
|
906
|
-
empty_value: ""
|
917
|
+
empty_value: "",
|
918
|
+
quote_empty: true,
|
919
|
+
write_converters: nil,
|
920
|
+
write_nil_value: nil,
|
921
|
+
write_empty_value: "",
|
922
|
+
strip: false)
|
907
923
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
908
924
|
|
909
925
|
# create the IO object we will read from
|
910
926
|
@io = data.is_a?(String) ? StringIO.new(data) : data
|
911
927
|
@encoding = determine_encoding(encoding, internal_encoding)
|
912
|
-
|
913
|
-
|
914
|
-
|
915
|
-
|
916
|
-
|
917
|
-
@
|
918
|
-
|
919
|
-
|
920
|
-
|
921
|
-
@
|
922
|
-
@
|
923
|
-
@
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
|
933
|
-
|
934
|
-
|
935
|
-
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
|
940
|
-
|
941
|
-
@
|
942
|
-
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
928
|
+
|
929
|
+
@base_fields_converter_options = {
|
930
|
+
nil_value: nil_value,
|
931
|
+
empty_value: empty_value,
|
932
|
+
}
|
933
|
+
@write_fields_converter_options = {
|
934
|
+
nil_value: write_nil_value,
|
935
|
+
empty_value: write_empty_value,
|
936
|
+
}
|
937
|
+
@initial_converters = converters
|
938
|
+
@initial_header_converters = header_converters
|
939
|
+
@initial_write_converters = write_converters
|
940
|
+
|
941
|
+
@parser_options = {
|
942
|
+
column_separator: col_sep,
|
943
|
+
row_separator: row_sep,
|
944
|
+
quote_character: quote_char,
|
945
|
+
field_size_limit: field_size_limit,
|
946
|
+
unconverted_fields: unconverted_fields,
|
947
|
+
headers: headers,
|
948
|
+
return_headers: return_headers,
|
949
|
+
skip_blanks: skip_blanks,
|
950
|
+
skip_lines: skip_lines,
|
951
|
+
liberal_parsing: liberal_parsing,
|
952
|
+
encoding: @encoding,
|
953
|
+
nil_value: nil_value,
|
954
|
+
empty_value: empty_value,
|
955
|
+
strip: strip,
|
956
|
+
}
|
957
|
+
@parser = nil
|
958
|
+
@parser_enumerator = nil
|
959
|
+
@eof_error = nil
|
960
|
+
|
961
|
+
@writer_options = {
|
962
|
+
encoding: @encoding,
|
963
|
+
force_encoding: (not encoding.nil?),
|
964
|
+
force_quotes: force_quotes,
|
965
|
+
headers: headers,
|
966
|
+
write_headers: write_headers,
|
967
|
+
column_separator: col_sep,
|
968
|
+
row_separator: row_sep,
|
969
|
+
quote_character: quote_char,
|
970
|
+
quote_empty: quote_empty,
|
971
|
+
}
|
972
|
+
|
973
|
+
@writer = nil
|
974
|
+
writer if @writer_options[:write_headers]
|
948
975
|
end
|
949
976
|
|
950
977
|
#
|
951
978
|
# The encoded <tt>:col_sep</tt> used in parsing and writing. See CSV::new
|
952
979
|
# for details.
|
953
980
|
#
|
954
|
-
|
981
|
+
def col_sep
|
982
|
+
parser.column_separator
|
983
|
+
end
|
984
|
+
|
955
985
|
#
|
956
986
|
# The encoded <tt>:row_sep</tt> used in parsing and writing. See CSV::new
|
957
987
|
# for details.
|
958
988
|
#
|
959
|
-
|
989
|
+
def row_sep
|
990
|
+
parser.row_separator
|
991
|
+
end
|
992
|
+
|
960
993
|
#
|
961
994
|
# The encoded <tt>:quote_char</tt> used in parsing and writing. See CSV::new
|
962
995
|
# for details.
|
963
996
|
#
|
964
|
-
|
997
|
+
def quote_char
|
998
|
+
parser.quote_character
|
999
|
+
end
|
1000
|
+
|
965
1001
|
# The limit for field size, if any. See CSV::new for details.
|
966
|
-
|
1002
|
+
def field_size_limit
|
1003
|
+
parser.field_size_limit
|
1004
|
+
end
|
967
1005
|
|
968
1006
|
# The regex marking a line as a comment. See CSV::new for details.
|
969
|
-
|
1007
|
+
def skip_lines
|
1008
|
+
parser.skip_lines
|
1009
|
+
end
|
970
1010
|
|
971
1011
|
#
|
972
1012
|
# Returns the current list of converters in effect. See CSV::new for details.
|
@@ -974,7 +1014,7 @@ class CSV
|
|
974
1014
|
# as is.
|
975
1015
|
#
|
976
1016
|
def converters
|
977
|
-
|
1017
|
+
parser_fields_converter.map do |converter|
|
978
1018
|
name = Converters.rassoc(converter)
|
979
1019
|
name ? name.first : converter
|
980
1020
|
end
|
@@ -983,42 +1023,68 @@ class CSV
|
|
983
1023
|
# Returns +true+ if unconverted_fields() is added to parsed results. See CSV::new
|
984
1024
|
# for details.
|
985
1025
|
#
|
986
|
-
def unconverted_fields?
|
1026
|
+
def unconverted_fields?
|
1027
|
+
parser.unconverted_fields?
|
1028
|
+
end
|
1029
|
+
|
987
1030
|
#
|
988
1031
|
# Returns +nil+ if headers will not be used, +true+ if they will but have not
|
989
1032
|
# yet been read, or the actual headers after they have been read. See
|
990
1033
|
# CSV::new for details.
|
991
1034
|
#
|
992
1035
|
def headers
|
993
|
-
|
1036
|
+
if @writer
|
1037
|
+
@writer.headers
|
1038
|
+
else
|
1039
|
+
parsed_headers = parser.headers
|
1040
|
+
return parsed_headers if parsed_headers
|
1041
|
+
raw_headers = @parser_options[:headers]
|
1042
|
+
raw_headers = nil if raw_headers == false
|
1043
|
+
raw_headers
|
1044
|
+
end
|
994
1045
|
end
|
995
1046
|
#
|
996
1047
|
# Returns +true+ if headers will be returned as a row of results.
|
997
1048
|
# See CSV::new for details.
|
998
1049
|
#
|
999
|
-
def return_headers?
|
1050
|
+
def return_headers?
|
1051
|
+
parser.return_headers?
|
1052
|
+
end
|
1053
|
+
|
1000
1054
|
# Returns +true+ if headers are written in output. See CSV::new for details.
|
1001
|
-
def write_headers?
|
1055
|
+
def write_headers?
|
1056
|
+
@writer_options[:write_headers]
|
1057
|
+
end
|
1058
|
+
|
1002
1059
|
#
|
1003
1060
|
# Returns the current list of converters in effect for headers. See CSV::new
|
1004
1061
|
# for details. Built-in converters will be returned by name, while others
|
1005
1062
|
# will be returned as is.
|
1006
1063
|
#
|
1007
1064
|
def header_converters
|
1008
|
-
|
1065
|
+
header_fields_converter.map do |converter|
|
1009
1066
|
name = HeaderConverters.rassoc(converter)
|
1010
1067
|
name ? name.first : converter
|
1011
1068
|
end
|
1012
1069
|
end
|
1070
|
+
|
1013
1071
|
#
|
1014
1072
|
# Returns +true+ if blank lines are skipped by the parser. See CSV::new
|
1015
1073
|
# for details.
|
1016
1074
|
#
|
1017
|
-
def skip_blanks?
|
1075
|
+
def skip_blanks?
|
1076
|
+
parser.skip_blanks?
|
1077
|
+
end
|
1078
|
+
|
1018
1079
|
# Returns +true+ if all output fields are quoted. See CSV::new for details.
|
1019
|
-
def force_quotes?
|
1080
|
+
def force_quotes?
|
1081
|
+
@writer_options[:force_quotes]
|
1082
|
+
end
|
1083
|
+
|
1020
1084
|
# Returns +true+ if illegal input is handled. See CSV::new for details.
|
1021
|
-
def liberal_parsing?
|
1085
|
+
def liberal_parsing?
|
1086
|
+
parser.liberal_parsing?
|
1087
|
+
end
|
1022
1088
|
|
1023
1089
|
#
|
1024
1090
|
# The Encoding CSV is parsing or writing in. This will be the Encoding you
|
@@ -1027,26 +1093,90 @@ class CSV
|
|
1027
1093
|
attr_reader :encoding
|
1028
1094
|
|
1029
1095
|
#
|
1030
|
-
# The line number of the last row read from this file.
|
1096
|
+
# The line number of the last row read from this file. Fields with nested
|
1031
1097
|
# line-end characters will not affect this count.
|
1032
1098
|
#
|
1033
|
-
|
1099
|
+
def lineno
|
1100
|
+
if @writer
|
1101
|
+
@writer.lineno
|
1102
|
+
else
|
1103
|
+
parser.lineno
|
1104
|
+
end
|
1105
|
+
end
|
1106
|
+
|
1107
|
+
#
|
1108
|
+
# The last row read from this file.
|
1109
|
+
#
|
1110
|
+
def line
|
1111
|
+
parser.line
|
1112
|
+
end
|
1034
1113
|
|
1035
1114
|
### IO and StringIO Delegation ###
|
1036
1115
|
|
1037
1116
|
extend Forwardable
|
1038
|
-
def_delegators :@io, :binmode, :
|
1039
|
-
:closed?, :
|
1040
|
-
:fileno, :
|
1041
|
-
:
|
1042
|
-
:seek, :
|
1043
|
-
:
|
1117
|
+
def_delegators :@io, :binmode, :close, :close_read, :close_write,
|
1118
|
+
:closed?, :external_encoding, :fcntl,
|
1119
|
+
:fileno, :flush, :fsync, :internal_encoding,
|
1120
|
+
:isatty, :pid, :pos, :pos=, :reopen,
|
1121
|
+
:seek, :string, :sync, :sync=, :tell,
|
1122
|
+
:truncate, :tty?
|
1123
|
+
|
1124
|
+
def binmode?
|
1125
|
+
if @io.respond_to?(:binmode?)
|
1126
|
+
@io.binmode?
|
1127
|
+
else
|
1128
|
+
false
|
1129
|
+
end
|
1130
|
+
end
|
1131
|
+
|
1132
|
+
def flock(*args)
|
1133
|
+
raise NotImplementedError unless @io.respond_to?(:flock)
|
1134
|
+
@io.flock(*args)
|
1135
|
+
end
|
1136
|
+
|
1137
|
+
def ioctl(*args)
|
1138
|
+
raise NotImplementedError unless @io.respond_to?(:ioctl)
|
1139
|
+
@io.ioctl(*args)
|
1140
|
+
end
|
1141
|
+
|
1142
|
+
def path
|
1143
|
+
@io.path if @io.respond_to?(:path)
|
1144
|
+
end
|
1145
|
+
|
1146
|
+
def stat(*args)
|
1147
|
+
raise NotImplementedError unless @io.respond_to?(:stat)
|
1148
|
+
@io.stat(*args)
|
1149
|
+
end
|
1150
|
+
|
1151
|
+
def to_i
|
1152
|
+
raise NotImplementedError unless @io.respond_to?(:to_i)
|
1153
|
+
@io.to_i
|
1154
|
+
end
|
1155
|
+
|
1156
|
+
def to_io
|
1157
|
+
@io.respond_to?(:to_io) ? @io.to_io : @io
|
1158
|
+
end
|
1159
|
+
|
1160
|
+
def eof?
|
1161
|
+
return false if @eof_error
|
1162
|
+
begin
|
1163
|
+
parser_enumerator.peek
|
1164
|
+
false
|
1165
|
+
rescue MalformedCSVError => error
|
1166
|
+
@eof_error = error
|
1167
|
+
false
|
1168
|
+
rescue StopIteration
|
1169
|
+
true
|
1170
|
+
end
|
1171
|
+
end
|
1172
|
+
alias_method :eof, :eof?
|
1044
1173
|
|
1045
1174
|
# Rewinds the underlying IO object and resets CSV's lineno() counter.
|
1046
1175
|
def rewind
|
1047
|
-
@
|
1048
|
-
@
|
1049
|
-
|
1176
|
+
@parser = nil
|
1177
|
+
@parser_enumerator = nil
|
1178
|
+
@eof_error = nil
|
1179
|
+
@writer.rewind if @writer
|
1050
1180
|
@io.rewind
|
1051
1181
|
end
|
1052
1182
|
|
@@ -1060,34 +1190,8 @@ class CSV
|
|
1060
1190
|
# The data source must be open for writing.
|
1061
1191
|
#
|
1062
1192
|
def <<(row)
|
1063
|
-
|
1064
|
-
|
1065
|
-
parse_headers # won't read data for Array or String
|
1066
|
-
end
|
1067
|
-
|
1068
|
-
# handle CSV::Row objects and Hashes
|
1069
|
-
row = case row
|
1070
|
-
when self.class::Row then row.fields
|
1071
|
-
when Hash then @headers.map { |header| row[header] }
|
1072
|
-
else row
|
1073
|
-
end
|
1074
|
-
|
1075
|
-
@headers = row if header_row?
|
1076
|
-
@lineno += 1
|
1077
|
-
|
1078
|
-
output = row.map(&@quote).join(@col_sep) + @row_sep # quote and separate
|
1079
|
-
if @io.is_a?(StringIO) and
|
1080
|
-
output.encoding != (encoding = raw_encoding)
|
1081
|
-
if @force_encoding
|
1082
|
-
output = output.encode(encoding)
|
1083
|
-
elsif (compatible_encoding = Encoding.compatible?(@io.string, output))
|
1084
|
-
@io.set_encoding(compatible_encoding)
|
1085
|
-
@io.seek(0, IO::SEEK_END)
|
1086
|
-
end
|
1087
|
-
end
|
1088
|
-
@io << output
|
1089
|
-
|
1090
|
-
self # for chaining
|
1193
|
+
writer << row
|
1194
|
+
self
|
1091
1195
|
end
|
1092
1196
|
alias_method :add_row, :<<
|
1093
1197
|
alias_method :puts, :<<
|
@@ -1108,7 +1212,7 @@ class CSV
|
|
1108
1212
|
# converted field or the field itself.
|
1109
1213
|
#
|
1110
1214
|
def convert(name = nil, &converter)
|
1111
|
-
add_converter(
|
1215
|
+
parser_fields_converter.add_converter(name, &converter)
|
1112
1216
|
end
|
1113
1217
|
|
1114
1218
|
#
|
@@ -1123,10 +1227,7 @@ class CSV
|
|
1123
1227
|
# effect.
|
1124
1228
|
#
|
1125
1229
|
def header_convert(name = nil, &converter)
|
1126
|
-
add_converter(
|
1127
|
-
self.class::HeaderConverters,
|
1128
|
-
name,
|
1129
|
-
&converter )
|
1230
|
+
header_fields_converter.add_converter(name, &converter)
|
1130
1231
|
end
|
1131
1232
|
|
1132
1233
|
include Enumerable
|
@@ -1138,14 +1239,8 @@ class CSV
|
|
1138
1239
|
#
|
1139
1240
|
# The data source must be open for reading.
|
1140
1241
|
#
|
1141
|
-
def each
|
1142
|
-
|
1143
|
-
while row = shift
|
1144
|
-
yield row
|
1145
|
-
end
|
1146
|
-
else
|
1147
|
-
to_enum
|
1148
|
-
end
|
1242
|
+
def each(&block)
|
1243
|
+
parser_enumerator.each(&block)
|
1149
1244
|
end
|
1150
1245
|
|
1151
1246
|
#
|
@@ -1155,8 +1250,8 @@ class CSV
|
|
1155
1250
|
#
|
1156
1251
|
def read
|
1157
1252
|
rows = to_a
|
1158
|
-
if
|
1159
|
-
Table.new(rows)
|
1253
|
+
if parser.use_headers?
|
1254
|
+
Table.new(rows, headers: parser.headers)
|
1160
1255
|
else
|
1161
1256
|
rows
|
1162
1257
|
end
|
@@ -1165,7 +1260,7 @@ class CSV
|
|
1165
1260
|
|
1166
1261
|
# Returns +true+ if the next row read will be a header row.
|
1167
1262
|
def header_row?
|
1168
|
-
|
1263
|
+
parser.header_row?
|
1169
1264
|
end
|
1170
1265
|
|
1171
1266
|
#
|
@@ -1176,171 +1271,14 @@ class CSV
|
|
1176
1271
|
# The data source must be open for reading.
|
1177
1272
|
#
|
1178
1273
|
def shift
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1182
|
-
#########################################################################
|
1183
|
-
|
1184
|
-
# handle headers not based on document content
|
1185
|
-
if header_row? and @return_headers and
|
1186
|
-
[Array, String].include? @use_headers.class
|
1187
|
-
if @unconverted_fields
|
1188
|
-
return add_unconverted_fields(parse_headers, Array.new)
|
1189
|
-
else
|
1190
|
-
return parse_headers
|
1191
|
-
end
|
1274
|
+
if @eof_error
|
1275
|
+
eof_error, @eof_error = @eof_error, nil
|
1276
|
+
raise eof_error
|
1192
1277
|
end
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
#
|
1198
|
-
in_extended_col = false
|
1199
|
-
csv = Array.new
|
1200
|
-
|
1201
|
-
loop do
|
1202
|
-
# add another read to the line
|
1203
|
-
unless parse = @io.gets(@row_sep)
|
1204
|
-
return nil
|
1205
|
-
end
|
1206
|
-
|
1207
|
-
if in_extended_col
|
1208
|
-
@line.concat(parse)
|
1209
|
-
else
|
1210
|
-
@line = parse.clone
|
1211
|
-
end
|
1212
|
-
|
1213
|
-
begin
|
1214
|
-
parse.sub!(@parsers[:line_end], "")
|
1215
|
-
rescue ArgumentError
|
1216
|
-
unless parse.valid_encoding?
|
1217
|
-
message = "Invalid byte sequence in #{parse.encoding}"
|
1218
|
-
raise MalformedCSVError.new(message, lineno + 1)
|
1219
|
-
end
|
1220
|
-
raise
|
1221
|
-
end
|
1222
|
-
|
1223
|
-
if csv.empty?
|
1224
|
-
#
|
1225
|
-
# I believe a blank line should be an <tt>Array.new</tt>, not Ruby 1.8
|
1226
|
-
# CSV's <tt>[nil]</tt>
|
1227
|
-
#
|
1228
|
-
if parse.empty?
|
1229
|
-
@lineno += 1
|
1230
|
-
if @skip_blanks
|
1231
|
-
next
|
1232
|
-
elsif @unconverted_fields
|
1233
|
-
return add_unconverted_fields(Array.new, Array.new)
|
1234
|
-
elsif @use_headers
|
1235
|
-
return self.class::Row.new(@headers, Array.new)
|
1236
|
-
else
|
1237
|
-
return Array.new
|
1238
|
-
end
|
1239
|
-
end
|
1240
|
-
end
|
1241
|
-
|
1242
|
-
next if @skip_lines and @skip_lines.match parse
|
1243
|
-
|
1244
|
-
parts = parse.split(@col_sep_split_separator, -1)
|
1245
|
-
if parts.empty?
|
1246
|
-
if in_extended_col
|
1247
|
-
csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
|
1248
|
-
else
|
1249
|
-
csv << nil
|
1250
|
-
end
|
1251
|
-
end
|
1252
|
-
|
1253
|
-
# This loop is the hot path of csv parsing. Some things may be non-dry
|
1254
|
-
# for a reason. Make sure to benchmark when refactoring.
|
1255
|
-
parts.each do |part|
|
1256
|
-
if in_extended_col
|
1257
|
-
# If we are continuing a previous column
|
1258
|
-
if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
|
1259
|
-
# extended column ends
|
1260
|
-
csv.last << part[0..-2]
|
1261
|
-
if csv.last.match?(@parsers[:stray_quote])
|
1262
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1263
|
-
lineno + 1)
|
1264
|
-
end
|
1265
|
-
csv.last.gsub!(@double_quote_char, @quote_char)
|
1266
|
-
in_extended_col = false
|
1267
|
-
else
|
1268
|
-
csv.last << part << @col_sep
|
1269
|
-
end
|
1270
|
-
elsif part.start_with?(@quote_char)
|
1271
|
-
# If we are starting a new quoted column
|
1272
|
-
if part.count(@quote_char) % 2 != 0
|
1273
|
-
# start an extended column
|
1274
|
-
csv << (part[1..-1] << @col_sep)
|
1275
|
-
in_extended_col = true
|
1276
|
-
elsif part.end_with?(@quote_char)
|
1277
|
-
# regular quoted column
|
1278
|
-
csv << part[1..-2]
|
1279
|
-
if csv.last.match?(@parsers[:stray_quote])
|
1280
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1281
|
-
lineno + 1)
|
1282
|
-
end
|
1283
|
-
csv.last.gsub!(@double_quote_char, @quote_char)
|
1284
|
-
elsif @liberal_parsing
|
1285
|
-
csv << part
|
1286
|
-
else
|
1287
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1288
|
-
lineno + 1)
|
1289
|
-
end
|
1290
|
-
elsif part.match?(@parsers[:quote_or_nl])
|
1291
|
-
# Unquoted field with bad characters.
|
1292
|
-
if part.match?(@parsers[:nl_or_lf])
|
1293
|
-
message = "Unquoted fields do not allow \\r or \\n"
|
1294
|
-
raise MalformedCSVError.new(message, lineno + 1)
|
1295
|
-
else
|
1296
|
-
if @liberal_parsing
|
1297
|
-
csv << part
|
1298
|
-
else
|
1299
|
-
raise MalformedCSVError.new("Illegal quoting", lineno + 1)
|
1300
|
-
end
|
1301
|
-
end
|
1302
|
-
else
|
1303
|
-
# Regular ole unquoted field.
|
1304
|
-
csv << (part.empty? ? nil : part)
|
1305
|
-
end
|
1306
|
-
end
|
1307
|
-
|
1308
|
-
# Replace tacked on @col_sep with @row_sep if we are still in an extended
|
1309
|
-
# column.
|
1310
|
-
csv[-1][-1] = @row_sep if in_extended_col
|
1311
|
-
|
1312
|
-
if in_extended_col
|
1313
|
-
# if we're at eof?(), a quoted field wasn't closed...
|
1314
|
-
if @io.eof?
|
1315
|
-
raise MalformedCSVError.new("Unclosed quoted field",
|
1316
|
-
lineno + 1)
|
1317
|
-
elsif @field_size_limit and csv.last.size >= @field_size_limit
|
1318
|
-
raise MalformedCSVError.new("Field size exceeded",
|
1319
|
-
lineno + 1)
|
1320
|
-
end
|
1321
|
-
# otherwise, we need to loop and pull some more data to complete the row
|
1322
|
-
else
|
1323
|
-
@lineno += 1
|
1324
|
-
|
1325
|
-
# save fields unconverted fields, if needed...
|
1326
|
-
unconverted = csv.dup if @unconverted_fields
|
1327
|
-
|
1328
|
-
if @use_headers
|
1329
|
-
# parse out header rows and handle CSV::Row conversions...
|
1330
|
-
csv = parse_headers(csv)
|
1331
|
-
else
|
1332
|
-
# convert fields, if needed...
|
1333
|
-
csv = convert_fields(csv)
|
1334
|
-
end
|
1335
|
-
|
1336
|
-
# inject unconverted fields and accessor, if requested...
|
1337
|
-
if @unconverted_fields and not csv.respond_to? :unconverted_fields
|
1338
|
-
add_unconverted_fields(csv, unconverted)
|
1339
|
-
end
|
1340
|
-
|
1341
|
-
# return the results
|
1342
|
-
break csv
|
1343
|
-
end
|
1278
|
+
begin
|
1279
|
+
parser_enumerator.next
|
1280
|
+
rescue StopIteration
|
1281
|
+
nil
|
1344
1282
|
end
|
1345
1283
|
end
|
1346
1284
|
alias_method :gets, :shift
|
@@ -1365,15 +1303,18 @@ class CSV
|
|
1365
1303
|
# show encoding
|
1366
1304
|
str << " encoding:" << @encoding.name
|
1367
1305
|
# show other attributes
|
1368
|
-
|
1369
|
-
|
1370
|
-
if a = instance_variable_get("@#{attr_name}")
|
1306
|
+
["lineno", "col_sep", "row_sep", "quote_char"].each do |attr_name|
|
1307
|
+
if a = __send__(attr_name)
|
1371
1308
|
str << " " << attr_name << ":" << a.inspect
|
1372
1309
|
end
|
1373
1310
|
end
|
1374
|
-
|
1375
|
-
|
1311
|
+
["skip_blanks", "liberal_parsing"].each do |attr_name|
|
1312
|
+
if a = __send__("#{attr_name}?")
|
1313
|
+
str << " " << attr_name << ":" << a.inspect
|
1314
|
+
end
|
1376
1315
|
end
|
1316
|
+
_headers = headers
|
1317
|
+
str << " headers:" << _headers.inspect if _headers
|
1377
1318
|
str << ">"
|
1378
1319
|
begin
|
1379
1320
|
str.join('')
|
@@ -1389,7 +1330,7 @@ class CSV
|
|
1389
1330
|
|
1390
1331
|
def determine_encoding(encoding, internal_encoding)
|
1391
1332
|
# honor the IO encoding if we can, otherwise default to ASCII-8BIT
|
1392
|
-
io_encoding = raw_encoding
|
1333
|
+
io_encoding = raw_encoding
|
1393
1334
|
return io_encoding if io_encoding
|
1394
1335
|
|
1395
1336
|
return Encoding.find(internal_encoding) if internal_encoding
|
@@ -1402,216 +1343,17 @@ class CSV
|
|
1402
1343
|
Encoding.default_internal || Encoding.default_external
|
1403
1344
|
end
|
1404
1345
|
|
1405
|
-
|
1406
|
-
|
1407
|
-
|
1408
|
-
|
1409
|
-
# ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
|
1410
|
-
# +STDERR+ and any stream open for output only with a default
|
1411
|
-
# <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
|
1412
|
-
#
|
1413
|
-
# This method also establishes the quoting rules used for CSV output.
|
1414
|
-
#
|
1415
|
-
def init_separators(col_sep, row_sep, quote_char, force_quotes)
|
1416
|
-
# store the selected separators
|
1417
|
-
@col_sep = col_sep.to_s.encode(@encoding)
|
1418
|
-
if @col_sep == " "
|
1419
|
-
@col_sep_split_separator = Regexp.new(/#{Regexp.escape(@col_sep)}/)
|
1420
|
-
else
|
1421
|
-
@col_sep_split_separator = @col_sep
|
1422
|
-
end
|
1423
|
-
@row_sep = row_sep # encode after resolving :auto
|
1424
|
-
@quote_char = quote_char.to_s.encode(@encoding)
|
1425
|
-
@double_quote_char = @quote_char * 2
|
1426
|
-
|
1427
|
-
if @quote_char.length != 1
|
1428
|
-
raise ArgumentError, ":quote_char has to be a single character String"
|
1429
|
-
end
|
1430
|
-
|
1431
|
-
#
|
1432
|
-
# automatically discover row separator when requested
|
1433
|
-
# (not fully encoding safe)
|
1434
|
-
#
|
1435
|
-
if @row_sep == :auto
|
1436
|
-
if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or
|
1437
|
-
(defined?(Zlib) and @io.class == Zlib::GzipWriter)
|
1438
|
-
@row_sep = $INPUT_RECORD_SEPARATOR
|
1439
|
-
else
|
1440
|
-
begin
|
1441
|
-
#
|
1442
|
-
# remember where we were (pos() will raise an exception if @io is pipe
|
1443
|
-
# or not opened for reading)
|
1444
|
-
#
|
1445
|
-
saved_pos = @io.pos
|
1446
|
-
while @row_sep == :auto
|
1447
|
-
#
|
1448
|
-
# if we run out of data, it's probably a single line
|
1449
|
-
# (ensure will set default value)
|
1450
|
-
#
|
1451
|
-
break unless sample = @io.gets(nil, 1024)
|
1452
|
-
|
1453
|
-
cr = encode_str("\r")
|
1454
|
-
lf = encode_str("\n")
|
1455
|
-
# extend sample if we're unsure of the line ending
|
1456
|
-
if sample.end_with?(cr)
|
1457
|
-
sample << (@io.gets(nil, 1) || "")
|
1458
|
-
end
|
1459
|
-
|
1460
|
-
# try to find a standard separator
|
1461
|
-
sample.each_char.each_cons(2) do |char, next_char|
|
1462
|
-
case char
|
1463
|
-
when cr
|
1464
|
-
if next_char == lf
|
1465
|
-
@row_sep = encode_str("\r\n")
|
1466
|
-
else
|
1467
|
-
@row_sep = cr
|
1468
|
-
end
|
1469
|
-
break
|
1470
|
-
when lf
|
1471
|
-
@row_sep = lf
|
1472
|
-
break
|
1473
|
-
end
|
1474
|
-
end
|
1475
|
-
end
|
1476
|
-
|
1477
|
-
# tricky seek() clone to work around GzipReader's lack of seek()
|
1478
|
-
@io.rewind
|
1479
|
-
# reset back to the remembered position
|
1480
|
-
while saved_pos > 1024 # avoid loading a lot of data into memory
|
1481
|
-
@io.read(1024)
|
1482
|
-
saved_pos -= 1024
|
1483
|
-
end
|
1484
|
-
@io.read(saved_pos) if saved_pos.nonzero?
|
1485
|
-
rescue IOError # not opened for reading
|
1486
|
-
# do nothing: ensure will set default
|
1487
|
-
rescue NoMethodError # Zlib::GzipWriter doesn't have some IO methods
|
1488
|
-
# do nothing: ensure will set default
|
1489
|
-
rescue SystemCallError # pipe
|
1490
|
-
# do nothing: ensure will set default
|
1491
|
-
ensure
|
1492
|
-
#
|
1493
|
-
# set default if we failed to detect
|
1494
|
-
# (stream not opened for reading, a pipe, or a single line of data)
|
1495
|
-
#
|
1496
|
-
@row_sep = $INPUT_RECORD_SEPARATOR if @row_sep == :auto
|
1497
|
-
end
|
1498
|
-
end
|
1499
|
-
end
|
1500
|
-
@row_sep = @row_sep.to_s.encode(@encoding)
|
1501
|
-
|
1502
|
-
# establish quoting rules
|
1503
|
-
@force_quotes = force_quotes
|
1504
|
-
do_quote = lambda do |field|
|
1505
|
-
field = String(field)
|
1506
|
-
encoded_quote = @quote_char.encode(field.encoding)
|
1507
|
-
encoded_quote + field.gsub(encoded_quote, encoded_quote * 2) + encoded_quote
|
1508
|
-
end
|
1509
|
-
quotable_chars = encode_str("\r\n", @col_sep, @quote_char)
|
1510
|
-
@quote = if @force_quotes
|
1511
|
-
do_quote
|
1512
|
-
else
|
1513
|
-
lambda do |field|
|
1514
|
-
if field.nil? # represent +nil+ fields as empty unquoted fields
|
1515
|
-
""
|
1516
|
-
else
|
1517
|
-
field = String(field) # Stringify fields
|
1518
|
-
# represent empty fields as empty quoted fields
|
1519
|
-
if field.empty? or
|
1520
|
-
field.count(quotable_chars).nonzero?
|
1521
|
-
do_quote.call(field)
|
1522
|
-
else
|
1523
|
-
field # unquoted field
|
1524
|
-
end
|
1525
|
-
end
|
1526
|
-
end
|
1527
|
-
end
|
1528
|
-
end
|
1529
|
-
|
1530
|
-
# Pre-compiles parsers and stores them by name for access during reads.
|
1531
|
-
def init_parsers(skip_blanks, field_size_limit, liberal_parsing)
|
1532
|
-
# store the parser behaviors
|
1533
|
-
@skip_blanks = skip_blanks
|
1534
|
-
@field_size_limit = field_size_limit
|
1535
|
-
@liberal_parsing = liberal_parsing
|
1536
|
-
|
1537
|
-
# prebuild Regexps for faster parsing
|
1538
|
-
esc_row_sep = escape_re(@row_sep)
|
1539
|
-
esc_quote = escape_re(@quote_char)
|
1540
|
-
@parsers = {
|
1541
|
-
# for detecting parse errors
|
1542
|
-
quote_or_nl: encode_re("[", esc_quote, "\r\n]"),
|
1543
|
-
nl_or_lf: encode_re("[\r\n]"),
|
1544
|
-
stray_quote: encode_re( "[^", esc_quote, "]", esc_quote,
|
1545
|
-
"[^", esc_quote, "]" ),
|
1546
|
-
# safer than chomp!()
|
1547
|
-
line_end: encode_re(esc_row_sep, "\\z"),
|
1548
|
-
# illegal unquoted characters
|
1549
|
-
return_newline: encode_str("\r\n")
|
1550
|
-
}
|
1551
|
-
end
|
1552
|
-
|
1553
|
-
#
|
1554
|
-
# Loads any converters requested during construction.
|
1555
|
-
#
|
1556
|
-
# If +field_name+ is set <tt>:converters</tt> (the default) field converters
|
1557
|
-
# are set. When +field_name+ is <tt>:header_converters</tt> header converters
|
1558
|
-
# are added instead.
|
1559
|
-
#
|
1560
|
-
# The <tt>:unconverted_fields</tt> option is also activated for
|
1561
|
-
# <tt>:converters</tt> calls, if requested.
|
1562
|
-
#
|
1563
|
-
def init_converters(converters, ivar_name, convert_method)
|
1564
|
-
converters = case converters
|
1565
|
-
when nil then []
|
1566
|
-
when Array then converters
|
1567
|
-
else [converters]
|
1568
|
-
end
|
1569
|
-
instance_variable_set(ivar_name, [])
|
1570
|
-
convert = method(convert_method)
|
1571
|
-
|
1572
|
-
# load converters
|
1573
|
-
converters.each do |converter|
|
1574
|
-
if converter.is_a? Proc # custom code block
|
1575
|
-
convert.call(&converter)
|
1576
|
-
else # by name
|
1577
|
-
convert.call(converter)
|
1578
|
-
end
|
1579
|
-
end
|
1580
|
-
end
|
1581
|
-
|
1582
|
-
# Stores the pattern of comments to skip from the provided options.
|
1583
|
-
#
|
1584
|
-
# The pattern must respond to +.match+, else ArgumentError is raised.
|
1585
|
-
# Strings are converted to a Regexp.
|
1586
|
-
#
|
1587
|
-
# See also CSV.new
|
1588
|
-
def init_comments(skip_lines)
|
1589
|
-
@skip_lines = skip_lines
|
1590
|
-
@skip_lines = Regexp.new(Regexp.escape(@skip_lines)) if @skip_lines.is_a? String
|
1591
|
-
if @skip_lines and not @skip_lines.respond_to?(:match)
|
1592
|
-
raise ArgumentError, ":skip_lines has to respond to matches"
|
1346
|
+
def normalize_converters(converters)
|
1347
|
+
converters ||= []
|
1348
|
+
unless converters.is_a?(Array)
|
1349
|
+
converters = [converters]
|
1593
1350
|
end
|
1594
|
-
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
|
1599
|
-
|
1600
|
-
# converters in, the +const+ Hash to lookup named converters in, and the
|
1601
|
-
# normal parameters of the CSV.convert() and CSV.header_convert() methods.
|
1602
|
-
#
|
1603
|
-
def add_converter(var_name, const, name = nil, &converter)
|
1604
|
-
if name.nil? # custom converter
|
1605
|
-
instance_variable_get(var_name) << converter
|
1606
|
-
else # named converter
|
1607
|
-
combo = const[name]
|
1608
|
-
case combo
|
1609
|
-
when Array # combo converter
|
1610
|
-
combo.each do |converter_name|
|
1611
|
-
add_converter(var_name, const, converter_name)
|
1612
|
-
end
|
1613
|
-
else # individual named converter
|
1614
|
-
instance_variable_get(var_name) << combo
|
1351
|
+
converters.collect do |converter|
|
1352
|
+
case converter
|
1353
|
+
when Proc # custom code block
|
1354
|
+
[nil, converter]
|
1355
|
+
else # by name
|
1356
|
+
[converter, nil]
|
1615
1357
|
end
|
1616
1358
|
end
|
1617
1359
|
end
|
@@ -1625,131 +1367,87 @@ class CSV
|
|
1625
1367
|
#
|
1626
1368
|
def convert_fields(fields, headers = false)
|
1627
1369
|
if headers
|
1628
|
-
|
1370
|
+
header_fields_converter.convert(fields, nil, 0)
|
1629
1371
|
else
|
1630
|
-
|
1631
|
-
if !@use_headers and
|
1632
|
-
converters.empty? and
|
1633
|
-
@nil_value.nil? and
|
1634
|
-
@empty_value_is_empty_string
|
1635
|
-
return fields
|
1636
|
-
end
|
1637
|
-
end
|
1638
|
-
|
1639
|
-
fields.map.with_index do |field, index|
|
1640
|
-
if field.nil?
|
1641
|
-
field = @nil_value
|
1642
|
-
elsif field.empty?
|
1643
|
-
field = @empty_value unless @empty_value_is_empty_string
|
1644
|
-
end
|
1645
|
-
converters.each do |converter|
|
1646
|
-
break if headers && field.nil?
|
1647
|
-
field = if converter.arity == 1 # straight field converter
|
1648
|
-
converter[field]
|
1649
|
-
else # FieldInfo converter
|
1650
|
-
header = @use_headers && !headers ? @headers[index] : nil
|
1651
|
-
converter[field, FieldInfo.new(index, lineno, header)]
|
1652
|
-
end
|
1653
|
-
break unless field.is_a? String # short-circuit pipeline for speed
|
1654
|
-
end
|
1655
|
-
field # final state of each field, converted or original
|
1372
|
+
parser_fields_converter.convert(fields, @headers, lineno)
|
1656
1373
|
end
|
1657
1374
|
end
|
1658
1375
|
|
1659
1376
|
#
|
1660
|
-
#
|
1661
|
-
#
|
1662
|
-
|
1663
|
-
|
1664
|
-
|
1665
|
-
|
1666
|
-
|
1667
|
-
|
1668
|
-
|
1669
|
-
def parse_headers(row = nil)
|
1670
|
-
if @headers.nil? # header row
|
1671
|
-
@headers = case @use_headers # save headers
|
1672
|
-
# Array of headers
|
1673
|
-
when Array then @use_headers
|
1674
|
-
# CSV header String
|
1675
|
-
when String
|
1676
|
-
self.class.parse_line( @use_headers,
|
1677
|
-
col_sep: @col_sep,
|
1678
|
-
row_sep: @row_sep,
|
1679
|
-
quote_char: @quote_char )
|
1680
|
-
# first row is headers
|
1681
|
-
else row
|
1682
|
-
end
|
1683
|
-
|
1684
|
-
# prepare converted and unconverted copies
|
1685
|
-
row = @headers if row.nil?
|
1686
|
-
@headers = convert_fields(@headers, true)
|
1687
|
-
@headers.each { |h| h.freeze if h.is_a? String }
|
1688
|
-
|
1689
|
-
if @return_headers # return headers
|
1690
|
-
return self.class::Row.new(@headers, row, true)
|
1691
|
-
elsif not [Array, String].include? @use_headers.class # skip to field row
|
1692
|
-
return shift
|
1693
|
-
end
|
1377
|
+
# Returns the encoding of the internal IO object.
|
1378
|
+
#
|
1379
|
+
def raw_encoding
|
1380
|
+
if @io.respond_to? :internal_encoding
|
1381
|
+
@io.internal_encoding || @io.external_encoding
|
1382
|
+
elsif @io.respond_to? :encoding
|
1383
|
+
@io.encoding
|
1384
|
+
else
|
1385
|
+
nil
|
1694
1386
|
end
|
1387
|
+
end
|
1695
1388
|
|
1696
|
-
|
1389
|
+
def parser_fields_converter
|
1390
|
+
@parser_fields_converter ||= build_parser_fields_converter
|
1697
1391
|
end
|
1698
1392
|
|
1699
|
-
|
1700
|
-
|
1701
|
-
|
1702
|
-
|
1703
|
-
|
1704
|
-
|
1705
|
-
class << row
|
1706
|
-
attr_reader :unconverted_fields
|
1707
|
-
end
|
1708
|
-
row.instance_variable_set(:@unconverted_fields, fields)
|
1709
|
-
row
|
1393
|
+
def build_parser_fields_converter
|
1394
|
+
specific_options = {
|
1395
|
+
builtin_converters: Converters,
|
1396
|
+
}
|
1397
|
+
options = @base_fields_converter_options.merge(specific_options)
|
1398
|
+
build_fields_converter(@initial_converters, options)
|
1710
1399
|
end
|
1711
1400
|
|
1712
|
-
|
1713
|
-
|
1714
|
-
# any characters that would change the meaning of a regular expression in the
|
1715
|
-
# encoding of +str+. Regular expression characters that cannot be transcoded
|
1716
|
-
# to the target encoding will be skipped and no escaping will be performed if
|
1717
|
-
# a backslash cannot be transcoded.
|
1718
|
-
#
|
1719
|
-
def escape_re(str)
|
1720
|
-
str.gsub(@re_chars) {|c| @re_esc + c}
|
1401
|
+
def header_fields_converter
|
1402
|
+
@header_fields_converter ||= build_header_fields_converter
|
1721
1403
|
end
|
1722
1404
|
|
1723
|
-
|
1724
|
-
|
1725
|
-
|
1726
|
-
|
1727
|
-
|
1728
|
-
|
1405
|
+
def build_header_fields_converter
|
1406
|
+
specific_options = {
|
1407
|
+
builtin_converters: HeaderConverters,
|
1408
|
+
accept_nil: true,
|
1409
|
+
}
|
1410
|
+
options = @base_fields_converter_options.merge(specific_options)
|
1411
|
+
build_fields_converter(@initial_header_converters, options)
|
1729
1412
|
end
|
1730
1413
|
|
1731
|
-
|
1732
|
-
|
1733
|
-
# that encoding.
|
1734
|
-
#
|
1735
|
-
def encode_str(*chunks)
|
1736
|
-
chunks.map { |chunk| chunk.encode(@encoding.name) }.join('')
|
1414
|
+
def writer_fields_converter
|
1415
|
+
@writer_fields_converter ||= build_writer_fields_converter
|
1737
1416
|
end
|
1738
1417
|
|
1739
|
-
|
1740
|
-
|
1741
|
-
|
1742
|
-
|
1743
|
-
|
1744
|
-
|
1745
|
-
|
1746
|
-
|
1747
|
-
|
1748
|
-
elsif @io.respond_to? :encoding
|
1749
|
-
@io.encoding
|
1750
|
-
else
|
1751
|
-
default
|
1418
|
+
def build_writer_fields_converter
|
1419
|
+
build_fields_converter(@initial_write_converters,
|
1420
|
+
@write_fields_converter_options)
|
1421
|
+
end
|
1422
|
+
|
1423
|
+
def build_fields_converter(initial_converters, options)
|
1424
|
+
fields_converter = FieldsConverter.new(options)
|
1425
|
+
normalize_converters(initial_converters).each do |name, converter|
|
1426
|
+
fields_converter.add_converter(name, &converter)
|
1752
1427
|
end
|
1428
|
+
fields_converter
|
1429
|
+
end
|
1430
|
+
|
1431
|
+
def parser
|
1432
|
+
@parser ||= Parser.new(@io, parser_options)
|
1433
|
+
end
|
1434
|
+
|
1435
|
+
def parser_options
|
1436
|
+
@parser_options.merge(header_fields_converter: header_fields_converter,
|
1437
|
+
fields_converter: parser_fields_converter)
|
1438
|
+
end
|
1439
|
+
|
1440
|
+
def parser_enumerator
|
1441
|
+
@parser_enumerator ||= parser.parse
|
1442
|
+
end
|
1443
|
+
|
1444
|
+
def writer
|
1445
|
+
@writer ||= Writer.new(@io, writer_options)
|
1446
|
+
end
|
1447
|
+
|
1448
|
+
def writer_options
|
1449
|
+
@writer_options.merge(header_fields_converter: header_fields_converter,
|
1450
|
+
fields_converter: writer_fields_converter)
|
1753
1451
|
end
|
1754
1452
|
end
|
1755
1453
|
|