csv 3.0.0 → 3.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +882 -0
- data/README.md +6 -3
- data/doc/csv/arguments/io.rdoc +5 -0
- data/doc/csv/options/common/col_sep.rdoc +57 -0
- data/doc/csv/options/common/quote_char.rdoc +42 -0
- data/doc/csv/options/common/row_sep.rdoc +91 -0
- data/doc/csv/options/generating/force_quotes.rdoc +17 -0
- data/doc/csv/options/generating/quote_empty.rdoc +12 -0
- data/doc/csv/options/generating/write_converters.rdoc +25 -0
- data/doc/csv/options/generating/write_empty_value.rdoc +15 -0
- data/doc/csv/options/generating/write_headers.rdoc +29 -0
- data/doc/csv/options/generating/write_nil_value.rdoc +14 -0
- data/doc/csv/options/parsing/converters.rdoc +46 -0
- data/doc/csv/options/parsing/empty_value.rdoc +13 -0
- data/doc/csv/options/parsing/field_size_limit.rdoc +39 -0
- data/doc/csv/options/parsing/header_converters.rdoc +43 -0
- data/doc/csv/options/parsing/headers.rdoc +63 -0
- data/doc/csv/options/parsing/liberal_parsing.rdoc +38 -0
- data/doc/csv/options/parsing/nil_value.rdoc +12 -0
- data/doc/csv/options/parsing/return_headers.rdoc +22 -0
- data/doc/csv/options/parsing/skip_blanks.rdoc +31 -0
- data/doc/csv/options/parsing/skip_lines.rdoc +37 -0
- data/doc/csv/options/parsing/strip.rdoc +15 -0
- data/doc/csv/options/parsing/unconverted_fields.rdoc +27 -0
- data/doc/csv/recipes/filtering.rdoc +158 -0
- data/doc/csv/recipes/generating.rdoc +298 -0
- data/doc/csv/recipes/parsing.rdoc +545 -0
- data/doc/csv/recipes/recipes.rdoc +6 -0
- data/lib/csv/core_ext/array.rb +1 -1
- data/lib/csv/core_ext/string.rb +1 -1
- data/lib/csv/fields_converter.rb +89 -0
- data/lib/csv/input_record_separator.rb +18 -0
- data/lib/csv/parser.rb +1288 -0
- data/lib/csv/row.rb +505 -136
- data/lib/csv/table.rb +791 -114
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +210 -0
- data/lib/csv.rb +2433 -1329
- metadata +66 -13
- data/news.md +0 -123
data/lib/csv.rb
CHANGED
@@ -10,18 +10,18 @@
|
|
10
10
|
#
|
11
11
|
# Welcome to the new and improved CSV.
|
12
12
|
#
|
13
|
-
# This version of the CSV library began its life as FasterCSV.
|
14
|
-
# intended as a replacement to Ruby's then standard CSV library.
|
13
|
+
# This version of the CSV library began its life as FasterCSV. FasterCSV was
|
14
|
+
# intended as a replacement to Ruby's then standard CSV library. It was
|
15
15
|
# designed to address concerns users of that library had and it had three
|
16
16
|
# primary goals:
|
17
17
|
#
|
18
18
|
# 1. Be significantly faster than CSV while remaining a pure Ruby library.
|
19
|
-
# 2. Use a smaller and easier to maintain code base.
|
20
|
-
# grew larger, was also but considerably richer in features.
|
19
|
+
# 2. Use a smaller and easier to maintain code base. (FasterCSV eventually
|
20
|
+
# grew larger, but was also considerably richer in features. The parsing
|
21
21
|
# core remains quite small.)
|
22
22
|
# 3. Improve on the CSV interface.
|
23
23
|
#
|
24
|
-
# Obviously, the last one is subjective.
|
24
|
+
# Obviously, the last one is subjective. I did try to defer to the original
|
25
25
|
# interface whenever I didn't have a compelling reason to change it though, so
|
26
26
|
# hopefully this won't be too radically different.
|
27
27
|
#
|
@@ -29,26 +29,26 @@
|
|
29
29
|
# the original library as of Ruby 1.9. If you are migrating code from 1.8 or
|
30
30
|
# earlier, you may have to change your code to comply with the new interface.
|
31
31
|
#
|
32
|
-
# == What's Different From the Old CSV?
|
32
|
+
# == What's Different From the Old CSV?
|
33
33
|
#
|
34
34
|
# I'm sure I'll miss something, but I'll try to mention most of the major
|
35
35
|
# differences I am aware of, to help others quickly get up to speed:
|
36
36
|
#
|
37
|
-
# === CSV Parsing
|
37
|
+
# === \CSV Parsing
|
38
38
|
#
|
39
|
-
# * This parser is m17n aware.
|
39
|
+
# * This parser is m17n aware. See CSV for full details.
|
40
40
|
# * This library has a stricter parser and will throw MalformedCSVErrors on
|
41
41
|
# problematic data.
|
42
|
-
# * This library has a less liberal idea of a line ending than CSV.
|
43
|
-
# set as the <tt>:row_sep</tt> is law.
|
42
|
+
# * This library has a less liberal idea of a line ending than CSV. What you
|
43
|
+
# set as the <tt>:row_sep</tt> is law. It can auto-detect your line endings
|
44
44
|
# though.
|
45
|
-
# * The old library returned empty lines as <tt>[nil]</tt>.
|
45
|
+
# * The old library returned empty lines as <tt>[nil]</tt>. This library calls
|
46
46
|
# them <tt>[]</tt>.
|
47
47
|
# * This library has a much faster parser.
|
48
48
|
#
|
49
49
|
# === Interface
|
50
50
|
#
|
51
|
-
# * CSV now uses
|
51
|
+
# * CSV now uses keyword parameters to set options.
|
52
52
|
# * CSV no longer has generate_row() or parse_row().
|
53
53
|
# * The old CSV's Reader and Writer classes have been dropped.
|
54
54
|
# * CSV::open() is now more like Ruby's open().
|
@@ -56,9 +56,9 @@
|
|
56
56
|
# * CSV now has a new() method used to wrap objects like String and IO for
|
57
57
|
# reading and writing.
|
58
58
|
# * CSV::generate() is different from the old method.
|
59
|
-
# * CSV no longer supports partial reads.
|
59
|
+
# * CSV no longer supports partial reads. It works line-by-line.
|
60
60
|
# * CSV no longer allows the instance methods to override the separators for
|
61
|
-
# performance reasons.
|
61
|
+
# performance reasons. They must be set in the constructor.
|
62
62
|
#
|
63
63
|
# If you use this library and find yourself missing any functionality I have
|
64
64
|
# trimmed, please {let me know}[mailto:james@grayproductions.net].
|
@@ -70,16 +70,16 @@
|
|
70
70
|
# == What is CSV, really?
|
71
71
|
#
|
72
72
|
# CSV maintains a pretty strict definition of CSV taken directly from
|
73
|
-
# {the RFC}[
|
74
|
-
# place and that is to make using this library easier.
|
73
|
+
# {the RFC}[https://www.ietf.org/rfc/rfc4180.txt]. I relax the rules in only one
|
74
|
+
# place and that is to make using this library easier. CSV will parse all valid
|
75
75
|
# CSV.
|
76
76
|
#
|
77
|
-
# What you don't want to do is feed CSV invalid data.
|
77
|
+
# What you don't want to do is to feed CSV invalid data. Because of the way the
|
78
78
|
# CSV format works, it's common for a parser to need to read until the end of
|
79
|
-
# the file to be sure a field is invalid.
|
79
|
+
# the file to be sure a field is invalid. This consumes a lot of time and memory.
|
80
80
|
#
|
81
81
|
# Luckily, when working with invalid CSV, Ruby's built-in methods will almost
|
82
|
-
# always be superior in every way.
|
82
|
+
# always be superior in every way. For example, parsing non-quoted fields is as
|
83
83
|
# easy as:
|
84
84
|
#
|
85
85
|
# data.split(",")
|
@@ -90,189 +90,747 @@
|
|
90
90
|
# with any questions.
|
91
91
|
|
92
92
|
require "forwardable"
|
93
|
-
require "English"
|
94
93
|
require "date"
|
95
94
|
require "stringio"
|
96
|
-
require_relative "csv/table"
|
97
|
-
require_relative "csv/row"
|
98
|
-
|
99
|
-
# This provides String#match? and Regexp#match? for Ruby 2.3.
|
100
|
-
unless String.method_defined?(:match?)
|
101
|
-
class CSV
|
102
|
-
module MatchP
|
103
|
-
refine String do
|
104
|
-
def match?(pattern)
|
105
|
-
self =~ pattern
|
106
|
-
end
|
107
|
-
end
|
108
95
|
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
end
|
116
|
-
|
117
|
-
using CSV::MatchP
|
118
|
-
end
|
96
|
+
require_relative "csv/fields_converter"
|
97
|
+
require_relative "csv/input_record_separator"
|
98
|
+
require_relative "csv/parser"
|
99
|
+
require_relative "csv/row"
|
100
|
+
require_relative "csv/table"
|
101
|
+
require_relative "csv/writer"
|
119
102
|
|
103
|
+
# == \CSV
|
120
104
|
#
|
121
|
-
#
|
122
|
-
# tools to enable you to read and write to and from Strings or IO objects, as
|
123
|
-
# needed.
|
105
|
+
# === \CSV Data
|
124
106
|
#
|
125
|
-
#
|
107
|
+
# \CSV (comma-separated values) data is a text representation of a table:
|
108
|
+
# - A _row_ _separator_ delimits table rows.
|
109
|
+
# A common row separator is the newline character <tt>"\n"</tt>.
|
110
|
+
# - A _column_ _separator_ delimits fields in a row.
|
111
|
+
# A common column separator is the comma character <tt>","</tt>.
|
126
112
|
#
|
127
|
-
#
|
113
|
+
# This \CSV \String, with row separator <tt>"\n"</tt>
|
114
|
+
# and column separator <tt>","</tt>,
|
115
|
+
# has three rows and two columns:
|
116
|
+
# "foo,0\nbar,1\nbaz,2\n"
|
128
117
|
#
|
129
|
-
#
|
130
|
-
# csv.read # => array of rows
|
131
|
-
# # or
|
132
|
-
# csv.each do |row|
|
133
|
-
# # ...
|
134
|
-
# end
|
135
|
-
# # or
|
136
|
-
# row = csv.shift
|
118
|
+
# Despite the name \CSV, a \CSV representation can use different separators.
|
137
119
|
#
|
138
|
-
#
|
139
|
-
#
|
120
|
+
# For more about tables, see the Wikipedia article
|
121
|
+
# "{Table (information)}[https://en.wikipedia.org/wiki/Table_(information)]",
|
122
|
+
# especially its section
|
123
|
+
# "{Simple table}[https://en.wikipedia.org/wiki/Table_(information)#Simple_table]"
|
140
124
|
#
|
141
|
-
#
|
142
|
-
# described in the Specialized Methods section.
|
125
|
+
# == \Class \CSV
|
143
126
|
#
|
144
|
-
#
|
127
|
+
# Class \CSV provides methods for:
|
128
|
+
# - Parsing \CSV data from a \String object, a \File (via its file path), or an \IO object.
|
129
|
+
# - Generating \CSV data to a \String object.
|
145
130
|
#
|
146
|
-
#
|
147
|
-
#
|
148
|
-
# see Data Conversion section for the description of the latter.
|
131
|
+
# To make \CSV available:
|
132
|
+
# require 'csv'
|
149
133
|
#
|
150
|
-
#
|
134
|
+
# All examples here assume that this has been done.
|
151
135
|
#
|
152
|
-
#
|
136
|
+
# == Keeping It Simple
|
153
137
|
#
|
154
|
-
#
|
155
|
-
#
|
156
|
-
#
|
157
|
-
# CSV.foreach("path/to/file.csv", **options) do |row|
|
158
|
-
# # ...
|
159
|
-
# end
|
138
|
+
# A \CSV object has dozens of instance methods that offer fine-grained control
|
139
|
+
# of parsing and generating \CSV data.
|
140
|
+
# For many needs, though, simpler approaches will do.
|
160
141
|
#
|
161
|
-
#
|
162
|
-
#
|
163
|
-
#
|
164
|
-
#
|
165
|
-
#
|
166
|
-
#
|
142
|
+
# This section summarizes the singleton methods in \CSV
|
143
|
+
# that allow you to parse and generate without explicitly
|
144
|
+
# creating \CSV objects.
|
145
|
+
# For details, follow the links.
|
146
|
+
#
|
147
|
+
# === Simple Parsing
|
148
|
+
#
|
149
|
+
# Parsing methods commonly return either of:
|
150
|
+
# - An \Array of Arrays of Strings:
|
151
|
+
# - The outer \Array is the entire "table".
|
152
|
+
# - Each inner \Array is a row.
|
153
|
+
# - Each \String is a field.
|
154
|
+
# - A CSV::Table object. For details, see
|
155
|
+
# {\CSV with Headers}[#class-CSV-label-CSV+with+Headers].
|
156
|
+
#
|
157
|
+
# ==== Parsing a \String
|
158
|
+
#
|
159
|
+
# The input to be parsed can be a string:
|
160
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
161
|
+
#
|
162
|
+
# \Method CSV.parse returns the entire \CSV data:
|
163
|
+
# CSV.parse(string) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
164
|
+
#
|
165
|
+
# \Method CSV.parse_line returns only the first row:
|
166
|
+
# CSV.parse_line(string) # => ["foo", "0"]
|
167
|
+
#
|
168
|
+
# \CSV extends class \String with instance method String#parse_csv,
|
169
|
+
# which also returns only the first row:
|
170
|
+
# string.parse_csv # => ["foo", "0"]
|
167
171
|
#
|
168
|
-
#
|
172
|
+
# ==== Parsing Via a \File Path
|
169
173
|
#
|
170
|
-
#
|
171
|
-
#
|
172
|
-
#
|
173
|
-
#
|
174
|
-
#
|
174
|
+
# The input to be parsed can be in a file:
|
175
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
176
|
+
# path = 't.csv'
|
177
|
+
# File.write(path, string)
|
178
|
+
#
|
179
|
+
# \Method CSV.read returns the entire \CSV data:
|
180
|
+
# CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
181
|
+
#
|
182
|
+
# \Method CSV.foreach iterates, passing each row to the given block:
|
183
|
+
# CSV.foreach(path) do |row|
|
184
|
+
# p row
|
185
|
+
# end
|
186
|
+
# Output:
|
187
|
+
# ["foo", "0"]
|
188
|
+
# ["bar", "1"]
|
189
|
+
# ["baz", "2"]
|
190
|
+
#
|
191
|
+
# \Method CSV.table returns the entire \CSV data as a CSV::Table object:
|
192
|
+
# CSV.table(path) # => #<CSV::Table mode:col_or_row row_count:3>
|
193
|
+
#
|
194
|
+
# ==== Parsing from an Open \IO Stream
|
195
|
+
#
|
196
|
+
# The input to be parsed can be in an open \IO stream:
|
197
|
+
#
|
198
|
+
# \Method CSV.read returns the entire \CSV data:
|
199
|
+
# File.open(path) do |file|
|
200
|
+
# CSV.read(file)
|
201
|
+
# end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
202
|
+
#
|
203
|
+
# As does method CSV.parse:
|
204
|
+
# File.open(path) do |file|
|
205
|
+
# CSV.parse(file)
|
206
|
+
# end # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
207
|
+
#
|
208
|
+
# \Method CSV.parse_line returns only the first row:
|
209
|
+
# File.open(path) do |file|
|
210
|
+
# CSV.parse_line(file)
|
211
|
+
# end # => ["foo", "0"]
|
212
|
+
#
|
213
|
+
# \Method CSV.foreach iterates, passing each row to the given block:
|
214
|
+
# File.open(path) do |file|
|
215
|
+
# CSV.foreach(file) do |row|
|
216
|
+
# p row
|
217
|
+
# end
|
175
218
|
# end
|
219
|
+
# Output:
|
220
|
+
# ["foo", "0"]
|
221
|
+
# ["bar", "1"]
|
222
|
+
# ["baz", "2"]
|
223
|
+
#
|
224
|
+
# \Method CSV.table returns the entire \CSV data as a CSV::Table object:
|
225
|
+
# File.open(path) do |file|
|
226
|
+
# CSV.table(file)
|
227
|
+
# end # => #<CSV::Table mode:col_or_row row_count:3>
|
228
|
+
#
|
229
|
+
# === Simple Generating
|
230
|
+
#
|
231
|
+
# \Method CSV.generate returns a \String;
|
232
|
+
# this example uses method CSV#<< to append the rows
|
233
|
+
# that are to be generated:
|
234
|
+
# output_string = CSV.generate do |csv|
|
235
|
+
# csv << ['foo', 0]
|
236
|
+
# csv << ['bar', 1]
|
237
|
+
# csv << ['baz', 2]
|
238
|
+
# end
|
239
|
+
# output_string # => "foo,0\nbar,1\nbaz,2\n"
|
240
|
+
#
|
241
|
+
# \Method CSV.generate_line returns a \String containing the single row
|
242
|
+
# constructed from an \Array:
|
243
|
+
# CSV.generate_line(['foo', '0']) # => "foo,0\n"
|
176
244
|
#
|
177
|
-
#
|
178
|
-
#
|
179
|
-
#
|
180
|
-
#
|
181
|
-
#
|
245
|
+
# \CSV extends class \Array with instance method <tt>Array#to_csv</tt>,
|
246
|
+
# which forms an \Array into a \String:
|
247
|
+
# ['foo', '0'].to_csv # => "foo,0\n"
|
248
|
+
#
|
249
|
+
# === "Filtering" \CSV
|
250
|
+
#
|
251
|
+
# \Method CSV.filter provides a Unix-style filter for \CSV data.
|
252
|
+
# The input data is processed to form the output data:
|
253
|
+
# in_string = "foo,0\nbar,1\nbaz,2\n"
|
254
|
+
# out_string = ''
|
255
|
+
# CSV.filter(in_string, out_string) do |row|
|
256
|
+
# row[0] = row[0].upcase
|
257
|
+
# row[1] *= 4
|
182
258
|
# end
|
259
|
+
# out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n"
|
260
|
+
#
|
261
|
+
# == \CSV Objects
|
262
|
+
#
|
263
|
+
# There are three ways to create a \CSV object:
|
264
|
+
# - \Method CSV.new returns a new \CSV object.
|
265
|
+
# - \Method CSV.instance returns a new or cached \CSV object.
|
266
|
+
# - \Method \CSV() also returns a new or cached \CSV object.
|
267
|
+
#
|
268
|
+
# === Instance Methods
|
269
|
+
#
|
270
|
+
# \CSV has three groups of instance methods:
|
271
|
+
# - Its own internally defined instance methods.
|
272
|
+
# - Methods included by module Enumerable.
|
273
|
+
# - Methods delegated to class IO. See below.
|
274
|
+
#
|
275
|
+
# ==== Delegated Methods
|
276
|
+
#
|
277
|
+
# For convenience, a CSV object will delegate to many methods in class IO.
|
278
|
+
# (A few have wrapper "guard code" in \CSV.) You may call:
|
279
|
+
# * IO#binmode
|
280
|
+
# * #binmode?
|
281
|
+
# * IO#close
|
282
|
+
# * IO#close_read
|
283
|
+
# * IO#close_write
|
284
|
+
# * IO#closed?
|
285
|
+
# * #eof
|
286
|
+
# * #eof?
|
287
|
+
# * IO#external_encoding
|
288
|
+
# * IO#fcntl
|
289
|
+
# * IO#fileno
|
290
|
+
# * #flock
|
291
|
+
# * IO#flush
|
292
|
+
# * IO#fsync
|
293
|
+
# * IO#internal_encoding
|
294
|
+
# * #ioctl
|
295
|
+
# * IO#isatty
|
296
|
+
# * #path
|
297
|
+
# * IO#pid
|
298
|
+
# * IO#pos
|
299
|
+
# * IO#pos=
|
300
|
+
# * IO#reopen
|
301
|
+
# * #rewind
|
302
|
+
# * IO#seek
|
303
|
+
# * #stat
|
304
|
+
# * IO#string
|
305
|
+
# * IO#sync
|
306
|
+
# * IO#sync=
|
307
|
+
# * IO#tell
|
308
|
+
# * #to_i
|
309
|
+
# * #to_io
|
310
|
+
# * IO#truncate
|
311
|
+
# * IO#tty?
|
312
|
+
#
|
313
|
+
# === Options
|
314
|
+
#
|
315
|
+
# The default values for options are:
|
316
|
+
# DEFAULT_OPTIONS = {
|
317
|
+
# # For both parsing and generating.
|
318
|
+
# col_sep: ",",
|
319
|
+
# row_sep: :auto,
|
320
|
+
# quote_char: '"',
|
321
|
+
# # For parsing.
|
322
|
+
# field_size_limit: nil,
|
323
|
+
# converters: nil,
|
324
|
+
# unconverted_fields: nil,
|
325
|
+
# headers: false,
|
326
|
+
# return_headers: false,
|
327
|
+
# header_converters: nil,
|
328
|
+
# skip_blanks: false,
|
329
|
+
# skip_lines: nil,
|
330
|
+
# liberal_parsing: false,
|
331
|
+
# nil_value: nil,
|
332
|
+
# empty_value: "",
|
333
|
+
# strip: false,
|
334
|
+
# # For generating.
|
335
|
+
# write_headers: nil,
|
336
|
+
# quote_empty: true,
|
337
|
+
# force_quotes: false,
|
338
|
+
# write_converters: nil,
|
339
|
+
# write_nil_value: nil,
|
340
|
+
# write_empty_value: "",
|
341
|
+
# }
|
342
|
+
#
|
343
|
+
# ==== Options for Parsing
|
344
|
+
#
|
345
|
+
# Options for parsing, described in detail below, include:
|
346
|
+
# - +row_sep+: Specifies the row separator; used to delimit rows.
|
347
|
+
# - +col_sep+: Specifies the column separator; used to delimit fields.
|
348
|
+
# - +quote_char+: Specifies the quote character; used to quote fields.
|
349
|
+
# - +field_size_limit+: Specifies the maximum field size + 1 allowed.
|
350
|
+
# Deprecated since 3.2.3. Use +max_field_size+ instead.
|
351
|
+
# - +max_field_size+: Specifies the maximum field size allowed.
|
352
|
+
# - +converters+: Specifies the field converters to be used.
|
353
|
+
# - +unconverted_fields+: Specifies whether unconverted fields are to be available.
|
354
|
+
# - +headers+: Specifies whether data contains headers,
|
355
|
+
# or specifies the headers themselves.
|
356
|
+
# - +return_headers+: Specifies whether headers are to be returned.
|
357
|
+
# - +header_converters+: Specifies the header converters to be used.
|
358
|
+
# - +skip_blanks+: Specifies whether blank lines are to be ignored.
|
359
|
+
# - +skip_lines+: Specifies how comment lines are to be recognized.
|
360
|
+
# - +strip+: Specifies whether leading and trailing whitespace are to be
|
361
|
+
# stripped from fields. This must be compatible with +col_sep+; if it is not,
|
362
|
+
# then an +ArgumentError+ exception will be raised.
|
363
|
+
# - +liberal_parsing+: Specifies whether \CSV should attempt to parse
|
364
|
+
# non-compliant data.
|
365
|
+
# - +nil_value+: Specifies the object that is to be substituted for each null (no-text) field.
|
366
|
+
# - +empty_value+: Specifies the object that is to be substituted for each empty field.
|
367
|
+
#
|
368
|
+
# :include: ../doc/csv/options/common/row_sep.rdoc
|
369
|
+
#
|
370
|
+
# :include: ../doc/csv/options/common/col_sep.rdoc
|
371
|
+
#
|
372
|
+
# :include: ../doc/csv/options/common/quote_char.rdoc
|
373
|
+
#
|
374
|
+
# :include: ../doc/csv/options/parsing/field_size_limit.rdoc
|
375
|
+
#
|
376
|
+
# :include: ../doc/csv/options/parsing/converters.rdoc
|
377
|
+
#
|
378
|
+
# :include: ../doc/csv/options/parsing/unconverted_fields.rdoc
|
379
|
+
#
|
380
|
+
# :include: ../doc/csv/options/parsing/headers.rdoc
|
381
|
+
#
|
382
|
+
# :include: ../doc/csv/options/parsing/return_headers.rdoc
|
383
|
+
#
|
384
|
+
# :include: ../doc/csv/options/parsing/header_converters.rdoc
|
385
|
+
#
|
386
|
+
# :include: ../doc/csv/options/parsing/skip_blanks.rdoc
|
387
|
+
#
|
388
|
+
# :include: ../doc/csv/options/parsing/skip_lines.rdoc
|
389
|
+
#
|
390
|
+
# :include: ../doc/csv/options/parsing/strip.rdoc
|
391
|
+
#
|
392
|
+
# :include: ../doc/csv/options/parsing/liberal_parsing.rdoc
|
393
|
+
#
|
394
|
+
# :include: ../doc/csv/options/parsing/nil_value.rdoc
|
395
|
+
#
|
396
|
+
# :include: ../doc/csv/options/parsing/empty_value.rdoc
|
397
|
+
#
|
398
|
+
# ==== Options for Generating
|
399
|
+
#
|
400
|
+
# Options for generating, described in detail below, include:
|
401
|
+
# - +row_sep+: Specifies the row separator; used to delimit rows.
|
402
|
+
# - +col_sep+: Specifies the column separator; used to delimit fields.
|
403
|
+
# - +quote_char+: Specifies the quote character; used to quote fields.
|
404
|
+
# - +write_headers+: Specifies whether headers are to be written.
|
405
|
+
# - +force_quotes+: Specifies whether each output field is to be quoted.
|
406
|
+
# - +quote_empty+: Specifies whether each empty output field is to be quoted.
|
407
|
+
# - +write_converters+: Specifies the field converters to be used in writing.
|
408
|
+
# - +write_nil_value+: Specifies the object that is to be substituted for each +nil+-valued field.
|
409
|
+
# - +write_empty_value+: Specifies the object that is to be substituted for each empty field.
|
410
|
+
#
|
411
|
+
# :include: ../doc/csv/options/common/row_sep.rdoc
|
412
|
+
#
|
413
|
+
# :include: ../doc/csv/options/common/col_sep.rdoc
|
414
|
+
#
|
415
|
+
# :include: ../doc/csv/options/common/quote_char.rdoc
|
416
|
+
#
|
417
|
+
# :include: ../doc/csv/options/generating/write_headers.rdoc
|
183
418
|
#
|
184
|
-
#
|
419
|
+
# :include: ../doc/csv/options/generating/force_quotes.rdoc
|
185
420
|
#
|
186
|
-
#
|
187
|
-
# csv_string = ["CSV", "data"].to_csv # to CSV
|
188
|
-
# csv_array = "CSV,String".parse_csv # from CSV
|
421
|
+
# :include: ../doc/csv/options/generating/quote_empty.rdoc
|
189
422
|
#
|
190
|
-
#
|
191
|
-
# CSV { |csv_out| csv_out << %w{my data here} } # to $stdout
|
192
|
-
# CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
|
193
|
-
# CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
|
194
|
-
# CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
|
423
|
+
# :include: ../doc/csv/options/generating/write_converters.rdoc
|
195
424
|
#
|
196
|
-
#
|
425
|
+
# :include: ../doc/csv/options/generating/write_nil_value.rdoc
|
197
426
|
#
|
198
|
-
#
|
427
|
+
# :include: ../doc/csv/options/generating/write_empty_value.rdoc
|
428
|
+
#
|
429
|
+
# === \CSV with Headers
|
199
430
|
#
|
200
431
|
# CSV allows you to specify column names of a CSV file, whether they are in data, or
|
201
|
-
# provided separately. If headers specified, reading methods return an instance
|
432
|
+
# provided separately. If headers are specified, reading methods return an instance
|
202
433
|
# of CSV::Table, consisting of CSV::Row.
|
203
434
|
#
|
204
435
|
# # Headers are part of data
|
205
436
|
# data = CSV.parse(<<~ROWS, headers: true)
|
206
437
|
# Name,Department,Salary
|
207
|
-
# Bob,
|
438
|
+
# Bob,Engineering,1000
|
208
439
|
# Jane,Sales,2000
|
209
440
|
# John,Management,5000
|
210
441
|
# ROWS
|
211
442
|
#
|
212
443
|
# data.class #=> CSV::Table
|
213
|
-
# data.first #=> #<CSV::Row "Name":"Bob" "Department":"
|
214
|
-
# data.first.to_h #=> {"Name"=>"Bob", "Department"=>"
|
444
|
+
# data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engineering" "Salary":"1000">
|
445
|
+
# data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"}
|
215
446
|
#
|
216
447
|
# # Headers provided by developer
|
217
|
-
# data = CSV.parse('Bob,
|
218
|
-
# data.first #=> #<CSV::Row name:"Bob" department:"
|
219
|
-
#
|
220
|
-
# ===
|
221
|
-
#
|
222
|
-
#
|
223
|
-
#
|
224
|
-
#
|
225
|
-
#
|
226
|
-
#
|
227
|
-
#
|
228
|
-
#
|
229
|
-
#
|
230
|
-
#
|
231
|
-
#
|
232
|
-
#
|
233
|
-
#
|
234
|
-
#
|
235
|
-
#
|
448
|
+
# data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
|
449
|
+
# data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
|
450
|
+
#
|
451
|
+
# === \Converters
|
452
|
+
#
|
453
|
+
# By default, each value (field or header) parsed by \CSV is formed into a \String.
|
454
|
+
# You can use a _field_ _converter_ or _header_ _converter_
|
455
|
+
# to intercept and modify the parsed values:
|
456
|
+
# - See {Field Converters}[#class-CSV-label-Field+Converters].
|
457
|
+
# - See {Header Converters}[#class-CSV-label-Header+Converters].
|
458
|
+
#
|
459
|
+
# Also by default, each value to be written during generation is written 'as-is'.
|
460
|
+
# You can use a _write_ _converter_ to modify values before writing.
|
461
|
+
# - See {Write Converters}[#class-CSV-label-Write+Converters].
|
462
|
+
#
|
463
|
+
# ==== Specifying \Converters
|
464
|
+
#
|
465
|
+
# You can specify converters for parsing or generating in the +options+
|
466
|
+
# argument to various \CSV methods:
|
467
|
+
# - Option +converters+ for converting parsed field values.
|
468
|
+
# - Option +header_converters+ for converting parsed header values.
|
469
|
+
# - Option +write_converters+ for converting values to be written (generated).
|
470
|
+
#
|
471
|
+
# There are three forms for specifying converters:
|
472
|
+
# - A converter proc: executable code to be used for conversion.
|
473
|
+
# - A converter name: the name of a stored converter.
|
474
|
+
# - A converter list: an array of converter procs, converter names, and converter lists.
|
475
|
+
#
|
476
|
+
# ===== Converter Procs
|
477
|
+
#
|
478
|
+
# This converter proc, +strip_converter+, accepts a value +field+
|
479
|
+
# and returns <tt>field.strip</tt>:
|
480
|
+
# strip_converter = proc {|field| field.strip }
|
481
|
+
# In this call to <tt>CSV.parse</tt>,
|
482
|
+
# the keyword argument <tt>converters: strip_converter</tt>
|
483
|
+
# specifies that:
|
484
|
+
# - \Proc +strip_converter+ is to be called for each parsed field.
|
485
|
+
# - The converter's return value is to replace the +field+ value.
|
486
|
+
# Example:
|
487
|
+
# string = " foo , 0 \n bar , 1 \n baz , 2 \n"
|
488
|
+
# array = CSV.parse(string, converters: strip_converter)
|
489
|
+
# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
490
|
+
#
|
491
|
+
# A converter proc can receive a second argument, +field_info+,
|
492
|
+
# that contains details about the field.
|
493
|
+
# This modified +strip_converter+ displays its arguments:
|
494
|
+
# strip_converter = proc do |field, field_info|
|
495
|
+
# p [field, field_info]
|
496
|
+
# field.strip
|
497
|
+
# end
|
498
|
+
# string = " foo , 0 \n bar , 1 \n baz , 2 \n"
|
499
|
+
# array = CSV.parse(string, converters: strip_converter)
|
500
|
+
# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
501
|
+
# Output:
|
502
|
+
# [" foo ", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
|
503
|
+
# [" 0 ", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
|
504
|
+
# [" bar ", #<struct CSV::FieldInfo index=0, line=2, header=nil>]
|
505
|
+
# [" 1 ", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
|
506
|
+
# [" baz ", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
|
507
|
+
# [" 2 ", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
|
508
|
+
# Each CSV::FieldInfo object shows:
|
509
|
+
# - The 0-based field index.
|
510
|
+
# - The 1-based line index.
|
511
|
+
# - The field header, if any.
|
512
|
+
#
|
513
|
+
# ===== Stored \Converters
|
514
|
+
#
|
515
|
+
# A converter may be given a name and stored in a structure where
|
516
|
+
# the parsing methods can find it by name.
|
517
|
+
#
|
518
|
+
# The storage structure for field converters is the \Hash CSV::Converters.
|
519
|
+
# It has several built-in converter procs:
|
520
|
+
# - <tt>:integer</tt>: converts each \String-embedded integer into a true \Integer.
|
521
|
+
# - <tt>:float</tt>: converts each \String-embedded float into a true \Float.
|
522
|
+
# - <tt>:date</tt>: converts each \String-embedded date into a true \Date.
|
523
|
+
# - <tt>:date_time</tt>: converts each \String-embedded date-time into a true \DateTime
|
524
|
+
# .
|
525
|
+
# This example creates a converter proc, then stores it:
|
526
|
+
# strip_converter = proc {|field| field.strip }
|
527
|
+
# CSV::Converters[:strip] = strip_converter
|
528
|
+
# Then the parsing method call can refer to the converter
|
529
|
+
# by its name, <tt>:strip</tt>:
|
530
|
+
# string = " foo , 0 \n bar , 1 \n baz , 2 \n"
|
531
|
+
# array = CSV.parse(string, converters: :strip)
|
532
|
+
# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
533
|
+
#
|
534
|
+
# The storage structure for header converters is the \Hash CSV::HeaderConverters,
|
535
|
+
# which works in the same way.
|
536
|
+
# It also has built-in converter procs:
|
537
|
+
# - <tt>:downcase</tt>: Downcases each header.
|
538
|
+
# - <tt>:symbol</tt>: Converts each header to a \Symbol.
|
539
|
+
#
|
540
|
+
# There is no such storage structure for write converters.
|
541
|
+
#
|
542
|
+
# In order for the parsing methods to access stored converters in non-main-Ractors, the
|
543
|
+
# storage structure must be made shareable first.
|
544
|
+
# Therefore, <tt>Ractor.make_shareable(CSV::Converters)</tt> and
|
545
|
+
# <tt>Ractor.make_shareable(CSV::HeaderConverters)</tt> must be called before the creation
|
546
|
+
# of Ractors that use the converters stored in these structures. (Since making the storage
|
547
|
+
# structures shareable involves freezing them, any custom converters that are to be used
|
548
|
+
# must be added first.)
|
549
|
+
#
|
550
|
+
# ===== Converter Lists
|
551
|
+
#
|
552
|
+
# A _converter_ _list_ is an \Array that may include any assortment of:
|
553
|
+
# - Converter procs.
|
554
|
+
# - Names of stored converters.
|
555
|
+
# - Nested converter lists.
|
556
|
+
#
|
557
|
+
# Examples:
|
558
|
+
# numeric_converters = [:integer, :float]
|
559
|
+
# date_converters = [:date, :date_time]
|
560
|
+
# [numeric_converters, strip_converter]
|
561
|
+
# [strip_converter, date_converters, :float]
|
562
|
+
#
|
563
|
+
# Like a converter proc, a converter list may be named and stored in either
|
564
|
+
# \CSV::Converters or CSV::HeaderConverters:
|
565
|
+
# CSV::Converters[:custom] = [strip_converter, date_converters, :float]
|
566
|
+
# CSV::HeaderConverters[:custom] = [:downcase, :symbol]
|
567
|
+
#
|
568
|
+
# There are two built-in converter lists:
|
569
|
+
# CSV::Converters[:numeric] # => [:integer, :float]
|
570
|
+
# CSV::Converters[:all] # => [:date_time, :numeric]
|
571
|
+
#
|
572
|
+
# ==== Field \Converters
|
573
|
+
#
|
574
|
+
# With no conversion, all parsed fields in all rows become Strings:
|
575
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
576
|
+
# ary = CSV.parse(string)
|
577
|
+
# ary # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
578
|
+
#
|
579
|
+
# When you specify a field converter, each parsed field is passed to the converter;
|
580
|
+
# its return value becomes the stored value for the field.
|
581
|
+
# A converter might, for example, convert an integer embedded in a \String
|
582
|
+
# into a true \Integer.
|
583
|
+
# (In fact, that's what built-in field converter +:integer+ does.)
|
584
|
+
#
|
585
|
+
# There are three ways to use field \converters.
|
586
|
+
#
|
587
|
+
# - Using option {converters}[#class-CSV-label-Option+converters] with a parsing method:
|
588
|
+
# ary = CSV.parse(string, converters: :integer)
|
589
|
+
# ary # => [["foo", 0], ["bar", 1], ["baz", 2]]
|
590
|
+
# - Using option {converters}[#class-CSV-label-Option+converters] with a new \CSV instance:
|
591
|
+
# csv = CSV.new(string, converters: :integer)
|
592
|
+
# # Field converters in effect:
|
593
|
+
# csv.converters # => [:integer]
|
594
|
+
# csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
|
595
|
+
# - Using method #convert to add a field converter to a \CSV instance:
|
596
|
+
# csv = CSV.new(string)
|
597
|
+
# # Add a converter.
|
598
|
+
# csv.convert(:integer)
|
599
|
+
# csv.converters # => [:integer]
|
600
|
+
# csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
|
601
|
+
#
|
602
|
+
# Installing a field converter does not affect already-read rows:
|
603
|
+
# csv = CSV.new(string)
|
604
|
+
# csv.shift # => ["foo", "0"]
|
605
|
+
# # Add a converter.
|
606
|
+
# csv.convert(:integer)
|
607
|
+
# csv.converters # => [:integer]
|
608
|
+
# csv.read # => [["bar", 1], ["baz", 2]]
|
609
|
+
#
|
610
|
+
# There are additional built-in \converters, and custom \converters are also supported.
|
611
|
+
#
|
612
|
+
# ===== Built-In Field \Converters
|
613
|
+
#
|
614
|
+
# The built-in field converters are in \Hash CSV::Converters:
|
615
|
+
# - Each key is a field converter name.
|
616
|
+
# - Each value is one of:
|
617
|
+
# - A \Proc field converter.
|
618
|
+
# - An \Array of field converter names.
|
619
|
+
#
|
620
|
+
# Display:
|
621
|
+
# CSV::Converters.each_pair do |name, value|
|
622
|
+
# if value.kind_of?(Proc)
|
623
|
+
# p [name, value.class]
|
624
|
+
# else
|
625
|
+
# p [name, value]
|
626
|
+
# end
|
627
|
+
# end
|
628
|
+
# Output:
|
629
|
+
# [:integer, Proc]
|
630
|
+
# [:float, Proc]
|
631
|
+
# [:numeric, [:integer, :float]]
|
632
|
+
# [:date, Proc]
|
633
|
+
# [:date_time, Proc]
|
634
|
+
# [:all, [:date_time, :numeric]]
|
635
|
+
#
|
636
|
+
# Each of these converters transcodes values to UTF-8 before attempting conversion.
|
637
|
+
# If a value cannot be transcoded to UTF-8 the conversion will
|
638
|
+
# fail and the value will remain unconverted.
|
639
|
+
#
|
640
|
+
# Converter +:integer+ converts each field that Integer() accepts:
|
641
|
+
# data = '0,1,2,x'
|
642
|
+
# # Without the converter
|
643
|
+
# csv = CSV.parse_line(data)
|
644
|
+
# csv # => ["0", "1", "2", "x"]
|
645
|
+
# # With the converter
|
646
|
+
# csv = CSV.parse_line(data, converters: :integer)
|
647
|
+
# csv # => [0, 1, 2, "x"]
|
648
|
+
#
|
649
|
+
# Converter +:float+ converts each field that Float() accepts:
|
650
|
+
# data = '1.0,3.14159,x'
|
651
|
+
# # Without the converter
|
652
|
+
# csv = CSV.parse_line(data)
|
653
|
+
# csv # => ["1.0", "3.14159", "x"]
|
654
|
+
# # With the converter
|
655
|
+
# csv = CSV.parse_line(data, converters: :float)
|
656
|
+
# csv # => [1.0, 3.14159, "x"]
|
657
|
+
#
|
658
|
+
# Converter +:numeric+ converts with both +:integer+ and +:float+.
|
659
|
+
#
|
660
|
+
# Converter +:date+ converts each field that Date::parse accepts:
|
661
|
+
# data = '2001-02-03,x'
|
662
|
+
# # Without the converter
|
663
|
+
# csv = CSV.parse_line(data)
|
664
|
+
# csv # => ["2001-02-03", "x"]
|
665
|
+
# # With the converter
|
666
|
+
# csv = CSV.parse_line(data, converters: :date)
|
667
|
+
# csv # => [#<Date: 2001-02-03 ((2451944j,0s,0n),+0s,2299161j)>, "x"]
|
668
|
+
#
|
669
|
+
# Converter +:date_time+ converts each field that DateTime::parse accepts:
|
670
|
+
# data = '2020-05-07T14:59:00-05:00,x'
|
671
|
+
# # Without the converter
|
672
|
+
# csv = CSV.parse_line(data)
|
673
|
+
# csv # => ["2020-05-07T14:59:00-05:00", "x"]
|
674
|
+
# # With the converter
|
675
|
+
# csv = CSV.parse_line(data, converters: :date_time)
|
676
|
+
# csv # => [#<DateTime: 2020-05-07T14:59:00-05:00 ((2458977j,71940s,0n),-18000s,2299161j)>, "x"]
|
677
|
+
#
|
678
|
+
# Converter +:all+ converts with both +:date_time+ and +:numeric+.
|
679
|
+
#
|
680
|
+
# As seen above, method #convert adds \converters to a \CSV instance,
|
681
|
+
# and method #converters returns an \Array of the \converters in effect:
|
682
|
+
# csv = CSV.new('0,1,2')
|
683
|
+
# csv.converters # => []
|
684
|
+
# csv.convert(:integer)
|
685
|
+
# csv.converters # => [:integer]
|
686
|
+
# csv.convert(:date)
|
687
|
+
# csv.converters # => [:integer, :date]
|
688
|
+
#
|
689
|
+
# ===== Custom Field \Converters
|
690
|
+
#
|
691
|
+
# You can define a custom field converter:
|
692
|
+
# strip_converter = proc {|field| field.strip }
|
693
|
+
# string = " foo , 0 \n bar , 1 \n baz , 2 \n"
|
694
|
+
# array = CSV.parse(string, converters: strip_converter)
|
695
|
+
# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
696
|
+
# You can register the converter in \Converters \Hash,
|
697
|
+
# which allows you to refer to it by name:
|
698
|
+
# CSV::Converters[:strip] = strip_converter
|
699
|
+
# string = " foo , 0 \n bar , 1 \n baz , 2 \n"
|
700
|
+
# array = CSV.parse(string, converters: :strip)
|
701
|
+
# array # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
702
|
+
#
|
703
|
+
# ==== Header \Converters
|
704
|
+
#
|
705
|
+
# Header converters operate only on headers (and not on other rows).
|
706
|
+
#
|
707
|
+
# There are three ways to use header \converters;
|
708
|
+
# these examples use built-in header converter +:downcase+,
|
709
|
+
# which downcases each parsed header.
|
710
|
+
#
|
711
|
+
# - Option +header_converters+ with a singleton parsing method:
|
712
|
+
# string = "Name,Count\nFoo,0\nBar,1\nBaz,2"
|
713
|
+
# tbl = CSV.parse(string, headers: true, header_converters: :downcase)
|
714
|
+
# tbl.class # => CSV::Table
|
715
|
+
# tbl.headers # => ["name", "count"]
|
716
|
+
#
|
717
|
+
# - Option +header_converters+ with a new \CSV instance:
|
718
|
+
# csv = CSV.new(string, header_converters: :downcase)
|
719
|
+
# # Header converters in effect:
|
720
|
+
# csv.header_converters # => [:downcase]
|
721
|
+
# tbl = CSV.parse(string, headers: true)
|
722
|
+
# tbl.headers # => ["Name", "Count"]
|
723
|
+
#
|
724
|
+
# - Method #header_convert adds a header converter to a \CSV instance:
|
725
|
+
# csv = CSV.new(string)
|
726
|
+
# # Add a header converter.
|
727
|
+
# csv.header_convert(:downcase)
|
728
|
+
# csv.header_converters # => [:downcase]
|
729
|
+
# tbl = CSV.parse(string, headers: true)
|
730
|
+
# tbl.headers # => ["Name", "Count"]
|
731
|
+
#
|
732
|
+
# ===== Built-In Header \Converters
|
733
|
+
#
|
734
|
+
# The built-in header \converters are in \Hash CSV::HeaderConverters.
|
735
|
+
# The keys there are the names of the \converters:
|
736
|
+
# CSV::HeaderConverters.keys # => [:downcase, :symbol]
|
737
|
+
#
|
738
|
+
# Converter +:downcase+ converts each header by downcasing it:
|
739
|
+
# string = "Name,Count\nFoo,0\nBar,1\nBaz,2"
|
740
|
+
# tbl = CSV.parse(string, headers: true, header_converters: :downcase)
|
741
|
+
# tbl.class # => CSV::Table
|
742
|
+
# tbl.headers # => ["name", "count"]
|
743
|
+
#
|
744
|
+
# Converter +:symbol+ converts each header by making it into a \Symbol:
|
745
|
+
# string = "Name,Count\nFoo,0\nBar,1\nBaz,2"
|
746
|
+
# tbl = CSV.parse(string, headers: true, header_converters: :symbol)
|
747
|
+
# tbl.headers # => [:name, :count]
|
748
|
+
# Details:
|
749
|
+
# - Strips leading and trailing whitespace.
|
750
|
+
# - Downcases the header.
|
751
|
+
# - Replaces embedded spaces with underscores.
|
752
|
+
# - Removes non-word characters.
|
753
|
+
# - Makes the string into a \Symbol.
|
754
|
+
#
|
755
|
+
# ===== Custom Header \Converters
|
756
|
+
#
|
757
|
+
# You can define a custom header converter:
|
758
|
+
# upcase_converter = proc {|header| header.upcase }
|
759
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
760
|
+
# table = CSV.parse(string, headers: true, header_converters: upcase_converter)
|
761
|
+
# table # => #<CSV::Table mode:col_or_row row_count:4>
|
762
|
+
# table.headers # => ["NAME", "VALUE"]
|
763
|
+
# You can register the converter in \HeaderConverters \Hash,
|
764
|
+
# which allows you to refer to it by name:
|
765
|
+
# CSV::HeaderConverters[:upcase] = upcase_converter
|
766
|
+
# table = CSV.parse(string, headers: true, header_converters: :upcase)
|
767
|
+
# table # => #<CSV::Table mode:col_or_row row_count:4>
|
768
|
+
# table.headers # => ["NAME", "VALUE"]
|
769
|
+
#
|
770
|
+
# ==== Write \Converters
|
771
|
+
#
|
772
|
+
# When you specify a write converter for generating \CSV,
|
773
|
+
# each field to be written is passed to the converter;
|
774
|
+
# its return value becomes the new value for the field.
|
775
|
+
# A converter might, for example, strip whitespace from a field.
|
776
|
+
#
|
777
|
+
# Using no write converter (all fields unmodified):
|
778
|
+
# output_string = CSV.generate do |csv|
|
779
|
+
# csv << [' foo ', 0]
|
780
|
+
# csv << [' bar ', 1]
|
781
|
+
# csv << [' baz ', 2]
|
782
|
+
# end
|
783
|
+
# output_string # => " foo ,0\n bar ,1\n baz ,2\n"
|
784
|
+
# Using option +write_converters+ with two custom write converters:
|
785
|
+
# strip_converter = proc {|field| field.respond_to?(:strip) ? field.strip : field }
|
786
|
+
# upcase_converter = proc {|field| field.respond_to?(:upcase) ? field.upcase : field }
|
787
|
+
# write_converters = [strip_converter, upcase_converter]
|
788
|
+
# output_string = CSV.generate(write_converters: write_converters) do |csv|
|
789
|
+
# csv << [' foo ', 0]
|
790
|
+
# csv << [' bar ', 1]
|
791
|
+
# csv << [' baz ', 2]
|
792
|
+
# end
|
793
|
+
# output_string # => "FOO,0\nBAR,1\nBAZ,2\n"
|
236
794
|
#
|
237
|
-
#
|
795
|
+
# === Character Encodings (M17n or Multilingualization)
|
238
796
|
#
|
239
797
|
# This new CSV parser is m17n savvy. The parser works in the Encoding of the IO
|
240
|
-
# or String object being read from or written to.
|
798
|
+
# or String object being read from or written to. Your data is never transcoded
|
241
799
|
# (unless you ask Ruby to transcode it for you) and will literally be parsed in
|
242
|
-
# the Encoding it is in.
|
243
|
-
# Encoding of your data.
|
800
|
+
# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
|
801
|
+
# Encoding of your data. This is accomplished by transcoding the parser itself
|
244
802
|
# into your Encoding.
|
245
803
|
#
|
246
804
|
# Some transcoding must take place, of course, to accomplish this multiencoding
|
247
|
-
# support.
|
805
|
+
# support. For example, <tt>:col_sep</tt>, <tt>:row_sep</tt>, and
|
248
806
|
# <tt>:quote_char</tt> must be transcoded to match your data. Hopefully this
|
249
807
|
# makes the entire process feel transparent, since CSV's defaults should just
|
250
|
-
# magically work for your data.
|
808
|
+
# magically work for your data. However, you can set these values manually in
|
251
809
|
# the target Encoding to avoid the translation.
|
252
810
|
#
|
253
811
|
# It's also important to note that while all of CSV's core parser is now
|
254
|
-
# Encoding agnostic, some features are not.
|
812
|
+
# Encoding agnostic, some features are not. For example, the built-in
|
255
813
|
# converters will try to transcode data to UTF-8 before making conversions.
|
256
814
|
# Again, you can provide custom converters that are aware of your Encodings to
|
257
|
-
# avoid this translation.
|
815
|
+
# avoid this translation. It's just too hard for me to support native
|
258
816
|
# conversions in all of Ruby's Encodings.
|
259
817
|
#
|
260
|
-
# Anyway, the practical side of this is simple:
|
818
|
+
# Anyway, the practical side of this is simple: make sure IO and String objects
|
261
819
|
# passed into CSV have the proper Encoding set and everything should just work.
|
262
820
|
# CSV methods that allow you to open IO objects (CSV::foreach(), CSV::open(),
|
263
821
|
# CSV::read(), and CSV::readlines()) do allow you to specify the Encoding.
|
264
822
|
#
|
265
823
|
# One minor exception comes when generating CSV into a String with an Encoding
|
266
|
-
# that is not ASCII compatible.
|
824
|
+
# that is not ASCII compatible. There's no existing data for CSV to use to
|
267
825
|
# prepare itself and thus you will probably need to manually specify the desired
|
268
|
-
# Encoding for most of those cases.
|
826
|
+
# Encoding for most of those cases. It will try to guess using the fields in a
|
269
827
|
# row of output though, when using CSV::generate_line() or Array#to_csv().
|
270
828
|
#
|
271
829
|
# I try to point out any other Encoding issues in the documentation of methods
|
272
830
|
# as they come up.
|
273
831
|
#
|
274
832
|
# This has been tested to the best of my ability with all non-"dummy" Encodings
|
275
|
-
# Ruby ships with.
|
833
|
+
# Ruby ships with. However, it is brave new code and may have some bugs.
|
276
834
|
# Please feel free to {report}[mailto:james@grayproductions.net] any issues you
|
277
835
|
# find with it.
|
278
836
|
#
|
@@ -288,6 +846,15 @@ class CSV
|
|
288
846
|
end
|
289
847
|
end
|
290
848
|
|
849
|
+
# The error raised when the parser encounters an invalid encoding in CSV.
|
850
|
+
class InvalidEncodingError < MalformedCSVError
|
851
|
+
attr_reader :encoding
|
852
|
+
def initialize(encoding, line_number)
|
853
|
+
@encoding = encoding
|
854
|
+
super("Invalid byte sequence in #{encoding}", line_number)
|
855
|
+
end
|
856
|
+
end
|
857
|
+
|
291
858
|
#
|
292
859
|
# A FieldInfo Struct contains details about a field's position in the data
|
293
860
|
# source it was read from. CSV will pass this Struct to some blocks that make
|
@@ -297,8 +864,9 @@ class CSV
|
|
297
864
|
# <b><tt>index</tt></b>:: The zero-based index of the field in its row.
|
298
865
|
# <b><tt>line</tt></b>:: The line of the data source this row is from.
|
299
866
|
# <b><tt>header</tt></b>:: The header for the column, when available.
|
867
|
+
# <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not.
|
300
868
|
#
|
301
|
-
FieldInfo = Struct.new(:index, :line, :header)
|
869
|
+
FieldInfo = Struct.new(:index, :line, :header, :quoted?)
|
302
870
|
|
303
871
|
# A Regexp used to find and convert some common Date formats.
|
304
872
|
DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} |
|
@@ -306,39 +874,20 @@ class CSV
|
|
306
874
|
# A Regexp used to find and convert some common DateTime formats.
|
307
875
|
DateTimeMatcher =
|
308
876
|
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
309
|
-
|
310
|
-
# ISO-8601
|
877
|
+
# ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse
|
311
878
|
\d{4}-\d{2}-\d{2}
|
312
|
-
(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
879
|
+
(?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
313
880
|
)\z /x
|
314
881
|
|
315
882
|
# The encoding used by all converters.
|
316
883
|
ConverterEncoding = Encoding.find("UTF-8")
|
317
884
|
|
885
|
+
# A \Hash containing the names and \Procs for the built-in field converters.
|
886
|
+
# See {Built-In Field Converters}[#class-CSV-label-Built-In+Field+Converters].
|
318
887
|
#
|
319
|
-
# This Hash
|
320
|
-
#
|
321
|
-
#
|
322
|
-
#
|
323
|
-
# <b><tt>:integer</tt></b>:: Converts any field Integer() accepts.
|
324
|
-
# <b><tt>:float</tt></b>:: Converts any field Float() accepts.
|
325
|
-
# <b><tt>:numeric</tt></b>:: A combination of <tt>:integer</tt>
|
326
|
-
# and <tt>:float</tt>.
|
327
|
-
# <b><tt>:date</tt></b>:: Converts any field Date::parse() accepts.
|
328
|
-
# <b><tt>:date_time</tt></b>:: Converts any field DateTime::parse() accepts.
|
329
|
-
# <b><tt>:all</tt></b>:: All built-in converters. A combination of
|
330
|
-
# <tt>:date_time</tt> and <tt>:numeric</tt>.
|
331
|
-
#
|
332
|
-
# All built-in converters transcode field data to UTF-8 before attempting a
|
333
|
-
# conversion. If your data cannot be transcoded to UTF-8 the conversion will
|
334
|
-
# fail and the field will remain unchanged.
|
335
|
-
#
|
336
|
-
# This Hash is intentionally left unfrozen and users should feel free to add
|
337
|
-
# values to it that can be accessed by all CSV objects.
|
338
|
-
#
|
339
|
-
# To add a combo field, the value should be an Array of names. Combo fields
|
340
|
-
# can be nested with other combo fields.
|
341
|
-
#
|
888
|
+
# This \Hash is intentionally left unfrozen, and may be extended with
|
889
|
+
# custom field converters.
|
890
|
+
# See {Custom Field Converters}[#class-CSV-label-Custom+Field+Converters].
|
342
891
|
Converters = {
|
343
892
|
integer: lambda { |f|
|
344
893
|
Integer(f.encode(ConverterEncoding)) rescue f
|
@@ -366,992 +915,1780 @@ class CSV
|
|
366
915
|
all: [:date_time, :numeric],
|
367
916
|
}
|
368
917
|
|
918
|
+
# A \Hash containing the names and \Procs for the built-in header converters.
|
919
|
+
# See {Built-In Header Converters}[#class-CSV-label-Built-In+Header+Converters].
|
369
920
|
#
|
370
|
-
# This Hash
|
371
|
-
#
|
372
|
-
#
|
373
|
-
#
|
374
|
-
# <b><tt>:downcase</tt></b>:: Calls downcase() on the header String.
|
375
|
-
# <b><tt>:symbol</tt></b>:: Leading/trailing spaces are dropped, string is
|
376
|
-
# downcased, remaining spaces are replaced with
|
377
|
-
# underscores, non-word characters are dropped,
|
378
|
-
# and finally to_sym() is called.
|
379
|
-
#
|
380
|
-
# All built-in header converters transcode header data to UTF-8 before
|
381
|
-
# attempting a conversion. If your data cannot be transcoded to UTF-8 the
|
382
|
-
# conversion will fail and the header will remain unchanged.
|
383
|
-
#
|
384
|
-
# This Hash is intentionally left unfrozen and users should feel free to add
|
385
|
-
# values to it that can be accessed by all CSV objects.
|
386
|
-
#
|
387
|
-
# To add a combo field, the value should be an Array of names. Combo fields
|
388
|
-
# can be nested with other combo fields.
|
389
|
-
#
|
921
|
+
# This \Hash is intentionally left unfrozen, and may be extended with
|
922
|
+
# custom header converters.
|
923
|
+
# See {Custom Header Converters}[#class-CSV-label-Custom+Header+Converters].
|
390
924
|
HeaderConverters = {
|
391
925
|
downcase: lambda { |h| h.encode(ConverterEncoding).downcase },
|
392
926
|
symbol: lambda { |h|
|
393
927
|
h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip.
|
394
928
|
gsub(/\s+/, "_").to_sym
|
395
|
-
}
|
929
|
+
},
|
930
|
+
symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym }
|
396
931
|
}
|
397
932
|
|
398
|
-
#
|
399
|
-
# The options used when no overrides are given by calling code. They are:
|
400
|
-
#
|
401
|
-
# <b><tt>:col_sep</tt></b>:: <tt>","</tt>
|
402
|
-
# <b><tt>:row_sep</tt></b>:: <tt>:auto</tt>
|
403
|
-
# <b><tt>:quote_char</tt></b>:: <tt>'"'</tt>
|
404
|
-
# <b><tt>:field_size_limit</tt></b>:: +nil+
|
405
|
-
# <b><tt>:converters</tt></b>:: +nil+
|
406
|
-
# <b><tt>:unconverted_fields</tt></b>:: +nil+
|
407
|
-
# <b><tt>:headers</tt></b>:: +false+
|
408
|
-
# <b><tt>:return_headers</tt></b>:: +false+
|
409
|
-
# <b><tt>:header_converters</tt></b>:: +nil+
|
410
|
-
# <b><tt>:skip_blanks</tt></b>:: +false+
|
411
|
-
# <b><tt>:force_quotes</tt></b>:: +false+
|
412
|
-
# <b><tt>:skip_lines</tt></b>:: +nil+
|
413
|
-
# <b><tt>:liberal_parsing</tt></b>:: +false+
|
414
|
-
#
|
933
|
+
# Default values for method options.
|
415
934
|
DEFAULT_OPTIONS = {
|
935
|
+
# For both parsing and generating.
|
416
936
|
col_sep: ",",
|
417
937
|
row_sep: :auto,
|
418
938
|
quote_char: '"',
|
939
|
+
# For parsing.
|
419
940
|
field_size_limit: nil,
|
941
|
+
max_field_size: nil,
|
420
942
|
converters: nil,
|
421
943
|
unconverted_fields: nil,
|
422
944
|
headers: false,
|
423
945
|
return_headers: false,
|
424
946
|
header_converters: nil,
|
425
947
|
skip_blanks: false,
|
426
|
-
force_quotes: false,
|
427
948
|
skip_lines: nil,
|
428
949
|
liberal_parsing: false,
|
950
|
+
nil_value: nil,
|
951
|
+
empty_value: "",
|
952
|
+
strip: false,
|
953
|
+
# For generating.
|
954
|
+
write_headers: nil,
|
955
|
+
quote_empty: true,
|
956
|
+
force_quotes: false,
|
957
|
+
write_converters: nil,
|
958
|
+
write_nil_value: nil,
|
959
|
+
write_empty_value: "",
|
429
960
|
}.freeze
|
430
961
|
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
#
|
442
|
-
|
443
|
-
|
962
|
+
class << self
|
963
|
+
# :call-seq:
|
964
|
+
# instance(string, **options)
|
965
|
+
# instance(io = $stdout, **options)
|
966
|
+
# instance(string, **options) {|csv| ... }
|
967
|
+
# instance(io = $stdout, **options) {|csv| ... }
|
968
|
+
#
|
969
|
+
# Creates or retrieves cached \CSV objects.
|
970
|
+
# For arguments and options, see CSV.new.
|
971
|
+
#
|
972
|
+
# This API is not Ractor-safe.
|
973
|
+
#
|
974
|
+
# ---
|
975
|
+
#
|
976
|
+
# With no block given, returns a \CSV object.
|
977
|
+
#
|
978
|
+
# The first call to +instance+ creates and caches a \CSV object:
|
979
|
+
# s0 = 's0'
|
980
|
+
# csv0 = CSV.instance(s0)
|
981
|
+
# csv0.class # => CSV
|
982
|
+
#
|
983
|
+
# Subsequent calls to +instance+ with that _same_ +string+ or +io+
|
984
|
+
# retrieve that same cached object:
|
985
|
+
# csv1 = CSV.instance(s0)
|
986
|
+
# csv1.class # => CSV
|
987
|
+
# csv1.equal?(csv0) # => true # Same CSV object
|
988
|
+
#
|
989
|
+
# A subsequent call to +instance+ with a _different_ +string+ or +io+
|
990
|
+
# creates and caches a _different_ \CSV object.
|
991
|
+
# s1 = 's1'
|
992
|
+
# csv2 = CSV.instance(s1)
|
993
|
+
# csv2.equal?(csv0) # => false # Different CSV object
|
994
|
+
#
|
995
|
+
# All the cached objects remain available:
|
996
|
+
# csv3 = CSV.instance(s0)
|
997
|
+
# csv3.equal?(csv0) # => true # Same CSV object
|
998
|
+
# csv4 = CSV.instance(s1)
|
999
|
+
# csv4.equal?(csv2) # => true # Same CSV object
|
1000
|
+
#
|
1001
|
+
# ---
|
1002
|
+
#
|
1003
|
+
# When a block is given, calls the block with the created or retrieved
|
1004
|
+
# \CSV object; returns the block's return value:
|
1005
|
+
# CSV.instance(s0) {|csv| :foo } # => :foo
|
1006
|
+
def instance(data = $stdout, **options)
|
1007
|
+
# create a _signature_ for this method call, data object and options
|
1008
|
+
sig = [data.object_id] +
|
1009
|
+
options.values_at(*DEFAULT_OPTIONS.keys)
|
1010
|
+
|
1011
|
+
# fetch or create the instance for this signature
|
1012
|
+
@@instances ||= Hash.new
|
1013
|
+
instance = (@@instances[sig] ||= new(data, **options))
|
1014
|
+
|
1015
|
+
if block_given?
|
1016
|
+
yield instance # run block, if given, returning result
|
1017
|
+
else
|
1018
|
+
instance # or return the instance
|
1019
|
+
end
|
1020
|
+
end
|
444
1021
|
|
445
|
-
#
|
446
|
-
|
447
|
-
|
1022
|
+
# :call-seq:
|
1023
|
+
# filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
|
1024
|
+
# filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table
|
1025
|
+
# filter(**options) {|row| ... } -> array_of_arrays or csv_table
|
1026
|
+
#
|
1027
|
+
# - Parses \CSV from a source (\String, \IO stream, or ARGF).
|
1028
|
+
# - Calls the given block with each parsed row:
|
1029
|
+
# - Without headers, each row is an \Array.
|
1030
|
+
# - With headers, each row is a CSV::Row.
|
1031
|
+
# - Generates \CSV to an output (\String, \IO stream, or STDOUT).
|
1032
|
+
# - Returns the parsed source:
|
1033
|
+
# - Without headers, an \Array of \Arrays.
|
1034
|
+
# - With headers, a CSV::Table.
|
1035
|
+
#
|
1036
|
+
# When +in_string_or_io+ is given, but not +out_string_or_io+,
|
1037
|
+
# parses from the given +in_string_or_io+
|
1038
|
+
# and generates to STDOUT.
|
1039
|
+
#
|
1040
|
+
# \String input without headers:
|
1041
|
+
#
|
1042
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
1043
|
+
# CSV.filter(in_string) do |row|
|
1044
|
+
# row[0].upcase!
|
1045
|
+
# row[1] = - row[1].to_i
|
1046
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
1047
|
+
#
|
1048
|
+
# Output (to STDOUT):
|
1049
|
+
#
|
1050
|
+
# FOO,0
|
1051
|
+
# BAR,-1
|
1052
|
+
# BAZ,-2
|
1053
|
+
#
|
1054
|
+
# \String input with headers:
|
1055
|
+
#
|
1056
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
1057
|
+
# CSV.filter(in_string, headers: true) do |row|
|
1058
|
+
# row[0].upcase!
|
1059
|
+
# row[1] = - row[1].to_i
|
1060
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
1061
|
+
#
|
1062
|
+
# Output (to STDOUT):
|
1063
|
+
#
|
1064
|
+
# Name,Value
|
1065
|
+
# FOO,0
|
1066
|
+
# BAR,-1
|
1067
|
+
# BAZ,-2
|
1068
|
+
#
|
1069
|
+
# \IO stream input without headers:
|
1070
|
+
#
|
1071
|
+
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
|
1072
|
+
# File.open('t.csv') do |in_io|
|
1073
|
+
# CSV.filter(in_io) do |row|
|
1074
|
+
# row[0].upcase!
|
1075
|
+
# row[1] = - row[1].to_i
|
1076
|
+
# end
|
1077
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
1078
|
+
#
|
1079
|
+
# Output (to STDOUT):
|
1080
|
+
#
|
1081
|
+
# FOO,0
|
1082
|
+
# BAR,-1
|
1083
|
+
# BAZ,-2
|
1084
|
+
#
|
1085
|
+
# \IO stream input with headers:
|
1086
|
+
#
|
1087
|
+
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
|
1088
|
+
# File.open('t.csv') do |in_io|
|
1089
|
+
# CSV.filter(in_io, headers: true) do |row|
|
1090
|
+
# row[0].upcase!
|
1091
|
+
# row[1] = - row[1].to_i
|
1092
|
+
# end
|
1093
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
1094
|
+
#
|
1095
|
+
# Output (to STDOUT):
|
1096
|
+
#
|
1097
|
+
# Name,Value
|
1098
|
+
# FOO,0
|
1099
|
+
# BAR,-1
|
1100
|
+
# BAZ,-2
|
1101
|
+
#
|
1102
|
+
# When both +in_string_or_io+ and +out_string_or_io+ are given,
|
1103
|
+
# parses from +in_string_or_io+ and generates to +out_string_or_io+.
|
1104
|
+
#
|
1105
|
+
# \String output without headers:
|
1106
|
+
#
|
1107
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
1108
|
+
# out_string = ''
|
1109
|
+
# CSV.filter(in_string, out_string) do |row|
|
1110
|
+
# row[0].upcase!
|
1111
|
+
# row[1] = - row[1].to_i
|
1112
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
1113
|
+
# out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n"
|
1114
|
+
#
|
1115
|
+
# \String output with headers:
|
1116
|
+
#
|
1117
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
1118
|
+
# out_string = ''
|
1119
|
+
# CSV.filter(in_string, out_string, headers: true) do |row|
|
1120
|
+
# row[0].upcase!
|
1121
|
+
# row[1] = - row[1].to_i
|
1122
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
1123
|
+
# out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
|
1124
|
+
#
|
1125
|
+
# \IO stream output without headers:
|
1126
|
+
#
|
1127
|
+
# in_string = "foo,0\nbar,1\nbaz,2"
|
1128
|
+
# File.open('t.csv', 'w') do |out_io|
|
1129
|
+
# CSV.filter(in_string, out_io) do |row|
|
1130
|
+
# row[0].upcase!
|
1131
|
+
# row[1] = - row[1].to_i
|
1132
|
+
# end
|
1133
|
+
# end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]]
|
1134
|
+
# File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n"
|
1135
|
+
#
|
1136
|
+
# \IO stream output with headers:
|
1137
|
+
#
|
1138
|
+
# in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2"
|
1139
|
+
# File.open('t.csv', 'w') do |out_io|
|
1140
|
+
# CSV.filter(in_string, out_io, headers: true) do |row|
|
1141
|
+
# row[0].upcase!
|
1142
|
+
# row[1] = - row[1].to_i
|
1143
|
+
# end
|
1144
|
+
# end # => #<CSV::Table mode:col_or_row row_count:4>
|
1145
|
+
# File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n"
|
1146
|
+
#
|
1147
|
+
# When neither +in_string_or_io+ nor +out_string_or_io+ is given,
|
1148
|
+
# parses from {ARGF}[rdoc-ref:ARGF]
|
1149
|
+
# and generates to STDOUT.
|
1150
|
+
#
|
1151
|
+
# Without headers:
|
1152
|
+
#
|
1153
|
+
# # Put Ruby code into a file.
|
1154
|
+
# ruby = <<-EOT
|
1155
|
+
# require 'csv'
|
1156
|
+
# CSV.filter do |row|
|
1157
|
+
# row[0].upcase!
|
1158
|
+
# row[1] = - row[1].to_i
|
1159
|
+
# end
|
1160
|
+
# EOT
|
1161
|
+
# File.write('t.rb', ruby)
|
1162
|
+
# # Put some CSV into a file.
|
1163
|
+
# File.write('t.csv', "foo,0\nbar,1\nbaz,2")
|
1164
|
+
# # Run the Ruby code with CSV filename as argument.
|
1165
|
+
# system(Gem.ruby, "t.rb", "t.csv")
|
1166
|
+
#
|
1167
|
+
# Output (to STDOUT):
|
1168
|
+
#
|
1169
|
+
# FOO,0
|
1170
|
+
# BAR,-1
|
1171
|
+
# BAZ,-2
|
1172
|
+
#
|
1173
|
+
# With headers:
|
1174
|
+
#
|
1175
|
+
# # Put Ruby code into a file.
|
1176
|
+
# ruby = <<-EOT
|
1177
|
+
# require 'csv'
|
1178
|
+
# CSV.filter(headers: true) do |row|
|
1179
|
+
# row[0].upcase!
|
1180
|
+
# row[1] = - row[1].to_i
|
1181
|
+
# end
|
1182
|
+
# EOT
|
1183
|
+
# File.write('t.rb', ruby)
|
1184
|
+
# # Put some CSV into a file.
|
1185
|
+
# File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2")
|
1186
|
+
# # Run the Ruby code with CSV filename as argument.
|
1187
|
+
# system(Gem.ruby, "t.rb", "t.csv")
|
1188
|
+
#
|
1189
|
+
# Output (to STDOUT):
|
1190
|
+
#
|
1191
|
+
# Name,Value
|
1192
|
+
# FOO,0
|
1193
|
+
# BAR,-1
|
1194
|
+
# BAZ,-2
|
1195
|
+
#
|
1196
|
+
# Arguments:
|
1197
|
+
#
|
1198
|
+
# * Argument +in_string_or_io+ must be a \String or an \IO stream.
|
1199
|
+
# * Argument +out_string_or_io+ must be a \String or an \IO stream.
|
1200
|
+
# * Arguments <tt>**options</tt> must be keyword options.
|
1201
|
+
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
1202
|
+
def filter(input=nil, output=nil, **options)
|
1203
|
+
# parse options for input, output, or both
|
1204
|
+
in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value}
|
1205
|
+
options.each do |key, value|
|
1206
|
+
case key
|
1207
|
+
when /\Ain(?:put)?_(.+)\Z/
|
1208
|
+
in_options[$1.to_sym] = value
|
1209
|
+
when /\Aout(?:put)?_(.+)\Z/
|
1210
|
+
out_options[$1.to_sym] = value
|
1211
|
+
else
|
1212
|
+
in_options[key] = value
|
1213
|
+
out_options[key] = value
|
1214
|
+
end
|
1215
|
+
end
|
448
1216
|
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
1217
|
+
# build input and output wrappers
|
1218
|
+
input = new(input || ARGF, **in_options)
|
1219
|
+
output = new(output || $stdout, **out_options)
|
1220
|
+
|
1221
|
+
# process headers
|
1222
|
+
need_manual_header_output =
|
1223
|
+
(in_options[:headers] and
|
1224
|
+
out_options[:headers] == true and
|
1225
|
+
out_options[:write_headers])
|
1226
|
+
if need_manual_header_output
|
1227
|
+
first_row = input.shift
|
1228
|
+
if first_row
|
1229
|
+
if first_row.is_a?(Row)
|
1230
|
+
headers = first_row.headers
|
1231
|
+
yield headers
|
1232
|
+
output << headers
|
1233
|
+
end
|
1234
|
+
yield first_row
|
1235
|
+
output << first_row
|
1236
|
+
end
|
1237
|
+
end
|
455
1238
|
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
# filter( input, output, **options ) { |row| ... }
|
461
|
-
#
|
462
|
-
# This method is a convenience for building Unix-like filters for CSV data.
|
463
|
-
# Each row is yielded to the provided block which can alter it as needed.
|
464
|
-
# After the block returns, the row is appended to +output+ altered or not.
|
465
|
-
#
|
466
|
-
# The +input+ and +output+ arguments can be anything CSV::new() accepts
|
467
|
-
# (generally String or IO objects). If not given, they default to
|
468
|
-
# <tt>ARGF</tt> and <tt>$stdout</tt>.
|
469
|
-
#
|
470
|
-
# The +options+ parameter is also filtered down to CSV::new() after some
|
471
|
-
# clever key parsing. Any key beginning with <tt>:in_</tt> or
|
472
|
-
# <tt>:input_</tt> will have that leading identifier stripped and will only
|
473
|
-
# be used in the +options+ Hash for the +input+ object. Keys starting with
|
474
|
-
# <tt>:out_</tt> or <tt>:output_</tt> affect only +output+. All other keys
|
475
|
-
# are assigned to both objects.
|
476
|
-
#
|
477
|
-
# The <tt>:output_row_sep</tt> +option+ defaults to
|
478
|
-
# <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
|
479
|
-
#
|
480
|
-
def self.filter(input=nil, output=nil, **options)
|
481
|
-
# parse options for input, output, or both
|
482
|
-
in_options, out_options = Hash.new, {row_sep: $INPUT_RECORD_SEPARATOR}
|
483
|
-
options.each do |key, value|
|
484
|
-
case key.to_s
|
485
|
-
when /\Ain(?:put)?_(.+)\Z/
|
486
|
-
in_options[$1.to_sym] = value
|
487
|
-
when /\Aout(?:put)?_(.+)\Z/
|
488
|
-
out_options[$1.to_sym] = value
|
489
|
-
else
|
490
|
-
in_options[key] = value
|
491
|
-
out_options[key] = value
|
1239
|
+
# read, yield, write
|
1240
|
+
input.each do |row|
|
1241
|
+
yield row
|
1242
|
+
output << row
|
492
1243
|
end
|
493
1244
|
end
|
494
|
-
# build input and output wrappers
|
495
|
-
input = new(input || ARGF, in_options)
|
496
|
-
output = new(output || $stdout, out_options)
|
497
|
-
|
498
|
-
# read, yield, write
|
499
|
-
input.each do |row|
|
500
|
-
yield row
|
501
|
-
output << row
|
502
|
-
end
|
503
|
-
end
|
504
1245
|
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
1246
|
+
#
|
1247
|
+
# :call-seq:
|
1248
|
+
# foreach(path_or_io, mode='r', **options) {|row| ... }
|
1249
|
+
# foreach(path_or_io, mode='r', **options) -> new_enumerator
|
1250
|
+
#
|
1251
|
+
# Calls the block with each row read from source +path_or_io+.
|
1252
|
+
#
|
1253
|
+
# \Path input without headers:
|
1254
|
+
#
|
1255
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1256
|
+
# in_path = 't.csv'
|
1257
|
+
# File.write(in_path, string)
|
1258
|
+
# CSV.foreach(in_path) {|row| p row }
|
1259
|
+
#
|
1260
|
+
# Output:
|
1261
|
+
#
|
1262
|
+
# ["foo", "0"]
|
1263
|
+
# ["bar", "1"]
|
1264
|
+
# ["baz", "2"]
|
1265
|
+
#
|
1266
|
+
# \Path input with headers:
|
1267
|
+
#
|
1268
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
1269
|
+
# in_path = 't.csv'
|
1270
|
+
# File.write(in_path, string)
|
1271
|
+
# CSV.foreach(in_path, headers: true) {|row| p row }
|
1272
|
+
#
|
1273
|
+
# Output:
|
1274
|
+
#
|
1275
|
+
# <CSV::Row "Name":"foo" "Value":"0">
|
1276
|
+
# <CSV::Row "Name":"bar" "Value":"1">
|
1277
|
+
# <CSV::Row "Name":"baz" "Value":"2">
|
1278
|
+
#
|
1279
|
+
# \IO stream input without headers:
|
1280
|
+
#
|
1281
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1282
|
+
# path = 't.csv'
|
1283
|
+
# File.write(path, string)
|
1284
|
+
# File.open('t.csv') do |in_io|
|
1285
|
+
# CSV.foreach(in_io) {|row| p row }
|
1286
|
+
# end
|
1287
|
+
#
|
1288
|
+
# Output:
|
1289
|
+
#
|
1290
|
+
# ["foo", "0"]
|
1291
|
+
# ["bar", "1"]
|
1292
|
+
# ["baz", "2"]
|
1293
|
+
#
|
1294
|
+
# \IO stream input with headers:
|
1295
|
+
#
|
1296
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
1297
|
+
# path = 't.csv'
|
1298
|
+
# File.write(path, string)
|
1299
|
+
# File.open('t.csv') do |in_io|
|
1300
|
+
# CSV.foreach(in_io, headers: true) {|row| p row }
|
1301
|
+
# end
|
1302
|
+
#
|
1303
|
+
# Output:
|
1304
|
+
#
|
1305
|
+
# <CSV::Row "Name":"foo" "Value":"0">
|
1306
|
+
# <CSV::Row "Name":"bar" "Value":"1">
|
1307
|
+
# <CSV::Row "Name":"baz" "Value":"2">
|
1308
|
+
#
|
1309
|
+
# With no block given, returns an \Enumerator:
|
1310
|
+
#
|
1311
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1312
|
+
# path = 't.csv'
|
1313
|
+
# File.write(path, string)
|
1314
|
+
# CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")>
|
1315
|
+
#
|
1316
|
+
# Arguments:
|
1317
|
+
# * Argument +path_or_io+ must be a file path or an \IO stream.
|
1318
|
+
# * Argument +mode+, if given, must be a \File mode.
|
1319
|
+
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
1320
|
+
# * Arguments <tt>**options</tt> must be keyword options.
|
1321
|
+
# See {Options for Parsing}[#class-CSV-label-Options+for+Parsing].
|
1322
|
+
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
1323
|
+
# that you can use to specify the Encoding of the data read from +path+ or +io+.
|
1324
|
+
# You must provide this unless your data is in the encoding
|
1325
|
+
# given by <tt>Encoding::default_external</tt>.
|
1326
|
+
# Parsing will use this to determine how to parse the data.
|
1327
|
+
# You may provide a second Encoding to
|
1328
|
+
# have the data transcoded as it is read. For example,
|
1329
|
+
# encoding: 'UTF-32BE:UTF-8'
|
1330
|
+
# would read +UTF-32BE+ data from the file
|
1331
|
+
# but transcode it to +UTF-8+ before parsing.
|
1332
|
+
def foreach(path, mode="r", **options, &block)
|
1333
|
+
return to_enum(__method__, path, mode, **options) unless block_given?
|
1334
|
+
open(path, mode, **options) do |csv|
|
1335
|
+
csv.each(&block)
|
1336
|
+
end
|
523
1337
|
end
|
524
|
-
end
|
525
1338
|
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
#
|
546
|
-
if str
|
547
|
-
|
548
|
-
|
549
|
-
|
1339
|
+
#
|
1340
|
+
# :call-seq:
|
1341
|
+
# generate(csv_string, **options) {|csv| ... }
|
1342
|
+
# generate(**options) {|csv| ... }
|
1343
|
+
#
|
1344
|
+
# * Argument +csv_string+, if given, must be a \String object;
|
1345
|
+
# defaults to a new empty \String.
|
1346
|
+
# * Arguments +options+, if given, should be generating options.
|
1347
|
+
# See {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1348
|
+
#
|
1349
|
+
# ---
|
1350
|
+
#
|
1351
|
+
# Creates a new \CSV object via <tt>CSV.new(csv_string, **options)</tt>;
|
1352
|
+
# calls the block with the \CSV object, which the block may modify;
|
1353
|
+
# returns the \String generated from the \CSV object.
|
1354
|
+
#
|
1355
|
+
# Note that a passed \String *is* modified by this method.
|
1356
|
+
# Pass <tt>csv_string</tt>.dup if the \String must be preserved.
|
1357
|
+
#
|
1358
|
+
# This method has one additional option: <tt>:encoding</tt>,
|
1359
|
+
# which sets the base Encoding for the output if no +csv_string+ is specified.
|
1360
|
+
# CSV needs this hint if you plan to output non-ASCII compatible data.
|
1361
|
+
#
|
1362
|
+
# ---
|
1363
|
+
#
|
1364
|
+
# Add lines:
|
1365
|
+
# input_string = "foo,0\nbar,1\nbaz,2\n"
|
1366
|
+
# output_string = CSV.generate(input_string) do |csv|
|
1367
|
+
# csv << ['bat', 3]
|
1368
|
+
# csv << ['bam', 4]
|
1369
|
+
# end
|
1370
|
+
# output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
|
1371
|
+
# input_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
|
1372
|
+
# output_string.equal?(input_string) # => true # Same string, modified
|
1373
|
+
#
|
1374
|
+
# Add lines into new string, preserving old string:
|
1375
|
+
# input_string = "foo,0\nbar,1\nbaz,2\n"
|
1376
|
+
# output_string = CSV.generate(input_string.dup) do |csv|
|
1377
|
+
# csv << ['bat', 3]
|
1378
|
+
# csv << ['bam', 4]
|
1379
|
+
# end
|
1380
|
+
# output_string # => "foo,0\nbar,1\nbaz,2\nbat,3\nbam,4\n"
|
1381
|
+
# input_string # => "foo,0\nbar,1\nbaz,2\n"
|
1382
|
+
# output_string.equal?(input_string) # => false # Different strings
|
1383
|
+
#
|
1384
|
+
# Create lines from nothing:
|
1385
|
+
# output_string = CSV.generate do |csv|
|
1386
|
+
# csv << ['foo', 0]
|
1387
|
+
# csv << ['bar', 1]
|
1388
|
+
# csv << ['baz', 2]
|
1389
|
+
# end
|
1390
|
+
# output_string # => "foo,0\nbar,1\nbaz,2\n"
|
1391
|
+
#
|
1392
|
+
# ---
|
1393
|
+
#
|
1394
|
+
# Raises an exception if +csv_string+ is not a \String object:
|
1395
|
+
# # Raises TypeError (no implicit conversion of Integer into String)
|
1396
|
+
# CSV.generate(0)
|
1397
|
+
#
|
1398
|
+
def generate(str=nil, **options)
|
550
1399
|
encoding = options[:encoding]
|
551
|
-
|
552
|
-
|
1400
|
+
# add a default empty String, if none was given
|
1401
|
+
if str
|
1402
|
+
str = StringIO.new(str)
|
1403
|
+
str.seek(0, IO::SEEK_END)
|
1404
|
+
str.set_encoding(encoding) if encoding
|
1405
|
+
else
|
1406
|
+
str = +""
|
1407
|
+
str.force_encoding(encoding) if encoding
|
1408
|
+
end
|
1409
|
+
csv = new(str, **options) # wrap
|
1410
|
+
yield csv # yield for appending
|
1411
|
+
csv.string # return final String
|
553
1412
|
end
|
554
|
-
csv = new(str, options) # wrap
|
555
|
-
yield csv # yield for appending
|
556
|
-
csv.string # return final String
|
557
|
-
end
|
558
1413
|
|
559
|
-
|
560
|
-
|
561
|
-
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
574
|
-
|
575
|
-
|
576
|
-
|
577
|
-
|
578
|
-
|
1414
|
+
# :call-seq:
|
1415
|
+
# CSV.generate_line(ary)
|
1416
|
+
# CSV.generate_line(ary, **options)
|
1417
|
+
#
|
1418
|
+
# Returns the \String created by generating \CSV from +ary+
|
1419
|
+
# using the specified +options+.
|
1420
|
+
#
|
1421
|
+
# Argument +ary+ must be an \Array.
|
1422
|
+
#
|
1423
|
+
# Special options:
|
1424
|
+
# * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
|
1425
|
+
# and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise:
|
1426
|
+
# $INPUT_RECORD_SEPARATOR # => "\n"
|
1427
|
+
# * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
|
1428
|
+
# Encoding for the output. This method will try to guess your Encoding from
|
1429
|
+
# the first non-+nil+ field in +row+, if possible, but you may need to use
|
1430
|
+
# this parameter as a backup plan.
|
1431
|
+
#
|
1432
|
+
# For other +options+,
|
1433
|
+
# see {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1434
|
+
#
|
1435
|
+
# ---
|
1436
|
+
#
|
1437
|
+
# Returns the \String generated from an \Array:
|
1438
|
+
# CSV.generate_line(['foo', '0']) # => "foo,0\n"
|
1439
|
+
#
|
1440
|
+
# ---
|
1441
|
+
#
|
1442
|
+
# Raises an exception if +ary+ is not an \Array:
|
1443
|
+
# # Raises NoMethodError (undefined method `find' for :foo:Symbol)
|
1444
|
+
# CSV.generate_line(:foo)
|
1445
|
+
#
|
1446
|
+
def generate_line(row, **options)
|
1447
|
+
options = {row_sep: InputRecordSeparator.value}.merge(options)
|
1448
|
+
str = +""
|
1449
|
+
if options[:encoding]
|
1450
|
+
str.force_encoding(options[:encoding])
|
1451
|
+
else
|
1452
|
+
fallback_encoding = nil
|
1453
|
+
output_encoding = nil
|
1454
|
+
row.each do |field|
|
1455
|
+
next unless field.is_a?(String)
|
1456
|
+
fallback_encoding ||= field.encoding
|
1457
|
+
next if field.ascii_only?
|
1458
|
+
output_encoding = field.encoding
|
1459
|
+
break
|
1460
|
+
end
|
1461
|
+
output_encoding ||= fallback_encoding
|
1462
|
+
if output_encoding
|
1463
|
+
str.force_encoding(output_encoding)
|
1464
|
+
end
|
1465
|
+
end
|
1466
|
+
(new(str, **options) << row).string
|
579
1467
|
end
|
580
|
-
(new(str, options) << row).string
|
581
|
-
end
|
582
1468
|
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
# * eof?()
|
622
|
-
# * external_encoding()
|
623
|
-
# * fcntl()
|
624
|
-
# * fileno()
|
625
|
-
# * flock()
|
626
|
-
# * flush()
|
627
|
-
# * fsync()
|
628
|
-
# * internal_encoding()
|
629
|
-
# * ioctl()
|
630
|
-
# * isatty()
|
631
|
-
# * path()
|
632
|
-
# * pid()
|
633
|
-
# * pos()
|
634
|
-
# * pos=()
|
635
|
-
# * reopen()
|
636
|
-
# * seek()
|
637
|
-
# * stat()
|
638
|
-
# * sync()
|
639
|
-
# * sync=()
|
640
|
-
# * tell()
|
641
|
-
# * to_i()
|
642
|
-
# * to_io()
|
643
|
-
# * truncate()
|
644
|
-
# * tty?()
|
645
|
-
#
|
646
|
-
def self.open(filename, mode="r", **options)
|
647
|
-
# wrap a File opened with the remaining +args+ with no newline
|
648
|
-
# decorator
|
649
|
-
file_opts = {universal_newline: false}.merge(options)
|
650
|
-
|
651
|
-
begin
|
652
|
-
f = File.open(filename, mode, file_opts)
|
653
|
-
rescue ArgumentError => e
|
654
|
-
raise unless /needs binmode/.match?(e.message) and mode == "r"
|
655
|
-
mode = "rb"
|
656
|
-
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
|
657
|
-
retry
|
658
|
-
end
|
659
|
-
begin
|
660
|
-
csv = new(f, options)
|
661
|
-
rescue Exception
|
662
|
-
f.close
|
663
|
-
raise
|
1469
|
+
# :call-seq:
|
1470
|
+
# CSV.generate_lines(rows)
|
1471
|
+
# CSV.generate_lines(rows, **options)
|
1472
|
+
#
|
1473
|
+
# Returns the \String created by generating \CSV from +rows+
|
1474
|
+
# using the specified +options+.
|
1475
|
+
#
|
1476
|
+
# Argument +rows+ must be an \Array of rows. Each row is an \Array of Strings or a \CSV::Row.
|
1477
|
+
#
|
1478
|
+
# Special options:
|
1479
|
+
# * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later
|
1480
|
+
# and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise:
|
1481
|
+
# $INPUT_RECORD_SEPARATOR # => "\n"
|
1482
|
+
# * This method accepts an additional option, <tt>:encoding</tt>, which sets the base
|
1483
|
+
# Encoding for the output. This method will try to guess your Encoding from
|
1484
|
+
# the first non-+nil+ field in +row+, if possible, but you may need to use
|
1485
|
+
# this parameter as a backup plan.
|
1486
|
+
#
|
1487
|
+
# For other +options+,
|
1488
|
+
# see {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1489
|
+
#
|
1490
|
+
# ---
|
1491
|
+
#
|
1492
|
+
# Returns the \String generated from an \Array of Arrays:
|
1493
|
+
# CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n"
|
1494
|
+
#
|
1495
|
+
# ---
|
1496
|
+
#
|
1497
|
+
# Raises an exception if +rows+ does not respond to +each+:
|
1498
|
+
# # Raises NoMethodError (undefined method `each' for :foo:Symbol)
|
1499
|
+
# CSV.generate_lines(:foo)
|
1500
|
+
#
|
1501
|
+
def generate_lines(rows, **options)
|
1502
|
+
self.generate(**options) do |csv|
|
1503
|
+
rows.each do |row|
|
1504
|
+
csv << row
|
1505
|
+
end
|
1506
|
+
end
|
664
1507
|
end
|
665
1508
|
|
666
|
-
#
|
667
|
-
|
1509
|
+
#
|
1510
|
+
# :call-seq:
|
1511
|
+
# open(file_path, mode = "r", **options ) -> new_csv
|
1512
|
+
# open(io, mode = "r", **options ) -> new_csv
|
1513
|
+
# open(file_path, mode = "r", **options ) { |csv| ... } -> object
|
1514
|
+
# open(io, mode = "r", **options ) { |csv| ... } -> object
|
1515
|
+
#
|
1516
|
+
# possible options elements:
|
1517
|
+
# keyword form:
|
1518
|
+
# :invalid => nil # raise error on invalid byte sequence (default)
|
1519
|
+
# :invalid => :replace # replace invalid byte sequence
|
1520
|
+
# :undef => :replace # replace undefined conversion
|
1521
|
+
# :replace => string # replacement string ("?" or "\uFFFD" if not specified)
|
1522
|
+
#
|
1523
|
+
# * Argument +file_path+, if given, must be the path to a file.
|
1524
|
+
# :include: ../doc/csv/arguments/io.rdoc
|
1525
|
+
# * Argument +mode+, if given, must be a \File mode.
|
1526
|
+
# See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes].
|
1527
|
+
# * Arguments <tt>**options</tt> must be keyword options.
|
1528
|
+
# See {Options for Generating}[#class-CSV-label-Options+for+Generating].
|
1529
|
+
# * This method optionally accepts an additional <tt>:encoding</tt> option
|
1530
|
+
# that you can use to specify the Encoding of the data read from +path+ or +io+.
|
1531
|
+
# You must provide this unless your data is in the encoding
|
1532
|
+
# given by <tt>Encoding::default_external</tt>.
|
1533
|
+
# Parsing will use this to determine how to parse the data.
|
1534
|
+
# You may provide a second Encoding to
|
1535
|
+
# have the data transcoded as it is read. For example,
|
1536
|
+
# encoding: 'UTF-32BE:UTF-8'
|
1537
|
+
# would read +UTF-32BE+ data from the file
|
1538
|
+
# but transcode it to +UTF-8+ before parsing.
|
1539
|
+
#
|
1540
|
+
# ---
|
1541
|
+
#
|
1542
|
+
# These examples assume prior execution of:
|
1543
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1544
|
+
# path = 't.csv'
|
1545
|
+
# File.write(path, string)
|
1546
|
+
#
|
1547
|
+
# ---
|
1548
|
+
#
|
1549
|
+
# With no block given, returns a new \CSV object.
|
1550
|
+
#
|
1551
|
+
# Create a \CSV object using a file path:
|
1552
|
+
# csv = CSV.open(path)
|
1553
|
+
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1554
|
+
#
|
1555
|
+
# Create a \CSV object using an open \File:
|
1556
|
+
# csv = CSV.open(File.open(path))
|
1557
|
+
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1558
|
+
#
|
1559
|
+
# ---
|
1560
|
+
#
|
1561
|
+
# With a block given, calls the block with the created \CSV object;
|
1562
|
+
# returns the block's return value:
|
1563
|
+
#
|
1564
|
+
# Using a file path:
|
1565
|
+
# csv = CSV.open(path) {|csv| p csv}
|
1566
|
+
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1567
|
+
# Output:
|
1568
|
+
# #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1569
|
+
#
|
1570
|
+
# Using an open \File:
|
1571
|
+
# csv = CSV.open(File.open(path)) {|csv| p csv}
|
1572
|
+
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1573
|
+
# Output:
|
1574
|
+
# #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1575
|
+
#
|
1576
|
+
# ---
|
1577
|
+
#
|
1578
|
+
# Raises an exception if the argument is not a \String object or \IO object:
|
1579
|
+
# # Raises TypeError (no implicit conversion of Symbol into String)
|
1580
|
+
# CSV.open(:foo)
|
1581
|
+
def open(filename, mode="r", **options)
|
1582
|
+
# wrap a File opened with the remaining +args+ with no newline
|
1583
|
+
# decorator
|
1584
|
+
file_opts = options.dup
|
1585
|
+
unless file_opts.key?(:newline)
|
1586
|
+
file_opts[:universal_newline] ||= false
|
1587
|
+
end
|
1588
|
+
options.delete(:invalid)
|
1589
|
+
options.delete(:undef)
|
1590
|
+
options.delete(:replace)
|
1591
|
+
options.delete_if {|k, _| /newline\z/.match?(k)}
|
1592
|
+
|
668
1593
|
begin
|
669
|
-
|
670
|
-
|
671
|
-
|
1594
|
+
f = File.open(filename, mode, **file_opts)
|
1595
|
+
rescue ArgumentError => e
|
1596
|
+
raise unless /needs binmode/.match?(e.message) and mode == "r"
|
1597
|
+
mode = "rb"
|
1598
|
+
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
|
1599
|
+
retry
|
1600
|
+
end
|
1601
|
+
begin
|
1602
|
+
csv = new(f, **options)
|
1603
|
+
rescue Exception
|
1604
|
+
f.close
|
1605
|
+
raise
|
1606
|
+
end
|
1607
|
+
|
1608
|
+
# handle blocks like Ruby's open(), not like the CSV library
|
1609
|
+
if block_given?
|
1610
|
+
begin
|
1611
|
+
yield csv
|
1612
|
+
ensure
|
1613
|
+
csv.close
|
1614
|
+
end
|
1615
|
+
else
|
1616
|
+
csv
|
672
1617
|
end
|
673
|
-
else
|
674
|
-
csv
|
675
1618
|
end
|
676
|
-
end
|
677
1619
|
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
1620
|
+
#
|
1621
|
+
# :call-seq:
|
1622
|
+
# parse(string) -> array_of_arrays
|
1623
|
+
# parse(io) -> array_of_arrays
|
1624
|
+
# parse(string, headers: ..., **options) -> csv_table
|
1625
|
+
# parse(io, headers: ..., **options) -> csv_table
|
1626
|
+
# parse(string, **options) {|row| ... }
|
1627
|
+
# parse(io, **options) {|row| ... }
|
1628
|
+
#
|
1629
|
+
# Parses +string+ or +io+ using the specified +options+.
|
1630
|
+
#
|
1631
|
+
# - Argument +string+ should be a \String object;
|
1632
|
+
# it will be put into a new StringIO object positioned at the beginning.
|
1633
|
+
# :include: ../doc/csv/arguments/io.rdoc
|
1634
|
+
# - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
|
1635
|
+
#
|
1636
|
+
# ====== Without Option +headers+
|
1637
|
+
#
|
1638
|
+
# Without {option +headers+}[#class-CSV-label-Option+headers] case.
|
1639
|
+
#
|
1640
|
+
# These examples assume prior execution of:
|
1641
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1642
|
+
# path = 't.csv'
|
1643
|
+
# File.write(path, string)
|
1644
|
+
#
|
1645
|
+
# ---
|
1646
|
+
#
|
1647
|
+
# With no block given, returns an \Array of Arrays formed from the source.
|
1648
|
+
#
|
1649
|
+
# Parse a \String:
|
1650
|
+
# a_of_a = CSV.parse(string)
|
1651
|
+
# a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
1652
|
+
#
|
1653
|
+
# Parse an open \File:
|
1654
|
+
# a_of_a = File.open(path) do |file|
|
1655
|
+
# CSV.parse(file)
|
1656
|
+
# end
|
1657
|
+
# a_of_a # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
1658
|
+
#
|
1659
|
+
# ---
|
1660
|
+
#
|
1661
|
+
# With a block given, calls the block with each parsed row:
|
1662
|
+
#
|
1663
|
+
# Parse a \String:
|
1664
|
+
# CSV.parse(string) {|row| p row }
|
1665
|
+
#
|
1666
|
+
# Output:
|
1667
|
+
# ["foo", "0"]
|
1668
|
+
# ["bar", "1"]
|
1669
|
+
# ["baz", "2"]
|
1670
|
+
#
|
1671
|
+
# Parse an open \File:
|
1672
|
+
# File.open(path) do |file|
|
1673
|
+
# CSV.parse(file) {|row| p row }
|
1674
|
+
# end
|
1675
|
+
#
|
1676
|
+
# Output:
|
1677
|
+
# ["foo", "0"]
|
1678
|
+
# ["bar", "1"]
|
1679
|
+
# ["baz", "2"]
|
1680
|
+
#
|
1681
|
+
# ====== With Option +headers+
|
1682
|
+
#
|
1683
|
+
# With {option +headers+}[#class-CSV-label-Option+headers] case.
|
1684
|
+
#
|
1685
|
+
# These examples assume prior execution of:
|
1686
|
+
# string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
|
1687
|
+
# path = 't.csv'
|
1688
|
+
# File.write(path, string)
|
1689
|
+
#
|
1690
|
+
# ---
|
1691
|
+
#
|
1692
|
+
# With no block given, returns a CSV::Table object formed from the source.
|
1693
|
+
#
|
1694
|
+
# Parse a \String:
|
1695
|
+
# csv_table = CSV.parse(string, headers: ['Name', 'Count'])
|
1696
|
+
# csv_table # => #<CSV::Table mode:col_or_row row_count:5>
|
1697
|
+
#
|
1698
|
+
# Parse an open \File:
|
1699
|
+
# csv_table = File.open(path) do |file|
|
1700
|
+
# CSV.parse(file, headers: ['Name', 'Count'])
|
1701
|
+
# end
|
1702
|
+
# csv_table # => #<CSV::Table mode:col_or_row row_count:5>
|
1703
|
+
#
|
1704
|
+
# ---
|
1705
|
+
#
|
1706
|
+
# With a block given, calls the block with each parsed row,
|
1707
|
+
# which has been formed into a CSV::Row object:
|
1708
|
+
#
|
1709
|
+
# Parse a \String:
|
1710
|
+
# CSV.parse(string, headers: ['Name', 'Count']) {|row| p row }
|
1711
|
+
#
|
1712
|
+
# Output:
|
1713
|
+
# # <CSV::Row "Name":"foo" "Count":"0">
|
1714
|
+
# # <CSV::Row "Name":"bar" "Count":"1">
|
1715
|
+
# # <CSV::Row "Name":"baz" "Count":"2">
|
1716
|
+
#
|
1717
|
+
# Parse an open \File:
|
1718
|
+
# File.open(path) do |file|
|
1719
|
+
# CSV.parse(file, headers: ['Name', 'Count']) {|row| p row }
|
1720
|
+
# end
|
1721
|
+
#
|
1722
|
+
# Output:
|
1723
|
+
# # <CSV::Row "Name":"foo" "Count":"0">
|
1724
|
+
# # <CSV::Row "Name":"bar" "Count":"1">
|
1725
|
+
# # <CSV::Row "Name":"baz" "Count":"2">
|
1726
|
+
#
|
1727
|
+
# ---
|
1728
|
+
#
|
1729
|
+
# Raises an exception if the argument is not a \String object or \IO object:
|
1730
|
+
# # Raises NoMethodError (undefined method `close' for :foo:Symbol)
|
1731
|
+
# CSV.parse(:foo)
|
1732
|
+
def parse(str, **options, &block)
|
1733
|
+
csv = new(str, **options)
|
692
1734
|
|
693
|
-
|
1735
|
+
return csv.each(&block) if block_given?
|
694
1736
|
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
1737
|
+
# slurp contents, if no block is given
|
1738
|
+
begin
|
1739
|
+
csv.read
|
1740
|
+
ensure
|
1741
|
+
csv.close
|
1742
|
+
end
|
700
1743
|
end
|
701
|
-
end
|
702
1744
|
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
1745
|
+
# :call-seq:
|
1746
|
+
# CSV.parse_line(string) -> new_array or nil
|
1747
|
+
# CSV.parse_line(io) -> new_array or nil
|
1748
|
+
# CSV.parse_line(string, **options) -> new_array or nil
|
1749
|
+
# CSV.parse_line(io, **options) -> new_array or nil
|
1750
|
+
# CSV.parse_line(string, headers: true, **options) -> csv_row or nil
|
1751
|
+
# CSV.parse_line(io, headers: true, **options) -> csv_row or nil
|
1752
|
+
#
|
1753
|
+
# Returns the data created by parsing the first line of +string+ or +io+
|
1754
|
+
# using the specified +options+.
|
1755
|
+
#
|
1756
|
+
# - Argument +string+ should be a \String object;
|
1757
|
+
# it will be put into a new StringIO object positioned at the beginning.
|
1758
|
+
# :include: ../doc/csv/arguments/io.rdoc
|
1759
|
+
# - Argument +options+: see {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
|
1760
|
+
#
|
1761
|
+
# ====== Without Option +headers+
|
1762
|
+
#
|
1763
|
+
# Without option +headers+, returns the first row as a new \Array.
|
1764
|
+
#
|
1765
|
+
# These examples assume prior execution of:
|
1766
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1767
|
+
# path = 't.csv'
|
1768
|
+
# File.write(path, string)
|
1769
|
+
#
|
1770
|
+
# Parse the first line from a \String object:
|
1771
|
+
# CSV.parse_line(string) # => ["foo", "0"]
|
1772
|
+
#
|
1773
|
+
# Parse the first line from a File object:
|
1774
|
+
# File.open(path) do |file|
|
1775
|
+
# CSV.parse_line(file) # => ["foo", "0"]
|
1776
|
+
# end # => ["foo", "0"]
|
1777
|
+
#
|
1778
|
+
# Returns +nil+ if the argument is an empty \String:
|
1779
|
+
# CSV.parse_line('') # => nil
|
1780
|
+
#
|
1781
|
+
# ====== With Option +headers+
|
1782
|
+
#
|
1783
|
+
# With {option +headers+}[#class-CSV-label-Option+headers],
|
1784
|
+
# returns the first row as a CSV::Row object.
|
1785
|
+
#
|
1786
|
+
# These examples assume prior execution of:
|
1787
|
+
# string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n"
|
1788
|
+
# path = 't.csv'
|
1789
|
+
# File.write(path, string)
|
1790
|
+
#
|
1791
|
+
# Parse the first line from a \String object:
|
1792
|
+
# CSV.parse_line(string, headers: true) # => #<CSV::Row "Name":"foo" "Count":"0">
|
1793
|
+
#
|
1794
|
+
# Parse the first line from a File object:
|
1795
|
+
# File.open(path) do |file|
|
1796
|
+
# CSV.parse_line(file, headers: true)
|
1797
|
+
# end # => #<CSV::Row "Name":"foo" "Count":"0">
|
1798
|
+
#
|
1799
|
+
# ---
|
1800
|
+
#
|
1801
|
+
# Raises an exception if the argument is +nil+:
|
1802
|
+
# # Raises ArgumentError (Cannot parse nil as CSV):
|
1803
|
+
# CSV.parse_line(nil)
|
1804
|
+
#
|
1805
|
+
def parse_line(line, **options)
|
1806
|
+
new(line, **options).each.first
|
1807
|
+
end
|
713
1808
|
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
720
|
-
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
727
|
-
|
1809
|
+
#
|
1810
|
+
# :call-seq:
|
1811
|
+
# read(source, **options) -> array_of_arrays
|
1812
|
+
# read(source, headers: true, **options) -> csv_table
|
1813
|
+
#
|
1814
|
+
# Opens the given +source+ with the given +options+ (see CSV.open),
|
1815
|
+
# reads the source (see CSV#read), and returns the result,
|
1816
|
+
# which will be either an \Array of Arrays or a CSV::Table.
|
1817
|
+
#
|
1818
|
+
# Without headers:
|
1819
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
1820
|
+
# path = 't.csv'
|
1821
|
+
# File.write(path, string)
|
1822
|
+
# CSV.read(path) # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
1823
|
+
#
|
1824
|
+
# With headers:
|
1825
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
1826
|
+
# path = 't.csv'
|
1827
|
+
# File.write(path, string)
|
1828
|
+
# CSV.read(path, headers: true) # => #<CSV::Table mode:col_or_row row_count:4>
|
1829
|
+
def read(path, **options)
|
1830
|
+
open(path, **options) { |csv| csv.read }
|
1831
|
+
end
|
728
1832
|
|
729
|
-
|
730
|
-
|
731
|
-
|
732
|
-
|
1833
|
+
# :call-seq:
|
1834
|
+
# CSV.readlines(source, **options)
|
1835
|
+
#
|
1836
|
+
# Alias for CSV.read.
|
1837
|
+
def readlines(path, **options)
|
1838
|
+
read(path, **options)
|
1839
|
+
end
|
733
1840
|
|
734
|
-
|
735
|
-
|
736
|
-
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
1841
|
+
# :call-seq:
|
1842
|
+
# CSV.table(source, **options)
|
1843
|
+
#
|
1844
|
+
# Calls CSV.read with +source+, +options+, and certain default options:
|
1845
|
+
# - +headers+: +true+
|
1846
|
+
# - +converters+: +:numeric+
|
1847
|
+
# - +header_converters+: +:symbol+
|
1848
|
+
#
|
1849
|
+
# Returns a CSV::Table object.
|
1850
|
+
#
|
1851
|
+
# Example:
|
1852
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
1853
|
+
# path = 't.csv'
|
1854
|
+
# File.write(path, string)
|
1855
|
+
# CSV.table(path) # => #<CSV::Table mode:col_or_row row_count:4>
|
1856
|
+
def table(path, **options)
|
1857
|
+
default_options = {
|
1858
|
+
headers: true,
|
1859
|
+
converters: :numeric,
|
1860
|
+
header_converters: :symbol,
|
1861
|
+
}
|
1862
|
+
options = default_options.merge(options)
|
1863
|
+
read(path, **options)
|
1864
|
+
end
|
745
1865
|
end
|
746
1866
|
|
747
|
-
#
|
748
|
-
#
|
749
|
-
#
|
750
|
-
#
|
751
|
-
#
|
752
|
-
#
|
753
|
-
#
|
754
|
-
#
|
755
|
-
#
|
756
|
-
#
|
757
|
-
#
|
758
|
-
#
|
759
|
-
#
|
760
|
-
#
|
761
|
-
#
|
762
|
-
#
|
763
|
-
#
|
764
|
-
#
|
765
|
-
#
|
766
|
-
#
|
767
|
-
#
|
768
|
-
#
|
769
|
-
#
|
770
|
-
#
|
771
|
-
#
|
772
|
-
#
|
773
|
-
#
|
774
|
-
#
|
775
|
-
#
|
776
|
-
#
|
777
|
-
#
|
778
|
-
#
|
779
|
-
#
|
780
|
-
#
|
781
|
-
#
|
782
|
-
#
|
783
|
-
#
|
784
|
-
#
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
# <b><tt>:field_size_limit</tt></b>:: This is a maximum size CSV will read
|
805
|
-
# ahead looking for the closing quote
|
806
|
-
# for a field. (In truth, it reads to
|
807
|
-
# the first line ending beyond this
|
808
|
-
# size.) If a quote cannot be found
|
809
|
-
# within the limit CSV will raise a
|
810
|
-
# MalformedCSVError, assuming the data
|
811
|
-
# is faulty. You can use this limit to
|
812
|
-
# prevent what are effectively DoS
|
813
|
-
# attacks on the parser. However, this
|
814
|
-
# limit can cause a legitimate parse to
|
815
|
-
# fail and thus is set to +nil+, or off,
|
816
|
-
# by default.
|
817
|
-
# <b><tt>:converters</tt></b>:: An Array of names from the Converters
|
818
|
-
# Hash and/or lambdas that handle custom
|
819
|
-
# conversion. A single converter
|
820
|
-
# doesn't have to be in an Array. All
|
821
|
-
# built-in converters try to transcode
|
822
|
-
# fields to UTF-8 before converting.
|
823
|
-
# The conversion will fail if the data
|
824
|
-
# cannot be transcoded, leaving the
|
825
|
-
# field unchanged.
|
826
|
-
# <b><tt>:unconverted_fields</tt></b>:: If set to +true+, an
|
827
|
-
# unconverted_fields() method will be
|
828
|
-
# added to all returned rows (Array or
|
829
|
-
# CSV::Row) that will return the fields
|
830
|
-
# as they were before conversion. Note
|
831
|
-
# that <tt>:headers</tt> supplied by
|
832
|
-
# Array or String were not fields of the
|
833
|
-
# document and thus will have an empty
|
834
|
-
# Array attached.
|
835
|
-
# <b><tt>:headers</tt></b>:: If set to <tt>:first_row</tt> or
|
836
|
-
# +true+, the initial row of the CSV
|
837
|
-
# file will be treated as a row of
|
838
|
-
# headers. If set to an Array, the
|
839
|
-
# contents will be used as the headers.
|
840
|
-
# If set to a String, the String is run
|
841
|
-
# through a call of CSV::parse_line()
|
842
|
-
# with the same <tt>:col_sep</tt>,
|
843
|
-
# <tt>:row_sep</tt>, and
|
844
|
-
# <tt>:quote_char</tt> as this instance
|
845
|
-
# to produce an Array of headers. This
|
846
|
-
# setting causes CSV#shift() to return
|
847
|
-
# rows as CSV::Row objects instead of
|
848
|
-
# Arrays and CSV#read() to return
|
849
|
-
# CSV::Table objects instead of an Array
|
850
|
-
# of Arrays.
|
851
|
-
# <b><tt>:return_headers</tt></b>:: When +false+, header rows are silently
|
852
|
-
# swallowed. If set to +true+, header
|
853
|
-
# rows are returned in a CSV::Row object
|
854
|
-
# with identical headers and
|
855
|
-
# fields (save that the fields do not go
|
856
|
-
# through the converters).
|
857
|
-
# <b><tt>:write_headers</tt></b>:: When +true+ and <tt>:headers</tt> is
|
858
|
-
# set, a header row will be added to the
|
859
|
-
# output.
|
860
|
-
# <b><tt>:header_converters</tt></b>:: Identical in functionality to
|
861
|
-
# <tt>:converters</tt> save that the
|
862
|
-
# conversions are only made to header
|
863
|
-
# rows. All built-in converters try to
|
864
|
-
# transcode headers to UTF-8 before
|
865
|
-
# converting. The conversion will fail
|
866
|
-
# if the data cannot be transcoded,
|
867
|
-
# leaving the header unchanged.
|
868
|
-
# <b><tt>:skip_blanks</tt></b>:: When set to a +true+ value, CSV will
|
869
|
-
# skip over any empty rows. Note that
|
870
|
-
# this setting will not skip rows that
|
871
|
-
# contain column separators, even if
|
872
|
-
# the rows contain no actual data. If
|
873
|
-
# you want to skip rows that contain
|
874
|
-
# separators but no content, consider
|
875
|
-
# using <tt>:skip_lines</tt>, or
|
876
|
-
# inspecting fields.compact.empty? on
|
877
|
-
# each row.
|
878
|
-
# <b><tt>:force_quotes</tt></b>:: When set to a +true+ value, CSV will
|
879
|
-
# quote all CSV fields it creates.
|
880
|
-
# <b><tt>:skip_lines</tt></b>:: When set to an object responding to
|
881
|
-
# <tt>match</tt>, every line matching
|
882
|
-
# it is considered a comment and ignored
|
883
|
-
# during parsing. When set to a String,
|
884
|
-
# it is first converted to a Regexp.
|
885
|
-
# When set to +nil+ no line is considered
|
886
|
-
# a comment. If the passed object does
|
887
|
-
# not respond to <tt>match</tt>,
|
888
|
-
# <tt>ArgumentError</tt> is thrown.
|
889
|
-
# <b><tt>:liberal_parsing</tt></b>:: When set to a +true+ value, CSV will
|
890
|
-
# attempt to parse input not conformant
|
891
|
-
# with RFC 4180, such as double quotes
|
892
|
-
# in unquoted fields.
|
893
|
-
# <b><tt>:nil_value</tt></b>:: TODO: WRITE ME.
|
894
|
-
# <b><tt>:empty_value</tt></b>:: TODO: WRITE ME.
|
895
|
-
#
|
896
|
-
# See CSV::DEFAULT_OPTIONS for the default settings.
|
897
|
-
#
|
898
|
-
# Options cannot be overridden in the instance methods for performance reasons,
|
899
|
-
# so be sure to set what you want here.
|
900
|
-
#
|
901
|
-
def initialize(data, col_sep: ",", row_sep: :auto, quote_char: '"', field_size_limit: nil,
|
902
|
-
converters: nil, unconverted_fields: nil, headers: false, return_headers: false,
|
903
|
-
write_headers: nil, header_converters: nil, skip_blanks: false, force_quotes: false,
|
904
|
-
skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil,
|
1867
|
+
# :call-seq:
|
1868
|
+
# CSV.new(string)
|
1869
|
+
# CSV.new(io)
|
1870
|
+
# CSV.new(string, **options)
|
1871
|
+
# CSV.new(io, **options)
|
1872
|
+
#
|
1873
|
+
# Returns the new \CSV object created using +string+ or +io+
|
1874
|
+
# and the specified +options+.
|
1875
|
+
#
|
1876
|
+
# - Argument +string+ should be a \String object;
|
1877
|
+
# it will be put into a new StringIO object positioned at the beginning.
|
1878
|
+
# :include: ../doc/csv/arguments/io.rdoc
|
1879
|
+
# - Argument +options+: See:
|
1880
|
+
# * {Options for Parsing}[#class-CSV-label-Options+for+Parsing]
|
1881
|
+
# * {Options for Generating}[#class-CSV-label-Options+for+Generating]
|
1882
|
+
# For performance reasons, the options cannot be overridden
|
1883
|
+
# in a \CSV object, so those specified here will endure.
|
1884
|
+
#
|
1885
|
+
# In addition to the \CSV instance methods, several \IO methods are delegated.
|
1886
|
+
# See {Delegated Methods}[#class-CSV-label-Delegated+Methods].
|
1887
|
+
#
|
1888
|
+
# ---
|
1889
|
+
#
|
1890
|
+
# Create a \CSV object from a \String object:
|
1891
|
+
# csv = CSV.new('foo,0')
|
1892
|
+
# csv # => #<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1893
|
+
#
|
1894
|
+
# Create a \CSV object from a \File object:
|
1895
|
+
# File.write('t.csv', 'foo,0')
|
1896
|
+
# csv = CSV.new(File.open('t.csv'))
|
1897
|
+
# csv # => #<CSV io_type:File io_path:"t.csv" encoding:UTF-8 lineno:0 col_sep:"," row_sep:"\n" quote_char:"\"">
|
1898
|
+
#
|
1899
|
+
# ---
|
1900
|
+
#
|
1901
|
+
# Raises an exception if the argument is +nil+:
|
1902
|
+
# # Raises ArgumentError (Cannot parse nil as CSV):
|
1903
|
+
# CSV.new(nil)
|
1904
|
+
#
|
1905
|
+
def initialize(data,
|
1906
|
+
col_sep: ",",
|
1907
|
+
row_sep: :auto,
|
1908
|
+
quote_char: '"',
|
1909
|
+
field_size_limit: nil,
|
1910
|
+
max_field_size: nil,
|
1911
|
+
converters: nil,
|
1912
|
+
unconverted_fields: nil,
|
1913
|
+
headers: false,
|
1914
|
+
return_headers: false,
|
1915
|
+
write_headers: nil,
|
1916
|
+
header_converters: nil,
|
1917
|
+
skip_blanks: false,
|
1918
|
+
force_quotes: false,
|
1919
|
+
skip_lines: nil,
|
1920
|
+
liberal_parsing: false,
|
1921
|
+
internal_encoding: nil,
|
1922
|
+
external_encoding: nil,
|
1923
|
+
encoding: nil,
|
905
1924
|
nil_value: nil,
|
906
|
-
empty_value: ""
|
1925
|
+
empty_value: "",
|
1926
|
+
strip: false,
|
1927
|
+
quote_empty: true,
|
1928
|
+
write_converters: nil,
|
1929
|
+
write_nil_value: nil,
|
1930
|
+
write_empty_value: "")
|
907
1931
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
908
1932
|
|
909
|
-
|
910
|
-
|
1933
|
+
if data.is_a?(String)
|
1934
|
+
if encoding
|
1935
|
+
if encoding.is_a?(String)
|
1936
|
+
data_external_encoding, data_internal_encoding = encoding.split(":", 2)
|
1937
|
+
if data_internal_encoding
|
1938
|
+
data = data.encode(data_internal_encoding, data_external_encoding)
|
1939
|
+
else
|
1940
|
+
data = data.dup.force_encoding(data_external_encoding)
|
1941
|
+
end
|
1942
|
+
else
|
1943
|
+
data = data.dup.force_encoding(encoding)
|
1944
|
+
end
|
1945
|
+
end
|
1946
|
+
@io = StringIO.new(data)
|
1947
|
+
else
|
1948
|
+
@io = data
|
1949
|
+
end
|
911
1950
|
@encoding = determine_encoding(encoding, internal_encoding)
|
912
|
-
#
|
913
|
-
# prepare for building safe regular expressions in the target encoding,
|
914
|
-
# if we can transcode the needed characters
|
915
|
-
#
|
916
|
-
@re_esc = "\\".encode(@encoding).freeze rescue ""
|
917
|
-
@re_chars = /#{%"[-\\]\\[\\.^$?*+{}()|# \r\n\t\f\v]".encode(@encoding)}/
|
918
|
-
@unconverted_fields = unconverted_fields
|
919
1951
|
|
920
|
-
|
921
|
-
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
@
|
929
|
-
@
|
930
|
-
@
|
931
|
-
|
932
|
-
init_separators(col_sep, row_sep, quote_char, force_quotes)
|
933
|
-
init_parsers(skip_blanks, field_size_limit, liberal_parsing)
|
934
|
-
init_converters(converters, :@converters, :convert)
|
935
|
-
init_converters(header_converters, :@header_converters, :header_convert)
|
936
|
-
init_comments(skip_lines)
|
1952
|
+
@base_fields_converter_options = {
|
1953
|
+
nil_value: nil_value,
|
1954
|
+
empty_value: empty_value,
|
1955
|
+
}
|
1956
|
+
@write_fields_converter_options = {
|
1957
|
+
nil_value: write_nil_value,
|
1958
|
+
empty_value: write_empty_value,
|
1959
|
+
}
|
1960
|
+
@initial_converters = converters
|
1961
|
+
@initial_header_converters = header_converters
|
1962
|
+
@initial_write_converters = write_converters
|
937
1963
|
|
938
|
-
|
1964
|
+
if max_field_size.nil? and field_size_limit
|
1965
|
+
max_field_size = field_size_limit - 1
|
1966
|
+
end
|
1967
|
+
@parser_options = {
|
1968
|
+
column_separator: col_sep,
|
1969
|
+
row_separator: row_sep,
|
1970
|
+
quote_character: quote_char,
|
1971
|
+
max_field_size: max_field_size,
|
1972
|
+
unconverted_fields: unconverted_fields,
|
1973
|
+
headers: headers,
|
1974
|
+
return_headers: return_headers,
|
1975
|
+
skip_blanks: skip_blanks,
|
1976
|
+
skip_lines: skip_lines,
|
1977
|
+
liberal_parsing: liberal_parsing,
|
1978
|
+
encoding: @encoding,
|
1979
|
+
nil_value: nil_value,
|
1980
|
+
empty_value: empty_value,
|
1981
|
+
strip: strip,
|
1982
|
+
}
|
1983
|
+
@parser = nil
|
1984
|
+
@parser_enumerator = nil
|
1985
|
+
@eof_error = nil
|
1986
|
+
|
1987
|
+
@writer_options = {
|
1988
|
+
encoding: @encoding,
|
1989
|
+
force_encoding: (not encoding.nil?),
|
1990
|
+
force_quotes: force_quotes,
|
1991
|
+
headers: headers,
|
1992
|
+
write_headers: write_headers,
|
1993
|
+
column_separator: col_sep,
|
1994
|
+
row_separator: row_sep,
|
1995
|
+
quote_character: quote_char,
|
1996
|
+
quote_empty: quote_empty,
|
1997
|
+
}
|
939
1998
|
|
940
|
-
|
941
|
-
|
1999
|
+
@writer = nil
|
2000
|
+
writer if @writer_options[:write_headers]
|
2001
|
+
end
|
942
2002
|
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
2003
|
+
# :call-seq:
|
2004
|
+
# csv.col_sep -> string
|
2005
|
+
#
|
2006
|
+
# Returns the encoded column separator; used for parsing and writing;
|
2007
|
+
# see {Option +col_sep+}[#class-CSV-label-Option+col_sep]:
|
2008
|
+
# CSV.new('').col_sep # => ","
|
2009
|
+
def col_sep
|
2010
|
+
parser.column_separator
|
948
2011
|
end
|
949
2012
|
|
2013
|
+
# :call-seq:
|
2014
|
+
# csv.row_sep -> string
|
950
2015
|
#
|
951
|
-
#
|
952
|
-
#
|
2016
|
+
# Returns the encoded row separator; used for parsing and writing;
|
2017
|
+
# see {Option +row_sep+}[#class-CSV-label-Option+row_sep]:
|
2018
|
+
# CSV.new('').row_sep # => "\n"
|
2019
|
+
def row_sep
|
2020
|
+
parser.row_separator
|
2021
|
+
end
|
2022
|
+
|
2023
|
+
# :call-seq:
|
2024
|
+
# csv.quote_char -> character
|
953
2025
|
#
|
954
|
-
|
2026
|
+
# Returns the encoded quote character; used for parsing and writing;
|
2027
|
+
# see {Option +quote_char+}[#class-CSV-label-Option+quote_char]:
|
2028
|
+
# CSV.new('').quote_char # => "\""
|
2029
|
+
def quote_char
|
2030
|
+
parser.quote_character
|
2031
|
+
end
|
2032
|
+
|
2033
|
+
# :call-seq:
|
2034
|
+
# csv.field_size_limit -> integer or nil
|
955
2035
|
#
|
956
|
-
#
|
957
|
-
#
|
2036
|
+
# Returns the limit for field size; used for parsing;
|
2037
|
+
# see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]:
|
2038
|
+
# CSV.new('').field_size_limit # => nil
|
958
2039
|
#
|
959
|
-
|
2040
|
+
# Deprecated since 3.2.3. Use +max_field_size+ instead.
|
2041
|
+
def field_size_limit
|
2042
|
+
parser.field_size_limit
|
2043
|
+
end
|
2044
|
+
|
2045
|
+
# :call-seq:
|
2046
|
+
# csv.max_field_size -> integer or nil
|
960
2047
|
#
|
961
|
-
#
|
962
|
-
#
|
2048
|
+
# Returns the limit for field size; used for parsing;
|
2049
|
+
# see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]:
|
2050
|
+
# CSV.new('').max_field_size # => nil
|
963
2051
|
#
|
964
|
-
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
# The regex marking a line as a comment. See CSV::new for details
|
969
|
-
attr_reader :skip_lines
|
2052
|
+
# Since 3.2.3.
|
2053
|
+
def max_field_size
|
2054
|
+
parser.max_field_size
|
2055
|
+
end
|
970
2056
|
|
2057
|
+
# :call-seq:
|
2058
|
+
# csv.skip_lines -> regexp or nil
|
971
2059
|
#
|
972
|
-
# Returns the
|
973
|
-
#
|
974
|
-
#
|
975
|
-
|
2060
|
+
# Returns the \Regexp used to identify comment lines; used for parsing;
|
2061
|
+
# see {Option +skip_lines+}[#class-CSV-label-Option+skip_lines]:
|
2062
|
+
# CSV.new('').skip_lines # => nil
|
2063
|
+
def skip_lines
|
2064
|
+
parser.skip_lines
|
2065
|
+
end
|
2066
|
+
|
2067
|
+
# :call-seq:
|
2068
|
+
# csv.converters -> array
|
2069
|
+
#
|
2070
|
+
# Returns an \Array containing field converters;
|
2071
|
+
# see {Field Converters}[#class-CSV-label-Field+Converters]:
|
2072
|
+
# csv = CSV.new('')
|
2073
|
+
# csv.converters # => []
|
2074
|
+
# csv.convert(:integer)
|
2075
|
+
# csv.converters # => [:integer]
|
2076
|
+
# csv.convert(proc {|x| x.to_s })
|
2077
|
+
# csv.converters
|
2078
|
+
#
|
2079
|
+
# Note that you need to call
|
2080
|
+
# +Ractor.make_shareable(CSV::Converters)+ on the main Ractor to use
|
2081
|
+
# this method.
|
976
2082
|
def converters
|
977
|
-
|
2083
|
+
parser_fields_converter.map do |converter|
|
978
2084
|
name = Converters.rassoc(converter)
|
979
2085
|
name ? name.first : converter
|
980
2086
|
end
|
981
2087
|
end
|
2088
|
+
|
2089
|
+
# :call-seq:
|
2090
|
+
# csv.unconverted_fields? -> object
|
2091
|
+
#
|
2092
|
+
# Returns the value that determines whether unconverted fields are to be
|
2093
|
+
# available; used for parsing;
|
2094
|
+
# see {Option +unconverted_fields+}[#class-CSV-label-Option+unconverted_fields]:
|
2095
|
+
# CSV.new('').unconverted_fields? # => nil
|
2096
|
+
def unconverted_fields?
|
2097
|
+
parser.unconverted_fields?
|
2098
|
+
end
|
2099
|
+
|
2100
|
+
# :call-seq:
|
2101
|
+
# csv.headers -> object
|
982
2102
|
#
|
983
|
-
# Returns
|
984
|
-
#
|
985
|
-
#
|
986
|
-
def unconverted_fields?() @unconverted_fields end
|
987
|
-
#
|
988
|
-
# Returns +nil+ if headers will not be used, +true+ if they will but have not
|
989
|
-
# yet been read, or the actual headers after they have been read. See
|
990
|
-
# CSV::new for details.
|
991
|
-
#
|
2103
|
+
# Returns the value that determines whether headers are used; used for parsing;
|
2104
|
+
# see {Option +headers+}[#class-CSV-label-Option+headers]:
|
2105
|
+
# CSV.new('').headers # => nil
|
992
2106
|
def headers
|
993
|
-
|
2107
|
+
if @writer
|
2108
|
+
@writer.headers
|
2109
|
+
else
|
2110
|
+
parsed_headers = parser.headers
|
2111
|
+
return parsed_headers if parsed_headers
|
2112
|
+
raw_headers = @parser_options[:headers]
|
2113
|
+
raw_headers = nil if raw_headers == false
|
2114
|
+
raw_headers
|
2115
|
+
end
|
994
2116
|
end
|
2117
|
+
|
2118
|
+
# :call-seq:
|
2119
|
+
# csv.return_headers? -> true or false
|
995
2120
|
#
|
996
|
-
# Returns
|
997
|
-
#
|
2121
|
+
# Returns the value that determines whether headers are to be returned; used for parsing;
|
2122
|
+
# see {Option +return_headers+}[#class-CSV-label-Option+return_headers]:
|
2123
|
+
# CSV.new('').return_headers? # => false
|
2124
|
+
def return_headers?
|
2125
|
+
parser.return_headers?
|
2126
|
+
end
|
2127
|
+
|
2128
|
+
# :call-seq:
|
2129
|
+
# csv.write_headers? -> true or false
|
998
2130
|
#
|
999
|
-
|
1000
|
-
#
|
1001
|
-
|
2131
|
+
# Returns the value that determines whether headers are to be written; used for generating;
|
2132
|
+
# see {Option +write_headers+}[#class-CSV-label-Option+write_headers]:
|
2133
|
+
# CSV.new('').write_headers? # => nil
|
2134
|
+
def write_headers?
|
2135
|
+
@writer_options[:write_headers]
|
2136
|
+
end
|
2137
|
+
|
2138
|
+
# :call-seq:
|
2139
|
+
# csv.header_converters -> array
|
1002
2140
|
#
|
1003
|
-
# Returns
|
1004
|
-
#
|
1005
|
-
#
|
2141
|
+
# Returns an \Array containing header converters; used for parsing;
|
2142
|
+
# see {Header Converters}[#class-CSV-label-Header+Converters]:
|
2143
|
+
# CSV.new('').header_converters # => []
|
1006
2144
|
#
|
2145
|
+
# Note that you need to call
|
2146
|
+
# +Ractor.make_shareable(CSV::HeaderConverters)+ on the main Ractor
|
2147
|
+
# to use this method.
|
1007
2148
|
def header_converters
|
1008
|
-
|
2149
|
+
header_fields_converter.map do |converter|
|
1009
2150
|
name = HeaderConverters.rassoc(converter)
|
1010
2151
|
name ? name.first : converter
|
1011
2152
|
end
|
1012
2153
|
end
|
2154
|
+
|
2155
|
+
# :call-seq:
|
2156
|
+
# csv.skip_blanks? -> true or false
|
1013
2157
|
#
|
1014
|
-
# Returns
|
1015
|
-
#
|
1016
|
-
#
|
1017
|
-
def skip_blanks?
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
2158
|
+
# Returns the value that determines whether blank lines are to be ignored; used for parsing;
|
2159
|
+
# see {Option +skip_blanks+}[#class-CSV-label-Option+skip_blanks]:
|
2160
|
+
# CSV.new('').skip_blanks? # => false
|
2161
|
+
def skip_blanks?
|
2162
|
+
parser.skip_blanks?
|
2163
|
+
end
|
2164
|
+
|
2165
|
+
# :call-seq:
|
2166
|
+
# csv.force_quotes? -> true or false
|
2167
|
+
#
|
2168
|
+
# Returns the value that determines whether all output fields are to be quoted;
|
2169
|
+
# used for generating;
|
2170
|
+
# see {Option +force_quotes+}[#class-CSV-label-Option+force_quotes]:
|
2171
|
+
# CSV.new('').force_quotes? # => false
|
2172
|
+
def force_quotes?
|
2173
|
+
@writer_options[:force_quotes]
|
2174
|
+
end
|
1022
2175
|
|
2176
|
+
# :call-seq:
|
2177
|
+
# csv.liberal_parsing? -> true or false
|
1023
2178
|
#
|
1024
|
-
#
|
1025
|
-
#
|
2179
|
+
# Returns the value that determines whether illegal input is to be handled; used for parsing;
|
2180
|
+
# see {Option +liberal_parsing+}[#class-CSV-label-Option+liberal_parsing]:
|
2181
|
+
# CSV.new('').liberal_parsing? # => false
|
2182
|
+
def liberal_parsing?
|
2183
|
+
parser.liberal_parsing?
|
2184
|
+
end
|
2185
|
+
|
2186
|
+
# :call-seq:
|
2187
|
+
# csv.encoding -> encoding
|
1026
2188
|
#
|
2189
|
+
# Returns the encoding used for parsing and generating;
|
2190
|
+
# see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]:
|
2191
|
+
# CSV.new('').encoding # => #<Encoding:UTF-8>
|
1027
2192
|
attr_reader :encoding
|
1028
2193
|
|
1029
|
-
#
|
1030
|
-
#
|
1031
|
-
#
|
1032
|
-
#
|
1033
|
-
|
2194
|
+
# :call-seq:
|
2195
|
+
# csv.lineno -> integer
|
2196
|
+
#
|
2197
|
+
# Returns the count of the rows parsed or generated.
|
2198
|
+
#
|
2199
|
+
# Parsing:
|
2200
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2201
|
+
# path = 't.csv'
|
2202
|
+
# File.write(path, string)
|
2203
|
+
# CSV.open(path) do |csv|
|
2204
|
+
# csv.each do |row|
|
2205
|
+
# p [csv.lineno, row]
|
2206
|
+
# end
|
2207
|
+
# end
|
2208
|
+
# Output:
|
2209
|
+
# [1, ["foo", "0"]]
|
2210
|
+
# [2, ["bar", "1"]]
|
2211
|
+
# [3, ["baz", "2"]]
|
2212
|
+
#
|
2213
|
+
# Generating:
|
2214
|
+
# CSV.generate do |csv|
|
2215
|
+
# p csv.lineno; csv << ['foo', 0]
|
2216
|
+
# p csv.lineno; csv << ['bar', 1]
|
2217
|
+
# p csv.lineno; csv << ['baz', 2]
|
2218
|
+
# end
|
2219
|
+
# Output:
|
2220
|
+
# 0
|
2221
|
+
# 1
|
2222
|
+
# 2
|
2223
|
+
def lineno
|
2224
|
+
if @writer
|
2225
|
+
@writer.lineno
|
2226
|
+
else
|
2227
|
+
parser.lineno
|
2228
|
+
end
|
2229
|
+
end
|
2230
|
+
|
2231
|
+
# :call-seq:
|
2232
|
+
# csv.line -> string
|
2233
|
+
#
|
2234
|
+
# Returns the line most recently read:
|
2235
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2236
|
+
# path = 't.csv'
|
2237
|
+
# File.write(path, string)
|
2238
|
+
# CSV.open(path) do |csv|
|
2239
|
+
# csv.each do |row|
|
2240
|
+
# p [csv.lineno, csv.line]
|
2241
|
+
# end
|
2242
|
+
# end
|
2243
|
+
# Output:
|
2244
|
+
# [1, "foo,0\n"]
|
2245
|
+
# [2, "bar,1\n"]
|
2246
|
+
# [3, "baz,2\n"]
|
2247
|
+
def line
|
2248
|
+
parser.line
|
2249
|
+
end
|
1034
2250
|
|
1035
2251
|
### IO and StringIO Delegation ###
|
1036
2252
|
|
1037
2253
|
extend Forwardable
|
1038
|
-
def_delegators :@io, :binmode, :
|
1039
|
-
:closed?, :
|
1040
|
-
:fileno, :
|
1041
|
-
:
|
1042
|
-
:seek, :
|
1043
|
-
:
|
2254
|
+
def_delegators :@io, :binmode, :close, :close_read, :close_write,
|
2255
|
+
:closed?, :external_encoding, :fcntl,
|
2256
|
+
:fileno, :flush, :fsync, :internal_encoding,
|
2257
|
+
:isatty, :pid, :pos, :pos=, :reopen,
|
2258
|
+
:seek, :string, :sync, :sync=, :tell,
|
2259
|
+
:truncate, :tty?
|
2260
|
+
|
2261
|
+
def binmode?
|
2262
|
+
if @io.respond_to?(:binmode?)
|
2263
|
+
@io.binmode?
|
2264
|
+
else
|
2265
|
+
false
|
2266
|
+
end
|
2267
|
+
end
|
1044
2268
|
|
1045
|
-
|
1046
|
-
|
1047
|
-
@
|
1048
|
-
|
2269
|
+
def flock(*args)
|
2270
|
+
raise NotImplementedError unless @io.respond_to?(:flock)
|
2271
|
+
@io.flock(*args)
|
2272
|
+
end
|
1049
2273
|
|
1050
|
-
|
2274
|
+
def ioctl(*args)
|
2275
|
+
raise NotImplementedError unless @io.respond_to?(:ioctl)
|
2276
|
+
@io.ioctl(*args)
|
1051
2277
|
end
|
1052
2278
|
|
1053
|
-
|
2279
|
+
def path
|
2280
|
+
@io.path if @io.respond_to?(:path)
|
2281
|
+
end
|
1054
2282
|
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1058
|
-
|
1059
|
-
#
|
1060
|
-
# The data source must be open for writing.
|
1061
|
-
#
|
1062
|
-
def <<(row)
|
1063
|
-
# make sure headers have been assigned
|
1064
|
-
if header_row? and [Array, String].include? @use_headers.class and !@write_headers
|
1065
|
-
parse_headers # won't read data for Array or String
|
1066
|
-
end
|
2283
|
+
def stat(*args)
|
2284
|
+
raise NotImplementedError unless @io.respond_to?(:stat)
|
2285
|
+
@io.stat(*args)
|
2286
|
+
end
|
1067
2287
|
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
else row
|
1073
|
-
end
|
2288
|
+
def to_i
|
2289
|
+
raise NotImplementedError unless @io.respond_to?(:to_i)
|
2290
|
+
@io.to_i
|
2291
|
+
end
|
1074
2292
|
|
1075
|
-
|
1076
|
-
@
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
2293
|
+
def to_io
|
2294
|
+
@io.respond_to?(:to_io) ? @io.to_io : @io
|
2295
|
+
end
|
2296
|
+
|
2297
|
+
def eof?
|
2298
|
+
return false if @eof_error
|
2299
|
+
begin
|
2300
|
+
parser_enumerator.peek
|
2301
|
+
false
|
2302
|
+
rescue MalformedCSVError => error
|
2303
|
+
@eof_error = error
|
2304
|
+
false
|
2305
|
+
rescue StopIteration
|
2306
|
+
true
|
1087
2307
|
end
|
1088
|
-
|
2308
|
+
end
|
2309
|
+
alias_method :eof, :eof?
|
2310
|
+
|
2311
|
+
# Rewinds the underlying IO object and resets CSV's lineno() counter.
|
2312
|
+
def rewind
|
2313
|
+
@parser = nil
|
2314
|
+
@parser_enumerator = nil
|
2315
|
+
@eof_error = nil
|
2316
|
+
@writer.rewind if @writer
|
2317
|
+
@io.rewind
|
2318
|
+
end
|
1089
2319
|
|
1090
|
-
|
2320
|
+
### End Delegation ###
|
2321
|
+
|
2322
|
+
# :call-seq:
|
2323
|
+
# csv << row -> self
|
2324
|
+
#
|
2325
|
+
# Appends a row to +self+.
|
2326
|
+
#
|
2327
|
+
# - Argument +row+ must be an \Array object or a CSV::Row object.
|
2328
|
+
# - The output stream must be open for writing.
|
2329
|
+
#
|
2330
|
+
# ---
|
2331
|
+
#
|
2332
|
+
# Append Arrays:
|
2333
|
+
# CSV.generate do |csv|
|
2334
|
+
# csv << ['foo', 0]
|
2335
|
+
# csv << ['bar', 1]
|
2336
|
+
# csv << ['baz', 2]
|
2337
|
+
# end # => "foo,0\nbar,1\nbaz,2\n"
|
2338
|
+
#
|
2339
|
+
# Append CSV::Rows:
|
2340
|
+
# headers = []
|
2341
|
+
# CSV.generate do |csv|
|
2342
|
+
# csv << CSV::Row.new(headers, ['foo', 0])
|
2343
|
+
# csv << CSV::Row.new(headers, ['bar', 1])
|
2344
|
+
# csv << CSV::Row.new(headers, ['baz', 2])
|
2345
|
+
# end # => "foo,0\nbar,1\nbaz,2\n"
|
2346
|
+
#
|
2347
|
+
# Headers in CSV::Row objects are not appended:
|
2348
|
+
# headers = ['Name', 'Count']
|
2349
|
+
# CSV.generate do |csv|
|
2350
|
+
# csv << CSV::Row.new(headers, ['foo', 0])
|
2351
|
+
# csv << CSV::Row.new(headers, ['bar', 1])
|
2352
|
+
# csv << CSV::Row.new(headers, ['baz', 2])
|
2353
|
+
# end # => "foo,0\nbar,1\nbaz,2\n"
|
2354
|
+
#
|
2355
|
+
# ---
|
2356
|
+
#
|
2357
|
+
# Raises an exception if +row+ is not an \Array or \CSV::Row:
|
2358
|
+
# CSV.generate do |csv|
|
2359
|
+
# # Raises NoMethodError (undefined method `collect' for :foo:Symbol)
|
2360
|
+
# csv << :foo
|
2361
|
+
# end
|
2362
|
+
#
|
2363
|
+
# Raises an exception if the output stream is not opened for writing:
|
2364
|
+
# path = 't.csv'
|
2365
|
+
# File.write(path, '')
|
2366
|
+
# File.open(path) do |file|
|
2367
|
+
# CSV.open(file) do |csv|
|
2368
|
+
# # Raises IOError (not opened for writing)
|
2369
|
+
# csv << ['foo', 0]
|
2370
|
+
# end
|
2371
|
+
# end
|
2372
|
+
def <<(row)
|
2373
|
+
writer << row
|
2374
|
+
self
|
1091
2375
|
end
|
1092
2376
|
alias_method :add_row, :<<
|
1093
2377
|
alias_method :puts, :<<
|
1094
2378
|
|
1095
|
-
#
|
1096
2379
|
# :call-seq:
|
1097
|
-
# convert(
|
1098
|
-
# convert {
|
1099
|
-
#
|
1100
|
-
#
|
1101
|
-
#
|
1102
|
-
#
|
1103
|
-
#
|
1104
|
-
#
|
1105
|
-
#
|
1106
|
-
#
|
1107
|
-
#
|
1108
|
-
#
|
1109
|
-
#
|
2380
|
+
# convert(converter_name) -> array_of_procs
|
2381
|
+
# convert {|field, field_info| ... } -> array_of_procs
|
2382
|
+
#
|
2383
|
+
# - With no block, installs a field converter (a \Proc).
|
2384
|
+
# - With a block, defines and installs a custom field converter.
|
2385
|
+
# - Returns the \Array of installed field converters.
|
2386
|
+
#
|
2387
|
+
# - Argument +converter_name+, if given, should be the name
|
2388
|
+
# of an existing field converter.
|
2389
|
+
#
|
2390
|
+
# See {Field Converters}[#class-CSV-label-Field+Converters].
|
2391
|
+
# ---
|
2392
|
+
#
|
2393
|
+
# With no block, installs a field converter:
|
2394
|
+
# csv = CSV.new('')
|
2395
|
+
# csv.convert(:integer)
|
2396
|
+
# csv.convert(:float)
|
2397
|
+
# csv.convert(:date)
|
2398
|
+
# csv.converters # => [:integer, :float, :date]
|
2399
|
+
#
|
2400
|
+
# ---
|
2401
|
+
#
|
2402
|
+
# The block, if given, is called for each field:
|
2403
|
+
# - Argument +field+ is the field value.
|
2404
|
+
# - Argument +field_info+ is a CSV::FieldInfo object
|
2405
|
+
# containing details about the field.
|
2406
|
+
#
|
2407
|
+
# The examples here assume the prior execution of:
|
2408
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2409
|
+
# path = 't.csv'
|
2410
|
+
# File.write(path, string)
|
2411
|
+
#
|
2412
|
+
# Example giving a block:
|
2413
|
+
# csv = CSV.open(path)
|
2414
|
+
# csv.convert {|field, field_info| p [field, field_info]; field.upcase }
|
2415
|
+
# csv.read # => [["FOO", "0"], ["BAR", "1"], ["BAZ", "2"]]
|
2416
|
+
#
|
2417
|
+
# Output:
|
2418
|
+
# ["foo", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
|
2419
|
+
# ["0", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
|
2420
|
+
# ["bar", #<struct CSV::FieldInfo index=0, line=2, header=nil>]
|
2421
|
+
# ["1", #<struct CSV::FieldInfo index=1, line=2, header=nil>]
|
2422
|
+
# ["baz", #<struct CSV::FieldInfo index=0, line=3, header=nil>]
|
2423
|
+
# ["2", #<struct CSV::FieldInfo index=1, line=3, header=nil>]
|
2424
|
+
#
|
2425
|
+
# The block need not return a \String object:
|
2426
|
+
# csv = CSV.open(path)
|
2427
|
+
# csv.convert {|field, field_info| field.to_sym }
|
2428
|
+
# csv.read # => [[:foo, :"0"], [:bar, :"1"], [:baz, :"2"]]
|
2429
|
+
#
|
2430
|
+
# If +converter_name+ is given, the block is not called:
|
2431
|
+
# csv = CSV.open(path)
|
2432
|
+
# csv.convert(:integer) {|field, field_info| fail 'Cannot happen' }
|
2433
|
+
# csv.read # => [["foo", 0], ["bar", 1], ["baz", 2]]
|
2434
|
+
#
|
2435
|
+
# ---
|
2436
|
+
#
|
2437
|
+
# Raises a parse-time exception if +converter_name+ is not the name of a built-in
|
2438
|
+
# field converter:
|
2439
|
+
# csv = CSV.open(path)
|
2440
|
+
# csv.convert(:nosuch) # => [nil]
|
2441
|
+
# # Raises NoMethodError (undefined method `arity' for nil:NilClass)
|
2442
|
+
# csv.read
|
1110
2443
|
def convert(name = nil, &converter)
|
1111
|
-
add_converter(
|
2444
|
+
parser_fields_converter.add_converter(name, &converter)
|
1112
2445
|
end
|
1113
2446
|
|
1114
|
-
#
|
1115
2447
|
# :call-seq:
|
1116
|
-
# header_convert(
|
1117
|
-
# header_convert { |
|
1118
|
-
#
|
1119
|
-
#
|
1120
|
-
#
|
1121
|
-
#
|
1122
|
-
#
|
1123
|
-
#
|
1124
|
-
#
|
2448
|
+
# header_convert(converter_name) -> array_of_procs
|
2449
|
+
# header_convert {|header, field_info| ... } -> array_of_procs
|
2450
|
+
#
|
2451
|
+
# - With no block, installs a header converter (a \Proc).
|
2452
|
+
# - With a block, defines and installs a custom header converter.
|
2453
|
+
# - Returns the \Array of installed header converters.
|
2454
|
+
#
|
2455
|
+
# - Argument +converter_name+, if given, should be the name
|
2456
|
+
# of an existing header converter.
|
2457
|
+
#
|
2458
|
+
# See {Header Converters}[#class-CSV-label-Header+Converters].
|
2459
|
+
# ---
|
2460
|
+
#
|
2461
|
+
# With no block, installs a header converter:
|
2462
|
+
# csv = CSV.new('')
|
2463
|
+
# csv.header_convert(:symbol)
|
2464
|
+
# csv.header_convert(:downcase)
|
2465
|
+
# csv.header_converters # => [:symbol, :downcase]
|
2466
|
+
#
|
2467
|
+
# ---
|
2468
|
+
#
|
2469
|
+
# The block, if given, is called for each header:
|
2470
|
+
# - Argument +header+ is the header value.
|
2471
|
+
# - Argument +field_info+ is a CSV::FieldInfo object
|
2472
|
+
# containing details about the header.
|
2473
|
+
#
|
2474
|
+
# The examples here assume the prior execution of:
|
2475
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
2476
|
+
# path = 't.csv'
|
2477
|
+
# File.write(path, string)
|
2478
|
+
#
|
2479
|
+
# Example giving a block:
|
2480
|
+
# csv = CSV.open(path, headers: true)
|
2481
|
+
# csv.header_convert {|header, field_info| p [header, field_info]; header.upcase }
|
2482
|
+
# table = csv.read
|
2483
|
+
# table # => #<CSV::Table mode:col_or_row row_count:4>
|
2484
|
+
# table.headers # => ["NAME", "VALUE"]
|
2485
|
+
#
|
2486
|
+
# Output:
|
2487
|
+
# ["Name", #<struct CSV::FieldInfo index=0, line=1, header=nil>]
|
2488
|
+
# ["Value", #<struct CSV::FieldInfo index=1, line=1, header=nil>]
|
2489
|
+
|
2490
|
+
# The block need not return a \String object:
|
2491
|
+
# csv = CSV.open(path, headers: true)
|
2492
|
+
# csv.header_convert {|header, field_info| header.to_sym }
|
2493
|
+
# table = csv.read
|
2494
|
+
# table.headers # => [:Name, :Value]
|
2495
|
+
#
|
2496
|
+
# If +converter_name+ is given, the block is not called:
|
2497
|
+
# csv = CSV.open(path, headers: true)
|
2498
|
+
# csv.header_convert(:downcase) {|header, field_info| fail 'Cannot happen' }
|
2499
|
+
# table = csv.read
|
2500
|
+
# table.headers # => ["name", "value"]
|
2501
|
+
# ---
|
2502
|
+
#
|
2503
|
+
# Raises a parse-time exception if +converter_name+ is not the name of a built-in
|
2504
|
+
# header converter:
|
2505
|
+
# csv = CSV.open(path, headers: true)
|
2506
|
+
# csv.header_convert(:nosuch)
|
2507
|
+
# # Raises NoMethodError (undefined method `arity' for nil:NilClass)
|
2508
|
+
# csv.read
|
1125
2509
|
def header_convert(name = nil, &converter)
|
1126
|
-
add_converter(
|
1127
|
-
self.class::HeaderConverters,
|
1128
|
-
name,
|
1129
|
-
&converter )
|
2510
|
+
header_fields_converter.add_converter(name, &converter)
|
1130
2511
|
end
|
1131
2512
|
|
1132
2513
|
include Enumerable
|
1133
2514
|
|
1134
|
-
#
|
1135
|
-
#
|
1136
|
-
#
|
1137
|
-
#
|
1138
|
-
#
|
1139
|
-
# The data source must be
|
1140
|
-
#
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
2515
|
+
# :call-seq:
|
2516
|
+
# csv.each -> enumerator
|
2517
|
+
# csv.each {|row| ...}
|
2518
|
+
#
|
2519
|
+
# Calls the block with each successive row.
|
2520
|
+
# The data source must be opened for reading.
|
2521
|
+
#
|
2522
|
+
# Without headers:
|
2523
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2524
|
+
# csv = CSV.new(string)
|
2525
|
+
# csv.each do |row|
|
2526
|
+
# p row
|
2527
|
+
# end
|
2528
|
+
# Output:
|
2529
|
+
# ["foo", "0"]
|
2530
|
+
# ["bar", "1"]
|
2531
|
+
# ["baz", "2"]
|
2532
|
+
#
|
2533
|
+
# With headers:
|
2534
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
2535
|
+
# csv = CSV.new(string, headers: true)
|
2536
|
+
# csv.each do |row|
|
2537
|
+
# p row
|
2538
|
+
# end
|
2539
|
+
# Output:
|
2540
|
+
# <CSV::Row "Name":"foo" "Value":"0">
|
2541
|
+
# <CSV::Row "Name":"bar" "Value":"1">
|
2542
|
+
# <CSV::Row "Name":"baz" "Value":"2">
|
2543
|
+
#
|
2544
|
+
# ---
|
2545
|
+
#
|
2546
|
+
# Raises an exception if the source is not opened for reading:
|
2547
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2548
|
+
# csv = CSV.new(string)
|
2549
|
+
# csv.close
|
2550
|
+
# # Raises IOError (not opened for reading)
|
2551
|
+
# csv.each do |row|
|
2552
|
+
# p row
|
2553
|
+
# end
|
2554
|
+
def each(&block)
|
2555
|
+
return to_enum(__method__) unless block_given?
|
2556
|
+
begin
|
2557
|
+
while true
|
2558
|
+
yield(parser_enumerator.next)
|
1145
2559
|
end
|
1146
|
-
|
1147
|
-
to_enum
|
2560
|
+
rescue StopIteration
|
1148
2561
|
end
|
1149
2562
|
end
|
1150
2563
|
|
1151
|
-
#
|
1152
|
-
#
|
1153
|
-
#
|
1154
|
-
#
|
1155
|
-
#
|
2564
|
+
# :call-seq:
|
2565
|
+
# csv.read -> array or csv_table
|
2566
|
+
#
|
2567
|
+
# Forms the remaining rows from +self+ into:
|
2568
|
+
# - A CSV::Table object, if headers are in use.
|
2569
|
+
# - An \Array of Arrays, otherwise.
|
2570
|
+
#
|
2571
|
+
# The data source must be opened for reading.
|
2572
|
+
#
|
2573
|
+
# Without headers:
|
2574
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2575
|
+
# path = 't.csv'
|
2576
|
+
# File.write(path, string)
|
2577
|
+
# csv = CSV.open(path)
|
2578
|
+
# csv.read # => [["foo", "0"], ["bar", "1"], ["baz", "2"]]
|
2579
|
+
#
|
2580
|
+
# With headers:
|
2581
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
2582
|
+
# path = 't.csv'
|
2583
|
+
# File.write(path, string)
|
2584
|
+
# csv = CSV.open(path, headers: true)
|
2585
|
+
# csv.read # => #<CSV::Table mode:col_or_row row_count:4>
|
2586
|
+
#
|
2587
|
+
# ---
|
2588
|
+
#
|
2589
|
+
# Raises an exception if the source is not opened for reading:
|
2590
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2591
|
+
# csv = CSV.new(string)
|
2592
|
+
# csv.close
|
2593
|
+
# # Raises IOError (not opened for reading)
|
2594
|
+
# csv.read
|
1156
2595
|
def read
|
1157
2596
|
rows = to_a
|
1158
|
-
if
|
1159
|
-
Table.new(rows)
|
2597
|
+
if parser.use_headers?
|
2598
|
+
Table.new(rows, headers: parser.headers)
|
1160
2599
|
else
|
1161
2600
|
rows
|
1162
2601
|
end
|
1163
2602
|
end
|
1164
2603
|
alias_method :readlines, :read
|
1165
2604
|
|
1166
|
-
#
|
2605
|
+
# :call-seq:
|
2606
|
+
# csv.header_row? -> true or false
|
2607
|
+
#
|
2608
|
+
# Returns +true+ if the next row to be read is a header row\;
|
2609
|
+
# +false+ otherwise.
|
2610
|
+
#
|
2611
|
+
# Without headers:
|
2612
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2613
|
+
# csv = CSV.new(string)
|
2614
|
+
# csv.header_row? # => false
|
2615
|
+
#
|
2616
|
+
# With headers:
|
2617
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
2618
|
+
# csv = CSV.new(string, headers: true)
|
2619
|
+
# csv.header_row? # => true
|
2620
|
+
# csv.shift # => #<CSV::Row "Name":"foo" "Value":"0">
|
2621
|
+
# csv.header_row? # => false
|
2622
|
+
#
|
2623
|
+
# ---
|
2624
|
+
#
|
2625
|
+
# Raises an exception if the source is not opened for reading:
|
2626
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2627
|
+
# csv = CSV.new(string)
|
2628
|
+
# csv.close
|
2629
|
+
# # Raises IOError (not opened for reading)
|
2630
|
+
# csv.header_row?
|
1167
2631
|
def header_row?
|
1168
|
-
|
2632
|
+
parser.header_row?
|
1169
2633
|
end
|
1170
2634
|
|
1171
|
-
#
|
1172
|
-
#
|
1173
|
-
#
|
1174
|
-
#
|
1175
|
-
#
|
1176
|
-
#
|
1177
|
-
#
|
2635
|
+
# :call-seq:
|
2636
|
+
# csv.shift -> array, csv_row, or nil
|
2637
|
+
#
|
2638
|
+
# Returns the next row of data as:
|
2639
|
+
# - An \Array if no headers are used.
|
2640
|
+
# - A CSV::Row object if headers are used.
|
2641
|
+
#
|
2642
|
+
# The data source must be opened for reading.
|
2643
|
+
#
|
2644
|
+
# Without headers:
|
2645
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2646
|
+
# csv = CSV.new(string)
|
2647
|
+
# csv.shift # => ["foo", "0"]
|
2648
|
+
# csv.shift # => ["bar", "1"]
|
2649
|
+
# csv.shift # => ["baz", "2"]
|
2650
|
+
# csv.shift # => nil
|
2651
|
+
#
|
2652
|
+
# With headers:
|
2653
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
2654
|
+
# csv = CSV.new(string, headers: true)
|
2655
|
+
# csv.shift # => #<CSV::Row "Name":"foo" "Value":"0">
|
2656
|
+
# csv.shift # => #<CSV::Row "Name":"bar" "Value":"1">
|
2657
|
+
# csv.shift # => #<CSV::Row "Name":"baz" "Value":"2">
|
2658
|
+
# csv.shift # => nil
|
2659
|
+
#
|
2660
|
+
# ---
|
2661
|
+
#
|
2662
|
+
# Raises an exception if the source is not opened for reading:
|
2663
|
+
# string = "foo,0\nbar,1\nbaz,2\n"
|
2664
|
+
# csv = CSV.new(string)
|
2665
|
+
# csv.close
|
2666
|
+
# # Raises IOError (not opened for reading)
|
2667
|
+
# csv.shift
|
1178
2668
|
def shift
|
1179
|
-
|
1180
|
-
|
1181
|
-
|
1182
|
-
#########################################################################
|
1183
|
-
|
1184
|
-
# handle headers not based on document content
|
1185
|
-
if header_row? and @return_headers and
|
1186
|
-
[Array, String].include? @use_headers.class
|
1187
|
-
if @unconverted_fields
|
1188
|
-
return add_unconverted_fields(parse_headers, Array.new)
|
1189
|
-
else
|
1190
|
-
return parse_headers
|
1191
|
-
end
|
2669
|
+
if @eof_error
|
2670
|
+
eof_error, @eof_error = @eof_error, nil
|
2671
|
+
raise eof_error
|
1192
2672
|
end
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
#
|
1198
|
-
in_extended_col = false
|
1199
|
-
csv = Array.new
|
1200
|
-
|
1201
|
-
loop do
|
1202
|
-
# add another read to the line
|
1203
|
-
unless parse = @io.gets(@row_sep)
|
1204
|
-
return nil
|
1205
|
-
end
|
1206
|
-
|
1207
|
-
if in_extended_col
|
1208
|
-
@line.concat(parse)
|
1209
|
-
else
|
1210
|
-
@line = parse.clone
|
1211
|
-
end
|
1212
|
-
|
1213
|
-
begin
|
1214
|
-
parse.sub!(@parsers[:line_end], "")
|
1215
|
-
rescue ArgumentError
|
1216
|
-
unless parse.valid_encoding?
|
1217
|
-
message = "Invalid byte sequence in #{parse.encoding}"
|
1218
|
-
raise MalformedCSVError.new(message, lineno + 1)
|
1219
|
-
end
|
1220
|
-
raise
|
1221
|
-
end
|
1222
|
-
|
1223
|
-
if csv.empty?
|
1224
|
-
#
|
1225
|
-
# I believe a blank line should be an <tt>Array.new</tt>, not Ruby 1.8
|
1226
|
-
# CSV's <tt>[nil]</tt>
|
1227
|
-
#
|
1228
|
-
if parse.empty?
|
1229
|
-
@lineno += 1
|
1230
|
-
if @skip_blanks
|
1231
|
-
next
|
1232
|
-
elsif @unconverted_fields
|
1233
|
-
return add_unconverted_fields(Array.new, Array.new)
|
1234
|
-
elsif @use_headers
|
1235
|
-
return self.class::Row.new(@headers, Array.new)
|
1236
|
-
else
|
1237
|
-
return Array.new
|
1238
|
-
end
|
1239
|
-
end
|
1240
|
-
end
|
1241
|
-
|
1242
|
-
next if @skip_lines and @skip_lines.match parse
|
1243
|
-
|
1244
|
-
parts = parse.split(@col_sep_split_separator, -1)
|
1245
|
-
if parts.empty?
|
1246
|
-
if in_extended_col
|
1247
|
-
csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
|
1248
|
-
else
|
1249
|
-
csv << nil
|
1250
|
-
end
|
1251
|
-
end
|
1252
|
-
|
1253
|
-
# This loop is the hot path of csv parsing. Some things may be non-dry
|
1254
|
-
# for a reason. Make sure to benchmark when refactoring.
|
1255
|
-
parts.each do |part|
|
1256
|
-
if in_extended_col
|
1257
|
-
# If we are continuing a previous column
|
1258
|
-
if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
|
1259
|
-
# extended column ends
|
1260
|
-
csv.last << part[0..-2]
|
1261
|
-
if csv.last.match?(@parsers[:stray_quote])
|
1262
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1263
|
-
lineno + 1)
|
1264
|
-
end
|
1265
|
-
csv.last.gsub!(@double_quote_char, @quote_char)
|
1266
|
-
in_extended_col = false
|
1267
|
-
else
|
1268
|
-
csv.last << part << @col_sep
|
1269
|
-
end
|
1270
|
-
elsif part.start_with?(@quote_char)
|
1271
|
-
# If we are starting a new quoted column
|
1272
|
-
if part.count(@quote_char) % 2 != 0
|
1273
|
-
# start an extended column
|
1274
|
-
csv << (part[1..-1] << @col_sep)
|
1275
|
-
in_extended_col = true
|
1276
|
-
elsif part.end_with?(@quote_char)
|
1277
|
-
# regular quoted column
|
1278
|
-
csv << part[1..-2]
|
1279
|
-
if csv.last.match?(@parsers[:stray_quote])
|
1280
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1281
|
-
lineno + 1)
|
1282
|
-
end
|
1283
|
-
csv.last.gsub!(@double_quote_char, @quote_char)
|
1284
|
-
elsif @liberal_parsing
|
1285
|
-
csv << part
|
1286
|
-
else
|
1287
|
-
raise MalformedCSVError.new("Missing or stray quote",
|
1288
|
-
lineno + 1)
|
1289
|
-
end
|
1290
|
-
elsif part.match?(@parsers[:quote_or_nl])
|
1291
|
-
# Unquoted field with bad characters.
|
1292
|
-
if part.match?(@parsers[:nl_or_lf])
|
1293
|
-
message = "Unquoted fields do not allow \\r or \\n"
|
1294
|
-
raise MalformedCSVError.new(message, lineno + 1)
|
1295
|
-
else
|
1296
|
-
if @liberal_parsing
|
1297
|
-
csv << part
|
1298
|
-
else
|
1299
|
-
raise MalformedCSVError.new("Illegal quoting", lineno + 1)
|
1300
|
-
end
|
1301
|
-
end
|
1302
|
-
else
|
1303
|
-
# Regular ole unquoted field.
|
1304
|
-
csv << (part.empty? ? nil : part)
|
1305
|
-
end
|
1306
|
-
end
|
1307
|
-
|
1308
|
-
# Replace tacked on @col_sep with @row_sep if we are still in an extended
|
1309
|
-
# column.
|
1310
|
-
csv[-1][-1] = @row_sep if in_extended_col
|
1311
|
-
|
1312
|
-
if in_extended_col
|
1313
|
-
# if we're at eof?(), a quoted field wasn't closed...
|
1314
|
-
if @io.eof?
|
1315
|
-
raise MalformedCSVError.new("Unclosed quoted field",
|
1316
|
-
lineno + 1)
|
1317
|
-
elsif @field_size_limit and csv.last.size >= @field_size_limit
|
1318
|
-
raise MalformedCSVError.new("Field size exceeded",
|
1319
|
-
lineno + 1)
|
1320
|
-
end
|
1321
|
-
# otherwise, we need to loop and pull some more data to complete the row
|
1322
|
-
else
|
1323
|
-
@lineno += 1
|
1324
|
-
|
1325
|
-
# save fields unconverted fields, if needed...
|
1326
|
-
unconverted = csv.dup if @unconverted_fields
|
1327
|
-
|
1328
|
-
if @use_headers
|
1329
|
-
# parse out header rows and handle CSV::Row conversions...
|
1330
|
-
csv = parse_headers(csv)
|
1331
|
-
else
|
1332
|
-
# convert fields, if needed...
|
1333
|
-
csv = convert_fields(csv)
|
1334
|
-
end
|
1335
|
-
|
1336
|
-
# inject unconverted fields and accessor, if requested...
|
1337
|
-
if @unconverted_fields and not csv.respond_to? :unconverted_fields
|
1338
|
-
add_unconverted_fields(csv, unconverted)
|
1339
|
-
end
|
1340
|
-
|
1341
|
-
# return the results
|
1342
|
-
break csv
|
1343
|
-
end
|
2673
|
+
begin
|
2674
|
+
parser_enumerator.next
|
2675
|
+
rescue StopIteration
|
2676
|
+
nil
|
1344
2677
|
end
|
1345
2678
|
end
|
1346
2679
|
alias_method :gets, :shift
|
1347
2680
|
alias_method :readline, :shift
|
1348
2681
|
|
2682
|
+
# :call-seq:
|
2683
|
+
# csv.inspect -> string
|
1349
2684
|
#
|
1350
|
-
# Returns a
|
1351
|
-
#
|
1352
|
-
#
|
2685
|
+
# Returns a \String showing certain properties of +self+:
|
2686
|
+
# string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
|
2687
|
+
# csv = CSV.new(string, headers: true)
|
2688
|
+
# s = csv.inspect
|
2689
|
+
# s # => "#<CSV io_type:StringIO encoding:UTF-8 lineno:0 col_sep:\",\" row_sep:\"\\n\" quote_char:\"\\\"\" headers:true>"
|
1353
2690
|
def inspect
|
1354
|
-
str = ["
|
2691
|
+
str = ["#<", self.class.to_s, " io_type:"]
|
1355
2692
|
# show type of wrapped IO
|
1356
2693
|
if @io == $stdout then str << "$stdout"
|
1357
2694
|
elsif @io == $stdin then str << "$stdin"
|
@@ -1365,15 +2702,18 @@ class CSV
|
|
1365
2702
|
# show encoding
|
1366
2703
|
str << " encoding:" << @encoding.name
|
1367
2704
|
# show other attributes
|
1368
|
-
|
1369
|
-
|
1370
|
-
if a = instance_variable_get("@#{attr_name}")
|
2705
|
+
["lineno", "col_sep", "row_sep", "quote_char"].each do |attr_name|
|
2706
|
+
if a = __send__(attr_name)
|
1371
2707
|
str << " " << attr_name << ":" << a.inspect
|
1372
2708
|
end
|
1373
2709
|
end
|
1374
|
-
|
1375
|
-
|
2710
|
+
["skip_blanks", "liberal_parsing"].each do |attr_name|
|
2711
|
+
if a = __send__("#{attr_name}?")
|
2712
|
+
str << " " << attr_name << ":" << a.inspect
|
2713
|
+
end
|
1376
2714
|
end
|
2715
|
+
_headers = headers
|
2716
|
+
str << " headers:" << _headers.inspect if _headers
|
1377
2717
|
str << ">"
|
1378
2718
|
begin
|
1379
2719
|
str.join('')
|
@@ -1389,7 +2729,7 @@ class CSV
|
|
1389
2729
|
|
1390
2730
|
def determine_encoding(encoding, internal_encoding)
|
1391
2731
|
# honor the IO encoding if we can, otherwise default to ASCII-8BIT
|
1392
|
-
io_encoding = raw_encoding
|
2732
|
+
io_encoding = raw_encoding
|
1393
2733
|
return io_encoding if io_encoding
|
1394
2734
|
|
1395
2735
|
return Encoding.find(internal_encoding) if internal_encoding
|
@@ -1402,354 +2742,111 @@ class CSV
|
|
1402
2742
|
Encoding.default_internal || Encoding.default_external
|
1403
2743
|
end
|
1404
2744
|
|
1405
|
-
|
1406
|
-
|
1407
|
-
|
1408
|
-
|
1409
|
-
# ahead in the <tt>@io</tt> and try to find one. +ARGF+, +STDIN+, +STDOUT+,
|
1410
|
-
# +STDERR+ and any stream open for output only with a default
|
1411
|
-
# <tt>@row_sep</tt> of <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>).
|
1412
|
-
#
|
1413
|
-
# This method also establishes the quoting rules used for CSV output.
|
1414
|
-
#
|
1415
|
-
def init_separators(col_sep, row_sep, quote_char, force_quotes)
|
1416
|
-
# store the selected separators
|
1417
|
-
@col_sep = col_sep.to_s.encode(@encoding)
|
1418
|
-
if @col_sep == " "
|
1419
|
-
@col_sep_split_separator = Regexp.new(/#{Regexp.escape(@col_sep)}/)
|
1420
|
-
else
|
1421
|
-
@col_sep_split_separator = @col_sep
|
1422
|
-
end
|
1423
|
-
@row_sep = row_sep # encode after resolving :auto
|
1424
|
-
@quote_char = quote_char.to_s.encode(@encoding)
|
1425
|
-
@double_quote_char = @quote_char * 2
|
1426
|
-
|
1427
|
-
if @quote_char.length != 1
|
1428
|
-
raise ArgumentError, ":quote_char has to be a single character String"
|
1429
|
-
end
|
1430
|
-
|
1431
|
-
#
|
1432
|
-
# automatically discover row separator when requested
|
1433
|
-
# (not fully encoding safe)
|
1434
|
-
#
|
1435
|
-
if @row_sep == :auto
|
1436
|
-
if [ARGF, STDIN, STDOUT, STDERR].include?(@io) or
|
1437
|
-
(defined?(Zlib) and @io.class == Zlib::GzipWriter)
|
1438
|
-
@row_sep = $INPUT_RECORD_SEPARATOR
|
1439
|
-
else
|
1440
|
-
begin
|
1441
|
-
#
|
1442
|
-
# remember where we were (pos() will raise an exception if @io is pipe
|
1443
|
-
# or not opened for reading)
|
1444
|
-
#
|
1445
|
-
saved_pos = @io.pos
|
1446
|
-
while @row_sep == :auto
|
1447
|
-
#
|
1448
|
-
# if we run out of data, it's probably a single line
|
1449
|
-
# (ensure will set default value)
|
1450
|
-
#
|
1451
|
-
break unless sample = @io.gets(nil, 1024)
|
1452
|
-
|
1453
|
-
cr = encode_str("\r")
|
1454
|
-
lf = encode_str("\n")
|
1455
|
-
# extend sample if we're unsure of the line ending
|
1456
|
-
if sample.end_with?(cr)
|
1457
|
-
sample << (@io.gets(nil, 1) || "")
|
1458
|
-
end
|
1459
|
-
|
1460
|
-
# try to find a standard separator
|
1461
|
-
sample.each_char.each_cons(2) do |char, next_char|
|
1462
|
-
case char
|
1463
|
-
when cr
|
1464
|
-
if next_char == lf
|
1465
|
-
@row_sep = encode_str("\r\n")
|
1466
|
-
else
|
1467
|
-
@row_sep = cr
|
1468
|
-
end
|
1469
|
-
break
|
1470
|
-
when lf
|
1471
|
-
@row_sep = lf
|
1472
|
-
break
|
1473
|
-
end
|
1474
|
-
end
|
1475
|
-
end
|
1476
|
-
|
1477
|
-
# tricky seek() clone to work around GzipReader's lack of seek()
|
1478
|
-
@io.rewind
|
1479
|
-
# reset back to the remembered position
|
1480
|
-
while saved_pos > 1024 # avoid loading a lot of data into memory
|
1481
|
-
@io.read(1024)
|
1482
|
-
saved_pos -= 1024
|
1483
|
-
end
|
1484
|
-
@io.read(saved_pos) if saved_pos.nonzero?
|
1485
|
-
rescue IOError # not opened for reading
|
1486
|
-
# do nothing: ensure will set default
|
1487
|
-
rescue NoMethodError # Zlib::GzipWriter doesn't have some IO methods
|
1488
|
-
# do nothing: ensure will set default
|
1489
|
-
rescue SystemCallError # pipe
|
1490
|
-
# do nothing: ensure will set default
|
1491
|
-
ensure
|
1492
|
-
#
|
1493
|
-
# set default if we failed to detect
|
1494
|
-
# (stream not opened for reading, a pipe, or a single line of data)
|
1495
|
-
#
|
1496
|
-
@row_sep = $INPUT_RECORD_SEPARATOR if @row_sep == :auto
|
1497
|
-
end
|
1498
|
-
end
|
1499
|
-
end
|
1500
|
-
@row_sep = @row_sep.to_s.encode(@encoding)
|
1501
|
-
|
1502
|
-
# establish quoting rules
|
1503
|
-
@force_quotes = force_quotes
|
1504
|
-
do_quote = lambda do |field|
|
1505
|
-
field = String(field)
|
1506
|
-
encoded_quote = @quote_char.encode(field.encoding)
|
1507
|
-
encoded_quote + field.gsub(encoded_quote, encoded_quote * 2) + encoded_quote
|
2745
|
+
def normalize_converters(converters)
|
2746
|
+
converters ||= []
|
2747
|
+
unless converters.is_a?(Array)
|
2748
|
+
converters = [converters]
|
1508
2749
|
end
|
1509
|
-
|
1510
|
-
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
""
|
1516
|
-
else
|
1517
|
-
field = String(field) # Stringify fields
|
1518
|
-
# represent empty fields as empty quoted fields
|
1519
|
-
if field.empty? or
|
1520
|
-
field.count(quotable_chars).nonzero?
|
1521
|
-
do_quote.call(field)
|
1522
|
-
else
|
1523
|
-
field # unquoted field
|
1524
|
-
end
|
1525
|
-
end
|
2750
|
+
converters.collect do |converter|
|
2751
|
+
case converter
|
2752
|
+
when Proc # custom code block
|
2753
|
+
[nil, converter]
|
2754
|
+
else # by name
|
2755
|
+
[converter, nil]
|
1526
2756
|
end
|
1527
2757
|
end
|
1528
2758
|
end
|
1529
2759
|
|
1530
|
-
# Pre-compiles parsers and stores them by name for access during reads.
|
1531
|
-
def init_parsers(skip_blanks, field_size_limit, liberal_parsing)
|
1532
|
-
# store the parser behaviors
|
1533
|
-
@skip_blanks = skip_blanks
|
1534
|
-
@field_size_limit = field_size_limit
|
1535
|
-
@liberal_parsing = liberal_parsing
|
1536
|
-
|
1537
|
-
# prebuild Regexps for faster parsing
|
1538
|
-
esc_row_sep = escape_re(@row_sep)
|
1539
|
-
esc_quote = escape_re(@quote_char)
|
1540
|
-
@parsers = {
|
1541
|
-
# for detecting parse errors
|
1542
|
-
quote_or_nl: encode_re("[", esc_quote, "\r\n]"),
|
1543
|
-
nl_or_lf: encode_re("[\r\n]"),
|
1544
|
-
stray_quote: encode_re( "[^", esc_quote, "]", esc_quote,
|
1545
|
-
"[^", esc_quote, "]" ),
|
1546
|
-
# safer than chomp!()
|
1547
|
-
line_end: encode_re(esc_row_sep, "\\z"),
|
1548
|
-
# illegal unquoted characters
|
1549
|
-
return_newline: encode_str("\r\n")
|
1550
|
-
}
|
1551
|
-
end
|
1552
|
-
|
1553
2760
|
#
|
1554
|
-
#
|
1555
|
-
#
|
1556
|
-
#
|
1557
|
-
#
|
1558
|
-
#
|
1559
|
-
#
|
1560
|
-
# The <tt>:unconverted_fields</tt> option is also activated for
|
1561
|
-
# <tt>:converters</tt> calls, if requested.
|
2761
|
+
# Processes +fields+ with <tt>@converters</tt>, or <tt>@header_converters</tt>
|
2762
|
+
# if +headers+ is passed as +true+, returning the converted field set. Any
|
2763
|
+
# converter that changes the field into something other than a String halts
|
2764
|
+
# the pipeline of conversion for that field. This is primarily an efficiency
|
2765
|
+
# shortcut.
|
1562
2766
|
#
|
1563
|
-
def
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
end
|
1569
|
-
instance_variable_set(ivar_name, [])
|
1570
|
-
convert = method(convert_method)
|
1571
|
-
|
1572
|
-
# load converters
|
1573
|
-
converters.each do |converter|
|
1574
|
-
if converter.is_a? Proc # custom code block
|
1575
|
-
convert.call(&converter)
|
1576
|
-
else # by name
|
1577
|
-
convert.call(converter)
|
1578
|
-
end
|
2767
|
+
def convert_fields(fields, headers = false)
|
2768
|
+
if headers
|
2769
|
+
header_fields_converter.convert(fields, nil, 0)
|
2770
|
+
else
|
2771
|
+
parser_fields_converter.convert(fields, @headers, lineno)
|
1579
2772
|
end
|
1580
2773
|
end
|
1581
2774
|
|
1582
|
-
# Stores the pattern of comments to skip from the provided options.
|
1583
2775
|
#
|
1584
|
-
#
|
1585
|
-
# Strings are converted to a Regexp.
|
2776
|
+
# Returns the encoding of the internal IO object.
|
1586
2777
|
#
|
1587
|
-
|
1588
|
-
|
1589
|
-
|
1590
|
-
|
1591
|
-
|
1592
|
-
|
2778
|
+
def raw_encoding
|
2779
|
+
if @io.respond_to? :internal_encoding
|
2780
|
+
@io.internal_encoding || @io.external_encoding
|
2781
|
+
elsif @io.respond_to? :encoding
|
2782
|
+
@io.encoding
|
2783
|
+
else
|
2784
|
+
nil
|
1593
2785
|
end
|
1594
2786
|
end
|
1595
|
-
|
1596
|
-
|
1597
|
-
|
1598
|
-
#
|
1599
|
-
# This method requires the +var_name+ of the instance variable to place the
|
1600
|
-
# converters in, the +const+ Hash to lookup named converters in, and the
|
1601
|
-
# normal parameters of the CSV.convert() and CSV.header_convert() methods.
|
1602
|
-
#
|
1603
|
-
def add_converter(var_name, const, name = nil, &converter)
|
1604
|
-
if name.nil? # custom converter
|
1605
|
-
instance_variable_get(var_name) << converter
|
1606
|
-
else # named converter
|
1607
|
-
combo = const[name]
|
1608
|
-
case combo
|
1609
|
-
when Array # combo converter
|
1610
|
-
combo.each do |converter_name|
|
1611
|
-
add_converter(var_name, const, converter_name)
|
1612
|
-
end
|
1613
|
-
else # individual named converter
|
1614
|
-
instance_variable_get(var_name) << combo
|
1615
|
-
end
|
1616
|
-
end
|
2787
|
+
|
2788
|
+
def parser_fields_converter
|
2789
|
+
@parser_fields_converter ||= build_parser_fields_converter
|
1617
2790
|
end
|
1618
2791
|
|
1619
|
-
|
1620
|
-
|
1621
|
-
|
1622
|
-
|
1623
|
-
|
1624
|
-
|
1625
|
-
|
1626
|
-
def convert_fields(fields, headers = false)
|
1627
|
-
if headers
|
1628
|
-
converters = @header_converters
|
1629
|
-
else
|
1630
|
-
converters = @converters
|
1631
|
-
if !@use_headers and
|
1632
|
-
converters.empty? and
|
1633
|
-
@nil_value.nil? and
|
1634
|
-
@empty_value_is_empty_string
|
1635
|
-
return fields
|
1636
|
-
end
|
1637
|
-
end
|
2792
|
+
def build_parser_fields_converter
|
2793
|
+
specific_options = {
|
2794
|
+
builtin_converters_name: :Converters,
|
2795
|
+
}
|
2796
|
+
options = @base_fields_converter_options.merge(specific_options)
|
2797
|
+
build_fields_converter(@initial_converters, options)
|
2798
|
+
end
|
1638
2799
|
|
1639
|
-
|
1640
|
-
|
1641
|
-
field = @nil_value
|
1642
|
-
elsif field.empty?
|
1643
|
-
field = @empty_value unless @empty_value_is_empty_string
|
1644
|
-
end
|
1645
|
-
converters.each do |converter|
|
1646
|
-
break if headers && field.nil?
|
1647
|
-
field = if converter.arity == 1 # straight field converter
|
1648
|
-
converter[field]
|
1649
|
-
else # FieldInfo converter
|
1650
|
-
header = @use_headers && !headers ? @headers[index] : nil
|
1651
|
-
converter[field, FieldInfo.new(index, lineno, header)]
|
1652
|
-
end
|
1653
|
-
break unless field.is_a? String # short-circuit pipeline for speed
|
1654
|
-
end
|
1655
|
-
field # final state of each field, converted or original
|
1656
|
-
end
|
2800
|
+
def header_fields_converter
|
2801
|
+
@header_fields_converter ||= build_header_fields_converter
|
1657
2802
|
end
|
1658
2803
|
|
1659
|
-
|
1660
|
-
|
1661
|
-
|
1662
|
-
|
1663
|
-
|
1664
|
-
|
1665
|
-
|
1666
|
-
|
1667
|
-
# of the stream.
|
1668
|
-
#
|
1669
|
-
def parse_headers(row = nil)
|
1670
|
-
if @headers.nil? # header row
|
1671
|
-
@headers = case @use_headers # save headers
|
1672
|
-
# Array of headers
|
1673
|
-
when Array then @use_headers
|
1674
|
-
# CSV header String
|
1675
|
-
when String
|
1676
|
-
self.class.parse_line( @use_headers,
|
1677
|
-
col_sep: @col_sep,
|
1678
|
-
row_sep: @row_sep,
|
1679
|
-
quote_char: @quote_char )
|
1680
|
-
# first row is headers
|
1681
|
-
else row
|
1682
|
-
end
|
1683
|
-
|
1684
|
-
# prepare converted and unconverted copies
|
1685
|
-
row = @headers if row.nil?
|
1686
|
-
@headers = convert_fields(@headers, true)
|
1687
|
-
@headers.each { |h| h.freeze if h.is_a? String }
|
1688
|
-
|
1689
|
-
if @return_headers # return headers
|
1690
|
-
return self.class::Row.new(@headers, row, true)
|
1691
|
-
elsif not [Array, String].include? @use_headers.class # skip to field row
|
1692
|
-
return shift
|
1693
|
-
end
|
1694
|
-
end
|
2804
|
+
def build_header_fields_converter
|
2805
|
+
specific_options = {
|
2806
|
+
builtin_converters_name: :HeaderConverters,
|
2807
|
+
accept_nil: true,
|
2808
|
+
}
|
2809
|
+
options = @base_fields_converter_options.merge(specific_options)
|
2810
|
+
build_fields_converter(@initial_header_converters, options)
|
2811
|
+
end
|
1695
2812
|
|
1696
|
-
|
2813
|
+
def writer_fields_converter
|
2814
|
+
@writer_fields_converter ||= build_writer_fields_converter
|
1697
2815
|
end
|
1698
2816
|
|
1699
|
-
|
1700
|
-
|
1701
|
-
|
1702
|
-
|
1703
|
-
|
1704
|
-
def
|
1705
|
-
|
1706
|
-
|
2817
|
+
def build_writer_fields_converter
|
2818
|
+
build_fields_converter(@initial_write_converters,
|
2819
|
+
@write_fields_converter_options)
|
2820
|
+
end
|
2821
|
+
|
2822
|
+
def build_fields_converter(initial_converters, options)
|
2823
|
+
fields_converter = FieldsConverter.new(options)
|
2824
|
+
normalize_converters(initial_converters).each do |name, converter|
|
2825
|
+
fields_converter.add_converter(name, &converter)
|
1707
2826
|
end
|
1708
|
-
|
1709
|
-
row
|
2827
|
+
fields_converter
|
1710
2828
|
end
|
1711
2829
|
|
1712
|
-
|
1713
|
-
|
1714
|
-
# any characters that would change the meaning of a regular expression in the
|
1715
|
-
# encoding of +str+. Regular expression characters that cannot be transcoded
|
1716
|
-
# to the target encoding will be skipped and no escaping will be performed if
|
1717
|
-
# a backslash cannot be transcoded.
|
1718
|
-
#
|
1719
|
-
def escape_re(str)
|
1720
|
-
str.gsub(@re_chars) {|c| @re_esc + c}
|
2830
|
+
def parser
|
2831
|
+
@parser ||= Parser.new(@io, parser_options)
|
1721
2832
|
end
|
1722
2833
|
|
1723
|
-
|
1724
|
-
|
1725
|
-
|
1726
|
-
#
|
1727
|
-
def encode_re(*chunks)
|
1728
|
-
Regexp.new(encode_str(*chunks))
|
2834
|
+
def parser_options
|
2835
|
+
@parser_options.merge(header_fields_converter: header_fields_converter,
|
2836
|
+
fields_converter: parser_fields_converter)
|
1729
2837
|
end
|
1730
2838
|
|
1731
|
-
|
1732
|
-
|
1733
|
-
# that encoding.
|
1734
|
-
#
|
1735
|
-
def encode_str(*chunks)
|
1736
|
-
chunks.map { |chunk| chunk.encode(@encoding.name) }.join('')
|
2839
|
+
def parser_enumerator
|
2840
|
+
@parser_enumerator ||= parser.parse
|
1737
2841
|
end
|
1738
2842
|
|
1739
|
-
|
1740
|
-
|
1741
|
-
|
1742
|
-
|
1743
|
-
def
|
1744
|
-
|
1745
|
-
|
1746
|
-
elsif @io.is_a? StringIO
|
1747
|
-
@io.string.encoding
|
1748
|
-
elsif @io.respond_to? :encoding
|
1749
|
-
@io.encoding
|
1750
|
-
else
|
1751
|
-
default
|
1752
|
-
end
|
2843
|
+
def writer
|
2844
|
+
@writer ||= Writer.new(@io, writer_options)
|
2845
|
+
end
|
2846
|
+
|
2847
|
+
def writer_options
|
2848
|
+
@writer_options.merge(header_fields_converter: header_fields_converter,
|
2849
|
+
fields_converter: writer_fields_converter)
|
1753
2850
|
end
|
1754
2851
|
end
|
1755
2852
|
|
@@ -1769,8 +2866,15 @@ end
|
|
1769
2866
|
# c.read.any? { |a| a.include?("zombies") }
|
1770
2867
|
# } #=> false
|
1771
2868
|
#
|
1772
|
-
|
1773
|
-
|
2869
|
+
# CSV options may also be given.
|
2870
|
+
#
|
2871
|
+
# io = StringIO.new
|
2872
|
+
# CSV(io, col_sep: ";") { |csv| csv << ["a", "b", "c"] }
|
2873
|
+
#
|
2874
|
+
# This API is not Ractor-safe.
|
2875
|
+
#
|
2876
|
+
def CSV(*args, **options, &block)
|
2877
|
+
CSV.instance(*args, **options, &block)
|
1774
2878
|
end
|
1775
2879
|
|
1776
2880
|
require_relative "csv/version"
|