csv 0.1.0 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/LICENSE.txt +33 -0
- data/README.md +52 -0
- data/lib/csv/core_ext/array.rb +9 -0
- data/lib/csv/core_ext/string.rb +9 -0
- data/lib/csv/row.rb +388 -0
- data/lib/csv/table.rb +378 -0
- data/lib/csv/version.rb +6 -0
- data/lib/csv.rb +227 -804
- data/news.md +123 -0
- metadata +40 -14
data/lib/csv.rb
CHANGED
@@ -2,9 +2,7 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
# = csv.rb -- CSV Reading and Writing
|
4
4
|
#
|
5
|
-
#
|
6
|
-
# Copyright 2005 James Edward Gray II. You can redistribute or modify this code
|
7
|
-
# under the terms of Ruby's license.
|
5
|
+
# Created by James Edward Gray II on 2005-10-31.
|
8
6
|
#
|
9
7
|
# See CSV for documentation.
|
10
8
|
#
|
@@ -95,74 +93,146 @@ require "forwardable"
|
|
95
93
|
require "English"
|
96
94
|
require "date"
|
97
95
|
require "stringio"
|
96
|
+
require_relative "csv/table"
|
97
|
+
require_relative "csv/row"
|
98
|
+
|
99
|
+
# This provides String#match? and Regexp#match? for Ruby 2.3.
|
100
|
+
unless String.method_defined?(:match?)
|
101
|
+
class CSV
|
102
|
+
module MatchP
|
103
|
+
refine String do
|
104
|
+
def match?(pattern)
|
105
|
+
self =~ pattern
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
refine Regexp do
|
110
|
+
def match?(string)
|
111
|
+
self =~ string
|
112
|
+
end
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
using CSV::MatchP
|
118
|
+
end
|
98
119
|
|
99
120
|
#
|
100
121
|
# This class provides a complete interface to CSV files and data. It offers
|
101
122
|
# tools to enable you to read and write to and from Strings or IO objects, as
|
102
123
|
# needed.
|
103
124
|
#
|
104
|
-
#
|
125
|
+
# The most generic interface of the class is:
|
105
126
|
#
|
106
|
-
#
|
127
|
+
# csv = CSV.new(string_or_io, **options)
|
107
128
|
#
|
108
|
-
#
|
129
|
+
# # Reading: IO object should be open for read
|
130
|
+
# csv.read # => array of rows
|
131
|
+
# # or
|
132
|
+
# csv.each do |row|
|
133
|
+
# # ...
|
134
|
+
# end
|
135
|
+
# # or
|
136
|
+
# row = csv.shift
|
109
137
|
#
|
110
|
-
#
|
111
|
-
#
|
112
|
-
# end
|
138
|
+
# # Writing: IO object should be open for write
|
139
|
+
# csv << row
|
113
140
|
#
|
114
|
-
#
|
141
|
+
# There are several specialized class methods for one-statement reading or writing,
|
142
|
+
# described in the Specialized Methods section.
|
115
143
|
#
|
116
|
-
#
|
144
|
+
# If a String is passed into ::new, it is internally wrapped into a StringIO object.
|
117
145
|
#
|
118
|
-
#
|
146
|
+
# +options+ can be used for specifying the particular CSV flavor (column
|
147
|
+
# separators, row separators, value quoting and so on), and for data conversion,
|
148
|
+
# see the Data Conversion section for the description of the latter.
|
119
149
|
#
|
120
|
-
#
|
150
|
+
# == Specialized Methods
|
121
151
|
#
|
122
|
-
#
|
123
|
-
# # use row here...
|
124
|
-
# end
|
152
|
+
# === Reading
|
125
153
|
#
|
126
|
-
#
|
127
|
-
#
|
128
|
-
#
|
154
|
+
# # From a file: all at once
|
155
|
+
# arr_of_rows = CSV.read("path/to/file.csv", **options)
|
156
|
+
# # iterator-style:
|
157
|
+
# CSV.foreach("path/to/file.csv", **options) do |row|
|
158
|
+
# # ...
|
159
|
+
# end
|
129
160
|
#
|
130
|
-
#
|
161
|
+
# # From a string
|
162
|
+
# arr_of_rows = CSV.parse("CSV,data,String", **options)
|
163
|
+
# # or
|
164
|
+
# CSV.parse("CSV,data,String", **options) do |row|
|
165
|
+
# # ...
|
166
|
+
# end
|
131
167
|
#
|
132
|
-
# ===
|
168
|
+
# === Writing
|
133
169
|
#
|
170
|
+
# # To a file
|
134
171
|
# CSV.open("path/to/file.csv", "wb") do |csv|
|
135
172
|
# csv << ["row", "of", "CSV", "data"]
|
136
173
|
# csv << ["another", "row"]
|
137
174
|
# # ...
|
138
175
|
# end
|
139
176
|
#
|
140
|
-
#
|
141
|
-
#
|
177
|
+
# # To a String
|
142
178
|
# csv_string = CSV.generate do |csv|
|
143
179
|
# csv << ["row", "of", "CSV", "data"]
|
144
180
|
# csv << ["another", "row"]
|
145
181
|
# # ...
|
146
182
|
# end
|
147
183
|
#
|
148
|
-
#
|
184
|
+
# === Shortcuts
|
149
185
|
#
|
186
|
+
# # Core extensions for converting one line
|
150
187
|
# csv_string = ["CSV", "data"].to_csv # to CSV
|
151
188
|
# csv_array = "CSV,String".parse_csv # from CSV
|
152
189
|
#
|
153
|
-
#
|
154
|
-
#
|
190
|
+
# # CSV() method
|
155
191
|
# CSV { |csv_out| csv_out << %w{my data here} } # to $stdout
|
156
192
|
# CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
|
157
193
|
# CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
|
158
194
|
# CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
|
159
195
|
#
|
160
|
-
# ==
|
196
|
+
# == Data Conversion
|
197
|
+
#
|
198
|
+
# === CSV with headers
|
199
|
+
#
|
200
|
+
# CSV allows you to specify the column names of a CSV file, whether they are in the data or
|
201
|
+
# provided separately. If headers are specified, reading methods return an instance
|
202
|
+
# of CSV::Table, consisting of CSV::Row objects.
|
203
|
+
#
|
204
|
+
# # Headers are part of data
|
205
|
+
# data = CSV.parse(<<~ROWS, headers: true)
|
206
|
+
# Name,Department,Salary
|
207
|
+
# Bob,Engineering,1000
|
208
|
+
# Jane,Sales,2000
|
209
|
+
# John,Management,5000
|
210
|
+
# ROWS
|
161
211
|
#
|
162
|
-
#
|
212
|
+
# data.class #=> CSV::Table
|
213
|
+
# data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engineering" "Salary":"1000">
|
214
|
+
# data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"}
|
163
215
|
#
|
164
|
-
#
|
165
|
-
#
|
216
|
+
# # Headers provided by developer
|
217
|
+
# data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
|
218
|
+
# data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
|
219
|
+
#
|
220
|
+
# === Typed data reading
|
221
|
+
#
|
222
|
+
# CSV allows you to provide a set of data _converters_, i.e. transformations to try on input
|
223
|
+
# data. A converter can be a symbol from the keys of the CSV::Converters constant, or a lambda.
|
224
|
+
#
|
225
|
+
# # Without any converters:
|
226
|
+
# CSV.parse('Bob,2018-03-01,100')
|
227
|
+
# #=> [["Bob", "2018-03-01", "100"]]
|
228
|
+
#
|
229
|
+
# # With built-in converters:
|
230
|
+
# CSV.parse('Bob,2018-03-01,100', converters: %i[numeric date])
|
231
|
+
# #=> [["Bob", #<Date: 2018-03-01>, 100]]
|
232
|
+
#
|
233
|
+
# # With custom converters:
|
234
|
+
# CSV.parse('Bob,2018-03-01,100', converters: [->(v) { Time.parse(v) rescue v }])
|
235
|
+
# #=> [["Bob", 2018-03-01 00:00:00 +0200, "100"]]
|
166
236
|
#
|
167
237
|
# == CSV and Character Encodings (M17n or Multilingualization)
|
168
238
|
#
|
@@ -207,711 +277,17 @@ require "stringio"
|
|
207
277
|
# find with it.
|
208
278
|
#
|
209
279
|
class CSV
|
210
|
-
# The version of the installed library.
|
211
|
-
VERSION = "2.4.8"
|
212
|
-
|
213
|
-
#
|
214
|
-
# A CSV::Row is part Array and part Hash. It retains an order for the fields
|
215
|
-
# and allows duplicates just as an Array would, but also allows you to access
|
216
|
-
# fields by name just as you could if they were in a Hash.
|
217
|
-
#
|
218
|
-
# All rows returned by CSV will be constructed from this class, if header row
|
219
|
-
# processing is activated.
|
220
|
-
#
|
221
|
-
class Row
|
222
|
-
#
|
223
|
-
# Construct a new CSV::Row from +headers+ and +fields+, which are expected
|
224
|
-
# to be Arrays. If one Array is shorter than the other, it will be padded
|
225
|
-
# with +nil+ objects.
|
226
|
-
#
|
227
|
-
# The optional +header_row+ parameter can be set to +true+ to indicate, via
|
228
|
-
# CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
|
229
|
-
# row. Otherwise, the row is assumed to be a field row.
|
230
|
-
#
|
231
|
-
# A CSV::Row object supports the following Array methods through delegation:
|
232
|
-
#
|
233
|
-
# * empty?()
|
234
|
-
# * length()
|
235
|
-
# * size()
|
236
|
-
#
|
237
|
-
def initialize(headers, fields, header_row = false)
|
238
|
-
@header_row = header_row
|
239
|
-
headers.each { |h| h.freeze if h.is_a? String }
|
240
|
-
|
241
|
-
# handle extra headers or fields
|
242
|
-
@row = if headers.size >= fields.size
|
243
|
-
headers.zip(fields)
|
244
|
-
else
|
245
|
-
fields.zip(headers).each(&:reverse!)
|
246
|
-
end
|
247
|
-
end
|
248
|
-
|
249
|
-
# Internal data format used to compare equality.
|
250
|
-
attr_reader :row
|
251
|
-
protected :row
|
252
|
-
|
253
|
-
### Array Delegation ###
|
254
|
-
|
255
|
-
extend Forwardable
|
256
|
-
def_delegators :@row, :empty?, :length, :size
|
257
|
-
|
258
|
-
# Returns +true+ if this is a header row.
|
259
|
-
def header_row?
|
260
|
-
@header_row
|
261
|
-
end
|
262
|
-
|
263
|
-
# Returns +true+ if this is a field row.
|
264
|
-
def field_row?
|
265
|
-
not header_row?
|
266
|
-
end
|
267
|
-
|
268
|
-
# Returns the headers of this row.
|
269
|
-
def headers
|
270
|
-
@row.map(&:first)
|
271
|
-
end
|
272
|
-
|
273
|
-
#
|
274
|
-
# :call-seq:
|
275
|
-
# field( header )
|
276
|
-
# field( header, offset )
|
277
|
-
# field( index )
|
278
|
-
#
|
279
|
-
# This method will return the field value by +header+ or +index+. If a field
|
280
|
-
# is not found, +nil+ is returned.
|
281
|
-
#
|
282
|
-
# When provided, +offset+ ensures that a header match occurs on or later
|
283
|
-
# than the +offset+ index. You can use this to find duplicate headers,
|
284
|
-
# without resorting to hard-coding exact indices.
|
285
|
-
#
|
286
|
-
def field(header_or_index, minimum_index = 0)
|
287
|
-
# locate the pair
|
288
|
-
finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc
|
289
|
-
pair = @row[minimum_index..-1].send(finder, header_or_index)
|
290
|
-
|
291
|
-
# return the field if we have a pair
|
292
|
-
if pair.nil?
|
293
|
-
nil
|
294
|
-
else
|
295
|
-
header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last
|
296
|
-
end
|
297
|
-
end
|
298
|
-
alias_method :[], :field
|
299
|
-
|
300
|
-
#
|
301
|
-
# :call-seq:
|
302
|
-
# fetch( header )
|
303
|
-
# fetch( header ) { |row| ... }
|
304
|
-
# fetch( header, default )
|
305
|
-
#
|
306
|
-
# This method will fetch the field value by +header+. It has the same
|
307
|
-
# behavior as Hash#fetch: if there is a field with the given +header+, its
|
308
|
-
# value is returned. Otherwise, if a block is given, it is yielded the
|
309
|
-
# +header+ and its result is returned; if a +default+ is given as the
|
310
|
-
# second argument, it is returned; otherwise a KeyError is raised.
|
311
|
-
#
|
312
|
-
def fetch(header, *varargs)
|
313
|
-
raise ArgumentError, "Too many arguments" if varargs.length > 1
|
314
|
-
pair = @row.assoc(header)
|
315
|
-
if pair
|
316
|
-
pair.last
|
317
|
-
else
|
318
|
-
if block_given?
|
319
|
-
yield header
|
320
|
-
elsif varargs.empty?
|
321
|
-
raise KeyError, "key not found: #{header}"
|
322
|
-
else
|
323
|
-
varargs.first
|
324
|
-
end
|
325
|
-
end
|
326
|
-
end
|
327
|
-
|
328
|
-
# Returns +true+ if there is a field with the given +header+.
|
329
|
-
def has_key?(header)
|
330
|
-
!!@row.assoc(header)
|
331
|
-
end
|
332
|
-
alias_method :include?, :has_key?
|
333
|
-
alias_method :key?, :has_key?
|
334
|
-
alias_method :member?, :has_key?
|
335
|
-
|
336
|
-
#
|
337
|
-
# :call-seq:
|
338
|
-
# []=( header, value )
|
339
|
-
# []=( header, offset, value )
|
340
|
-
# []=( index, value )
|
341
|
-
#
|
342
|
-
# Looks up the field by the semantics described in CSV::Row.field() and
|
343
|
-
# assigns the +value+.
|
344
|
-
#
|
345
|
-
# Assigning past the end of the row with an index will set all pairs between
|
346
|
-
# to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
|
347
|
-
# pair.
|
348
|
-
#
|
349
|
-
def []=(*args)
|
350
|
-
value = args.pop
|
351
|
-
|
352
|
-
if args.first.is_a? Integer
|
353
|
-
if @row[args.first].nil? # extending past the end with index
|
354
|
-
@row[args.first] = [nil, value]
|
355
|
-
@row.map! { |pair| pair.nil? ? [nil, nil] : pair }
|
356
|
-
else # normal index assignment
|
357
|
-
@row[args.first][1] = value
|
358
|
-
end
|
359
|
-
else
|
360
|
-
index = index(*args)
|
361
|
-
if index.nil? # appending a field
|
362
|
-
self << [args.first, value]
|
363
|
-
else # normal header assignment
|
364
|
-
@row[index][1] = value
|
365
|
-
end
|
366
|
-
end
|
367
|
-
end
|
368
|
-
|
369
|
-
#
|
370
|
-
# :call-seq:
|
371
|
-
# <<( field )
|
372
|
-
# <<( header_and_field_array )
|
373
|
-
# <<( header_and_field_hash )
|
374
|
-
#
|
375
|
-
# If a two-element Array is provided, it is assumed to be a header and field
|
376
|
-
# and the pair is appended. A Hash works the same way with the key being
|
377
|
-
# the header and the value being the field. Anything else is assumed to be
|
378
|
-
# a lone field which is appended with a +nil+ header.
|
379
|
-
#
|
380
|
-
# This method returns the row for chaining.
|
381
|
-
#
|
382
|
-
def <<(arg)
|
383
|
-
if arg.is_a?(Array) and arg.size == 2 # appending a header and name
|
384
|
-
@row << arg
|
385
|
-
elsif arg.is_a?(Hash) # append header and name pairs
|
386
|
-
arg.each { |pair| @row << pair }
|
387
|
-
else # append field value
|
388
|
-
@row << [nil, arg]
|
389
|
-
end
|
390
|
-
|
391
|
-
self # for chaining
|
392
|
-
end
|
393
|
-
|
394
|
-
#
|
395
|
-
# A shortcut for appending multiple fields. Equivalent to:
|
396
|
-
#
|
397
|
-
# args.each { |arg| csv_row << arg }
|
398
|
-
#
|
399
|
-
# This method returns the row for chaining.
|
400
|
-
#
|
401
|
-
def push(*args)
|
402
|
-
args.each { |arg| self << arg }
|
403
|
-
|
404
|
-
self # for chaining
|
405
|
-
end
|
406
|
-
|
407
|
-
#
|
408
|
-
# :call-seq:
|
409
|
-
# delete( header )
|
410
|
-
# delete( header, offset )
|
411
|
-
# delete( index )
|
412
|
-
#
|
413
|
-
# Used to remove a pair from the row by +header+ or +index+. The pair is
|
414
|
-
# located as described in CSV::Row.field(). The deleted pair is returned,
|
415
|
-
# or +nil+ if a pair could not be found.
|
416
|
-
#
|
417
|
-
def delete(header_or_index, minimum_index = 0)
|
418
|
-
if header_or_index.is_a? Integer # by index
|
419
|
-
@row.delete_at(header_or_index)
|
420
|
-
elsif i = index(header_or_index, minimum_index) # by header
|
421
|
-
@row.delete_at(i)
|
422
|
-
else
|
423
|
-
[ ]
|
424
|
-
end
|
425
|
-
end
|
426
|
-
|
427
|
-
#
|
428
|
-
# The provided +block+ is passed a header and field for each pair in the row
|
429
|
-
# and expected to return +true+ or +false+, depending on whether the pair
|
430
|
-
# should be deleted.
|
431
|
-
#
|
432
|
-
# This method returns the row for chaining.
|
433
|
-
#
|
434
|
-
# If no block is given, an Enumerator is returned.
|
435
|
-
#
|
436
|
-
def delete_if(&block)
|
437
|
-
block or return enum_for(__method__) { size }
|
438
|
-
|
439
|
-
@row.delete_if(&block)
|
440
|
-
|
441
|
-
self # for chaining
|
442
|
-
end
|
443
|
-
|
444
|
-
#
|
445
|
-
# This method accepts any number of arguments which can be headers, indices,
|
446
|
-
# Ranges of either, or two-element Arrays containing a header and offset.
|
447
|
-
# Each argument will be replaced with a field lookup as described in
|
448
|
-
# CSV::Row.field().
|
449
|
-
#
|
450
|
-
# If called with no arguments, all fields are returned.
|
451
|
-
#
|
452
|
-
def fields(*headers_and_or_indices)
|
453
|
-
if headers_and_or_indices.empty? # return all fields--no arguments
|
454
|
-
@row.map(&:last)
|
455
|
-
else # or work like values_at()
|
456
|
-
all = []
|
457
|
-
headers_and_or_indices.each do |h_or_i|
|
458
|
-
if h_or_i.is_a? Range
|
459
|
-
index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
|
460
|
-
index(h_or_i.begin)
|
461
|
-
index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
|
462
|
-
index(h_or_i.end)
|
463
|
-
new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
|
464
|
-
(index_begin..index_end)
|
465
|
-
all.concat(fields.values_at(new_range))
|
466
|
-
else
|
467
|
-
all << field(*Array(h_or_i))
|
468
|
-
end
|
469
|
-
end
|
470
|
-
return all
|
471
|
-
end
|
472
|
-
end
|
473
|
-
alias_method :values_at, :fields
|
474
|
-
|
475
|
-
#
|
476
|
-
# :call-seq:
|
477
|
-
# index( header )
|
478
|
-
# index( header, offset )
|
479
|
-
#
|
480
|
-
# This method will return the index of a field with the provided +header+.
|
481
|
-
# The +offset+ can be used to locate duplicate header names, as described in
|
482
|
-
# CSV::Row.field().
|
483
|
-
#
|
484
|
-
def index(header, minimum_index = 0)
|
485
|
-
# find the pair
|
486
|
-
index = headers[minimum_index..-1].index(header)
|
487
|
-
# return the index at the right offset, if we found one
|
488
|
-
index.nil? ? nil : index + minimum_index
|
489
|
-
end
|
490
|
-
|
491
|
-
# Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
|
492
|
-
def header?(name)
|
493
|
-
headers.include? name
|
494
|
-
end
|
495
|
-
alias_method :include?, :header?
|
496
|
-
|
497
|
-
#
|
498
|
-
# Returns +true+ if +data+ matches a field in this row, and +false+
|
499
|
-
# otherwise.
|
500
|
-
#
|
501
|
-
def field?(data)
|
502
|
-
fields.include? data
|
503
|
-
end
|
504
|
-
|
505
|
-
include Enumerable
|
506
|
-
|
507
|
-
#
|
508
|
-
# Yields each pair of the row as header and field tuples (much like
|
509
|
-
# iterating over a Hash). This method returns the row for chaining.
|
510
|
-
#
|
511
|
-
# If no block is given, an Enumerator is returned.
|
512
|
-
#
|
513
|
-
# Support for Enumerable.
|
514
|
-
#
|
515
|
-
def each(&block)
|
516
|
-
block or return enum_for(__method__) { size }
|
517
|
-
|
518
|
-
@row.each(&block)
|
519
|
-
|
520
|
-
self # for chaining
|
521
|
-
end
|
522
|
-
|
523
|
-
#
|
524
|
-
# Returns +true+ if this row contains the same headers and fields in the
|
525
|
-
# same order as +other+.
|
526
|
-
#
|
527
|
-
def ==(other)
|
528
|
-
return @row == other.row if other.is_a? CSV::Row
|
529
|
-
@row == other
|
530
|
-
end
|
531
|
-
|
532
|
-
#
|
533
|
-
# Collapses the row into a simple Hash. Be warned that this discards field
|
534
|
-
# order and clobbers duplicate fields.
|
535
|
-
#
|
536
|
-
def to_hash
|
537
|
-
@row.to_h
|
538
|
-
end
|
539
|
-
|
540
|
-
#
|
541
|
-
# Returns the row as a CSV String. Headers are not used. Equivalent to:
|
542
|
-
#
|
543
|
-
# csv_row.fields.to_csv( options )
|
544
|
-
#
|
545
|
-
def to_csv(**options)
|
546
|
-
fields.to_csv(options)
|
547
|
-
end
|
548
|
-
alias_method :to_s, :to_csv
|
549
|
-
|
550
|
-
# A summary of fields, by header, in an ASCII compatible String.
|
551
|
-
def inspect
|
552
|
-
str = ["#<", self.class.to_s]
|
553
|
-
each do |header, field|
|
554
|
-
str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) <<
|
555
|
-
":" << field.inspect
|
556
|
-
end
|
557
|
-
str << ">"
|
558
|
-
begin
|
559
|
-
str.join('')
|
560
|
-
rescue # any encoding error
|
561
|
-
str.map do |s|
|
562
|
-
e = Encoding::Converter.asciicompat_encoding(s.encoding)
|
563
|
-
e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
|
564
|
-
end.join('')
|
565
|
-
end
|
566
|
-
end
|
567
|
-
end
|
568
|
-
|
569
|
-
#
|
570
|
-
# A CSV::Table is a two-dimensional data structure for representing CSV
|
571
|
-
# documents. Tables allow you to work with the data by row or column,
|
572
|
-
# manipulate the data, and even convert the results back to CSV, if needed.
|
573
|
-
#
|
574
|
-
# All tables returned by CSV will be constructed from this class, if header
|
575
|
-
# row processing is activated.
|
576
|
-
#
|
577
|
-
class Table
|
578
|
-
#
|
579
|
-
# Construct a new CSV::Table from +array_of_rows+, which are expected
|
580
|
-
# to be CSV::Row objects. All rows are assumed to have the same headers.
|
581
|
-
#
|
582
|
-
# A CSV::Table object supports the following Array methods through
|
583
|
-
# delegation:
|
584
|
-
#
|
585
|
-
# * empty?()
|
586
|
-
# * length()
|
587
|
-
# * size()
|
588
|
-
#
|
589
|
-
def initialize(array_of_rows)
|
590
|
-
@table = array_of_rows
|
591
|
-
@mode = :col_or_row
|
592
|
-
end
|
593
|
-
|
594
|
-
# The current access mode for indexing and iteration.
|
595
|
-
attr_reader :mode
|
596
|
-
|
597
|
-
# Internal data format used to compare equality.
|
598
|
-
attr_reader :table
|
599
|
-
protected :table
|
600
|
-
|
601
|
-
### Array Delegation ###
|
602
280
|
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
609
|
-
|
610
|
-
#
|
611
|
-
# This method returns the duplicate table for chaining. Don't chain
|
612
|
-
# destructive methods (like []=()) this way though, since you are working
|
613
|
-
# with a duplicate.
|
614
|
-
#
|
615
|
-
def by_col
|
616
|
-
self.class.new(@table.dup).by_col!
|
617
|
-
end
|
618
|
-
|
619
|
-
#
|
620
|
-
# Switches the mode of this table to column mode. All calls to indexing and
|
621
|
-
# iteration methods will work with columns until the mode is changed again.
|
622
|
-
#
|
623
|
-
# This method returns the table and is safe to chain.
|
624
|
-
#
|
625
|
-
def by_col!
|
626
|
-
@mode = :col
|
627
|
-
|
628
|
-
self
|
629
|
-
end
|
630
|
-
|
631
|
-
#
|
632
|
-
# Returns a duplicate table object, in mixed mode. This is handy for
|
633
|
-
# chaining in a single call without changing the table mode, but be aware
|
634
|
-
# that this method can consume a fair amount of memory for bigger data sets.
|
635
|
-
#
|
636
|
-
# This method returns the duplicate table for chaining. Don't chain
|
637
|
-
# destructive methods (like []=()) this way though, since you are working
|
638
|
-
# with a duplicate.
|
639
|
-
#
|
640
|
-
def by_col_or_row
|
641
|
-
self.class.new(@table.dup).by_col_or_row!
|
642
|
-
end
|
643
|
-
|
644
|
-
#
|
645
|
-
# Switches the mode of this table to mixed mode. All calls to indexing and
|
646
|
-
# iteration methods will use the default intelligent indexing system until
|
647
|
-
# the mode is changed again. In mixed mode an index is assumed to be a row
|
648
|
-
# reference while anything else is assumed to be column access by headers.
|
649
|
-
#
|
650
|
-
# This method returns the table and is safe to chain.
|
651
|
-
#
|
652
|
-
def by_col_or_row!
|
653
|
-
@mode = :col_or_row
|
654
|
-
|
655
|
-
self
|
656
|
-
end
|
657
|
-
|
658
|
-
#
|
659
|
-
# Returns a duplicate table object, in row mode. This is handy for chaining
|
660
|
-
# in a single call without changing the table mode, but be aware that this
|
661
|
-
# method can consume a fair amount of memory for bigger data sets.
|
662
|
-
#
|
663
|
-
# This method returns the duplicate table for chaining. Don't chain
|
664
|
-
# destructive methods (like []=()) this way though, since you are working
|
665
|
-
# with a duplicate.
|
666
|
-
#
|
667
|
-
def by_row
|
668
|
-
self.class.new(@table.dup).by_row!
|
669
|
-
end
|
670
|
-
|
671
|
-
#
|
672
|
-
# Switches the mode of this table to row mode. All calls to indexing and
|
673
|
-
# iteration methods will work with rows until the mode is changed again.
|
674
|
-
#
|
675
|
-
# This method returns the table and is safe to chain.
|
676
|
-
#
|
677
|
-
def by_row!
|
678
|
-
@mode = :row
|
679
|
-
|
680
|
-
self
|
681
|
-
end
|
682
|
-
|
683
|
-
#
|
684
|
-
# Returns the headers for the first row of this table (assumed to match all
|
685
|
-
# other rows). An empty Array is returned for empty tables.
|
686
|
-
#
|
687
|
-
def headers
|
688
|
-
if @table.empty?
|
689
|
-
Array.new
|
690
|
-
else
|
691
|
-
@table.first.headers
|
692
|
-
end
|
693
|
-
end
|
694
|
-
|
695
|
-
#
|
696
|
-
# In the default mixed mode, this method returns rows for index access and
|
697
|
-
# columns for header access. You can force the index association by first
|
698
|
-
# calling by_col!() or by_row!().
|
699
|
-
#
|
700
|
-
# Columns are returned as an Array of values. Altering that Array has no
|
701
|
-
# effect on the table.
|
702
|
-
#
|
703
|
-
def [](index_or_header)
|
704
|
-
if @mode == :row or # by index
|
705
|
-
(@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range)))
|
706
|
-
@table[index_or_header]
|
707
|
-
else # by header
|
708
|
-
@table.map { |row| row[index_or_header] }
|
709
|
-
end
|
710
|
-
end
|
711
|
-
|
712
|
-
#
|
713
|
-
# In the default mixed mode, this method assigns rows for index access and
|
714
|
-
# columns for header access. You can force the index association by first
|
715
|
-
# calling by_col!() or by_row!().
|
716
|
-
#
|
717
|
-
# Rows may be set to an Array of values (which will inherit the table's
|
718
|
-
# headers()) or a CSV::Row.
|
719
|
-
#
|
720
|
-
# Columns may be set to a single value, which is copied to each row of the
|
721
|
-
# column, or an Array of values. Arrays of values are assigned to rows top
|
722
|
-
# to bottom in row major order. Excess values are ignored and if the Array
|
723
|
-
# does not have a value for each row the extra rows will receive a +nil+.
|
724
|
-
#
|
725
|
-
# Assigning to an existing column or row clobbers the data. Assigning to
|
726
|
-
# new columns creates them at the right end of the table.
|
727
|
-
#
|
728
|
-
def []=(index_or_header, value)
|
729
|
-
if @mode == :row or # by index
|
730
|
-
(@mode == :col_or_row and index_or_header.is_a? Integer)
|
731
|
-
if value.is_a? Array
|
732
|
-
@table[index_or_header] = Row.new(headers, value)
|
733
|
-
else
|
734
|
-
@table[index_or_header] = value
|
735
|
-
end
|
736
|
-
else # set column
|
737
|
-
if value.is_a? Array # multiple values
|
738
|
-
@table.each_with_index do |row, i|
|
739
|
-
if row.header_row?
|
740
|
-
row[index_or_header] = index_or_header
|
741
|
-
else
|
742
|
-
row[index_or_header] = value[i]
|
743
|
-
end
|
744
|
-
end
|
745
|
-
else # repeated value
|
746
|
-
@table.each do |row|
|
747
|
-
if row.header_row?
|
748
|
-
row[index_or_header] = index_or_header
|
749
|
-
else
|
750
|
-
row[index_or_header] = value
|
751
|
-
end
|
752
|
-
end
|
753
|
-
end
|
754
|
-
end
|
755
|
-
end
|
756
|
-
|
757
|
-
#
|
758
|
-
# The mixed mode default is to treat a list of indices as row access,
|
759
|
-
# returning the rows indicated. Anything else is considered columnar
|
760
|
-
# access. For columnar access, the return set has an Array for each row
|
761
|
-
# with the values indicated by the headers in each Array. You can force
|
762
|
-
# column or row mode using by_col!() or by_row!().
|
763
|
-
#
|
764
|
-
# You cannot mix column and row access.
|
765
|
-
#
|
766
|
-
def values_at(*indices_or_headers)
|
767
|
-
if @mode == :row or # by indices
|
768
|
-
( @mode == :col_or_row and indices_or_headers.all? do |index|
|
769
|
-
index.is_a?(Integer) or
|
770
|
-
( index.is_a?(Range) and
|
771
|
-
index.first.is_a?(Integer) and
|
772
|
-
index.last.is_a?(Integer) )
|
773
|
-
end )
|
774
|
-
@table.values_at(*indices_or_headers)
|
775
|
-
else # by headers
|
776
|
-
@table.map { |row| row.values_at(*indices_or_headers) }
|
777
|
-
end
|
778
|
-
end
|
779
|
-
|
780
|
-
#
|
781
|
-
# Adds a new row to the bottom end of this table. You can provide an Array,
|
782
|
-
# which will be converted to a CSV::Row (inheriting the table's headers()),
|
783
|
-
# or a CSV::Row.
|
784
|
-
#
|
785
|
-
# This method returns the table for chaining.
|
786
|
-
#
|
787
|
-
def <<(row_or_array)
|
788
|
-
if row_or_array.is_a? Array # append Array
|
789
|
-
@table << Row.new(headers, row_or_array)
|
790
|
-
else # append Row
|
791
|
-
@table << row_or_array
|
792
|
-
end
|
793
|
-
|
794
|
-
self # for chaining
|
795
|
-
end
|
796
|
-
|
797
|
-
#
|
798
|
-
# A shortcut for appending multiple rows. Equivalent to:
|
799
|
-
#
|
800
|
-
# rows.each { |row| self << row }
|
801
|
-
#
|
802
|
-
# This method returns the table for chaining.
|
803
|
-
#
|
804
|
-
def push(*rows)
|
805
|
-
rows.each { |row| self << row }
|
806
|
-
|
807
|
-
self # for chaining
|
808
|
-
end
|
809
|
-
|
810
|
-
#
|
811
|
-
# Removes and returns the indicated column or row. In the default mixed
|
812
|
-
# mode indices refer to rows and everything else is assumed to be a column
|
813
|
-
# header. Use by_col!() or by_row!() to force the lookup.
|
814
|
-
#
|
815
|
-
def delete(index_or_header)
|
816
|
-
if @mode == :row or # by index
|
817
|
-
(@mode == :col_or_row and index_or_header.is_a? Integer)
|
818
|
-
@table.delete_at(index_or_header)
|
819
|
-
else # by header
|
820
|
-
@table.map { |row| row.delete(index_or_header).last }
|
821
|
-
end
|
822
|
-
end
|
823
|
-
|
824
|
-
#
|
825
|
-
# Removes any column or row for which the block returns +true+. In the
|
826
|
-
# default mixed mode or row mode, iteration is the standard row major
|
827
|
-
# walking of rows. In column mode, iteration will +yield+ two element
|
828
|
-
# tuples containing the column name and an Array of values for that column.
|
829
|
-
#
|
830
|
-
# This method returns the table for chaining.
|
831
|
-
#
|
832
|
-
# If no block is given, an Enumerator is returned.
|
833
|
-
#
|
834
|
-
def delete_if(&block)
|
835
|
-
block or return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size }
|
836
|
-
|
837
|
-
if @mode == :row or @mode == :col_or_row # by index
|
838
|
-
@table.delete_if(&block)
|
839
|
-
else # by header
|
840
|
-
deleted = []
|
841
|
-
headers.each do |header|
|
842
|
-
deleted << delete(header) if block[[header, self[header]]]
|
843
|
-
end
|
844
|
-
end
|
845
|
-
|
846
|
-
self # for chaining
|
847
|
-
end
|
848
|
-
|
849
|
-
include Enumerable
|
850
|
-
|
851
|
-
#
|
852
|
-
# In the default mixed mode or row mode, iteration is the standard row major
|
853
|
-
# walking of rows. In column mode, iteration will +yield+ two element
|
854
|
-
# tuples containing the column name and an Array of values for that column.
|
855
|
-
#
|
856
|
-
# This method returns the table for chaining.
|
857
|
-
#
|
858
|
-
# If no block is given, an Enumerator is returned.
|
859
|
-
#
|
860
|
-
def each(&block)
|
861
|
-
block or return enum_for(__method__) { @mode == :col ? headers.size : size }
|
862
|
-
|
863
|
-
if @mode == :col
|
864
|
-
headers.each { |header| block[[header, self[header]]] }
|
865
|
-
else
|
866
|
-
@table.each(&block)
|
867
|
-
end
|
868
|
-
|
869
|
-
self # for chaining
|
870
|
-
end
|
871
|
-
|
872
|
-
# Returns +true+ if all rows of this table ==() +other+'s rows.
|
873
|
-
def ==(other)
|
874
|
-
return @table == other.table if other.is_a? CSV::Table
|
875
|
-
@table == other
|
876
|
-
end
|
877
|
-
|
878
|
-
#
|
879
|
-
# Returns the table as an Array of Arrays. Headers will be the first row,
|
880
|
-
# then all of the field rows will follow.
|
881
|
-
#
|
882
|
-
def to_a
|
883
|
-
array = [headers]
|
884
|
-
@table.each do |row|
|
885
|
-
array.push(row.fields) unless row.header_row?
|
886
|
-
end
|
887
|
-
return array
|
888
|
-
end
|
889
|
-
|
890
|
-
#
|
891
|
-
# Returns the table as a complete CSV String. Headers will be listed first,
|
892
|
-
# then all of the field rows.
|
893
|
-
#
|
894
|
-
# This method assumes you want the Table.headers(), unless you explicitly
|
895
|
-
# pass <tt>:write_headers => false</tt>.
|
896
|
-
#
|
897
|
-
def to_csv(write_headers: true, **options)
|
898
|
-
array = write_headers ? [headers.to_csv(options)] : []
|
899
|
-
@table.each do |row|
|
900
|
-
array.push(row.fields.to_csv(options)) unless row.header_row?
|
901
|
-
end
|
902
|
-
return array.join('')
|
903
|
-
end
|
904
|
-
alias_method :to_s, :to_csv
|
905
|
-
|
906
|
-
# Shows the mode and size of this table in a US-ASCII String.
|
907
|
-
def inspect
|
908
|
-
"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII")
|
281
|
+
# The error thrown when the parser encounters illegal CSV formatting.
|
282
|
+
class MalformedCSVError < RuntimeError
|
283
|
+
attr_reader :line_number
|
284
|
+
alias_method :lineno, :line_number
|
285
|
+
def initialize(message, line_number)
|
286
|
+
@line_number = line_number
|
287
|
+
super("#{message} in line #{line_number}.")
|
909
288
|
end
|
910
289
|
end
|
911
290
|
|
912
|
-
# The error thrown when the parser encounters illegal CSV formatting.
|
913
|
-
class MalformedCSVError < RuntimeError; end
|
914
|
-
|
915
291
|
#
|
916
292
|
# A FieldInfo Struct contains details about a field's position in the data
|
917
293
|
# source it was read from. CSV will pass this Struct to some blocks that make
|
@@ -930,7 +306,11 @@ class CSV
|
|
930
306
|
# A Regexp used to find and convert some common DateTime formats.
|
931
307
|
DateTimeMatcher =
|
932
308
|
/ \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
|
933
|
-
\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}
|
309
|
+
\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} |
|
310
|
+
# ISO-8601
|
311
|
+
\d{4}-\d{2}-\d{2}
|
312
|
+
(?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
|
313
|
+
)\z /x
|
934
314
|
|
935
315
|
# The encoding used by all converters.
|
936
316
|
ConverterEncoding = Encoding.find("UTF-8")
|
@@ -970,7 +350,7 @@ class CSV
|
|
970
350
|
date: lambda { |f|
|
971
351
|
begin
|
972
352
|
e = f.encode(ConverterEncoding)
|
973
|
-
e
|
353
|
+
e.match?(DateMatcher) ? Date.parse(e) : f
|
974
354
|
rescue # encoding conversion or date parse errors
|
975
355
|
f
|
976
356
|
end
|
@@ -978,7 +358,7 @@ class CSV
|
|
978
358
|
date_time: lambda { |f|
|
979
359
|
begin
|
980
360
|
e = f.encode(ConverterEncoding)
|
981
|
-
e
|
361
|
+
e.match?(DateTimeMatcher) ? DateTime.parse(e) : f
|
982
362
|
rescue # encoding conversion or date parse errors
|
983
363
|
f
|
984
364
|
end
|
@@ -1137,7 +517,7 @@ class CSV
|
|
1137
517
|
# but transcode it to UTF-8 before CSV parses it.
|
1138
518
|
#
|
1139
519
|
def self.foreach(path, **options, &block)
|
1140
|
-
return to_enum(__method__, path, options) unless
|
520
|
+
return to_enum(__method__, path, options) unless block_given?
|
1141
521
|
open(path, options) do |csv|
|
1142
522
|
csv.each(&block)
|
1143
523
|
end
|
@@ -1164,8 +544,8 @@ class CSV
|
|
1164
544
|
def self.generate(str=nil, **options)
|
1165
545
|
# add a default empty String, if none was given
|
1166
546
|
if str
|
1167
|
-
|
1168
|
-
|
547
|
+
str = StringIO.new(str)
|
548
|
+
str.seek(0, IO::SEEK_END)
|
1169
549
|
else
|
1170
550
|
encoding = options[:encoding]
|
1171
551
|
str = String.new
|
@@ -1271,7 +651,7 @@ class CSV
|
|
1271
651
|
begin
|
1272
652
|
f = File.open(filename, mode, file_opts)
|
1273
653
|
rescue ArgumentError => e
|
1274
|
-
raise unless /needs binmode
|
654
|
+
raise unless /needs binmode/.match?(e.message) and mode == "r"
|
1275
655
|
mode = "rb"
|
1276
656
|
file_opts = {encoding: Encoding.default_external}.merge(file_opts)
|
1277
657
|
retry
|
@@ -1309,14 +689,14 @@ class CSV
|
|
1309
689
|
#
|
1310
690
|
def self.parse(*args, &block)
|
1311
691
|
csv = new(*args)
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
1315
|
-
|
1316
|
-
|
1317
|
-
|
1318
|
-
|
1319
|
-
csv.
|
692
|
+
|
693
|
+
return csv.each(&block) if block_given?
|
694
|
+
|
695
|
+
# slurp contents, if no block is given
|
696
|
+
begin
|
697
|
+
csv.read
|
698
|
+
ensure
|
699
|
+
csv.close
|
1320
700
|
end
|
1321
701
|
end
|
1322
702
|
|
@@ -1510,6 +890,8 @@ class CSV
|
|
1510
890
|
# attempt to parse input not conformant
|
1511
891
|
# with RFC 4180, such as double quotes
|
1512
892
|
# in unquoted fields.
|
893
|
+
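# <b><tt>:nil_value</tt></b>:: The value substituted for each +nil+ field when a row's fields are converted. Defaults to +nil+, which leaves such fields as +nil+.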
|
894
|
+
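# <b><tt>:empty_value</tt></b>:: The value substituted for each empty String field when a row's fields are converted. Defaults to <tt>""</tt>, which leaves such fields unchanged.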
|
1513
895
|
#
|
1514
896
|
# See CSV::DEFAULT_OPTIONS for the default settings.
|
1515
897
|
#
|
@@ -1519,20 +901,14 @@ class CSV
|
|
1519
901
|
def initialize(data, col_sep: ",", row_sep: :auto, quote_char: '"', field_size_limit: nil,
|
1520
902
|
converters: nil, unconverted_fields: nil, headers: false, return_headers: false,
|
1521
903
|
write_headers: nil, header_converters: nil, skip_blanks: false, force_quotes: false,
|
1522
|
-
skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil
|
904
|
+
skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil,
|
905
|
+
nil_value: nil,
|
906
|
+
empty_value: "")
|
1523
907
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
1524
908
|
|
1525
909
|
# create the IO object we will read from
|
1526
910
|
@io = data.is_a?(String) ? StringIO.new(data) : data
|
1527
|
-
|
1528
|
-
internal_encoding = Encoding.find(internal_encoding) if internal_encoding
|
1529
|
-
external_encoding = Encoding.find(external_encoding) if external_encoding
|
1530
|
-
if encoding
|
1531
|
-
encoding, = encoding.split(":", 2) if encoding.is_a?(String)
|
1532
|
-
encoding = Encoding.find(encoding)
|
1533
|
-
end
|
1534
|
-
@encoding = raw_encoding(nil) || internal_encoding || encoding ||
|
1535
|
-
Encoding.default_internal || Encoding.default_external
|
911
|
+
@encoding = determine_encoding(encoding, internal_encoding)
|
1536
912
|
#
|
1537
913
|
# prepare for building safe regular expressions in the target encoding,
|
1538
914
|
# if we can transcode the needed characters
|
@@ -1549,6 +925,10 @@ class CSV
|
|
1549
925
|
# headers must be delayed until shift(), in case they need a row of content
|
1550
926
|
@headers = nil
|
1551
927
|
|
928
|
+
@nil_value = nil_value
|
929
|
+
@empty_value = empty_value
|
930
|
+
@empty_value_is_empty_string = (empty_value == "")
|
931
|
+
|
1552
932
|
init_separators(col_sep, row_sep, quote_char, force_quotes)
|
1553
933
|
init_parsers(skip_blanks, field_size_limit, liberal_parsing)
|
1554
934
|
init_converters(converters, :@converters, :convert)
|
@@ -1830,7 +1210,15 @@ class CSV
|
|
1830
1210
|
@line = parse.clone
|
1831
1211
|
end
|
1832
1212
|
|
1833
|
-
|
1213
|
+
begin
|
1214
|
+
parse.sub!(@parsers[:line_end], "")
|
1215
|
+
rescue ArgumentError
|
1216
|
+
unless parse.valid_encoding?
|
1217
|
+
message = "Invalid byte sequence in #{parse.encoding}"
|
1218
|
+
raise MalformedCSVError.new(message, lineno + 1)
|
1219
|
+
end
|
1220
|
+
raise
|
1221
|
+
end
|
1834
1222
|
|
1835
1223
|
if csv.empty?
|
1836
1224
|
#
|
@@ -1844,7 +1232,7 @@ class CSV
|
|
1844
1232
|
elsif @unconverted_fields
|
1845
1233
|
return add_unconverted_fields(Array.new, Array.new)
|
1846
1234
|
elsif @use_headers
|
1847
|
-
return self.class::Row.new(
|
1235
|
+
return self.class::Row.new(@headers, Array.new)
|
1848
1236
|
else
|
1849
1237
|
return Array.new
|
1850
1238
|
end
|
@@ -1853,7 +1241,7 @@ class CSV
|
|
1853
1241
|
|
1854
1242
|
next if @skip_lines and @skip_lines.match parse
|
1855
1243
|
|
1856
|
-
parts = parse.split(@
|
1244
|
+
parts = parse.split(@col_sep_split_separator, -1)
|
1857
1245
|
if parts.empty?
|
1858
1246
|
if in_extended_col
|
1859
1247
|
csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
|
@@ -1870,9 +1258,9 @@ class CSV
|
|
1870
1258
|
if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
|
1871
1259
|
# extended column ends
|
1872
1260
|
csv.last << part[0..-2]
|
1873
|
-
if csv.last
|
1874
|
-
raise MalformedCSVError,
|
1875
|
-
|
1261
|
+
if csv.last.match?(@parsers[:stray_quote])
|
1262
|
+
raise MalformedCSVError.new("Missing or stray quote",
|
1263
|
+
lineno + 1)
|
1876
1264
|
end
|
1877
1265
|
csv.last.gsub!(@double_quote_char, @quote_char)
|
1878
1266
|
in_extended_col = false
|
@@ -1888,27 +1276,27 @@ class CSV
|
|
1888
1276
|
elsif part.end_with?(@quote_char)
|
1889
1277
|
# regular quoted column
|
1890
1278
|
csv << part[1..-2]
|
1891
|
-
if csv.last
|
1892
|
-
raise MalformedCSVError,
|
1893
|
-
|
1279
|
+
if csv.last.match?(@parsers[:stray_quote])
|
1280
|
+
raise MalformedCSVError.new("Missing or stray quote",
|
1281
|
+
lineno + 1)
|
1894
1282
|
end
|
1895
1283
|
csv.last.gsub!(@double_quote_char, @quote_char)
|
1896
1284
|
elsif @liberal_parsing
|
1897
1285
|
csv << part
|
1898
1286
|
else
|
1899
|
-
raise MalformedCSVError,
|
1900
|
-
|
1287
|
+
raise MalformedCSVError.new("Missing or stray quote",
|
1288
|
+
lineno + 1)
|
1901
1289
|
end
|
1902
|
-
elsif part
|
1290
|
+
elsif part.match?(@parsers[:quote_or_nl])
|
1903
1291
|
# Unquoted field with bad characters.
|
1904
|
-
if part
|
1905
|
-
|
1906
|
-
|
1292
|
+
if part.match?(@parsers[:nl_or_lf])
|
1293
|
+
message = "Unquoted fields do not allow \\r or \\n"
|
1294
|
+
raise MalformedCSVError.new(message, lineno + 1)
|
1907
1295
|
else
|
1908
1296
|
if @liberal_parsing
|
1909
1297
|
csv << part
|
1910
1298
|
else
|
1911
|
-
raise MalformedCSVError
|
1299
|
+
raise MalformedCSVError.new("Illegal quoting", lineno + 1)
|
1912
1300
|
end
|
1913
1301
|
end
|
1914
1302
|
else
|
@@ -1924,10 +1312,11 @@ class CSV
|
|
1924
1312
|
if in_extended_col
|
1925
1313
|
# if we're at eof?(), a quoted field wasn't closed...
|
1926
1314
|
if @io.eof?
|
1927
|
-
raise MalformedCSVError,
|
1928
|
-
|
1315
|
+
raise MalformedCSVError.new("Unclosed quoted field",
|
1316
|
+
lineno + 1)
|
1929
1317
|
elsif @field_size_limit and csv.last.size >= @field_size_limit
|
1930
|
-
raise MalformedCSVError
|
1318
|
+
raise MalformedCSVError.new("Field size exceeded",
|
1319
|
+
lineno + 1)
|
1931
1320
|
end
|
1932
1321
|
# otherwise, we need to loop and pull some more data to complete the row
|
1933
1322
|
else
|
@@ -1936,10 +1325,13 @@ class CSV
|
|
1936
1325
|
# save the unconverted fields, if needed...
|
1937
1326
|
unconverted = csv.dup if @unconverted_fields
|
1938
1327
|
|
1939
|
-
|
1940
|
-
|
1941
|
-
|
1942
|
-
|
1328
|
+
if @use_headers
|
1329
|
+
# parse out header rows and handle CSV::Row conversions...
|
1330
|
+
csv = parse_headers(csv)
|
1331
|
+
else
|
1332
|
+
# convert fields, if needed...
|
1333
|
+
csv = convert_fields(csv)
|
1334
|
+
end
|
1943
1335
|
|
1944
1336
|
# inject unconverted fields and accessor, if requested...
|
1945
1337
|
if @unconverted_fields and not csv.respond_to? :unconverted_fields
|
@@ -1995,6 +1387,21 @@ class CSV
|
|
1995
1387
|
|
1996
1388
|
private
|
1997
1389
|
|
1390
|
+
def determine_encoding(encoding, internal_encoding)
|
1391
|
+
# use whatever raw_encoding reports for the IO, if anything; otherwise fall back, in order, to internal_encoding, encoding (only the part before any ":"), Encoding.default_internal, and Encoding.default_external
|
1392
|
+
io_encoding = raw_encoding(nil)
|
1393
|
+
return io_encoding if io_encoding
|
1394
|
+
|
1395
|
+
return Encoding.find(internal_encoding) if internal_encoding
|
1396
|
+
|
1397
|
+
if encoding
|
1398
|
+
encoding, = encoding.split(":", 2) if encoding.is_a?(String)
|
1399
|
+
return Encoding.find(encoding)
|
1400
|
+
end
|
1401
|
+
|
1402
|
+
Encoding.default_internal || Encoding.default_external
|
1403
|
+
end
|
1404
|
+
|
1998
1405
|
#
|
1999
1406
|
# Stores the indicated separators for later use.
|
2000
1407
|
#
|
@@ -2008,6 +1415,11 @@ class CSV
|
|
2008
1415
|
def init_separators(col_sep, row_sep, quote_char, force_quotes)
|
2009
1416
|
# store the selected separators
|
2010
1417
|
@col_sep = col_sep.to_s.encode(@encoding)
|
1418
|
+
if @col_sep == " "
|
1419
|
+
@col_sep_split_separator = Regexp.new(/#{Regexp.escape(@col_sep)}/)
|
1420
|
+
else
|
1421
|
+
@col_sep_split_separator = @col_sep
|
1422
|
+
end
|
2011
1423
|
@row_sep = row_sep # encode after resolving :auto
|
2012
1424
|
@quote_char = quote_char.to_s.encode(@encoding)
|
2013
1425
|
@double_quote_char = @quote_char * 2
|
@@ -2037,15 +1449,28 @@ class CSV
|
|
2037
1449
|
# (ensure will set default value)
|
2038
1450
|
#
|
2039
1451
|
break unless sample = @io.gets(nil, 1024)
|
1452
|
+
|
1453
|
+
cr = encode_str("\r")
|
1454
|
+
lf = encode_str("\n")
|
2040
1455
|
# extend sample if we're unsure of the line ending
|
2041
|
-
if sample.end_with?
|
1456
|
+
if sample.end_with?(cr)
|
2042
1457
|
sample << (@io.gets(nil, 1) || "")
|
2043
1458
|
end
|
2044
1459
|
|
2045
1460
|
# try to find a standard separator
|
2046
|
-
|
2047
|
-
|
2048
|
-
|
1461
|
+
sample.each_char.each_cons(2) do |char, next_char|
|
1462
|
+
case char
|
1463
|
+
when cr
|
1464
|
+
if next_char == lf
|
1465
|
+
@row_sep = encode_str("\r\n")
|
1466
|
+
else
|
1467
|
+
@row_sep = cr
|
1468
|
+
end
|
1469
|
+
break
|
1470
|
+
when lf
|
1471
|
+
@row_sep = lf
|
1472
|
+
break
|
1473
|
+
end
|
2049
1474
|
end
|
2050
1475
|
end
|
2051
1476
|
|
@@ -2199,10 +1624,24 @@ class CSV
|
|
2199
1624
|
# shortcut.
|
2200
1625
|
#
|
2201
1626
|
def convert_fields(fields, headers = false)
|
2202
|
-
|
2203
|
-
|
1627
|
+
if headers
|
1628
|
+
converters = @header_converters
|
1629
|
+
else
|
1630
|
+
converters = @converters
|
1631
|
+
if !@use_headers and
|
1632
|
+
converters.empty? and
|
1633
|
+
@nil_value.nil? and
|
1634
|
+
@empty_value_is_empty_string
|
1635
|
+
return fields
|
1636
|
+
end
|
1637
|
+
end
|
2204
1638
|
|
2205
1639
|
fields.map.with_index do |field, index|
|
1640
|
+
if field.nil?
|
1641
|
+
field = @nil_value
|
1642
|
+
elsif field.empty?
|
1643
|
+
field = @empty_value unless @empty_value_is_empty_string
|
1644
|
+
end
|
2206
1645
|
converters.each do |converter|
|
2207
1646
|
break if headers && field.nil?
|
2208
1647
|
field = if converter.arity == 1 # straight field converter
|
@@ -2334,22 +1773,6 @@ def CSV(*args, &block)
|
|
2334
1773
|
CSV.instance(*args, &block)
|
2335
1774
|
end
|
2336
1775
|
|
2337
|
-
|
2338
|
-
|
2339
|
-
|
2340
|
-
# ["CSV", "data"].to_csv
|
2341
|
-
# #=> "CSV,data\n"
|
2342
|
-
def to_csv(**options)
|
2343
|
-
CSV.generate_line(self, options)
|
2344
|
-
end
|
2345
|
-
end
|
2346
|
-
|
2347
|
-
class String # :nodoc:
|
2348
|
-
# Equivalent to CSV::parse_line(self, options)
|
2349
|
-
#
|
2350
|
-
# "CSV,data".parse_csv
|
2351
|
-
# #=> ["CSV", "data"]
|
2352
|
-
def parse_csv(**options)
|
2353
|
-
CSV.parse_line(self, options)
|
2354
|
-
end
|
2355
|
-
end
|
1776
|
+
require_relative "csv/version"
|
1777
|
+
require_relative "csv/core_ext/array"
|
1778
|
+
require_relative "csv/core_ext/string"
|