csv 0.1.0 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/csv.rb CHANGED
@@ -2,9 +2,7 @@
2
2
  # frozen_string_literal: true
3
3
  # = csv.rb -- CSV Reading and Writing
4
4
  #
5
- # Created by James Edward Gray II on 2005-10-31.
6
- # Copyright 2005 James Edward Gray II. You can redistribute or modify this code
7
- # under the terms of Ruby's license.
5
+ # Created by James Edward Gray II on 2005-10-31.
8
6
  #
9
7
  # See CSV for documentation.
10
8
  #
@@ -95,74 +93,146 @@ require "forwardable"
95
93
  require "English"
96
94
  require "date"
97
95
  require "stringio"
96
+ require_relative "csv/table"
97
+ require_relative "csv/row"
98
+
99
+ # This provides String#match? and Regexp#match? for Ruby 2.3.
100
+ unless String.method_defined?(:match?)
101
+ class CSV
102
+ module MatchP
103
+ refine String do
104
+ def match?(pattern)
105
+ self =~ pattern
106
+ end
107
+ end
108
+
109
+ refine Regexp do
110
+ def match?(string)
111
+ self =~ string
112
+ end
113
+ end
114
+ end
115
+ end
116
+
117
+ using CSV::MatchP
118
+ end
98
119
 
99
120
  #
100
121
  # This class provides a complete interface to CSV files and data. It offers
101
122
  # tools to enable you to read and write to and from Strings or IO objects, as
102
123
  # needed.
103
124
  #
104
- # == Reading
125
+ # The most generic interface of the class is:
105
126
  #
106
- # === From a File
127
+ # csv = CSV.new(string_or_io, **options)
107
128
  #
108
- # ==== A Line at a Time
129
+ # # Reading: IO object should be open for read
130
+ # csv.read # => array of rows
131
+ # # or
132
+ # csv.each do |row|
133
+ # # ...
134
+ # end
135
+ # # or
136
+ # row = csv.shift
109
137
  #
110
- # CSV.foreach("path/to/file.csv") do |row|
111
- # # use row here...
112
- # end
138
+ # # Writing: IO object should be open for write
139
+ # csv << row
113
140
  #
114
- # ==== All at Once
141
+ # There are several specialized class methods for one-statement reading or writing,
142
+ # described in the Specialized Methods section.
115
143
  #
116
- # arr_of_arrs = CSV.read("path/to/file.csv")
144
+ # If a String is passed into ::new, it is internally wrapped in a StringIO object.
117
145
  #
118
- # === From a String
146
+ # +options+ can be used for specifying the particular CSV flavor (column
147
+ # separators, row separators, value quoting and so on), and for data conversion;
148
+ # see the Data Conversion section for a description of the latter.
119
149
  #
120
- # ==== A Line at a Time
150
+ # == Specialized Methods
121
151
  #
122
- # CSV.parse("CSV,data,String") do |row|
123
- # # use row here...
124
- # end
152
+ # === Reading
125
153
  #
126
- # ==== All at Once
127
- #
128
- # arr_of_arrs = CSV.parse("CSV,data,String")
154
+ # # From a file: all at once
155
+ # arr_of_rows = CSV.read("path/to/file.csv", **options)
156
+ # # iterator-style:
157
+ # CSV.foreach("path/to/file.csv", **options) do |row|
158
+ # # ...
159
+ # end
129
160
  #
130
- # == Writing
161
+ # # From a string
162
+ # arr_of_rows = CSV.parse("CSV,data,String", **options)
163
+ # # or
164
+ # CSV.parse("CSV,data,String", **options) do |row|
165
+ # # ...
166
+ # end
131
167
  #
132
- # === To a File
168
+ # === Writing
133
169
  #
170
+ # # To a file
134
171
  # CSV.open("path/to/file.csv", "wb") do |csv|
135
172
  # csv << ["row", "of", "CSV", "data"]
136
173
  # csv << ["another", "row"]
137
174
  # # ...
138
175
  # end
139
176
  #
140
- # === To a String
141
- #
177
+ # # To a String
142
178
  # csv_string = CSV.generate do |csv|
143
179
  # csv << ["row", "of", "CSV", "data"]
144
180
  # csv << ["another", "row"]
145
181
  # # ...
146
182
  # end
147
183
  #
148
- # == Convert a Single Line
184
+ # === Shortcuts
149
185
  #
186
+ # # Core extensions for converting one line
150
187
  # csv_string = ["CSV", "data"].to_csv # to CSV
151
188
  # csv_array = "CSV,String".parse_csv # from CSV
152
189
  #
153
- # == Shortcut Interface
154
- #
190
+ # # CSV() method
155
191
  # CSV { |csv_out| csv_out << %w{my data here} } # to $stdout
156
192
  # CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
157
193
  # CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
158
194
  # CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
159
195
  #
160
- # == Advanced Usage
196
+ # == Data Conversion
197
+ #
198
+ # === CSV with headers
199
+ #
200
+ # CSV lets you specify the column names of a CSV file, whether they are in the data or
201
+ # provided separately. If headers are specified, reading methods return an instance
202
+ # of CSV::Table, consisting of CSV::Row objects.
203
+ #
204
+ # # Headers are part of data
205
+ # data = CSV.parse(<<~ROWS, headers: true)
206
+ # Name,Department,Salary
207
+ # Bob,Engineering,1000
208
+ # Jane,Sales,2000
209
+ # John,Management,5000
210
+ # ROWS
161
211
  #
162
- # === Wrap an IO Object
212
+ # data.class #=> CSV::Table
213
+ # data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engineering" "Salary":"1000">
214
+ # data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"}
163
215
  #
164
- # csv = CSV.new(io, options)
165
- # # ... read (with gets() or each()) from and write (with <<) to csv here ...
216
+ # # Headers provided by developer
217
+ # data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
218
+ # data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
219
+ #
220
+ # === Typed data reading
221
+ #
222
+ # CSV lets you provide a set of data _converters_, i.e. transformations to try on input
223
+ # data. A converter can be a symbol from the CSV::Converters constant's keys, or a lambda.
224
+ #
225
+ # # Without any converters:
226
+ # CSV.parse('Bob,2018-03-01,100')
227
+ # #=> [["Bob", "2018-03-01", "100"]]
228
+ #
229
+ # # With built-in converters:
230
+ # CSV.parse('Bob,2018-03-01,100', converters: %i[numeric date])
231
+ # #=> [["Bob", #<Date: 2018-03-01>, 100]]
232
+ #
233
+ # # With custom converters:
234
+ # CSV.parse('Bob,2018-03-01,100', converters: [->(v) { Time.parse(v) rescue v }])
235
+ # #=> [["Bob", 2018-03-01 00:00:00 +0200, "100"]]
166
236
  #
167
237
  # == CSV and Character Encodings (M17n or Multilingualization)
168
238
  #
@@ -207,711 +277,17 @@ require "stringio"
207
277
  # find with it.
208
278
  #
209
279
  class CSV
210
- # The version of the installed library.
211
- VERSION = "2.4.8"
212
-
213
- #
214
- # A CSV::Row is part Array and part Hash. It retains an order for the fields
215
- # and allows duplicates just as an Array would, but also allows you to access
216
- # fields by name just as you could if they were in a Hash.
217
- #
218
- # All rows returned by CSV will be constructed from this class, if header row
219
- # processing is activated.
220
- #
221
- class Row
222
- #
223
- # Construct a new CSV::Row from +headers+ and +fields+, which are expected
224
- # to be Arrays. If one Array is shorter than the other, it will be padded
225
- # with +nil+ objects.
226
- #
227
- # The optional +header_row+ parameter can be set to +true+ to indicate, via
228
- # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
229
- # row. Otherwise, the row is assumes to be a field row.
230
- #
231
- # A CSV::Row object supports the following Array methods through delegation:
232
- #
233
- # * empty?()
234
- # * length()
235
- # * size()
236
- #
237
- def initialize(headers, fields, header_row = false)
238
- @header_row = header_row
239
- headers.each { |h| h.freeze if h.is_a? String }
240
-
241
- # handle extra headers or fields
242
- @row = if headers.size >= fields.size
243
- headers.zip(fields)
244
- else
245
- fields.zip(headers).each(&:reverse!)
246
- end
247
- end
248
-
249
- # Internal data format used to compare equality.
250
- attr_reader :row
251
- protected :row
252
-
253
- ### Array Delegation ###
254
-
255
- extend Forwardable
256
- def_delegators :@row, :empty?, :length, :size
257
-
258
- # Returns +true+ if this is a header row.
259
- def header_row?
260
- @header_row
261
- end
262
-
263
- # Returns +true+ if this is a field row.
264
- def field_row?
265
- not header_row?
266
- end
267
-
268
- # Returns the headers of this row.
269
- def headers
270
- @row.map(&:first)
271
- end
272
-
273
- #
274
- # :call-seq:
275
- # field( header )
276
- # field( header, offset )
277
- # field( index )
278
- #
279
- # This method will return the field value by +header+ or +index+. If a field
280
- # is not found, +nil+ is returned.
281
- #
282
- # When provided, +offset+ ensures that a header match occurs on or later
283
- # than the +offset+ index. You can use this to find duplicate headers,
284
- # without resorting to hard-coding exact indices.
285
- #
286
- def field(header_or_index, minimum_index = 0)
287
- # locate the pair
288
- finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc
289
- pair = @row[minimum_index..-1].send(finder, header_or_index)
290
-
291
- # return the field if we have a pair
292
- if pair.nil?
293
- nil
294
- else
295
- header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last
296
- end
297
- end
298
- alias_method :[], :field
299
-
300
- #
301
- # :call-seq:
302
- # fetch( header )
303
- # fetch( header ) { |row| ... }
304
- # fetch( header, default )
305
- #
306
- # This method will fetch the field value by +header+. It has the same
307
- # behavior as Hash#fetch: if there is a field with the given +header+, its
308
- # value is returned. Otherwise, if a block is given, it is yielded the
309
- # +header+ and its result is returned; if a +default+ is given as the
310
- # second argument, it is returned; otherwise a KeyError is raised.
311
- #
312
- def fetch(header, *varargs)
313
- raise ArgumentError, "Too many arguments" if varargs.length > 1
314
- pair = @row.assoc(header)
315
- if pair
316
- pair.last
317
- else
318
- if block_given?
319
- yield header
320
- elsif varargs.empty?
321
- raise KeyError, "key not found: #{header}"
322
- else
323
- varargs.first
324
- end
325
- end
326
- end
327
-
328
- # Returns +true+ if there is a field with the given +header+.
329
- def has_key?(header)
330
- !!@row.assoc(header)
331
- end
332
- alias_method :include?, :has_key?
333
- alias_method :key?, :has_key?
334
- alias_method :member?, :has_key?
335
-
336
- #
337
- # :call-seq:
338
- # []=( header, value )
339
- # []=( header, offset, value )
340
- # []=( index, value )
341
- #
342
- # Looks up the field by the semantics described in CSV::Row.field() and
343
- # assigns the +value+.
344
- #
345
- # Assigning past the end of the row with an index will set all pairs between
346
- # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
347
- # pair.
348
- #
349
- def []=(*args)
350
- value = args.pop
351
-
352
- if args.first.is_a? Integer
353
- if @row[args.first].nil? # extending past the end with index
354
- @row[args.first] = [nil, value]
355
- @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
356
- else # normal index assignment
357
- @row[args.first][1] = value
358
- end
359
- else
360
- index = index(*args)
361
- if index.nil? # appending a field
362
- self << [args.first, value]
363
- else # normal header assignment
364
- @row[index][1] = value
365
- end
366
- end
367
- end
368
-
369
- #
370
- # :call-seq:
371
- # <<( field )
372
- # <<( header_and_field_array )
373
- # <<( header_and_field_hash )
374
- #
375
- # If a two-element Array is provided, it is assumed to be a header and field
376
- # and the pair is appended. A Hash works the same way with the key being
377
- # the header and the value being the field. Anything else is assumed to be
378
- # a lone field which is appended with a +nil+ header.
379
- #
380
- # This method returns the row for chaining.
381
- #
382
- def <<(arg)
383
- if arg.is_a?(Array) and arg.size == 2 # appending a header and name
384
- @row << arg
385
- elsif arg.is_a?(Hash) # append header and name pairs
386
- arg.each { |pair| @row << pair }
387
- else # append field value
388
- @row << [nil, arg]
389
- end
390
-
391
- self # for chaining
392
- end
393
-
394
- #
395
- # A shortcut for appending multiple fields. Equivalent to:
396
- #
397
- # args.each { |arg| csv_row << arg }
398
- #
399
- # This method returns the row for chaining.
400
- #
401
- def push(*args)
402
- args.each { |arg| self << arg }
403
-
404
- self # for chaining
405
- end
406
-
407
- #
408
- # :call-seq:
409
- # delete( header )
410
- # delete( header, offset )
411
- # delete( index )
412
- #
413
- # Used to remove a pair from the row by +header+ or +index+. The pair is
414
- # located as described in CSV::Row.field(). The deleted pair is returned,
415
- # or +nil+ if a pair could not be found.
416
- #
417
- def delete(header_or_index, minimum_index = 0)
418
- if header_or_index.is_a? Integer # by index
419
- @row.delete_at(header_or_index)
420
- elsif i = index(header_or_index, minimum_index) # by header
421
- @row.delete_at(i)
422
- else
423
- [ ]
424
- end
425
- end
426
-
427
- #
428
- # The provided +block+ is passed a header and field for each pair in the row
429
- # and expected to return +true+ or +false+, depending on whether the pair
430
- # should be deleted.
431
- #
432
- # This method returns the row for chaining.
433
- #
434
- # If no block is given, an Enumerator is returned.
435
- #
436
- def delete_if(&block)
437
- block or return enum_for(__method__) { size }
438
-
439
- @row.delete_if(&block)
440
-
441
- self # for chaining
442
- end
443
-
444
- #
445
- # This method accepts any number of arguments which can be headers, indices,
446
- # Ranges of either, or two-element Arrays containing a header and offset.
447
- # Each argument will be replaced with a field lookup as described in
448
- # CSV::Row.field().
449
- #
450
- # If called with no arguments, all fields are returned.
451
- #
452
- def fields(*headers_and_or_indices)
453
- if headers_and_or_indices.empty? # return all fields--no arguments
454
- @row.map(&:last)
455
- else # or work like values_at()
456
- all = []
457
- headers_and_or_indices.each do |h_or_i|
458
- if h_or_i.is_a? Range
459
- index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
460
- index(h_or_i.begin)
461
- index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
462
- index(h_or_i.end)
463
- new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
464
- (index_begin..index_end)
465
- all.concat(fields.values_at(new_range))
466
- else
467
- all << field(*Array(h_or_i))
468
- end
469
- end
470
- return all
471
- end
472
- end
473
- alias_method :values_at, :fields
474
-
475
- #
476
- # :call-seq:
477
- # index( header )
478
- # index( header, offset )
479
- #
480
- # This method will return the index of a field with the provided +header+.
481
- # The +offset+ can be used to locate duplicate header names, as described in
482
- # CSV::Row.field().
483
- #
484
- def index(header, minimum_index = 0)
485
- # find the pair
486
- index = headers[minimum_index..-1].index(header)
487
- # return the index at the right offset, if we found one
488
- index.nil? ? nil : index + minimum_index
489
- end
490
-
491
- # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
492
- def header?(name)
493
- headers.include? name
494
- end
495
- alias_method :include?, :header?
496
-
497
- #
498
- # Returns +true+ if +data+ matches a field in this row, and +false+
499
- # otherwise.
500
- #
501
- def field?(data)
502
- fields.include? data
503
- end
504
-
505
- include Enumerable
506
-
507
- #
508
- # Yields each pair of the row as header and field tuples (much like
509
- # iterating over a Hash). This method returns the row for chaining.
510
- #
511
- # If no block is given, an Enumerator is returned.
512
- #
513
- # Support for Enumerable.
514
- #
515
- def each(&block)
516
- block or return enum_for(__method__) { size }
517
-
518
- @row.each(&block)
519
-
520
- self # for chaining
521
- end
522
-
523
- #
524
- # Returns +true+ if this row contains the same headers and fields in the
525
- # same order as +other+.
526
- #
527
- def ==(other)
528
- return @row == other.row if other.is_a? CSV::Row
529
- @row == other
530
- end
531
-
532
- #
533
- # Collapses the row into a simple Hash. Be warned that this discards field
534
- # order and clobbers duplicate fields.
535
- #
536
- def to_hash
537
- @row.to_h
538
- end
539
-
540
- #
541
- # Returns the row as a CSV String. Headers are not used. Equivalent to:
542
- #
543
- # csv_row.fields.to_csv( options )
544
- #
545
- def to_csv(**options)
546
- fields.to_csv(options)
547
- end
548
- alias_method :to_s, :to_csv
549
-
550
- # A summary of fields, by header, in an ASCII compatible String.
551
- def inspect
552
- str = ["#<", self.class.to_s]
553
- each do |header, field|
554
- str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) <<
555
- ":" << field.inspect
556
- end
557
- str << ">"
558
- begin
559
- str.join('')
560
- rescue # any encoding error
561
- str.map do |s|
562
- e = Encoding::Converter.asciicompat_encoding(s.encoding)
563
- e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
564
- end.join('')
565
- end
566
- end
567
- end
568
-
569
- #
570
- # A CSV::Table is a two-dimensional data structure for representing CSV
571
- # documents. Tables allow you to work with the data by row or column,
572
- # manipulate the data, and even convert the results back to CSV, if needed.
573
- #
574
- # All tables returned by CSV will be constructed from this class, if header
575
- # row processing is activated.
576
- #
577
- class Table
578
- #
579
- # Construct a new CSV::Table from +array_of_rows+, which are expected
580
- # to be CSV::Row objects. All rows are assumed to have the same headers.
581
- #
582
- # A CSV::Table object supports the following Array methods through
583
- # delegation:
584
- #
585
- # * empty?()
586
- # * length()
587
- # * size()
588
- #
589
- def initialize(array_of_rows)
590
- @table = array_of_rows
591
- @mode = :col_or_row
592
- end
593
-
594
- # The current access mode for indexing and iteration.
595
- attr_reader :mode
596
-
597
- # Internal data format used to compare equality.
598
- attr_reader :table
599
- protected :table
600
-
601
- ### Array Delegation ###
602
280
 
603
- extend Forwardable
604
- def_delegators :@table, :empty?, :length, :size
605
-
606
- #
607
- # Returns a duplicate table object, in column mode. This is handy for
608
- # chaining in a single call without changing the table mode, but be aware
609
- # that this method can consume a fair amount of memory for bigger data sets.
610
- #
611
- # This method returns the duplicate table for chaining. Don't chain
612
- # destructive methods (like []=()) this way though, since you are working
613
- # with a duplicate.
614
- #
615
- def by_col
616
- self.class.new(@table.dup).by_col!
617
- end
618
-
619
- #
620
- # Switches the mode of this table to column mode. All calls to indexing and
621
- # iteration methods will work with columns until the mode is changed again.
622
- #
623
- # This method returns the table and is safe to chain.
624
- #
625
- def by_col!
626
- @mode = :col
627
-
628
- self
629
- end
630
-
631
- #
632
- # Returns a duplicate table object, in mixed mode. This is handy for
633
- # chaining in a single call without changing the table mode, but be aware
634
- # that this method can consume a fair amount of memory for bigger data sets.
635
- #
636
- # This method returns the duplicate table for chaining. Don't chain
637
- # destructive methods (like []=()) this way though, since you are working
638
- # with a duplicate.
639
- #
640
- def by_col_or_row
641
- self.class.new(@table.dup).by_col_or_row!
642
- end
643
-
644
- #
645
- # Switches the mode of this table to mixed mode. All calls to indexing and
646
- # iteration methods will use the default intelligent indexing system until
647
- # the mode is changed again. In mixed mode an index is assumed to be a row
648
- # reference while anything else is assumed to be column access by headers.
649
- #
650
- # This method returns the table and is safe to chain.
651
- #
652
- def by_col_or_row!
653
- @mode = :col_or_row
654
-
655
- self
656
- end
657
-
658
- #
659
- # Returns a duplicate table object, in row mode. This is handy for chaining
660
- # in a single call without changing the table mode, but be aware that this
661
- # method can consume a fair amount of memory for bigger data sets.
662
- #
663
- # This method returns the duplicate table for chaining. Don't chain
664
- # destructive methods (like []=()) this way though, since you are working
665
- # with a duplicate.
666
- #
667
- def by_row
668
- self.class.new(@table.dup).by_row!
669
- end
670
-
671
- #
672
- # Switches the mode of this table to row mode. All calls to indexing and
673
- # iteration methods will work with rows until the mode is changed again.
674
- #
675
- # This method returns the table and is safe to chain.
676
- #
677
- def by_row!
678
- @mode = :row
679
-
680
- self
681
- end
682
-
683
- #
684
- # Returns the headers for the first row of this table (assumed to match all
685
- # other rows). An empty Array is returned for empty tables.
686
- #
687
- def headers
688
- if @table.empty?
689
- Array.new
690
- else
691
- @table.first.headers
692
- end
693
- end
694
-
695
- #
696
- # In the default mixed mode, this method returns rows for index access and
697
- # columns for header access. You can force the index association by first
698
- # calling by_col!() or by_row!().
699
- #
700
- # Columns are returned as an Array of values. Altering that Array has no
701
- # effect on the table.
702
- #
703
- def [](index_or_header)
704
- if @mode == :row or # by index
705
- (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range)))
706
- @table[index_or_header]
707
- else # by header
708
- @table.map { |row| row[index_or_header] }
709
- end
710
- end
711
-
712
- #
713
- # In the default mixed mode, this method assigns rows for index access and
714
- # columns for header access. You can force the index association by first
715
- # calling by_col!() or by_row!().
716
- #
717
- # Rows may be set to an Array of values (which will inherit the table's
718
- # headers()) or a CSV::Row.
719
- #
720
- # Columns may be set to a single value, which is copied to each row of the
721
- # column, or an Array of values. Arrays of values are assigned to rows top
722
- # to bottom in row major order. Excess values are ignored and if the Array
723
- # does not have a value for each row the extra rows will receive a +nil+.
724
- #
725
- # Assigning to an existing column or row clobbers the data. Assigning to
726
- # new columns creates them at the right end of the table.
727
- #
728
- def []=(index_or_header, value)
729
- if @mode == :row or # by index
730
- (@mode == :col_or_row and index_or_header.is_a? Integer)
731
- if value.is_a? Array
732
- @table[index_or_header] = Row.new(headers, value)
733
- else
734
- @table[index_or_header] = value
735
- end
736
- else # set column
737
- if value.is_a? Array # multiple values
738
- @table.each_with_index do |row, i|
739
- if row.header_row?
740
- row[index_or_header] = index_or_header
741
- else
742
- row[index_or_header] = value[i]
743
- end
744
- end
745
- else # repeated value
746
- @table.each do |row|
747
- if row.header_row?
748
- row[index_or_header] = index_or_header
749
- else
750
- row[index_or_header] = value
751
- end
752
- end
753
- end
754
- end
755
- end
756
-
757
- #
758
- # The mixed mode default is to treat a list of indices as row access,
759
- # returning the rows indicated. Anything else is considered columnar
760
- # access. For columnar access, the return set has an Array for each row
761
- # with the values indicated by the headers in each Array. You can force
762
- # column or row mode using by_col!() or by_row!().
763
- #
764
- # You cannot mix column and row access.
765
- #
766
- def values_at(*indices_or_headers)
767
- if @mode == :row or # by indices
768
- ( @mode == :col_or_row and indices_or_headers.all? do |index|
769
- index.is_a?(Integer) or
770
- ( index.is_a?(Range) and
771
- index.first.is_a?(Integer) and
772
- index.last.is_a?(Integer) )
773
- end )
774
- @table.values_at(*indices_or_headers)
775
- else # by headers
776
- @table.map { |row| row.values_at(*indices_or_headers) }
777
- end
778
- end
779
-
780
- #
781
- # Adds a new row to the bottom end of this table. You can provide an Array,
782
- # which will be converted to a CSV::Row (inheriting the table's headers()),
783
- # or a CSV::Row.
784
- #
785
- # This method returns the table for chaining.
786
- #
787
- def <<(row_or_array)
788
- if row_or_array.is_a? Array # append Array
789
- @table << Row.new(headers, row_or_array)
790
- else # append Row
791
- @table << row_or_array
792
- end
793
-
794
- self # for chaining
795
- end
796
-
797
- #
798
- # A shortcut for appending multiple rows. Equivalent to:
799
- #
800
- # rows.each { |row| self << row }
801
- #
802
- # This method returns the table for chaining.
803
- #
804
- def push(*rows)
805
- rows.each { |row| self << row }
806
-
807
- self # for chaining
808
- end
809
-
810
- #
811
- # Removes and returns the indicated column or row. In the default mixed
812
- # mode indices refer to rows and everything else is assumed to be a column
813
- # header. Use by_col!() or by_row!() to force the lookup.
814
- #
815
- def delete(index_or_header)
816
- if @mode == :row or # by index
817
- (@mode == :col_or_row and index_or_header.is_a? Integer)
818
- @table.delete_at(index_or_header)
819
- else # by header
820
- @table.map { |row| row.delete(index_or_header).last }
821
- end
822
- end
823
-
824
- #
825
- # Removes any column or row for which the block returns +true+. In the
826
- # default mixed mode or row mode, iteration is the standard row major
827
- # walking of rows. In column mode, iteration will +yield+ two element
828
- # tuples containing the column name and an Array of values for that column.
829
- #
830
- # This method returns the table for chaining.
831
- #
832
- # If no block is given, an Enumerator is returned.
833
- #
834
- def delete_if(&block)
835
- block or return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size }
836
-
837
- if @mode == :row or @mode == :col_or_row # by index
838
- @table.delete_if(&block)
839
- else # by header
840
- deleted = []
841
- headers.each do |header|
842
- deleted << delete(header) if block[[header, self[header]]]
843
- end
844
- end
845
-
846
- self # for chaining
847
- end
848
-
849
- include Enumerable
850
-
851
- #
852
- # In the default mixed mode or row mode, iteration is the standard row major
853
- # walking of rows. In column mode, iteration will +yield+ two element
854
- # tuples containing the column name and an Array of values for that column.
855
- #
856
- # This method returns the table for chaining.
857
- #
858
- # If no block is given, an Enumerator is returned.
859
- #
860
- def each(&block)
861
- block or return enum_for(__method__) { @mode == :col ? headers.size : size }
862
-
863
- if @mode == :col
864
- headers.each { |header| block[[header, self[header]]] }
865
- else
866
- @table.each(&block)
867
- end
868
-
869
- self # for chaining
870
- end
871
-
872
- # Returns +true+ if all rows of this table ==() +other+'s rows.
873
- def ==(other)
874
- return @table == other.table if other.is_a? CSV::Table
875
- @table == other
876
- end
877
-
878
- #
879
- # Returns the table as an Array of Arrays. Headers will be the first row,
880
- # then all of the field rows will follow.
881
- #
882
- def to_a
883
- array = [headers]
884
- @table.each do |row|
885
- array.push(row.fields) unless row.header_row?
886
- end
887
- return array
888
- end
889
-
890
- #
891
- # Returns the table as a complete CSV String. Headers will be listed first,
892
- # then all of the field rows.
893
- #
894
- # This method assumes you want the Table.headers(), unless you explicitly
895
- # pass <tt>:write_headers => false</tt>.
896
- #
897
- def to_csv(write_headers: true, **options)
898
- array = write_headers ? [headers.to_csv(options)] : []
899
- @table.each do |row|
900
- array.push(row.fields.to_csv(options)) unless row.header_row?
901
- end
902
- return array.join('')
903
- end
904
- alias_method :to_s, :to_csv
905
-
906
- # Shows the mode and size of this table in a US-ASCII String.
907
- def inspect
908
- "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII")
281
+ # The error thrown when the parser encounters illegal CSV formatting.
282
+ class MalformedCSVError < RuntimeError
283
+ attr_reader :line_number
284
+ alias_method :lineno, :line_number
285
+ def initialize(message, line_number)
286
+ @line_number = line_number
287
+ super("#{message} in line #{line_number}.")
909
288
  end
910
289
  end
911
290
 
912
- # The error thrown when the parser encounters illegal CSV formatting.
913
- class MalformedCSVError < RuntimeError; end
914
-
915
291
  #
916
292
  # A FieldInfo Struct contains details about a field's position in the data
917
293
  # source it was read from. CSV will pass this Struct to some blocks that make
@@ -930,7 +306,11 @@ class CSV
930
306
  # A Regexp used to find and convert some common DateTime formats.
931
307
  DateTimeMatcher =
932
308
  / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
933
- \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
309
+ \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} |
310
+ # ISO-8601
311
+ \d{4}-\d{2}-\d{2}
312
+ (?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
313
+ )\z /x
934
314
 
935
315
  # The encoding used by all converters.
936
316
  ConverterEncoding = Encoding.find("UTF-8")
@@ -970,7 +350,7 @@ class CSV
970
350
  date: lambda { |f|
971
351
  begin
972
352
  e = f.encode(ConverterEncoding)
973
- e =~ DateMatcher ? Date.parse(e) : f
353
+ e.match?(DateMatcher) ? Date.parse(e) : f
974
354
  rescue # encoding conversion or date parse errors
975
355
  f
976
356
  end
@@ -978,7 +358,7 @@ class CSV
978
358
  date_time: lambda { |f|
979
359
  begin
980
360
  e = f.encode(ConverterEncoding)
981
- e =~ DateTimeMatcher ? DateTime.parse(e) : f
361
+ e.match?(DateTimeMatcher) ? DateTime.parse(e) : f
982
362
  rescue # encoding conversion or date parse errors
983
363
  f
984
364
  end
@@ -1137,7 +517,7 @@ class CSV
1137
517
  # but transcode it to UTF-8 before CSV parses it.
1138
518
  #
1139
519
  def self.foreach(path, **options, &block)
1140
- return to_enum(__method__, path, options) unless block
520
+ return to_enum(__method__, path, options) unless block_given?
1141
521
  open(path, options) do |csv|
1142
522
  csv.each(&block)
1143
523
  end
@@ -1164,8 +544,8 @@ class CSV
1164
544
  def self.generate(str=nil, **options)
1165
545
  # add a default empty String, if none was given
1166
546
  if str
1167
- io = StringIO.new(str)
1168
- io.seek(0, IO::SEEK_END)
547
+ str = StringIO.new(str)
548
+ str.seek(0, IO::SEEK_END)
1169
549
  else
1170
550
  encoding = options[:encoding]
1171
551
  str = String.new
@@ -1271,7 +651,7 @@ class CSV
1271
651
  begin
1272
652
  f = File.open(filename, mode, file_opts)
1273
653
  rescue ArgumentError => e
1274
- raise unless /needs binmode/ =~ e.message and mode == "r"
654
+ raise unless /needs binmode/.match?(e.message) and mode == "r"
1275
655
  mode = "rb"
1276
656
  file_opts = {encoding: Encoding.default_external}.merge(file_opts)
1277
657
  retry
@@ -1309,14 +689,14 @@ class CSV
1309
689
  #
1310
690
  def self.parse(*args, &block)
1311
691
  csv = new(*args)
1312
- if block.nil? # slurp contents, if no block is given
1313
- begin
1314
- csv.read
1315
- ensure
1316
- csv.close
1317
- end
1318
- else # or pass each row to a provided block
1319
- csv.each(&block)
692
+
693
+ return csv.each(&block) if block_given?
694
+
695
+ # slurp contents, if no block is given
696
+ begin
697
+ csv.read
698
+ ensure
699
+ csv.close
1320
700
  end
1321
701
  end
1322
702
 
@@ -1510,6 +890,8 @@ class CSV
1510
890
  # attempt to parse input not conformant
1511
891
  # with RFC 4180, such as double quotes
1512
892
  # in unquoted fields.
893
+ # <b><tt>:nil_value</tt></b>:: The value substituted for each +nil+ field when fields are converted (default: +nil+).
894
+ # <b><tt>:empty_value</tt></b>:: The value substituted for each empty String field when fields are converted (default: <tt>""</tt>).
1513
895
  #
1514
896
  # See CSV::DEFAULT_OPTIONS for the default settings.
1515
897
  #
@@ -1519,20 +901,14 @@ class CSV
1519
901
  def initialize(data, col_sep: ",", row_sep: :auto, quote_char: '"', field_size_limit: nil,
1520
902
  converters: nil, unconverted_fields: nil, headers: false, return_headers: false,
1521
903
  write_headers: nil, header_converters: nil, skip_blanks: false, force_quotes: false,
1522
- skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil)
904
+ skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil,
905
+ nil_value: nil,
906
+ empty_value: "")
1523
907
  raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
1524
908
 
1525
909
  # create the IO object we will read from
1526
910
  @io = data.is_a?(String) ? StringIO.new(data) : data
1527
- # honor the IO encoding if we can, otherwise default to ASCII-8BIT
1528
- internal_encoding = Encoding.find(internal_encoding) if internal_encoding
1529
- external_encoding = Encoding.find(external_encoding) if external_encoding
1530
- if encoding
1531
- encoding, = encoding.split(":", 2) if encoding.is_a?(String)
1532
- encoding = Encoding.find(encoding)
1533
- end
1534
- @encoding = raw_encoding(nil) || internal_encoding || encoding ||
1535
- Encoding.default_internal || Encoding.default_external
911
+ @encoding = determine_encoding(encoding, internal_encoding)
1536
912
  #
1537
913
  # prepare for building safe regular expressions in the target encoding,
1538
914
  # if we can transcode the needed characters
@@ -1549,6 +925,10 @@ class CSV
1549
925
  # headers must be delayed until shift(), in case they need a row of content
1550
926
  @headers = nil
1551
927
 
928
+ @nil_value = nil_value
929
+ @empty_value = empty_value
930
+ @empty_value_is_empty_string = (empty_value == "")
931
+
1552
932
  init_separators(col_sep, row_sep, quote_char, force_quotes)
1553
933
  init_parsers(skip_blanks, field_size_limit, liberal_parsing)
1554
934
  init_converters(converters, :@converters, :convert)
@@ -1830,7 +1210,15 @@ class CSV
1830
1210
  @line = parse.clone
1831
1211
  end
1832
1212
 
1833
- parse.sub!(@parsers[:line_end], "")
1213
+ begin
1214
+ parse.sub!(@parsers[:line_end], "")
1215
+ rescue ArgumentError
1216
+ unless parse.valid_encoding?
1217
+ message = "Invalid byte sequence in #{parse.encoding}"
1218
+ raise MalformedCSVError.new(message, lineno + 1)
1219
+ end
1220
+ raise
1221
+ end
1834
1222
 
1835
1223
  if csv.empty?
1836
1224
  #
@@ -1844,7 +1232,7 @@ class CSV
1844
1232
  elsif @unconverted_fields
1845
1233
  return add_unconverted_fields(Array.new, Array.new)
1846
1234
  elsif @use_headers
1847
- return self.class::Row.new(Array.new, Array.new)
1235
+ return self.class::Row.new(@headers, Array.new)
1848
1236
  else
1849
1237
  return Array.new
1850
1238
  end
@@ -1853,7 +1241,7 @@ class CSV
1853
1241
 
1854
1242
  next if @skip_lines and @skip_lines.match parse
1855
1243
 
1856
- parts = parse.split(@col_sep, -1)
1244
+ parts = parse.split(@col_sep_split_separator, -1)
1857
1245
  if parts.empty?
1858
1246
  if in_extended_col
1859
1247
  csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
@@ -1870,9 +1258,9 @@ class CSV
1870
1258
  if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
1871
1259
  # extended column ends
1872
1260
  csv.last << part[0..-2]
1873
- if csv.last =~ @parsers[:stray_quote]
1874
- raise MalformedCSVError,
1875
- "Missing or stray quote in line #{lineno + 1}"
1261
+ if csv.last.match?(@parsers[:stray_quote])
1262
+ raise MalformedCSVError.new("Missing or stray quote",
1263
+ lineno + 1)
1876
1264
  end
1877
1265
  csv.last.gsub!(@double_quote_char, @quote_char)
1878
1266
  in_extended_col = false
@@ -1888,27 +1276,27 @@ class CSV
1888
1276
  elsif part.end_with?(@quote_char)
1889
1277
  # regular quoted column
1890
1278
  csv << part[1..-2]
1891
- if csv.last =~ @parsers[:stray_quote]
1892
- raise MalformedCSVError,
1893
- "Missing or stray quote in line #{lineno + 1}"
1279
+ if csv.last.match?(@parsers[:stray_quote])
1280
+ raise MalformedCSVError.new("Missing or stray quote",
1281
+ lineno + 1)
1894
1282
  end
1895
1283
  csv.last.gsub!(@double_quote_char, @quote_char)
1896
1284
  elsif @liberal_parsing
1897
1285
  csv << part
1898
1286
  else
1899
- raise MalformedCSVError,
1900
- "Missing or stray quote in line #{lineno + 1}"
1287
+ raise MalformedCSVError.new("Missing or stray quote",
1288
+ lineno + 1)
1901
1289
  end
1902
- elsif part =~ @parsers[:quote_or_nl]
1290
+ elsif part.match?(@parsers[:quote_or_nl])
1903
1291
  # Unquoted field with bad characters.
1904
- if part =~ @parsers[:nl_or_lf]
1905
- raise MalformedCSVError, "Unquoted fields do not allow " +
1906
- "\\r or \\n (line #{lineno + 1})."
1292
+ if part.match?(@parsers[:nl_or_lf])
1293
+ message = "Unquoted fields do not allow \\r or \\n"
1294
+ raise MalformedCSVError.new(message, lineno + 1)
1907
1295
  else
1908
1296
  if @liberal_parsing
1909
1297
  csv << part
1910
1298
  else
1911
- raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
1299
+ raise MalformedCSVError.new("Illegal quoting", lineno + 1)
1912
1300
  end
1913
1301
  end
1914
1302
  else
@@ -1924,10 +1312,11 @@ class CSV
1924
1312
  if in_extended_col
1925
1313
  # if we're at eof?(), a quoted field wasn't closed...
1926
1314
  if @io.eof?
1927
- raise MalformedCSVError,
1928
- "Unclosed quoted field on line #{lineno + 1}."
1315
+ raise MalformedCSVError.new("Unclosed quoted field",
1316
+ lineno + 1)
1929
1317
  elsif @field_size_limit and csv.last.size >= @field_size_limit
1930
- raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1318
+ raise MalformedCSVError.new("Field size exceeded",
1319
+ lineno + 1)
1931
1320
  end
1932
1321
  # otherwise, we need to loop and pull some more data to complete the row
1933
1322
  else
@@ -1936,10 +1325,13 @@ class CSV
1936
1325
  # save unconverted fields, if needed...
1937
1326
  unconverted = csv.dup if @unconverted_fields
1938
1327
 
1939
- # convert fields, if needed...
1940
- csv = convert_fields(csv) unless @use_headers or @converters.empty?
1941
- # parse out header rows and handle CSV::Row conversions...
1942
- csv = parse_headers(csv) if @use_headers
1328
+ if @use_headers
1329
+ # parse out header rows and handle CSV::Row conversions...
1330
+ csv = parse_headers(csv)
1331
+ else
1332
+ # convert fields, if needed...
1333
+ csv = convert_fields(csv)
1334
+ end
1943
1335
 
1944
1336
  # inject unconverted fields and accessor, if requested...
1945
1337
  if @unconverted_fields and not csv.respond_to? :unconverted_fields
@@ -1995,6 +1387,21 @@ class CSV
1995
1387
 
1996
1388
  private
1997
1389
 
1390
+ def determine_encoding(encoding, internal_encoding)
1391
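+ # honor the IO encoding if we can; otherwise fall back to the requested internal encoding, then the requested encoding, then Ruby's default internal/external encoding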
1392
+ io_encoding = raw_encoding(nil)
1393
+ return io_encoding if io_encoding
1394
+
1395
+ return Encoding.find(internal_encoding) if internal_encoding
1396
+
1397
+ if encoding
1398
+ encoding, = encoding.split(":", 2) if encoding.is_a?(String)
1399
+ return Encoding.find(encoding)
1400
+ end
1401
+
1402
+ Encoding.default_internal || Encoding.default_external
1403
+ end
1404
+
1998
1405
  #
1999
1406
  # Stores the indicated separators for later use.
2000
1407
  #
@@ -2008,6 +1415,11 @@ class CSV
2008
1415
  def init_separators(col_sep, row_sep, quote_char, force_quotes)
2009
1416
  # store the selected separators
2010
1417
  @col_sep = col_sep.to_s.encode(@encoding)
1418
+ if @col_sep == " "
1419
+ @col_sep_split_separator = Regexp.new(/#{Regexp.escape(@col_sep)}/)
1420
+ else
1421
+ @col_sep_split_separator = @col_sep
1422
+ end
2011
1423
  @row_sep = row_sep # encode after resolving :auto
2012
1424
  @quote_char = quote_char.to_s.encode(@encoding)
2013
1425
  @double_quote_char = @quote_char * 2
@@ -2037,15 +1449,28 @@ class CSV
2037
1449
  # (ensure will set default value)
2038
1450
  #
2039
1451
  break unless sample = @io.gets(nil, 1024)
1452
+
1453
+ cr = encode_str("\r")
1454
+ lf = encode_str("\n")
2040
1455
  # extend sample if we're unsure of the line ending
2041
- if sample.end_with? encode_str("\r")
1456
+ if sample.end_with?(cr)
2042
1457
  sample << (@io.gets(nil, 1) || "")
2043
1458
  end
2044
1459
 
2045
1460
  # try to find a standard separator
2046
- if sample =~ encode_re("\r\n?|\n")
2047
- @row_sep = $&
2048
- break
1461
+ sample.each_char.each_cons(2) do |char, next_char|
1462
+ case char
1463
+ when cr
1464
+ if next_char == lf
1465
+ @row_sep = encode_str("\r\n")
1466
+ else
1467
+ @row_sep = cr
1468
+ end
1469
+ break
1470
+ when lf
1471
+ @row_sep = lf
1472
+ break
1473
+ end
2049
1474
  end
2050
1475
  end
2051
1476
 
@@ -2199,10 +1624,24 @@ class CSV
2199
1624
  # shortcut.
2200
1625
  #
2201
1626
  def convert_fields(fields, headers = false)
2202
- # see if we are converting headers or fields
2203
- converters = headers ? @header_converters : @converters
1627
+ if headers
1628
+ converters = @header_converters
1629
+ else
1630
+ converters = @converters
1631
+ if !@use_headers and
1632
+ converters.empty? and
1633
+ @nil_value.nil? and
1634
+ @empty_value_is_empty_string
1635
+ return fields
1636
+ end
1637
+ end
2204
1638
 
2205
1639
  fields.map.with_index do |field, index|
1640
+ if field.nil?
1641
+ field = @nil_value
1642
+ elsif field.empty?
1643
+ field = @empty_value unless @empty_value_is_empty_string
1644
+ end
2206
1645
  converters.each do |converter|
2207
1646
  break if headers && field.nil?
2208
1647
  field = if converter.arity == 1 # straight field converter
@@ -2334,22 +1773,6 @@ def CSV(*args, &block)
2334
1773
  CSV.instance(*args, &block)
2335
1774
  end
2336
1775
 
2337
- class Array # :nodoc:
2338
- # Equivalent to CSV::generate_line(self, options)
2339
- #
2340
- # ["CSV", "data"].to_csv
2341
- # #=> "CSV,data\n"
2342
- def to_csv(**options)
2343
- CSV.generate_line(self, options)
2344
- end
2345
- end
2346
-
2347
- class String # :nodoc:
2348
- # Equivalent to CSV::parse_line(self, options)
2349
- #
2350
- # "CSV,data".parse_csv
2351
- # #=> ["CSV", "data"]
2352
- def parse_csv(**options)
2353
- CSV.parse_line(self, options)
2354
- end
2355
- end
1776
+ require_relative "csv/version"
1777
+ require_relative "csv/core_ext/array"
1778
+ require_relative "csv/core_ext/string"