csv 0.1.0 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/csv.rb CHANGED
@@ -2,9 +2,7 @@
2
2
  # frozen_string_literal: true
3
3
  # = csv.rb -- CSV Reading and Writing
4
4
  #
5
- # Created by James Edward Gray II on 2005-10-31.
6
- # Copyright 2005 James Edward Gray II. You can redistribute or modify this code
7
- # under the terms of Ruby's license.
5
+ # Created by James Edward Gray II on 2005-10-31.
8
6
  #
9
7
  # See CSV for documentation.
10
8
  #
@@ -95,74 +93,146 @@ require "forwardable"
95
93
  require "English"
96
94
  require "date"
97
95
  require "stringio"
96
+ require_relative "csv/table"
97
+ require_relative "csv/row"
98
+
99
+ # This provides String#match? and Regexp#match? for Ruby 2.3.
100
+ unless String.method_defined?(:match?)
101
+ class CSV
102
+ module MatchP
103
+ refine String do
104
+ def match?(pattern)
105
+ self =~ pattern
106
+ end
107
+ end
108
+
109
+ refine Regexp do
110
+ def match?(string)
111
+ self =~ string
112
+ end
113
+ end
114
+ end
115
+ end
116
+
117
+ using CSV::MatchP
118
+ end
98
119
 
99
120
  #
100
121
  # This class provides a complete interface to CSV files and data. It offers
101
122
  # tools to enable you to read and write to and from Strings or IO objects, as
102
123
  # needed.
103
124
  #
104
- # == Reading
125
+ # The most generic interface of the class is:
105
126
  #
106
- # === From a File
127
+ # csv = CSV.new(string_or_io, **options)
107
128
  #
108
- # ==== A Line at a Time
129
+ # # Reading: IO object should be open for read
130
+ # csv.read # => array of rows
131
+ # # or
132
+ # csv.each do |row|
133
+ # # ...
134
+ # end
135
+ # # or
136
+ # row = csv.shift
109
137
  #
110
- # CSV.foreach("path/to/file.csv") do |row|
111
- # # use row here...
112
- # end
138
+ # # Writing: IO object should be open for write
139
+ # csv << row
113
140
  #
114
- # ==== All at Once
141
+ # There are several specialized class methods for one-statement reading or writing,
142
+ # described in the Specialized Methods section.
115
143
  #
116
- # arr_of_arrs = CSV.read("path/to/file.csv")
144
+ # If a String is passed into ::new, it is internally wrapped in a StringIO object.
117
145
  #
118
- # === From a String
146
+ # +options+ can be used for specifying the particular CSV flavor (column
147
+ # separators, row separators, value quoting and so on), and for data conversion;
148
+ # see the Data Conversion section for a description of the latter.
119
149
  #
120
- # ==== A Line at a Time
150
+ # == Specialized Methods
121
151
  #
122
- # CSV.parse("CSV,data,String") do |row|
123
- # # use row here...
124
- # end
152
+ # === Reading
125
153
  #
126
- # ==== All at Once
127
- #
128
- # arr_of_arrs = CSV.parse("CSV,data,String")
154
+ # # From a file: all at once
155
+ # arr_of_rows = CSV.read("path/to/file.csv", **options)
156
+ # # iterator-style:
157
+ # CSV.foreach("path/to/file.csv", **options) do |row|
158
+ # # ...
159
+ # end
129
160
  #
130
- # == Writing
161
+ # # From a string
162
+ # arr_of_rows = CSV.parse("CSV,data,String", **options)
163
+ # # or
164
+ # CSV.parse("CSV,data,String", **options) do |row|
165
+ # # ...
166
+ # end
131
167
  #
132
- # === To a File
168
+ # === Writing
133
169
  #
170
+ # # To a file
134
171
  # CSV.open("path/to/file.csv", "wb") do |csv|
135
172
  # csv << ["row", "of", "CSV", "data"]
136
173
  # csv << ["another", "row"]
137
174
  # # ...
138
175
  # end
139
176
  #
140
- # === To a String
141
- #
177
+ # # To a String
142
178
  # csv_string = CSV.generate do |csv|
143
179
  # csv << ["row", "of", "CSV", "data"]
144
180
  # csv << ["another", "row"]
145
181
  # # ...
146
182
  # end
147
183
  #
148
- # == Convert a Single Line
184
+ # === Shortcuts
149
185
  #
186
+ # # Core extensions for converting one line
150
187
  # csv_string = ["CSV", "data"].to_csv # to CSV
151
188
  # csv_array = "CSV,String".parse_csv # from CSV
152
189
  #
153
- # == Shortcut Interface
154
- #
190
+ # # CSV() method
155
191
  # CSV { |csv_out| csv_out << %w{my data here} } # to $stdout
156
192
  # CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
157
193
  # CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
158
194
  # CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
159
195
  #
160
- # == Advanced Usage
196
+ # == Data Conversion
197
+ #
198
+ # === CSV with headers
199
+ #
200
+ # CSV allows you to specify the column names of a CSV file, whether they are in the data or
201
+ # provided separately. If headers are specified, reading methods return an instance
202
+ # of CSV::Table, consisting of CSV::Row objects.
203
+ #
204
+ # # Headers are part of data
205
+ # data = CSV.parse(<<~ROWS, headers: true)
206
+ # Name,Department,Salary
207
+ # Bob,Engineering,1000
208
+ # Jane,Sales,2000
209
+ # John,Management,5000
210
+ # ROWS
161
211
  #
162
- # === Wrap an IO Object
212
+ # data.class #=> CSV::Table
213
+ # data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engineering" "Salary":"1000">
214
+ # data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"}
163
215
  #
164
- # csv = CSV.new(io, options)
165
- # # ... read (with gets() or each()) from and write (with <<) to csv here ...
216
+ # # Headers provided by developer
217
+ # data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
217
+ # data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
219
+ #
220
+ # === Typed data reading
221
+ #
222
+ # CSV allows you to provide a set of data _converters_, i.e. transformations to try on input
222
+ # data. A converter can be a symbol from the keys of the CSV::Converters constant, or a lambda.
224
+ #
225
+ # # Without any converters:
226
+ # CSV.parse('Bob,2018-03-01,100')
227
+ # #=> [["Bob", "2018-03-01", "100"]]
228
+ #
229
+ # # With built-in converters:
230
+ # CSV.parse('Bob,2018-03-01,100', converters: %i[numeric date])
231
+ # #=> [["Bob", #<Date: 2018-03-01>, 100]]
232
+ #
233
+ # # With custom converters:
234
+ # CSV.parse('Bob,2018-03-01,100', converters: [->(v) { Time.parse(v) rescue v }])
235
+ # #=> [["Bob", 2018-03-01 00:00:00 +0200, "100"]]
166
236
  #
167
237
  # == CSV and Character Encodings (M17n or Multilingualization)
168
238
  #
@@ -207,711 +277,17 @@ require "stringio"
207
277
  # find with it.
208
278
  #
209
279
  class CSV
210
- # The version of the installed library.
211
- VERSION = "2.4.8"
212
-
213
- #
214
- # A CSV::Row is part Array and part Hash. It retains an order for the fields
215
- # and allows duplicates just as an Array would, but also allows you to access
216
- # fields by name just as you could if they were in a Hash.
217
- #
218
- # All rows returned by CSV will be constructed from this class, if header row
219
- # processing is activated.
220
- #
221
- class Row
222
- #
223
- # Construct a new CSV::Row from +headers+ and +fields+, which are expected
224
- # to be Arrays. If one Array is shorter than the other, it will be padded
225
- # with +nil+ objects.
226
- #
227
- # The optional +header_row+ parameter can be set to +true+ to indicate, via
228
- # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
229
- # row. Otherwise, the row is assumes to be a field row.
230
- #
231
- # A CSV::Row object supports the following Array methods through delegation:
232
- #
233
- # * empty?()
234
- # * length()
235
- # * size()
236
- #
237
- def initialize(headers, fields, header_row = false)
238
- @header_row = header_row
239
- headers.each { |h| h.freeze if h.is_a? String }
240
-
241
- # handle extra headers or fields
242
- @row = if headers.size >= fields.size
243
- headers.zip(fields)
244
- else
245
- fields.zip(headers).each(&:reverse!)
246
- end
247
- end
248
-
249
- # Internal data format used to compare equality.
250
- attr_reader :row
251
- protected :row
252
-
253
- ### Array Delegation ###
254
-
255
- extend Forwardable
256
- def_delegators :@row, :empty?, :length, :size
257
-
258
- # Returns +true+ if this is a header row.
259
- def header_row?
260
- @header_row
261
- end
262
-
263
- # Returns +true+ if this is a field row.
264
- def field_row?
265
- not header_row?
266
- end
267
-
268
- # Returns the headers of this row.
269
- def headers
270
- @row.map(&:first)
271
- end
272
-
273
- #
274
- # :call-seq:
275
- # field( header )
276
- # field( header, offset )
277
- # field( index )
278
- #
279
- # This method will return the field value by +header+ or +index+. If a field
280
- # is not found, +nil+ is returned.
281
- #
282
- # When provided, +offset+ ensures that a header match occurs on or later
283
- # than the +offset+ index. You can use this to find duplicate headers,
284
- # without resorting to hard-coding exact indices.
285
- #
286
- def field(header_or_index, minimum_index = 0)
287
- # locate the pair
288
- finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc
289
- pair = @row[minimum_index..-1].send(finder, header_or_index)
290
-
291
- # return the field if we have a pair
292
- if pair.nil?
293
- nil
294
- else
295
- header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last
296
- end
297
- end
298
- alias_method :[], :field
299
-
300
- #
301
- # :call-seq:
302
- # fetch( header )
303
- # fetch( header ) { |row| ... }
304
- # fetch( header, default )
305
- #
306
- # This method will fetch the field value by +header+. It has the same
307
- # behavior as Hash#fetch: if there is a field with the given +header+, its
308
- # value is returned. Otherwise, if a block is given, it is yielded the
309
- # +header+ and its result is returned; if a +default+ is given as the
310
- # second argument, it is returned; otherwise a KeyError is raised.
311
- #
312
- def fetch(header, *varargs)
313
- raise ArgumentError, "Too many arguments" if varargs.length > 1
314
- pair = @row.assoc(header)
315
- if pair
316
- pair.last
317
- else
318
- if block_given?
319
- yield header
320
- elsif varargs.empty?
321
- raise KeyError, "key not found: #{header}"
322
- else
323
- varargs.first
324
- end
325
- end
326
- end
327
-
328
- # Returns +true+ if there is a field with the given +header+.
329
- def has_key?(header)
330
- !!@row.assoc(header)
331
- end
332
- alias_method :include?, :has_key?
333
- alias_method :key?, :has_key?
334
- alias_method :member?, :has_key?
335
-
336
- #
337
- # :call-seq:
338
- # []=( header, value )
339
- # []=( header, offset, value )
340
- # []=( index, value )
341
- #
342
- # Looks up the field by the semantics described in CSV::Row.field() and
343
- # assigns the +value+.
344
- #
345
- # Assigning past the end of the row with an index will set all pairs between
346
- # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
347
- # pair.
348
- #
349
- def []=(*args)
350
- value = args.pop
351
-
352
- if args.first.is_a? Integer
353
- if @row[args.first].nil? # extending past the end with index
354
- @row[args.first] = [nil, value]
355
- @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
356
- else # normal index assignment
357
- @row[args.first][1] = value
358
- end
359
- else
360
- index = index(*args)
361
- if index.nil? # appending a field
362
- self << [args.first, value]
363
- else # normal header assignment
364
- @row[index][1] = value
365
- end
366
- end
367
- end
368
-
369
- #
370
- # :call-seq:
371
- # <<( field )
372
- # <<( header_and_field_array )
373
- # <<( header_and_field_hash )
374
- #
375
- # If a two-element Array is provided, it is assumed to be a header and field
376
- # and the pair is appended. A Hash works the same way with the key being
377
- # the header and the value being the field. Anything else is assumed to be
378
- # a lone field which is appended with a +nil+ header.
379
- #
380
- # This method returns the row for chaining.
381
- #
382
- def <<(arg)
383
- if arg.is_a?(Array) and arg.size == 2 # appending a header and name
384
- @row << arg
385
- elsif arg.is_a?(Hash) # append header and name pairs
386
- arg.each { |pair| @row << pair }
387
- else # append field value
388
- @row << [nil, arg]
389
- end
390
-
391
- self # for chaining
392
- end
393
-
394
- #
395
- # A shortcut for appending multiple fields. Equivalent to:
396
- #
397
- # args.each { |arg| csv_row << arg }
398
- #
399
- # This method returns the row for chaining.
400
- #
401
- def push(*args)
402
- args.each { |arg| self << arg }
403
-
404
- self # for chaining
405
- end
406
-
407
- #
408
- # :call-seq:
409
- # delete( header )
410
- # delete( header, offset )
411
- # delete( index )
412
- #
413
- # Used to remove a pair from the row by +header+ or +index+. The pair is
414
- # located as described in CSV::Row.field(). The deleted pair is returned,
415
- # or +nil+ if a pair could not be found.
416
- #
417
- def delete(header_or_index, minimum_index = 0)
418
- if header_or_index.is_a? Integer # by index
419
- @row.delete_at(header_or_index)
420
- elsif i = index(header_or_index, minimum_index) # by header
421
- @row.delete_at(i)
422
- else
423
- [ ]
424
- end
425
- end
426
-
427
- #
428
- # The provided +block+ is passed a header and field for each pair in the row
429
- # and expected to return +true+ or +false+, depending on whether the pair
430
- # should be deleted.
431
- #
432
- # This method returns the row for chaining.
433
- #
434
- # If no block is given, an Enumerator is returned.
435
- #
436
- def delete_if(&block)
437
- block or return enum_for(__method__) { size }
438
-
439
- @row.delete_if(&block)
440
-
441
- self # for chaining
442
- end
443
-
444
- #
445
- # This method accepts any number of arguments which can be headers, indices,
446
- # Ranges of either, or two-element Arrays containing a header and offset.
447
- # Each argument will be replaced with a field lookup as described in
448
- # CSV::Row.field().
449
- #
450
- # If called with no arguments, all fields are returned.
451
- #
452
- def fields(*headers_and_or_indices)
453
- if headers_and_or_indices.empty? # return all fields--no arguments
454
- @row.map(&:last)
455
- else # or work like values_at()
456
- all = []
457
- headers_and_or_indices.each do |h_or_i|
458
- if h_or_i.is_a? Range
459
- index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
460
- index(h_or_i.begin)
461
- index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
462
- index(h_or_i.end)
463
- new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
464
- (index_begin..index_end)
465
- all.concat(fields.values_at(new_range))
466
- else
467
- all << field(*Array(h_or_i))
468
- end
469
- end
470
- return all
471
- end
472
- end
473
- alias_method :values_at, :fields
474
-
475
- #
476
- # :call-seq:
477
- # index( header )
478
- # index( header, offset )
479
- #
480
- # This method will return the index of a field with the provided +header+.
481
- # The +offset+ can be used to locate duplicate header names, as described in
482
- # CSV::Row.field().
483
- #
484
- def index(header, minimum_index = 0)
485
- # find the pair
486
- index = headers[minimum_index..-1].index(header)
487
- # return the index at the right offset, if we found one
488
- index.nil? ? nil : index + minimum_index
489
- end
490
-
491
- # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
492
- def header?(name)
493
- headers.include? name
494
- end
495
- alias_method :include?, :header?
496
-
497
- #
498
- # Returns +true+ if +data+ matches a field in this row, and +false+
499
- # otherwise.
500
- #
501
- def field?(data)
502
- fields.include? data
503
- end
504
-
505
- include Enumerable
506
-
507
- #
508
- # Yields each pair of the row as header and field tuples (much like
509
- # iterating over a Hash). This method returns the row for chaining.
510
- #
511
- # If no block is given, an Enumerator is returned.
512
- #
513
- # Support for Enumerable.
514
- #
515
- def each(&block)
516
- block or return enum_for(__method__) { size }
517
-
518
- @row.each(&block)
519
-
520
- self # for chaining
521
- end
522
-
523
- #
524
- # Returns +true+ if this row contains the same headers and fields in the
525
- # same order as +other+.
526
- #
527
- def ==(other)
528
- return @row == other.row if other.is_a? CSV::Row
529
- @row == other
530
- end
531
-
532
- #
533
- # Collapses the row into a simple Hash. Be warned that this discards field
534
- # order and clobbers duplicate fields.
535
- #
536
- def to_hash
537
- @row.to_h
538
- end
539
-
540
- #
541
- # Returns the row as a CSV String. Headers are not used. Equivalent to:
542
- #
543
- # csv_row.fields.to_csv( options )
544
- #
545
- def to_csv(**options)
546
- fields.to_csv(options)
547
- end
548
- alias_method :to_s, :to_csv
549
-
550
- # A summary of fields, by header, in an ASCII compatible String.
551
- def inspect
552
- str = ["#<", self.class.to_s]
553
- each do |header, field|
554
- str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) <<
555
- ":" << field.inspect
556
- end
557
- str << ">"
558
- begin
559
- str.join('')
560
- rescue # any encoding error
561
- str.map do |s|
562
- e = Encoding::Converter.asciicompat_encoding(s.encoding)
563
- e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
564
- end.join('')
565
- end
566
- end
567
- end
568
-
569
- #
570
- # A CSV::Table is a two-dimensional data structure for representing CSV
571
- # documents. Tables allow you to work with the data by row or column,
572
- # manipulate the data, and even convert the results back to CSV, if needed.
573
- #
574
- # All tables returned by CSV will be constructed from this class, if header
575
- # row processing is activated.
576
- #
577
- class Table
578
- #
579
- # Construct a new CSV::Table from +array_of_rows+, which are expected
580
- # to be CSV::Row objects. All rows are assumed to have the same headers.
581
- #
582
- # A CSV::Table object supports the following Array methods through
583
- # delegation:
584
- #
585
- # * empty?()
586
- # * length()
587
- # * size()
588
- #
589
- def initialize(array_of_rows)
590
- @table = array_of_rows
591
- @mode = :col_or_row
592
- end
593
-
594
- # The current access mode for indexing and iteration.
595
- attr_reader :mode
596
-
597
- # Internal data format used to compare equality.
598
- attr_reader :table
599
- protected :table
600
-
601
- ### Array Delegation ###
602
280
 
603
- extend Forwardable
604
- def_delegators :@table, :empty?, :length, :size
605
-
606
- #
607
- # Returns a duplicate table object, in column mode. This is handy for
608
- # chaining in a single call without changing the table mode, but be aware
609
- # that this method can consume a fair amount of memory for bigger data sets.
610
- #
611
- # This method returns the duplicate table for chaining. Don't chain
612
- # destructive methods (like []=()) this way though, since you are working
613
- # with a duplicate.
614
- #
615
- def by_col
616
- self.class.new(@table.dup).by_col!
617
- end
618
-
619
- #
620
- # Switches the mode of this table to column mode. All calls to indexing and
621
- # iteration methods will work with columns until the mode is changed again.
622
- #
623
- # This method returns the table and is safe to chain.
624
- #
625
- def by_col!
626
- @mode = :col
627
-
628
- self
629
- end
630
-
631
- #
632
- # Returns a duplicate table object, in mixed mode. This is handy for
633
- # chaining in a single call without changing the table mode, but be aware
634
- # that this method can consume a fair amount of memory for bigger data sets.
635
- #
636
- # This method returns the duplicate table for chaining. Don't chain
637
- # destructive methods (like []=()) this way though, since you are working
638
- # with a duplicate.
639
- #
640
- def by_col_or_row
641
- self.class.new(@table.dup).by_col_or_row!
642
- end
643
-
644
- #
645
- # Switches the mode of this table to mixed mode. All calls to indexing and
646
- # iteration methods will use the default intelligent indexing system until
647
- # the mode is changed again. In mixed mode an index is assumed to be a row
648
- # reference while anything else is assumed to be column access by headers.
649
- #
650
- # This method returns the table and is safe to chain.
651
- #
652
- def by_col_or_row!
653
- @mode = :col_or_row
654
-
655
- self
656
- end
657
-
658
- #
659
- # Returns a duplicate table object, in row mode. This is handy for chaining
660
- # in a single call without changing the table mode, but be aware that this
661
- # method can consume a fair amount of memory for bigger data sets.
662
- #
663
- # This method returns the duplicate table for chaining. Don't chain
664
- # destructive methods (like []=()) this way though, since you are working
665
- # with a duplicate.
666
- #
667
- def by_row
668
- self.class.new(@table.dup).by_row!
669
- end
670
-
671
- #
672
- # Switches the mode of this table to row mode. All calls to indexing and
673
- # iteration methods will work with rows until the mode is changed again.
674
- #
675
- # This method returns the table and is safe to chain.
676
- #
677
- def by_row!
678
- @mode = :row
679
-
680
- self
681
- end
682
-
683
- #
684
- # Returns the headers for the first row of this table (assumed to match all
685
- # other rows). An empty Array is returned for empty tables.
686
- #
687
- def headers
688
- if @table.empty?
689
- Array.new
690
- else
691
- @table.first.headers
692
- end
693
- end
694
-
695
- #
696
- # In the default mixed mode, this method returns rows for index access and
697
- # columns for header access. You can force the index association by first
698
- # calling by_col!() or by_row!().
699
- #
700
- # Columns are returned as an Array of values. Altering that Array has no
701
- # effect on the table.
702
- #
703
- def [](index_or_header)
704
- if @mode == :row or # by index
705
- (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range)))
706
- @table[index_or_header]
707
- else # by header
708
- @table.map { |row| row[index_or_header] }
709
- end
710
- end
711
-
712
- #
713
- # In the default mixed mode, this method assigns rows for index access and
714
- # columns for header access. You can force the index association by first
715
- # calling by_col!() or by_row!().
716
- #
717
- # Rows may be set to an Array of values (which will inherit the table's
718
- # headers()) or a CSV::Row.
719
- #
720
- # Columns may be set to a single value, which is copied to each row of the
721
- # column, or an Array of values. Arrays of values are assigned to rows top
722
- # to bottom in row major order. Excess values are ignored and if the Array
723
- # does not have a value for each row the extra rows will receive a +nil+.
724
- #
725
- # Assigning to an existing column or row clobbers the data. Assigning to
726
- # new columns creates them at the right end of the table.
727
- #
728
- def []=(index_or_header, value)
729
- if @mode == :row or # by index
730
- (@mode == :col_or_row and index_or_header.is_a? Integer)
731
- if value.is_a? Array
732
- @table[index_or_header] = Row.new(headers, value)
733
- else
734
- @table[index_or_header] = value
735
- end
736
- else # set column
737
- if value.is_a? Array # multiple values
738
- @table.each_with_index do |row, i|
739
- if row.header_row?
740
- row[index_or_header] = index_or_header
741
- else
742
- row[index_or_header] = value[i]
743
- end
744
- end
745
- else # repeated value
746
- @table.each do |row|
747
- if row.header_row?
748
- row[index_or_header] = index_or_header
749
- else
750
- row[index_or_header] = value
751
- end
752
- end
753
- end
754
- end
755
- end
756
-
757
- #
758
- # The mixed mode default is to treat a list of indices as row access,
759
- # returning the rows indicated. Anything else is considered columnar
760
- # access. For columnar access, the return set has an Array for each row
761
- # with the values indicated by the headers in each Array. You can force
762
- # column or row mode using by_col!() or by_row!().
763
- #
764
- # You cannot mix column and row access.
765
- #
766
- def values_at(*indices_or_headers)
767
- if @mode == :row or # by indices
768
- ( @mode == :col_or_row and indices_or_headers.all? do |index|
769
- index.is_a?(Integer) or
770
- ( index.is_a?(Range) and
771
- index.first.is_a?(Integer) and
772
- index.last.is_a?(Integer) )
773
- end )
774
- @table.values_at(*indices_or_headers)
775
- else # by headers
776
- @table.map { |row| row.values_at(*indices_or_headers) }
777
- end
778
- end
779
-
780
- #
781
- # Adds a new row to the bottom end of this table. You can provide an Array,
782
- # which will be converted to a CSV::Row (inheriting the table's headers()),
783
- # or a CSV::Row.
784
- #
785
- # This method returns the table for chaining.
786
- #
787
- def <<(row_or_array)
788
- if row_or_array.is_a? Array # append Array
789
- @table << Row.new(headers, row_or_array)
790
- else # append Row
791
- @table << row_or_array
792
- end
793
-
794
- self # for chaining
795
- end
796
-
797
- #
798
- # A shortcut for appending multiple rows. Equivalent to:
799
- #
800
- # rows.each { |row| self << row }
801
- #
802
- # This method returns the table for chaining.
803
- #
804
- def push(*rows)
805
- rows.each { |row| self << row }
806
-
807
- self # for chaining
808
- end
809
-
810
- #
811
- # Removes and returns the indicated column or row. In the default mixed
812
- # mode indices refer to rows and everything else is assumed to be a column
813
- # header. Use by_col!() or by_row!() to force the lookup.
814
- #
815
- def delete(index_or_header)
816
- if @mode == :row or # by index
817
- (@mode == :col_or_row and index_or_header.is_a? Integer)
818
- @table.delete_at(index_or_header)
819
- else # by header
820
- @table.map { |row| row.delete(index_or_header).last }
821
- end
822
- end
823
-
824
- #
825
- # Removes any column or row for which the block returns +true+. In the
826
- # default mixed mode or row mode, iteration is the standard row major
827
- # walking of rows. In column mode, iteration will +yield+ two element
828
- # tuples containing the column name and an Array of values for that column.
829
- #
830
- # This method returns the table for chaining.
831
- #
832
- # If no block is given, an Enumerator is returned.
833
- #
834
- def delete_if(&block)
835
- block or return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size }
836
-
837
- if @mode == :row or @mode == :col_or_row # by index
838
- @table.delete_if(&block)
839
- else # by header
840
- deleted = []
841
- headers.each do |header|
842
- deleted << delete(header) if block[[header, self[header]]]
843
- end
844
- end
845
-
846
- self # for chaining
847
- end
848
-
849
- include Enumerable
850
-
851
- #
852
- # In the default mixed mode or row mode, iteration is the standard row major
853
- # walking of rows. In column mode, iteration will +yield+ two element
854
- # tuples containing the column name and an Array of values for that column.
855
- #
856
- # This method returns the table for chaining.
857
- #
858
- # If no block is given, an Enumerator is returned.
859
- #
860
- def each(&block)
861
- block or return enum_for(__method__) { @mode == :col ? headers.size : size }
862
-
863
- if @mode == :col
864
- headers.each { |header| block[[header, self[header]]] }
865
- else
866
- @table.each(&block)
867
- end
868
-
869
- self # for chaining
870
- end
871
-
872
- # Returns +true+ if all rows of this table ==() +other+'s rows.
873
- def ==(other)
874
- return @table == other.table if other.is_a? CSV::Table
875
- @table == other
876
- end
877
-
878
- #
879
- # Returns the table as an Array of Arrays. Headers will be the first row,
880
- # then all of the field rows will follow.
881
- #
882
- def to_a
883
- array = [headers]
884
- @table.each do |row|
885
- array.push(row.fields) unless row.header_row?
886
- end
887
- return array
888
- end
889
-
890
- #
891
- # Returns the table as a complete CSV String. Headers will be listed first,
892
- # then all of the field rows.
893
- #
894
- # This method assumes you want the Table.headers(), unless you explicitly
895
- # pass <tt>:write_headers => false</tt>.
896
- #
897
- def to_csv(write_headers: true, **options)
898
- array = write_headers ? [headers.to_csv(options)] : []
899
- @table.each do |row|
900
- array.push(row.fields.to_csv(options)) unless row.header_row?
901
- end
902
- return array.join('')
903
- end
904
- alias_method :to_s, :to_csv
905
-
906
- # Shows the mode and size of this table in a US-ASCII String.
907
- def inspect
908
- "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII")
281
+ # The error thrown when the parser encounters illegal CSV formatting.
282
+ class MalformedCSVError < RuntimeError
283
+ attr_reader :line_number
284
+ alias_method :lineno, :line_number
285
+ def initialize(message, line_number)
286
+ @line_number = line_number
287
+ super("#{message} in line #{line_number}.")
909
288
  end
910
289
  end
911
290
 
912
- # The error thrown when the parser encounters illegal CSV formatting.
913
- class MalformedCSVError < RuntimeError; end
914
-
915
291
  #
916
292
  # A FieldInfo Struct contains details about a field's position in the data
917
293
  # source it was read from. CSV will pass this Struct to some blocks that make
@@ -930,7 +306,11 @@ class CSV
930
306
  # A Regexp used to find and convert some common DateTime formats.
931
307
  DateTimeMatcher =
932
308
  / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} |
933
- \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} )\z /x
309
+ \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} |
310
+ # ISO-8601
311
+ \d{4}-\d{2}-\d{2}
312
+ (?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)?
313
+ )\z /x
934
314
 
935
315
  # The encoding used by all converters.
936
316
  ConverterEncoding = Encoding.find("UTF-8")
@@ -970,7 +350,7 @@ class CSV
970
350
  date: lambda { |f|
971
351
  begin
972
352
  e = f.encode(ConverterEncoding)
973
- e =~ DateMatcher ? Date.parse(e) : f
353
+ e.match?(DateMatcher) ? Date.parse(e) : f
974
354
  rescue # encoding conversion or date parse errors
975
355
  f
976
356
  end
@@ -978,7 +358,7 @@ class CSV
978
358
  date_time: lambda { |f|
979
359
  begin
980
360
  e = f.encode(ConverterEncoding)
981
- e =~ DateTimeMatcher ? DateTime.parse(e) : f
361
+ e.match?(DateTimeMatcher) ? DateTime.parse(e) : f
982
362
  rescue # encoding conversion or date parse errors
983
363
  f
984
364
  end
@@ -1137,7 +517,7 @@ class CSV
1137
517
  # but transcode it to UTF-8 before CSV parses it.
1138
518
  #
1139
519
  def self.foreach(path, **options, &block)
1140
- return to_enum(__method__, path, options) unless block
520
+ return to_enum(__method__, path, options) unless block_given?
1141
521
  open(path, options) do |csv|
1142
522
  csv.each(&block)
1143
523
  end
@@ -1164,8 +544,8 @@ class CSV
1164
544
  def self.generate(str=nil, **options)
1165
545
  # add a default empty String, if none was given
1166
546
  if str
1167
- io = StringIO.new(str)
1168
- io.seek(0, IO::SEEK_END)
547
+ str = StringIO.new(str)
548
+ str.seek(0, IO::SEEK_END)
1169
549
  else
1170
550
  encoding = options[:encoding]
1171
551
  str = String.new
@@ -1271,7 +651,7 @@ class CSV
1271
651
  begin
1272
652
  f = File.open(filename, mode, file_opts)
1273
653
  rescue ArgumentError => e
1274
- raise unless /needs binmode/ =~ e.message and mode == "r"
654
+ raise unless /needs binmode/.match?(e.message) and mode == "r"
1275
655
  mode = "rb"
1276
656
  file_opts = {encoding: Encoding.default_external}.merge(file_opts)
1277
657
  retry
@@ -1309,14 +689,14 @@ class CSV
1309
689
  #
1310
690
  def self.parse(*args, &block)
1311
691
  csv = new(*args)
1312
- if block.nil? # slurp contents, if no block is given
1313
- begin
1314
- csv.read
1315
- ensure
1316
- csv.close
1317
- end
1318
- else # or pass each row to a provided block
1319
- csv.each(&block)
692
+
693
+ return csv.each(&block) if block_given?
694
+
695
+ # slurp contents, if no block is given
696
+ begin
697
+ csv.read
698
+ ensure
699
+ csv.close
1320
700
  end
1321
701
  end
1322
702
 
@@ -1510,6 +890,8 @@ class CSV
1510
890
  # attempt to parse input not conformant
1511
891
  # with RFC 4180, such as double quotes
1512
892
  # in unquoted fields.
893
+ # <b><tt>:nil_value</tt></b>:: TODO: WRITE ME.
894
+ # <b><tt>:empty_value</tt></b>:: TODO: WRITE ME.
1513
895
  #
1514
896
  # See CSV::DEFAULT_OPTIONS for the default settings.
1515
897
  #
@@ -1519,20 +901,14 @@ class CSV
1519
901
  def initialize(data, col_sep: ",", row_sep: :auto, quote_char: '"', field_size_limit: nil,
1520
902
  converters: nil, unconverted_fields: nil, headers: false, return_headers: false,
1521
903
  write_headers: nil, header_converters: nil, skip_blanks: false, force_quotes: false,
1522
- skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil)
904
+ skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil,
905
+ nil_value: nil,
906
+ empty_value: "")
1523
907
  raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
1524
908
 
1525
909
  # create the IO object we will read from
1526
910
  @io = data.is_a?(String) ? StringIO.new(data) : data
1527
- # honor the IO encoding if we can, otherwise default to ASCII-8BIT
1528
- internal_encoding = Encoding.find(internal_encoding) if internal_encoding
1529
- external_encoding = Encoding.find(external_encoding) if external_encoding
1530
- if encoding
1531
- encoding, = encoding.split(":", 2) if encoding.is_a?(String)
1532
- encoding = Encoding.find(encoding)
1533
- end
1534
- @encoding = raw_encoding(nil) || internal_encoding || encoding ||
1535
- Encoding.default_internal || Encoding.default_external
911
+ @encoding = determine_encoding(encoding, internal_encoding)
1536
912
  #
1537
913
  # prepare for building safe regular expressions in the target encoding,
1538
914
  # if we can transcode the needed characters
@@ -1549,6 +925,10 @@ class CSV
1549
925
  # headers must be delayed until shift(), in case they need a row of content
1550
926
  @headers = nil
1551
927
 
928
+ @nil_value = nil_value
929
+ @empty_value = empty_value
930
+ @empty_value_is_empty_string = (empty_value == "")
931
+
1552
932
  init_separators(col_sep, row_sep, quote_char, force_quotes)
1553
933
  init_parsers(skip_blanks, field_size_limit, liberal_parsing)
1554
934
  init_converters(converters, :@converters, :convert)
@@ -1830,7 +1210,15 @@ class CSV
1830
1210
  @line = parse.clone
1831
1211
  end
1832
1212
 
1833
- parse.sub!(@parsers[:line_end], "")
1213
+ begin
1214
+ parse.sub!(@parsers[:line_end], "")
1215
+ rescue ArgumentError
1216
+ unless parse.valid_encoding?
1217
+ message = "Invalid byte sequence in #{parse.encoding}"
1218
+ raise MalformedCSVError.new(message, lineno + 1)
1219
+ end
1220
+ raise
1221
+ end
1834
1222
 
1835
1223
  if csv.empty?
1836
1224
  #
@@ -1844,7 +1232,7 @@ class CSV
1844
1232
  elsif @unconverted_fields
1845
1233
  return add_unconverted_fields(Array.new, Array.new)
1846
1234
  elsif @use_headers
1847
- return self.class::Row.new(Array.new, Array.new)
1235
+ return self.class::Row.new(@headers, Array.new)
1848
1236
  else
1849
1237
  return Array.new
1850
1238
  end
@@ -1853,7 +1241,7 @@ class CSV
1853
1241
 
1854
1242
  next if @skip_lines and @skip_lines.match parse
1855
1243
 
1856
- parts = parse.split(@col_sep, -1)
1244
+ parts = parse.split(@col_sep_split_separator, -1)
1857
1245
  if parts.empty?
1858
1246
  if in_extended_col
1859
1247
  csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
@@ -1870,9 +1258,9 @@ class CSV
1870
1258
  if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
1871
1259
  # extended column ends
1872
1260
  csv.last << part[0..-2]
1873
- if csv.last =~ @parsers[:stray_quote]
1874
- raise MalformedCSVError,
1875
- "Missing or stray quote in line #{lineno + 1}"
1261
+ if csv.last.match?(@parsers[:stray_quote])
1262
+ raise MalformedCSVError.new("Missing or stray quote",
1263
+ lineno + 1)
1876
1264
  end
1877
1265
  csv.last.gsub!(@double_quote_char, @quote_char)
1878
1266
  in_extended_col = false
@@ -1888,27 +1276,27 @@ class CSV
1888
1276
  elsif part.end_with?(@quote_char)
1889
1277
  # regular quoted column
1890
1278
  csv << part[1..-2]
1891
- if csv.last =~ @parsers[:stray_quote]
1892
- raise MalformedCSVError,
1893
- "Missing or stray quote in line #{lineno + 1}"
1279
+ if csv.last.match?(@parsers[:stray_quote])
1280
+ raise MalformedCSVError.new("Missing or stray quote",
1281
+ lineno + 1)
1894
1282
  end
1895
1283
  csv.last.gsub!(@double_quote_char, @quote_char)
1896
1284
  elsif @liberal_parsing
1897
1285
  csv << part
1898
1286
  else
1899
- raise MalformedCSVError,
1900
- "Missing or stray quote in line #{lineno + 1}"
1287
+ raise MalformedCSVError.new("Missing or stray quote",
1288
+ lineno + 1)
1901
1289
  end
1902
- elsif part =~ @parsers[:quote_or_nl]
1290
+ elsif part.match?(@parsers[:quote_or_nl])
1903
1291
  # Unquoted field with bad characters.
1904
- if part =~ @parsers[:nl_or_lf]
1905
- raise MalformedCSVError, "Unquoted fields do not allow " +
1906
- "\\r or \\n (line #{lineno + 1})."
1292
+ if part.match?(@parsers[:nl_or_lf])
1293
+ message = "Unquoted fields do not allow \\r or \\n"
1294
+ raise MalformedCSVError.new(message, lineno + 1)
1907
1295
  else
1908
1296
  if @liberal_parsing
1909
1297
  csv << part
1910
1298
  else
1911
- raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
1299
+ raise MalformedCSVError.new("Illegal quoting", lineno + 1)
1912
1300
  end
1913
1301
  end
1914
1302
  else
@@ -1924,10 +1312,11 @@ class CSV
1924
1312
  if in_extended_col
1925
1313
  # if we're at eof?(), a quoted field wasn't closed...
1926
1314
  if @io.eof?
1927
- raise MalformedCSVError,
1928
- "Unclosed quoted field on line #{lineno + 1}."
1315
+ raise MalformedCSVError.new("Unclosed quoted field",
1316
+ lineno + 1)
1929
1317
  elsif @field_size_limit and csv.last.size >= @field_size_limit
1930
- raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1318
+ raise MalformedCSVError.new("Field size exceeded",
1319
+ lineno + 1)
1931
1320
  end
1932
1321
  # otherwise, we need to loop and pull some more data to complete the row
1933
1322
  else
@@ -1936,10 +1325,13 @@ class CSV
1936
1325
  # save fields unconverted fields, if needed...
1937
1326
  unconverted = csv.dup if @unconverted_fields
1938
1327
 
1939
- # convert fields, if needed...
1940
- csv = convert_fields(csv) unless @use_headers or @converters.empty?
1941
- # parse out header rows and handle CSV::Row conversions...
1942
- csv = parse_headers(csv) if @use_headers
1328
+ if @use_headers
1329
+ # parse out header rows and handle CSV::Row conversions...
1330
+ csv = parse_headers(csv)
1331
+ else
1332
+ # convert fields, if needed...
1333
+ csv = convert_fields(csv)
1334
+ end
1943
1335
 
1944
1336
  # inject unconverted fields and accessor, if requested...
1945
1337
  if @unconverted_fields and not csv.respond_to? :unconverted_fields
@@ -1995,6 +1387,21 @@ class CSV
1995
1387
 
1996
1388
  private
1997
1389
 
1390
+ def determine_encoding(encoding, internal_encoding)
1391
+ # honor the IO encoding if we can, otherwise default to ASCII-8BIT
1392
+ io_encoding = raw_encoding(nil)
1393
+ return io_encoding if io_encoding
1394
+
1395
+ return Encoding.find(internal_encoding) if internal_encoding
1396
+
1397
+ if encoding
1398
+ encoding, = encoding.split(":", 2) if encoding.is_a?(String)
1399
+ return Encoding.find(encoding)
1400
+ end
1401
+
1402
+ Encoding.default_internal || Encoding.default_external
1403
+ end
1404
+
1998
1405
  #
1999
1406
  # Stores the indicated separators for later use.
2000
1407
  #
@@ -2008,6 +1415,11 @@ class CSV
2008
1415
  def init_separators(col_sep, row_sep, quote_char, force_quotes)
2009
1416
  # store the selected separators
2010
1417
  @col_sep = col_sep.to_s.encode(@encoding)
1418
+ if @col_sep == " "
1419
+ @col_sep_split_separator = Regexp.new(/#{Regexp.escape(@col_sep)}/)
1420
+ else
1421
+ @col_sep_split_separator = @col_sep
1422
+ end
2011
1423
  @row_sep = row_sep # encode after resolving :auto
2012
1424
  @quote_char = quote_char.to_s.encode(@encoding)
2013
1425
  @double_quote_char = @quote_char * 2
@@ -2037,15 +1449,28 @@ class CSV
2037
1449
  # (ensure will set default value)
2038
1450
  #
2039
1451
  break unless sample = @io.gets(nil, 1024)
1452
+
1453
+ cr = encode_str("\r")
1454
+ lf = encode_str("\n")
2040
1455
  # extend sample if we're unsure of the line ending
2041
- if sample.end_with? encode_str("\r")
1456
+ if sample.end_with?(cr)
2042
1457
  sample << (@io.gets(nil, 1) || "")
2043
1458
  end
2044
1459
 
2045
1460
  # try to find a standard separator
2046
- if sample =~ encode_re("\r\n?|\n")
2047
- @row_sep = $&
2048
- break
1461
+ sample.each_char.each_cons(2) do |char, next_char|
1462
+ case char
1463
+ when cr
1464
+ if next_char == lf
1465
+ @row_sep = encode_str("\r\n")
1466
+ else
1467
+ @row_sep = cr
1468
+ end
1469
+ break
1470
+ when lf
1471
+ @row_sep = lf
1472
+ break
1473
+ end
2049
1474
  end
2050
1475
  end
2051
1476
 
@@ -2199,10 +1624,24 @@ class CSV
2199
1624
  # shortcut.
2200
1625
  #
2201
1626
  def convert_fields(fields, headers = false)
2202
- # see if we are converting headers or fields
2203
- converters = headers ? @header_converters : @converters
1627
+ if headers
1628
+ converters = @header_converters
1629
+ else
1630
+ converters = @converters
1631
+ if !@use_headers and
1632
+ converters.empty? and
1633
+ @nil_value.nil? and
1634
+ @empty_value_is_empty_string
1635
+ return fields
1636
+ end
1637
+ end
2204
1638
 
2205
1639
  fields.map.with_index do |field, index|
1640
+ if field.nil?
1641
+ field = @nil_value
1642
+ elsif field.empty?
1643
+ field = @empty_value unless @empty_value_is_empty_string
1644
+ end
2206
1645
  converters.each do |converter|
2207
1646
  break if headers && field.nil?
2208
1647
  field = if converter.arity == 1 # straight field converter
@@ -2334,22 +1773,6 @@ def CSV(*args, &block)
2334
1773
  CSV.instance(*args, &block)
2335
1774
  end
2336
1775
 
2337
- class Array # :nodoc:
2338
- # Equivalent to CSV::generate_line(self, options)
2339
- #
2340
- # ["CSV", "data"].to_csv
2341
- # #=> "CSV,data\n"
2342
- def to_csv(**options)
2343
- CSV.generate_line(self, options)
2344
- end
2345
- end
2346
-
2347
- class String # :nodoc:
2348
- # Equivalent to CSV::parse_line(self, options)
2349
- #
2350
- # "CSV,data".parse_csv
2351
- # #=> ["CSV", "data"]
2352
- def parse_csv(**options)
2353
- CSV.parse_line(self, options)
2354
- end
2355
- end
1776
+ require_relative "csv/version"
1777
+ require_relative "csv/core_ext/array"
1778
+ require_relative "csv/core_ext/string"