csv 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 05b9101e168faf6acec7dd2c841e6a30084e3585
4
- data.tar.gz: ad75132c6ed63c2cf95c16b6dbf948d76d83d69f
2
+ SHA256:
3
+ metadata.gz: 12a1d0b486cc212d0b8cf8a044bc20b5bc4c2ae6d6d2c41633174ac8cbceb5d5
4
+ data.tar.gz: '06866abe09381a2a0af2bc36e325a6c8a83937231e446ed3ec4f1e3cb5ea4fd7'
5
5
  SHA512:
6
- metadata.gz: 45c5498cf70618cd26ddf0acd17db083beec0b9a532adf7279cc5cb830f400a1ae0b291e6492f4191020b97a427c46c019aa97fbfb83aaded9fc66b920015c26
7
- data.tar.gz: 5dd1c842d29df9e2c89da50b2ebbbed42e6386d05f3e5f76403b5d977e13a485b9978c815d418246798e4a4e72bf62995476ca4e9ea40095cc80f73319b30d7b
6
+ metadata.gz: 0bab4fe5e9e6612fda6041a86d10e88f8aacde4e97c87dd8306c3bbeed2db661cc45163ad12822af44395366e616f2bf3308fa37ea114d671401ef664a7204eb
7
+ data.tar.gz: d661fe106e45b04098fb3ac6e77418f73ec2f5cfc4117d3ba77666c317bca121853fd7bd3e9b71cad982b8c8eae61ccc1f8dc2fd4a84249b1c85d13c23af3df7
@@ -1,4 +1,15 @@
1
- Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved.
1
+ Copyright (C) 2005-2016 James Edward Gray II. All rights reserved.
2
+ Copyright (C) 2007-2017 Yukihiro Matsumoto. All rights reserved.
3
+ Copyright (C) 2017 SHIBATA Hiroshi. All rights reserved.
4
+ Copyright (C) 2017 Olivier Lacan. All rights reserved.
5
+ Copyright (C) 2017 Espartaco Palma. All rights reserved.
6
+ Copyright (C) 2017 Marcus Stollsteimer. All rights reserved.
7
+ Copyright (C) 2017 pavel. All rights reserved.
8
+ Copyright (C) 2017-2018 Steven Daniels. All rights reserved.
9
+ Copyright (C) 2018 Tomohiro Ogoke. All rights reserved.
10
+ Copyright (C) 2018 Kouhei Sutou. All rights reserved.
11
+ Copyright (C) 2018 Mitsutaka Mimura. All rights reserved.
12
+ Copyright (C) 2018 Vladislav. All rights reserved.
2
13
 
3
14
  Redistribution and use in source and binary forms, with or without
4
15
  modification, are permitted provided that the following conditions
data/README.md CHANGED
@@ -40,7 +40,13 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
40
40
 
41
41
  Bug reports and pull requests are welcome on GitHub at https://github.com/ruby/csv.
42
42
 
43
+ ### NOTE: About RuboCop
44
+
45
+ We don't use RuboCop because we can manage our coding style by ourselves. We want to accept small fluctuations in our coding style because we use Ruby.
46
+ Please do not submit issues and PRs that aim to introduce RuboCop in this repository.
43
47
 
44
48
  ## License
45
49
 
46
50
  The gem is available as open source under the terms of the [2-Clause BSD License](https://opensource.org/licenses/BSD-2-Clause).
51
+
52
+ See LICENSE.txt for details.
data/lib/csv.rb CHANGED
@@ -2,9 +2,7 @@
2
2
  # frozen_string_literal: true
3
3
  # = csv.rb -- CSV Reading and Writing
4
4
  #
5
- # Created by James Edward Gray II on 2005-10-31.
6
- # Copyright 2005 James Edward Gray II. You can redistribute or modify this code
7
- # under the terms of Ruby's license.
5
+ # Created by James Edward Gray II on 2005-10-31.
8
6
  #
9
7
  # See CSV for documentation.
10
8
  #
@@ -95,74 +93,146 @@ require "forwardable"
95
93
  require "English"
96
94
  require "date"
97
95
  require "stringio"
96
+ require_relative "csv/table"
97
+ require_relative "csv/row"
98
+
99
+ # This provides String#match? and Regexp#match? for Ruby 2.3.
100
+ unless String.method_defined?(:match?)
101
+ class CSV
102
+ module MatchP
103
+ refine String do
104
+ def match?(pattern)
105
+ self =~ pattern
106
+ end
107
+ end
108
+
109
+ refine Regexp do
110
+ def match?(string)
111
+ self =~ string
112
+ end
113
+ end
114
+ end
115
+ end
116
+
117
+ using CSV::MatchP
118
+ end
98
119
 
99
120
  #
100
121
  # This class provides a complete interface to CSV files and data. It offers
101
122
  # tools to enable you to read and write to and from Strings or IO objects, as
102
123
  # needed.
103
124
  #
104
- # == Reading
125
+ # The most generic interface of the class is:
105
126
  #
106
- # === From a File
127
+ # csv = CSV.new(string_or_io, **options)
107
128
  #
108
- # ==== A Line at a Time
129
+ # # Reading: IO object should be open for read
130
+ # csv.read # => array of rows
131
+ # # or
132
+ # csv.each do |row|
133
+ # # ...
134
+ # end
135
+ # # or
136
+ # row = csv.shift
109
137
  #
110
- # CSV.foreach("path/to/file.csv") do |row|
111
- # # use row here...
112
- # end
138
+ # # Writing: IO object should be open for write
139
+ # csv << row
113
140
  #
114
- # ==== All at Once
141
+ # There are several specialized class methods for one-statement reading or writing,
142
+ # described in the Specialized Methods section.
115
143
  #
116
- # arr_of_arrs = CSV.read("path/to/file.csv")
144
+ # If a String is passed into ::new, it is internally wrapped into a StringIO object.
117
145
  #
118
- # === From a String
146
+ # +options+ can be used for specifying the particular CSV flavor (column
147
+ # separators, row separators, value quoting and so on), and for data conversion;
148
+ # see the Data Conversion section for a description of the latter.
119
149
  #
120
- # ==== A Line at a Time
150
+ # == Specialized Methods
121
151
  #
122
- # CSV.parse("CSV,data,String") do |row|
123
- # # use row here...
124
- # end
152
+ # === Reading
125
153
  #
126
- # ==== All at Once
127
- #
128
- # arr_of_arrs = CSV.parse("CSV,data,String")
154
+ # # From a file: all at once
155
+ # arr_of_rows = CSV.read("path/to/file.csv", **options)
156
+ # # iterator-style:
157
+ # CSV.foreach("path/to/file.csv", **options) do |row|
158
+ # # ...
159
+ # end
129
160
  #
130
- # == Writing
161
+ # # From a string
162
+ # arr_of_rows = CSV.parse("CSV,data,String", **options)
163
+ # # or
164
+ # CSV.parse("CSV,data,String", **options) do |row|
165
+ # # ...
166
+ # end
131
167
  #
132
- # === To a File
168
+ # === Writing
133
169
  #
170
+ # # To a file
134
171
  # CSV.open("path/to/file.csv", "wb") do |csv|
135
172
  # csv << ["row", "of", "CSV", "data"]
136
173
  # csv << ["another", "row"]
137
174
  # # ...
138
175
  # end
139
176
  #
140
- # === To a String
141
- #
177
+ # # To a String
142
178
  # csv_string = CSV.generate do |csv|
143
179
  # csv << ["row", "of", "CSV", "data"]
144
180
  # csv << ["another", "row"]
145
181
  # # ...
146
182
  # end
147
183
  #
148
- # == Convert a Single Line
184
+ # === Shortcuts
149
185
  #
186
+ # # Core extensions for converting one line
150
187
  # csv_string = ["CSV", "data"].to_csv # to CSV
151
188
  # csv_array = "CSV,String".parse_csv # from CSV
152
189
  #
153
- # == Shortcut Interface
154
- #
190
+ # # CSV() method
155
191
  # CSV { |csv_out| csv_out << %w{my data here} } # to $stdout
156
192
  # CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
157
193
  # CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
158
194
  # CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
159
195
  #
160
- # == Advanced Usage
196
+ # == Data Conversion
197
+ #
198
+ # === CSV with headers
199
+ #
200
+ # CSV allows you to specify the column names of a CSV file, whether they are in the data or
201
+ # provided separately. If headers are specified, reading methods return an instance
202
+ # of CSV::Table, consisting of CSV::Row objects.
203
+ #
204
+ # # Headers are part of data
205
+ # data = CSV.parse(<<~ROWS, headers: true)
206
+ # Name,Department,Salary
207
+ # Bob,Engineering,1000
208
+ # Jane,Sales,2000
209
+ # John,Management,5000
210
+ # ROWS
161
211
  #
162
- # === Wrap an IO Object
212
+ # data.class #=> CSV::Table
213
+ # data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engineering" "Salary":"1000">
214
+ # data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engineering", "Salary"=>"1000"}
163
215
  #
164
- # csv = CSV.new(io, options)
165
- # # ... read (with gets() or each()) from and write (with <<) to csv here ...
216
+ # # Headers provided by developer
217
+ # data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
218
+ # data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
219
+ #
220
+ # === Typed data reading
221
+ #
222
+ # CSV allows you to provide a set of data _converters_, i.e. transformations to try on input
223
+ # data. A converter can be a symbol from the keys of the CSV::Converters constant, or a lambda.
224
+ #
225
+ # # Without any converters:
226
+ # CSV.parse('Bob,2018-03-01,100')
227
+ # #=> [["Bob", "2018-03-01", "100"]]
228
+ #
229
+ # # With built-in converters:
230
+ # CSV.parse('Bob,2018-03-01,100', converters: %i[numeric date])
231
+ # #=> [["Bob", #<Date: 2018-03-01>, 100]]
232
+ #
233
+ # # With custom converters:
234
+ # CSV.parse('Bob,2018-03-01,100', converters: [->(v) { Time.parse(v) rescue v }])
235
+ # #=> [["Bob", 2018-03-01 00:00:00 +0200, "100"]]
166
236
  #
167
237
  # == CSV and Character Encodings (M17n or Multilingualization)
168
238
  #
@@ -207,757 +277,17 @@ require "stringio"
207
277
  # find with it.
208
278
  #
209
279
  class CSV
210
- # The version of the installed library.
211
- VERSION = "1.0.1"
212
-
213
- #
214
- # A CSV::Row is part Array and part Hash. It retains an order for the fields
215
- # and allows duplicates just as an Array would, but also allows you to access
216
- # fields by name just as you could if they were in a Hash.
217
- #
218
- # All rows returned by CSV will be constructed from this class, if header row
219
- # processing is activated.
220
- #
221
- class Row
222
- #
223
- # Construct a new CSV::Row from +headers+ and +fields+, which are expected
224
- # to be Arrays. If one Array is shorter than the other, it will be padded
225
- # with +nil+ objects.
226
- #
227
- # The optional +header_row+ parameter can be set to +true+ to indicate, via
228
- # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
229
- # row. Otherwise, the row is assumed to be a field row.
230
- #
231
- # A CSV::Row object supports the following Array methods through delegation:
232
- #
233
- # * empty?()
234
- # * length()
235
- # * size()
236
- #
237
- def initialize(headers, fields, header_row = false)
238
- @header_row = header_row
239
- headers.each { |h| h.freeze if h.is_a? String }
240
-
241
- # handle extra headers or fields
242
- @row = if headers.size >= fields.size
243
- headers.zip(fields)
244
- else
245
- fields.zip(headers).each(&:reverse!)
246
- end
247
- end
248
-
249
- # Internal data format used to compare equality.
250
- attr_reader :row
251
- protected :row
252
-
253
- ### Array Delegation ###
254
-
255
- extend Forwardable
256
- def_delegators :@row, :empty?, :length, :size
257
-
258
- # Returns +true+ if this is a header row.
259
- def header_row?
260
- @header_row
261
- end
262
-
263
- # Returns +true+ if this is a field row.
264
- def field_row?
265
- not header_row?
266
- end
267
-
268
- # Returns the headers of this row.
269
- def headers
270
- @row.map(&:first)
271
- end
272
-
273
- #
274
- # :call-seq:
275
- # field( header )
276
- # field( header, offset )
277
- # field( index )
278
- #
279
- # This method will return the field value by +header+ or +index+. If a field
280
- # is not found, +nil+ is returned.
281
- #
282
- # When provided, +offset+ ensures that a header match occurs on or later
283
- # than the +offset+ index. You can use this to find duplicate headers,
284
- # without resorting to hard-coding exact indices.
285
- #
286
- def field(header_or_index, minimum_index = 0)
287
- # locate the pair
288
- finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc
289
- pair = @row[minimum_index..-1].send(finder, header_or_index)
290
-
291
- # return the field if we have a pair
292
- if pair.nil?
293
- nil
294
- else
295
- header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last
296
- end
297
- end
298
- alias_method :[], :field
299
-
300
- #
301
- # :call-seq:
302
- # fetch( header )
303
- # fetch( header ) { |row| ... }
304
- # fetch( header, default )
305
- #
306
- # This method will fetch the field value by +header+. It has the same
307
- # behavior as Hash#fetch: if there is a field with the given +header+, its
308
- # value is returned. Otherwise, if a block is given, it is yielded the
309
- # +header+ and its result is returned; if a +default+ is given as the
310
- # second argument, it is returned; otherwise a KeyError is raised.
311
- #
312
- def fetch(header, *varargs)
313
- raise ArgumentError, "Too many arguments" if varargs.length > 1
314
- pair = @row.assoc(header)
315
- if pair
316
- pair.last
317
- else
318
- if block_given?
319
- yield header
320
- elsif varargs.empty?
321
- raise KeyError, "key not found: #{header}"
322
- else
323
- varargs.first
324
- end
325
- end
326
- end
327
-
328
- # Returns +true+ if there is a field with the given +header+.
329
- def has_key?(header)
330
- !!@row.assoc(header)
331
- end
332
- alias_method :include?, :has_key?
333
- alias_method :key?, :has_key?
334
- alias_method :member?, :has_key?
335
-
336
- #
337
- # :call-seq:
338
- # []=( header, value )
339
- # []=( header, offset, value )
340
- # []=( index, value )
341
- #
342
- # Looks up the field by the semantics described in CSV::Row.field() and
343
- # assigns the +value+.
344
- #
345
- # Assigning past the end of the row with an index will set all pairs between
346
- # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
347
- # pair.
348
- #
349
- def []=(*args)
350
- value = args.pop
351
-
352
- if args.first.is_a? Integer
353
- if @row[args.first].nil? # extending past the end with index
354
- @row[args.first] = [nil, value]
355
- @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
356
- else # normal index assignment
357
- @row[args.first][1] = value
358
- end
359
- else
360
- index = index(*args)
361
- if index.nil? # appending a field
362
- self << [args.first, value]
363
- else # normal header assignment
364
- @row[index][1] = value
365
- end
366
- end
367
- end
368
-
369
- #
370
- # :call-seq:
371
- # <<( field )
372
- # <<( header_and_field_array )
373
- # <<( header_and_field_hash )
374
- #
375
- # If a two-element Array is provided, it is assumed to be a header and field
376
- # and the pair is appended. A Hash works the same way with the key being
377
- # the header and the value being the field. Anything else is assumed to be
378
- # a lone field which is appended with a +nil+ header.
379
- #
380
- # This method returns the row for chaining.
381
- #
382
- def <<(arg)
383
- if arg.is_a?(Array) and arg.size == 2 # appending a header and name
384
- @row << arg
385
- elsif arg.is_a?(Hash) # append header and name pairs
386
- arg.each { |pair| @row << pair }
387
- else # append field value
388
- @row << [nil, arg]
389
- end
390
-
391
- self # for chaining
392
- end
393
-
394
- #
395
- # A shortcut for appending multiple fields. Equivalent to:
396
- #
397
- # args.each { |arg| csv_row << arg }
398
- #
399
- # This method returns the row for chaining.
400
- #
401
- def push(*args)
402
- args.each { |arg| self << arg }
403
-
404
- self # for chaining
405
- end
406
-
407
- #
408
- # :call-seq:
409
- # delete( header )
410
- # delete( header, offset )
411
- # delete( index )
412
- #
413
- # Used to remove a pair from the row by +header+ or +index+. The pair is
414
- # located as described in CSV::Row.field(). The deleted pair is returned,
415
- # or +nil+ if a pair could not be found.
416
- #
417
- def delete(header_or_index, minimum_index = 0)
418
- if header_or_index.is_a? Integer # by index
419
- @row.delete_at(header_or_index)
420
- elsif i = index(header_or_index, minimum_index) # by header
421
- @row.delete_at(i)
422
- else
423
- [ ]
424
- end
425
- end
426
-
427
- #
428
- # The provided +block+ is passed a header and field for each pair in the row
429
- # and expected to return +true+ or +false+, depending on whether the pair
430
- # should be deleted.
431
- #
432
- # This method returns the row for chaining.
433
- #
434
- # If no block is given, an Enumerator is returned.
435
- #
436
- def delete_if(&block)
437
- return enum_for(__method__) { size } unless block_given?
438
-
439
- @row.delete_if(&block)
440
-
441
- self # for chaining
442
- end
443
-
444
- #
445
- # This method accepts any number of arguments which can be headers, indices,
446
- # Ranges of either, or two-element Arrays containing a header and offset.
447
- # Each argument will be replaced with a field lookup as described in
448
- # CSV::Row.field().
449
- #
450
- # If called with no arguments, all fields are returned.
451
- #
452
- def fields(*headers_and_or_indices)
453
- if headers_and_or_indices.empty? # return all fields--no arguments
454
- @row.map(&:last)
455
- else # or work like values_at()
456
- all = []
457
- headers_and_or_indices.each do |h_or_i|
458
- if h_or_i.is_a? Range
459
- index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
460
- index(h_or_i.begin)
461
- index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
462
- index(h_or_i.end)
463
- new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
464
- (index_begin..index_end)
465
- all.concat(fields.values_at(new_range))
466
- else
467
- all << field(*Array(h_or_i))
468
- end
469
- end
470
- return all
471
- end
472
- end
473
- alias_method :values_at, :fields
474
-
475
- #
476
- # :call-seq:
477
- # index( header )
478
- # index( header, offset )
479
- #
480
- # This method will return the index of a field with the provided +header+.
481
- # The +offset+ can be used to locate duplicate header names, as described in
482
- # CSV::Row.field().
483
- #
484
- def index(header, minimum_index = 0)
485
- # find the pair
486
- index = headers[minimum_index..-1].index(header)
487
- # return the index at the right offset, if we found one
488
- index.nil? ? nil : index + minimum_index
489
- end
490
-
491
- # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
492
- def header?(name)
493
- headers.include? name
494
- end
495
- alias_method :include?, :header?
496
-
497
- #
498
- # Returns +true+ if +data+ matches a field in this row, and +false+
499
- # otherwise.
500
- #
501
- def field?(data)
502
- fields.include? data
503
- end
504
-
505
- include Enumerable
506
-
507
- #
508
- # Yields each pair of the row as header and field tuples (much like
509
- # iterating over a Hash). This method returns the row for chaining.
510
- #
511
- # If no block is given, an Enumerator is returned.
512
- #
513
- # Support for Enumerable.
514
- #
515
- def each(&block)
516
- return enum_for(__method__) { size } unless block_given?
517
-
518
- @row.each(&block)
519
-
520
- self # for chaining
521
- end
522
-
523
- #
524
- # Returns +true+ if this row contains the same headers and fields in the
525
- # same order as +other+.
526
- #
527
- def ==(other)
528
- return @row == other.row if other.is_a? CSV::Row
529
- @row == other
530
- end
531
-
532
- #
533
- # Collapses the row into a simple Hash. Be warned that this discards field
534
- # order and clobbers duplicate fields.
535
- #
536
- def to_hash
537
- @row.to_h
538
- end
539
-
540
- #
541
- # Returns the row as a CSV String. Headers are not used. Equivalent to:
542
- #
543
- # csv_row.fields.to_csv( options )
544
- #
545
- def to_csv(**options)
546
- fields.to_csv(options)
547
- end
548
- alias_method :to_s, :to_csv
549
280
 
550
- #
551
- # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step,
552
- # returning nil if any intermediate step is nil.
553
- #
554
- def dig(index_or_header, *indexes)
555
- value = field(index_or_header)
556
- if value.nil?
557
- nil
558
- elsif indexes.empty?
559
- value
560
- else
561
- unless value.respond_to?(:dig)
562
- raise TypeError, "#{value.class} does not have \#dig method"
563
- end
564
- value.dig(*indexes)
565
- end
566
- end
567
-
568
- # A summary of fields, by header, in an ASCII compatible String.
569
- def inspect
570
- str = ["#<", self.class.to_s]
571
- each do |header, field|
572
- str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) <<
573
- ":" << field.inspect
574
- end
575
- str << ">"
576
- begin
577
- str.join('')
578
- rescue # any encoding error
579
- str.map do |s|
580
- e = Encoding::Converter.asciicompat_encoding(s.encoding)
581
- e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
582
- end.join('')
583
- end
584
- end
585
- end
586
-
587
- #
588
- # A CSV::Table is a two-dimensional data structure for representing CSV
589
- # documents. Tables allow you to work with the data by row or column,
590
- # manipulate the data, and even convert the results back to CSV, if needed.
591
- #
592
- # All tables returned by CSV will be constructed from this class, if header
593
- # row processing is activated.
594
- #
595
- class Table
596
- #
597
- # Construct a new CSV::Table from +array_of_rows+, which are expected
598
- # to be CSV::Row objects. All rows are assumed to have the same headers.
599
- #
600
- # A CSV::Table object supports the following Array methods through
601
- # delegation:
602
- #
603
- # * empty?()
604
- # * length()
605
- # * size()
606
- #
607
- def initialize(array_of_rows)
608
- @table = array_of_rows
609
- @mode = :col_or_row
610
- end
611
-
612
- # The current access mode for indexing and iteration.
613
- attr_reader :mode
614
-
615
- # Internal data format used to compare equality.
616
- attr_reader :table
617
- protected :table
618
-
619
- ### Array Delegation ###
620
-
621
- extend Forwardable
622
- def_delegators :@table, :empty?, :length, :size
623
-
624
- #
625
- # Returns a duplicate table object, in column mode. This is handy for
626
- # chaining in a single call without changing the table mode, but be aware
627
- # that this method can consume a fair amount of memory for bigger data sets.
628
- #
629
- # This method returns the duplicate table for chaining. Don't chain
630
- # destructive methods (like []=()) this way though, since you are working
631
- # with a duplicate.
632
- #
633
- def by_col
634
- self.class.new(@table.dup).by_col!
635
- end
636
-
637
- #
638
- # Switches the mode of this table to column mode. All calls to indexing and
639
- # iteration methods will work with columns until the mode is changed again.
640
- #
641
- # This method returns the table and is safe to chain.
642
- #
643
- def by_col!
644
- @mode = :col
645
-
646
- self
647
- end
648
-
649
- #
650
- # Returns a duplicate table object, in mixed mode. This is handy for
651
- # chaining in a single call without changing the table mode, but be aware
652
- # that this method can consume a fair amount of memory for bigger data sets.
653
- #
654
- # This method returns the duplicate table for chaining. Don't chain
655
- # destructive methods (like []=()) this way though, since you are working
656
- # with a duplicate.
657
- #
658
- def by_col_or_row
659
- self.class.new(@table.dup).by_col_or_row!
660
- end
661
-
662
- #
663
- # Switches the mode of this table to mixed mode. All calls to indexing and
664
- # iteration methods will use the default intelligent indexing system until
665
- # the mode is changed again. In mixed mode an index is assumed to be a row
666
- # reference while anything else is assumed to be column access by headers.
667
- #
668
- # This method returns the table and is safe to chain.
669
- #
670
- def by_col_or_row!
671
- @mode = :col_or_row
672
-
673
- self
674
- end
675
-
676
- #
677
- # Returns a duplicate table object, in row mode. This is handy for chaining
678
- # in a single call without changing the table mode, but be aware that this
679
- # method can consume a fair amount of memory for bigger data sets.
680
- #
681
- # This method returns the duplicate table for chaining. Don't chain
682
- # destructive methods (like []=()) this way though, since you are working
683
- # with a duplicate.
684
- #
685
- def by_row
686
- self.class.new(@table.dup).by_row!
687
- end
688
-
689
- #
690
- # Switches the mode of this table to row mode. All calls to indexing and
691
- # iteration methods will work with rows until the mode is changed again.
692
- #
693
- # This method returns the table and is safe to chain.
694
- #
695
- def by_row!
696
- @mode = :row
697
-
698
- self
699
- end
700
-
701
- #
702
- # Returns the headers for the first row of this table (assumed to match all
703
- # other rows). An empty Array is returned for empty tables.
704
- #
705
- def headers
706
- if @table.empty?
707
- Array.new
708
- else
709
- @table.first.headers
710
- end
711
- end
712
-
713
- #
714
- # In the default mixed mode, this method returns rows for index access and
715
- # columns for header access. You can force the index association by first
716
- # calling by_col!() or by_row!().
717
- #
718
- # Columns are returned as an Array of values. Altering that Array has no
719
- # effect on the table.
720
- #
721
- def [](index_or_header)
722
- if @mode == :row or # by index
723
- (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range)))
724
- @table[index_or_header]
725
- else # by header
726
- @table.map { |row| row[index_or_header] }
727
- end
728
- end
729
-
730
- #
731
- # In the default mixed mode, this method assigns rows for index access and
732
- # columns for header access. You can force the index association by first
733
- # calling by_col!() or by_row!().
734
- #
735
- # Rows may be set to an Array of values (which will inherit the table's
736
- # headers()) or a CSV::Row.
737
- #
738
- # Columns may be set to a single value, which is copied to each row of the
739
- # column, or an Array of values. Arrays of values are assigned to rows top
740
- # to bottom in row major order. Excess values are ignored and if the Array
741
- # does not have a value for each row the extra rows will receive a +nil+.
742
- #
743
- # Assigning to an existing column or row clobbers the data. Assigning to
744
- # new columns creates them at the right end of the table.
745
- #
746
- def []=(index_or_header, value)
747
- if @mode == :row or # by index
748
- (@mode == :col_or_row and index_or_header.is_a? Integer)
749
- if value.is_a? Array
750
- @table[index_or_header] = Row.new(headers, value)
751
- else
752
- @table[index_or_header] = value
753
- end
754
- else # set column
755
- if value.is_a? Array # multiple values
756
- @table.each_with_index do |row, i|
757
- if row.header_row?
758
- row[index_or_header] = index_or_header
759
- else
760
- row[index_or_header] = value[i]
761
- end
762
- end
763
- else # repeated value
764
- @table.each do |row|
765
- if row.header_row?
766
- row[index_or_header] = index_or_header
767
- else
768
- row[index_or_header] = value
769
- end
770
- end
771
- end
772
- end
773
- end
774
-
775
- #
776
- # The mixed mode default is to treat a list of indices as row access,
777
- # returning the rows indicated. Anything else is considered columnar
778
- # access. For columnar access, the return set has an Array for each row
779
- # with the values indicated by the headers in each Array. You can force
780
- # column or row mode using by_col!() or by_row!().
781
- #
782
- # You cannot mix column and row access.
783
- #
784
- def values_at(*indices_or_headers)
785
- if @mode == :row or # by indices
786
- ( @mode == :col_or_row and indices_or_headers.all? do |index|
787
- index.is_a?(Integer) or
788
- ( index.is_a?(Range) and
789
- index.first.is_a?(Integer) and
790
- index.last.is_a?(Integer) )
791
- end )
792
- @table.values_at(*indices_or_headers)
793
- else # by headers
794
- @table.map { |row| row.values_at(*indices_or_headers) }
795
- end
796
- end
797
-
798
- #
799
- # Adds a new row to the bottom end of this table. You can provide an Array,
800
- # which will be converted to a CSV::Row (inheriting the table's headers()),
801
- # or a CSV::Row.
802
- #
803
- # This method returns the table for chaining.
804
- #
805
- def <<(row_or_array)
806
- if row_or_array.is_a? Array # append Array
807
- @table << Row.new(headers, row_or_array)
808
- else # append Row
809
- @table << row_or_array
810
- end
811
-
812
- self # for chaining
813
- end
814
-
815
- #
816
- # A shortcut for appending multiple rows. Equivalent to:
817
- #
818
- # rows.each { |row| self << row }
819
- #
820
- # This method returns the table for chaining.
821
- #
822
- def push(*rows)
823
- rows.each { |row| self << row }
824
-
825
- self # for chaining
826
- end
827
-
828
- #
829
- # Removes and returns the indicated columns or rows. In the default mixed
830
- # mode indices refer to rows and everything else is assumed to be a column
831
- # header. Use by_col!() or by_row!() to force the lookup.
832
- #
833
- def delete(*indexes_or_headers)
834
- if indexes_or_headers.empty?
835
- raise ArgumentError, "wrong number of arguments (given 0, expected 1+)"
836
- end
837
- deleted_values = indexes_or_headers.map do |index_or_header|
838
- if @mode == :row or # by index
839
- (@mode == :col_or_row and index_or_header.is_a? Integer)
840
- @table.delete_at(index_or_header)
841
- else # by header
842
- @table.map { |row| row.delete(index_or_header).last }
843
- end
844
- end
845
- if indexes_or_headers.size == 1
846
- deleted_values[0]
847
- else
848
- deleted_values
849
- end
850
- end
851
-
852
- #
853
- # Removes any column or row for which the block returns +true+. In the
854
- # default mixed mode or row mode, iteration is the standard row major
855
- # walking of rows. In column mode, iteration will +yield+ two element
856
- # tuples containing the column name and an Array of values for that column.
857
- #
858
- # This method returns the table for chaining.
859
- #
860
- # If no block is given, an Enumerator is returned.
861
- #
862
- def delete_if(&block)
863
- return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size } unless block_given?
864
-
865
- if @mode == :row or @mode == :col_or_row # by index
866
- @table.delete_if(&block)
867
- else # by header
868
- deleted = []
869
- headers.each do |header|
870
- deleted << delete(header) if yield([header, self[header]])
871
- end
872
- end
873
-
874
- self # for chaining
875
- end
876
-
877
- include Enumerable
878
-
879
- #
880
- # In the default mixed mode or row mode, iteration is the standard row major
881
- # walking of rows. In column mode, iteration will +yield+ two element
882
- # tuples containing the column name and an Array of values for that column.
883
- #
884
- # This method returns the table for chaining.
885
- #
886
- # If no block is given, an Enumerator is returned.
887
- #
888
- def each(&block)
889
- return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given?
890
-
891
- if @mode == :col
892
- headers.each { |header| yield([header, self[header]]) }
893
- else
894
- @table.each(&block)
895
- end
896
-
897
- self # for chaining
898
- end
899
-
900
- # Returns +true+ if all rows of this table ==() +other+'s rows.
901
- def ==(other)
902
- return @table == other.table if other.is_a? CSV::Table
903
- @table == other
904
- end
905
-
906
- #
907
- # Returns the table as an Array of Arrays. Headers will be the first row,
908
- # then all of the field rows will follow.
909
- #
910
- def to_a
911
- array = [headers]
912
- @table.each do |row|
913
- array.push(row.fields) unless row.header_row?
914
- end
915
- return array
916
- end
917
-
918
- #
919
- # Returns the table as a complete CSV String. Headers will be listed first,
920
- # then all of the field rows.
921
- #
922
- # This method assumes you want the Table.headers(), unless you explicitly
923
- # pass <tt>:write_headers => false</tt>.
924
- #
925
- def to_csv(write_headers: true, **options)
926
- array = write_headers ? [headers.to_csv(options)] : []
927
- @table.each do |row|
928
- array.push(row.fields.to_csv(options)) unless row.header_row?
929
- end
930
- return array.join('')
931
- end
932
- alias_method :to_s, :to_csv
933
-
934
- #
935
- # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step,
936
- # returning nil if any intermediate step is nil.
937
- #
938
- def dig(index_or_header, *index_or_headers)
939
- value = self[index_or_header]
940
- if value.nil?
941
- nil
942
- elsif index_or_headers.empty?
943
- value
944
- else
945
- unless value.respond_to?(:dig)
946
- raise TypeError, "#{value.class} does not have \#dig method"
947
- end
948
- value.dig(*index_or_headers)
949
- end
950
- end
951
-
952
- # Shows the mode and size of this table in a US-ASCII String.
953
- def inspect
954
- "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII")
281
+ # The error thrown when the parser encounters illegal CSV formatting.
282
+ class MalformedCSVError < RuntimeError
283
+ attr_reader :line_number
284
+ alias_method :lineno, :line_number
285
+ def initialize(message, line_number)
286
+ @line_number = line_number
287
+ super("#{message} in line #{line_number}.")
955
288
  end
956
289
  end
957
290
 
958
- # The error thrown when the parser encounters illegal CSV formatting.
959
- class MalformedCSVError < RuntimeError; end
960
-
961
291
  #
962
292
  # A FieldInfo Struct contains details about a field's position in the data
963
293
  # source it was read from. CSV will pass this Struct to some blocks that make
@@ -1020,7 +350,7 @@ class CSV
1020
350
  date: lambda { |f|
1021
351
  begin
1022
352
  e = f.encode(ConverterEncoding)
1023
- e =~ DateMatcher ? Date.parse(e) : f
353
+ e.match?(DateMatcher) ? Date.parse(e) : f
1024
354
  rescue # encoding conversion or date parse errors
1025
355
  f
1026
356
  end
@@ -1028,7 +358,7 @@ class CSV
1028
358
  date_time: lambda { |f|
1029
359
  begin
1030
360
  e = f.encode(ConverterEncoding)
1031
- e =~ DateTimeMatcher ? DateTime.parse(e) : f
361
+ e.match?(DateTimeMatcher) ? DateTime.parse(e) : f
1032
362
  rescue # encoding conversion or date parse errors
1033
363
  f
1034
364
  end
@@ -1321,7 +651,7 @@ class CSV
1321
651
  begin
1322
652
  f = File.open(filename, mode, file_opts)
1323
653
  rescue ArgumentError => e
1324
- raise unless /needs binmode/ =~ e.message and mode == "r"
654
+ raise unless /needs binmode/.match?(e.message) and mode == "r"
1325
655
  mode = "rb"
1326
656
  file_opts = {encoding: Encoding.default_external}.merge(file_opts)
1327
657
  retry
@@ -1560,6 +890,8 @@ class CSV
1560
890
  # attempt to parse input not conformant
1561
891
  # with RFC 4180, such as double quotes
1562
892
  # in unquoted fields.
893
+ # <b><tt>:nil_value</tt></b>:: TODO: WRITE ME.
894
+ # <b><tt>:empty_value</tt></b>:: TODO: WRITE ME.
1563
895
  #
1564
896
  # See CSV::DEFAULT_OPTIONS for the default settings.
1565
897
  #
@@ -1569,20 +901,14 @@ class CSV
1569
901
  def initialize(data, col_sep: ",", row_sep: :auto, quote_char: '"', field_size_limit: nil,
1570
902
  converters: nil, unconverted_fields: nil, headers: false, return_headers: false,
1571
903
  write_headers: nil, header_converters: nil, skip_blanks: false, force_quotes: false,
1572
- skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil)
904
+ skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil,
905
+ nil_value: nil,
906
+ empty_value: "")
1573
907
  raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
1574
908
 
1575
909
  # create the IO object we will read from
1576
910
  @io = data.is_a?(String) ? StringIO.new(data) : data
1577
- # honor the IO encoding if we can, otherwise default to ASCII-8BIT
1578
- internal_encoding = Encoding.find(internal_encoding) if internal_encoding
1579
- external_encoding = Encoding.find(external_encoding) if external_encoding
1580
- if encoding
1581
- encoding, = encoding.split(":", 2) if encoding.is_a?(String)
1582
- encoding = Encoding.find(encoding)
1583
- end
1584
- @encoding = raw_encoding(nil) || internal_encoding || encoding ||
1585
- Encoding.default_internal || Encoding.default_external
911
+ @encoding = determine_encoding(encoding, internal_encoding)
1586
912
  #
1587
913
  # prepare for building safe regular expressions in the target encoding,
1588
914
  # if we can transcode the needed characters
@@ -1599,6 +925,10 @@ class CSV
1599
925
  # headers must be delayed until shift(), in case they need a row of content
1600
926
  @headers = nil
1601
927
 
928
+ @nil_value = nil_value
929
+ @empty_value = empty_value
930
+ @empty_value_is_empty_string = (empty_value == "")
931
+
1602
932
  init_separators(col_sep, row_sep, quote_char, force_quotes)
1603
933
  init_parsers(skip_blanks, field_size_limit, liberal_parsing)
1604
934
  init_converters(converters, :@converters, :convert)
@@ -1880,7 +1210,15 @@ class CSV
1880
1210
  @line = parse.clone
1881
1211
  end
1882
1212
 
1883
- parse.sub!(@parsers[:line_end], "")
1213
+ begin
1214
+ parse.sub!(@parsers[:line_end], "")
1215
+ rescue ArgumentError
1216
+ unless parse.valid_encoding?
1217
+ message = "Invalid byte sequence in #{parse.encoding}"
1218
+ raise MalformedCSVError.new(message, lineno + 1)
1219
+ end
1220
+ raise
1221
+ end
1884
1222
 
1885
1223
  if csv.empty?
1886
1224
  #
@@ -1903,7 +1241,7 @@ class CSV
1903
1241
 
1904
1242
  next if @skip_lines and @skip_lines.match parse
1905
1243
 
1906
- parts = parse.split(@col_sep, -1)
1244
+ parts = parse.split(@col_sep_split_separator, -1)
1907
1245
  if parts.empty?
1908
1246
  if in_extended_col
1909
1247
  csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
@@ -1920,9 +1258,9 @@ class CSV
1920
1258
  if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
1921
1259
  # extended column ends
1922
1260
  csv.last << part[0..-2]
1923
- if csv.last =~ @parsers[:stray_quote]
1924
- raise MalformedCSVError,
1925
- "Missing or stray quote in line #{lineno + 1}"
1261
+ if csv.last.match?(@parsers[:stray_quote])
1262
+ raise MalformedCSVError.new("Missing or stray quote",
1263
+ lineno + 1)
1926
1264
  end
1927
1265
  csv.last.gsub!(@double_quote_char, @quote_char)
1928
1266
  in_extended_col = false
@@ -1938,27 +1276,27 @@ class CSV
1938
1276
  elsif part.end_with?(@quote_char)
1939
1277
  # regular quoted column
1940
1278
  csv << part[1..-2]
1941
- if csv.last =~ @parsers[:stray_quote]
1942
- raise MalformedCSVError,
1943
- "Missing or stray quote in line #{lineno + 1}"
1279
+ if csv.last.match?(@parsers[:stray_quote])
1280
+ raise MalformedCSVError.new("Missing or stray quote",
1281
+ lineno + 1)
1944
1282
  end
1945
1283
  csv.last.gsub!(@double_quote_char, @quote_char)
1946
1284
  elsif @liberal_parsing
1947
1285
  csv << part
1948
1286
  else
1949
- raise MalformedCSVError,
1950
- "Missing or stray quote in line #{lineno + 1}"
1287
+ raise MalformedCSVError.new("Missing or stray quote",
1288
+ lineno + 1)
1951
1289
  end
1952
- elsif part =~ @parsers[:quote_or_nl]
1290
+ elsif part.match?(@parsers[:quote_or_nl])
1953
1291
  # Unquoted field with bad characters.
1954
- if part =~ @parsers[:nl_or_lf]
1955
- raise MalformedCSVError, "Unquoted fields do not allow " +
1956
- "\\r or \\n (line #{lineno + 1})."
1292
+ if part.match?(@parsers[:nl_or_lf])
1293
+ message = "Unquoted fields do not allow \\r or \\n"
1294
+ raise MalformedCSVError.new(message, lineno + 1)
1957
1295
  else
1958
1296
  if @liberal_parsing
1959
1297
  csv << part
1960
1298
  else
1961
- raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
1299
+ raise MalformedCSVError.new("Illegal quoting", lineno + 1)
1962
1300
  end
1963
1301
  end
1964
1302
  else
@@ -1974,10 +1312,11 @@ class CSV
1974
1312
  if in_extended_col
1975
1313
  # if we're at eof?(), a quoted field wasn't closed...
1976
1314
  if @io.eof?
1977
- raise MalformedCSVError,
1978
- "Unclosed quoted field on line #{lineno + 1}."
1315
+ raise MalformedCSVError.new("Unclosed quoted field",
1316
+ lineno + 1)
1979
1317
  elsif @field_size_limit and csv.last.size >= @field_size_limit
1980
- raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1318
+ raise MalformedCSVError.new("Field size exceeded",
1319
+ lineno + 1)
1981
1320
  end
1982
1321
  # otherwise, we need to loop and pull some more data to complete the row
1983
1322
  else
@@ -1986,10 +1325,13 @@ class CSV
1986
1325
  # save fields unconverted fields, if needed...
1987
1326
  unconverted = csv.dup if @unconverted_fields
1988
1327
 
1989
- # convert fields, if needed...
1990
- csv = convert_fields(csv) unless @use_headers or @converters.empty?
1991
- # parse out header rows and handle CSV::Row conversions...
1992
- csv = parse_headers(csv) if @use_headers
1328
+ if @use_headers
1329
+ # parse out header rows and handle CSV::Row conversions...
1330
+ csv = parse_headers(csv)
1331
+ else
1332
+ # convert fields, if needed...
1333
+ csv = convert_fields(csv)
1334
+ end
1993
1335
 
1994
1336
  # inject unconverted fields and accessor, if requested...
1995
1337
  if @unconverted_fields and not csv.respond_to? :unconverted_fields
@@ -2045,6 +1387,21 @@ class CSV
2045
1387
 
2046
1388
  private
2047
1389
 
1390
+ def determine_encoding(encoding, internal_encoding)
1391
+ # honor the IO encoding if we can, otherwise default to ASCII-8BIT
1392
+ io_encoding = raw_encoding(nil)
1393
+ return io_encoding if io_encoding
1394
+
1395
+ return Encoding.find(internal_encoding) if internal_encoding
1396
+
1397
+ if encoding
1398
+ encoding, = encoding.split(":", 2) if encoding.is_a?(String)
1399
+ return Encoding.find(encoding)
1400
+ end
1401
+
1402
+ Encoding.default_internal || Encoding.default_external
1403
+ end
1404
+
2048
1405
  #
2049
1406
  # Stores the indicated separators for later use.
2050
1407
  #
@@ -2058,6 +1415,11 @@ class CSV
2058
1415
  def init_separators(col_sep, row_sep, quote_char, force_quotes)
2059
1416
  # store the selected separators
2060
1417
  @col_sep = col_sep.to_s.encode(@encoding)
1418
+ if @col_sep == " "
1419
+ @col_sep_split_separator = Regexp.new(/#{Regexp.escape(@col_sep)}/)
1420
+ else
1421
+ @col_sep_split_separator = @col_sep
1422
+ end
2061
1423
  @row_sep = row_sep # encode after resolving :auto
2062
1424
  @quote_char = quote_char.to_s.encode(@encoding)
2063
1425
  @double_quote_char = @quote_char * 2
@@ -2087,15 +1449,28 @@ class CSV
2087
1449
  # (ensure will set default value)
2088
1450
  #
2089
1451
  break unless sample = @io.gets(nil, 1024)
1452
+
1453
+ cr = encode_str("\r")
1454
+ lf = encode_str("\n")
2090
1455
  # extend sample if we're unsure of the line ending
2091
- if sample.end_with? encode_str("\r")
1456
+ if sample.end_with?(cr)
2092
1457
  sample << (@io.gets(nil, 1) || "")
2093
1458
  end
2094
1459
 
2095
1460
  # try to find a standard separator
2096
- if sample =~ encode_re("\r\n?|\n")
2097
- @row_sep = $&
2098
- break
1461
+ sample.each_char.each_cons(2) do |char, next_char|
1462
+ case char
1463
+ when cr
1464
+ if next_char == lf
1465
+ @row_sep = encode_str("\r\n")
1466
+ else
1467
+ @row_sep = cr
1468
+ end
1469
+ break
1470
+ when lf
1471
+ @row_sep = lf
1472
+ break
1473
+ end
2099
1474
  end
2100
1475
  end
2101
1476
 
@@ -2249,10 +1624,24 @@ class CSV
2249
1624
  # shortcut.
2250
1625
  #
2251
1626
  def convert_fields(fields, headers = false)
2252
- # see if we are converting headers or fields
2253
- converters = headers ? @header_converters : @converters
1627
+ if headers
1628
+ converters = @header_converters
1629
+ else
1630
+ converters = @converters
1631
+ if !@use_headers and
1632
+ converters.empty? and
1633
+ @nil_value.nil? and
1634
+ @empty_value_is_empty_string
1635
+ return fields
1636
+ end
1637
+ end
2254
1638
 
2255
1639
  fields.map.with_index do |field, index|
1640
+ if field.nil?
1641
+ field = @nil_value
1642
+ elsif field.empty?
1643
+ field = @empty_value unless @empty_value_is_empty_string
1644
+ end
2256
1645
  converters.each do |converter|
2257
1646
  break if headers && field.nil?
2258
1647
  field = if converter.arity == 1 # straight field converter
@@ -2384,5 +1773,6 @@ def CSV(*args, &block)
2384
1773
  CSV.instance(*args, &block)
2385
1774
  end
2386
1775
 
2387
- require_relative "core_ext/array"
2388
- require_relative "core_ext/string"
1776
+ require_relative "csv/version"
1777
+ require_relative "csv/core_ext/array"
1778
+ require_relative "csv/core_ext/string"