csv 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 05b9101e168faf6acec7dd2c841e6a30084e3585
4
- data.tar.gz: ad75132c6ed63c2cf95c16b6dbf948d76d83d69f
2
+ SHA256:
3
+ metadata.gz: 12a1d0b486cc212d0b8cf8a044bc20b5bc4c2ae6d6d2c41633174ac8cbceb5d5
4
+ data.tar.gz: '06866abe09381a2a0af2bc36e325a6c8a83937231e446ed3ec4f1e3cb5ea4fd7'
5
5
  SHA512:
6
- metadata.gz: 45c5498cf70618cd26ddf0acd17db083beec0b9a532adf7279cc5cb830f400a1ae0b291e6492f4191020b97a427c46c019aa97fbfb83aaded9fc66b920015c26
7
- data.tar.gz: 5dd1c842d29df9e2c89da50b2ebbbed42e6386d05f3e5f76403b5d977e13a485b9978c815d418246798e4a4e72bf62995476ca4e9ea40095cc80f73319b30d7b
6
+ metadata.gz: 0bab4fe5e9e6612fda6041a86d10e88f8aacde4e97c87dd8306c3bbeed2db661cc45163ad12822af44395366e616f2bf3308fa37ea114d671401ef664a7204eb
7
+ data.tar.gz: d661fe106e45b04098fb3ac6e77418f73ec2f5cfc4117d3ba77666c317bca121853fd7bd3e9b71cad982b8c8eae61ccc1f8dc2fd4a84249b1c85d13c23af3df7
@@ -1,4 +1,15 @@
1
- Copyright (C) 1993-2013 Yukihiro Matsumoto. All rights reserved.
1
+ Copyright (C) 2005-2016 James Edward Gray II. All rights reserved.
2
+ Copyright (C) 2007-2017 Yukihiro Matsumoto. All rights reserved.
3
+ Copyright (C) 2017 SHIBATA Hiroshi. All rights reserved.
4
+ Copyright (C) 2017 Olivier Lacan. All rights reserved.
5
+ Copyright (C) 2017 Espartaco Palma. All rights reserved.
6
+ Copyright (C) 2017 Marcus Stollsteimer. All rights reserved.
7
+ Copyright (C) 2017 pavel. All rights reserved.
8
+ Copyright (C) 2017-2018 Steven Daniels. All rights reserved.
9
+ Copyright (C) 2018 Tomohiro Ogoke. All rights reserved.
10
+ Copyright (C) 2018 Kouhei Sutou. All rights reserved.
11
+ Copyright (C) 2018 Mitsutaka Mimura. All rights reserved.
12
+ Copyright (C) 2018 Vladislav. All rights reserved.
2
13
 
3
14
  Redistribution and use in source and binary forms, with or without
4
15
  modification, are permitted provided that the following conditions
data/README.md CHANGED
@@ -40,7 +40,13 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
40
40
 
41
41
  Bug reports and pull requests are welcome on GitHub at https://github.com/ruby/csv.
42
42
 
43
+ ### NOTE: About RuboCop
44
+
45
+ We don't use RuboCop because we can manage our coding style by ourselves. We want to accept small fluctuations in our coding style because we use Ruby.
46
+ Please do not submit issues and PRs that aim to introduce RuboCop in this repository.
43
47
 
44
48
  ## License
45
49
 
46
50
  The gem is available as open source under the terms of the [2-Clause BSD License](https://opensource.org/licenses/BSD-2-Clause).
51
+
52
+ See LICENSE.txt for details.
data/lib/csv.rb CHANGED
@@ -2,9 +2,7 @@
2
2
  # frozen_string_literal: true
3
3
  # = csv.rb -- CSV Reading and Writing
4
4
  #
5
- # Created by James Edward Gray II on 2005-10-31.
6
- # Copyright 2005 James Edward Gray II. You can redistribute or modify this code
7
- # under the terms of Ruby's license.
5
+ # Created by James Edward Gray II on 2005-10-31.
8
6
  #
9
7
  # See CSV for documentation.
10
8
  #
@@ -95,74 +93,146 @@ require "forwardable"
95
93
  require "English"
96
94
  require "date"
97
95
  require "stringio"
96
+ require_relative "csv/table"
97
+ require_relative "csv/row"
98
+
99
+ # This provides String#match? and Regexp#match? for Ruby 2.3.
100
+ unless String.method_defined?(:match?)
101
+ class CSV
102
+ module MatchP
103
+ refine String do
104
+ def match?(pattern)
105
+ self =~ pattern
106
+ end
107
+ end
108
+
109
+ refine Regexp do
110
+ def match?(string)
111
+ self =~ string
112
+ end
113
+ end
114
+ end
115
+ end
116
+
117
+ using CSV::MatchP
118
+ end
98
119
 
99
120
  #
100
121
  # This class provides a complete interface to CSV files and data. It offers
101
122
  # tools to enable you to read and write to and from Strings or IO objects, as
102
123
  # needed.
103
124
  #
104
- # == Reading
125
+ # The most generic interface of the class is:
105
126
  #
106
- # === From a File
127
+ # csv = CSV.new(string_or_io, **options)
107
128
  #
108
- # ==== A Line at a Time
129
+ # # Reading: IO object should be open for read
130
+ # csv.read # => array of rows
131
+ # # or
132
+ # csv.each do |row|
133
+ # # ...
134
+ # end
135
+ # # or
136
+ # row = csv.shift
109
137
  #
110
- # CSV.foreach("path/to/file.csv") do |row|
111
- # # use row here...
112
- # end
138
+ # # Writing: IO object should be open for write
139
+ # csv << row
113
140
  #
114
- # ==== All at Once
141
+ # There are several specialized class methods for one-statement reading or writing,
142
+ # described in the Specialized Methods section.
115
143
  #
116
- # arr_of_arrs = CSV.read("path/to/file.csv")
144
+ # If a String is passed into ::new, it is internally wrapped in a StringIO object.
117
145
  #
118
- # === From a String
146
+ # +options+ can be used for specifying the particular CSV flavor (column
147
+ # separators, row separators, value quoting and so on), and for data conversion;
148
+ # see the Data Conversion section for a description of the latter.
119
149
  #
120
- # ==== A Line at a Time
150
+ # == Specialized Methods
121
151
  #
122
- # CSV.parse("CSV,data,String") do |row|
123
- # # use row here...
124
- # end
152
+ # === Reading
125
153
  #
126
- # ==== All at Once
127
- #
128
- # arr_of_arrs = CSV.parse("CSV,data,String")
154
+ # # From a file: all at once
155
+ # arr_of_rows = CSV.read("path/to/file.csv", **options)
156
+ # # iterator-style:
157
+ # CSV.foreach("path/to/file.csv", **options) do |row|
158
+ # # ...
159
+ # end
129
160
  #
130
- # == Writing
161
+ # # From a string
162
+ # arr_of_rows = CSV.parse("CSV,data,String", **options)
163
+ # # or
164
+ # CSV.parse("CSV,data,String", **options) do |row|
165
+ # # ...
166
+ # end
131
167
  #
132
- # === To a File
168
+ # === Writing
133
169
  #
170
+ # # To a file
134
171
  # CSV.open("path/to/file.csv", "wb") do |csv|
135
172
  # csv << ["row", "of", "CSV", "data"]
136
173
  # csv << ["another", "row"]
137
174
  # # ...
138
175
  # end
139
176
  #
140
- # === To a String
141
- #
177
+ # # To a String
142
178
  # csv_string = CSV.generate do |csv|
143
179
  # csv << ["row", "of", "CSV", "data"]
144
180
  # csv << ["another", "row"]
145
181
  # # ...
146
182
  # end
147
183
  #
148
- # == Convert a Single Line
184
+ # === Shortcuts
149
185
  #
186
+ # # Core extensions for converting one line
150
187
  # csv_string = ["CSV", "data"].to_csv # to CSV
151
188
  # csv_array = "CSV,String".parse_csv # from CSV
152
189
  #
153
- # == Shortcut Interface
154
- #
190
+ # # CSV() method
155
191
  # CSV { |csv_out| csv_out << %w{my data here} } # to $stdout
156
192
  # CSV(csv = "") { |csv_str| csv_str << %w{my data here} } # to a String
157
193
  # CSV($stderr) { |csv_err| csv_err << %w{my data here} } # to $stderr
158
194
  # CSV($stdin) { |csv_in| csv_in.each { |row| p row } } # from $stdin
159
195
  #
160
- # == Advanced Usage
196
+ # == Data Conversion
197
+ #
198
+ # === CSV with headers
199
+ #
200
+ # CSV allows you to specify the column names of a CSV file, whether they are in the data or
201
+ # provided separately. If headers are specified, reading methods return an instance
202
+ # of CSV::Table, consisting of CSV::Row objects.
203
+ #
204
+ # # Headers are part of data
205
+ # data = CSV.parse(<<~ROWS, headers: true)
206
+ # Name,Department,Salary
207
+ # Bob,Engeneering,1000
208
+ # Jane,Sales,2000
209
+ # John,Management,5000
210
+ # ROWS
161
211
  #
162
- # === Wrap an IO Object
212
+ # data.class #=> CSV::Table
213
+ # data.first #=> #<CSV::Row "Name":"Bob" "Department":"Engeneering" "Salary":"1000">
214
+ # data.first.to_h #=> {"Name"=>"Bob", "Department"=>"Engeneering", "Salary"=>"1000"}
163
215
  #
164
- # csv = CSV.new(io, options)
165
- # # ... read (with gets() or each()) from and write (with <<) to csv here ...
216
+ # # Headers provided by developer
217
+ # data = CSV.parse('Bob,Engineering,1000', headers: %i[name department salary])
218
+ # data.first #=> #<CSV::Row name:"Bob" department:"Engineering" salary:"1000">
219
+ #
220
+ # === Typed data reading
221
+ #
222
+ # CSV allows you to provide a set of data _converters_, i.e. transformations to try on input
223
+ # data. A converter can be a symbol from the CSV::Converters constant's keys, or a lambda.
224
+ #
225
+ # # Without any converters:
226
+ # CSV.parse('Bob,2018-03-01,100')
227
+ # #=> [["Bob", "2018-03-01", "100"]]
228
+ #
229
+ # # With built-in converters:
230
+ # CSV.parse('Bob,2018-03-01,100', converters: %i[numeric date])
231
+ # #=> [["Bob", #<Date: 2018-03-01>, 100]]
232
+ #
233
+ # # With custom converters:
234
+ # CSV.parse('Bob,2018-03-01,100', converters: [->(v) { Time.parse(v) rescue v }])
235
+ # #=> [["Bob", 2018-03-01 00:00:00 +0200, "100"]]
166
236
  #
167
237
  # == CSV and Character Encodings (M17n or Multilingualization)
168
238
  #
@@ -207,757 +277,17 @@ require "stringio"
207
277
  # find with it.
208
278
  #
209
279
  class CSV
210
- # The version of the installed library.
211
- VERSION = "1.0.1"
212
-
213
- #
214
- # A CSV::Row is part Array and part Hash. It retains an order for the fields
215
- # and allows duplicates just as an Array would, but also allows you to access
216
- # fields by name just as you could if they were in a Hash.
217
- #
218
- # All rows returned by CSV will be constructed from this class, if header row
219
- # processing is activated.
220
- #
221
- class Row
222
- #
223
- # Construct a new CSV::Row from +headers+ and +fields+, which are expected
224
- # to be Arrays. If one Array is shorter than the other, it will be padded
225
- # with +nil+ objects.
226
- #
227
- # The optional +header_row+ parameter can be set to +true+ to indicate, via
228
- # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
229
- # row. Otherwise, the row is assumes to be a field row.
230
- #
231
- # A CSV::Row object supports the following Array methods through delegation:
232
- #
233
- # * empty?()
234
- # * length()
235
- # * size()
236
- #
237
- def initialize(headers, fields, header_row = false)
238
- @header_row = header_row
239
- headers.each { |h| h.freeze if h.is_a? String }
240
-
241
- # handle extra headers or fields
242
- @row = if headers.size >= fields.size
243
- headers.zip(fields)
244
- else
245
- fields.zip(headers).each(&:reverse!)
246
- end
247
- end
248
-
249
- # Internal data format used to compare equality.
250
- attr_reader :row
251
- protected :row
252
-
253
- ### Array Delegation ###
254
-
255
- extend Forwardable
256
- def_delegators :@row, :empty?, :length, :size
257
-
258
- # Returns +true+ if this is a header row.
259
- def header_row?
260
- @header_row
261
- end
262
-
263
- # Returns +true+ if this is a field row.
264
- def field_row?
265
- not header_row?
266
- end
267
-
268
- # Returns the headers of this row.
269
- def headers
270
- @row.map(&:first)
271
- end
272
-
273
- #
274
- # :call-seq:
275
- # field( header )
276
- # field( header, offset )
277
- # field( index )
278
- #
279
- # This method will return the field value by +header+ or +index+. If a field
280
- # is not found, +nil+ is returned.
281
- #
282
- # When provided, +offset+ ensures that a header match occurs on or later
283
- # than the +offset+ index. You can use this to find duplicate headers,
284
- # without resorting to hard-coding exact indices.
285
- #
286
- def field(header_or_index, minimum_index = 0)
287
- # locate the pair
288
- finder = (header_or_index.is_a?(Integer) || header_or_index.is_a?(Range)) ? :[] : :assoc
289
- pair = @row[minimum_index..-1].send(finder, header_or_index)
290
-
291
- # return the field if we have a pair
292
- if pair.nil?
293
- nil
294
- else
295
- header_or_index.is_a?(Range) ? pair.map(&:last) : pair.last
296
- end
297
- end
298
- alias_method :[], :field
299
-
300
- #
301
- # :call-seq:
302
- # fetch( header )
303
- # fetch( header ) { |row| ... }
304
- # fetch( header, default )
305
- #
306
- # This method will fetch the field value by +header+. It has the same
307
- # behavior as Hash#fetch: if there is a field with the given +header+, its
308
- # value is returned. Otherwise, if a block is given, it is yielded the
309
- # +header+ and its result is returned; if a +default+ is given as the
310
- # second argument, it is returned; otherwise a KeyError is raised.
311
- #
312
- def fetch(header, *varargs)
313
- raise ArgumentError, "Too many arguments" if varargs.length > 1
314
- pair = @row.assoc(header)
315
- if pair
316
- pair.last
317
- else
318
- if block_given?
319
- yield header
320
- elsif varargs.empty?
321
- raise KeyError, "key not found: #{header}"
322
- else
323
- varargs.first
324
- end
325
- end
326
- end
327
-
328
- # Returns +true+ if there is a field with the given +header+.
329
- def has_key?(header)
330
- !!@row.assoc(header)
331
- end
332
- alias_method :include?, :has_key?
333
- alias_method :key?, :has_key?
334
- alias_method :member?, :has_key?
335
-
336
- #
337
- # :call-seq:
338
- # []=( header, value )
339
- # []=( header, offset, value )
340
- # []=( index, value )
341
- #
342
- # Looks up the field by the semantics described in CSV::Row.field() and
343
- # assigns the +value+.
344
- #
345
- # Assigning past the end of the row with an index will set all pairs between
346
- # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
347
- # pair.
348
- #
349
- def []=(*args)
350
- value = args.pop
351
-
352
- if args.first.is_a? Integer
353
- if @row[args.first].nil? # extending past the end with index
354
- @row[args.first] = [nil, value]
355
- @row.map! { |pair| pair.nil? ? [nil, nil] : pair }
356
- else # normal index assignment
357
- @row[args.first][1] = value
358
- end
359
- else
360
- index = index(*args)
361
- if index.nil? # appending a field
362
- self << [args.first, value]
363
- else # normal header assignment
364
- @row[index][1] = value
365
- end
366
- end
367
- end
368
-
369
- #
370
- # :call-seq:
371
- # <<( field )
372
- # <<( header_and_field_array )
373
- # <<( header_and_field_hash )
374
- #
375
- # If a two-element Array is provided, it is assumed to be a header and field
376
- # and the pair is appended. A Hash works the same way with the key being
377
- # the header and the value being the field. Anything else is assumed to be
378
- # a lone field which is appended with a +nil+ header.
379
- #
380
- # This method returns the row for chaining.
381
- #
382
- def <<(arg)
383
- if arg.is_a?(Array) and arg.size == 2 # appending a header and name
384
- @row << arg
385
- elsif arg.is_a?(Hash) # append header and name pairs
386
- arg.each { |pair| @row << pair }
387
- else # append field value
388
- @row << [nil, arg]
389
- end
390
-
391
- self # for chaining
392
- end
393
-
394
- #
395
- # A shortcut for appending multiple fields. Equivalent to:
396
- #
397
- # args.each { |arg| csv_row << arg }
398
- #
399
- # This method returns the row for chaining.
400
- #
401
- def push(*args)
402
- args.each { |arg| self << arg }
403
-
404
- self # for chaining
405
- end
406
-
407
- #
408
- # :call-seq:
409
- # delete( header )
410
- # delete( header, offset )
411
- # delete( index )
412
- #
413
- # Used to remove a pair from the row by +header+ or +index+. The pair is
414
- # located as described in CSV::Row.field(). The deleted pair is returned,
415
- # or +nil+ if a pair could not be found.
416
- #
417
- def delete(header_or_index, minimum_index = 0)
418
- if header_or_index.is_a? Integer # by index
419
- @row.delete_at(header_or_index)
420
- elsif i = index(header_or_index, minimum_index) # by header
421
- @row.delete_at(i)
422
- else
423
- [ ]
424
- end
425
- end
426
-
427
- #
428
- # The provided +block+ is passed a header and field for each pair in the row
429
- # and expected to return +true+ or +false+, depending on whether the pair
430
- # should be deleted.
431
- #
432
- # This method returns the row for chaining.
433
- #
434
- # If no block is given, an Enumerator is returned.
435
- #
436
- def delete_if(&block)
437
- return enum_for(__method__) { size } unless block_given?
438
-
439
- @row.delete_if(&block)
440
-
441
- self # for chaining
442
- end
443
-
444
- #
445
- # This method accepts any number of arguments which can be headers, indices,
446
- # Ranges of either, or two-element Arrays containing a header and offset.
447
- # Each argument will be replaced with a field lookup as described in
448
- # CSV::Row.field().
449
- #
450
- # If called with no arguments, all fields are returned.
451
- #
452
- def fields(*headers_and_or_indices)
453
- if headers_and_or_indices.empty? # return all fields--no arguments
454
- @row.map(&:last)
455
- else # or work like values_at()
456
- all = []
457
- headers_and_or_indices.each do |h_or_i|
458
- if h_or_i.is_a? Range
459
- index_begin = h_or_i.begin.is_a?(Integer) ? h_or_i.begin :
460
- index(h_or_i.begin)
461
- index_end = h_or_i.end.is_a?(Integer) ? h_or_i.end :
462
- index(h_or_i.end)
463
- new_range = h_or_i.exclude_end? ? (index_begin...index_end) :
464
- (index_begin..index_end)
465
- all.concat(fields.values_at(new_range))
466
- else
467
- all << field(*Array(h_or_i))
468
- end
469
- end
470
- return all
471
- end
472
- end
473
- alias_method :values_at, :fields
474
-
475
- #
476
- # :call-seq:
477
- # index( header )
478
- # index( header, offset )
479
- #
480
- # This method will return the index of a field with the provided +header+.
481
- # The +offset+ can be used to locate duplicate header names, as described in
482
- # CSV::Row.field().
483
- #
484
- def index(header, minimum_index = 0)
485
- # find the pair
486
- index = headers[minimum_index..-1].index(header)
487
- # return the index at the right offset, if we found one
488
- index.nil? ? nil : index + minimum_index
489
- end
490
-
491
- # Returns +true+ if +name+ is a header for this row, and +false+ otherwise.
492
- def header?(name)
493
- headers.include? name
494
- end
495
- alias_method :include?, :header?
496
-
497
- #
498
- # Returns +true+ if +data+ matches a field in this row, and +false+
499
- # otherwise.
500
- #
501
- def field?(data)
502
- fields.include? data
503
- end
504
-
505
- include Enumerable
506
-
507
- #
508
- # Yields each pair of the row as header and field tuples (much like
509
- # iterating over a Hash). This method returns the row for chaining.
510
- #
511
- # If no block is given, an Enumerator is returned.
512
- #
513
- # Support for Enumerable.
514
- #
515
- def each(&block)
516
- return enum_for(__method__) { size } unless block_given?
517
-
518
- @row.each(&block)
519
-
520
- self # for chaining
521
- end
522
-
523
- #
524
- # Returns +true+ if this row contains the same headers and fields in the
525
- # same order as +other+.
526
- #
527
- def ==(other)
528
- return @row == other.row if other.is_a? CSV::Row
529
- @row == other
530
- end
531
-
532
- #
533
- # Collapses the row into a simple Hash. Be warned that this discards field
534
- # order and clobbers duplicate fields.
535
- #
536
- def to_hash
537
- @row.to_h
538
- end
539
-
540
- #
541
- # Returns the row as a CSV String. Headers are not used. Equivalent to:
542
- #
543
- # csv_row.fields.to_csv( options )
544
- #
545
- def to_csv(**options)
546
- fields.to_csv(options)
547
- end
548
- alias_method :to_s, :to_csv
549
280
 
550
- #
551
- # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step,
552
- # returning nil if any intermediate step is nil.
553
- #
554
- def dig(index_or_header, *indexes)
555
- value = field(index_or_header)
556
- if value.nil?
557
- nil
558
- elsif indexes.empty?
559
- value
560
- else
561
- unless value.respond_to?(:dig)
562
- raise TypeError, "#{value.class} does not have \#dig method"
563
- end
564
- value.dig(*indexes)
565
- end
566
- end
567
-
568
- # A summary of fields, by header, in an ASCII compatible String.
569
- def inspect
570
- str = ["#<", self.class.to_s]
571
- each do |header, field|
572
- str << " " << (header.is_a?(Symbol) ? header.to_s : header.inspect) <<
573
- ":" << field.inspect
574
- end
575
- str << ">"
576
- begin
577
- str.join('')
578
- rescue # any encoding error
579
- str.map do |s|
580
- e = Encoding::Converter.asciicompat_encoding(s.encoding)
581
- e ? s.encode(e) : s.force_encoding("ASCII-8BIT")
582
- end.join('')
583
- end
584
- end
585
- end
586
-
587
- #
588
- # A CSV::Table is a two-dimensional data structure for representing CSV
589
- # documents. Tables allow you to work with the data by row or column,
590
- # manipulate the data, and even convert the results back to CSV, if needed.
591
- #
592
- # All tables returned by CSV will be constructed from this class, if header
593
- # row processing is activated.
594
- #
595
- class Table
596
- #
597
- # Construct a new CSV::Table from +array_of_rows+, which are expected
598
- # to be CSV::Row objects. All rows are assumed to have the same headers.
599
- #
600
- # A CSV::Table object supports the following Array methods through
601
- # delegation:
602
- #
603
- # * empty?()
604
- # * length()
605
- # * size()
606
- #
607
- def initialize(array_of_rows)
608
- @table = array_of_rows
609
- @mode = :col_or_row
610
- end
611
-
612
- # The current access mode for indexing and iteration.
613
- attr_reader :mode
614
-
615
- # Internal data format used to compare equality.
616
- attr_reader :table
617
- protected :table
618
-
619
- ### Array Delegation ###
620
-
621
- extend Forwardable
622
- def_delegators :@table, :empty?, :length, :size
623
-
624
- #
625
- # Returns a duplicate table object, in column mode. This is handy for
626
- # chaining in a single call without changing the table mode, but be aware
627
- # that this method can consume a fair amount of memory for bigger data sets.
628
- #
629
- # This method returns the duplicate table for chaining. Don't chain
630
- # destructive methods (like []=()) this way though, since you are working
631
- # with a duplicate.
632
- #
633
- def by_col
634
- self.class.new(@table.dup).by_col!
635
- end
636
-
637
- #
638
- # Switches the mode of this table to column mode. All calls to indexing and
639
- # iteration methods will work with columns until the mode is changed again.
640
- #
641
- # This method returns the table and is safe to chain.
642
- #
643
- def by_col!
644
- @mode = :col
645
-
646
- self
647
- end
648
-
649
- #
650
- # Returns a duplicate table object, in mixed mode. This is handy for
651
- # chaining in a single call without changing the table mode, but be aware
652
- # that this method can consume a fair amount of memory for bigger data sets.
653
- #
654
- # This method returns the duplicate table for chaining. Don't chain
655
- # destructive methods (like []=()) this way though, since you are working
656
- # with a duplicate.
657
- #
658
- def by_col_or_row
659
- self.class.new(@table.dup).by_col_or_row!
660
- end
661
-
662
- #
663
- # Switches the mode of this table to mixed mode. All calls to indexing and
664
- # iteration methods will use the default intelligent indexing system until
665
- # the mode is changed again. In mixed mode an index is assumed to be a row
666
- # reference while anything else is assumed to be column access by headers.
667
- #
668
- # This method returns the table and is safe to chain.
669
- #
670
- def by_col_or_row!
671
- @mode = :col_or_row
672
-
673
- self
674
- end
675
-
676
- #
677
- # Returns a duplicate table object, in row mode. This is handy for chaining
678
- # in a single call without changing the table mode, but be aware that this
679
- # method can consume a fair amount of memory for bigger data sets.
680
- #
681
- # This method returns the duplicate table for chaining. Don't chain
682
- # destructive methods (like []=()) this way though, since you are working
683
- # with a duplicate.
684
- #
685
- def by_row
686
- self.class.new(@table.dup).by_row!
687
- end
688
-
689
- #
690
- # Switches the mode of this table to row mode. All calls to indexing and
691
- # iteration methods will work with rows until the mode is changed again.
692
- #
693
- # This method returns the table and is safe to chain.
694
- #
695
- def by_row!
696
- @mode = :row
697
-
698
- self
699
- end
700
-
701
- #
702
- # Returns the headers for the first row of this table (assumed to match all
703
- # other rows). An empty Array is returned for empty tables.
704
- #
705
- def headers
706
- if @table.empty?
707
- Array.new
708
- else
709
- @table.first.headers
710
- end
711
- end
712
-
713
- #
714
- # In the default mixed mode, this method returns rows for index access and
715
- # columns for header access. You can force the index association by first
716
- # calling by_col!() or by_row!().
717
- #
718
- # Columns are returned as an Array of values. Altering that Array has no
719
- # effect on the table.
720
- #
721
- def [](index_or_header)
722
- if @mode == :row or # by index
723
- (@mode == :col_or_row and (index_or_header.is_a?(Integer) or index_or_header.is_a?(Range)))
724
- @table[index_or_header]
725
- else # by header
726
- @table.map { |row| row[index_or_header] }
727
- end
728
- end
729
-
730
- #
731
- # In the default mixed mode, this method assigns rows for index access and
732
- # columns for header access. You can force the index association by first
733
- # calling by_col!() or by_row!().
734
- #
735
- # Rows may be set to an Array of values (which will inherit the table's
736
- # headers()) or a CSV::Row.
737
- #
738
- # Columns may be set to a single value, which is copied to each row of the
739
- # column, or an Array of values. Arrays of values are assigned to rows top
740
- # to bottom in row major order. Excess values are ignored and if the Array
741
- # does not have a value for each row the extra rows will receive a +nil+.
742
- #
743
- # Assigning to an existing column or row clobbers the data. Assigning to
744
- # new columns creates them at the right end of the table.
745
- #
746
- def []=(index_or_header, value)
747
- if @mode == :row or # by index
748
- (@mode == :col_or_row and index_or_header.is_a? Integer)
749
- if value.is_a? Array
750
- @table[index_or_header] = Row.new(headers, value)
751
- else
752
- @table[index_or_header] = value
753
- end
754
- else # set column
755
- if value.is_a? Array # multiple values
756
- @table.each_with_index do |row, i|
757
- if row.header_row?
758
- row[index_or_header] = index_or_header
759
- else
760
- row[index_or_header] = value[i]
761
- end
762
- end
763
- else # repeated value
764
- @table.each do |row|
765
- if row.header_row?
766
- row[index_or_header] = index_or_header
767
- else
768
- row[index_or_header] = value
769
- end
770
- end
771
- end
772
- end
773
- end
774
-
775
- #
776
- # The mixed mode default is to treat a list of indices as row access,
777
- # returning the rows indicated. Anything else is considered columnar
778
- # access. For columnar access, the return set has an Array for each row
779
- # with the values indicated by the headers in each Array. You can force
780
- # column or row mode using by_col!() or by_row!().
781
- #
782
- # You cannot mix column and row access.
783
- #
784
- def values_at(*indices_or_headers)
785
- if @mode == :row or # by indices
786
- ( @mode == :col_or_row and indices_or_headers.all? do |index|
787
- index.is_a?(Integer) or
788
- ( index.is_a?(Range) and
789
- index.first.is_a?(Integer) and
790
- index.last.is_a?(Integer) )
791
- end )
792
- @table.values_at(*indices_or_headers)
793
- else # by headers
794
- @table.map { |row| row.values_at(*indices_or_headers) }
795
- end
796
- end
797
-
798
- #
799
- # Adds a new row to the bottom end of this table. You can provide an Array,
800
- # which will be converted to a CSV::Row (inheriting the table's headers()),
801
- # or a CSV::Row.
802
- #
803
- # This method returns the table for chaining.
804
- #
805
- def <<(row_or_array)
806
- if row_or_array.is_a? Array # append Array
807
- @table << Row.new(headers, row_or_array)
808
- else # append Row
809
- @table << row_or_array
810
- end
811
-
812
- self # for chaining
813
- end
814
-
815
- #
816
- # A shortcut for appending multiple rows. Equivalent to:
817
- #
818
- # rows.each { |row| self << row }
819
- #
820
- # This method returns the table for chaining.
821
- #
822
- def push(*rows)
823
- rows.each { |row| self << row }
824
-
825
- self # for chaining
826
- end
827
-
828
- #
829
- # Removes and returns the indicated columns or rows. In the default mixed
830
- # mode indices refer to rows and everything else is assumed to be a column
831
- # headers. Use by_col!() or by_row!() to force the lookup.
832
- #
833
- def delete(*indexes_or_headers)
834
- if indexes_or_headers.empty?
835
- raise ArgumentError, "wrong number of arguments (given 0, expected 1+)"
836
- end
837
- deleted_values = indexes_or_headers.map do |index_or_header|
838
- if @mode == :row or # by index
839
- (@mode == :col_or_row and index_or_header.is_a? Integer)
840
- @table.delete_at(index_or_header)
841
- else # by header
842
- @table.map { |row| row.delete(index_or_header).last }
843
- end
844
- end
845
- if indexes_or_headers.size == 1
846
- deleted_values[0]
847
- else
848
- deleted_values
849
- end
850
- end
851
-
852
- #
853
- # Removes any column or row for which the block returns +true+. In the
854
- # default mixed mode or row mode, iteration is the standard row major
855
- # walking of rows. In column mode, iteration will +yield+ two element
856
- # tuples containing the column name and an Array of values for that column.
857
- #
858
- # This method returns the table for chaining.
859
- #
860
- # If no block is given, an Enumerator is returned.
861
- #
862
- def delete_if(&block)
863
- return enum_for(__method__) { @mode == :row or @mode == :col_or_row ? size : headers.size } unless block_given?
864
-
865
- if @mode == :row or @mode == :col_or_row # by index
866
- @table.delete_if(&block)
867
- else # by header
868
- deleted = []
869
- headers.each do |header|
870
- deleted << delete(header) if yield([header, self[header]])
871
- end
872
- end
873
-
874
- self # for chaining
875
- end
876
-
877
- include Enumerable
878
-
879
- #
880
- # In the default mixed mode or row mode, iteration is the standard row major
881
- # walking of rows. In column mode, iteration will +yield+ two element
882
- # tuples containing the column name and an Array of values for that column.
883
- #
884
- # This method returns the table for chaining.
885
- #
886
- # If no block is given, an Enumerator is returned.
887
- #
888
- def each(&block)
889
- return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given?
890
-
891
- if @mode == :col
892
- headers.each { |header| yield([header, self[header]]) }
893
- else
894
- @table.each(&block)
895
- end
896
-
897
- self # for chaining
898
- end
899
-
900
- # Returns +true+ if all rows of this table ==() +other+'s rows.
901
- def ==(other)
902
- return @table == other.table if other.is_a? CSV::Table
903
- @table == other
904
- end
905
-
906
- #
907
- # Returns the table as an Array of Arrays. Headers will be the first row,
908
- # then all of the field rows will follow.
909
- #
910
- def to_a
911
- array = [headers]
912
- @table.each do |row|
913
- array.push(row.fields) unless row.header_row?
914
- end
915
- return array
916
- end
917
-
918
- #
919
- # Returns the table as a complete CSV String. Headers will be listed first,
920
- # then all of the field rows.
921
- #
922
- # This method assumes you want the Table.headers(), unless you explicitly
923
- # pass <tt>:write_headers => false</tt>.
924
- #
925
- def to_csv(write_headers: true, **options)
926
- array = write_headers ? [headers.to_csv(options)] : []
927
- @table.each do |row|
928
- array.push(row.fields.to_csv(options)) unless row.header_row?
929
- end
930
- return array.join('')
931
- end
932
- alias_method :to_s, :to_csv
933
-
934
- #
935
- # Extracts the nested value specified by the sequence of +index+ or +header+ objects by calling dig at each step,
936
- # returning nil if any intermediate step is nil.
937
- #
938
- def dig(index_or_header, *index_or_headers)
939
- value = self[index_or_header]
940
- if value.nil?
941
- nil
942
- elsif index_or_headers.empty?
943
- value
944
- else
945
- unless value.respond_to?(:dig)
946
- raise TypeError, "#{value.class} does not have \#dig method"
947
- end
948
- value.dig(*index_or_headers)
949
- end
950
- end
951
-
952
- # Shows the mode and size of this table in a US-ASCII String.
953
- def inspect
954
- "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII")
281
+ # The error thrown when the parser encounters illegal CSV formatting.
282
+ class MalformedCSVError < RuntimeError
283
+ attr_reader :line_number
284
+ alias_method :lineno, :line_number
285
+ def initialize(message, line_number)
286
+ @line_number = line_number
287
+ super("#{message} in line #{line_number}.")
955
288
  end
956
289
  end
957
290
 
958
- # The error thrown when the parser encounters illegal CSV formatting.
959
- class MalformedCSVError < RuntimeError; end
960
-
961
291
  #
962
292
  # A FieldInfo Struct contains details about a field's position in the data
963
293
  # source it was read from. CSV will pass this Struct to some blocks that make
@@ -1020,7 +350,7 @@ class CSV
1020
350
  date: lambda { |f|
1021
351
  begin
1022
352
  e = f.encode(ConverterEncoding)
1023
- e =~ DateMatcher ? Date.parse(e) : f
353
+ e.match?(DateMatcher) ? Date.parse(e) : f
1024
354
  rescue # encoding conversion or date parse errors
1025
355
  f
1026
356
  end
@@ -1028,7 +358,7 @@ class CSV
1028
358
  date_time: lambda { |f|
1029
359
  begin
1030
360
  e = f.encode(ConverterEncoding)
1031
- e =~ DateTimeMatcher ? DateTime.parse(e) : f
361
+ e.match?(DateTimeMatcher) ? DateTime.parse(e) : f
1032
362
  rescue # encoding conversion or date parse errors
1033
363
  f
1034
364
  end
@@ -1321,7 +651,7 @@ class CSV
1321
651
  begin
1322
652
  f = File.open(filename, mode, file_opts)
1323
653
  rescue ArgumentError => e
1324
- raise unless /needs binmode/ =~ e.message and mode == "r"
654
+ raise unless /needs binmode/.match?(e.message) and mode == "r"
1325
655
  mode = "rb"
1326
656
  file_opts = {encoding: Encoding.default_external}.merge(file_opts)
1327
657
  retry
@@ -1560,6 +890,8 @@ class CSV
1560
890
  # attempt to parse input not conformant
1561
891
  # with RFC 4180, such as double quotes
1562
892
  # in unquoted fields.
893
+ # <b><tt>:nil_value</tt></b>:: The object substituted for each field that is +nil+ (default: +nil+).
894
+ # <b><tt>:empty_value</tt></b>:: The object substituted for each field that is an empty String (default: <tt>""</tt>).
1563
895
  #
1564
896
  # See CSV::DEFAULT_OPTIONS for the default settings.
1565
897
  #
@@ -1569,20 +901,14 @@ class CSV
1569
901
  def initialize(data, col_sep: ",", row_sep: :auto, quote_char: '"', field_size_limit: nil,
1570
902
  converters: nil, unconverted_fields: nil, headers: false, return_headers: false,
1571
903
  write_headers: nil, header_converters: nil, skip_blanks: false, force_quotes: false,
1572
- skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil)
904
+ skip_lines: nil, liberal_parsing: false, internal_encoding: nil, external_encoding: nil, encoding: nil,
905
+ nil_value: nil,
906
+ empty_value: "")
1573
907
  raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
1574
908
 
1575
909
  # create the IO object we will read from
1576
910
  @io = data.is_a?(String) ? StringIO.new(data) : data
1577
- # honor the IO encoding if we can, otherwise default to ASCII-8BIT
1578
- internal_encoding = Encoding.find(internal_encoding) if internal_encoding
1579
- external_encoding = Encoding.find(external_encoding) if external_encoding
1580
- if encoding
1581
- encoding, = encoding.split(":", 2) if encoding.is_a?(String)
1582
- encoding = Encoding.find(encoding)
1583
- end
1584
- @encoding = raw_encoding(nil) || internal_encoding || encoding ||
1585
- Encoding.default_internal || Encoding.default_external
911
+ @encoding = determine_encoding(encoding, internal_encoding)
1586
912
  #
1587
913
  # prepare for building safe regular expressions in the target encoding,
1588
914
  # if we can transcode the needed characters
@@ -1599,6 +925,10 @@ class CSV
1599
925
  # headers must be delayed until shift(), in case they need a row of content
1600
926
  @headers = nil
1601
927
 
928
+ @nil_value = nil_value
929
+ @empty_value = empty_value
930
+ @empty_value_is_empty_string = (empty_value == "")
931
+
1602
932
  init_separators(col_sep, row_sep, quote_char, force_quotes)
1603
933
  init_parsers(skip_blanks, field_size_limit, liberal_parsing)
1604
934
  init_converters(converters, :@converters, :convert)
@@ -1880,7 +1210,15 @@ class CSV
1880
1210
  @line = parse.clone
1881
1211
  end
1882
1212
 
1883
- parse.sub!(@parsers[:line_end], "")
1213
+ begin
1214
+ parse.sub!(@parsers[:line_end], "")
1215
+ rescue ArgumentError
1216
+ unless parse.valid_encoding?
1217
+ message = "Invalid byte sequence in #{parse.encoding}"
1218
+ raise MalformedCSVError.new(message, lineno + 1)
1219
+ end
1220
+ raise
1221
+ end
1884
1222
 
1885
1223
  if csv.empty?
1886
1224
  #
@@ -1903,7 +1241,7 @@ class CSV
1903
1241
 
1904
1242
  next if @skip_lines and @skip_lines.match parse
1905
1243
 
1906
- parts = parse.split(@col_sep, -1)
1244
+ parts = parse.split(@col_sep_split_separator, -1)
1907
1245
  if parts.empty?
1908
1246
  if in_extended_col
1909
1247
  csv[-1] << @col_sep # will be replaced with a @row_sep after the parts.each loop
@@ -1920,9 +1258,9 @@ class CSV
1920
1258
  if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0
1921
1259
  # extended column ends
1922
1260
  csv.last << part[0..-2]
1923
- if csv.last =~ @parsers[:stray_quote]
1924
- raise MalformedCSVError,
1925
- "Missing or stray quote in line #{lineno + 1}"
1261
+ if csv.last.match?(@parsers[:stray_quote])
1262
+ raise MalformedCSVError.new("Missing or stray quote",
1263
+ lineno + 1)
1926
1264
  end
1927
1265
  csv.last.gsub!(@double_quote_char, @quote_char)
1928
1266
  in_extended_col = false
@@ -1938,27 +1276,27 @@ class CSV
1938
1276
  elsif part.end_with?(@quote_char)
1939
1277
  # regular quoted column
1940
1278
  csv << part[1..-2]
1941
- if csv.last =~ @parsers[:stray_quote]
1942
- raise MalformedCSVError,
1943
- "Missing or stray quote in line #{lineno + 1}"
1279
+ if csv.last.match?(@parsers[:stray_quote])
1280
+ raise MalformedCSVError.new("Missing or stray quote",
1281
+ lineno + 1)
1944
1282
  end
1945
1283
  csv.last.gsub!(@double_quote_char, @quote_char)
1946
1284
  elsif @liberal_parsing
1947
1285
  csv << part
1948
1286
  else
1949
- raise MalformedCSVError,
1950
- "Missing or stray quote in line #{lineno + 1}"
1287
+ raise MalformedCSVError.new("Missing or stray quote",
1288
+ lineno + 1)
1951
1289
  end
1952
- elsif part =~ @parsers[:quote_or_nl]
1290
+ elsif part.match?(@parsers[:quote_or_nl])
1953
1291
  # Unquoted field with bad characters.
1954
- if part =~ @parsers[:nl_or_lf]
1955
- raise MalformedCSVError, "Unquoted fields do not allow " +
1956
- "\\r or \\n (line #{lineno + 1})."
1292
+ if part.match?(@parsers[:nl_or_lf])
1293
+ message = "Unquoted fields do not allow \\r or \\n"
1294
+ raise MalformedCSVError.new(message, lineno + 1)
1957
1295
  else
1958
1296
  if @liberal_parsing
1959
1297
  csv << part
1960
1298
  else
1961
- raise MalformedCSVError, "Illegal quoting in line #{lineno + 1}."
1299
+ raise MalformedCSVError.new("Illegal quoting", lineno + 1)
1962
1300
  end
1963
1301
  end
1964
1302
  else
@@ -1974,10 +1312,11 @@ class CSV
1974
1312
  if in_extended_col
1975
1313
  # if we're at eof?(), a quoted field wasn't closed...
1976
1314
  if @io.eof?
1977
- raise MalformedCSVError,
1978
- "Unclosed quoted field on line #{lineno + 1}."
1315
+ raise MalformedCSVError.new("Unclosed quoted field",
1316
+ lineno + 1)
1979
1317
  elsif @field_size_limit and csv.last.size >= @field_size_limit
1980
- raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}."
1318
+ raise MalformedCSVError.new("Field size exceeded",
1319
+ lineno + 1)
1981
1320
  end
1982
1321
  # otherwise, we need to loop and pull some more data to complete the row
1983
1322
  else
@@ -1986,10 +1325,13 @@ class CSV
1986
1325
  # save unconverted fields, if needed...
1987
1326
  unconverted = csv.dup if @unconverted_fields
1988
1327
 
1989
- # convert fields, if needed...
1990
- csv = convert_fields(csv) unless @use_headers or @converters.empty?
1991
- # parse out header rows and handle CSV::Row conversions...
1992
- csv = parse_headers(csv) if @use_headers
1328
+ if @use_headers
1329
+ # parse out header rows and handle CSV::Row conversions...
1330
+ csv = parse_headers(csv)
1331
+ else
1332
+ # convert fields, if needed...
1333
+ csv = convert_fields(csv)
1334
+ end
1993
1335
 
1994
1336
  # inject unconverted fields and accessor, if requested...
1995
1337
  if @unconverted_fields and not csv.respond_to? :unconverted_fields
@@ -2045,6 +1387,21 @@ class CSV
2045
1387
 
2046
1388
  private
2047
1389
 
1390
+ def determine_encoding(encoding, internal_encoding)
1391
+ # honor the IO encoding if we can, otherwise fall back to the requested encodings and then the process defaults
1392
+ io_encoding = raw_encoding(nil)
1393
+ return io_encoding if io_encoding
1394
+
1395
+ return Encoding.find(internal_encoding) if internal_encoding
1396
+
1397
+ if encoding
1398
+ encoding, = encoding.split(":", 2) if encoding.is_a?(String)
1399
+ return Encoding.find(encoding)
1400
+ end
1401
+
1402
+ Encoding.default_internal || Encoding.default_external
1403
+ end
1404
+
2048
1405
  #
2049
1406
  # Stores the indicated separators for later use.
2050
1407
  #
@@ -2058,6 +1415,11 @@ class CSV
2058
1415
  def init_separators(col_sep, row_sep, quote_char, force_quotes)
2059
1416
  # store the selected separators
2060
1417
  @col_sep = col_sep.to_s.encode(@encoding)
1418
+ if @col_sep == " "
1419
+ @col_sep_split_separator = Regexp.new(/#{Regexp.escape(@col_sep)}/)
1420
+ else
1421
+ @col_sep_split_separator = @col_sep
1422
+ end
2061
1423
  @row_sep = row_sep # encode after resolving :auto
2062
1424
  @quote_char = quote_char.to_s.encode(@encoding)
2063
1425
  @double_quote_char = @quote_char * 2
@@ -2087,15 +1449,28 @@ class CSV
2087
1449
  # (ensure will set default value)
2088
1450
  #
2089
1451
  break unless sample = @io.gets(nil, 1024)
1452
+
1453
+ cr = encode_str("\r")
1454
+ lf = encode_str("\n")
2090
1455
  # extend sample if we're unsure of the line ending
2091
- if sample.end_with? encode_str("\r")
1456
+ if sample.end_with?(cr)
2092
1457
  sample << (@io.gets(nil, 1) || "")
2093
1458
  end
2094
1459
 
2095
1460
  # try to find a standard separator
2096
- if sample =~ encode_re("\r\n?|\n")
2097
- @row_sep = $&
2098
- break
1461
+ sample.each_char.each_cons(2) do |char, next_char|
1462
+ case char
1463
+ when cr
1464
+ if next_char == lf
1465
+ @row_sep = encode_str("\r\n")
1466
+ else
1467
+ @row_sep = cr
1468
+ end
1469
+ break
1470
+ when lf
1471
+ @row_sep = lf
1472
+ break
1473
+ end
2099
1474
  end
2100
1475
  end
2101
1476
 
@@ -2249,10 +1624,24 @@ class CSV
2249
1624
  # shortcut.
2250
1625
  #
2251
1626
  def convert_fields(fields, headers = false)
2252
- # see if we are converting headers or fields
2253
- converters = headers ? @header_converters : @converters
1627
+ if headers
1628
+ converters = @header_converters
1629
+ else
1630
+ converters = @converters
1631
+ if !@use_headers and
1632
+ converters.empty? and
1633
+ @nil_value.nil? and
1634
+ @empty_value_is_empty_string
1635
+ return fields
1636
+ end
1637
+ end
2254
1638
 
2255
1639
  fields.map.with_index do |field, index|
1640
+ if field.nil?
1641
+ field = @nil_value
1642
+ elsif field.empty?
1643
+ field = @empty_value unless @empty_value_is_empty_string
1644
+ end
2256
1645
  converters.each do |converter|
2257
1646
  break if headers && field.nil?
2258
1647
  field = if converter.arity == 1 # straight field converter
@@ -2384,5 +1773,6 @@ def CSV(*args, &block)
2384
1773
  CSV.instance(*args, &block)
2385
1774
  end
2386
1775
 
2387
- require_relative "core_ext/array"
2388
- require_relative "core_ext/string"
1776
+ require_relative "csv/version"
1777
+ require_relative "csv/core_ext/array"
1778
+ require_relative "csv/core_ext/string"