csv 3.1.1 → 3.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,6 @@ class Array # :nodoc:
4
4
  # ["CSV", "data"].to_csv
5
5
  # #=> "CSV,data\n"
6
6
  def to_csv(**options)
7
- CSV.generate_line(self, options)
7
+ CSV.generate_line(self, **options)
8
8
  end
9
9
  end
@@ -4,6 +4,6 @@ class String # :nodoc:
4
4
  # "CSV,data".parse_csv
5
5
  # #=> ["CSV", "data"]
6
6
  def parse_csv(**options)
7
- CSV.parse_line(self, options)
7
+ CSV.parse_line(self, **options)
8
8
  end
9
9
  end
@@ -1,8 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class CSV
4
+ # Note: Don't use this class directly. This is an internal class.
4
5
  class FieldsConverter
5
6
  include Enumerable
7
+ #
8
+ # A CSV::FieldsConverter is a data structure for storing the
9
+ # fields converter properties to be passed as a parameter
10
+ # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
11
+ #
6
12
 
7
13
  def initialize(options={})
8
14
  @converters = []
@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
11
11
  using CSV::MatchP if CSV.const_defined?(:MatchP)
12
12
 
13
13
  class CSV
14
+ # Note: Don't use this class directly. This is an internal class.
14
15
  class Parser
16
+ #
17
+ # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
18
+ # or String object being read from or written to. Your data is never transcoded
19
+ # (unless you ask Ruby to transcode it for you) and will literally be parsed in
20
+ # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
21
+ # Encoding of your data. This is accomplished by transcoding the parser itself
22
+ # into your Encoding.
23
+ #
24
+
25
+ # Raised when encoding is invalid.
15
26
  class InvalidEncoding < StandardError
16
27
  end
17
28
 
29
+ #
30
+ # CSV::Scanner receives a CSV output, scans it and returns the content.
31
+ # It also controls the life cycle of the object with its methods +keep_start+,
32
+ # +keep_end+, +keep_back+, +keep_drop+.
33
+ #
34
+ # Uses StringScanner (the official strscan gem). Strscan provides lexical
35
+ # scanning operations on a String. We inherit its object and take advantage
36
+ # of its methods. For more information, please visit:
37
+ # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
38
+ #
18
39
  class Scanner < StringScanner
19
40
  alias_method :scan_all, :scan
20
41
 
@@ -38,7 +59,7 @@ class CSV
38
59
 
39
60
  def keep_end
40
61
  start = @keeps.pop
41
- string[start, pos - start]
62
+ string.byteslice(start, pos - start)
42
63
  end
43
64
 
44
65
  def keep_back
@@ -50,6 +71,18 @@ class CSV
50
71
  end
51
72
  end
52
73
 
74
+ #
75
+ # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
76
+ # It also controls the life cycle of the object with its methods +keep_start+,
77
+ # +keep_end+, +keep_back+, +keep_drop+.
78
+ #
79
+ # CSV::InputsScanner.scan() tries to match with pattern at the current position.
80
+ # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
81
+ # Otherwise, the scanner returns nil.
82
+ #
83
+ # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
84
+ # If there is no more data (eos? = true), it returns "".
85
+ #
53
86
  class InputsScanner
54
87
  def initialize(inputs, encoding, chunk_size: 8192)
55
88
  @inputs = inputs.dup
@@ -137,7 +170,7 @@ class CSV
137
170
 
138
171
  def keep_end
139
172
  start, buffer = @keeps.pop
140
- keep = @scanner.string[start, @scanner.pos - start]
173
+ keep = @scanner.string.byteslice(start, @scanner.pos - start)
141
174
  if buffer
142
175
  buffer << keep
143
176
  keep = buffer
@@ -192,7 +225,7 @@ class CSV
192
225
  input = @inputs.first
193
226
  case input
194
227
  when StringIO
195
- string = input.string
228
+ string = input.read
196
229
  raise InvalidEncoding unless string.valid_encoding?
197
230
  @scanner = StringScanner.new(string)
198
231
  @inputs.shift
@@ -319,6 +352,7 @@ class CSV
319
352
  end
320
353
 
321
354
  private
355
+ # A set of tasks to prepare the file in order to parse it
322
356
  def prepare
323
357
  prepare_variable
324
358
  prepare_quote_character
@@ -447,7 +481,13 @@ class CSV
447
481
  end
448
482
 
449
483
  def prepare_separators
450
- @column_separator = @options[:column_separator].to_s.encode(@encoding)
484
+ column_separator = @options[:column_separator]
485
+ @column_separator = column_separator.to_s.encode(@encoding)
486
+ if @column_separator.size < 1
487
+ message = ":col_sep must be 1 or more characters: "
488
+ message += column_separator.inspect
489
+ raise ArgumentError, message
490
+ end
451
491
  @row_separator =
452
492
  resolve_row_separator(@options[:row_separator]).encode(@encoding)
453
493
 
@@ -534,7 +574,9 @@ class CSV
534
574
  cr = "\r".encode(@encoding)
535
575
  lf = "\n".encode(@encoding)
536
576
  if @input.is_a?(StringIO)
537
- separator = detect_row_separator(@input.string, cr, lf)
577
+ pos = @input.pos
578
+ separator = detect_row_separator(@input.read, cr, lf)
579
+ @input.seek(pos)
538
580
  elsif @input.respond_to?(:gets)
539
581
  if @input.is_a?(File)
540
582
  chunk_size = 32 * 1024
@@ -651,7 +693,9 @@ class CSV
651
693
  return false if @quote_character.nil?
652
694
 
653
695
  if @input.is_a?(StringIO)
654
- sample = @input.string
696
+ pos = @input.pos
697
+ sample = @input.read
698
+ @input.seek(pos)
655
699
  else
656
700
  return false if @samples.empty?
657
701
  sample = @samples.first
@@ -684,7 +728,7 @@ class CSV
684
728
  UnoptimizedStringIO.new(sample)
685
729
  end
686
730
  if @input.is_a?(StringIO)
687
- inputs << UnoptimizedStringIO.new(@input.string)
731
+ inputs << UnoptimizedStringIO.new(@input.read)
688
732
  else
689
733
  inputs << @input
690
734
  end
@@ -697,7 +741,7 @@ class CSV
697
741
  def build_scanner
698
742
  string = nil
699
743
  if @samples.empty? and @input.is_a?(StringIO)
700
- string = @input.string
744
+ string = @input.read
701
745
  elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
702
746
  string = @samples[0]
703
747
  end
@@ -4,7 +4,7 @@ require "forwardable"
4
4
 
5
5
  class CSV
6
6
  #
7
- # A CSV::Row is part Array and part Hash. It retains an order for the fields
7
+ # A CSV::Row is part Array and part Hash. It retains an order for the fields
8
8
  # and allows duplicates just as an Array would, but also allows you to access
9
9
  # fields by name just as you could if they were in a Hash.
10
10
  #
@@ -13,13 +13,13 @@ class CSV
13
13
  #
14
14
  class Row
15
15
  #
16
- # Construct a new CSV::Row from +headers+ and +fields+, which are expected
17
- # to be Arrays. If one Array is shorter than the other, it will be padded
16
+ # Constructs a new CSV::Row from +headers+ and +fields+, which are expected
17
+ # to be Arrays. If one Array is shorter than the other, it will be padded
18
18
  # with +nil+ objects.
19
19
  #
20
20
  # The optional +header_row+ parameter can be set to +true+ to indicate, via
21
21
  # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
22
- # row. Otherwise, the row is assumes to be a field row.
22
+ # row. Otherwise, the row is assumed to be a field row.
23
23
  #
24
24
  # A CSV::Row object supports the following Array methods through delegation:
25
25
  #
@@ -74,11 +74,11 @@ class CSV
74
74
  # field( header, offset )
75
75
  # field( index )
76
76
  #
77
- # This method will return the field value by +header+ or +index+. If a field
77
+ # This method will return the field value by +header+ or +index+. If a field
78
78
  # is not found, +nil+ is returned.
79
79
  #
80
80
  # When provided, +offset+ ensures that a header match occurs on or later
81
- # than the +offset+ index. You can use this to find duplicate headers,
81
+ # than the +offset+ index. You can use this to find duplicate headers,
82
82
  # without resorting to hard-coding exact indices.
83
83
  #
84
84
  def field(header_or_index, minimum_index = 0)
@@ -142,7 +142,7 @@ class CSV
142
142
  # assigns the +value+.
143
143
  #
144
144
  # Assigning past the end of the row with an index will set all pairs between
145
- # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
145
+ # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
146
146
  # pair.
147
147
  #
148
148
  def []=(*args)
@@ -172,8 +172,8 @@ class CSV
172
172
  # <<( header_and_field_hash )
173
173
  #
174
174
  # If a two-element Array is provided, it is assumed to be a header and field
175
- # and the pair is appended. A Hash works the same way with the key being
176
- # the header and the value being the field. Anything else is assumed to be
175
+ # and the pair is appended. A Hash works the same way with the key being
176
+ # the header and the value being the field. Anything else is assumed to be
177
177
  # a lone field which is appended with a +nil+ header.
178
178
  #
179
179
  # This method returns the row for chaining.
@@ -191,7 +191,7 @@ class CSV
191
191
  end
192
192
 
193
193
  #
194
- # A shortcut for appending multiple fields. Equivalent to:
194
+ # A shortcut for appending multiple fields. Equivalent to:
195
195
  #
196
196
  # args.each { |arg| csv_row << arg }
197
197
  #
@@ -209,8 +209,8 @@ class CSV
209
209
  # delete( header, offset )
210
210
  # delete( index )
211
211
  #
212
- # Used to remove a pair from the row by +header+ or +index+. The pair is
213
- # located as described in CSV::Row.field(). The deleted pair is returned,
212
+ # Removes a pair from the row by +header+ or +index+. The pair is
213
+ # located as described in CSV::Row.field(). The deleted pair is returned,
214
214
  # or +nil+ if a pair could not be found.
215
215
  #
216
216
  def delete(header_or_index, minimum_index = 0)
@@ -325,7 +325,7 @@ class CSV
325
325
  end
326
326
 
327
327
  #
328
- # Collapses the row into a simple Hash. Be warned that this discards field
328
+ # Collapses the row into a simple Hash. Be warned that this discards field
329
329
  # order and clobbers duplicate fields.
330
330
  #
331
331
  def to_h
@@ -340,12 +340,12 @@ class CSV
340
340
  alias_method :to_ary, :to_a
341
341
 
342
342
  #
343
- # Returns the row as a CSV String. Headers are not used. Equivalent to:
343
+ # Returns the row as a CSV String. Headers are not used. Equivalent to:
344
344
  #
345
345
  # csv_row.fields.to_csv( options )
346
346
  #
347
347
  def to_csv(**options)
348
- fields.to_csv(options)
348
+ fields.to_csv(**options)
349
349
  end
350
350
  alias_method :to_s, :to_csv
351
351
 
@@ -367,7 +367,9 @@ class CSV
367
367
  end
368
368
  end
369
369
 
370
+ #
370
371
  # A summary of fields, by header, in an ASCII compatible String.
372
+ #
371
373
  def inspect
372
374
  str = ["#<", self.class.to_s]
373
375
  each do |header, field|
@@ -5,7 +5,7 @@ require "forwardable"
5
5
  class CSV
6
6
  #
7
7
  # A CSV::Table is a two-dimensional data structure for representing CSV
8
- # documents. Tables allow you to work with the data by row or column,
8
+ # documents. Tables allow you to work with the data by row or column,
9
9
  # manipulate the data, and even convert the results back to CSV, if needed.
10
10
  #
11
11
  # All tables returned by CSV will be constructed from this class, if header
@@ -13,8 +13,8 @@ class CSV
13
13
  #
14
14
  class Table
15
15
  #
16
- # Construct a new CSV::Table from +array_of_rows+, which are expected
17
- # to be CSV::Row objects. All rows are assumed to have the same headers.
16
+ # Constructs a new CSV::Table from +array_of_rows+, which are expected
17
+ # to be CSV::Row objects. All rows are assumed to have the same headers.
18
18
  #
19
19
  # The optional +headers+ parameter can be set to Array of headers.
20
20
  # If headers aren't set, headers are fetched from CSV::Row objects.
@@ -55,11 +55,11 @@ class CSV
55
55
  def_delegators :@table, :empty?, :length, :size
56
56
 
57
57
  #
58
- # Returns a duplicate table object, in column mode. This is handy for
58
+ # Returns a duplicate table object, in column mode. This is handy for
59
59
  # chaining in a single call without changing the table mode, but be aware
60
60
  # that this method can consume a fair amount of memory for bigger data sets.
61
61
  #
62
- # This method returns the duplicate table for chaining. Don't chain
62
+ # This method returns the duplicate table for chaining. Don't chain
63
63
  # destructive methods (like []=()) this way though, since you are working
64
64
  # with a duplicate.
65
65
  #
@@ -68,7 +68,7 @@ class CSV
68
68
  end
69
69
 
70
70
  #
71
- # Switches the mode of this table to column mode. All calls to indexing and
71
+ # Switches the mode of this table to column mode. All calls to indexing and
72
72
  # iteration methods will work with columns until the mode is changed again.
73
73
  #
74
74
  # This method returns the table and is safe to chain.
@@ -80,7 +80,7 @@ class CSV
80
80
  end
81
81
 
82
82
  #
83
- # Returns a duplicate table object, in mixed mode. This is handy for
83
+ # Returns a duplicate table object, in mixed mode. This is handy for
84
84
  # chaining in a single call without changing the table mode, but be aware
85
85
  # that this method can consume a fair amount of memory for bigger data sets.
86
86
  #
@@ -93,9 +93,9 @@ class CSV
93
93
  end
94
94
 
95
95
  #
96
- # Switches the mode of this table to mixed mode. All calls to indexing and
96
+ # Switches the mode of this table to mixed mode. All calls to indexing and
97
97
  # iteration methods will use the default intelligent indexing system until
98
- # the mode is changed again. In mixed mode an index is assumed to be a row
98
+ # the mode is changed again. In mixed mode an index is assumed to be a row
99
99
  # reference while anything else is assumed to be column access by headers.
100
100
  #
101
101
  # This method returns the table and is safe to chain.
@@ -120,7 +120,7 @@ class CSV
120
120
  end
121
121
 
122
122
  #
123
- # Switches the mode of this table to row mode. All calls to indexing and
123
+ # Switches the mode of this table to row mode. All calls to indexing and
124
124
  # iteration methods will work with rows until the mode is changed again.
125
125
  #
126
126
  # This method returns the table and is safe to chain.
@@ -146,7 +146,7 @@ class CSV
146
146
 
147
147
  #
148
148
  # In the default mixed mode, this method returns rows for index access and
149
- # columns for header access. You can force the index association by first
149
+ # columns for header access. You can force the index association by first
150
150
  # calling by_col!() or by_row!().
151
151
  #
152
152
  # Columns are returned as an Array of values. Altering that Array has no
@@ -163,18 +163,18 @@ class CSV
163
163
 
164
164
  #
165
165
  # In the default mixed mode, this method assigns rows for index access and
166
- # columns for header access. You can force the index association by first
166
+ # columns for header access. You can force the index association by first
167
167
  # calling by_col!() or by_row!().
168
168
  #
169
169
  # Rows may be set to an Array of values (which will inherit the table's
170
170
  # headers()) or a CSV::Row.
171
171
  #
172
172
  # Columns may be set to a single value, which is copied to each row of the
173
- # column, or an Array of values. Arrays of values are assigned to rows top
174
- # to bottom in row major order. Excess values are ignored and if the Array
173
+ # column, or an Array of values. Arrays of values are assigned to rows top
174
+ # to bottom in row major order. Excess values are ignored and if the Array
175
175
  # does not have a value for each row the extra rows will receive a +nil+.
176
176
  #
177
- # Assigning to an existing column or row clobbers the data. Assigning to
177
+ # Assigning to an existing column or row clobbers the data. Assigning to
178
178
  # new columns creates them at the right end of the table.
179
179
  #
180
180
  def []=(index_or_header, value)
@@ -212,9 +212,9 @@ class CSV
212
212
 
213
213
  #
214
214
  # The mixed mode default is to treat a list of indices as row access,
215
- # returning the rows indicated. Anything else is considered columnar
216
- # access. For columnar access, the return set has an Array for each row
217
- # with the values indicated by the headers in each Array. You can force
215
+ # returning the rows indicated. Anything else is considered columnar
216
+ # access. For columnar access, the return set has an Array for each row
217
+ # with the values indicated by the headers in each Array. You can force
218
218
  # column or row mode using by_col!() or by_row!().
219
219
  #
220
220
  # You cannot mix column and row access.
@@ -234,7 +234,7 @@ class CSV
234
234
  end
235
235
 
236
236
  #
237
- # Adds a new row to the bottom end of this table. You can provide an Array,
237
+ # Adds a new row to the bottom end of this table. You can provide an Array,
238
238
  # which will be converted to a CSV::Row (inheriting the table's headers()),
239
239
  # or a CSV::Row.
240
240
  #
@@ -251,7 +251,7 @@ class CSV
251
251
  end
252
252
 
253
253
  #
254
- # A shortcut for appending multiple rows. Equivalent to:
254
+ # A shortcut for appending multiple rows. Equivalent to:
255
255
  #
256
256
  # rows.each { |row| self << row }
257
257
  #
@@ -264,9 +264,9 @@ class CSV
264
264
  end
265
265
 
266
266
  #
267
- # Removes and returns the indicated columns or rows. In the default mixed
267
+ # Removes and returns the indicated columns or rows. In the default mixed
268
268
  # mode indices refer to rows and everything else is assumed to be a column
269
- # headers. Use by_col!() or by_row!() to force the lookup.
269
+ # header. Use by_col!() or by_row!() to force the lookup.
270
270
  #
271
271
  def delete(*indexes_or_headers)
272
272
  if indexes_or_headers.empty?
@@ -293,9 +293,9 @@ class CSV
293
293
  end
294
294
 
295
295
  #
296
- # Removes any column or row for which the block returns +true+. In the
296
+ # Removes any column or row for which the block returns +true+. In the
297
297
  # default mixed mode or row mode, iteration is the standard row major
298
- # walking of rows. In column mode, iteration will +yield+ two element
298
+ # walking of rows. In column mode, iteration will +yield+ two element
299
299
  # tuples containing the column name and an Array of values for that column.
300
300
  #
301
301
  # This method returns the table for chaining.
@@ -321,7 +321,7 @@ class CSV
321
321
 
322
322
  #
323
323
  # In the default mixed mode or row mode, iteration is the standard row major
324
- # walking of rows. In column mode, iteration will +yield+ two element
324
+ # walking of rows. In column mode, iteration will +yield+ two element
325
325
  # tuples containing the column name and an Array of values for that column.
326
326
  #
327
327
  # This method returns the table for chaining.
@@ -347,7 +347,7 @@ class CSV
347
347
  end
348
348
 
349
349
  #
350
- # Returns the table as an Array of Arrays. Headers will be the first row,
350
+ # Returns the table as an Array of Arrays. Headers will be the first row,
351
351
  # then all of the field rows will follow.
352
352
  #
353
353
  def to_a
@@ -360,16 +360,16 @@ class CSV
360
360
  end
361
361
 
362
362
  #
363
- # Returns the table as a complete CSV String. Headers will be listed first,
363
+ # Returns the table as a complete CSV String. Headers will be listed first,
364
364
  # then all of the field rows.
365
365
  #
366
366
  # This method assumes you want the Table.headers(), unless you explicitly
367
367
  # pass <tt>:write_headers => false</tt>.
368
368
  #
369
369
  def to_csv(write_headers: true, **options)
370
- array = write_headers ? [headers.to_csv(options)] : []
370
+ array = write_headers ? [headers.to_csv(**options)] : []
371
371
  @table.each do |row|
372
- array.push(row.fields.to_csv(options)) unless row.header_row?
372
+ array.push(row.fields.to_csv(**options)) unless row.header_row?
373
373
  end
374
374
 
375
375
  array.join("")