csv 3.1.1 → 3.1.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,6 @@ class Array # :nodoc:
4
4
  # ["CSV", "data"].to_csv
5
5
  # #=> "CSV,data\n"
6
6
  def to_csv(**options)
7
- CSV.generate_line(self, options)
7
+ CSV.generate_line(self, **options)
8
8
  end
9
9
  end
@@ -4,6 +4,6 @@ class String # :nodoc:
4
4
  # "CSV,data".parse_csv
5
5
  # #=> ["CSV", "data"]
6
6
  def parse_csv(**options)
7
- CSV.parse_line(self, options)
7
+ CSV.parse_line(self, **options)
8
8
  end
9
9
  end
@@ -1,8 +1,14 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class CSV
4
+ # Note: Don't use this class directly. This is an internal class.
4
5
  class FieldsConverter
5
6
  include Enumerable
7
+ #
8
+ # A CSV::FieldsConverter is a data structure for storing the
9
+ # fields converter properties to be passed as a parameter
10
+ # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
11
+ #
6
12
 
7
13
  def initialize(options={})
8
14
  @converters = []
@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
11
11
  using CSV::MatchP if CSV.const_defined?(:MatchP)
12
12
 
13
13
  class CSV
14
+ # Note: Don't use this class directly. This is an internal class.
14
15
  class Parser
16
+ #
17
+ # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
18
+ # or String object being read from or written to. Your data is never transcoded
19
+ # (unless you ask Ruby to transcode it for you) and will literally be parsed in
20
+ # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
21
+ # Encoding of your data. This is accomplished by transcoding the parser itself
22
+ # into your Encoding.
23
+ #
24
+
25
+ # Raised when encoding is invalid.
15
26
  class InvalidEncoding < StandardError
16
27
  end
17
28
 
29
+ #
30
+ # CSV::Scanner receives a CSV output, scans it and returns the content.
31
+ # It also controls the life cycle of the object with its methods +keep_start+,
32
+ # +keep_end+, +keep_back+, +keep_drop+.
33
+ #
34
+ # Uses StringScanner (the official strscan gem). Strscan provides lexical
35
+ # scanning operations on a String. We inherit its object and take advantage
36
+ # of its methods. For more information, please visit:
37
+ # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
38
+ #
18
39
  class Scanner < StringScanner
19
40
  alias_method :scan_all, :scan
20
41
 
@@ -38,7 +59,7 @@ class CSV
38
59
 
39
60
  def keep_end
40
61
  start = @keeps.pop
41
- string[start, pos - start]
62
+ string.byteslice(start, pos - start)
42
63
  end
43
64
 
44
65
  def keep_back
@@ -50,6 +71,18 @@ class CSV
50
71
  end
51
72
  end
52
73
 
74
+ #
75
+ # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
76
+ # It also controls the life cycle of the object with its methods +keep_start+,
77
+ # +keep_end+, +keep_back+, +keep_drop+.
78
+ #
79
+ # CSV::InputsScanner.scan() tries to match with pattern at the current position.
80
+ # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
81
+ # Otherwise, the scanner returns nil.
82
+ #
83
+ # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
84
+ # If there is no more data (eos? = true), it returns "".
85
+ #
53
86
  class InputsScanner
54
87
  def initialize(inputs, encoding, chunk_size: 8192)
55
88
  @inputs = inputs.dup
@@ -137,7 +170,7 @@ class CSV
137
170
 
138
171
  def keep_end
139
172
  start, buffer = @keeps.pop
140
- keep = @scanner.string[start, @scanner.pos - start]
173
+ keep = @scanner.string.byteslice(start, @scanner.pos - start)
141
174
  if buffer
142
175
  buffer << keep
143
176
  keep = buffer
@@ -192,7 +225,7 @@ class CSV
192
225
  input = @inputs.first
193
226
  case input
194
227
  when StringIO
195
- string = input.string
228
+ string = input.read
196
229
  raise InvalidEncoding unless string.valid_encoding?
197
230
  @scanner = StringScanner.new(string)
198
231
  @inputs.shift
@@ -319,6 +352,7 @@ class CSV
319
352
  end
320
353
 
321
354
  private
355
+ # A set of tasks to prepare the file in order to parse it
322
356
  def prepare
323
357
  prepare_variable
324
358
  prepare_quote_character
@@ -447,7 +481,13 @@ class CSV
447
481
  end
448
482
 
449
483
  def prepare_separators
450
- @column_separator = @options[:column_separator].to_s.encode(@encoding)
484
+ column_separator = @options[:column_separator]
485
+ @column_separator = column_separator.to_s.encode(@encoding)
486
+ if @column_separator.size < 1
487
+ message = ":col_sep must be 1 or more characters: "
488
+ message += column_separator.inspect
489
+ raise ArgumentError, message
490
+ end
451
491
  @row_separator =
452
492
  resolve_row_separator(@options[:row_separator]).encode(@encoding)
453
493
 
@@ -534,7 +574,9 @@ class CSV
534
574
  cr = "\r".encode(@encoding)
535
575
  lf = "\n".encode(@encoding)
536
576
  if @input.is_a?(StringIO)
537
- separator = detect_row_separator(@input.string, cr, lf)
577
+ pos = @input.pos
578
+ separator = detect_row_separator(@input.read, cr, lf)
579
+ @input.seek(pos)
538
580
  elsif @input.respond_to?(:gets)
539
581
  if @input.is_a?(File)
540
582
  chunk_size = 32 * 1024
@@ -651,7 +693,9 @@ class CSV
651
693
  return false if @quote_character.nil?
652
694
 
653
695
  if @input.is_a?(StringIO)
654
- sample = @input.string
696
+ pos = @input.pos
697
+ sample = @input.read
698
+ @input.seek(pos)
655
699
  else
656
700
  return false if @samples.empty?
657
701
  sample = @samples.first
@@ -684,7 +728,7 @@ class CSV
684
728
  UnoptimizedStringIO.new(sample)
685
729
  end
686
730
  if @input.is_a?(StringIO)
687
- inputs << UnoptimizedStringIO.new(@input.string)
731
+ inputs << UnoptimizedStringIO.new(@input.read)
688
732
  else
689
733
  inputs << @input
690
734
  end
@@ -697,7 +741,7 @@ class CSV
697
741
  def build_scanner
698
742
  string = nil
699
743
  if @samples.empty? and @input.is_a?(StringIO)
700
- string = @input.string
744
+ string = @input.read
701
745
  elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
702
746
  string = @samples[0]
703
747
  end
@@ -4,7 +4,7 @@ require "forwardable"
4
4
 
5
5
  class CSV
6
6
  #
7
- # A CSV::Row is part Array and part Hash. It retains an order for the fields
7
+ # A CSV::Row is part Array and part Hash. It retains an order for the fields
8
8
  # and allows duplicates just as an Array would, but also allows you to access
9
9
  # fields by name just as you could if they were in a Hash.
10
10
  #
@@ -13,13 +13,13 @@ class CSV
13
13
  #
14
14
  class Row
15
15
  #
16
- # Construct a new CSV::Row from +headers+ and +fields+, which are expected
17
- # to be Arrays. If one Array is shorter than the other, it will be padded
16
+ # Constructs a new CSV::Row from +headers+ and +fields+, which are expected
17
+ # to be Arrays. If one Array is shorter than the other, it will be padded
18
18
  # with +nil+ objects.
19
19
  #
20
20
  # The optional +header_row+ parameter can be set to +true+ to indicate, via
21
21
  # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
22
- # row. Otherwise, the row is assumes to be a field row.
22
+ # row. Otherwise, the row is assumed to be a field row.
23
23
  #
24
24
  # A CSV::Row object supports the following Array methods through delegation:
25
25
  #
@@ -74,11 +74,11 @@ class CSV
74
74
  # field( header, offset )
75
75
  # field( index )
76
76
  #
77
- # This method will return the field value by +header+ or +index+. If a field
77
+ # This method will return the field value by +header+ or +index+. If a field
78
78
  # is not found, +nil+ is returned.
79
79
  #
80
80
  # When provided, +offset+ ensures that a header match occurs on or later
81
- # than the +offset+ index. You can use this to find duplicate headers,
81
+ # than the +offset+ index. You can use this to find duplicate headers,
82
82
  # without resorting to hard-coding exact indices.
83
83
  #
84
84
  def field(header_or_index, minimum_index = 0)
@@ -142,7 +142,7 @@ class CSV
142
142
  # assigns the +value+.
143
143
  #
144
144
  # Assigning past the end of the row with an index will set all pairs between
145
- # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
145
+ # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
146
146
  # pair.
147
147
  #
148
148
  def []=(*args)
@@ -172,8 +172,8 @@ class CSV
172
172
  # <<( header_and_field_hash )
173
173
  #
174
174
  # If a two-element Array is provided, it is assumed to be a header and field
175
- # and the pair is appended. A Hash works the same way with the key being
176
- # the header and the value being the field. Anything else is assumed to be
175
+ # and the pair is appended. A Hash works the same way with the key being
176
+ # the header and the value being the field. Anything else is assumed to be
177
177
  # a lone field which is appended with a +nil+ header.
178
178
  #
179
179
  # This method returns the row for chaining.
@@ -191,7 +191,7 @@ class CSV
191
191
  end
192
192
 
193
193
  #
194
- # A shortcut for appending multiple fields. Equivalent to:
194
+ # A shortcut for appending multiple fields. Equivalent to:
195
195
  #
196
196
  # args.each { |arg| csv_row << arg }
197
197
  #
@@ -209,8 +209,8 @@ class CSV
209
209
  # delete( header, offset )
210
210
  # delete( index )
211
211
  #
212
- # Used to remove a pair from the row by +header+ or +index+. The pair is
213
- # located as described in CSV::Row.field(). The deleted pair is returned,
212
+ # Removes a pair from the row by +header+ or +index+. The pair is
213
+ # located as described in CSV::Row.field(). The deleted pair is returned,
214
214
  # or +nil+ if a pair could not be found.
215
215
  #
216
216
  def delete(header_or_index, minimum_index = 0)
@@ -325,7 +325,7 @@ class CSV
325
325
  end
326
326
 
327
327
  #
328
- # Collapses the row into a simple Hash. Be warned that this discards field
328
+ # Collapses the row into a simple Hash. Be warned that this discards field
329
329
  # order and clobbers duplicate fields.
330
330
  #
331
331
  def to_h
@@ -340,12 +340,12 @@ class CSV
340
340
  alias_method :to_ary, :to_a
341
341
 
342
342
  #
343
- # Returns the row as a CSV String. Headers are not used. Equivalent to:
343
+ # Returns the row as a CSV String. Headers are not used. Equivalent to:
344
344
  #
345
345
  # csv_row.fields.to_csv( options )
346
346
  #
347
347
  def to_csv(**options)
348
- fields.to_csv(options)
348
+ fields.to_csv(**options)
349
349
  end
350
350
  alias_method :to_s, :to_csv
351
351
 
@@ -367,7 +367,9 @@ class CSV
367
367
  end
368
368
  end
369
369
 
370
+ #
370
371
  # A summary of fields, by header, in an ASCII compatible String.
372
+ #
371
373
  def inspect
372
374
  str = ["#<", self.class.to_s]
373
375
  each do |header, field|
@@ -5,7 +5,7 @@ require "forwardable"
5
5
  class CSV
6
6
  #
7
7
  # A CSV::Table is a two-dimensional data structure for representing CSV
8
- # documents. Tables allow you to work with the data by row or column,
8
+ # documents. Tables allow you to work with the data by row or column,
9
9
  # manipulate the data, and even convert the results back to CSV, if needed.
10
10
  #
11
11
  # All tables returned by CSV will be constructed from this class, if header
@@ -13,8 +13,8 @@ class CSV
13
13
  #
14
14
  class Table
15
15
  #
16
- # Construct a new CSV::Table from +array_of_rows+, which are expected
17
- # to be CSV::Row objects. All rows are assumed to have the same headers.
16
+ # Constructs a new CSV::Table from +array_of_rows+, which are expected
17
+ # to be CSV::Row objects. All rows are assumed to have the same headers.
18
18
  #
19
19
  # The optional +headers+ parameter can be set to Array of headers.
20
20
  # If headers aren't set, headers are fetched from CSV::Row objects.
@@ -55,11 +55,11 @@ class CSV
55
55
  def_delegators :@table, :empty?, :length, :size
56
56
 
57
57
  #
58
- # Returns a duplicate table object, in column mode. This is handy for
58
+ # Returns a duplicate table object, in column mode. This is handy for
59
59
  # chaining in a single call without changing the table mode, but be aware
60
60
  # that this method can consume a fair amount of memory for bigger data sets.
61
61
  #
62
- # This method returns the duplicate table for chaining. Don't chain
62
+ # This method returns the duplicate table for chaining. Don't chain
63
63
  # destructive methods (like []=()) this way though, since you are working
64
64
  # with a duplicate.
65
65
  #
@@ -68,7 +68,7 @@ class CSV
68
68
  end
69
69
 
70
70
  #
71
- # Switches the mode of this table to column mode. All calls to indexing and
71
+ # Switches the mode of this table to column mode. All calls to indexing and
72
72
  # iteration methods will work with columns until the mode is changed again.
73
73
  #
74
74
  # This method returns the table and is safe to chain.
@@ -80,7 +80,7 @@ class CSV
80
80
  end
81
81
 
82
82
  #
83
- # Returns a duplicate table object, in mixed mode. This is handy for
83
+ # Returns a duplicate table object, in mixed mode. This is handy for
84
84
  # chaining in a single call without changing the table mode, but be aware
85
85
  # that this method can consume a fair amount of memory for bigger data sets.
86
86
  #
@@ -93,9 +93,9 @@ class CSV
93
93
  end
94
94
 
95
95
  #
96
- # Switches the mode of this table to mixed mode. All calls to indexing and
96
+ # Switches the mode of this table to mixed mode. All calls to indexing and
97
97
  # iteration methods will use the default intelligent indexing system until
98
- # the mode is changed again. In mixed mode an index is assumed to be a row
98
+ # the mode is changed again. In mixed mode an index is assumed to be a row
99
99
  # reference while anything else is assumed to be column access by headers.
100
100
  #
101
101
  # This method returns the table and is safe to chain.
@@ -120,7 +120,7 @@ class CSV
120
120
  end
121
121
 
122
122
  #
123
- # Switches the mode of this table to row mode. All calls to indexing and
123
+ # Switches the mode of this table to row mode. All calls to indexing and
124
124
  # iteration methods will work with rows until the mode is changed again.
125
125
  #
126
126
  # This method returns the table and is safe to chain.
@@ -146,7 +146,7 @@ class CSV
146
146
 
147
147
  #
148
148
  # In the default mixed mode, this method returns rows for index access and
149
- # columns for header access. You can force the index association by first
149
+ # columns for header access. You can force the index association by first
150
150
  # calling by_col!() or by_row!().
151
151
  #
152
152
  # Columns are returned as an Array of values. Altering that Array has no
@@ -163,18 +163,18 @@ class CSV
163
163
 
164
164
  #
165
165
  # In the default mixed mode, this method assigns rows for index access and
166
- # columns for header access. You can force the index association by first
166
+ # columns for header access. You can force the index association by first
167
167
  # calling by_col!() or by_row!().
168
168
  #
169
169
  # Rows may be set to an Array of values (which will inherit the table's
170
170
  # headers()) or a CSV::Row.
171
171
  #
172
172
  # Columns may be set to a single value, which is copied to each row of the
173
- # column, or an Array of values. Arrays of values are assigned to rows top
174
- # to bottom in row major order. Excess values are ignored and if the Array
173
+ # column, or an Array of values. Arrays of values are assigned to rows top
174
+ # to bottom in row major order. Excess values are ignored and if the Array
175
175
  # does not have a value for each row the extra rows will receive a +nil+.
176
176
  #
177
- # Assigning to an existing column or row clobbers the data. Assigning to
177
+ # Assigning to an existing column or row clobbers the data. Assigning to
178
178
  # new columns creates them at the right end of the table.
179
179
  #
180
180
  def []=(index_or_header, value)
@@ -212,9 +212,9 @@ class CSV
212
212
 
213
213
  #
214
214
  # The mixed mode default is to treat a list of indices as row access,
215
- # returning the rows indicated. Anything else is considered columnar
216
- # access. For columnar access, the return set has an Array for each row
217
- # with the values indicated by the headers in each Array. You can force
215
+ # returning the rows indicated. Anything else is considered columnar
216
+ # access. For columnar access, the return set has an Array for each row
217
+ # with the values indicated by the headers in each Array. You can force
218
218
  # column or row mode using by_col!() or by_row!().
219
219
  #
220
220
  # You cannot mix column and row access.
@@ -234,7 +234,7 @@ class CSV
234
234
  end
235
235
 
236
236
  #
237
- # Adds a new row to the bottom end of this table. You can provide an Array,
237
+ # Adds a new row to the bottom end of this table. You can provide an Array,
238
238
  # which will be converted to a CSV::Row (inheriting the table's headers()),
239
239
  # or a CSV::Row.
240
240
  #
@@ -251,7 +251,7 @@ class CSV
251
251
  end
252
252
 
253
253
  #
254
- # A shortcut for appending multiple rows. Equivalent to:
254
+ # A shortcut for appending multiple rows. Equivalent to:
255
255
  #
256
256
  # rows.each { |row| self << row }
257
257
  #
@@ -264,9 +264,9 @@ class CSV
264
264
  end
265
265
 
266
266
  #
267
- # Removes and returns the indicated columns or rows. In the default mixed
267
+ # Removes and returns the indicated columns or rows. In the default mixed
268
268
  # mode indices refer to rows and everything else is assumed to be a column
269
- # headers. Use by_col!() or by_row!() to force the lookup.
269
+ # header. Use by_col!() or by_row!() to force the lookup.
270
270
  #
271
271
  def delete(*indexes_or_headers)
272
272
  if indexes_or_headers.empty?
@@ -293,9 +293,9 @@ class CSV
293
293
  end
294
294
 
295
295
  #
296
- # Removes any column or row for which the block returns +true+. In the
296
+ # Removes any column or row for which the block returns +true+. In the
297
297
  # default mixed mode or row mode, iteration is the standard row major
298
- # walking of rows. In column mode, iteration will +yield+ two element
298
+ # walking of rows. In column mode, iteration will +yield+ two element
299
299
  # tuples containing the column name and an Array of values for that column.
300
300
  #
301
301
  # This method returns the table for chaining.
@@ -321,7 +321,7 @@ class CSV
321
321
 
322
322
  #
323
323
  # In the default mixed mode or row mode, iteration is the standard row major
324
- # walking of rows. In column mode, iteration will +yield+ two element
324
+ # walking of rows. In column mode, iteration will +yield+ two element
325
325
  # tuples containing the column name and an Array of values for that column.
326
326
  #
327
327
  # This method returns the table for chaining.
@@ -347,7 +347,7 @@ class CSV
347
347
  end
348
348
 
349
349
  #
350
- # Returns the table as an Array of Arrays. Headers will be the first row,
350
+ # Returns the table as an Array of Arrays. Headers will be the first row,
351
351
  # then all of the field rows will follow.
352
352
  #
353
353
  def to_a
@@ -360,16 +360,16 @@ class CSV
360
360
  end
361
361
 
362
362
  #
363
- # Returns the table as a complete CSV String. Headers will be listed first,
363
+ # Returns the table as a complete CSV String. Headers will be listed first,
364
364
  # then all of the field rows.
365
365
  #
366
366
  # This method assumes you want the Table.headers(), unless you explicitly
367
367
  # pass <tt>:write_headers => false</tt>.
368
368
  #
369
369
  def to_csv(write_headers: true, **options)
370
- array = write_headers ? [headers.to_csv(options)] : []
370
+ array = write_headers ? [headers.to_csv(**options)] : []
371
371
  @table.each do |row|
372
- array.push(row.fields.to_csv(options)) unless row.header_row?
372
+ array.push(row.fields.to_csv(**options)) unless row.header_row?
373
373
  end
374
374
 
375
375
  array.join("")