csv 3.1.1 → 3.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +47 -0
- data/lib/csv.rb +399 -379
- data/lib/csv/core_ext/array.rb +1 -1
- data/lib/csv/core_ext/string.rb +1 -1
- data/lib/csv/fields_converter.rb +6 -0
- data/lib/csv/parser.rb +52 -8
- data/lib/csv/row.rb +17 -15
- data/lib/csv/table.rb +29 -29
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +12 -0
- metadata +2 -2
data/lib/csv/core_ext/array.rb
CHANGED
data/lib/csv/core_ext/string.rb
CHANGED
data/lib/csv/fields_converter.rb
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
class CSV
|
4
|
+
# Note: Don't use this class directly. This is an internal class.
|
4
5
|
class FieldsConverter
|
5
6
|
include Enumerable
|
7
|
+
#
|
8
|
+
# A CSV::FieldsConverter is a data structure for storing the
|
9
|
+
# fields converter properties to be passed as a parameter
|
10
|
+
# when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
|
11
|
+
#
|
6
12
|
|
7
13
|
def initialize(options={})
|
8
14
|
@converters = []
|
data/lib/csv/parser.rb
CHANGED
@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
|
11
11
|
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
12
12
|
|
13
13
|
class CSV
|
14
|
+
# Note: Don't use this class directly. This is an internal class.
|
14
15
|
class Parser
|
16
|
+
#
|
17
|
+
# A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
|
18
|
+
# or String object being read from or written to. Your data is never transcoded
|
19
|
+
# (unless you ask Ruby to transcode it for you) and will literally be parsed in
|
20
|
+
# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
|
21
|
+
# Encoding of your data. This is accomplished by transcoding the parser itself
|
22
|
+
# into your Encoding.
|
23
|
+
#
|
24
|
+
|
25
|
+
# Raised when encoding is invalid.
|
15
26
|
class InvalidEncoding < StandardError
|
16
27
|
end
|
17
28
|
|
29
|
+
#
|
30
|
+
# CSV::Scanner receives a CSV output, scans it and returns the content.
|
31
|
+
# It also controls the life cycle of the object with its methods +keep_start+,
|
32
|
+
# +keep_end+, +keep_back+, +keep_drop+.
|
33
|
+
#
|
34
|
+
# Uses StringScanner (the official strscan gem). Strscan provides lexical
|
35
|
+
# scanning operations on a String. We inherit its object and take advantage
|
36
|
+
# of its methods. For more information, please visit:
|
37
|
+
# https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
|
38
|
+
#
|
18
39
|
class Scanner < StringScanner
|
19
40
|
alias_method :scan_all, :scan
|
20
41
|
|
@@ -38,7 +59,7 @@ class CSV
|
|
38
59
|
|
39
60
|
def keep_end
|
40
61
|
start = @keeps.pop
|
41
|
-
string
|
62
|
+
string.byteslice(start, pos - start)
|
42
63
|
end
|
43
64
|
|
44
65
|
def keep_back
|
@@ -50,6 +71,18 @@ class CSV
|
|
50
71
|
end
|
51
72
|
end
|
52
73
|
|
74
|
+
#
|
75
|
+
# CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
|
76
|
+
# It also controls the life cycle of the object with its methods +keep_start+,
|
77
|
+
# +keep_end+, +keep_back+, +keep_drop+.
|
78
|
+
#
|
79
|
+
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
|
80
|
+
# If there's a match, the scanner advances the “scan pointer” and returns the matched string.
|
81
|
+
# Otherwise, the scanner returns nil.
|
82
|
+
#
|
83
|
+
# CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
|
84
|
+
# If there is no more data (eos? = true), it returns "".
|
85
|
+
#
|
53
86
|
class InputsScanner
|
54
87
|
def initialize(inputs, encoding, chunk_size: 8192)
|
55
88
|
@inputs = inputs.dup
|
@@ -137,7 +170,7 @@ class CSV
|
|
137
170
|
|
138
171
|
def keep_end
|
139
172
|
start, buffer = @keeps.pop
|
140
|
-
keep = @scanner.string
|
173
|
+
keep = @scanner.string.byteslice(start, @scanner.pos - start)
|
141
174
|
if buffer
|
142
175
|
buffer << keep
|
143
176
|
keep = buffer
|
@@ -192,7 +225,7 @@ class CSV
|
|
192
225
|
input = @inputs.first
|
193
226
|
case input
|
194
227
|
when StringIO
|
195
|
-
string = input.string
|
228
|
+
string = input.read
|
196
229
|
raise InvalidEncoding unless string.valid_encoding?
|
197
230
|
@scanner = StringScanner.new(string)
|
198
231
|
@inputs.shift
|
@@ -319,6 +352,7 @@ class CSV
|
|
319
352
|
end
|
320
353
|
|
321
354
|
private
|
355
|
+
# A set of tasks to prepare the file in order to parse it
|
322
356
|
def prepare
|
323
357
|
prepare_variable
|
324
358
|
prepare_quote_character
|
@@ -447,7 +481,13 @@ class CSV
|
|
447
481
|
end
|
448
482
|
|
449
483
|
def prepare_separators
|
450
|
-
|
484
|
+
column_separator = @options[:column_separator]
|
485
|
+
@column_separator = column_separator.to_s.encode(@encoding)
|
486
|
+
if @column_separator.size < 1
|
487
|
+
message = ":col_sep must be 1 or more characters: "
|
488
|
+
message += column_separator.inspect
|
489
|
+
raise ArgumentError, message
|
490
|
+
end
|
451
491
|
@row_separator =
|
452
492
|
resolve_row_separator(@options[:row_separator]).encode(@encoding)
|
453
493
|
|
@@ -534,7 +574,9 @@ class CSV
|
|
534
574
|
cr = "\r".encode(@encoding)
|
535
575
|
lf = "\n".encode(@encoding)
|
536
576
|
if @input.is_a?(StringIO)
|
537
|
-
|
577
|
+
pos = @input.pos
|
578
|
+
separator = detect_row_separator(@input.read, cr, lf)
|
579
|
+
@input.seek(pos)
|
538
580
|
elsif @input.respond_to?(:gets)
|
539
581
|
if @input.is_a?(File)
|
540
582
|
chunk_size = 32 * 1024
|
@@ -651,7 +693,9 @@ class CSV
|
|
651
693
|
return false if @quote_character.nil?
|
652
694
|
|
653
695
|
if @input.is_a?(StringIO)
|
654
|
-
|
696
|
+
pos = @input.pos
|
697
|
+
sample = @input.read
|
698
|
+
@input.seek(pos)
|
655
699
|
else
|
656
700
|
return false if @samples.empty?
|
657
701
|
sample = @samples.first
|
@@ -684,7 +728,7 @@ class CSV
|
|
684
728
|
UnoptimizedStringIO.new(sample)
|
685
729
|
end
|
686
730
|
if @input.is_a?(StringIO)
|
687
|
-
inputs << UnoptimizedStringIO.new(@input.string)
|
731
|
+
inputs << UnoptimizedStringIO.new(@input.read)
|
688
732
|
else
|
689
733
|
inputs << @input
|
690
734
|
end
|
@@ -697,7 +741,7 @@ class CSV
|
|
697
741
|
def build_scanner
|
698
742
|
string = nil
|
699
743
|
if @samples.empty? and @input.is_a?(StringIO)
|
700
|
-
string = @input.string
|
744
|
+
string = @input.read
|
701
745
|
elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
|
702
746
|
string = @samples[0]
|
703
747
|
end
|
data/lib/csv/row.rb
CHANGED
@@ -4,7 +4,7 @@ require "forwardable"
|
|
4
4
|
|
5
5
|
class CSV
|
6
6
|
#
|
7
|
-
# A CSV::Row is part Array and part Hash.
|
7
|
+
# A CSV::Row is part Array and part Hash. It retains an order for the fields
|
8
8
|
# and allows duplicates just as an Array would, but also allows you to access
|
9
9
|
# fields by name just as you could if they were in a Hash.
|
10
10
|
#
|
@@ -13,13 +13,13 @@ class CSV
|
|
13
13
|
#
|
14
14
|
class Row
|
15
15
|
#
|
16
|
-
#
|
17
|
-
# to be Arrays.
|
16
|
+
# Constructs a new CSV::Row from +headers+ and +fields+, which are expected
|
17
|
+
# to be Arrays. If one Array is shorter than the other, it will be padded
|
18
18
|
# with +nil+ objects.
|
19
19
|
#
|
20
20
|
# The optional +header_row+ parameter can be set to +true+ to indicate, via
|
21
21
|
# CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
|
22
|
-
# row.
|
22
|
+
# row. Otherwise, the row is assumed to be a field row.
|
23
23
|
#
|
24
24
|
# A CSV::Row object supports the following Array methods through delegation:
|
25
25
|
#
|
@@ -74,11 +74,11 @@ class CSV
|
|
74
74
|
# field( header, offset )
|
75
75
|
# field( index )
|
76
76
|
#
|
77
|
-
# This method will return the field value by +header+ or +index+.
|
77
|
+
# This method will return the field value by +header+ or +index+. If a field
|
78
78
|
# is not found, +nil+ is returned.
|
79
79
|
#
|
80
80
|
# When provided, +offset+ ensures that a header match occurs on or later
|
81
|
-
# than the +offset+ index.
|
81
|
+
# than the +offset+ index. You can use this to find duplicate headers,
|
82
82
|
# without resorting to hard-coding exact indices.
|
83
83
|
#
|
84
84
|
def field(header_or_index, minimum_index = 0)
|
@@ -142,7 +142,7 @@ class CSV
|
|
142
142
|
# assigns the +value+.
|
143
143
|
#
|
144
144
|
# Assigning past the end of the row with an index will set all pairs between
|
145
|
-
# to <tt>[nil, nil]</tt>.
|
145
|
+
# to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
|
146
146
|
# pair.
|
147
147
|
#
|
148
148
|
def []=(*args)
|
@@ -172,8 +172,8 @@ class CSV
|
|
172
172
|
# <<( header_and_field_hash )
|
173
173
|
#
|
174
174
|
# If a two-element Array is provided, it is assumed to be a header and field
|
175
|
-
# and the pair is appended.
|
176
|
-
# the header and the value being the field.
|
175
|
+
# and the pair is appended. A Hash works the same way with the key being
|
176
|
+
# the header and the value being the field. Anything else is assumed to be
|
177
177
|
# a lone field which is appended with a +nil+ header.
|
178
178
|
#
|
179
179
|
# This method returns the row for chaining.
|
@@ -191,7 +191,7 @@ class CSV
|
|
191
191
|
end
|
192
192
|
|
193
193
|
#
|
194
|
-
# A shortcut for appending multiple fields.
|
194
|
+
# A shortcut for appending multiple fields. Equivalent to:
|
195
195
|
#
|
196
196
|
# args.each { |arg| csv_row << arg }
|
197
197
|
#
|
@@ -209,8 +209,8 @@ class CSV
|
|
209
209
|
# delete( header, offset )
|
210
210
|
# delete( index )
|
211
211
|
#
|
212
|
-
#
|
213
|
-
# located as described in CSV::Row.field().
|
212
|
+
# Removes a pair from the row by +header+ or +index+. The pair is
|
213
|
+
# located as described in CSV::Row.field(). The deleted pair is returned,
|
214
214
|
# or +nil+ if a pair could not be found.
|
215
215
|
#
|
216
216
|
def delete(header_or_index, minimum_index = 0)
|
@@ -325,7 +325,7 @@ class CSV
|
|
325
325
|
end
|
326
326
|
|
327
327
|
#
|
328
|
-
# Collapses the row into a simple Hash.
|
328
|
+
# Collapses the row into a simple Hash. Be warned that this discards field
|
329
329
|
# order and clobbers duplicate fields.
|
330
330
|
#
|
331
331
|
def to_h
|
@@ -340,12 +340,12 @@ class CSV
|
|
340
340
|
alias_method :to_ary, :to_a
|
341
341
|
|
342
342
|
#
|
343
|
-
# Returns the row as a CSV String.
|
343
|
+
# Returns the row as a CSV String. Headers are not used. Equivalent to:
|
344
344
|
#
|
345
345
|
# csv_row.fields.to_csv( options )
|
346
346
|
#
|
347
347
|
def to_csv(**options)
|
348
|
-
fields.to_csv(options)
|
348
|
+
fields.to_csv(**options)
|
349
349
|
end
|
350
350
|
alias_method :to_s, :to_csv
|
351
351
|
|
@@ -367,7 +367,9 @@ class CSV
|
|
367
367
|
end
|
368
368
|
end
|
369
369
|
|
370
|
+
#
|
370
371
|
# A summary of fields, by header, in an ASCII compatible String.
|
372
|
+
#
|
371
373
|
def inspect
|
372
374
|
str = ["#<", self.class.to_s]
|
373
375
|
each do |header, field|
|
data/lib/csv/table.rb
CHANGED
@@ -5,7 +5,7 @@ require "forwardable"
|
|
5
5
|
class CSV
|
6
6
|
#
|
7
7
|
# A CSV::Table is a two-dimensional data structure for representing CSV
|
8
|
-
# documents.
|
8
|
+
# documents. Tables allow you to work with the data by row or column,
|
9
9
|
# manipulate the data, and even convert the results back to CSV, if needed.
|
10
10
|
#
|
11
11
|
# All tables returned by CSV will be constructed from this class, if header
|
@@ -13,8 +13,8 @@ class CSV
|
|
13
13
|
#
|
14
14
|
class Table
|
15
15
|
#
|
16
|
-
#
|
17
|
-
# to be CSV::Row objects.
|
16
|
+
# Constructs a new CSV::Table from +array_of_rows+, which are expected
|
17
|
+
# to be CSV::Row objects. All rows are assumed to have the same headers.
|
18
18
|
#
|
19
19
|
# The optional +headers+ parameter can be set to Array of headers.
|
20
20
|
# If headers aren't set, headers are fetched from CSV::Row objects.
|
@@ -55,11 +55,11 @@ class CSV
|
|
55
55
|
def_delegators :@table, :empty?, :length, :size
|
56
56
|
|
57
57
|
#
|
58
|
-
# Returns a duplicate table object, in column mode.
|
58
|
+
# Returns a duplicate table object, in column mode. This is handy for
|
59
59
|
# chaining in a single call without changing the table mode, but be aware
|
60
60
|
# that this method can consume a fair amount of memory for bigger data sets.
|
61
61
|
#
|
62
|
-
# This method returns the duplicate table for chaining.
|
62
|
+
# This method returns the duplicate table for chaining. Don't chain
|
63
63
|
# destructive methods (like []=()) this way though, since you are working
|
64
64
|
# with a duplicate.
|
65
65
|
#
|
@@ -68,7 +68,7 @@ class CSV
|
|
68
68
|
end
|
69
69
|
|
70
70
|
#
|
71
|
-
# Switches the mode of this table to column mode.
|
71
|
+
# Switches the mode of this table to column mode. All calls to indexing and
|
72
72
|
# iteration methods will work with columns until the mode is changed again.
|
73
73
|
#
|
74
74
|
# This method returns the table and is safe to chain.
|
@@ -80,7 +80,7 @@ class CSV
|
|
80
80
|
end
|
81
81
|
|
82
82
|
#
|
83
|
-
# Returns a duplicate table object, in mixed mode.
|
83
|
+
# Returns a duplicate table object, in mixed mode. This is handy for
|
84
84
|
# chaining in a single call without changing the table mode, but be aware
|
85
85
|
# that this method can consume a fair amount of memory for bigger data sets.
|
86
86
|
#
|
@@ -93,9 +93,9 @@ class CSV
|
|
93
93
|
end
|
94
94
|
|
95
95
|
#
|
96
|
-
# Switches the mode of this table to mixed mode.
|
96
|
+
# Switches the mode of this table to mixed mode. All calls to indexing and
|
97
97
|
# iteration methods will use the default intelligent indexing system until
|
98
|
-
# the mode is changed again.
|
98
|
+
# the mode is changed again. In mixed mode an index is assumed to be a row
|
99
99
|
# reference while anything else is assumed to be column access by headers.
|
100
100
|
#
|
101
101
|
# This method returns the table and is safe to chain.
|
@@ -120,7 +120,7 @@ class CSV
|
|
120
120
|
end
|
121
121
|
|
122
122
|
#
|
123
|
-
# Switches the mode of this table to row mode.
|
123
|
+
# Switches the mode of this table to row mode. All calls to indexing and
|
124
124
|
# iteration methods will work with rows until the mode is changed again.
|
125
125
|
#
|
126
126
|
# This method returns the table and is safe to chain.
|
@@ -146,7 +146,7 @@ class CSV
|
|
146
146
|
|
147
147
|
#
|
148
148
|
# In the default mixed mode, this method returns rows for index access and
|
149
|
-
# columns for header access.
|
149
|
+
# columns for header access. You can force the index association by first
|
150
150
|
# calling by_col!() or by_row!().
|
151
151
|
#
|
152
152
|
# Columns are returned as an Array of values. Altering that Array has no
|
@@ -163,18 +163,18 @@ class CSV
|
|
163
163
|
|
164
164
|
#
|
165
165
|
# In the default mixed mode, this method assigns rows for index access and
|
166
|
-
# columns for header access.
|
166
|
+
# columns for header access. You can force the index association by first
|
167
167
|
# calling by_col!() or by_row!().
|
168
168
|
#
|
169
169
|
# Rows may be set to an Array of values (which will inherit the table's
|
170
170
|
# headers()) or a CSV::Row.
|
171
171
|
#
|
172
172
|
# Columns may be set to a single value, which is copied to each row of the
|
173
|
-
# column, or an Array of values.
|
174
|
-
# to bottom in row major order.
|
173
|
+
# column, or an Array of values. Arrays of values are assigned to rows top
|
174
|
+
# to bottom in row major order. Excess values are ignored and if the Array
|
175
175
|
# does not have a value for each row the extra rows will receive a +nil+.
|
176
176
|
#
|
177
|
-
# Assigning to an existing column or row clobbers the data.
|
177
|
+
# Assigning to an existing column or row clobbers the data. Assigning to
|
178
178
|
# new columns creates them at the right end of the table.
|
179
179
|
#
|
180
180
|
def []=(index_or_header, value)
|
@@ -212,9 +212,9 @@ class CSV
|
|
212
212
|
|
213
213
|
#
|
214
214
|
# The mixed mode default is to treat a list of indices as row access,
|
215
|
-
# returning the rows indicated.
|
216
|
-
# access.
|
217
|
-
# with the values indicated by the headers in each Array.
|
215
|
+
# returning the rows indicated. Anything else is considered columnar
|
216
|
+
# access. For columnar access, the return set has an Array for each row
|
217
|
+
# with the values indicated by the headers in each Array. You can force
|
218
218
|
# column or row mode using by_col!() or by_row!().
|
219
219
|
#
|
220
220
|
# You cannot mix column and row access.
|
@@ -234,7 +234,7 @@ class CSV
|
|
234
234
|
end
|
235
235
|
|
236
236
|
#
|
237
|
-
# Adds a new row to the bottom end of this table.
|
237
|
+
# Adds a new row to the bottom end of this table. You can provide an Array,
|
238
238
|
# which will be converted to a CSV::Row (inheriting the table's headers()),
|
239
239
|
# or a CSV::Row.
|
240
240
|
#
|
@@ -251,7 +251,7 @@ class CSV
|
|
251
251
|
end
|
252
252
|
|
253
253
|
#
|
254
|
-
# A shortcut for appending multiple rows.
|
254
|
+
# A shortcut for appending multiple rows. Equivalent to:
|
255
255
|
#
|
256
256
|
# rows.each { |row| self << row }
|
257
257
|
#
|
@@ -264,9 +264,9 @@ class CSV
|
|
264
264
|
end
|
265
265
|
|
266
266
|
#
|
267
|
-
# Removes and returns the indicated columns or rows.
|
267
|
+
# Removes and returns the indicated columns or rows. In the default mixed
|
268
268
|
# mode indices refer to rows and everything else is assumed to be a column
|
269
|
-
# headers.
|
269
|
+
# headers. Use by_col!() or by_row!() to force the lookup.
|
270
270
|
#
|
271
271
|
def delete(*indexes_or_headers)
|
272
272
|
if indexes_or_headers.empty?
|
@@ -293,9 +293,9 @@ class CSV
|
|
293
293
|
end
|
294
294
|
|
295
295
|
#
|
296
|
-
# Removes any column or row for which the block returns +true+.
|
296
|
+
# Removes any column or row for which the block returns +true+. In the
|
297
297
|
# default mixed mode or row mode, iteration is the standard row major
|
298
|
-
# walking of rows.
|
298
|
+
# walking of rows. In column mode, iteration will +yield+ two element
|
299
299
|
# tuples containing the column name and an Array of values for that column.
|
300
300
|
#
|
301
301
|
# This method returns the table for chaining.
|
@@ -321,7 +321,7 @@ class CSV
|
|
321
321
|
|
322
322
|
#
|
323
323
|
# In the default mixed mode or row mode, iteration is the standard row major
|
324
|
-
# walking of rows.
|
324
|
+
# walking of rows. In column mode, iteration will +yield+ two element
|
325
325
|
# tuples containing the column name and an Array of values for that column.
|
326
326
|
#
|
327
327
|
# This method returns the table for chaining.
|
@@ -347,7 +347,7 @@ class CSV
|
|
347
347
|
end
|
348
348
|
|
349
349
|
#
|
350
|
-
# Returns the table as an Array of Arrays.
|
350
|
+
# Returns the table as an Array of Arrays. Headers will be the first row,
|
351
351
|
# then all of the field rows will follow.
|
352
352
|
#
|
353
353
|
def to_a
|
@@ -360,16 +360,16 @@ class CSV
|
|
360
360
|
end
|
361
361
|
|
362
362
|
#
|
363
|
-
# Returns the table as a complete CSV String.
|
363
|
+
# Returns the table as a complete CSV String. Headers will be listed first,
|
364
364
|
# then all of the field rows.
|
365
365
|
#
|
366
366
|
# This method assumes you want the Table.headers(), unless you explicitly
|
367
367
|
# pass <tt>:write_headers => false</tt>.
|
368
368
|
#
|
369
369
|
def to_csv(write_headers: true, **options)
|
370
|
-
array = write_headers ? [headers.to_csv(options)] : []
|
370
|
+
array = write_headers ? [headers.to_csv(**options)] : []
|
371
371
|
@table.each do |row|
|
372
|
-
array.push(row.fields.to_csv(options)) unless row.header_row?
|
372
|
+
array.push(row.fields.to_csv(**options)) unless row.header_row?
|
373
373
|
end
|
374
374
|
|
375
375
|
array.join("")
|