csv 3.1.1 → 3.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/NEWS.md +47 -0
- data/lib/csv.rb +399 -379
- data/lib/csv/core_ext/array.rb +1 -1
- data/lib/csv/core_ext/string.rb +1 -1
- data/lib/csv/fields_converter.rb +6 -0
- data/lib/csv/parser.rb +52 -8
- data/lib/csv/row.rb +17 -15
- data/lib/csv/table.rb +29 -29
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +12 -0
- metadata +2 -2
data/lib/csv/core_ext/array.rb
CHANGED
data/lib/csv/core_ext/string.rb
CHANGED
data/lib/csv/fields_converter.rb
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
class CSV
|
4
|
+
# Note: Don't use this class directly. This is an internal class.
|
4
5
|
class FieldsConverter
|
5
6
|
include Enumerable
|
7
|
+
#
|
8
|
+
# A CSV::FieldsConverter is a data structure for storing the
|
9
|
+
# fields converter properties to be passed as a parameter
|
10
|
+
# when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
|
11
|
+
#
|
6
12
|
|
7
13
|
def initialize(options={})
|
8
14
|
@converters = []
|
data/lib/csv/parser.rb
CHANGED
@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
|
|
11
11
|
using CSV::MatchP if CSV.const_defined?(:MatchP)
|
12
12
|
|
13
13
|
class CSV
|
14
|
+
# Note: Don't use this class directly. This is an internal class.
|
14
15
|
class Parser
|
16
|
+
#
|
17
|
+
# A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
|
18
|
+
# or String object being read from or written to. Your data is never transcoded
|
19
|
+
# (unless you ask Ruby to transcode it for you) and will literally be parsed in
|
20
|
+
# the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
|
21
|
+
# Encoding of your data. This is accomplished by transcoding the parser itself
|
22
|
+
# into your Encoding.
|
23
|
+
#
|
24
|
+
|
25
|
+
# Raised when encoding is invalid.
|
15
26
|
class InvalidEncoding < StandardError
|
16
27
|
end
|
17
28
|
|
29
|
+
#
|
30
|
+
# CSV::Scanner receives a CSV output, scans it and returns the content.
|
31
|
+
# It also controls the life cycle of the object with its methods +keep_start+,
|
32
|
+
# +keep_end+, +keep_back+, +keep_drop+.
|
33
|
+
#
|
34
|
+
# Uses StringScanner (the official strscan gem). Strscan provides lexical
|
35
|
+
# scanning operations on a String. We inherit its object and take advantage
|
36
|
+
# of its methods. For more information, please visit:
|
37
|
+
# https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
|
38
|
+
#
|
18
39
|
class Scanner < StringScanner
|
19
40
|
alias_method :scan_all, :scan
|
20
41
|
|
@@ -38,7 +59,7 @@ class CSV
|
|
38
59
|
|
39
60
|
def keep_end
|
40
61
|
start = @keeps.pop
|
41
|
-
string
|
62
|
+
string.byteslice(start, pos - start)
|
42
63
|
end
|
43
64
|
|
44
65
|
def keep_back
|
@@ -50,6 +71,18 @@ class CSV
|
|
50
71
|
end
|
51
72
|
end
|
52
73
|
|
74
|
+
#
|
75
|
+
# CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
|
76
|
+
# It also controls the life cycle of the object with its methods +keep_start+,
|
77
|
+
# +keep_end+, +keep_back+, +keep_drop+.
|
78
|
+
#
|
79
|
+
# CSV::InputsScanner.scan() tries to match with pattern at the current position.
|
80
|
+
# If there's a match, the scanner advances the “scan pointer” and returns the matched string.
|
81
|
+
# Otherwise, the scanner returns nil.
|
82
|
+
#
|
83
|
+
# CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
|
84
|
+
# If there is no more data (eos? = true), it returns "".
|
85
|
+
#
|
53
86
|
class InputsScanner
|
54
87
|
def initialize(inputs, encoding, chunk_size: 8192)
|
55
88
|
@inputs = inputs.dup
|
@@ -137,7 +170,7 @@ class CSV
|
|
137
170
|
|
138
171
|
def keep_end
|
139
172
|
start, buffer = @keeps.pop
|
140
|
-
keep = @scanner.string
|
173
|
+
keep = @scanner.string.byteslice(start, @scanner.pos - start)
|
141
174
|
if buffer
|
142
175
|
buffer << keep
|
143
176
|
keep = buffer
|
@@ -192,7 +225,7 @@ class CSV
|
|
192
225
|
input = @inputs.first
|
193
226
|
case input
|
194
227
|
when StringIO
|
195
|
-
string = input.
|
228
|
+
string = input.read
|
196
229
|
raise InvalidEncoding unless string.valid_encoding?
|
197
230
|
@scanner = StringScanner.new(string)
|
198
231
|
@inputs.shift
|
@@ -319,6 +352,7 @@ class CSV
|
|
319
352
|
end
|
320
353
|
|
321
354
|
private
|
355
|
+
# A set of tasks to prepare the file in order to parse it
|
322
356
|
def prepare
|
323
357
|
prepare_variable
|
324
358
|
prepare_quote_character
|
@@ -447,7 +481,13 @@ class CSV
|
|
447
481
|
end
|
448
482
|
|
449
483
|
def prepare_separators
|
450
|
-
|
484
|
+
column_separator = @options[:column_separator]
|
485
|
+
@column_separator = column_separator.to_s.encode(@encoding)
|
486
|
+
if @column_separator.size < 1
|
487
|
+
message = ":col_sep must be 1 or more characters: "
|
488
|
+
message += column_separator.inspect
|
489
|
+
raise ArgumentError, message
|
490
|
+
end
|
451
491
|
@row_separator =
|
452
492
|
resolve_row_separator(@options[:row_separator]).encode(@encoding)
|
453
493
|
|
@@ -534,7 +574,9 @@ class CSV
|
|
534
574
|
cr = "\r".encode(@encoding)
|
535
575
|
lf = "\n".encode(@encoding)
|
536
576
|
if @input.is_a?(StringIO)
|
537
|
-
|
577
|
+
pos = @input.pos
|
578
|
+
separator = detect_row_separator(@input.read, cr, lf)
|
579
|
+
@input.seek(pos)
|
538
580
|
elsif @input.respond_to?(:gets)
|
539
581
|
if @input.is_a?(File)
|
540
582
|
chunk_size = 32 * 1024
|
@@ -651,7 +693,9 @@ class CSV
|
|
651
693
|
return false if @quote_character.nil?
|
652
694
|
|
653
695
|
if @input.is_a?(StringIO)
|
654
|
-
|
696
|
+
pos = @input.pos
|
697
|
+
sample = @input.read
|
698
|
+
@input.seek(pos)
|
655
699
|
else
|
656
700
|
return false if @samples.empty?
|
657
701
|
sample = @samples.first
|
@@ -684,7 +728,7 @@ class CSV
|
|
684
728
|
UnoptimizedStringIO.new(sample)
|
685
729
|
end
|
686
730
|
if @input.is_a?(StringIO)
|
687
|
-
inputs << UnoptimizedStringIO.new(@input.
|
731
|
+
inputs << UnoptimizedStringIO.new(@input.read)
|
688
732
|
else
|
689
733
|
inputs << @input
|
690
734
|
end
|
@@ -697,7 +741,7 @@ class CSV
|
|
697
741
|
def build_scanner
|
698
742
|
string = nil
|
699
743
|
if @samples.empty? and @input.is_a?(StringIO)
|
700
|
-
string = @input.
|
744
|
+
string = @input.read
|
701
745
|
elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
|
702
746
|
string = @samples[0]
|
703
747
|
end
|
data/lib/csv/row.rb
CHANGED
@@ -4,7 +4,7 @@ require "forwardable"
|
|
4
4
|
|
5
5
|
class CSV
|
6
6
|
#
|
7
|
-
# A CSV::Row is part Array and part Hash.
|
7
|
+
# A CSV::Row is part Array and part Hash. It retains an order for the fields
|
8
8
|
# and allows duplicates just as an Array would, but also allows you to access
|
9
9
|
# fields by name just as you could if they were in a Hash.
|
10
10
|
#
|
@@ -13,13 +13,13 @@ class CSV
|
|
13
13
|
#
|
14
14
|
class Row
|
15
15
|
#
|
16
|
-
#
|
17
|
-
# to be Arrays.
|
16
|
+
# Constructs a new CSV::Row from +headers+ and +fields+, which are expected
|
17
|
+
# to be Arrays. If one Array is shorter than the other, it will be padded
|
18
18
|
# with +nil+ objects.
|
19
19
|
#
|
20
20
|
# The optional +header_row+ parameter can be set to +true+ to indicate, via
|
21
21
|
# CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
|
22
|
-
# row.
|
22
|
+
# row. Otherwise, the row is assumed to be a field row.
|
23
23
|
#
|
24
24
|
# A CSV::Row object supports the following Array methods through delegation:
|
25
25
|
#
|
@@ -74,11 +74,11 @@ class CSV
|
|
74
74
|
# field( header, offset )
|
75
75
|
# field( index )
|
76
76
|
#
|
77
|
-
# This method will return the field value by +header+ or +index+.
|
77
|
+
# This method will return the field value by +header+ or +index+. If a field
|
78
78
|
# is not found, +nil+ is returned.
|
79
79
|
#
|
80
80
|
# When provided, +offset+ ensures that a header match occurs on or later
|
81
|
-
# than the +offset+ index.
|
81
|
+
# than the +offset+ index. You can use this to find duplicate headers,
|
82
82
|
# without resorting to hard-coding exact indices.
|
83
83
|
#
|
84
84
|
def field(header_or_index, minimum_index = 0)
|
@@ -142,7 +142,7 @@ class CSV
|
|
142
142
|
# assigns the +value+.
|
143
143
|
#
|
144
144
|
# Assigning past the end of the row with an index will set all pairs between
|
145
|
-
# to <tt>[nil, nil]</tt>.
|
145
|
+
# to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
|
146
146
|
# pair.
|
147
147
|
#
|
148
148
|
def []=(*args)
|
@@ -172,8 +172,8 @@ class CSV
|
|
172
172
|
# <<( header_and_field_hash )
|
173
173
|
#
|
174
174
|
# If a two-element Array is provided, it is assumed to be a header and field
|
175
|
-
# and the pair is appended.
|
176
|
-
# the header and the value being the field.
|
175
|
+
# and the pair is appended. A Hash works the same way with the key being
|
176
|
+
# the header and the value being the field. Anything else is assumed to be
|
177
177
|
# a lone field which is appended with a +nil+ header.
|
178
178
|
#
|
179
179
|
# This method returns the row for chaining.
|
@@ -191,7 +191,7 @@ class CSV
|
|
191
191
|
end
|
192
192
|
|
193
193
|
#
|
194
|
-
# A shortcut for appending multiple fields.
|
194
|
+
# A shortcut for appending multiple fields. Equivalent to:
|
195
195
|
#
|
196
196
|
# args.each { |arg| csv_row << arg }
|
197
197
|
#
|
@@ -209,8 +209,8 @@ class CSV
|
|
209
209
|
# delete( header, offset )
|
210
210
|
# delete( index )
|
211
211
|
#
|
212
|
-
#
|
213
|
-
# located as described in CSV::Row.field().
|
212
|
+
# Removes a pair from the row by +header+ or +index+. The pair is
|
213
|
+
# located as described in CSV::Row.field(). The deleted pair is returned,
|
214
214
|
# or +nil+ if a pair could not be found.
|
215
215
|
#
|
216
216
|
def delete(header_or_index, minimum_index = 0)
|
@@ -325,7 +325,7 @@ class CSV
|
|
325
325
|
end
|
326
326
|
|
327
327
|
#
|
328
|
-
# Collapses the row into a simple Hash.
|
328
|
+
# Collapses the row into a simple Hash. Be warned that this discards field
|
329
329
|
# order and clobbers duplicate fields.
|
330
330
|
#
|
331
331
|
def to_h
|
@@ -340,12 +340,12 @@ class CSV
|
|
340
340
|
alias_method :to_ary, :to_a
|
341
341
|
|
342
342
|
#
|
343
|
-
# Returns the row as a CSV String.
|
343
|
+
# Returns the row as a CSV String. Headers are not used. Equivalent to:
|
344
344
|
#
|
345
345
|
# csv_row.fields.to_csv( options )
|
346
346
|
#
|
347
347
|
def to_csv(**options)
|
348
|
-
fields.to_csv(options)
|
348
|
+
fields.to_csv(**options)
|
349
349
|
end
|
350
350
|
alias_method :to_s, :to_csv
|
351
351
|
|
@@ -367,7 +367,9 @@ class CSV
|
|
367
367
|
end
|
368
368
|
end
|
369
369
|
|
370
|
+
#
|
370
371
|
# A summary of fields, by header, in an ASCII compatible String.
|
372
|
+
#
|
371
373
|
def inspect
|
372
374
|
str = ["#<", self.class.to_s]
|
373
375
|
each do |header, field|
|
data/lib/csv/table.rb
CHANGED
@@ -5,7 +5,7 @@ require "forwardable"
|
|
5
5
|
class CSV
|
6
6
|
#
|
7
7
|
# A CSV::Table is a two-dimensional data structure for representing CSV
|
8
|
-
# documents.
|
8
|
+
# documents. Tables allow you to work with the data by row or column,
|
9
9
|
# manipulate the data, and even convert the results back to CSV, if needed.
|
10
10
|
#
|
11
11
|
# All tables returned by CSV will be constructed from this class, if header
|
@@ -13,8 +13,8 @@ class CSV
|
|
13
13
|
#
|
14
14
|
class Table
|
15
15
|
#
|
16
|
-
#
|
17
|
-
# to be CSV::Row objects.
|
16
|
+
# Constructs a new CSV::Table from +array_of_rows+, which are expected
|
17
|
+
# to be CSV::Row objects. All rows are assumed to have the same headers.
|
18
18
|
#
|
19
19
|
# The optional +headers+ parameter can be set to Array of headers.
|
20
20
|
# If headers aren't set, headers are fetched from CSV::Row objects.
|
@@ -55,11 +55,11 @@ class CSV
|
|
55
55
|
def_delegators :@table, :empty?, :length, :size
|
56
56
|
|
57
57
|
#
|
58
|
-
# Returns a duplicate table object, in column mode.
|
58
|
+
# Returns a duplicate table object, in column mode. This is handy for
|
59
59
|
# chaining in a single call without changing the table mode, but be aware
|
60
60
|
# that this method can consume a fair amount of memory for bigger data sets.
|
61
61
|
#
|
62
|
-
# This method returns the duplicate table for chaining.
|
62
|
+
# This method returns the duplicate table for chaining. Don't chain
|
63
63
|
# destructive methods (like []=()) this way though, since you are working
|
64
64
|
# with a duplicate.
|
65
65
|
#
|
@@ -68,7 +68,7 @@ class CSV
|
|
68
68
|
end
|
69
69
|
|
70
70
|
#
|
71
|
-
# Switches the mode of this table to column mode.
|
71
|
+
# Switches the mode of this table to column mode. All calls to indexing and
|
72
72
|
# iteration methods will work with columns until the mode is changed again.
|
73
73
|
#
|
74
74
|
# This method returns the table and is safe to chain.
|
@@ -80,7 +80,7 @@ class CSV
|
|
80
80
|
end
|
81
81
|
|
82
82
|
#
|
83
|
-
# Returns a duplicate table object, in mixed mode.
|
83
|
+
# Returns a duplicate table object, in mixed mode. This is handy for
|
84
84
|
# chaining in a single call without changing the table mode, but be aware
|
85
85
|
# that this method can consume a fair amount of memory for bigger data sets.
|
86
86
|
#
|
@@ -93,9 +93,9 @@ class CSV
|
|
93
93
|
end
|
94
94
|
|
95
95
|
#
|
96
|
-
# Switches the mode of this table to mixed mode.
|
96
|
+
# Switches the mode of this table to mixed mode. All calls to indexing and
|
97
97
|
# iteration methods will use the default intelligent indexing system until
|
98
|
-
# the mode is changed again.
|
98
|
+
# the mode is changed again. In mixed mode an index is assumed to be a row
|
99
99
|
# reference while anything else is assumed to be column access by headers.
|
100
100
|
#
|
101
101
|
# This method returns the table and is safe to chain.
|
@@ -120,7 +120,7 @@ class CSV
|
|
120
120
|
end
|
121
121
|
|
122
122
|
#
|
123
|
-
# Switches the mode of this table to row mode.
|
123
|
+
# Switches the mode of this table to row mode. All calls to indexing and
|
124
124
|
# iteration methods will work with rows until the mode is changed again.
|
125
125
|
#
|
126
126
|
# This method returns the table and is safe to chain.
|
@@ -146,7 +146,7 @@ class CSV
|
|
146
146
|
|
147
147
|
#
|
148
148
|
# In the default mixed mode, this method returns rows for index access and
|
149
|
-
# columns for header access.
|
149
|
+
# columns for header access. You can force the index association by first
|
150
150
|
# calling by_col!() or by_row!().
|
151
151
|
#
|
152
152
|
# Columns are returned as an Array of values. Altering that Array has no
|
@@ -163,18 +163,18 @@ class CSV
|
|
163
163
|
|
164
164
|
#
|
165
165
|
# In the default mixed mode, this method assigns rows for index access and
|
166
|
-
# columns for header access.
|
166
|
+
# columns for header access. You can force the index association by first
|
167
167
|
# calling by_col!() or by_row!().
|
168
168
|
#
|
169
169
|
# Rows may be set to an Array of values (which will inherit the table's
|
170
170
|
# headers()) or a CSV::Row.
|
171
171
|
#
|
172
172
|
# Columns may be set to a single value, which is copied to each row of the
|
173
|
-
# column, or an Array of values.
|
174
|
-
# to bottom in row major order.
|
173
|
+
# column, or an Array of values. Arrays of values are assigned to rows top
|
174
|
+
# to bottom in row major order. Excess values are ignored and if the Array
|
175
175
|
# does not have a value for each row the extra rows will receive a +nil+.
|
176
176
|
#
|
177
|
-
# Assigning to an existing column or row clobbers the data.
|
177
|
+
# Assigning to an existing column or row clobbers the data. Assigning to
|
178
178
|
# new columns creates them at the right end of the table.
|
179
179
|
#
|
180
180
|
def []=(index_or_header, value)
|
@@ -212,9 +212,9 @@ class CSV
|
|
212
212
|
|
213
213
|
#
|
214
214
|
# The mixed mode default is to treat a list of indices as row access,
|
215
|
-
# returning the rows indicated.
|
216
|
-
# access.
|
217
|
-
# with the values indicated by the headers in each Array.
|
215
|
+
# returning the rows indicated. Anything else is considered columnar
|
216
|
+
# access. For columnar access, the return set has an Array for each row
|
217
|
+
# with the values indicated by the headers in each Array. You can force
|
218
218
|
# column or row mode using by_col!() or by_row!().
|
219
219
|
#
|
220
220
|
# You cannot mix column and row access.
|
@@ -234,7 +234,7 @@ class CSV
|
|
234
234
|
end
|
235
235
|
|
236
236
|
#
|
237
|
-
# Adds a new row to the bottom end of this table.
|
237
|
+
# Adds a new row to the bottom end of this table. You can provide an Array,
|
238
238
|
# which will be converted to a CSV::Row (inheriting the table's headers()),
|
239
239
|
# or a CSV::Row.
|
240
240
|
#
|
@@ -251,7 +251,7 @@ class CSV
|
|
251
251
|
end
|
252
252
|
|
253
253
|
#
|
254
|
-
# A shortcut for appending multiple rows.
|
254
|
+
# A shortcut for appending multiple rows. Equivalent to:
|
255
255
|
#
|
256
256
|
# rows.each { |row| self << row }
|
257
257
|
#
|
@@ -264,9 +264,9 @@ class CSV
|
|
264
264
|
end
|
265
265
|
|
266
266
|
#
|
267
|
-
# Removes and returns the indicated columns or rows.
|
267
|
+
# Removes and returns the indicated columns or rows. In the default mixed
|
268
268
|
# mode indices refer to rows and everything else is assumed to be a column
|
269
|
-
# headers.
|
269
|
+
# header. Use by_col!() or by_row!() to force the lookup.
|
270
270
|
#
|
271
271
|
def delete(*indexes_or_headers)
|
272
272
|
if indexes_or_headers.empty?
|
@@ -293,9 +293,9 @@ class CSV
|
|
293
293
|
end
|
294
294
|
|
295
295
|
#
|
296
|
-
# Removes any column or row for which the block returns +true+.
|
296
|
+
# Removes any column or row for which the block returns +true+. In the
|
297
297
|
# default mixed mode or row mode, iteration is the standard row major
|
298
|
-
# walking of rows.
|
298
|
+
# walking of rows. In column mode, iteration will +yield+ two element
|
299
299
|
# tuples containing the column name and an Array of values for that column.
|
300
300
|
#
|
301
301
|
# This method returns the table for chaining.
|
@@ -321,7 +321,7 @@ class CSV
|
|
321
321
|
|
322
322
|
#
|
323
323
|
# In the default mixed mode or row mode, iteration is the standard row major
|
324
|
-
# walking of rows.
|
324
|
+
# walking of rows. In column mode, iteration will +yield+ two element
|
325
325
|
# tuples containing the column name and an Array of values for that column.
|
326
326
|
#
|
327
327
|
# This method returns the table for chaining.
|
@@ -347,7 +347,7 @@ class CSV
|
|
347
347
|
end
|
348
348
|
|
349
349
|
#
|
350
|
-
# Returns the table as an Array of Arrays.
|
350
|
+
# Returns the table as an Array of Arrays. Headers will be the first row,
|
351
351
|
# then all of the field rows will follow.
|
352
352
|
#
|
353
353
|
def to_a
|
@@ -360,16 +360,16 @@ class CSV
|
|
360
360
|
end
|
361
361
|
|
362
362
|
#
|
363
|
-
# Returns the table as a complete CSV String.
|
363
|
+
# Returns the table as a complete CSV String. Headers will be listed first,
|
364
364
|
# then all of the field rows.
|
365
365
|
#
|
366
366
|
# This method assumes you want the Table.headers(), unless you explicitly
|
367
367
|
# pass <tt>:write_headers => false</tt>.
|
368
368
|
#
|
369
369
|
def to_csv(write_headers: true, **options)
|
370
|
-
array = write_headers ? [headers.to_csv(options)] : []
|
370
|
+
array = write_headers ? [headers.to_csv(**options)] : []
|
371
371
|
@table.each do |row|
|
372
|
-
array.push(row.fields.to_csv(options)) unless row.header_row?
|
372
|
+
array.push(row.fields.to_csv(**options)) unless row.header_row?
|
373
373
|
end
|
374
374
|
|
375
375
|
array.join("")
|