csv 3.1.2 → 3.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,7 +50,7 @@ class CSV
50
50
  fields.collect.with_index do |field, index|
51
51
  if field.nil?
52
52
  field = @nil_value
53
- elsif field.empty?
53
+ elsif field.is_a?(String) and field.empty?
54
54
  field = @empty_value unless @empty_value_is_empty_string
55
55
  end
56
56
  @converters.each do |converter|
@@ -446,6 +446,7 @@ class CSV
446
446
  @strip = @options[:strip]
447
447
  @escaped_strip = nil
448
448
  @strip_value = nil
449
+ @rstrip_value = nil
449
450
  if @strip.is_a?(String)
450
451
  case @strip.length
451
452
  when 0
@@ -460,6 +461,8 @@ class CSV
460
461
  if @quote_character
461
462
  @strip_value = Regexp.new(@escaped_strip +
462
463
  "+".encode(@encoding))
464
+ @rstrip_value = Regexp.new(@escaped_strip +
465
+ "+\\z".encode(@encoding))
463
466
  end
464
467
  @need_robust_parsing = true
465
468
  elsif @strip
@@ -467,6 +470,7 @@ class CSV
467
470
  @escaped_strip = strip_values.encode(@encoding)
468
471
  if @quote_character
469
472
  @strip_value = Regexp.new("[#{strip_values}]+".encode(@encoding))
473
+ @rstrip_value = Regexp.new("[#{strip_values}]+\\z".encode(@encoding))
470
474
  end
471
475
  @need_robust_parsing = true
472
476
  end
@@ -561,9 +565,6 @@ class CSV
561
565
  unless @liberal_parsing
562
566
  no_unquoted_values << @escaped_quote_character
563
567
  end
564
- if @escaped_strip
565
- no_unquoted_values << @escaped_strip
566
- end
567
568
  @unquoted_value = Regexp.new("[^".encode(@encoding) +
568
569
  no_unquoted_values +
569
570
  "]+".encode(@encoding))
@@ -707,7 +708,7 @@ class CSV
707
708
  if SCANNER_TEST
708
709
  class UnoptimizedStringIO
709
710
  def initialize(string)
710
- @io = StringIO.new(string)
711
+ @io = StringIO.new(string, "rb:#{string.encoding}")
711
712
  end
712
713
 
713
714
  def gets(*args)
@@ -769,7 +770,7 @@ class CSV
769
770
  def skip_needless_lines
770
771
  return unless @skip_lines
771
772
 
772
- while true
773
+ until @scanner.eos?
773
774
  @scanner.keep_start
774
775
  line = @scanner.scan_all(@not_line_end) || "".encode(@encoding)
775
776
  line << @row_separator if parse_row_end
@@ -939,6 +940,7 @@ class CSV
939
940
  if @liberal_parsing
940
941
  quoted_value = parse_quoted_column_value
941
942
  if quoted_value
943
+ @scanner.scan_all(@strip_value) if @strip_value
942
944
  unquoted_value = parse_unquoted_column_value
943
945
  if unquoted_value
944
946
  if @double_quote_outside_quote
@@ -986,6 +988,9 @@ class CSV
986
988
  end
987
989
  end
988
990
  value.gsub!(@backslash_quote_character, @quote_character) if @backslash_quote
991
+ if @rstrip_value
992
+ value.gsub!(@rstrip_value, "")
993
+ end
989
994
  value
990
995
  end
991
996
 
@@ -50,7 +50,7 @@ class CSV
50
50
 
51
51
  def initialize_copy(other)
52
52
  super
53
- @row = @row.dup
53
+ @row = @row.collect(&:dup)
54
54
  end
55
55
 
56
56
  # Returns +true+ if this is a header row.
@@ -3,31 +3,199 @@
3
3
  require "forwardable"
4
4
 
5
5
  class CSV
6
+ # = \CSV::Table
7
+ # A \CSV::Table instance is an object representing \CSV data.
8
+ # (see {class CSV}[../CSV.html]).
6
9
  #
7
- # A CSV::Table is a two-dimensional data structure for representing CSV
8
- # documents. Tables allow you to work with the data by row or column,
9
- # manipulate the data, and even convert the results back to CSV, if needed.
10
+ # The instance may have:
11
+ # - Rows: each is a CSV::Row object.
12
+ # - Headers: names for the columns.
10
13
  #
11
- # All tables returned by CSV will be constructed from this class, if header
12
- # row processing is activated.
14
+ # === Instance Methods
13
15
  #
16
+ # \CSV::Table has three groups of instance methods:
17
+ # - Its own internally defined instance methods.
18
+ # - Methods included by module Enumerable.
19
+ # - Methods delegated to class Array:
20
+ # * Array#empty?
21
+ # * Array#length
22
+ # * Array#size
23
+ #
24
+ # == Creating a \CSV::Table Instance
25
+ #
26
+ # Commonly, a new \CSV::Table instance is created by parsing \CSV source
27
+ # using headers:
28
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
29
+ # table = CSV.parse(source, headers: true)
30
+ # table.class # => CSV::Table
31
+ #
32
+ # You can also create an instance directly. See ::new.
33
+ #
34
+ # == Headers
35
+ #
36
+ # If a table has headers, the headers serve as labels for the columns of data.
37
+ # Each header serves as the label for its column.
38
+ #
39
+ # The headers for a \CSV::Table object are stored as an \Array of Strings.
40
+ #
41
+ # Commonly, headers are defined in the first row of \CSV source:
42
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
43
+ # table = CSV.parse(source, headers: true)
44
+ # table.headers # => ["Name", "Value"]
45
+ #
46
+ # If no headers are defined, the \Array is empty:
47
+ # table = CSV::Table.new([])
48
+ # table.headers # => []
49
+ #
50
+ # == Access Modes
51
+ #
52
+ # \CSV::Table provides three modes for accessing table data:
53
+ # - \Row mode.
54
+ # - Column mode.
55
+ # - Mixed mode (the default for a new table).
56
+ #
57
+ # The access mode for a \CSV::Table instance affects the behavior
58
+ # of some of its instance methods:
59
+ # - #[]
60
+ # - #[]=
61
+ # - #delete
62
+ # - #delete_if
63
+ # - #each
64
+ # - #values_at
65
+ #
66
+ # === \Row Mode
67
+ #
68
+ # Set a table to row mode with method #by_row!:
69
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
70
+ # table = CSV.parse(source, headers: true)
71
+ # table.by_row! # => #<CSV::Table mode:row row_count:4>
72
+ #
73
+ # Specify a single row by an \Integer index:
74
+ # # Get a row.
75
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
76
+ # # Set a row, then get it.
77
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
78
+ # table[1] # => #<CSV::Row "Name":"bam" "Value":3>
79
+ #
80
+ # Specify a sequence of rows by a \Range:
81
+ # # Get rows.
82
+ # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
83
+ # # Set rows, then get them.
84
+ # table[1..2] = [
85
+ # CSV::Row.new(['Name', 'Value'], ['bat', 4]),
86
+ # CSV::Row.new(['Name', 'Value'], ['bad', 5]),
87
+ # ]
88
+ # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
89
+ #
90
+ # === Column Mode
91
+ #
92
+ # Set a table to column mode with method #by_col!:
93
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
94
+ # table = CSV.parse(source, headers: true)
95
+ # table.by_col! # => #<CSV::Table mode:col row_count:4>
96
+ #
97
+ # Specify a column by an \Integer index:
98
+ # # Get a column.
99
+ # table[0]
100
+ # # Set a column, then get it.
101
+ # table[0] = ['FOO', 'BAR', 'BAZ']
102
+ # table[0] # => ["FOO", "BAR", "BAZ"]
103
+ #
104
+ # Specify a column by its \String header:
105
+ # # Get a column.
106
+ # table['Name'] # => ["FOO", "BAR", "BAZ"]
107
+ # # Set a column, then get it.
108
+ # table['Name'] = ['Foo', 'Bar', 'Baz']
109
+ # table['Name'] # => ["Foo", "Bar", "Baz"]
110
+ #
111
+ # === Mixed Mode
112
+ #
113
+ # In mixed mode, you can refer to either rows or columns:
114
+ # - An \Integer index refers to a row.
115
+ # - A \Range index refers to multiple rows.
116
+ # - A \String index refers to a column.
117
+ #
118
+ # Set a table to mixed mode with method #by_col_or_row!:
119
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
120
+ # table = CSV.parse(source, headers: true)
121
+ # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4>
122
+ #
123
+ # Specify a single row by an \Integer index:
124
+ # # Get a row.
125
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1">
126
+ # # Set a row, then get it.
127
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3])
128
+ # table[1] # => #<CSV::Row "Name":"bam" "Value":3>
129
+ #
130
+ # Specify a sequence of rows by a \Range:
131
+ # # Get rows.
132
+ # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">]
133
+ # # Set rows, then get them.
134
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4])
135
+ # table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5])
136
+ # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]]
137
+ #
138
+ # Specify a column by its \String header:
139
+ # # Get a column.
140
+ # table['Name'] # => ["foo", "bat", "bad"]
141
+ # # Set a column, then get it.
142
+ # table['Name'] = ['Foo', 'Bar', 'Baz']
143
+ # table['Name'] # => ["Foo", "Bar", "Baz"]
14
144
  class Table
15
- #
16
- # Constructs a new CSV::Table from +array_of_rows+, which are expected
17
- # to be CSV::Row objects. All rows are assumed to have the same headers.
18
- #
19
- # The optional +headers+ parameter can be set to Array of headers.
20
- # If headers aren't set, headers are fetched from CSV::Row objects.
21
- # Otherwise, headers() method will return headers being set in
22
- # headers argument.
23
- #
24
- # A CSV::Table object supports the following Array methods through
25
- # delegation:
26
- #
27
- # * empty?()
28
- # * length()
29
- # * size()
30
- #
145
+ # :call-seq:
146
+ # CSV::Table.new(array_of_rows, headers: nil)
147
+ #
148
+ # Returns a new \CSV::Table object.
149
+ #
150
+ # - Argument +array_of_rows+ must be an \Array of CSV::Row objects.
151
+ # - Argument +headers+, if given, may be an \Array of Strings.
152
+ #
153
+ # ---
154
+ #
155
+ # Create an empty \CSV::Table object:
156
+ # table = CSV::Table.new([])
157
+ # table # => #<CSV::Table mode:col_or_row row_count:1>
158
+ #
159
+ # Create a non-empty \CSV::Table object:
160
+ # rows = [
161
+ # CSV::Row.new([], []),
162
+ # CSV::Row.new([], []),
163
+ # CSV::Row.new([], []),
164
+ # ]
165
+ # table = CSV::Table.new(rows)
166
+ # table # => #<CSV::Table mode:col_or_row row_count:4>
167
+ #
168
+ # ---
169
+ #
170
+ # If argument +headers+ is an \Array of Strings,
171
+ # those Strings become the table's headers:
172
+ # table = CSV::Table.new([], headers: ['Name', 'Age'])
173
+ # table.headers # => ["Name", "Age"]
174
+ #
175
+ # If argument +headers+ is not given and the table has rows,
176
+ # the headers are taken from the first row:
177
+ # rows = [
178
+ # CSV::Row.new(['Foo', 'Bar'], []),
179
+ # CSV::Row.new(['foo', 'bar'], []),
180
+ # CSV::Row.new(['FOO', 'BAR'], []),
181
+ # ]
182
+ # table = CSV::Table.new(rows)
183
+ # table.headers # => ["Foo", "Bar"]
184
+ #
185
+ # If argument +headers+ is not given and the table is empty (has no rows),
186
+ # the headers are also empty:
187
+ # table = CSV::Table.new([])
188
+ # table.headers # => []
189
+ #
190
+ # ---
191
+ #
192
+ # Raises an exception if argument +array_of_rows+ is not an \Array object:
193
+ # # Raises NoMethodError (undefined method `first' for :foo:Symbol):
194
+ # CSV::Table.new(:foo)
195
+ #
196
+ # Raises an exception if an element of +array_of_rows+ is not a \CSV::Row object:
197
+ # # Raises NoMethodError (undefined method `headers' for :foo:Symbol):
198
+ # CSV::Table.new([:foo])
31
199
  def initialize(array_of_rows, headers: nil)
32
200
  @table = array_of_rows
33
201
  @headers = headers
@@ -54,88 +222,141 @@ class CSV
54
222
  extend Forwardable
55
223
  def_delegators :@table, :empty?, :length, :size
56
224
 
225
+ # :call-seq:
226
+ # table.by_col
57
227
  #
58
- # Returns a duplicate table object, in column mode. This is handy for
59
- # chaining in a single call without changing the table mode, but be aware
60
- # that this method can consume a fair amount of memory for bigger data sets.
228
+ # Returns a duplicate of +self+, in column mode
229
+ # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]):
230
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
231
+ # table = CSV.parse(source, headers: true)
232
+ # table.mode # => :col_or_row
233
+ # dup_table = table.by_col
234
+ # dup_table.mode # => :col
235
+ # dup_table.equal?(table) # => false # It's a dup
61
236
  #
62
- # This method returns the duplicate table for chaining. Don't chain
63
- # destructive methods (like []=()) this way though, since you are working
64
- # with a duplicate.
237
+ # This may be used to chain method calls without changing the mode
238
+ # (but also will affect performance and memory usage):
239
+ # dup_table.by_col['Name']
65
240
  #
241
+ # Also note that changes to the duplicate table will not affect the original.
66
242
  def by_col
67
243
  self.class.new(@table.dup).by_col!
68
244
  end
69
245
 
70
- #
71
- # Switches the mode of this table to column mode. All calls to indexing and
72
- # iteration methods will work with columns until the mode is changed again.
73
- #
74
- # This method returns the table and is safe to chain.
75
- #
246
+ # :call-seq:
247
+ # table.by_col!
248
+ #
249
+ # Sets the mode for +self+ to column mode
250
+ # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+:
251
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
252
+ # table = CSV.parse(source, headers: true)
253
+ # table.mode # => :col_or_row
254
+ # table1 = table.by_col!
255
+ # table.mode # => :col
256
+ # table1.equal?(table) # => true # Returned self
76
257
  def by_col!
77
258
  @mode = :col
78
259
 
79
260
  self
80
261
  end
81
262
 
263
+ # :call-seq:
264
+ # table.by_col_or_row
82
265
  #
83
- # Returns a duplicate table object, in mixed mode. This is handy for
84
- # chaining in a single call without changing the table mode, but be aware
85
- # that this method can consume a fair amount of memory for bigger data sets.
266
+ # Returns a duplicate of +self+, in mixed mode
267
+ # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]):
268
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
269
+ # table = CSV.parse(source, headers: true).by_col!
270
+ # table.mode # => :col
271
+ # dup_table = table.by_col_or_row
272
+ # dup_table.mode # => :col_or_row
273
+ # dup_table.equal?(table) # => false # It's a dup
86
274
  #
87
- # This method returns the duplicate table for chaining. Don't chain
88
- # destructive methods (like []=()) this way though, since you are working
89
- # with a duplicate.
275
+ # This may be used to chain method calls without changing the mode
276
+ # (but also will affect performance and memory usage):
277
+ # dup_table.by_col_or_row['Name']
90
278
  #
279
+ # Also note that changes to the duplicate table will not affect the original.
91
280
  def by_col_or_row
92
281
  self.class.new(@table.dup).by_col_or_row!
93
282
  end
94
283
 
95
- #
96
- # Switches the mode of this table to mixed mode. All calls to indexing and
97
- # iteration methods will use the default intelligent indexing system until
98
- # the mode is changed again. In mixed mode an index is assumed to be a row
99
- # reference while anything else is assumed to be column access by headers.
100
- #
101
- # This method returns the table and is safe to chain.
102
- #
284
+ # :call-seq:
285
+ # table.by_col_or_row!
286
+ #
287
+ # Sets the mode for +self+ to mixed mode
288
+ # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+:
289
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
290
+ # table = CSV.parse(source, headers: true).by_col!
291
+ # table.mode # => :col
292
+ # table1 = table.by_col_or_row!
293
+ # table.mode # => :col_or_row
294
+ # table1.equal?(table) # => true # Returned self
103
295
  def by_col_or_row!
104
296
  @mode = :col_or_row
105
297
 
106
298
  self
107
299
  end
108
300
 
301
+ # :call-seq:
302
+ # table.by_row
109
303
  #
110
- # Returns a duplicate table object, in row mode. This is handy for chaining
111
- # in a single call without changing the table mode, but be aware that this
112
- # method can consume a fair amount of memory for bigger data sets.
304
+ # Returns a duplicate of +self+, in row mode
305
+ # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]):
306
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
307
+ # table = CSV.parse(source, headers: true)
308
+ # table.mode # => :col_or_row
309
+ # dup_table = table.by_row
310
+ # dup_table.mode # => :row
311
+ # dup_table.equal?(table) # => false # It's a dup
113
312
  #
114
- # This method returns the duplicate table for chaining. Don't chain
115
- # destructive methods (like []=()) this way though, since you are working
116
- # with a duplicate.
313
+ # This may be used to chain method calls without changing the mode
314
+ # (but also will affect performance and memory usage):
315
+ # dup_table.by_row[1]
117
316
  #
317
+ # Also note that changes to the duplicate table will not affect the original.
118
318
  def by_row
119
319
  self.class.new(@table.dup).by_row!
120
320
  end
121
321
 
122
- #
123
- # Switches the mode of this table to row mode. All calls to indexing and
124
- # iteration methods will work with rows until the mode is changed again.
125
- #
126
- # This method returns the table and is safe to chain.
127
- #
322
+ # :call-seq:
323
+ # table.by_row!
324
+ #
325
+ # Sets the mode for +self+ to row mode
326
+ # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+:
327
+ # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n"
328
+ # table = CSV.parse(source, headers: true)
329
+ # table.mode # => :col_or_row
330
+ # table1 = table.by_row!
331
+ # table.mode # => :row
332
+ # table1.equal?(table) # => true # Returned self
128
333
  def by_row!
129
334
  @mode = :row
130
335
 
131
336
  self
132
337
  end
133
338
 
134
- #
135
- # Returns the headers for the first row of this table (assumed to match all
136
- # other rows). The headers Array passed to CSV::Table.new is returned for
137
- # empty tables.
138
- #
339
+ # :call-seq:
340
+ # table.headers
341
+ #
342
+ # Returns a new \Array containing the \String headers for the table.
343
+ #
344
+ # If the table is not empty, returns the headers from the first row:
345
+ # rows = [
346
+ # CSV::Row.new(['Foo', 'Bar'], []),
347
+ # CSV::Row.new(['FOO', 'BAR'], []),
348
+ # CSV::Row.new(['foo', 'bar'], []),
349
+ # ]
350
+ # table = CSV::Table.new(rows)
351
+ # table.headers # => ["Foo", "Bar"]
352
+ # table.delete(0)
353
+ # table.headers # => ["FOO", "BAR"]
354
+ # table.delete(0)
355
+ # table.headers # => ["foo", "bar"]
356
+ #
357
+ # If the table is empty, returns a copy of the headers in the table itself:
358
+ # table.delete(0)
359
+ # table.headers # => ["Foo", "Bar"]
139
360
  def headers
140
361
  if @table.empty?
141
362
  @headers.dup