csv 3.0.2 → 3.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/NEWS.md +43 -0
- data/lib/csv.rb +12 -8
- data/lib/csv/parser.rb +55 -6
- data/lib/csv/version.rb +1 -1
- data/lib/csv/writer.rb +37 -28
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a240b6e6a405972ac623bbc1c369d6833e57b449606ce02d01b183604d621353
|
4
|
+
data.tar.gz: 73cb12bafe60a7331b13d9d7b75837007c55044b78d4580d422ba123af692d25
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 739201c445c8b6ad1644d2ede0522635d10b83a1a2253cea7386190749b762d0301fc15af98c55976180263a210b5c954f49428897760682675db420704543a6
|
7
|
+
data.tar.gz: 6b55c8703407e7fdd09075ce5ce798d66fad1317b65b19b035cde5653f29c13087775cbf1117cdb044689b6a33c031c9624c503eb3c441a817741fdc5dc6084e
|
data/NEWS.md
CHANGED
@@ -1,5 +1,48 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 3.0.3 - 2019-01-12
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* Migrated benchmark tool to benchmark-driver from benchmark-ips.
|
8
|
+
[GitHub#57][Patch by 284km]
|
9
|
+
|
10
|
+
* Added `liberal_parsing: {double_quote_outside_quote: true}` parse
|
11
|
+
option.
|
12
|
+
[GitHub#66][Reported by Watson]
|
13
|
+
|
14
|
+
* Added `quote_empty:` write option.
|
15
|
+
[GitHub#35][Reported by Dave Myron]
|
16
|
+
|
17
|
+
### Fixes
|
18
|
+
|
19
|
+
* Fixed a compatibility bug that `CSV.generate` always returns
|
20
|
+
`ASCII-8BIT` encoding string.
|
21
|
+
[GitHub#63][Patch by Watson]
|
22
|
+
|
23
|
+
* Fixed a compatibility bug that `CSV.parse("", headers: true)`
|
24
|
+
doesn't return `CSV::Table`.
|
25
|
+
[GitHub#64][Reported by Watson][Patch by 284km]
|
26
|
+
|
27
|
+
* Fixed a compatibility bug that multi-character column
|
28
|
+
separator doesn't work.
|
29
|
+
[GitHub#67][Reported by Jesse Reiss]
|
30
|
+
|
31
|
+
* Fixed a compatibility bug that calling `#each` twice parses the input twice.
|
32
|
+
[GitHub#68][Reported by Max Schwenk]
|
33
|
+
|
34
|
+
### Thanks
|
35
|
+
|
36
|
+
* Watson
|
37
|
+
|
38
|
+
* 284km
|
39
|
+
|
40
|
+
* Jesse Reiss
|
41
|
+
|
42
|
+
* Dave Myron
|
43
|
+
|
44
|
+
* Max Schwenk
|
45
|
+
|
3
46
|
## 3.0.2 - 2018-12-23
|
4
47
|
|
5
48
|
### Improvements
|
data/lib/csv.rb
CHANGED
@@ -397,6 +397,7 @@ class CSV
|
|
397
397
|
# <b><tt>:force_quotes</tt></b>:: +false+
|
398
398
|
# <b><tt>:skip_lines</tt></b>:: +nil+
|
399
399
|
# <b><tt>:liberal_parsing</tt></b>:: +false+
|
400
|
+
# <b><tt>:quote_empty</tt></b>:: +true+
|
400
401
|
#
|
401
402
|
DEFAULT_OPTIONS = {
|
402
403
|
col_sep: ",",
|
@@ -412,6 +413,7 @@ class CSV
|
|
412
413
|
force_quotes: false,
|
413
414
|
skip_lines: nil,
|
414
415
|
liberal_parsing: false,
|
416
|
+
quote_empty: true,
|
415
417
|
}.freeze
|
416
418
|
|
417
419
|
#
|
@@ -534,7 +536,7 @@ class CSV
|
|
534
536
|
str.seek(0, IO::SEEK_END)
|
535
537
|
else
|
536
538
|
encoding = options[:encoding]
|
537
|
-
str
|
539
|
+
str = +""
|
538
540
|
str.force_encoding(encoding) if encoding
|
539
541
|
end
|
540
542
|
csv = new(str, options) # wrap
|
@@ -557,11 +559,11 @@ class CSV
|
|
557
559
|
#
|
558
560
|
def self.generate_line(row, **options)
|
559
561
|
options = {row_sep: $INPUT_RECORD_SEPARATOR}.merge(options)
|
560
|
-
str =
|
562
|
+
str = +""
|
561
563
|
if options[:encoding]
|
562
564
|
str.force_encoding(options[:encoding])
|
563
|
-
elsif field = row.find {
|
564
|
-
str.force_encoding(
|
565
|
+
elsif field = row.find {|f| f.is_a?(String)}
|
566
|
+
str.force_encoding(field.encoding)
|
565
567
|
end
|
566
568
|
(new(str, options) << row).string
|
567
569
|
end
|
@@ -882,6 +884,7 @@ class CSV
|
|
882
884
|
# <b><tt>:empty_value</tt></b>:: When set an object, any values of a
|
883
885
|
# blank string field is replaced by
|
884
886
|
# the set object.
|
887
|
+
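# <b><tt>:quote_empty</tt></b>:: When true (the default), empty String fields are written as quoted empty fields (<tt>""</tt>); when false, they are written unquoted.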
|
885
888
|
#
|
886
889
|
# See CSV::DEFAULT_OPTIONS for the default settings.
|
887
890
|
#
|
@@ -907,7 +910,8 @@ class CSV
|
|
907
910
|
external_encoding: nil,
|
908
911
|
encoding: nil,
|
909
912
|
nil_value: nil,
|
910
|
-
empty_value: ""
|
913
|
+
empty_value: "",
|
914
|
+
quote_empty: true)
|
911
915
|
raise ArgumentError.new("Cannot parse nil as CSV") if data.nil?
|
912
916
|
|
913
917
|
# create the IO object we will read from
|
@@ -947,6 +951,7 @@ class CSV
|
|
947
951
|
column_separator: col_sep,
|
948
952
|
row_separator: row_sep,
|
949
953
|
quote_character: quote_char,
|
954
|
+
quote_empty: quote_empty,
|
950
955
|
}
|
951
956
|
|
952
957
|
@writer = nil
|
@@ -1178,9 +1183,8 @@ class CSV
|
|
1178
1183
|
#
|
1179
1184
|
def read
|
1180
1185
|
rows = to_a
|
1181
|
-
|
1182
|
-
|
1183
|
-
Table.new(rows, headers: headers)
|
1186
|
+
if parser.use_headers?
|
1187
|
+
Table.new(rows, headers: parser.headers)
|
1184
1188
|
else
|
1185
1189
|
rows
|
1186
1190
|
end
|
data/lib/csv/parser.rb
CHANGED
@@ -170,6 +170,7 @@ class CSV
|
|
170
170
|
@input = input
|
171
171
|
@options = options
|
172
172
|
@samples = []
|
173
|
+
@parsed = false
|
173
174
|
|
174
175
|
prepare
|
175
176
|
end
|
@@ -229,6 +230,8 @@ class CSV
|
|
229
230
|
def parse(&block)
|
230
231
|
return to_enum(__method__) unless block_given?
|
231
232
|
|
233
|
+
return if @parsed
|
234
|
+
|
232
235
|
if @return_headers and @headers
|
233
236
|
headers = Row.new(@headers, @raw_headers, true)
|
234
237
|
if @unconverted_fields
|
@@ -262,10 +265,10 @@ class CSV
|
|
262
265
|
skip_needless_lines
|
263
266
|
start_row
|
264
267
|
elsif @scanner.eos?
|
265
|
-
|
268
|
+
break if row.empty? and value.nil?
|
266
269
|
row << value
|
267
270
|
emit_row(row, &block)
|
268
|
-
|
271
|
+
break
|
269
272
|
else
|
270
273
|
if @quoted_column_value
|
271
274
|
message = "Do not allow except col_sep_split_separator " +
|
@@ -287,6 +290,12 @@ class CSV
|
|
287
290
|
message = "Invalid byte sequence in #{@encoding}"
|
288
291
|
raise MalformedCSVError.new(message, @lineno + 1)
|
289
292
|
end
|
293
|
+
|
294
|
+
@parsed = true
|
295
|
+
end
|
296
|
+
|
297
|
+
def use_headers?
|
298
|
+
@use_headers
|
290
299
|
end
|
291
300
|
|
292
301
|
private
|
@@ -300,7 +309,18 @@ class CSV
|
|
300
309
|
|
301
310
|
def prepare_variable
|
302
311
|
@encoding = @options[:encoding]
|
303
|
-
|
312
|
+
liberal_parsing = @options[:liberal_parsing]
|
313
|
+
if liberal_parsing
|
314
|
+
@liberal_parsing = true
|
315
|
+
if liberal_parsing.is_a?(Hash)
|
316
|
+
@double_quote_outside_quote =
|
317
|
+
liberal_parsing[:double_quote_outside_quote]
|
318
|
+
else
|
319
|
+
@double_quote_outside_quote = false
|
320
|
+
end
|
321
|
+
else
|
322
|
+
@liberal_parsing = false
|
323
|
+
end
|
304
324
|
@unconverted_fields = @options[:unconverted_fields]
|
305
325
|
@field_size_limit = @options[:field_size_limit]
|
306
326
|
@skip_blanks = @options[:skip_blanks]
|
@@ -318,6 +338,7 @@ class CSV
|
|
318
338
|
end
|
319
339
|
|
320
340
|
escaped_column_separator = Regexp.escape(@column_separator)
|
341
|
+
escaped_first_column_separator = Regexp.escape(@column_separator[0])
|
321
342
|
escaped_row_separator = Regexp.escape(@row_separator)
|
322
343
|
escaped_quote_character = Regexp.escape(@quote_character)
|
323
344
|
|
@@ -341,8 +362,11 @@ class CSV
|
|
341
362
|
@column_ends = @column_separator.each_char.collect do |char|
|
342
363
|
Regexp.new(Regexp.escape(char))
|
343
364
|
end
|
365
|
+
@first_column_separators = Regexp.new(escaped_first_column_separator +
|
366
|
+
"+".encode(@encoding))
|
344
367
|
else
|
345
368
|
@column_ends = nil
|
369
|
+
@first_column_separators = nil
|
346
370
|
end
|
347
371
|
@row_end = Regexp.new(escaped_row_separator)
|
348
372
|
if @row_separator.size > 1
|
@@ -359,12 +383,12 @@ class CSV
|
|
359
383
|
"]+".encode(@encoding))
|
360
384
|
if @liberal_parsing
|
361
385
|
@unquoted_value = Regexp.new("[^".encode(@encoding) +
|
362
|
-
|
386
|
+
escaped_first_column_separator +
|
363
387
|
"\r\n]+".encode(@encoding))
|
364
388
|
else
|
365
389
|
@unquoted_value = Regexp.new("[^".encode(@encoding) +
|
366
390
|
escaped_quote_character +
|
367
|
-
|
391
|
+
escaped_first_column_separator +
|
368
392
|
"\r\n]+".encode(@encoding))
|
369
393
|
end
|
370
394
|
@cr_or_lf = Regexp.new("[\r\n]".encode(@encoding))
|
@@ -583,6 +607,13 @@ class CSV
|
|
583
607
|
if quoted_value
|
584
608
|
unquoted_value = parse_unquoted_column_value
|
585
609
|
if unquoted_value
|
610
|
+
if @double_quote_outside_quote
|
611
|
+
unquoted_value = unquoted_value.gsub(@quote_character * 2,
|
612
|
+
@quote_character)
|
613
|
+
if quoted_value.empty? # %Q{""...} case
|
614
|
+
return @quote_character + unquoted_value
|
615
|
+
end
|
616
|
+
end
|
586
617
|
@quote_character + quoted_value + @quote_character + unquoted_value
|
587
618
|
else
|
588
619
|
quoted_value
|
@@ -601,7 +632,25 @@ class CSV
|
|
601
632
|
|
602
633
|
def parse_unquoted_column_value
|
603
634
|
value = @scanner.scan_all(@unquoted_value)
|
604
|
-
|
635
|
+
return nil unless value
|
636
|
+
|
637
|
+
@unquoted_column_value = true
|
638
|
+
if @first_column_separators
|
639
|
+
while true
|
640
|
+
@scanner.keep_start
|
641
|
+
is_column_end = @column_ends.all? do |column_end|
|
642
|
+
@scanner.scan(column_end)
|
643
|
+
end
|
644
|
+
@scanner.keep_back
|
645
|
+
break if is_column_end
|
646
|
+
sub_separator = @scanner.scan_all(@first_column_separators)
|
647
|
+
break if sub_separator.nil?
|
648
|
+
value << sub_separator
|
649
|
+
sub_value = @scanner.scan_all(@unquoted_value)
|
650
|
+
break if sub_value.nil?
|
651
|
+
value << sub_value
|
652
|
+
end
|
653
|
+
end
|
605
654
|
value
|
606
655
|
end
|
607
656
|
|
data/lib/csv/version.rb
CHANGED
data/lib/csv/writer.rb
CHANGED
@@ -31,7 +31,10 @@ class CSV
|
|
31
31
|
@headers ||= row if @use_headers
|
32
32
|
@lineno += 1
|
33
33
|
|
34
|
-
|
34
|
+
converted_row = row.collect do |field|
|
35
|
+
quote(field)
|
36
|
+
end
|
37
|
+
line = converted_row.join(@column_separator) + @row_separator
|
35
38
|
if @output_encoding
|
36
39
|
line = line.encode(@output_encoding)
|
37
40
|
end
|
@@ -90,37 +93,16 @@ class CSV
|
|
90
93
|
else
|
91
94
|
@row_separator = row_separator.to_s.encode(@encoding)
|
92
95
|
end
|
93
|
-
quote_character = @options[:quote_character]
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
encoded_quote_character +
|
98
|
-
field.gsub(encoded_quote_character,
|
99
|
-
encoded_quote_character * 2) +
|
100
|
-
encoded_quote_character
|
101
|
-
end
|
102
|
-
if @options[:force_quotes]
|
103
|
-
@quote = quote
|
104
|
-
else
|
105
|
-
quotable_pattern =
|
96
|
+
@quote_character = @options[:quote_character]
|
97
|
+
@force_quotes = @options[:force_quotes]
|
98
|
+
unless @force_quotes
|
99
|
+
@quotable_pattern =
|
106
100
|
Regexp.new("[\r\n".encode(@encoding) +
|
107
101
|
Regexp.escape(@column_separator) +
|
108
|
-
Regexp.escape(quote_character.encode(@encoding)) +
|
102
|
+
Regexp.escape(@quote_character.encode(@encoding)) +
|
109
103
|
"]".encode(@encoding))
|
110
|
-
@quote = lambda do |field|
|
111
|
-
if field.nil? # represent +nil+ fields as empty unquoted fields
|
112
|
-
""
|
113
|
-
else
|
114
|
-
field = String(field) # Stringify fields
|
115
|
-
# represent empty fields as empty quoted fields
|
116
|
-
if field.empty? or quotable_pattern.match?(field)
|
117
|
-
quote.call(field)
|
118
|
-
else
|
119
|
-
field # unquoted field
|
120
|
-
end
|
121
|
-
end
|
122
|
-
end
|
123
104
|
end
|
105
|
+
@quote_empty = @options.fetch(:quote_empty, true)
|
124
106
|
end
|
125
107
|
|
126
108
|
def prepare_output
|
@@ -140,5 +122,32 @@ class CSV
|
|
140
122
|
end
|
141
123
|
end
|
142
124
|
end
|
125
|
+
|
126
|
+
def quote_field(field)
|
127
|
+
field = String(field)
|
128
|
+
encoded_quote_character = @quote_character.encode(field.encoding)
|
129
|
+
encoded_quote_character +
|
130
|
+
field.gsub(encoded_quote_character,
|
131
|
+
encoded_quote_character * 2) +
|
132
|
+
encoded_quote_character
|
133
|
+
end
|
134
|
+
|
135
|
+
def quote(field)
|
136
|
+
if @force_quotes
|
137
|
+
quote_field(field)
|
138
|
+
else
|
139
|
+
if field.nil? # represent +nil+ fields as empty unquoted fields
|
140
|
+
""
|
141
|
+
else
|
142
|
+
field = String(field) # Stringify fields
|
143
|
+
# represent empty fields as empty quoted fields
|
144
|
+
if (@quote_empty and field.empty?) or @quotable_pattern.match?(field)
|
145
|
+
quote_field(field)
|
146
|
+
else
|
147
|
+
field # unquoted field
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
143
152
|
end
|
144
153
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.2
|
4
|
+
version: 3.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Edward Gray II
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2019-01-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -40,7 +40,7 @@ dependencies:
|
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
|
-
name:
|
43
|
+
name: benchmark_driver
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
46
|
- - ">="
|
@@ -109,8 +109,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
requirements: []
|
112
|
-
|
113
|
-
rubygems_version: 3.0.0.beta2
|
112
|
+
rubygems_version: 3.0.2
|
114
113
|
signing_key:
|
115
114
|
specification_version: 4
|
116
115
|
summary: CSV Reading and Writing
|