csvreader 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -0
- data/README.md +12 -22
- data/lib/csvreader.rb +2 -0
- data/lib/csvreader/builder.rb +56 -0
- data/lib/csvreader/parser.rb +1 -1
- data/lib/csvreader/parser_std.rb +39 -0
- data/lib/csvreader/parser_strict.rb +27 -12
- data/lib/csvreader/reader.rb +150 -98
- data/lib/csvreader/version.rb +1 -1
- data/test/data/cars11.csv +10 -0
- data/test/data/cities11.csv +12 -0
- data/test/data/customers11.csv +13 -0
- data/test/test_parser_java.rb +0 -11
- data/test/test_parser_null.rb +107 -0
- data/test/test_parser_strict.rb +11 -23
- data/test/test_reader.rb +13 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c61a8e62f99e1a06c119b4995e0e4e1d3c829d71
|
4
|
+
data.tar.gz: 1b59f3415f3f0fe449a8c2395d2cefc7a7bd855c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52e3e8effa09f492c38736f1fb16552341237ebe0e8e5452a72c080d03f48e47b9d66fbfa1b8f42e3ccd86d37038975254da0e3038f297174636a3b34ce57542
|
7
|
+
data.tar.gz: 66c17daaa22d6e5d526c76b9568737b4982da9f02b11e9264fdb9d9644d5e33881255f623a174515c78714352b2df54c811cb8b6e0222aa5cc79682ac320ee62
|
data/Manifest.txt
CHANGED
@@ -5,6 +5,7 @@ README.md
|
|
5
5
|
Rakefile
|
6
6
|
lib/csvreader.rb
|
7
7
|
lib/csvreader/buffer.rb
|
8
|
+
lib/csvreader/builder.rb
|
8
9
|
lib/csvreader/parser.rb
|
9
10
|
lib/csvreader/parser_std.rb
|
10
11
|
lib/csvreader/parser_strict.rb
|
@@ -14,11 +15,15 @@ lib/csvreader/reader_hash.rb
|
|
14
15
|
lib/csvreader/version.rb
|
15
16
|
test/data/beer.csv
|
16
17
|
test/data/beer11.csv
|
18
|
+
test/data/cars11.csv
|
19
|
+
test/data/cities11.csv
|
20
|
+
test/data/customers11.csv
|
17
21
|
test/data/shakespeare.csv
|
18
22
|
test/helper.rb
|
19
23
|
test/test_parser.rb
|
20
24
|
test/test_parser_formats.rb
|
21
25
|
test/test_parser_java.rb
|
26
|
+
test/test_parser_null.rb
|
22
27
|
test/test_parser_strict.rb
|
23
28
|
test/test_parser_tab.rb
|
24
29
|
test/test_reader.rb
|
data/README.md
CHANGED
@@ -11,14 +11,6 @@
|
|
11
11
|
|
12
12
|
## Usage
|
13
13
|
|
14
|
-
``` ruby
|
15
|
-
line = "1,2,3"
|
16
|
-
values = CsvReader.parse_line( line )
|
17
|
-
pp values
|
18
|
-
# => ["1","2","3"]
|
19
|
-
```
|
20
|
-
|
21
|
-
or use the convenience helpers:
|
22
14
|
|
23
15
|
``` ruby
|
24
16
|
txt <<=TXT
|
@@ -26,21 +18,21 @@ txt <<=TXT
|
|
26
18
|
4,5,6
|
27
19
|
TXT
|
28
20
|
|
29
|
-
records =
|
21
|
+
records = Csv.parse( txt ) ## or CsvReader.parse
|
30
22
|
pp records
|
31
23
|
# => [["1","2","3"],
|
32
24
|
# ["5","6","7"]]
|
33
25
|
|
34
26
|
# -or-
|
35
27
|
|
36
|
-
records =
|
28
|
+
records = Csv.read( "values.csv" ) ## or CsvReader.read
|
37
29
|
pp records
|
38
30
|
# => [["1","2","3"],
|
39
31
|
# ["5","6","7"]]
|
40
32
|
|
41
33
|
# -or-
|
42
34
|
|
43
|
-
|
35
|
+
Csv.foreach( "values.csv" ) do |rec| ## or CsvReader.foreach
|
44
36
|
pp rec
|
45
37
|
end
|
46
38
|
# => ["1","2","3"]
|
@@ -50,7 +42,7 @@ end
|
|
50
42
|
|
51
43
|
### What about headers?
|
52
44
|
|
53
|
-
Use the `
|
45
|
+
Use the `CsvHash`
|
54
46
|
if the first line is a header (or if missing pass in the headers
|
55
47
|
as an array) and you want your records as hashes instead of arrays of strings.
|
56
48
|
Example:
|
@@ -62,7 +54,7 @@ A,B,C
|
|
62
54
|
4,5,6
|
63
55
|
TXT
|
64
56
|
|
65
|
-
records =
|
57
|
+
records = CsvHash.parse( txt ) ## or CsvHashReader.parse
|
66
58
|
pp records
|
67
59
|
|
68
60
|
# -or-
|
@@ -72,7 +64,7 @@ txt2 <<=TXT
|
|
72
64
|
4,5,6
|
73
65
|
TXT
|
74
66
|
|
75
|
-
records =
|
67
|
+
records = CsvHash.parse( txt2, headers: ["A","B","C"] ) ## or CsvHashReader.parse
|
76
68
|
pp records
|
77
69
|
|
78
70
|
# => [{"A": "1", "B": "2", "C": "3"},
|
@@ -80,14 +72,14 @@ pp records
|
|
80
72
|
|
81
73
|
# -or-
|
82
74
|
|
83
|
-
records =
|
75
|
+
records = CsvHash.read( "hash.csv" ) ## or CsvHashReader.read
|
84
76
|
pp records
|
85
77
|
# => [{"A": "1", "B": "2", "C": "3"},
|
86
78
|
# {"A": "4", "B": "5", "C": "6"}]
|
87
79
|
|
88
80
|
# -or-
|
89
81
|
|
90
|
-
|
82
|
+
CsvHash.foreach( "hash.csv" ) do |rec| ## or CsvHashReader.foreach
|
91
83
|
pp rec
|
92
84
|
end
|
93
85
|
# => {"A": "1", "B": "2", "C": "3"}
|
@@ -141,13 +133,11 @@ Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
|
|
141
133
|
Pass in the `sep` keyword option. Example:
|
142
134
|
|
143
135
|
``` ruby
|
144
|
-
|
145
|
-
|
146
|
-
CsvReader.read( ..., sep: ';' )
|
136
|
+
Csv.parse( ..., sep: ';' )
|
137
|
+
Csv.read( ..., sep: ';' )
|
147
138
|
# ...
|
148
|
-
|
149
|
-
|
150
|
-
CsvReader.read( ..., sep: '|' )
|
139
|
+
Csv.parse( ..., sep: '|' )
|
140
|
+
Csv.read( ..., sep: '|' )
|
151
141
|
# ...
|
152
142
|
# and so on
|
153
143
|
```
|
data/lib/csvreader.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
require 'pp'
|
5
5
|
require 'logger'
|
6
|
+
require 'forwardable'
|
6
7
|
|
7
8
|
|
8
9
|
###
|
@@ -13,6 +14,7 @@ require 'csvreader/parser_std' # best practices pre-configured out-of-the-b
|
|
13
14
|
require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space trimming, blanks, etc.), configure for different formats/dialects
|
14
15
|
require 'csvreader/parser_tab'
|
15
16
|
require 'csvreader/parser'
|
17
|
+
require 'csvreader/builder'
|
16
18
|
require 'csvreader/reader'
|
17
19
|
require 'csvreader/reader_hash'
|
18
20
|
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvBuilder ## rename to CsvReaderBuilder - why? why not?
|
5
|
+
def initialize( parser )
|
6
|
+
@parser = parser
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
## todo/fix:
|
11
|
+
## add parser config (attribute) setter e.g.
|
12
|
+
## - sep=(value)
|
13
|
+
## - comment=(value)
|
14
|
+
## - and so on!!!
|
15
|
+
##
|
16
|
+
## add config too - why? why not?
|
17
|
+
|
18
|
+
|
19
|
+
def open( path, mode='r:bom|utf-8',
|
20
|
+
sep: nil,
|
21
|
+
converters: nil,
|
22
|
+
parser: @parser, &block )
|
23
|
+
CsvReader.open( path, mode,
|
24
|
+
sep: sep, converters: converters,
|
25
|
+
parser: @parser, &block )
|
26
|
+
end
|
27
|
+
|
28
|
+
def read( path, sep: nil,
|
29
|
+
converters: nil )
|
30
|
+
CsvReader.read( path,
|
31
|
+
sep: sep, converters: converters,
|
32
|
+
parser: @parser )
|
33
|
+
end
|
34
|
+
|
35
|
+
def header( path, sep: nil )
|
36
|
+
CsvReader.header( path,
|
37
|
+
sep: sep,
|
38
|
+
parser: @parser )
|
39
|
+
end
|
40
|
+
|
41
|
+
def foreach( path, sep: nil,
|
42
|
+
converters: nil, &block )
|
43
|
+
CsvReader.foreach( path,
|
44
|
+
sep: sep, converters: converters,
|
45
|
+
parser: @parser, &block )
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
def parse( data, sep: nil,
|
51
|
+
converters: nil, &block )
|
52
|
+
CsvReader.parse( data,
|
53
|
+
sep: sep, converters: converters,
|
54
|
+
parser: @parser, &block )
|
55
|
+
end
|
56
|
+
end # class CsvBuilder
|
data/lib/csvreader/parser.rb
CHANGED
data/lib/csvreader/parser_std.rb
CHANGED
@@ -39,12 +39,29 @@ def logger() self.class.logger; end
|
|
39
39
|
|
40
40
|
attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
|
41
41
|
|
42
|
+
##
|
43
|
+
## todo/check:
|
44
|
+
## null values - include NA - why? why not?
|
45
|
+
## make null values case sensitive or add an option for case sensitive
|
46
|
+
## or better allow a proc as option for checking too!!!
|
42
47
|
def initialize( null: ['\N', 'NA'] ## note: set to nil for no null values / not available (na)
|
43
48
|
)
|
44
49
|
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
50
|
+
|
51
|
+
## note: null values must get handled by parser
|
52
|
+
## only get checked for unquoted strings (and NOT for quoted strings)
|
53
|
+
## "higher-level" code only knows about strings and has no longer any info if string was quoted or unquoted
|
45
54
|
@config[:null] = null ## null values
|
46
55
|
end
|
47
56
|
|
57
|
+
#########################################
|
58
|
+
## config convenience helpers
|
59
|
+
## e.g. use like Csv.default.null = '\N' etc. instead of
|
60
|
+
## Csv.default.config[:null] = '\N'
|
61
|
+
def null=( value ) @config[:null]=value; end
|
62
|
+
|
63
|
+
|
64
|
+
|
48
65
|
|
49
66
|
def parse( data, **kwargs, &block )
|
50
67
|
|
@@ -132,6 +149,7 @@ def parse_field( input )
|
|
132
149
|
skip_spaces( input ) ## strip leading spaces
|
133
150
|
|
134
151
|
if (c=input.peek; c=="," || c==LF || c==CR || input.eof?) ## empty field
|
152
|
+
value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
|
135
153
|
## return value; do nothing
|
136
154
|
elsif input.peek == DOUBLE_QUOTE
|
137
155
|
logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
@@ -156,6 +174,8 @@ def parse_field( input )
|
|
156
174
|
## note: only strip **trailing** spaces (space and tab only)
|
157
175
|
## do NOT strip newlines etc. might have been added via escape! e.g. \\\n
|
158
176
|
value = value.sub( /[ \t]+$/, '' )
|
177
|
+
value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
|
178
|
+
|
159
179
|
logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
160
180
|
end
|
161
181
|
|
@@ -251,5 +271,24 @@ def parse_lines( input, &block )
|
|
251
271
|
end # method parse_lines
|
252
272
|
|
253
273
|
|
274
|
+
|
275
|
+
|
276
|
+
def is_null?( value )
|
277
|
+
null = @config[:null]
|
278
|
+
if null.nil?
|
279
|
+
false ## nothing set; return always false (not null)
|
280
|
+
elsif null.is_a?( Proc )
|
281
|
+
null.call( value )
|
282
|
+
elsif null.is_a?( Array )
|
283
|
+
null.include?( value )
|
284
|
+
elsif null.is_a?( String )
|
285
|
+
value == null
|
286
|
+
else ## unknown config style / setting
|
287
|
+
## todo: issue a warning or error - why? why not?
|
288
|
+
false ## nothing set; return always false (not null)
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
|
254
293
|
end # class ParserStd
|
255
294
|
end # class CsvReader
|
@@ -35,9 +35,7 @@ def initialize( sep: ',',
|
|
35
35
|
quote: '"', ## note: set to false/nil for no quote
|
36
36
|
doublequote: true,
|
37
37
|
escape: false, ## true/false
|
38
|
-
null:
|
39
|
-
quoted_empty_null: false,
|
40
|
-
unquoted_empty_null: false,
|
38
|
+
null: nil, ## note: set to nil for no null values / not available (na)
|
41
39
|
comment: false ## note: comment char e.g. # or false/nil
|
42
40
|
)
|
43
41
|
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
@@ -46,8 +44,6 @@ def initialize( sep: ',',
|
|
46
44
|
@config[:doublequote] = doublequote
|
47
45
|
@config[:escape] = escape
|
48
46
|
@config[:null] = null
|
49
|
-
@config[:quoted_empty_null] = quoted_empty_null
|
50
|
-
@config[:unquoted_empty_null] = unquoted_empty_null
|
51
47
|
@config[:comment] = comment
|
52
48
|
end
|
53
49
|
|
@@ -55,9 +51,12 @@ end
|
|
55
51
|
## config convenience helpers
|
56
52
|
## e.g. use like Csv.mysql.sep = ',' etc. instead of
|
57
53
|
## Csv.mysql.config[:sep] = ','
|
58
|
-
def sep=( value )
|
59
|
-
def
|
60
|
-
def
|
54
|
+
def sep=( value ) @config[:sep]=value; end
|
55
|
+
def quote=( value ) @config[:quote]=value; end
|
56
|
+
def doublequote=( value ) @config[:doublequote]=value; end
|
57
|
+
def escape=( value ) @config[:escape]=value; end
|
58
|
+
def null=( value ) @config[:null]=value; end
|
59
|
+
def comment=( value ) @config[:comment]=value; end
|
61
60
|
|
62
61
|
|
63
62
|
|
@@ -156,14 +155,11 @@ def parse_field( input, sep: )
|
|
156
155
|
logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
|
157
156
|
|
158
157
|
if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty unquoted field
|
159
|
-
value = nil
|
158
|
+
value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
|
160
159
|
## return value; do nothing
|
161
160
|
elsif quote && input.peek == quote
|
162
161
|
logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
163
162
|
value << parse_quote( input, sep: sep )
|
164
|
-
|
165
|
-
value = nil if config[:quoted_empty_null] && value == ""
|
166
|
-
|
167
163
|
logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
168
164
|
else
|
169
165
|
logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
@@ -177,6 +173,8 @@ def parse_field( input, sep: )
|
|
177
173
|
value << input.getc
|
178
174
|
end
|
179
175
|
end
|
176
|
+
|
177
|
+
value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
|
180
178
|
logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
181
179
|
end
|
182
180
|
|
@@ -265,5 +263,22 @@ def parse_lines( input, sep:, &block )
|
|
265
263
|
end # method parse_lines
|
266
264
|
|
267
265
|
|
266
|
+
def is_null?( value )
|
267
|
+
null = @config[:null]
|
268
|
+
if null.nil?
|
269
|
+
false ## nothing set; return always false (not null)
|
270
|
+
elsif null.is_a?( Proc )
|
271
|
+
null.call( value )
|
272
|
+
elsif null.is_a?( Array )
|
273
|
+
null.include?( value )
|
274
|
+
elsif null.is_a?( String )
|
275
|
+
value == null
|
276
|
+
else ## unknown config style / setting
|
277
|
+
## todo: issue a warning or error - why? why not?
|
278
|
+
false ## nothing set; return always false (not null)
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
|
268
283
|
end # class ParserStrict
|
269
284
|
end # class CsvReader
|
data/lib/csvreader/reader.rb
CHANGED
@@ -1,18 +1,12 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
|
4
|
-
|
5
3
|
class CsvReader
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
STRICT = new( Parser::STRICT )
|
13
|
-
RFC4180 = new( Parser::RFC4180 )
|
14
|
-
EXCEL = new( Parser::EXCEL )
|
15
|
-
TAB = new( Parser::TAB )
|
5
|
+
DEFAULT = CsvBuilder.new( Parser::DEFAULT )
|
6
|
+
STRICT = CsvBuilder.new( Parser::STRICT )
|
7
|
+
RFC4180 = CsvBuilder.new( Parser::RFC4180 )
|
8
|
+
EXCEL = CsvBuilder.new( Parser::EXCEL )
|
9
|
+
TAB = CsvBuilder.new( Parser::TAB )
|
16
10
|
|
17
11
|
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
18
12
|
def self.strict() STRICT; end ## alternative alias for STRICT
|
@@ -21,97 +15,155 @@ class CsvReader
|
|
21
15
|
def self.tab() TAB; end ## alternative alias for TAB
|
22
16
|
|
23
17
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
DEFAULT.parse( data, sep: sep, converters: converters ) do |record|
|
58
|
-
records << record
|
59
|
-
break # only parse first record
|
60
|
-
end
|
61
|
-
records.size == 0 ? nil : records.first
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
#############################
|
67
|
-
## all "high-level" reader methods
|
68
|
-
##
|
69
|
-
## note: allow "overriding" of separator
|
70
|
-
## if sep is not nil otherwise use default dialect/format separator
|
71
|
-
|
72
|
-
def parse( data, sep: nil,
|
73
|
-
converters: nil, &block )
|
74
|
-
kwargs = {
|
75
|
-
## converters: converters ## todo: add converters
|
76
|
-
}
|
77
|
-
## note: only add separator if present/defined (not nil)
|
78
|
-
kwargs[:sep] = sep if sep && @parser.respond_to?( :'sep=' )
|
79
|
-
|
80
|
-
@parser.parse( data, kwargs, &block )
|
81
|
-
end
|
82
|
-
|
83
|
-
def read( path, sep: nil,
|
84
|
-
converters: nil )
|
85
|
-
## note: use our own file.open
|
86
|
-
## always use utf-8 for now
|
87
|
-
## check/todo: add skip option bom too - why? why not?
|
88
|
-
txt = File.open( path, 'r:bom|utf-8' ).read
|
89
|
-
parse( txt, sep: sep )
|
90
|
-
end
|
91
|
-
|
92
|
-
def foreach( path, sep: nil,
|
93
|
-
converters: nil, &block )
|
94
|
-
File.open( path, 'r:bom|utf-8' ) do |file|
|
95
|
-
parse( file, sep: sep, &block )
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
#######
|
22
|
+
## csv reader
|
23
|
+
|
24
|
+
def self.open( path, mode='r:bom|utf-8',
|
25
|
+
sep: nil,
|
26
|
+
converters: nil,
|
27
|
+
parser: nil, &block ) ## rename path to filename or name - why? why not?
|
28
|
+
f = File.open( path, mode )
|
29
|
+
csv = new(f, sep: sep, converters: converters, parser: parser )
|
30
|
+
|
31
|
+
# handle blocks like Ruby's open(), not like the (old old) CSV library
|
32
|
+
if block_given?
|
33
|
+
begin
|
34
|
+
block.call( csv )
|
35
|
+
ensure
|
36
|
+
csv.close
|
37
|
+
end
|
38
|
+
else
|
39
|
+
csv
|
40
|
+
end
|
41
|
+
end # method self.open
|
42
|
+
|
43
|
+
|
44
|
+
def self.read( path, sep: nil,
|
45
|
+
converters: nil,
|
46
|
+
parser: nil )
|
47
|
+
open( path,
|
48
|
+
sep: sep,
|
49
|
+
converters: converters,
|
50
|
+
parser: parser ) { |csv| csv.read }
|
96
51
|
end
|
97
|
-
end
|
98
52
|
|
99
|
-
def header( path, sep: nil ) ## use header or headers - or use both (with alias)?
|
100
|
-
# read first lines (only)
|
101
|
-
# and parse with csv to get header from csv library itself
|
102
53
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
54
|
+
def self.header( path, sep: nil, parser: nil ) ## use header or headers - or use both (with alias)?
|
55
|
+
# read first lines (only)
|
56
|
+
# and parse with csv to get header from csv library itself
|
57
|
+
|
58
|
+
records = []
|
59
|
+
open( path, sep: sep, parser: parser ) do |csv|
|
60
|
+
csv.each do |record|
|
61
|
+
records << record
|
62
|
+
break ## only parse/read first record
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
## unwrap record if empty return nil - why? why not?
|
67
|
+
## return empty record e.g. [] - why? why not?
|
68
|
+
## returns nil for empty (for now) - why? why not?
|
69
|
+
records.size == 0 ? nil : records.first
|
70
|
+
end # method self.header
|
71
|
+
|
72
|
+
|
73
|
+
def self.foreach( path, sep: nil,
|
74
|
+
converters: nil, parser: nil, &block )
|
75
|
+
csv = open( path, sep: sep, converters: converters, parser: parser )
|
76
|
+
|
77
|
+
if block_given?
|
78
|
+
begin
|
79
|
+
csv.each( &block )
|
80
|
+
ensure
|
81
|
+
csv.close
|
108
82
|
end
|
109
|
-
|
83
|
+
else
|
84
|
+
csv.to_enum ## note: caller (responsible) must close file!!!
|
85
|
+
## remove version without block given - why? why not?
|
86
|
+
## use Csv.open().to_enum or Csv.open().each
|
87
|
+
## or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
|
88
|
+
end
|
89
|
+
end # method self.foreach
|
90
|
+
|
91
|
+
|
92
|
+
def self.parse( data, sep: nil,
|
93
|
+
converters: nil,
|
94
|
+
parser: nil, &block )
|
95
|
+
csv = new( data, sep: sep, converters: converters, parser: parser )
|
96
|
+
|
97
|
+
if block_given?
|
98
|
+
csv.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
99
|
+
else # slurp contents, if no block is given
|
100
|
+
csv.read ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
101
|
+
end
|
102
|
+
end # method self.parse
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
############################
|
107
|
+
## note: only add parse_line convenience helper for default
|
108
|
+
## always use parse (do NOT/NOT/NOT use parse_line) - why? why not?
|
109
|
+
## todo/fix: remove parse_line!!!
|
110
|
+
def self.parse_line( data, sep: nil,
|
111
|
+
converters: nil )
|
112
|
+
records = []
|
113
|
+
parse( data, sep: sep, converters: converters ) do |record|
|
114
|
+
records << record
|
115
|
+
break # only parse first record
|
116
|
+
end
|
117
|
+
records.size == 0 ? nil : records.first
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
def initialize( data, sep: nil, converters: nil, parser: nil )
|
124
|
+
raise ArgumentError.new( "Cannot parse nil as CSV" ) if data.nil?
|
125
|
+
## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV" if data.nil?
|
126
|
+
|
127
|
+
# create the IO object we will read from
|
128
|
+
@io = data.is_a?(String) ? StringIO.new(data) : data
|
129
|
+
|
130
|
+
@sep = sep
|
131
|
+
@converters = converters
|
132
|
+
|
133
|
+
@parser = parser.nil? ? Parser::DEFAULT : parser
|
134
|
+
end
|
135
|
+
|
136
|
+
|
137
|
+
### IO and StringIO Delegation ###
|
138
|
+
extend Forwardable
|
139
|
+
def_delegators :@io,
|
140
|
+
:close, :closed?, :eof, :eof?
|
141
|
+
|
142
|
+
## add more - why? why not?
|
143
|
+
## def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
|
144
|
+
## :closed?, :eof, :eof?, :external_encoding, :fcntl,
|
145
|
+
## :fileno, :flock, :flush, :fsync, :internal_encoding,
|
146
|
+
## :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
|
147
|
+
## :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
|
148
|
+
## :to_io, :truncate, :tty?
|
149
|
+
|
150
|
+
|
151
|
+
include Enumerable
|
152
|
+
|
153
|
+
def each( &block )
|
154
|
+
if block_given?
|
155
|
+
kwargs = {
|
156
|
+
## converters: converters ## todo: add converters
|
157
|
+
}
|
158
|
+
## note: only add separator if present/defined (not nil)
|
159
|
+
kwargs[:sep] = @sep if @sep && @parser.respond_to?( :'sep=' )
|
160
|
+
|
161
|
+
@parser.parse( @io, kwargs, &block )
|
162
|
+
else
|
163
|
+
to_enum
|
164
|
+
end
|
165
|
+
end # method each
|
110
166
|
|
111
|
-
|
112
|
-
## return empty record e.g. [] - why? why not?
|
113
|
-
## returns nil for empty (for now) - why? why not?
|
114
|
-
records.size == 0 ? nil : records.first
|
115
|
-
end # method self.header
|
167
|
+
def read() to_a; end # method read
|
116
168
|
|
117
169
|
end # class CsvReader
|
data/lib/csvreader/version.rb
CHANGED
@@ -0,0 +1,10 @@
|
|
1
|
+
#####
|
2
|
+
# csv sample from the wikipedia article "Comma-separated values"
|
3
|
+
# see en.wikipedia.org/wiki/Comma-separated_values
|
4
|
+
|
5
|
+
Year,Make,Model,Description,Price
|
6
|
+
1997, Ford, E350,"ac, abs, moon",3000.00
|
7
|
+
1999, Chevy, "Venture ""Extended Edition""","",4900.00
|
8
|
+
1999, Chevy, "Venture ""Extended Edition, Very Large""",,5000.00
|
9
|
+
1996, Jeep, Grand Cherokee,"MUST SELL!
|
10
|
+
air, moon roof, loaded",4799.00
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#####
|
2
|
+
# csv sample from the wikipedia article "Comma-separated values"
|
3
|
+
# see en.wikipedia.org/wiki/Comma-separated_values
|
4
|
+
#
|
5
|
+
# note:
|
6
|
+
# Double quote processing need only apply if the field starts
|
7
|
+
# with a double quote. Note, however, that double quotes are not
|
8
|
+
# allowed in unquoted fields according to RFC 4180
|
9
|
+
|
10
|
+
Los Angeles, 34°03'N, 118°15'W
|
11
|
+
New York City, 40°42'46"N, 74°00'21"W
|
12
|
+
Paris, 48°51'24"N, 2°21'03"E
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#####
|
2
|
+
# csv sample from the article:
|
3
|
+
# A Guide to the Ruby CSV Library, Part I
|
4
|
+
# - sitepoint.com/guide-ruby-csv-library-part
|
5
|
+
|
6
|
+
Name,Times arrived,Total $ spent,Food feedback
|
7
|
+
Dan, 34, 2548, Lovin it!
|
8
|
+
Maria, 55, 5054, "Good, delicious food"
|
9
|
+
Carlos, 22, 4352, "I am ""pleased"", but could be better"
|
10
|
+
Stephany, 34, 6542, I want bigger steaks!!!!!
|
11
|
+
James, 1, 43, Not bad
|
12
|
+
Robin, 1, 56, Fish is tasty
|
13
|
+
Anna, 1, 79, "Good, better, the best!"
|
data/test/test_parser_java.rb
CHANGED
@@ -205,15 +205,4 @@ def test_lf
|
|
205
205
|
parser.default.parse( "character" + LF + "NotEscaped" )
|
206
206
|
end
|
207
207
|
|
208
|
-
|
209
|
-
|
210
|
-
def test_escaped_mysql_null_value
|
211
|
-
## MySQL uses \N to symbolize null values. We have to restore this
|
212
|
-
|
213
|
-
## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
|
214
|
-
## only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
|
215
|
-
assert_equal [[ "character\\NEscaped" ]],
|
216
|
-
parser.default.parse( "character\\NEscaped" )
|
217
|
-
end
|
218
|
-
|
219
208
|
end # class TestParserJava
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_null.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestParserNull < MiniTest::Test
|
12
|
+
|
13
|
+
|
14
|
+
def parser
|
15
|
+
CsvReader::Parser
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
def test_escaped_mysql_null_value
|
20
|
+
## MySQL uses \N to symbolize null values. We have to restore this
|
21
|
+
|
22
|
+
## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
|
23
|
+
## only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
|
24
|
+
assert_equal [[ "character\\NEscaped" ]],
|
25
|
+
parser.default.parse( "character\\NEscaped" )
|
26
|
+
|
27
|
+
assert_equal [[ "character\\NEscaped" ]],
|
28
|
+
parser.strict.parse( "character\\NEscaped" )
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
def test_mysql_null_value
|
33
|
+
default_null_values = parser.default.config[:null] ## save default null settings
|
34
|
+
|
35
|
+
assert_equal [[ nil, nil, "" ]],
|
36
|
+
parser.default.parse( "\\N, \\N ," )
|
37
|
+
|
38
|
+
## escaped with quotes
|
39
|
+
assert_equal [[ "\\N", "\\N", "" ]],
|
40
|
+
parser.default.parse( %Q{"\\N", "\\N" ,} )
|
41
|
+
|
42
|
+
## try single \N setting
|
43
|
+
parser.default.null = "\\N"
|
44
|
+
assert_equal [[ nil, nil, "" ]],
|
45
|
+
parser.default.parse( "\\N, \\N ," )
|
46
|
+
|
47
|
+
## try no null values setting
|
48
|
+
parser.default.null = nil
|
49
|
+
assert_equal [[ "\\N", "\\N", "" ]],
|
50
|
+
parser.default.parse( "\\N, \\N ," )
|
51
|
+
|
52
|
+
## try postgresql unquoted empty string is nil/null
|
53
|
+
parser.default.null = ""
|
54
|
+
assert_equal [[ nil, nil, "" ],
|
55
|
+
[ nil, nil, "", nil ]],
|
56
|
+
parser.default.parse( %Q{,,""\n , , "" ,} )
|
57
|
+
|
58
|
+
## try proc
|
59
|
+
parser.default.null = ->(value) { value.downcase == 'nil' }
|
60
|
+
assert_equal [[ nil, nil, nil, "" ]],
|
61
|
+
parser.default.parse( "nil, Nil, NIL," )
|
62
|
+
|
63
|
+
## try array
|
64
|
+
parser.default.null = ['nil', 'Nil', 'NIL']
|
65
|
+
assert_equal [[ nil, nil, nil, "" ]],
|
66
|
+
parser.default.parse( "nil, Nil, NIL," )
|
67
|
+
|
68
|
+
## restore defaults
|
69
|
+
parser.default.null = default_null_values ## ['\N', 'NA']
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
def test_strict_mysql_null_value
|
74
|
+
assert_equal [[ "\\N", " \\N ", "" ]],
|
75
|
+
parser.strict.parse( "\\N, \\N ," )
|
76
|
+
|
77
|
+
## try single \N setting
|
78
|
+
parser.strict.null = "\\N"
|
79
|
+
assert_equal [[ nil, nil, " \\N", "\\N ", "" ]],
|
80
|
+
parser.strict.parse( "\\N,\\N, \\N,\\N ," )
|
81
|
+
|
82
|
+
## escaped with quotes
|
83
|
+
assert_equal [[ "\\N", "\\N", nil, "" ]],
|
84
|
+
parser.strict.parse( %Q{"\\N","\\N",\\N,} )
|
85
|
+
|
86
|
+
|
87
|
+
## try postgresql unquoted empty string is nil/null
|
88
|
+
parser.strict.null = ""
|
89
|
+
assert_equal [[ nil, nil, "" ],
|
90
|
+
[ " ", " ", "", nil ]],
|
91
|
+
parser.strict.parse( %Q{,,""\n , ,"",} )
|
92
|
+
|
93
|
+
## try proc
|
94
|
+
parser.strict.null = ->(value) { value.downcase == 'nil' }
|
95
|
+
assert_equal [[ nil, nil, nil, "" ]],
|
96
|
+
parser.strict.parse( "nil,Nil,NIL," )
|
97
|
+
|
98
|
+
## try array
|
99
|
+
parser.strict.null = ['nil', 'Nil', 'NIL']
|
100
|
+
assert_equal [[ nil, nil, nil, "" ]],
|
101
|
+
parser.strict.parse( "nil,Nil,NIL," )
|
102
|
+
|
103
|
+
## restore defaults
|
104
|
+
parser.strict.null = nil
|
105
|
+
end
|
106
|
+
|
107
|
+
end # class TestParserNull
|
data/test/test_parser_strict.rb
CHANGED
@@ -60,32 +60,20 @@ end
|
|
60
60
|
|
61
61
|
|
62
62
|
def test_parse_empties
|
63
|
-
assert_equal [["","",""],["","",""]],
|
64
|
-
|
65
|
-
parser.config[:quoted_empty_null] = true
|
66
|
-
|
67
|
-
assert_equal true, parser.config[:quoted_empty_null]
|
68
|
-
assert_equal false, parser.config[:unquoted_empty_null]
|
69
|
-
|
70
|
-
assert_equal [[nil,nil,nil," "],["","",""," "]], parser.parse( %Q{"","",""," "\n,,, } )
|
71
|
-
|
72
|
-
|
73
|
-
parser.config[:unquoted_empty_null] = true
|
74
|
-
|
75
|
-
assert_equal true, parser.config[:quoted_empty_null]
|
76
|
-
assert_equal true, parser.config[:unquoted_empty_null]
|
77
|
-
|
78
|
-
assert_equal [[nil,nil,nil," "],[nil,nil,nil," "]], parser.parse( %Q{"","",""," "\n,,, } )
|
63
|
+
assert_equal [["","",""],["","",""]],
|
64
|
+
parser.parse( %Q{"","",""\n,,} )
|
79
65
|
|
66
|
+
parser.null = ""
|
67
|
+
assert_equal [["","",""," "],[nil,nil,nil," "]],
|
68
|
+
parser.parse( %Q{"","",""," "\n,,, } )
|
69
|
+
parser.null = [""] ## try array (allows multiple null values)
|
70
|
+
assert_equal [[nil,nil,nil," "],["","",""," "]],
|
71
|
+
parser.parse( %Q{,,, \n"","",""," "} )
|
80
72
|
|
81
73
|
## reset to defaults
|
82
|
-
parser.
|
83
|
-
|
84
|
-
|
85
|
-
assert_equal false, parser.config[:quoted_empty_null]
|
86
|
-
assert_equal false, parser.config[:unquoted_empty_null]
|
87
|
-
|
88
|
-
assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
|
74
|
+
parser.null = nil
|
75
|
+
assert_equal [["","",""],["","",""]],
|
76
|
+
parser.parse( %Q{"","",""\n,,} )
|
89
77
|
end
|
90
78
|
|
91
79
|
|
data/test/test_reader.rb
CHANGED
@@ -77,4 +77,17 @@ def test_foreach
|
|
77
77
|
assert true
|
78
78
|
end
|
79
79
|
|
80
|
+
|
81
|
+
def test_enum
|
82
|
+
csv = CsvReader.new( "a,b,c" )
|
83
|
+
enum = csv.to_enum
|
84
|
+
assert_equal ["a","b","c"], enum.next
|
85
|
+
|
86
|
+
## test Csv == CsvReader class alias
|
87
|
+
csv = Csv.new( "a,b,c" )
|
88
|
+
enum = csv.to_enum
|
89
|
+
assert_equal ["a","b","c"], enum.next
|
90
|
+
end
|
91
|
+
|
92
|
+
|
80
93
|
end # class TestReader
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -56,6 +56,7 @@ files:
|
|
56
56
|
- Rakefile
|
57
57
|
- lib/csvreader.rb
|
58
58
|
- lib/csvreader/buffer.rb
|
59
|
+
- lib/csvreader/builder.rb
|
59
60
|
- lib/csvreader/parser.rb
|
60
61
|
- lib/csvreader/parser_std.rb
|
61
62
|
- lib/csvreader/parser_strict.rb
|
@@ -65,11 +66,15 @@ files:
|
|
65
66
|
- lib/csvreader/version.rb
|
66
67
|
- test/data/beer.csv
|
67
68
|
- test/data/beer11.csv
|
69
|
+
- test/data/cars11.csv
|
70
|
+
- test/data/cities11.csv
|
71
|
+
- test/data/customers11.csv
|
68
72
|
- test/data/shakespeare.csv
|
69
73
|
- test/helper.rb
|
70
74
|
- test/test_parser.rb
|
71
75
|
- test/test_parser_formats.rb
|
72
76
|
- test/test_parser_java.rb
|
77
|
+
- test/test_parser_null.rb
|
73
78
|
- test/test_parser_strict.rb
|
74
79
|
- test/test_parser_tab.rb
|
75
80
|
- test/test_reader.rb
|