csvreader 0.6.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +5 -0
- data/README.md +12 -22
- data/lib/csvreader.rb +2 -0
- data/lib/csvreader/builder.rb +56 -0
- data/lib/csvreader/parser.rb +1 -1
- data/lib/csvreader/parser_std.rb +39 -0
- data/lib/csvreader/parser_strict.rb +27 -12
- data/lib/csvreader/reader.rb +150 -98
- data/lib/csvreader/version.rb +1 -1
- data/test/data/cars11.csv +10 -0
- data/test/data/cities11.csv +12 -0
- data/test/data/customers11.csv +13 -0
- data/test/test_parser_java.rb +0 -11
- data/test/test_parser_null.rb +107 -0
- data/test/test_parser_strict.rb +11 -23
- data/test/test_reader.rb +13 -0
- metadata +7 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c61a8e62f99e1a06c119b4995e0e4e1d3c829d71
|
4
|
+
data.tar.gz: 1b59f3415f3f0fe449a8c2395d2cefc7a7bd855c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 52e3e8effa09f492c38736f1fb16552341237ebe0e8e5452a72c080d03f48e47b9d66fbfa1b8f42e3ccd86d37038975254da0e3038f297174636a3b34ce57542
|
7
|
+
data.tar.gz: 66c17daaa22d6e5d526c76b9568737b4982da9f02b11e9264fdb9d9644d5e33881255f623a174515c78714352b2df54c811cb8b6e0222aa5cc79682ac320ee62
|
data/Manifest.txt
CHANGED
@@ -5,6 +5,7 @@ README.md
|
|
5
5
|
Rakefile
|
6
6
|
lib/csvreader.rb
|
7
7
|
lib/csvreader/buffer.rb
|
8
|
+
lib/csvreader/builder.rb
|
8
9
|
lib/csvreader/parser.rb
|
9
10
|
lib/csvreader/parser_std.rb
|
10
11
|
lib/csvreader/parser_strict.rb
|
@@ -14,11 +15,15 @@ lib/csvreader/reader_hash.rb
|
|
14
15
|
lib/csvreader/version.rb
|
15
16
|
test/data/beer.csv
|
16
17
|
test/data/beer11.csv
|
18
|
+
test/data/cars11.csv
|
19
|
+
test/data/cities11.csv
|
20
|
+
test/data/customers11.csv
|
17
21
|
test/data/shakespeare.csv
|
18
22
|
test/helper.rb
|
19
23
|
test/test_parser.rb
|
20
24
|
test/test_parser_formats.rb
|
21
25
|
test/test_parser_java.rb
|
26
|
+
test/test_parser_null.rb
|
22
27
|
test/test_parser_strict.rb
|
23
28
|
test/test_parser_tab.rb
|
24
29
|
test/test_reader.rb
|
data/README.md
CHANGED
@@ -11,14 +11,6 @@
|
|
11
11
|
|
12
12
|
## Usage
|
13
13
|
|
14
|
-
``` ruby
|
15
|
-
line = "1,2,3"
|
16
|
-
values = CsvReader.parse_line( line )
|
17
|
-
pp values
|
18
|
-
# => ["1","2","3"]
|
19
|
-
```
|
20
|
-
|
21
|
-
or use the convenience helpers:
|
22
14
|
|
23
15
|
``` ruby
|
24
16
|
txt = <<TXT
|
@@ -26,21 +18,21 @@ txt <<=TXT
|
|
26
18
|
4,5,6
|
27
19
|
TXT
|
28
20
|
|
29
|
-
records =
|
21
|
+
records = Csv.parse( txt ) ## or CsvReader.parse
|
30
22
|
pp records
|
31
23
|
# => [["1","2","3"],
|
32
24
|
# ["4","5","6"]]
|
33
25
|
|
34
26
|
# -or-
|
35
27
|
|
36
|
-
records =
|
28
|
+
records = Csv.read( "values.csv" ) ## or CsvReader.read
|
37
29
|
pp records
|
38
30
|
# => [["1","2","3"],
|
39
31
|
# ["5","6","7"]]
|
40
32
|
|
41
33
|
# -or-
|
42
34
|
|
43
|
-
|
35
|
+
Csv.foreach( "values.csv" ) do |rec| ## or CsvReader.foreach
|
44
36
|
pp rec
|
45
37
|
end
|
46
38
|
# => ["1","2","3"]
|
@@ -50,7 +42,7 @@ end
|
|
50
42
|
|
51
43
|
### What about headers?
|
52
44
|
|
53
|
-
Use the `
|
45
|
+
Use the `CsvHash`
|
54
46
|
if the first line is a header (or if missing pass in the headers
|
55
47
|
as an array) and you want your records as hashes instead of arrays of strings.
|
56
48
|
Example:
|
@@ -62,7 +54,7 @@ A,B,C
|
|
62
54
|
4,5,6
|
63
55
|
TXT
|
64
56
|
|
65
|
-
records =
|
57
|
+
records = CsvHash.parse( txt ) ## or CsvHashReader.parse
|
66
58
|
pp records
|
67
59
|
|
68
60
|
# -or-
|
@@ -72,7 +64,7 @@ txt2 <<=TXT
|
|
72
64
|
4,5,6
|
73
65
|
TXT
|
74
66
|
|
75
|
-
records =
|
67
|
+
records = CsvHash.parse( txt2, headers: ["A","B","C"] ) ## or CsvHashReader.parse
|
76
68
|
pp records
|
77
69
|
|
78
70
|
# => [{"A": "1", "B": "2", "C": "3"},
|
@@ -80,14 +72,14 @@ pp records
|
|
80
72
|
|
81
73
|
# -or-
|
82
74
|
|
83
|
-
records =
|
75
|
+
records = CsvHash.read( "hash.csv" ) ## or CsvHashReader.read
|
84
76
|
pp records
|
85
77
|
# => [{"A": "1", "B": "2", "C": "3"},
|
86
78
|
# {"A": "4", "B": "5", "C": "6"}]
|
87
79
|
|
88
80
|
# -or-
|
89
81
|
|
90
|
-
|
82
|
+
CsvHash.foreach( "hash.csv" ) do |rec| ## or CsvHashReader.foreach
|
91
83
|
pp rec
|
92
84
|
end
|
93
85
|
# => {"A": "1", "B": "2", "C": "3"}
|
@@ -141,13 +133,11 @@ Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
|
|
141
133
|
Pass in the `sep` keyword option. Example:
|
142
134
|
|
143
135
|
``` ruby
|
144
|
-
|
145
|
-
|
146
|
-
CsvReader.read( ..., sep: ';' )
|
136
|
+
Csv.parse( ..., sep: ';' )
|
137
|
+
Csv.read( ..., sep: ';' )
|
147
138
|
# ...
|
148
|
-
|
149
|
-
|
150
|
-
CsvReader.read( ..., sep: '|' )
|
139
|
+
Csv.parse( ..., sep: '|' )
|
140
|
+
Csv.read( ..., sep: '|' )
|
151
141
|
# ...
|
152
142
|
# and so on
|
153
143
|
```
|
data/lib/csvreader.rb
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
|
4
4
|
require 'pp'
|
5
5
|
require 'logger'
|
6
|
+
require 'forwardable'
|
6
7
|
|
7
8
|
|
8
9
|
###
|
@@ -13,6 +14,7 @@ require 'csvreader/parser_std' # best practices pre-configured out-of-the-b
|
|
13
14
|
require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space triming, blanks, etc.), configure for different formats/dialects
|
14
15
|
require 'csvreader/parser_tab'
|
15
16
|
require 'csvreader/parser'
|
17
|
+
require 'csvreader/builder'
|
16
18
|
require 'csvreader/reader'
|
17
19
|
require 'csvreader/reader_hash'
|
18
20
|
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvBuilder ## rename to CsvReaderBuilder - why? why not?
|
5
|
+
def initialize( parser )
|
6
|
+
@parser = parser
|
7
|
+
end
|
8
|
+
|
9
|
+
|
10
|
+
## todo/fix:
|
11
|
+
## add parser config (attribute) setter e.g.
|
12
|
+
## - sep=(value)
|
13
|
+
## - comment=(value)
|
14
|
+
## - and so on!!!
|
15
|
+
##
|
16
|
+
## add config too - why? why not?
|
17
|
+
|
18
|
+
|
19
|
+
def open( path, mode='r:bom|utf-8',
|
20
|
+
sep: nil,
|
21
|
+
converters: nil,
|
22
|
+
parser: @parser, &block )
|
23
|
+
CsvReader.open( path, mode,
|
24
|
+
sep: sep, converters: converters,
|
25
|
+
parser: @parser, &block )
|
26
|
+
end
|
27
|
+
|
28
|
+
def read( path, sep: nil,
|
29
|
+
converters: nil )
|
30
|
+
CsvReader.read( path,
|
31
|
+
sep: sep, converters: converters,
|
32
|
+
parser: @parser )
|
33
|
+
end
|
34
|
+
|
35
|
+
def header( path, sep: nil )
|
36
|
+
CsvReader.header( path,
|
37
|
+
sep: sep,
|
38
|
+
parser: @parser )
|
39
|
+
end
|
40
|
+
|
41
|
+
def foreach( path, sep: nil,
|
42
|
+
converters: nil, &block )
|
43
|
+
CsvReader.foreach( path,
|
44
|
+
sep: sep, converters: converters,
|
45
|
+
parser: @parser, &block )
|
46
|
+
end
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
def parse( data, sep: nil,
|
51
|
+
converters: nil, &block )
|
52
|
+
CsvReader.parse( data,
|
53
|
+
sep: sep, converters: converters,
|
54
|
+
parser: @parser, &block )
|
55
|
+
end
|
56
|
+
end # class CsvBuilder
|
data/lib/csvreader/parser.rb
CHANGED
data/lib/csvreader/parser_std.rb
CHANGED
@@ -39,12 +39,29 @@ def logger() self.class.logger; end
|
|
39
39
|
|
40
40
|
attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
|
41
41
|
|
42
|
+
##
|
43
|
+
## todo/check:
|
44
|
+
## null values - include NA - why? why not?
|
45
|
+
## make null values case sensitive or add an option for case sensitive
|
46
|
+
## or better allow a proc as option for checking too!!!
|
42
47
|
def initialize( null: ['\N', 'NA'] ## note: set to nil for no null values / not available (na)
|
43
48
|
)
|
44
49
|
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
50
|
+
|
51
|
+
## note: null values must get handled by parser
|
52
|
+
## only get checked for unquoted strings (and NOT for quoted strings)
|
53
|
+
## "higher-level" code only knows about strings and no longer has any info on whether a string was quoted or unquoted
|
45
54
|
@config[:null] = null ## null values
|
46
55
|
end
|
47
56
|
|
57
|
+
#########################################
|
58
|
+
## config convenience helpers
|
59
|
+
## e.g. use like Csv.default.null = '\N' etc. instead of
|
60
|
+
## Csv.default.config[:null] = '\N'
|
61
|
+
def null=( value ) @config[:null]=value; end
|
62
|
+
|
63
|
+
|
64
|
+
|
48
65
|
|
49
66
|
def parse( data, **kwargs, &block )
|
50
67
|
|
@@ -132,6 +149,7 @@ def parse_field( input )
|
|
132
149
|
skip_spaces( input ) ## strip leading spaces
|
133
150
|
|
134
151
|
if (c=input.peek; c=="," || c==LF || c==CR || input.eof?) ## empty field
|
152
|
+
value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
|
135
153
|
## return value; do nothing
|
136
154
|
elsif input.peek == DOUBLE_QUOTE
|
137
155
|
logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
@@ -156,6 +174,8 @@ def parse_field( input )
|
|
156
174
|
## note: only strip **trailing** spaces (space and tab only)
|
157
175
|
## do NOT strip newlines etc. might have been added via escape! e.g. \\\n
|
158
176
|
value = value.sub( /[ \t]+$/, '' )
|
177
|
+
value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
|
178
|
+
|
159
179
|
logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
160
180
|
end
|
161
181
|
|
@@ -251,5 +271,24 @@ def parse_lines( input, &block )
|
|
251
271
|
end # method parse_lines
|
252
272
|
|
253
273
|
|
274
|
+
|
275
|
+
|
276
|
+
def is_null?( value )
|
277
|
+
null = @config[:null]
|
278
|
+
if null.nil?
|
279
|
+
false ## nothing set; return always false (not null)
|
280
|
+
elsif null.is_a?( Proc )
|
281
|
+
null.call( value )
|
282
|
+
elsif null.is_a?( Array )
|
283
|
+
null.include?( value )
|
284
|
+
elsif null.is_a?( String )
|
285
|
+
value == null
|
286
|
+
else ## unknown config style / setting
|
287
|
+
## todo: issue a warning or error - why? why not?
|
288
|
+
false ## nothing set; return always false (not null)
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
|
254
293
|
end # class ParserStd
|
255
294
|
end # class CsvReader
|
@@ -35,9 +35,7 @@ def initialize( sep: ',',
|
|
35
35
|
quote: '"', ## note: set to false/nil for no quote
|
36
36
|
doublequote: true,
|
37
37
|
escape: false, ## true/false
|
38
|
-
null:
|
39
|
-
quoted_empty_null: false,
|
40
|
-
unquoted_empty_null: false,
|
38
|
+
null: nil, ## note: set to nil for no null values / not available (na)
|
41
39
|
comment: false ## note: comment char e.g. # or false/nil
|
42
40
|
)
|
43
41
|
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
@@ -46,8 +44,6 @@ def initialize( sep: ',',
|
|
46
44
|
@config[:doublequote] = doublequote
|
47
45
|
@config[:escape] = escape
|
48
46
|
@config[:null] = null
|
49
|
-
@config[:quoted_empty_null] = quoted_empty_null
|
50
|
-
@config[:unquoted_empty_null] = unquoted_empty_null
|
51
47
|
@config[:comment] = comment
|
52
48
|
end
|
53
49
|
|
@@ -55,9 +51,12 @@ end
|
|
55
51
|
## config convenience helpers
|
56
52
|
## e.g. use like Csv.mysql.sep = ',' etc. instead of
|
57
53
|
## Csv.mysql.config[:sep] = ','
|
58
|
-
def sep=( value )
|
59
|
-
def
|
60
|
-
def
|
54
|
+
def sep=( value ) @config[:sep]=value; end
|
55
|
+
def quote=( value ) @config[:quote]=value; end
|
56
|
+
def doublequote=( value ) @config[:doublequote]=value; end
|
57
|
+
def escape=( value ) @config[:escape]=value; end
|
58
|
+
def null=( value ) @config[:null]=value; end
|
59
|
+
def comment=( value ) @config[:comment]=value; end
|
61
60
|
|
62
61
|
|
63
62
|
|
@@ -156,14 +155,11 @@ def parse_field( input, sep: )
|
|
156
155
|
logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
|
157
156
|
|
158
157
|
if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty unquoted field
|
159
|
-
value = nil
|
158
|
+
value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
|
160
159
|
## return value; do nothing
|
161
160
|
elsif quote && input.peek == quote
|
162
161
|
logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
163
162
|
value << parse_quote( input, sep: sep )
|
164
|
-
|
165
|
-
value = nil if config[:quoted_empty_null] && value == ""
|
166
|
-
|
167
163
|
logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
168
164
|
else
|
169
165
|
logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
@@ -177,6 +173,8 @@ def parse_field( input, sep: )
|
|
177
173
|
value << input.getc
|
178
174
|
end
|
179
175
|
end
|
176
|
+
|
177
|
+
value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
|
180
178
|
logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
181
179
|
end
|
182
180
|
|
@@ -265,5 +263,22 @@ def parse_lines( input, sep:, &block )
|
|
265
263
|
end # method parse_lines
|
266
264
|
|
267
265
|
|
266
|
+
def is_null?( value )
|
267
|
+
null = @config[:null]
|
268
|
+
if null.nil?
|
269
|
+
false ## nothing set; return always false (not null)
|
270
|
+
elsif null.is_a?( Proc )
|
271
|
+
null.call( value )
|
272
|
+
elsif null.is_a?( Array )
|
273
|
+
null.include?( value )
|
274
|
+
elsif null.is_a?( String )
|
275
|
+
value == null
|
276
|
+
else ## unknown config style / setting
|
277
|
+
## todo: issue a warning or error - why? why not?
|
278
|
+
false ## nothing set; return always false (not null)
|
279
|
+
end
|
280
|
+
end
|
281
|
+
|
282
|
+
|
268
283
|
end # class ParserStrict
|
269
284
|
end # class CsvReader
|
data/lib/csvreader/reader.rb
CHANGED
@@ -1,18 +1,12 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
|
4
|
-
|
5
3
|
class CsvReader
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
STRICT = new( Parser::STRICT )
|
13
|
-
RFC4180 = new( Parser::RFC4180 )
|
14
|
-
EXCEL = new( Parser::EXCEL )
|
15
|
-
TAB = new( Parser::TAB )
|
5
|
+
DEFAULT = CsvBuilder.new( Parser::DEFAULT )
|
6
|
+
STRICT = CsvBuilder.new( Parser::STRICT )
|
7
|
+
RFC4180 = CsvBuilder.new( Parser::RFC4180 )
|
8
|
+
EXCEL = CsvBuilder.new( Parser::EXCEL )
|
9
|
+
TAB = CsvBuilder.new( Parser::TAB )
|
16
10
|
|
17
11
|
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
18
12
|
def self.strict() STRICT; end ## alternative alias for STRICT
|
@@ -21,97 +15,155 @@ class CsvReader
|
|
21
15
|
def self.tab() TAB; end ## alternative alias for TAB
|
22
16
|
|
23
17
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
DEFAULT.parse( data, sep: sep, converters: converters ) do |record|
|
58
|
-
records << record
|
59
|
-
break # only parse first record
|
60
|
-
end
|
61
|
-
records.size == 0 ? nil : records.first
|
62
|
-
end
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
#############################
|
67
|
-
## all "high-level" reader methods
|
68
|
-
##
|
69
|
-
## note: allow "overriding" of separator
|
70
|
-
## if sep is not nil otherwise use default dialect/format separator
|
71
|
-
|
72
|
-
def parse( data, sep: nil,
|
73
|
-
converters: nil, &block )
|
74
|
-
kwargs = {
|
75
|
-
## converters: converters ## todo: add converters
|
76
|
-
}
|
77
|
-
## note: only add separator if present/defined (not nil)
|
78
|
-
kwargs[:sep] = sep if sep && @parser.respond_to?( :'sep=' )
|
79
|
-
|
80
|
-
@parser.parse( data, kwargs, &block )
|
81
|
-
end
|
82
|
-
|
83
|
-
def read( path, sep: nil,
|
84
|
-
converters: nil )
|
85
|
-
## note: use our own file.open
|
86
|
-
## always use utf-8 for now
|
87
|
-
## check/todo: add skip option bom too - why? why not?
|
88
|
-
txt = File.open( path, 'r:bom|utf-8' ).read
|
89
|
-
parse( txt, sep: sep )
|
90
|
-
end
|
91
|
-
|
92
|
-
def foreach( path, sep: nil,
|
93
|
-
converters: nil, &block )
|
94
|
-
File.open( path, 'r:bom|utf-8' ) do |file|
|
95
|
-
parse( file, sep: sep, &block )
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
#######
|
22
|
+
## csv reader
|
23
|
+
|
24
|
+
def self.open( path, mode='r:bom|utf-8',
|
25
|
+
sep: nil,
|
26
|
+
converters: nil,
|
27
|
+
parser: nil, &block ) ## rename path to filename or name - why? why not?
|
28
|
+
f = File.open( path, mode )
|
29
|
+
csv = new(f, sep: sep, converters: converters, parser: parser )
|
30
|
+
|
31
|
+
# handle blocks like Ruby's open(), not like the (old old) CSV library
|
32
|
+
if block_given?
|
33
|
+
begin
|
34
|
+
block.call( csv )
|
35
|
+
ensure
|
36
|
+
csv.close
|
37
|
+
end
|
38
|
+
else
|
39
|
+
csv
|
40
|
+
end
|
41
|
+
end # method self.open
|
42
|
+
|
43
|
+
|
44
|
+
def self.read( path, sep: nil,
|
45
|
+
converters: nil,
|
46
|
+
parser: nil )
|
47
|
+
open( path,
|
48
|
+
sep: sep,
|
49
|
+
converters: converters,
|
50
|
+
parser: parser ) { |csv| csv.read }
|
96
51
|
end
|
97
|
-
end
|
98
52
|
|
99
|
-
def header( path, sep: nil ) ## use header or headers - or use both (with alias)?
|
100
|
-
# read first lines (only)
|
101
|
-
# and parse with csv to get header from csv library itself
|
102
53
|
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
54
|
+
def self.header( path, sep: nil, parser: nil ) ## use header or headers - or use both (with alias)?
|
55
|
+
# read first lines (only)
|
56
|
+
# and parse with csv to get header from csv library itself
|
57
|
+
|
58
|
+
records = []
|
59
|
+
open( path, sep: sep, parser: parser ) do |csv|
|
60
|
+
csv.each do |record|
|
61
|
+
records << record
|
62
|
+
break ## only parse/read first record
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
## unwrap record if empty return nil - why? why not?
|
67
|
+
## return empty record e.g. [] - why? why not?
|
68
|
+
## returns nil for empty (for now) - why? why not?
|
69
|
+
records.size == 0 ? nil : records.first
|
70
|
+
end # method self.header
|
71
|
+
|
72
|
+
|
73
|
+
def self.foreach( path, sep: nil,
|
74
|
+
converters: nil, parser: nil, &block )
|
75
|
+
csv = open( path, sep: sep, converters: converters, parser: parser )
|
76
|
+
|
77
|
+
if block_given?
|
78
|
+
begin
|
79
|
+
csv.each( &block )
|
80
|
+
ensure
|
81
|
+
csv.close
|
108
82
|
end
|
109
|
-
|
83
|
+
else
|
84
|
+
csv.to_enum ## note: caller (responsible) must close file!!!
|
85
|
+
## remove version without block given - why? why not?
|
86
|
+
## use Csv.open().to_enum or Csv.open().each
|
87
|
+
## or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
|
88
|
+
end
|
89
|
+
end # method self.foreach
|
90
|
+
|
91
|
+
|
92
|
+
def self.parse( data, sep: nil,
|
93
|
+
converters: nil,
|
94
|
+
parser: nil, &block )
|
95
|
+
csv = new( data, sep: sep, converters: converters, parser: parser )
|
96
|
+
|
97
|
+
if block_given?
|
98
|
+
csv.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
99
|
+
else # slurp contents, if no block is given
|
100
|
+
csv.read ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
101
|
+
end
|
102
|
+
end # method self.parse
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
############################
|
107
|
+
## note: only add parse_line convenience helper for default
|
108
|
+
## always use parse (do NOT/NOT/NOT use parse_line) - why? why not?
|
109
|
+
## todo/fix: remove parse_line!!!
|
110
|
+
def self.parse_line( data, sep: nil,
|
111
|
+
converters: nil )
|
112
|
+
records = []
|
113
|
+
parse( data, sep: sep, converters: converters ) do |record|
|
114
|
+
records << record
|
115
|
+
break # only parse first record
|
116
|
+
end
|
117
|
+
records.size == 0 ? nil : records.first
|
118
|
+
end
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
def initialize( data, sep: nil, converters: nil, parser: nil )
|
124
|
+
raise ArgumentError.new( "Cannot parse nil as CSV" ) if data.nil?
|
125
|
+
## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV" if data.nil?
|
126
|
+
|
127
|
+
# create the IO object we will read from
|
128
|
+
@io = data.is_a?(String) ? StringIO.new(data) : data
|
129
|
+
|
130
|
+
@sep = sep
|
131
|
+
@converters = converters
|
132
|
+
|
133
|
+
@parser = parser.nil? ? Parser::DEFAULT : parser
|
134
|
+
end
|
135
|
+
|
136
|
+
|
137
|
+
### IO and StringIO Delegation ###
|
138
|
+
extend Forwardable
|
139
|
+
def_delegators :@io,
|
140
|
+
:close, :closed?, :eof, :eof?
|
141
|
+
|
142
|
+
## add more - why? why not?
|
143
|
+
## def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
|
144
|
+
## :closed?, :eof, :eof?, :external_encoding, :fcntl,
|
145
|
+
## :fileno, :flock, :flush, :fsync, :internal_encoding,
|
146
|
+
## :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
|
147
|
+
## :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
|
148
|
+
## :to_io, :truncate, :tty?
|
149
|
+
|
150
|
+
|
151
|
+
include Enumerable
|
152
|
+
|
153
|
+
def each( &block )
|
154
|
+
if block_given?
|
155
|
+
kwargs = {
|
156
|
+
## converters: converters ## todo: add converters
|
157
|
+
}
|
158
|
+
## note: only add separator if present/defined (not nil)
|
159
|
+
kwargs[:sep] = @sep if @sep && @parser.respond_to?( :'sep=' )
|
160
|
+
|
161
|
+
@parser.parse( @io, kwargs, &block )
|
162
|
+
else
|
163
|
+
to_enum
|
164
|
+
end
|
165
|
+
end # method each
|
110
166
|
|
111
|
-
|
112
|
-
## return empty record e.g. [] - why? why not?
|
113
|
-
## returns nil for empty (for now) - why? why not?
|
114
|
-
records.size == 0 ? nil : records.first
|
115
|
-
end # method self.header
|
167
|
+
def read() to_a; end # method read
|
116
168
|
|
117
169
|
end # class CsvReader
|
data/lib/csvreader/version.rb
CHANGED
@@ -0,0 +1,10 @@
|
|
1
|
+
#####
|
2
|
+
# csv sample from the wikipedia article "Comma-separated values"
|
3
|
+
# see en.wikipedia.org/wiki/Comma-separated_values
|
4
|
+
|
5
|
+
Year,Make,Model,Description,Price
|
6
|
+
1997, Ford, E350,"ac, abs, moon",3000.00
|
7
|
+
1999, Chevy, "Venture ""Extended Edition""","",4900.00
|
8
|
+
1999, Chevy, "Venture ""Extended Edition, Very Large""",,5000.00
|
9
|
+
1996, Jeep, Grand Cherokee,"MUST SELL!
|
10
|
+
air, moon roof, loaded",4799.00
|
@@ -0,0 +1,12 @@
|
|
1
|
+
#####
|
2
|
+
# csv sample from the wikipedia article "Comma-separated values"
|
3
|
+
# see en.wikipedia.org/wiki/Comma-separated_values
|
4
|
+
#
|
5
|
+
# note:
|
6
|
+
# Double quote processing need only apply if the field starts
|
7
|
+
# with a double quote. Note, however, that double quotes are not
|
8
|
+
# allowed in unquoted fields according to RFC 4180
|
9
|
+
|
10
|
+
Los Angeles, 34°03'N, 118°15'W
|
11
|
+
New York City, 40°42'46"N, 74°00'21"W
|
12
|
+
Paris, 48°51'24"N, 2°21'03"E
|
@@ -0,0 +1,13 @@
|
|
1
|
+
#####
|
2
|
+
# csv sample from the article:
|
3
|
+
# A Guide to the Ruby CSV Library, Part I
|
4
|
+
# - sitepoint.com/guide-ruby-csv-library-part
|
5
|
+
|
6
|
+
Name,Times arrived,Total $ spent,Food feedback
|
7
|
+
Dan, 34, 2548, Lovin it!
|
8
|
+
Maria, 55, 5054, "Good, delicious food"
|
9
|
+
Carlos, 22, 4352, "I am ""pleased"", but could be better"
|
10
|
+
Stephany, 34, 6542, I want bigger steaks!!!!!
|
11
|
+
James, 1, 43, Not bad
|
12
|
+
Robin, 1, 56, Fish is tasty
|
13
|
+
Anna, 1, 79, "Good, better, the best!"
|
data/test/test_parser_java.rb
CHANGED
@@ -205,15 +205,4 @@ def test_lf
|
|
205
205
|
parser.default.parse( "character" + LF + "NotEscaped" )
|
206
206
|
end
|
207
207
|
|
208
|
-
|
209
|
-
|
210
|
-
def test_escaped_mysql_null_value
|
211
|
-
## MySQL uses \N to symbolize null values. We have to restore this
|
212
|
-
|
213
|
-
## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
|
214
|
-
## only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
|
215
|
-
assert_equal [[ "character\\NEscaped" ]],
|
216
|
-
parser.default.parse( "character\\NEscaped" )
|
217
|
-
end
|
218
|
-
|
219
208
|
end # class TestParserJava
|
@@ -0,0 +1,107 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_null.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestParserNull < MiniTest::Test
|
12
|
+
|
13
|
+
|
14
|
+
def parser
|
15
|
+
CsvReader::Parser
|
16
|
+
end
|
17
|
+
|
18
|
+
|
19
|
+
def test_escaped_mysql_null_value
|
20
|
+
## MySQL uses \N to symbolize null values. We have to restore this
|
21
|
+
|
22
|
+
## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
|
23
|
+
## only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
|
24
|
+
assert_equal [[ "character\\NEscaped" ]],
|
25
|
+
parser.default.parse( "character\\NEscaped" )
|
26
|
+
|
27
|
+
assert_equal [[ "character\\NEscaped" ]],
|
28
|
+
parser.strict.parse( "character\\NEscaped" )
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
def test_mysql_null_value
|
33
|
+
default_null_values = parser.default.config[:null] ## save default null settings
|
34
|
+
|
35
|
+
assert_equal [[ nil, nil, "" ]],
|
36
|
+
parser.default.parse( "\\N, \\N ," )
|
37
|
+
|
38
|
+
## escaped with quotes
|
39
|
+
assert_equal [[ "\\N", "\\N", "" ]],
|
40
|
+
parser.default.parse( %Q{"\\N", "\\N" ,} )
|
41
|
+
|
42
|
+
## try single \N setting
|
43
|
+
parser.default.null = "\\N"
|
44
|
+
assert_equal [[ nil, nil, "" ]],
|
45
|
+
parser.default.parse( "\\N, \\N ," )
|
46
|
+
|
47
|
+
## try no null values setting
|
48
|
+
parser.default.null = nil
|
49
|
+
assert_equal [[ "\\N", "\\N", "" ]],
|
50
|
+
parser.default.parse( "\\N, \\N ," )
|
51
|
+
|
52
|
+
## try postgresql unquoted empty string is nil/null
|
53
|
+
parser.default.null = ""
|
54
|
+
assert_equal [[ nil, nil, "" ],
|
55
|
+
[ nil, nil, "", nil ]],
|
56
|
+
parser.default.parse( %Q{,,""\n , , "" ,} )
|
57
|
+
|
58
|
+
## try proc
|
59
|
+
parser.default.null = ->(value) { value.downcase == 'nil' }
|
60
|
+
assert_equal [[ nil, nil, nil, "" ]],
|
61
|
+
parser.default.parse( "nil, Nil, NIL," )
|
62
|
+
|
63
|
+
## try array
|
64
|
+
parser.default.null = ['nil', 'Nil', 'NIL']
|
65
|
+
assert_equal [[ nil, nil, nil, "" ]],
|
66
|
+
parser.default.parse( "nil, Nil, NIL," )
|
67
|
+
|
68
|
+
## restore defaults
|
69
|
+
parser.default.null = default_null_values ## ['\N', 'NA']
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
def test_strict_mysql_null_value
|
74
|
+
assert_equal [[ "\\N", " \\N ", "" ]],
|
75
|
+
parser.strict.parse( "\\N, \\N ," )
|
76
|
+
|
77
|
+
## try single \N setting
|
78
|
+
parser.strict.null = "\\N"
|
79
|
+
assert_equal [[ nil, nil, " \\N", "\\N ", "" ]],
|
80
|
+
parser.strict.parse( "\\N,\\N, \\N,\\N ," )
|
81
|
+
|
82
|
+
## escaped with quotes
|
83
|
+
assert_equal [[ "\\N", "\\N", nil, "" ]],
|
84
|
+
parser.strict.parse( %Q{"\\N","\\N",\\N,} )
|
85
|
+
|
86
|
+
|
87
|
+
## try postgresql unquoted empty string is nil/null
|
88
|
+
parser.strict.null = ""
|
89
|
+
assert_equal [[ nil, nil, "" ],
|
90
|
+
[ " ", " ", "", nil ]],
|
91
|
+
parser.strict.parse( %Q{,,""\n , ,"",} )
|
92
|
+
|
93
|
+
## try proc
|
94
|
+
parser.strict.null = ->(value) { value.downcase == 'nil' }
|
95
|
+
assert_equal [[ nil, nil, nil, "" ]],
|
96
|
+
parser.strict.parse( "nil,Nil,NIL," )
|
97
|
+
|
98
|
+
## try array
|
99
|
+
parser.strict.null = ['nil', 'Nil', 'NIL']
|
100
|
+
assert_equal [[ nil, nil, nil, "" ]],
|
101
|
+
parser.strict.parse( "nil,Nil,NIL," )
|
102
|
+
|
103
|
+
## restore defaults
|
104
|
+
parser.strict.null = nil
|
105
|
+
end
|
106
|
+
|
107
|
+
end # class TestParserNull
|
data/test/test_parser_strict.rb
CHANGED
@@ -60,32 +60,20 @@ end
|
|
60
60
|
|
61
61
|
|
62
62
|
def test_parse_empties
|
63
|
-
assert_equal [["","",""],["","",""]],
|
64
|
-
|
65
|
-
parser.config[:quoted_empty_null] = true
|
66
|
-
|
67
|
-
assert_equal true, parser.config[:quoted_empty_null]
|
68
|
-
assert_equal false, parser.config[:unquoted_empty_null]
|
69
|
-
|
70
|
-
assert_equal [[nil,nil,nil," "],["","",""," "]], parser.parse( %Q{"","",""," "\n,,, } )
|
71
|
-
|
72
|
-
|
73
|
-
parser.config[:unquoted_empty_null] = true
|
74
|
-
|
75
|
-
assert_equal true, parser.config[:quoted_empty_null]
|
76
|
-
assert_equal true, parser.config[:unquoted_empty_null]
|
77
|
-
|
78
|
-
assert_equal [[nil,nil,nil," "],[nil,nil,nil," "]], parser.parse( %Q{"","",""," "\n,,, } )
|
63
|
+
assert_equal [["","",""],["","",""]],
|
64
|
+
parser.parse( %Q{"","",""\n,,} )
|
79
65
|
|
66
|
+
parser.null = ""
|
67
|
+
assert_equal [["","",""," "],[nil,nil,nil," "]],
|
68
|
+
parser.parse( %Q{"","",""," "\n,,, } )
|
69
|
+
parser.null = [""] ## try array (allows multiple null values)
|
70
|
+
assert_equal [[nil,nil,nil," "],["","",""," "]],
|
71
|
+
parser.parse( %Q{,,, \n"","",""," "} )
|
80
72
|
|
81
73
|
## reset to defaults
|
82
|
-
parser.
|
83
|
-
|
84
|
-
|
85
|
-
assert_equal false, parser.config[:quoted_empty_null]
|
86
|
-
assert_equal false, parser.config[:unquoted_empty_null]
|
87
|
-
|
88
|
-
assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
|
74
|
+
parser.null = nil
|
75
|
+
assert_equal [["","",""],["","",""]],
|
76
|
+
parser.parse( %Q{"","",""\n,,} )
|
89
77
|
end
|
90
78
|
|
91
79
|
|
data/test/test_reader.rb
CHANGED
@@ -77,4 +77,17 @@ def test_foreach
|
|
77
77
|
assert true
|
78
78
|
end
|
79
79
|
|
80
|
+
|
81
|
+
def test_enum
|
82
|
+
csv = CsvReader.new( "a,b,c" )
|
83
|
+
enum = csv.to_enum
|
84
|
+
assert_equal ["a","b","c"], enum.next
|
85
|
+
|
86
|
+
## test Csv == CsvReader class alias
|
87
|
+
csv = Csv.new( "a,b,c" )
|
88
|
+
enum = csv.to_enum
|
89
|
+
assert_equal ["a","b","c"], enum.next
|
90
|
+
end
|
91
|
+
|
92
|
+
|
80
93
|
end # class TestReader
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -56,6 +56,7 @@ files:
|
|
56
56
|
- Rakefile
|
57
57
|
- lib/csvreader.rb
|
58
58
|
- lib/csvreader/buffer.rb
|
59
|
+
- lib/csvreader/builder.rb
|
59
60
|
- lib/csvreader/parser.rb
|
60
61
|
- lib/csvreader/parser_std.rb
|
61
62
|
- lib/csvreader/parser_strict.rb
|
@@ -65,11 +66,15 @@ files:
|
|
65
66
|
- lib/csvreader/version.rb
|
66
67
|
- test/data/beer.csv
|
67
68
|
- test/data/beer11.csv
|
69
|
+
- test/data/cars11.csv
|
70
|
+
- test/data/cities11.csv
|
71
|
+
- test/data/customers11.csv
|
68
72
|
- test/data/shakespeare.csv
|
69
73
|
- test/helper.rb
|
70
74
|
- test/test_parser.rb
|
71
75
|
- test/test_parser_formats.rb
|
72
76
|
- test/test_parser_java.rb
|
77
|
+
- test/test_parser_null.rb
|
73
78
|
- test/test_parser_strict.rb
|
74
79
|
- test/test_parser_tab.rb
|
75
80
|
- test/test_reader.rb
|