csvreader 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c87e1cac5f0988f4423a0c5aaf96d2a625bf4d60
4
- data.tar.gz: 5af8f5875ac0e18ade4cc793ba8ad658f905d1df
3
+ metadata.gz: c61a8e62f99e1a06c119b4995e0e4e1d3c829d71
4
+ data.tar.gz: 1b59f3415f3f0fe449a8c2395d2cefc7a7bd855c
5
5
  SHA512:
6
- metadata.gz: c9528101aa8a2db3a8e0dfb3685e6d15fcd262a76ed16f69b34ca9d54003e772f9441eb1673e11886ee14ac3347a99c22bd06662a8191214189f5c57f0ecfe7b
7
- data.tar.gz: acc9ada28d539dbc7ce1a2178e904ba247f511327f5828eebfdda78b21b263ca5d153d8fc234d7483cb60229f764094bc3c2fbeefa2381335d4e21a30487a828
6
+ metadata.gz: 52e3e8effa09f492c38736f1fb16552341237ebe0e8e5452a72c080d03f48e47b9d66fbfa1b8f42e3ccd86d37038975254da0e3038f297174636a3b34ce57542
7
+ data.tar.gz: 66c17daaa22d6e5d526c76b9568737b4982da9f02b11e9264fdb9d9644d5e33881255f623a174515c78714352b2df54c811cb8b6e0222aa5cc79682ac320ee62
@@ -5,6 +5,7 @@ README.md
5
5
  Rakefile
6
6
  lib/csvreader.rb
7
7
  lib/csvreader/buffer.rb
8
+ lib/csvreader/builder.rb
8
9
  lib/csvreader/parser.rb
9
10
  lib/csvreader/parser_std.rb
10
11
  lib/csvreader/parser_strict.rb
@@ -14,11 +15,15 @@ lib/csvreader/reader_hash.rb
14
15
  lib/csvreader/version.rb
15
16
  test/data/beer.csv
16
17
  test/data/beer11.csv
18
+ test/data/cars11.csv
19
+ test/data/cities11.csv
20
+ test/data/customers11.csv
17
21
  test/data/shakespeare.csv
18
22
  test/helper.rb
19
23
  test/test_parser.rb
20
24
  test/test_parser_formats.rb
21
25
  test/test_parser_java.rb
26
+ test/test_parser_null.rb
22
27
  test/test_parser_strict.rb
23
28
  test/test_parser_tab.rb
24
29
  test/test_reader.rb
data/README.md CHANGED
@@ -11,14 +11,6 @@
11
11
 
12
12
  ## Usage
13
13
 
14
- ``` ruby
15
- line = "1,2,3"
16
- values = CsvReader.parse_line( line )
17
- pp values
18
- # => ["1","2","3"]
19
- ```
20
-
21
- or use the convenience helpers:
22
14
 
23
15
  ``` ruby
24
16
  txt <<=TXT
@@ -26,21 +18,21 @@ txt <<=TXT
26
18
  4,5,6
27
19
  TXT
28
20
 
29
- records = CsvReader.parse( txt )
21
+ records = Csv.parse( txt ) ## or CsvReader.parse
30
22
  pp records
31
23
  # => [["1","2","3"],
32
24
  # ["4","5","6"]]
33
25
 
34
26
  # -or-
35
27
 
36
- records = CsvReader.read( "values.csv" )
28
+ records = Csv.read( "values.csv" ) ## or CsvReader.read
37
29
  pp records
38
30
  # => [["1","2","3"],
39
31
  # ["5","6","7"]]
40
32
 
41
33
  # -or-
42
34
 
43
- CsvReader.foreach( "values.csv" ) do |rec|
35
+ Csv.foreach( "values.csv" ) do |rec| ## or CsvReader.foreach
44
36
  pp rec
45
37
  end
46
38
  # => ["1","2","3"]
@@ -50,7 +42,7 @@ end
50
42
 
51
43
  ### What about headers?
52
44
 
53
- Use the `CsvHashReader`
45
+ Use the `CsvHash`
54
46
  if the first line is a header (or if missing pass in the headers
55
47
  as an array) and you want your records as hashes instead of arrays of strings.
56
48
  Example:
@@ -62,7 +54,7 @@ A,B,C
62
54
  4,5,6
63
55
  TXT
64
56
 
65
- records = CsvHashReader.parse( txt )
57
+ records = CsvHash.parse( txt ) ## or CsvHashReader.parse
66
58
  pp records
67
59
 
68
60
  # -or-
@@ -72,7 +64,7 @@ txt2 <<=TXT
72
64
  4,5,6
73
65
  TXT
74
66
 
75
- records = CsvHashReader.parse( txt2, headers: ["A","B","C"] )
67
+ records = CsvHash.parse( txt2, headers: ["A","B","C"] ) ## or CsvHashReader.parse
76
68
  pp records
77
69
 
78
70
  # => [{"A": "1", "B": "2", "C": "3"},
@@ -80,14 +72,14 @@ pp records
80
72
 
81
73
  # -or-
82
74
 
83
- records = CsvHashReader.read( "hash.csv" )
75
+ records = CsvHash.read( "hash.csv" ) ## or CsvHashReader.read
84
76
  pp records
85
77
  # => [{"A": "1", "B": "2", "C": "3"},
86
78
  # {"A": "4", "B": "5", "C": "6"}]
87
79
 
88
80
  # -or-
89
81
 
90
- CsvHashReader.foreach( "hash.csv" ) do |rec|
82
+ CsvHash.foreach( "hash.csv" ) do |rec| ## or CsvHashReader.foreach
91
83
  pp rec
92
84
  end
93
85
  # => {"A": "1", "B": "2", "C": "3"}
@@ -141,13 +133,11 @@ Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
141
133
  Pass in the `sep` keyword option. Example:
142
134
 
143
135
  ``` ruby
144
- CsvReader.parse_line( ..., sep: ';' )
145
- CsvReader.parse( ..., sep: ';' )
146
- CsvReader.read( ..., sep: ';' )
136
+ Csv.parse( ..., sep: ';' )
137
+ Csv.read( ..., sep: ';' )
147
138
  # ...
148
- CsvReader.parse_line( ..., sep: '|' )
149
- CsvReader.parse( ..., sep: '|' )
150
- CsvReader.read( ..., sep: '|' )
139
+ Csv.parse( ..., sep: '|' )
140
+ Csv.read( ..., sep: '|' )
151
141
  # ...
152
142
  # and so on
153
143
  ```
@@ -3,6 +3,7 @@
3
3
 
4
4
  require 'pp'
5
5
  require 'logger'
6
+ require 'forwardable'
6
7
 
7
8
 
8
9
  ###
@@ -13,6 +14,7 @@ require 'csvreader/parser_std' # best practices pre-configured out-of-the-b
13
14
  require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space trimming, blanks, etc.), configure for different formats/dialects
14
15
  require 'csvreader/parser_tab'
15
16
  require 'csvreader/parser'
17
+ require 'csvreader/builder'
16
18
  require 'csvreader/reader'
17
19
  require 'csvreader/reader_hash'
18
20
 
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ class CsvBuilder ## rename to CsvReaderBuilder - why? why not?
5
+ def initialize( parser )
6
+ @parser = parser
7
+ end
8
+
9
+
10
+ ## todo/fix:
11
+ ## add parser config (attribute) setter e.g.
12
+ ## - sep=(value)
13
+ ## - comment=(value)
14
+ ## - and so on!!!
15
+ ##
16
+ ## add config too - why? why not?
17
+
18
+
19
+ def open( path, mode='r:bom|utf-8',
20
+ sep: nil,
21
+ converters: nil,
22
+ parser: @parser, &block )
23
+ CsvReader.open( path, mode,
24
+ sep: sep, converters: converters,
25
+ parser: @parser, &block )
26
+ end
27
+
28
+ def read( path, sep: nil,
29
+ converters: nil )
30
+ CsvReader.read( path,
31
+ sep: sep, converters: converters,
32
+ parser: @parser )
33
+ end
34
+
35
+ def header( path, sep: nil )
36
+ CsvReader.header( path,
37
+ sep: sep,
38
+ parser: @parser )
39
+ end
40
+
41
+ def foreach( path, sep: nil,
42
+ converters: nil, &block )
43
+ CsvReader.foreach( path,
44
+ sep: sep, converters: converters,
45
+ parser: @parser, &block )
46
+ end
47
+
48
+
49
+
50
+ def parse( data, sep: nil,
51
+ converters: nil, &block )
52
+ CsvReader.parse( data,
53
+ sep: sep, converters: converters,
54
+ parser: @parser, &block )
55
+ end
56
+ end # class CsvBuilder
@@ -25,7 +25,7 @@ MYSQL = ParserStrict.new( sep: "\t",
25
25
 
26
26
  POSTGRES = POSTGRESQL = ParserStrict.new( doublequote: false,
27
27
  escape: true,
28
- unquoted_empty_null: true )
28
+ null: "" )
29
29
 
30
30
  POSTGRES_TEXT = POSTGRESQL_TEXT = ParserStrict.new( sep: "\t",
31
31
  quote: false,
@@ -39,12 +39,29 @@ def logger() self.class.logger; end
39
39
 
40
40
  attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
41
41
 
42
+ ##
43
+ ## todo/check:
44
+ ## null values - include NA - why? why not?
45
+ ## make null values case sensitive or add an option for case sensitive
46
+ ## or better allow a proc as option for checking too!!!
42
47
  def initialize( null: ['\N', 'NA'] ## note: set to nil for no null values / not available (na)
43
48
  )
44
49
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
50
+
51
+ ## note: null values must get handled by parser
52
+ ## only get checked for unquoted strings (and NOT for quoted strings)
53
+ ## "higher-level" code only knows about strings and has no longer any info if string was quoted or unquoted
45
54
  @config[:null] = null ## null values
46
55
  end
47
56
 
57
+ #########################################
58
+ ## config convenience helpers
59
+ ## e.g. use like Csv.default.null = '\N' etc. instead of
60
+ ## Csv.default.config[:null] = '\N'
61
+ def null=( value ) @config[:null]=value; end
62
+
63
+
64
+
48
65
 
49
66
  def parse( data, **kwargs, &block )
50
67
 
@@ -132,6 +149,7 @@ def parse_field( input )
132
149
  skip_spaces( input ) ## strip leading spaces
133
150
 
134
151
  if (c=input.peek; c=="," || c==LF || c==CR || input.eof?) ## empty field
152
+ value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
135
153
  ## return value; do nothing
136
154
  elsif input.peek == DOUBLE_QUOTE
137
155
  logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
@@ -156,6 +174,8 @@ def parse_field( input )
156
174
  ## note: only strip **trailing** spaces (space and tab only)
157
175
  ## do NOT strip newlines etc. might have been added via escape! e.g. \\\n
158
176
  value = value.sub( /[ \t]+$/, '' )
177
+ value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
178
+
159
179
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
160
180
  end
161
181
 
@@ -251,5 +271,24 @@ def parse_lines( input, &block )
251
271
  end # method parse_lines
252
272
 
253
273
 
274
+
275
+
276
+ def is_null?( value )
277
+ null = @config[:null]
278
+ if null.nil?
279
+ false ## nothing set; return always false (not null)
280
+ elsif null.is_a?( Proc )
281
+ null.call( value )
282
+ elsif null.is_a?( Array )
283
+ null.include?( value )
284
+ elsif null.is_a?( String )
285
+ value == null
286
+ else ## unknown config style / setting
287
+ ## todo: issue a warning or error - why? why not?
288
+ false ## nothing set; return always false (not null)
289
+ end
290
+ end
291
+
292
+
254
293
  end # class ParserStd
255
294
  end # class CsvReader
@@ -35,9 +35,7 @@ def initialize( sep: ',',
35
35
  quote: '"', ## note: set to false/nil for no quote
36
36
  doublequote: true,
37
37
  escape: false, ## true/false
38
- null: ['\N', 'NA'], ## note: set to nil for no null vales / not availabe (na)
39
- quoted_empty_null: false,
40
- unquoted_empty_null: false,
38
+ null: nil, ## note: set to nil for no null values / not available (na)
41
39
  comment: false ## note: comment char e.g. # or false/nil
42
40
  )
43
41
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
@@ -46,8 +44,6 @@ def initialize( sep: ',',
46
44
  @config[:doublequote] = doublequote
47
45
  @config[:escape] = escape
48
46
  @config[:null] = null
49
- @config[:quoted_empty_null] = quoted_empty_null
50
- @config[:unquoted_empty_null] = unquoted_empty_null
51
47
  @config[:comment] = comment
52
48
  end
53
49
 
@@ -55,9 +51,12 @@ end
55
51
  ## config convenience helpers
56
52
  ## e.g. use like Csv.mysql.sep = ',' etc. instead of
57
53
  ## Csv.mysql.config[:sep] = ','
58
- def sep=( value ) @config[:sep]=value; end
59
- def comment=( value ) @config[:comment]=value; end
60
- def escape=( value ) @config[:escape]=value; end
54
+ def sep=( value ) @config[:sep]=value; end
55
+ def quote=( value ) @config[:quote]=value; end
56
+ def doublequote=( value ) @config[:doublequote]=value; end
57
+ def escape=( value ) @config[:escape]=value; end
58
+ def null=( value ) @config[:null]=value; end
59
+ def comment=( value ) @config[:comment]=value; end
61
60
 
62
61
 
63
62
 
@@ -156,14 +155,11 @@ def parse_field( input, sep: )
156
155
  logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
157
156
 
158
157
  if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty unquoted field
159
- value = nil if config[:unquoted_empty_null]
158
+ value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
160
159
  ## return value; do nothing
161
160
  elsif quote && input.peek == quote
162
161
  logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
163
162
  value << parse_quote( input, sep: sep )
164
-
165
- value = nil if config[:quoted_empty_null] && value == ""
166
-
167
163
  logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
168
164
  else
169
165
  logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
@@ -177,6 +173,8 @@ def parse_field( input, sep: )
177
173
  value << input.getc
178
174
  end
179
175
  end
176
+
177
+ value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
180
178
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
181
179
  end
182
180
 
@@ -265,5 +263,22 @@ def parse_lines( input, sep:, &block )
265
263
  end # method parse_lines
266
264
 
267
265
 
266
+ def is_null?( value )
267
+ null = @config[:null]
268
+ if null.nil?
269
+ false ## nothing set; return always false (not null)
270
+ elsif null.is_a?( Proc )
271
+ null.call( value )
272
+ elsif null.is_a?( Array )
273
+ null.include?( value )
274
+ elsif null.is_a?( String )
275
+ value == null
276
+ else ## unknown config style / setting
277
+ ## todo: issue a warning or error - why? why not?
278
+ false ## nothing set; return always false (not null)
279
+ end
280
+ end
281
+
282
+
268
283
  end # class ParserStrict
269
284
  end # class CsvReader
@@ -1,18 +1,12 @@
1
1
  # encoding: utf-8
2
2
 
3
-
4
-
5
3
  class CsvReader
6
4
 
7
- def initialize( parser )
8
- @parser = parser
9
- end
10
-
11
- DEFAULT = new( Parser::DEFAULT )
12
- STRICT = new( Parser::STRICT )
13
- RFC4180 = new( Parser::RFC4180 )
14
- EXCEL = new( Parser::EXCEL )
15
- TAB = new( Parser::TAB )
5
+ DEFAULT = CsvBuilder.new( Parser::DEFAULT )
6
+ STRICT = CsvBuilder.new( Parser::STRICT )
7
+ RFC4180 = CsvBuilder.new( Parser::RFC4180 )
8
+ EXCEL = CsvBuilder.new( Parser::EXCEL )
9
+ TAB = CsvBuilder.new( Parser::TAB )
16
10
 
17
11
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
18
12
  def self.strict() STRICT; end ## alternative alias for STRICT
@@ -21,97 +15,155 @@ class CsvReader
21
15
  def self.tab() TAB; end ## alternative alias for TAB
22
16
 
23
17
 
24
- #####################
25
- ## convenience helpers defaulting to default csv dialect/format reader
26
- ##
27
- ## CsvReader.parse is the same as
28
- ## CsvReader::DEFAULT.parse or CsvReader.default.parse
29
- ##
30
-
31
- def self.parse( data, sep: nil,
32
- converters: nil, &block )
33
- DEFAULT.parse( data, sep: sep, converters: converters, &block )
34
- end
35
-
36
- def self.read( path, sep: nil,
37
- converters: nil )
38
- DEFAULT.read( path, sep: sep, converters: converters )
39
- end
40
-
41
- def self.header( path, sep: nil )
42
- DEFAULT.header( path, sep: sep )
43
- end
44
-
45
- def self.foreach( path, sep: nil,
46
- converters: nil, &block )
47
- DEFAULT.foreach( path, sep: sep, converters: converters, &block )
48
- end
49
-
50
-
51
- ############################
52
- ## note: only add parse_line convenience helper for default
53
- ## always use parse (do NOT use parse_line) - why? why not?
54
- def self.parse_line( data, sep: nil,
55
- converters: nil )
56
- records = []
57
- DEFAULT.parse( data, sep: sep, converters: converters ) do |record|
58
- records << record
59
- break # only parse first record
60
- end
61
- records.size == 0 ? nil : records.first
62
- end
63
-
64
-
65
-
66
- #############################
67
- ## all "high-level" reader methods
68
- ##
69
- ## note: allow "overriding" of separator
70
- ## if sep is not nil otherwise use default dialect/format separator
71
-
72
- def parse( data, sep: nil,
73
- converters: nil, &block )
74
- kwargs = {
75
- ## converters: converters ## todo: add converters
76
- }
77
- ## note: only add separator if present/defined (not nil)
78
- kwargs[:sep] = sep if sep && @parser.respond_to?( :'sep=' )
79
-
80
- @parser.parse( data, kwargs, &block )
81
- end
82
-
83
- def read( path, sep: nil,
84
- converters: nil )
85
- ## note: use our own file.open
86
- ## always use utf-8 for now
87
- ## check/todo: add skip option bom too - why? why not?
88
- txt = File.open( path, 'r:bom|utf-8' ).read
89
- parse( txt, sep: sep )
90
- end
91
-
92
- def foreach( path, sep: nil,
93
- converters: nil, &block )
94
- File.open( path, 'r:bom|utf-8' ) do |file|
95
- parse( file, sep: sep, &block )
18
+
19
+
20
+
21
+ #######
22
+ ## csv reader
23
+
24
+ def self.open( path, mode='r:bom|utf-8',
25
+ sep: nil,
26
+ converters: nil,
27
+ parser: nil, &block ) ## rename path to filename or name - why? why not?
28
+ f = File.open( path, mode )
29
+ csv = new(f, sep: sep, converters: converters, parser: parser )
30
+
31
+ # handle blocks like Ruby's open(), not like the (old old) CSV library
32
+ if block_given?
33
+ begin
34
+ block.call( csv )
35
+ ensure
36
+ csv.close
37
+ end
38
+ else
39
+ csv
40
+ end
41
+ end # method self.open
42
+
43
+
44
+ def self.read( path, sep: nil,
45
+ converters: nil,
46
+ parser: nil )
47
+ open( path,
48
+ sep: sep,
49
+ converters: converters,
50
+ parser: parser ) { |csv| csv.read }
96
51
  end
97
- end
98
52
 
99
- def header( path, sep: nil ) ## use header or headers - or use both (with alias)?
100
- # read first lines (only)
101
- # and parse with csv to get header from csv library itself
102
53
 
103
- records = []
104
- File.open( path, 'r:bom|utf-8' ) do |file|
105
- parse( file, sep: sep ) do |record|
106
- records << record
107
- break ## only parse/read first record
54
+ def self.header( path, sep: nil, parser: nil ) ## use header or headers - or use both (with alias)?
55
+ # read first lines (only)
56
+ # and parse with csv to get header from csv library itself
57
+
58
+ records = []
59
+ open( path, sep: sep, parser: parser ) do |csv|
60
+ csv.each do |record|
61
+ records << record
62
+ break ## only parse/read first record
63
+ end
64
+ end
65
+
66
+ ## unwrap record if empty return nil - why? why not?
67
+ ## return empty record e.g. [] - why? why not?
68
+ ## returns nil for empty (for now) - why? why not?
69
+ records.size == 0 ? nil : records.first
70
+ end # method self.header
71
+
72
+
73
+ def self.foreach( path, sep: nil,
74
+ converters: nil, parser: nil, &block )
75
+ csv = open( path, sep: sep, converters: converters, parser: parser )
76
+
77
+ if block_given?
78
+ begin
79
+ csv.each( &block )
80
+ ensure
81
+ csv.close
108
82
  end
109
- end
83
+ else
84
+ csv.to_enum ## note: caller (responsible) must close file!!!
85
+ ## remove version without block given - why? why not?
86
+ ## use Csv.open().to_enum or Csv.open().each
87
+ ## or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
88
+ end
89
+ end # method self.foreach
90
+
91
+
92
+ def self.parse( data, sep: nil,
93
+ converters: nil,
94
+ parser: nil, &block )
95
+ csv = new( data, sep: sep, converters: converters, parser: parser )
96
+
97
+ if block_given?
98
+ csv.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
99
+ else # slurp contents, if no block is given
100
+ csv.read ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
101
+ end
102
+ end # method self.parse
103
+
104
+
105
+
106
+ ############################
107
+ ## note: only add parse_line convenience helper for default
108
+ ## always use parse (do NOT/NOT/NOT use parse_line) - why? why not?
109
+ ## todo/fix: remove parse_line!!!
110
+ def self.parse_line( data, sep: nil,
111
+ converters: nil )
112
+ records = []
113
+ parse( data, sep: sep, converters: converters ) do |record|
114
+ records << record
115
+ break # only parse first record
116
+ end
117
+ records.size == 0 ? nil : records.first
118
+ end
119
+
120
+
121
+
122
+
123
+ def initialize( data, sep: nil, converters: nil, parser: nil )
124
+ raise ArgumentError.new( "Cannot parse nil as CSV" ) if data.nil?
125
+ ## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV" if data.nil?
126
+
127
+ # create the IO object we will read from
128
+ @io = data.is_a?(String) ? StringIO.new(data) : data
129
+
130
+ @sep = sep
131
+ @converters = converters
132
+
133
+ @parser = parser.nil? ? Parser::DEFAULT : parser
134
+ end
135
+
136
+
137
+ ### IO and StringIO Delegation ###
138
+ extend Forwardable
139
+ def_delegators :@io,
140
+ :close, :closed?, :eof, :eof?
141
+
142
+ ## add more - why? why not?
143
+ ## def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
144
+ ## :closed?, :eof, :eof?, :external_encoding, :fcntl,
145
+ ## :fileno, :flock, :flush, :fsync, :internal_encoding,
146
+ ## :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
147
+ ## :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
148
+ ## :to_io, :truncate, :tty?
149
+
150
+
151
+ include Enumerable
152
+
153
+ def each( &block )
154
+ if block_given?
155
+ kwargs = {
156
+ ## converters: converters ## todo: add converters
157
+ }
158
+ ## note: only add separator if present/defined (not nil)
159
+ kwargs[:sep] = @sep if @sep && @parser.respond_to?( :'sep=' )
160
+
161
+ @parser.parse( @io, kwargs, &block )
162
+ else
163
+ to_enum
164
+ end
165
+ end # method each
110
166
 
111
- ## unwrap record if empty return nil - why? why not?
112
- ## return empty record e.g. [] - why? why not?
113
- ## returns nil for empty (for now) - why? why not?
114
- records.size == 0 ? nil : records.first
115
- end # method self.header
167
+ def read() to_a; end # method read
116
168
 
117
169
  end # class CsvReader
@@ -4,7 +4,7 @@
4
4
  class CsvReader ## note: uses a class for now - change to module - why? why not?
5
5
 
6
6
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
7
- MINOR = 6
7
+ MINOR = 7
8
8
  PATCH = 0
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
@@ -0,0 +1,10 @@
1
+ #####
2
+ # csv sample from the wikipedia article "Comma-separated values"
3
+ # see en.wikipedia.org/wiki/Comma-separated_values
4
+
5
+ Year,Make,Model,Description,Price
6
+ 1997, Ford, E350,"ac, abs, moon",3000.00
7
+ 1999, Chevy, "Venture ""Extended Edition""","",4900.00
8
+ 1999, Chevy, "Venture ""Extended Edition, Very Large""",,5000.00
9
+ 1996, Jeep, Grand Cherokee,"MUST SELL!
10
+ air, moon roof, loaded",4799.00
@@ -0,0 +1,12 @@
1
+ #####
2
+ # csv sample from the wikipedia article "Comma-separated values"
3
+ # see en.wikipedia.org/wiki/Comma-separated_values
4
+ #
5
+ # note:
6
+ # Double quote processing need only apply if the field starts
7
+ # with a double quote. Note, however, that double quotes are not
8
+ # allowed in unquoted fields according to RFC 4180
9
+
10
+ Los Angeles, 34°03'N, 118°15'W
11
+ New York City, 40°42'46"N, 74°00'21"W
12
+ Paris, 48°51'24"N, 2°21'03"E
@@ -0,0 +1,13 @@
1
+ #####
2
+ # csv sample from the article:
3
+ # A Guide to the Ruby CSV Library, Part I
4
+ # - sitepoint.com/guide-ruby-csv-library-part
5
+
6
+ Name,Times arrived,Total $ spent,Food feedback
7
+ Dan, 34, 2548, Lovin it!
8
+ Maria, 55, 5054, "Good, delicious food"
9
+ Carlos, 22, 4352, "I am ""pleased"", but could be better"
10
+ Stephany, 34, 6542, I want bigger steaks!!!!!
11
+ James, 1, 43, Not bad
12
+ Robin, 1, 56, Fish is tasty
13
+ Anna, 1, 79, "Good, better, the best!"
@@ -205,15 +205,4 @@ def test_lf
205
205
  parser.default.parse( "character" + LF + "NotEscaped" )
206
206
  end
207
207
 
208
-
209
-
210
- def test_escaped_mysql_null_value
211
- ## MySQL uses \N to symbolize null values. We have to restore this
212
-
213
- ## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
214
- ## only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
215
- assert_equal [[ "character\\NEscaped" ]],
216
- parser.default.parse( "character\\NEscaped" )
217
- end
218
-
219
208
  end # class TestParserJava
@@ -0,0 +1,107 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_null.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestParserNull < MiniTest::Test
12
+
13
+
14
+ def parser
15
+ CsvReader::Parser
16
+ end
17
+
18
+
19
+ def test_escaped_mysql_null_value
20
+ ## MySQL uses \N to symbolize null values. We have to restore this
21
+
22
+ ## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
23
+ ## only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
24
+ assert_equal [[ "character\\NEscaped" ]],
25
+ parser.default.parse( "character\\NEscaped" )
26
+
27
+ assert_equal [[ "character\\NEscaped" ]],
28
+ parser.strict.parse( "character\\NEscaped" )
29
+ end
30
+
31
+
32
+ def test_mysql_null_value
33
+ default_null_values = parser.default.config[:null] ## save default null settings
34
+
35
+ assert_equal [[ nil, nil, "" ]],
36
+ parser.default.parse( "\\N, \\N ," )
37
+
38
+ ## escaped with quotes
39
+ assert_equal [[ "\\N", "\\N", "" ]],
40
+ parser.default.parse( %Q{"\\N", "\\N" ,} )
41
+
42
+ ## try single \N setting
43
+ parser.default.null = "\\N"
44
+ assert_equal [[ nil, nil, "" ]],
45
+ parser.default.parse( "\\N, \\N ," )
46
+
47
+ ## try no null values setting
48
+ parser.default.null = nil
49
+ assert_equal [[ "\\N", "\\N", "" ]],
50
+ parser.default.parse( "\\N, \\N ," )
51
+
52
+ ## try postgresql unquoted empty string is nil/null
53
+ parser.default.null = ""
54
+ assert_equal [[ nil, nil, "" ],
55
+ [ nil, nil, "", nil ]],
56
+ parser.default.parse( %Q{,,""\n , , "" ,} )
57
+
58
+ ## try proc
59
+ parser.default.null = ->(value) { value.downcase == 'nil' }
60
+ assert_equal [[ nil, nil, nil, "" ]],
61
+ parser.default.parse( "nil, Nil, NIL," )
62
+
63
+ ## try array
64
+ parser.default.null = ['nil', 'Nil', 'NIL']
65
+ assert_equal [[ nil, nil, nil, "" ]],
66
+ parser.default.parse( "nil, Nil, NIL," )
67
+
68
+ ## restore defaults
69
+ parser.default.null = default_null_values ## ['\N', 'NA']
70
+ end
71
+
72
+
73
+ def test_strict_mysql_null_value
74
+ assert_equal [[ "\\N", " \\N ", "" ]],
75
+ parser.strict.parse( "\\N, \\N ," )
76
+
77
+ ## try single \N setting
78
+ parser.strict.null = "\\N"
79
+ assert_equal [[ nil, nil, " \\N", "\\N ", "" ]],
80
+ parser.strict.parse( "\\N,\\N, \\N,\\N ," )
81
+
82
+ ## escaped with quotes
83
+ assert_equal [[ "\\N", "\\N", nil, "" ]],
84
+ parser.strict.parse( %Q{"\\N","\\N",\\N,} )
85
+
86
+
87
+ ## try postgresql unquoted empty string is nil/null
88
+ parser.strict.null = ""
89
+ assert_equal [[ nil, nil, "" ],
90
+ [ " ", " ", "", nil ]],
91
+ parser.strict.parse( %Q{,,""\n , ,"",} )
92
+
93
+ ## try proc
94
+ parser.strict.null = ->(value) { value.downcase == 'nil' }
95
+ assert_equal [[ nil, nil, nil, "" ]],
96
+ parser.strict.parse( "nil,Nil,NIL," )
97
+
98
+ ## try array
99
+ parser.strict.null = ['nil', 'Nil', 'NIL']
100
+ assert_equal [[ nil, nil, nil, "" ]],
101
+ parser.strict.parse( "nil,Nil,NIL," )
102
+
103
+ ## restore defaults
104
+ parser.strict.null = nil
105
+ end
106
+
107
+ end # class TestParserNull
@@ -60,32 +60,20 @@ end
60
60
 
61
61
 
62
62
  def test_parse_empties
63
- assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
64
-
65
- parser.config[:quoted_empty_null] = true
66
-
67
- assert_equal true, parser.config[:quoted_empty_null]
68
- assert_equal false, parser.config[:unquoted_empty_null]
69
-
70
- assert_equal [[nil,nil,nil," "],["","",""," "]], parser.parse( %Q{"","",""," "\n,,, } )
71
-
72
-
73
- parser.config[:unquoted_empty_null] = true
74
-
75
- assert_equal true, parser.config[:quoted_empty_null]
76
- assert_equal true, parser.config[:unquoted_empty_null]
77
-
78
- assert_equal [[nil,nil,nil," "],[nil,nil,nil," "]], parser.parse( %Q{"","",""," "\n,,, } )
63
+ assert_equal [["","",""],["","",""]],
64
+ parser.parse( %Q{"","",""\n,,} )
79
65
 
66
+ parser.null = ""
67
+ assert_equal [["","",""," "],[nil,nil,nil," "]],
68
+ parser.parse( %Q{"","",""," "\n,,, } )
69
+ parser.null = [""] ## try array (allows multiple null values)
70
+ assert_equal [[nil,nil,nil," "],["","",""," "]],
71
+ parser.parse( %Q{,,, \n"","",""," "} )
80
72
 
81
73
  ## reset to defaults
82
- parser.config[:quoted_empty_null] = false
83
- parser.config[:unquoted_empty_null] = false
84
-
85
- assert_equal false, parser.config[:quoted_empty_null]
86
- assert_equal false, parser.config[:unquoted_empty_null]
87
-
88
- assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
74
+ parser.null = nil
75
+ assert_equal [["","",""],["","",""]],
76
+ parser.parse( %Q{"","",""\n,,} )
89
77
  end
90
78
 
91
79
 
@@ -77,4 +77,17 @@ def test_foreach
77
77
  assert true
78
78
  end
79
79
 
80
+
81
+ def test_enum
82
+ csv = CsvReader.new( "a,b,c" )
83
+ enum = csv.to_enum
84
+ assert_equal ["a","b","c"], enum.next
85
+
86
+ ## test Csv == CsvReader class alias
87
+ csv = Csv.new( "a,b,c" )
88
+ enum = csv.to_enum
89
+ assert_equal ["a","b","c"], enum.next
90
+ end
91
+
92
+
80
93
  end # class TestReader
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-01 00:00:00.000000000 Z
11
+ date: 2018-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -56,6 +56,7 @@ files:
56
56
  - Rakefile
57
57
  - lib/csvreader.rb
58
58
  - lib/csvreader/buffer.rb
59
+ - lib/csvreader/builder.rb
59
60
  - lib/csvreader/parser.rb
60
61
  - lib/csvreader/parser_std.rb
61
62
  - lib/csvreader/parser_strict.rb
@@ -65,11 +66,15 @@ files:
65
66
  - lib/csvreader/version.rb
66
67
  - test/data/beer.csv
67
68
  - test/data/beer11.csv
69
+ - test/data/cars11.csv
70
+ - test/data/cities11.csv
71
+ - test/data/customers11.csv
68
72
  - test/data/shakespeare.csv
69
73
  - test/helper.rb
70
74
  - test/test_parser.rb
71
75
  - test/test_parser_formats.rb
72
76
  - test/test_parser_java.rb
77
+ - test/test_parser_null.rb
73
78
  - test/test_parser_strict.rb
74
79
  - test/test_parser_tab.rb
75
80
  - test/test_reader.rb