csvreader 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c87e1cac5f0988f4423a0c5aaf96d2a625bf4d60
4
- data.tar.gz: 5af8f5875ac0e18ade4cc793ba8ad658f905d1df
3
+ metadata.gz: c61a8e62f99e1a06c119b4995e0e4e1d3c829d71
4
+ data.tar.gz: 1b59f3415f3f0fe449a8c2395d2cefc7a7bd855c
5
5
  SHA512:
6
- metadata.gz: c9528101aa8a2db3a8e0dfb3685e6d15fcd262a76ed16f69b34ca9d54003e772f9441eb1673e11886ee14ac3347a99c22bd06662a8191214189f5c57f0ecfe7b
7
- data.tar.gz: acc9ada28d539dbc7ce1a2178e904ba247f511327f5828eebfdda78b21b263ca5d153d8fc234d7483cb60229f764094bc3c2fbeefa2381335d4e21a30487a828
6
+ metadata.gz: 52e3e8effa09f492c38736f1fb16552341237ebe0e8e5452a72c080d03f48e47b9d66fbfa1b8f42e3ccd86d37038975254da0e3038f297174636a3b34ce57542
7
+ data.tar.gz: 66c17daaa22d6e5d526c76b9568737b4982da9f02b11e9264fdb9d9644d5e33881255f623a174515c78714352b2df54c811cb8b6e0222aa5cc79682ac320ee62
@@ -5,6 +5,7 @@ README.md
5
5
  Rakefile
6
6
  lib/csvreader.rb
7
7
  lib/csvreader/buffer.rb
8
+ lib/csvreader/builder.rb
8
9
  lib/csvreader/parser.rb
9
10
  lib/csvreader/parser_std.rb
10
11
  lib/csvreader/parser_strict.rb
@@ -14,11 +15,15 @@ lib/csvreader/reader_hash.rb
14
15
  lib/csvreader/version.rb
15
16
  test/data/beer.csv
16
17
  test/data/beer11.csv
18
+ test/data/cars11.csv
19
+ test/data/cities11.csv
20
+ test/data/customers11.csv
17
21
  test/data/shakespeare.csv
18
22
  test/helper.rb
19
23
  test/test_parser.rb
20
24
  test/test_parser_formats.rb
21
25
  test/test_parser_java.rb
26
+ test/test_parser_null.rb
22
27
  test/test_parser_strict.rb
23
28
  test/test_parser_tab.rb
24
29
  test/test_reader.rb
data/README.md CHANGED
@@ -11,14 +11,6 @@
11
11
 
12
12
  ## Usage
13
13
 
14
- ``` ruby
15
- line = "1,2,3"
16
- values = CsvReader.parse_line( line )
17
- pp values
18
- # => ["1","2","3"]
19
- ```
20
-
21
- or use the convenience helpers:
22
14
 
23
15
  ``` ruby
24
16
  txt <<=TXT
@@ -26,21 +18,21 @@ txt <<=TXT
26
18
  4,5,6
27
19
  TXT
28
20
 
29
- records = CsvReader.parse( txt )
21
+ records = Csv.parse( txt ) ## or CsvReader.parse
30
22
  pp records
31
23
  # => [["1","2","3"],
32
24
  # ["4","5","6"]]
33
25
 
34
26
  # -or-
35
27
 
36
- records = CsvReader.read( "values.csv" )
28
+ records = Csv.read( "values.csv" ) ## or CsvReader.read
37
29
  pp records
38
30
  # => [["1","2","3"],
39
31
  # ["5","6","7"]]
40
32
 
41
33
  # -or-
42
34
 
43
- CsvReader.foreach( "values.csv" ) do |rec|
35
+ Csv.foreach( "values.csv" ) do |rec| ## or CsvReader.foreach
44
36
  pp rec
45
37
  end
46
38
  # => ["1","2","3"]
@@ -50,7 +42,7 @@ end
50
42
 
51
43
  ### What about headers?
52
44
 
53
- Use the `CsvHashReader`
45
+ Use the `CsvHash`
54
46
  if the first line is a header (or if missing pass in the headers
55
47
  as an array) and you want your records as hashes instead of arrays of strings.
56
48
  Example:
@@ -62,7 +54,7 @@ A,B,C
62
54
  4,5,6
63
55
  TXT
64
56
 
65
- records = CsvHashReader.parse( txt )
57
+ records = CsvHash.parse( txt ) ## or CsvHashReader.parse
66
58
  pp records
67
59
 
68
60
  # -or-
@@ -72,7 +64,7 @@ txt2 <<=TXT
72
64
  4,5,6
73
65
  TXT
74
66
 
75
- records = CsvHashReader.parse( txt2, headers: ["A","B","C"] )
67
+ records = CsvHash.parse( txt2, headers: ["A","B","C"] ) ## or CsvHashReader.parse
76
68
  pp records
77
69
 
78
70
  # => [{"A": "1", "B": "2", "C": "3"},
@@ -80,14 +72,14 @@ pp records
80
72
 
81
73
  # -or-
82
74
 
83
- records = CsvHashReader.read( "hash.csv" )
75
+ records = CsvHash.read( "hash.csv" ) ## or CsvHashReader.read
84
76
  pp records
85
77
  # => [{"A": "1", "B": "2", "C": "3"},
86
78
  # {"A": "4", "B": "5", "C": "6"}]
87
79
 
88
80
  # -or-
89
81
 
90
- CsvHashReader.foreach( "hash.csv" ) do |rec|
82
+ CsvHash.foreach( "hash.csv" ) do |rec| ## or CsvHashReader.foreach
91
83
  pp rec
92
84
  end
93
85
  # => {"A": "1", "B": "2", "C": "3"}
@@ -141,13 +133,11 @@ Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
141
133
  Pass in the `sep` keyword option. Example:
142
134
 
143
135
  ``` ruby
144
- CsvReader.parse_line( ..., sep: ';' )
145
- CsvReader.parse( ..., sep: ';' )
146
- CsvReader.read( ..., sep: ';' )
136
+ Csv.parse( ..., sep: ';' )
137
+ Csv.read( ..., sep: ';' )
147
138
  # ...
148
- CsvReader.parse_line( ..., sep: '|' )
149
- CsvReader.parse( ..., sep: '|' )
150
- CsvReader.read( ..., sep: '|' )
139
+ Csv.parse( ..., sep: '|' )
140
+ Csv.read( ..., sep: '|' )
151
141
  # ...
152
142
  # and so on
153
143
  ```
@@ -3,6 +3,7 @@
3
3
 
4
4
  require 'pp'
5
5
  require 'logger'
6
+ require 'forwardable'
6
7
 
7
8
 
8
9
  ###
@@ -13,6 +14,7 @@ require 'csvreader/parser_std' # best practices pre-configured out-of-the-b
13
14
  require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space trimming, blanks, etc.), configure for different formats/dialects
14
15
  require 'csvreader/parser_tab'
15
16
  require 'csvreader/parser'
17
+ require 'csvreader/builder'
16
18
  require 'csvreader/reader'
17
19
  require 'csvreader/reader_hash'
18
20
 
@@ -0,0 +1,56 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ class CsvBuilder ## rename to CsvReaderBuilder - why? why not?
5
+ def initialize( parser )
6
+ @parser = parser
7
+ end
8
+
9
+
10
+ ## todo/fix:
11
+ ## add parser config (attribute) setter e.g.
12
+ ## - sep=(value)
13
+ ## - comment=(value)
14
+ ## - and so on!!!
15
+ ##
16
+ ## add config too - why? why not?
17
+
18
+
19
+ def open( path, mode='r:bom|utf-8',
20
+ sep: nil,
21
+ converters: nil,
22
+ parser: @parser, &block )
23
+ CsvReader.open( path, mode,
24
+ sep: sep, converters: converters,
25
+ parser: @parser, &block )
26
+ end
27
+
28
+ def read( path, sep: nil,
29
+ converters: nil )
30
+ CsvReader.read( path,
31
+ sep: sep, converters: converters,
32
+ parser: @parser )
33
+ end
34
+
35
+ def header( path, sep: nil )
36
+ CsvReader.header( path,
37
+ sep: sep,
38
+ parser: @parser )
39
+ end
40
+
41
+ def foreach( path, sep: nil,
42
+ converters: nil, &block )
43
+ CsvReader.foreach( path,
44
+ sep: sep, converters: converters,
45
+ parser: @parser, &block )
46
+ end
47
+
48
+
49
+
50
+ def parse( data, sep: nil,
51
+ converters: nil, &block )
52
+ CsvReader.parse( data,
53
+ sep: sep, converters: converters,
54
+ parser: @parser, &block )
55
+ end
56
+ end # class CsvBuilder
@@ -25,7 +25,7 @@ MYSQL = ParserStrict.new( sep: "\t",
25
25
 
26
26
  POSTGRES = POSTGRESQL = ParserStrict.new( doublequote: false,
27
27
  escape: true,
28
- unquoted_empty_null: true )
28
+ null: "" )
29
29
 
30
30
  POSTGRES_TEXT = POSTGRESQL_TEXT = ParserStrict.new( sep: "\t",
31
31
  quote: false,
@@ -39,12 +39,29 @@ def logger() self.class.logger; end
39
39
 
40
40
  attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
41
41
 
42
+ ##
43
+ ## todo/check:
44
+ ## null values - include NA - why? why not?
45
+ ## make null values case sensitive or add an option for case sensitive
46
+ ## or better allow a proc as option for checking too!!!
42
47
  def initialize( null: ['\N', 'NA'] ## note: set to nil for no null values / not available (na)
43
48
  )
44
49
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
50
+
51
+ ## note: null values must get handled by parser
52
+ ## only get checked for unquoted strings (and NOT for quoted strings)
53
+ ## "higher-level" code only knows about strings and no longer has any info on whether a string was quoted or unquoted
45
54
  @config[:null] = null ## null values
46
55
  end
47
56
 
57
+ #########################################
58
+ ## config convenience helpers
59
+ ## e.g. use like Csv.default.null = '\N' etc. instead of
60
+ ## Csv.default.config[:null] = '\N'
61
+ def null=( value ) @config[:null]=value; end
62
+
63
+
64
+
48
65
 
49
66
  def parse( data, **kwargs, &block )
50
67
 
@@ -132,6 +149,7 @@ def parse_field( input )
132
149
  skip_spaces( input ) ## strip leading spaces
133
150
 
134
151
  if (c=input.peek; c=="," || c==LF || c==CR || input.eof?) ## empty field
152
+ value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
135
153
  ## return value; do nothing
136
154
  elsif input.peek == DOUBLE_QUOTE
137
155
  logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
@@ -156,6 +174,8 @@ def parse_field( input )
156
174
  ## note: only strip **trailing** spaces (space and tab only)
157
175
  ## do NOT strip newlines etc. might have been added via escape! e.g. \\\n
158
176
  value = value.sub( /[ \t]+$/, '' )
177
+ value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
178
+
159
179
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
160
180
  end
161
181
 
@@ -251,5 +271,24 @@ def parse_lines( input, &block )
251
271
  end # method parse_lines
252
272
 
253
273
 
274
+
275
+
276
+ def is_null?( value )
277
+ null = @config[:null]
278
+ if null.nil?
279
+ false ## nothing set; return always false (not null)
280
+ elsif null.is_a?( Proc )
281
+ null.call( value )
282
+ elsif null.is_a?( Array )
283
+ null.include?( value )
284
+ elsif null.is_a?( String )
285
+ value == null
286
+ else ## unknown config style / setting
287
+ ## todo: issue a warning or error - why? why not?
288
+ false ## nothing set; return always false (not null)
289
+ end
290
+ end
291
+
292
+
254
293
  end # class ParserStd
255
294
  end # class CsvReader
@@ -35,9 +35,7 @@ def initialize( sep: ',',
35
35
  quote: '"', ## note: set to false/nil for no quote
36
36
  doublequote: true,
37
37
  escape: false, ## true/false
38
- null: ['\N', 'NA'], ## note: set to nil for no null vales / not availabe (na)
39
- quoted_empty_null: false,
40
- unquoted_empty_null: false,
38
+ null: nil, ## note: set to nil for no null values / not available (na)
41
39
  comment: false ## note: comment char e.g. # or false/nil
42
40
  )
43
41
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
@@ -46,8 +44,6 @@ def initialize( sep: ',',
46
44
  @config[:doublequote] = doublequote
47
45
  @config[:escape] = escape
48
46
  @config[:null] = null
49
- @config[:quoted_empty_null] = quoted_empty_null
50
- @config[:unquoted_empty_null] = unquoted_empty_null
51
47
  @config[:comment] = comment
52
48
  end
53
49
 
@@ -55,9 +51,12 @@ end
55
51
  ## config convenience helpers
56
52
  ## e.g. use like Csv.mysql.sep = ',' etc. instead of
57
53
  ## Csv.mysql.config[:sep] = ','
58
- def sep=( value ) @config[:sep]=value; end
59
- def comment=( value ) @config[:comment]=value; end
60
- def escape=( value ) @config[:escape]=value; end
54
+ def sep=( value ) @config[:sep]=value; end
55
+ def quote=( value ) @config[:quote]=value; end
56
+ def doublequote=( value ) @config[:doublequote]=value; end
57
+ def escape=( value ) @config[:escape]=value; end
58
+ def null=( value ) @config[:null]=value; end
59
+ def comment=( value ) @config[:comment]=value; end
61
60
 
62
61
 
63
62
 
@@ -156,14 +155,11 @@ def parse_field( input, sep: )
156
155
  logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
157
156
 
158
157
  if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty unquoted field
159
- value = nil if config[:unquoted_empty_null]
158
+ value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
160
159
  ## return value; do nothing
161
160
  elsif quote && input.peek == quote
162
161
  logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
163
162
  value << parse_quote( input, sep: sep )
164
-
165
- value = nil if config[:quoted_empty_null] && value == ""
166
-
167
163
  logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
168
164
  else
169
165
  logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
@@ -177,6 +173,8 @@ def parse_field( input, sep: )
177
173
  value << input.getc
178
174
  end
179
175
  end
176
+
177
+ value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
180
178
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
181
179
  end
182
180
 
@@ -265,5 +263,22 @@ def parse_lines( input, sep:, &block )
265
263
  end # method parse_lines
266
264
 
267
265
 
266
+ def is_null?( value )
267
+ null = @config[:null]
268
+ if null.nil?
269
+ false ## nothing set; return always false (not null)
270
+ elsif null.is_a?( Proc )
271
+ null.call( value )
272
+ elsif null.is_a?( Array )
273
+ null.include?( value )
274
+ elsif null.is_a?( String )
275
+ value == null
276
+ else ## unknown config style / setting
277
+ ## todo: issue a warning or error - why? why not?
278
+ false ## nothing set; return always false (not null)
279
+ end
280
+ end
281
+
282
+
268
283
  end # class ParserStrict
269
284
  end # class CsvReader
@@ -1,18 +1,12 @@
1
1
  # encoding: utf-8
2
2
 
3
-
4
-
5
3
  class CsvReader
6
4
 
7
- def initialize( parser )
8
- @parser = parser
9
- end
10
-
11
- DEFAULT = new( Parser::DEFAULT )
12
- STRICT = new( Parser::STRICT )
13
- RFC4180 = new( Parser::RFC4180 )
14
- EXCEL = new( Parser::EXCEL )
15
- TAB = new( Parser::TAB )
5
+ DEFAULT = CsvBuilder.new( Parser::DEFAULT )
6
+ STRICT = CsvBuilder.new( Parser::STRICT )
7
+ RFC4180 = CsvBuilder.new( Parser::RFC4180 )
8
+ EXCEL = CsvBuilder.new( Parser::EXCEL )
9
+ TAB = CsvBuilder.new( Parser::TAB )
16
10
 
17
11
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
18
12
  def self.strict() STRICT; end ## alternative alias for STRICT
@@ -21,97 +15,155 @@ class CsvReader
21
15
  def self.tab() TAB; end ## alternative alias for TAB
22
16
 
23
17
 
24
- #####################
25
- ## convenience helpers defaulting to default csv dialect/format reader
26
- ##
27
- ## CsvReader.parse is the same as
28
- ## CsvReader::DEFAULT.parse or CsvReader.default.parse
29
- ##
30
-
31
- def self.parse( data, sep: nil,
32
- converters: nil, &block )
33
- DEFAULT.parse( data, sep: sep, converters: converters, &block )
34
- end
35
-
36
- def self.read( path, sep: nil,
37
- converters: nil )
38
- DEFAULT.read( path, sep: sep, converters: converters )
39
- end
40
-
41
- def self.header( path, sep: nil )
42
- DEFAULT.header( path, sep: sep )
43
- end
44
-
45
- def self.foreach( path, sep: nil,
46
- converters: nil, &block )
47
- DEFAULT.foreach( path, sep: sep, converters: converters, &block )
48
- end
49
-
50
-
51
- ############################
52
- ## note: only add parse_line convenience helper for default
53
- ## always use parse (do NOT use parse_line) - why? why not?
54
- def self.parse_line( data, sep: nil,
55
- converters: nil )
56
- records = []
57
- DEFAULT.parse( data, sep: sep, converters: converters ) do |record|
58
- records << record
59
- break # only parse first record
60
- end
61
- records.size == 0 ? nil : records.first
62
- end
63
-
64
-
65
-
66
- #############################
67
- ## all "high-level" reader methods
68
- ##
69
- ## note: allow "overriding" of separator
70
- ## if sep is not nil otherwise use default dialect/format separator
71
-
72
- def parse( data, sep: nil,
73
- converters: nil, &block )
74
- kwargs = {
75
- ## converters: converters ## todo: add converters
76
- }
77
- ## note: only add separator if present/defined (not nil)
78
- kwargs[:sep] = sep if sep && @parser.respond_to?( :'sep=' )
79
-
80
- @parser.parse( data, kwargs, &block )
81
- end
82
-
83
- def read( path, sep: nil,
84
- converters: nil )
85
- ## note: use our own file.open
86
- ## always use utf-8 for now
87
- ## check/todo: add skip option bom too - why? why not?
88
- txt = File.open( path, 'r:bom|utf-8' ).read
89
- parse( txt, sep: sep )
90
- end
91
-
92
- def foreach( path, sep: nil,
93
- converters: nil, &block )
94
- File.open( path, 'r:bom|utf-8' ) do |file|
95
- parse( file, sep: sep, &block )
18
+
19
+
20
+
21
+ #######
22
+ ## csv reader
23
+
24
+ def self.open( path, mode='r:bom|utf-8',
25
+ sep: nil,
26
+ converters: nil,
27
+ parser: nil, &block ) ## rename path to filename or name - why? why not?
28
+ f = File.open( path, mode )
29
+ csv = new(f, sep: sep, converters: converters, parser: parser )
30
+
31
+ # handle blocks like Ruby's open(), not like the (old old) CSV library
32
+ if block_given?
33
+ begin
34
+ block.call( csv )
35
+ ensure
36
+ csv.close
37
+ end
38
+ else
39
+ csv
40
+ end
41
+ end # method self.open
42
+
43
+
44
+ def self.read( path, sep: nil,
45
+ converters: nil,
46
+ parser: nil )
47
+ open( path,
48
+ sep: sep,
49
+ converters: converters,
50
+ parser: parser ) { |csv| csv.read }
96
51
  end
97
- end
98
52
 
99
- def header( path, sep: nil ) ## use header or headers - or use both (with alias)?
100
- # read first lines (only)
101
- # and parse with csv to get header from csv library itself
102
53
 
103
- records = []
104
- File.open( path, 'r:bom|utf-8' ) do |file|
105
- parse( file, sep: sep ) do |record|
106
- records << record
107
- break ## only parse/read first record
54
+ def self.header( path, sep: nil, parser: nil ) ## use header or headers - or use both (with alias)?
55
+ # read first lines (only)
56
+ # and parse with csv to get header from csv library itself
57
+
58
+ records = []
59
+ open( path, sep: sep, parser: parser ) do |csv|
60
+ csv.each do |record|
61
+ records << record
62
+ break ## only parse/read first record
63
+ end
64
+ end
65
+
66
+ ## unwrap record if empty return nil - why? why not?
67
+ ## return empty record e.g. [] - why? why not?
68
+ ## returns nil for empty (for now) - why? why not?
69
+ records.size == 0 ? nil : records.first
70
+ end # method self.header
71
+
72
+
73
+ def self.foreach( path, sep: nil,
74
+ converters: nil, parser: nil, &block )
75
+ csv = open( path, sep: sep, converters: converters, parser: parser )
76
+
77
+ if block_given?
78
+ begin
79
+ csv.each( &block )
80
+ ensure
81
+ csv.close
108
82
  end
109
- end
83
+ else
84
+ csv.to_enum ## note: caller (responsible) must close file!!!
85
+ ## remove version without block given - why? why not?
86
+ ## use Csv.open().to_enum or Csv.open().each
87
+ ## or Csv.new( File.new() ).to_enum or Csv.new( File.new() ).each ???
88
+ end
89
+ end # method self.foreach
90
+
91
+
92
+ def self.parse( data, sep: nil,
93
+ converters: nil,
94
+ parser: nil, &block )
95
+ csv = new( data, sep: sep, converters: converters, parser: parser )
96
+
97
+ if block_given?
98
+ csv.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
99
+ else # slurp contents, if no block is given
100
+ csv.read ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
101
+ end
102
+ end # method self.parse
103
+
104
+
105
+
106
+ ############################
107
+ ## note: only add parse_line convenience helper for default
108
+ ## always use parse (do NOT/NOT/NOT use parse_line) - why? why not?
109
+ ## todo/fix: remove parse_line!!!
110
+ def self.parse_line( data, sep: nil,
111
+ converters: nil )
112
+ records = []
113
+ parse( data, sep: sep, converters: converters ) do |record|
114
+ records << record
115
+ break # only parse first record
116
+ end
117
+ records.size == 0 ? nil : records.first
118
+ end
119
+
120
+
121
+
122
+
123
+ def initialize( data, sep: nil, converters: nil, parser: nil )
124
+ raise ArgumentError.new( "Cannot parse nil as CSV" ) if data.nil?
125
+ ## todo: use (why? why not) - raise ArgumentError, "Cannot parse nil as CSV" if data.nil?
126
+
127
+ # create the IO object we will read from
128
+ @io = data.is_a?(String) ? StringIO.new(data) : data
129
+
130
+ @sep = sep
131
+ @converters = converters
132
+
133
+ @parser = parser.nil? ? Parser::DEFAULT : parser
134
+ end
135
+
136
+
137
+ ### IO and StringIO Delegation ###
138
+ extend Forwardable
139
+ def_delegators :@io,
140
+ :close, :closed?, :eof, :eof?
141
+
142
+ ## add more - why? why not?
143
+ ## def_delegators :@io, :binmode, :binmode?, :close, :close_read, :close_write,
144
+ ## :closed?, :eof, :eof?, :external_encoding, :fcntl,
145
+ ## :fileno, :flock, :flush, :fsync, :internal_encoding,
146
+ ## :ioctl, :isatty, :path, :pid, :pos, :pos=, :reopen,
147
+ ## :seek, :stat, :string, :sync, :sync=, :tell, :to_i,
148
+ ## :to_io, :truncate, :tty?
149
+
150
+
151
+ include Enumerable
152
+
153
+ def each( &block )
154
+ if block_given?
155
+ kwargs = {
156
+ ## converters: converters ## todo: add converters
157
+ }
158
+ ## note: only add separator if present/defined (not nil)
159
+ kwargs[:sep] = @sep if @sep && @parser.respond_to?( :'sep=' )
160
+
161
+ @parser.parse( @io, kwargs, &block )
162
+ else
163
+ to_enum
164
+ end
165
+ end # method each
110
166
 
111
- ## unwrap record if empty return nil - why? why not?
112
- ## return empty record e.g. [] - why? why not?
113
- ## returns nil for empty (for now) - why? why not?
114
- records.size == 0 ? nil : records.first
115
- end # method self.header
167
+ def read() to_a; end # method read
116
168
 
117
169
  end # class CsvReader
@@ -4,7 +4,7 @@
4
4
  class CsvReader ## note: uses a class for now - change to module - why? why not?
5
5
 
6
6
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
7
- MINOR = 6
7
+ MINOR = 7
8
8
  PATCH = 0
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
@@ -0,0 +1,10 @@
1
+ #####
2
+ # csv sample from the wikipedia article "Comma-separated values"
3
+ # see en.wikipedia.org/wiki/Comma-separated_values
4
+
5
+ Year,Make,Model,Description,Price
6
+ 1997, Ford, E350,"ac, abs, moon",3000.00
7
+ 1999, Chevy, "Venture ""Extended Edition""","",4900.00
8
+ 1999, Chevy, "Venture ""Extended Edition, Very Large""",,5000.00
9
+ 1996, Jeep, Grand Cherokee,"MUST SELL!
10
+ air, moon roof, loaded",4799.00
@@ -0,0 +1,12 @@
1
+ #####
2
+ # csv sample from the wikipedia article "Comma-separated values"
3
+ # see en.wikipedia.org/wiki/Comma-separated_values
4
+ #
5
+ # note:
6
+ # Double quote processing need only apply if the field starts
7
+ # with a double quote. Note, however, that double quotes are not
8
+ # allowed in unquoted fields according to RFC 4180
9
+
10
+ Los Angeles, 34°03'N, 118°15'W
11
+ New York City, 40°42'46"N, 74°00'21"W
12
+ Paris, 48°51'24"N, 2°21'03"E
@@ -0,0 +1,13 @@
1
+ #####
2
+ # csv sample from the article:
3
+ # A Guide to the Ruby CSV Library, Part I
4
+ # - sitepoint.com/guide-ruby-csv-library-part
5
+
6
+ Name,Times arrived,Total $ spent,Food feedback
7
+ Dan, 34, 2548, Lovin it!
8
+ Maria, 55, 5054, "Good, delicious food"
9
+ Carlos, 22, 4352, "I am ""pleased"", but could be better"
10
+ Stephany, 34, 6542, I want bigger steaks!!!!!
11
+ James, 1, 43, Not bad
12
+ Robin, 1, 56, Fish is tasty
13
+ Anna, 1, 79, "Good, better, the best!"
@@ -205,15 +205,4 @@ def test_lf
205
205
  parser.default.parse( "character" + LF + "NotEscaped" )
206
206
  end
207
207
 
208
-
209
-
210
- def test_escaped_mysql_null_value
211
- ## MySQL uses \N to symbolize null values. We have to restore this
212
-
213
- ## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
214
- ## only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
215
- assert_equal [[ "character\\NEscaped" ]],
216
- parser.default.parse( "character\\NEscaped" )
217
- end
218
-
219
208
  end # class TestParserJava
@@ -0,0 +1,107 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_null.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestParserNull < MiniTest::Test
12
+
13
+
14
+ def parser
15
+ CsvReader::Parser
16
+ end
17
+
18
+
19
+ def test_escaped_mysql_null_value
20
+ ## MySQL uses \N to symbolize null values. We have to restore this
21
+
22
+ ## note: "unknown escape sequences e.g. \N get passed "through" as-is (unescaped)"
23
+ ## only supports \n \r (sep e.g \, or \t) (quote e.g. \") for now - any others?
24
+ assert_equal [[ "character\\NEscaped" ]],
25
+ parser.default.parse( "character\\NEscaped" )
26
+
27
+ assert_equal [[ "character\\NEscaped" ]],
28
+ parser.strict.parse( "character\\NEscaped" )
29
+ end
30
+
31
+
32
+ def test_mysql_null_value
33
+ default_null_values = parser.default.config[:null] ## save default null settings
34
+
35
+ assert_equal [[ nil, nil, "" ]],
36
+ parser.default.parse( "\\N, \\N ," )
37
+
38
+ ## escaped with quotes
39
+ assert_equal [[ "\\N", "\\N", "" ]],
40
+ parser.default.parse( %Q{"\\N", "\\N" ,} )
41
+
42
+ ## try single \N setting
43
+ parser.default.null = "\\N"
44
+ assert_equal [[ nil, nil, "" ]],
45
+ parser.default.parse( "\\N, \\N ," )
46
+
47
+ ## try no null values setting
48
+ parser.default.null = nil
49
+ assert_equal [[ "\\N", "\\N", "" ]],
50
+ parser.default.parse( "\\N, \\N ," )
51
+
52
+ ## try postgresql unquoted empty string is nil/null
53
+ parser.default.null = ""
54
+ assert_equal [[ nil, nil, "" ],
55
+ [ nil, nil, "", nil ]],
56
+ parser.default.parse( %Q{,,""\n , , "" ,} )
57
+
58
+ ## try proc
59
+ parser.default.null = ->(value) { value.downcase == 'nil' }
60
+ assert_equal [[ nil, nil, nil, "" ]],
61
+ parser.default.parse( "nil, Nil, NIL," )
62
+
63
+ ## try array
64
+ parser.default.null = ['nil', 'Nil', 'NIL']
65
+ assert_equal [[ nil, nil, nil, "" ]],
66
+ parser.default.parse( "nil, Nil, NIL," )
67
+
68
+ ## restore defaults
69
+ parser.default.null = default_null_values ## ['\N', 'NA']
70
+ end
71
+
72
+
73
+ def test_strict_mysql_null_value
74
+ assert_equal [[ "\\N", " \\N ", "" ]],
75
+ parser.strict.parse( "\\N, \\N ," )
76
+
77
+ ## try single \N setting
78
+ parser.strict.null = "\\N"
79
+ assert_equal [[ nil, nil, " \\N", "\\N ", "" ]],
80
+ parser.strict.parse( "\\N,\\N, \\N,\\N ," )
81
+
82
+ ## escaped with quotes
83
+ assert_equal [[ "\\N", "\\N", nil, "" ]],
84
+ parser.strict.parse( %Q{"\\N","\\N",\\N,} )
85
+
86
+
87
+ ## try postgresql unquoted empty string is nil/null
88
+ parser.strict.null = ""
89
+ assert_equal [[ nil, nil, "" ],
90
+ [ " ", " ", "", nil ]],
91
+ parser.strict.parse( %Q{,,""\n , ,"",} )
92
+
93
+ ## try proc
94
+ parser.strict.null = ->(value) { value.downcase == 'nil' }
95
+ assert_equal [[ nil, nil, nil, "" ]],
96
+ parser.strict.parse( "nil,Nil,NIL," )
97
+
98
+ ## try array
99
+ parser.strict.null = ['nil', 'Nil', 'NIL']
100
+ assert_equal [[ nil, nil, nil, "" ]],
101
+ parser.strict.parse( "nil,Nil,NIL," )
102
+
103
+ ## restore defaults
104
+ parser.strict.null = nil
105
+ end
106
+
107
+ end # class TestParserNull
@@ -60,32 +60,20 @@ end
60
60
 
61
61
 
62
62
  def test_parse_empties
63
- assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
64
-
65
- parser.config[:quoted_empty_null] = true
66
-
67
- assert_equal true, parser.config[:quoted_empty_null]
68
- assert_equal false, parser.config[:unquoted_empty_null]
69
-
70
- assert_equal [[nil,nil,nil," "],["","",""," "]], parser.parse( %Q{"","",""," "\n,,, } )
71
-
72
-
73
- parser.config[:unquoted_empty_null] = true
74
-
75
- assert_equal true, parser.config[:quoted_empty_null]
76
- assert_equal true, parser.config[:unquoted_empty_null]
77
-
78
- assert_equal [[nil,nil,nil," "],[nil,nil,nil," "]], parser.parse( %Q{"","",""," "\n,,, } )
63
+ assert_equal [["","",""],["","",""]],
64
+ parser.parse( %Q{"","",""\n,,} )
79
65
 
66
+ parser.null = ""
67
+ assert_equal [["","",""," "],[nil,nil,nil," "]],
68
+ parser.parse( %Q{"","",""," "\n,,, } )
69
+ parser.null = [""] ## try array (allows multiple null values)
70
+ assert_equal [[nil,nil,nil," "],["","",""," "]],
71
+ parser.parse( %Q{,,, \n"","",""," "} )
80
72
 
81
73
  ## reset to defaults
82
- parser.config[:quoted_empty_null] = false
83
- parser.config[:unquoted_empty_null] = false
84
-
85
- assert_equal false, parser.config[:quoted_empty_null]
86
- assert_equal false, parser.config[:unquoted_empty_null]
87
-
88
- assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
74
+ parser.null = nil
75
+ assert_equal [["","",""],["","",""]],
76
+ parser.parse( %Q{"","",""\n,,} )
89
77
  end
90
78
 
91
79
 
@@ -77,4 +77,17 @@ def test_foreach
77
77
  assert true
78
78
  end
79
79
 
80
+
81
+ def test_enum
82
+ csv = CsvReader.new( "a,b,c" )
83
+ enum = csv.to_enum
84
+ assert_equal ["a","b","c"], enum.next
85
+
86
+ ## test Csv == CsvReader class alias
87
+ csv = Csv.new( "a,b,c" )
88
+ enum = csv.to_enum
89
+ assert_equal ["a","b","c"], enum.next
90
+ end
91
+
92
+
80
93
  end # class TestReader
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-01 00:00:00.000000000 Z
11
+ date: 2018-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -56,6 +56,7 @@ files:
56
56
  - Rakefile
57
57
  - lib/csvreader.rb
58
58
  - lib/csvreader/buffer.rb
59
+ - lib/csvreader/builder.rb
59
60
  - lib/csvreader/parser.rb
60
61
  - lib/csvreader/parser_std.rb
61
62
  - lib/csvreader/parser_strict.rb
@@ -65,11 +66,15 @@ files:
65
66
  - lib/csvreader/version.rb
66
67
  - test/data/beer.csv
67
68
  - test/data/beer11.csv
69
+ - test/data/cars11.csv
70
+ - test/data/cities11.csv
71
+ - test/data/customers11.csv
68
72
  - test/data/shakespeare.csv
69
73
  - test/helper.rb
70
74
  - test/test_parser.rb
71
75
  - test/test_parser_formats.rb
72
76
  - test/test_parser_java.rb
77
+ - test/test_parser_null.rb
73
78
  - test/test_parser_strict.rb
74
79
  - test/test_parser_tab.rb
75
80
  - test/test_reader.rb