csvreader 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa8aec6ffb59bb3e27d09889ebd1294364d288eb
4
- data.tar.gz: 913002d3c342651381bf38fc952b56913f2554da
3
+ metadata.gz: 47c630dbfe75b03e4f2d03710cca0f4b3c66ea84
4
+ data.tar.gz: 7b604c9b9144190b1b8b4a4dcd848ea5b7f88aa4
5
5
  SHA512:
6
- metadata.gz: 23d5bedb995926f464a4bd95e62c52eb50dd8ee109ae883b934c616b62cbbf9b9239f184e89027dda2fdf14d41af63c7d41a26f869951544c90ed6ad662be8b3
7
- data.tar.gz: c212ad5acdc55f5105bd5c412fcfef36f18370282f84295b47ae517cbcf5f03d9bb78440709cf6137c657feb67028de266c18cd3cb577877547249413cc1783a
6
+ metadata.gz: 2dad1ae27b4273b8e5a22cf6eae60f141eca381229ed03bfbe6f403e99aae07a16aa4fc84e1f22a953cd9db5da1b9fa2fb6266666fdc6a756872e8bd4ec8dfb9
7
+ data.tar.gz: 38b0002ea3bdfff0b7ce994064d4fb4993e75a3b1225128a50ad9e18180c12cdae9d2a40b6f694e70ebccf4d65005fd5a2145450c20a565de8d3f5af7b398a58
@@ -4,6 +4,7 @@ Manifest.txt
4
4
  README.md
5
5
  Rakefile
6
6
  lib/csvreader.rb
7
+ lib/csvreader/base.rb
7
8
  lib/csvreader/buffer.rb
8
9
  lib/csvreader/builder.rb
9
10
  lib/csvreader/converter.rb
@@ -26,6 +27,7 @@ test/test_parser.rb
26
27
  test/test_parser_formats.rb
27
28
  test/test_parser_java.rb
28
29
  test/test_parser_null.rb
30
+ test/test_parser_numeric.rb
29
31
  test/test_parser_strict.rb
30
32
  test/test_parser_tab.rb
31
33
  test/test_reader.rb
data/README.md CHANGED
@@ -21,14 +21,14 @@ TXT
21
21
  records = Csv.parse( txt ) ## or CsvReader.parse
22
22
  pp records
23
23
  # => [["1","2","3"],
24
- # ["5","6","7"]]
24
+ # ["4","5","6"]]
25
25
 
26
26
  # -or-
27
27
 
28
28
  records = Csv.read( "values.csv" ) ## or CsvReader.read
29
29
  pp records
30
30
  # => [["1","2","3"],
31
- # ["5","6","7"]]
31
+ # ["4","5","6"]]
32
32
 
33
33
  # -or-
34
34
 
@@ -36,11 +36,11 @@ Csv.foreach( "values.csv" ) do |rec| ## or CsvReader.foreach
36
36
  pp rec
37
37
  end
38
38
  # => ["1","2","3"]
39
- # => ["5","6","7"]
39
+ # => ["4","5","6"]
40
40
  ```
41
41
 
42
42
 
43
- ### What about converters?
43
+ ### What about type inference and data converters?
44
44
 
45
45
  Use the converters keyword option to (auto-)convert strings to nulls, booleans, integers, floats, dates, etc.
46
46
  Example:
@@ -72,6 +72,18 @@ Built-in converters include:
72
72
  | `:all` | shortcut for `[:null, :boolean, :date_time, :numeric]` |
73
73
 
74
74
 
75
+ Or add your own converters. Example:
76
+
77
+ ``` ruby
78
+ Csv.parse( 'Ruby, 2020-03-01, 100', converters: [->(v) { Time.parse(v) rescue v }] )
79
+ #=> [["Ruby", 2020-03-01 00:00:00 +0200, "100"]]
80
+ ```
81
+
82
+ A custom converter is a method that gets the value passed in
83
+ and if successful returns a non-string type (e.g. integer, float, date, etc.)
84
+ or a string (for further processing with all other converters in the "pipeline" configuration).
85
+
86
+
75
87
 
76
88
  ### What about Enumerable?
77
89
 
@@ -94,7 +106,7 @@ it = csv.to_enum
94
106
  pp it.next
95
107
  # => ["1","2","3"]
96
108
  pp it.next
97
- # => ["5","6","7"]
109
+ # => ["4","5","6"]
98
110
  ```
99
111
 
100
112
 
@@ -150,7 +162,7 @@ end
150
162
 
151
163
  ### What about symbol keys for hashes?
152
164
 
153
- Yes, use can use the header_converters keyword option.
165
+ Yes, you can use the header_converters keyword option.
154
166
  Use `:symbol` for (auto-)converting header (strings) to symbols.
155
167
  Note: the symbol converter will also downcase all letters and
156
168
  remove all non-alphanumeric (e.g. `!?$%`) chars
@@ -169,6 +181,15 @@ records = CsvHash.parse( txt, :converters => :all, :header_converters => :symbol
169
181
  pp records
170
182
  # => [{a: 1, b: 2, c: 3},
171
183
  # {a: true, b: false, c: nil}]
184
+
185
+ # -or-
186
+ options = { :converters => :all,
187
+ :header_converters => :symbol }
188
+
189
+ records = CsvHash.parse( txt, options )
190
+ pp records
191
+ # => [{a: 1, b: 2, c: 3},
192
+ # {a: true, b: false, c: nil}]
172
193
  ```
173
194
 
174
195
  Built-in header converters include:
@@ -180,6 +201,91 @@ Built-in header converters include:
180
201
 
181
202
 
182
203
 
204
+ ### What about (typed) structs?
205
+
206
+ See the [csvrecord library »](https://github.com/csv11/csvrecord)
207
+
208
+ Example from the csvrecord documentation:
209
+
210
+ Step 1: Define a (typed) struct for the comma-separated values (csv) records. Example:
211
+
212
+ ```ruby
213
+ require 'csvrecord'
214
+
215
+ Beer = CsvRecord.define do
216
+ field :brewery ## note: default type is :string
217
+ field :city
218
+ field :name
219
+ field :abv, Float ## allows type specified as class (or use :float)
220
+ end
221
+ ```
222
+
223
+ or in "classic" style:
224
+
225
+ ```ruby
226
+ class Beer < CsvRecord::Base
227
+ field :brewery
228
+ field :city
229
+ field :name
230
+ field :abv, Float
231
+ end
232
+ ```
233
+
234
+
235
+ Step 2: Read in the comma-separated values (csv) datafile. Example:
236
+
237
+ ```ruby
238
+ beers = Beer.read( 'beer.csv' )
239
+
240
+ puts "#{beers.size} beers:"
241
+ pp beers
242
+ ```
243
+
244
+ pretty prints (pp):
245
+
246
+ ```
247
+ 6 beers:
248
+ [#<Beer:0x302c760 @values=
249
+ ["Andechser Klosterbrauerei", "Andechs", "Doppelbock Dunkel", 7.0]>,
250
+ #<Beer:0x3026fe8 @values=
251
+ ["Augustiner Br\u00E4u M\u00FCnchen", "M\u00FCnchen", "Edelstoff", 5.6]>,
252
+ #<Beer:0x30257a0 @values=
253
+ ["Bayerische Staatsbrauerei Weihenstephan", "Freising", "Hefe Weissbier", 5.4]>,
254
+ ...
255
+ ]
256
+ ```
257
+
258
+ Or loop over the records. Example:
259
+
260
+ ``` ruby
261
+ Beer.read( 'beer.csv' ).each do |rec|
262
+ puts "#{rec.name} (#{rec.abv}%) by #{rec.brewery}, #{rec.city}"
263
+ end
264
+
265
+ # -or-
266
+
267
+ Beer.foreach( 'beer.csv' ) do |rec|
268
+ puts "#{rec.name} (#{rec.abv}%) by #{rec.brewery}, #{rec.city}"
269
+ end
270
+ ```
271
+
272
+
273
+ printing:
274
+
275
+ ```
276
+ Doppelbock Dunkel (7.0%) by Andechser Klosterbrauerei, Andechs
277
+ Edelstoff (5.6%) by Augustiner Bräu München, München
278
+ Hefe Weissbier (5.4%) by Bayerische Staatsbrauerei Weihenstephan, Freising
279
+ Rauchbier Märzen (5.1%) by Brauerei Spezial, Bamberg
280
+ Münchner Dunkel (5.0%) by Hacker-Pschorr Bräu, München
281
+ Hofbräu Oktoberfestbier (6.3%) by Staatliches Hofbräuhaus München, München
282
+ ```
283
+
284
+
285
+ ### What about tabular data packages with pre-defined types / schemas?
286
+
287
+ See the [csvpack library »](https://github.com/csv11/csvpack)
288
+
183
289
 
184
290
 
185
291
 
@@ -319,7 +425,7 @@ Csv.strict.read( ..., sep: "\t" )
319
425
 
320
426
  Two major design bugs and many many minor.
321
427
 
322
- (1) The CSV class uses [`line.split(',')`](https://github.com/ruby/csv/blob/master/lib/csv.rb#L1248) with some kludges (†) with the claim it's faster.
428
+ (1) The CSV class uses [`line.split(',')`](https://github.com/ruby/csv/blob/master/lib/csv.rb#L1255) with some kludges (†) with the claim it's faster.
323
429
  What?! The right way: CSV needs its own purpose-built parser. There's no other
324
430
  way you can handle all the (edge) cases with double quotes and escaped doubled up
325
431
  double quotes. Period.
@@ -1,138 +1,12 @@
1
- # encoding: utf-8
2
-
3
-
4
- require 'pp'
5
- require 'logger'
6
- require 'forwardable'
7
- require 'stringio'
8
- require 'date' ## use for Date.parse and DateTime.parse
9
-
10
-
11
- ###
12
- # our own code
13
- require 'csvreader/version' # let version always go first
14
- require 'csvreader/buffer'
15
- require 'csvreader/parser_std' # best practices pre-configured out-of-the-box
16
- require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space trimming, blanks, etc.), configure for different formats/dialects
17
- require 'csvreader/parser_tab'
18
- require 'csvreader/parser'
19
- require 'csvreader/builder'
20
- require 'csvreader/reader'
21
- require 'csvreader/reader_hash'
22
- require 'csvreader/converter'
23
-
24
-
25
-
26
- class CsvReader
27
- class Parser
28
-
29
- ## use/allow different "backends" e.g. ParserStd, ParserStrict, ParserTab, etc.
30
- ## parser must support parse method (with and without block)
31
- ## e.g. records = parse( data )
32
- ## -or-
33
- ## parse( data ) do |record|
34
- ## end
35
-
36
- DEFAULT = ParserStd.new
37
-
38
- RFC4180 = ParserStrict.new
39
- STRICT = ParserStrict.new ## note: make strict its own instance (so you can change config without "breaking" rfc4180)
40
- EXCEL = ParserStrict.new ## note: make excel its own instance (so you can change configs without "breaking" rfc4180/strict)
41
-
42
- MYSQL = ParserStrict.new( sep: "\t",
43
- quote: false,
44
- escape: true,
45
- null: "\\N" )
46
-
47
- POSTGRES = POSTGRESQL = ParserStrict.new( doublequote: false,
48
- escape: true,
49
- null: "" )
50
-
51
- POSTGRES_TEXT = POSTGRESQL_TEXT = ParserStrict.new( sep: "\t",
52
- quote: false,
53
- escape: true,
54
- null: "\\N" )
55
-
56
- TAB = ParserTab.new
57
-
58
-
59
- def self.default() DEFAULT; end ## alternative alias for DEFAULT
60
- def self.strict() STRICT; end ## alternative alias for STRICT
61
- def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
62
- def self.excel() EXCEL; end ## alternative alias for EXCEL
63
- def self.mysql() MYSQL; end
64
- def self.postgresql() POSTGRESQL; end
65
- def self.postgres() postgresql; end
66
- def self.postgresql_text() POSTGRESQL_TEXT; end
67
- def self.postgres_text() postgresql_text; end
68
- def self.tab() TAB; end
69
- end # class Parser
70
- end # class CsvReader
71
-
72
-
73
-
74
- class CsvReader
75
- ### pre-define CsvReader (built-in) formats/dialect
76
- DEFAULT = CsvBuilder.new( Parser::DEFAULT )
77
-
78
- STRICT = CsvBuilder.new( Parser::STRICT )
79
- RFC4180 = CsvBuilder.new( Parser::RFC4180 )
80
- EXCEL = CsvBuilder.new( Parser::EXCEL )
81
-
82
- MYSQL = CsvBuilder.new( Parser::MYSQL )
83
- POSTGRES = POSTGRESQL = CsvBuilder.new( Parser::POSTGRESQL )
84
- POSTGRES_TEXT = POSTGRESQL_TEXT = CsvBuilder.new( Parser::POSTGRESQL_TEXT )
85
-
86
- TAB = CsvBuilder.new( Parser::TAB )
87
-
88
-
89
- def self.default() DEFAULT; end ## alternative alias for DEFAULT
90
- def self.strict() STRICT; end ## alternative alias for STRICT
91
- def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
92
- def self.excel() EXCEL; end ## alternative alias for EXCEL
93
- def self.mysql() MYSQL; end
94
- def self.postgresql() POSTGRESQL; end
95
- def self.postgres() postgresql; end
96
- def self.postgresql_text() POSTGRESQL_TEXT; end
97
- def self.postgres_text() postgresql_text; end
98
- def self.tab() TAB; end
99
- end # class CsvReader
100
-
101
-
102
- class CsvHashReader
103
- ### pre-define CsvReader (built-in) formats/dialect
104
- DEFAULT = CsvHashBuilder.new( CsvReader::Parser::DEFAULT )
105
-
106
- STRICT = CsvHashBuilder.new( CsvReader::Parser::STRICT )
107
- RFC4180 = CsvHashBuilder.new( CsvReader::Parser::RFC4180 )
108
- EXCEL = CsvHashBuilder.new( CsvReader::Parser::EXCEL )
109
-
110
- MYSQL = CsvHashBuilder.new( CsvReader::Parser::MYSQL )
111
- POSTGRES = POSTGRESQL = CsvHashBuilder.new( CsvReader::Parser::POSTGRESQL )
112
- POSTGRES_TEXT = POSTGRESQL_TEXT = CsvHashBuilder.new( CsvReader::Parser::POSTGRESQL_TEXT )
113
-
114
- TAB = CsvHashBuilder.new( CsvReader::Parser::TAB )
115
-
116
-
117
- def self.default() DEFAULT; end ## alternative alias for DEFAULT
118
- def self.strict() STRICT; end ## alternative alias for STRICT
119
- def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
120
- def self.excel() EXCEL; end ## alternative alias for EXCEL
121
- def self.mysql() MYSQL; end
122
- def self.postgresql() POSTGRESQL; end
123
- def self.postgres() postgresql; end
124
- def self.postgresql_text() POSTGRESQL_TEXT; end
125
- def self.postgres_text() postgresql_text; end
126
- def self.tab() TAB; end
127
- end # class CsvHashReader
128
-
129
-
130
-
131
-
132
- ## add convenience / shortcut alias
133
- Csv = CsvReader
134
- CsvHash = CsvHashReader
135
-
136
-
137
-
138
- puts CsvReader.banner # say hello
1
+ # encoding: utf-8
2
+
3
+
4
+ ## our own code (without "top-level" shortcuts e.g. "modular version")
5
+ require 'csvreader/base'
6
+
7
+
8
+ ###
9
+ # add convenience top-level shortcuts / aliases
10
+
11
+ Csv = CsvReader
12
+ CsvHash = CsvHashReader
@@ -0,0 +1,144 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'pp'
5
+ require 'logger'
6
+ require 'forwardable'
7
+ require 'stringio'
8
+ require 'date' ## use for Date.parse and DateTime.parse
9
+
10
+
11
+ ###
12
+ # our own code
13
+ require 'csvreader/version' # let version always go first
14
+ require 'csvreader/buffer'
15
+ require 'csvreader/parser_std' # best practices pre-configured out-of-the-box
16
+ require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space trimming, blanks, etc.), configure for different formats/dialects
17
+ require 'csvreader/parser_tab'
18
+ require 'csvreader/parser'
19
+ require 'csvreader/converter'
20
+ require 'csvreader/reader'
21
+ require 'csvreader/reader_hash'
22
+ require 'csvreader/builder'
23
+
24
+
25
+
26
+ class CsvReader
27
+ class Parser
28
+
29
+ ## use/allow different "backends" e.g. ParserStd, ParserStrict, ParserTab, etc.
30
+ ## parser must support parse method (with and without block)
31
+ ## e.g. records = parse( data )
32
+ ## -or-
33
+ ## parse( data ) do |record|
34
+ ## end
35
+
36
+ DEFAULT = ParserStd.new
37
+
38
+ RFC4180 = ParserStrict.new
39
+ STRICT = ParserStrict.new ## note: make strict its own instance (so you can change config without "breaking" rfc4180)
40
+ EXCEL = ParserStrict.new ## note: make excel its own instance (so you can change configs without "breaking" rfc4180/strict)
41
+
42
+ MYSQL = ParserStrict.new( sep: "\t",
43
+ quote: false,
44
+ escape: true,
45
+ null: "\\N" )
46
+
47
+ POSTGRES = POSTGRESQL = ParserStrict.new( doublequote: false,
48
+ escape: true,
49
+ null: "" )
50
+
51
+ POSTGRES_TEXT = POSTGRESQL_TEXT = ParserStrict.new( sep: "\t",
52
+ quote: false,
53
+ escape: true,
54
+ null: "\\N" )
55
+
56
+ NUMERIC = ParserStrict.new( numeric: true,
57
+ nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
58
+ null: "" )
59
+
60
+
61
+ TAB = ParserTab.new
62
+
63
+
64
+ def self.default() DEFAULT; end ## alternative alias for DEFAULT
65
+ def self.strict() STRICT; end ## alternative alias for STRICT
66
+ def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
67
+ def self.excel() EXCEL; end ## alternative alias for EXCEL
68
+ def self.mysql() MYSQL; end
69
+ def self.postgresql() POSTGRESQL; end
70
+ def self.postgres() postgresql; end
71
+ def self.postgresql_text() POSTGRESQL_TEXT; end
72
+ def self.postgres_text() postgresql_text; end
73
+ def self.numeric() NUMERIC; end
74
+ def self.tab() TAB; end
75
+ end # class Parser
76
+ end # class CsvReader
77
+
78
+
79
+
80
+ class CsvReader
81
+ ### pre-define CsvReader (built-in) formats/dialect
82
+ DEFAULT = Builder.new( Parser::DEFAULT )
83
+
84
+ STRICT = Builder.new( Parser::STRICT )
85
+ RFC4180 = Builder.new( Parser::RFC4180 )
86
+ EXCEL = Builder.new( Parser::EXCEL )
87
+
88
+ MYSQL = Builder.new( Parser::MYSQL )
89
+ POSTGRES = POSTGRESQL = Builder.new( Parser::POSTGRESQL )
90
+ POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
91
+
92
+ NUMERIC = Builder.new( Parser::NUMERIC )
93
+
94
+ TAB = Builder.new( Parser::TAB )
95
+
96
+
97
+ def self.default() DEFAULT; end ## alternative alias for DEFAULT
98
+ def self.strict() STRICT; end ## alternative alias for STRICT
99
+ def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
100
+ def self.excel() EXCEL; end ## alternative alias for EXCEL
101
+ def self.mysql() MYSQL; end
102
+ def self.postgresql() POSTGRESQL; end
103
+ def self.postgres() postgresql; end
104
+ def self.postgresql_text() POSTGRESQL_TEXT; end
105
+ def self.postgres_text() postgresql_text; end
106
+ def self.numeric() NUMERIC; end
107
+ def self.tab() TAB; end
108
+ end # class CsvReader
109
+
110
+
111
+ class CsvHashReader
112
+ ### pre-define CsvReader (built-in) formats/dialect
113
+ DEFAULT = Builder.new( Parser::DEFAULT )
114
+
115
+ STRICT = Builder.new( Parser::STRICT )
116
+ RFC4180 = Builder.new( Parser::RFC4180 )
117
+ EXCEL = Builder.new( Parser::EXCEL )
118
+
119
+ MYSQL = Builder.new( Parser::MYSQL )
120
+ POSTGRES = POSTGRESQL = Builder.new( Parser::POSTGRESQL )
121
+ POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
122
+
123
+ NUMERIC = Builder.new( Parser::NUMERIC )
124
+
125
+ TAB = Builder.new( Parser::TAB )
126
+
127
+
128
+ def self.default() DEFAULT; end ## alternative alias for DEFAULT
129
+ def self.strict() STRICT; end ## alternative alias for STRICT
130
+ def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
131
+ def self.excel() EXCEL; end ## alternative alias for EXCEL
132
+ def self.mysql() MYSQL; end
133
+ def self.postgresql() POSTGRESQL; end
134
+ def self.postgres() postgresql; end
135
+ def self.postgresql_text() POSTGRESQL_TEXT; end
136
+ def self.postgres_text() postgresql_text; end
137
+ def self.numeric() NUMERIC; end
138
+ def self.tab() TAB; end
139
+ end # class CsvHashReader
140
+
141
+
142
+
143
+
144
+ puts CsvReader.banner # say hello
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
-
4
- class CsvBuilder ## rename to CsvReaderBuilder - why? why not?
3
+ class CsvReader
4
+ class Builder ## rename to CsvReaderBuilder - why? why not?
5
5
 
6
6
 
7
7
  def initialize( parser )
@@ -53,12 +53,13 @@ class CsvBuilder ## rename to CsvReaderBuilder - why? why not?
53
53
  sep: sep, converters: converters,
54
54
  parser: @parser, &block )
55
55
  end
56
- end # class CsvBuilder
57
-
56
+ end # class Builder
57
+ end # class CsvReader
58
58
 
59
59
 
60
60
 
61
- class CsvHashBuilder ## rename to CsvHashReaderBuilder - why? why not?
61
+ class CsvHashReader
62
+ class Builder ## rename to CsvHashReaderBuilder - why? why not?
62
63
  def initialize( parser )
63
64
  @parser = parser
64
65
  end
@@ -117,4 +118,5 @@ class CsvHashBuilder ## rename to CsvHashReaderBuilder - why? why not?
117
118
  header_converters: header_converters,
118
119
  parser: @parser, &block )
119
120
  end
120
- end # class CsvHashBuilder
121
+ end # class Builder
122
+ end # class CsvHashReader
@@ -2,6 +2,12 @@
2
2
 
3
3
  class CsvReader
4
4
 
5
+ class Parser
6
+ ## "forward" reference,
7
+ ## see base.rb for more
8
+ end
9
+
10
+
5
11
  ####################################
6
12
  # define errors / exceptions
7
13
  # for all parsers for (re)use
@@ -36,7 +36,9 @@ def initialize( sep: ',',
36
36
  doublequote: true,
37
37
  escape: false, ## true/false
38
38
  null: nil, ## note: set to nil for no null values / not available (na)
39
- comment: false ## note: comment char e.g. # or false/nil
39
+ comment: false, ## note: comment char e.g. # or false/nil
40
+ numeric: false, ## (auto-)convert all non-quoted values to float
41
+ nan: nil ## note: only if numeric - set mappings for Float::NAN (not a number) values
40
42
  )
41
43
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
42
44
  @config[:sep] = sep
@@ -45,8 +47,11 @@ def initialize( sep: ',',
45
47
  @config[:escape] = escape
46
48
  @config[:null] = null
47
49
  @config[:comment] = comment
50
+ @config[:numeric] = numeric
51
+ @config[:nan] = nan # not a number (NaN) e.g. Float::NAN
48
52
  end
49
53
 
54
+
50
55
  #########################################
51
56
  ## config convenience helpers
52
57
  ## e.g. use like Csv.mysql.sep = ',' etc. instead of
@@ -57,6 +62,8 @@ def doublequote=( value ) @config[:doublequote]=value; end
57
62
  def escape=( value ) @config[:escape]=value; end
58
63
  def null=( value ) @config[:null]=value; end
59
64
  def comment=( value ) @config[:comment]=value; end
65
+ def numeric=( value ) @config[:numeric]=value; end
66
+ def nan=( value ) @config[:nan]=value; end
60
67
 
61
68
 
62
69
 
@@ -149,14 +156,22 @@ end
149
156
  def parse_field( input, sep: )
150
157
  value = ""
151
158
 
152
- quote = config[:quote]
153
- escape = config[:escape]
159
+ quote = config[:quote]
160
+ escape = config[:escape]
161
+ numeric = config[:numeric]
154
162
 
155
163
  logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
156
164
 
157
165
  if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty unquoted field
158
- value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
159
- ## return value; do nothing
166
+ ## note: allows null = '' that is turn unquoted empty strings into null/nil
167
+ ## or if using numeric into NotANumber (NaN)
168
+ if is_null?( value )
169
+ value = nil
170
+ elsif numeric & is_nan?( value )
171
+ value = Float::NAN
172
+ else
173
+ # do nothing - keep value as is :-) e.g. "".
174
+ end
160
175
  elsif quote && input.peek == quote
161
176
  logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
162
177
  value << parse_quote( input, sep: sep )
@@ -174,7 +189,24 @@ def parse_field( input, sep: )
174
189
  end
175
190
  end
176
191
 
177
- value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
192
+
193
+ if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
194
+ value = nil
195
+ elsif numeric
196
+ if is_nan?( value )
197
+ value = Float::NAN
198
+ else
199
+ ## numeric - (auto-convert) non-quoted values (if NOT nil) to floats
200
+ if numeric.is_a?( Proc )
201
+ value = numeric.call( value ) ## allow custom converter proc (e.g. how to handle NaN and conversion errors?)
202
+ else
203
+ value = convert_to_float( value ) # default (fails silently) keep string value if cannot convert - change - why? why not?
204
+ end
205
+ end
206
+ else
207
+ # do nothing - keep value as is :-).
208
+ end
209
+
178
210
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
179
211
  end
180
212
 
@@ -182,6 +214,7 @@ def parse_field( input, sep: )
182
214
  end
183
215
 
184
216
 
217
+
185
218
  def parse_record( input, sep: )
186
219
  values = []
187
220
 
@@ -263,6 +296,24 @@ def parse_lines( input, sep:, &block )
263
296
  end # method parse_lines
264
297
 
265
298
 
299
+ def convert_to_float( value ) Float( value ) rescue value; end
300
+
301
+ def is_nan?( value )
302
+ nan = @config[:nan]
303
+ if nan.nil?
304
+ false ## nothing set; return always false (not NaN)
305
+ elsif nan.is_a?( Proc )
306
+ nan.call( value )
307
+ elsif nan.is_a?( Array )
308
+ nan.include?( value )
309
+ elsif nan.is_a?( String )
310
+ value == nan
311
+ else ## unknown config style / setting
312
+ ## todo: issue a warning or error - why? why not?
313
+ false ## nothing set; return always false (not nan)
314
+ end
315
+ end
316
+
266
317
  def is_null?( value )
267
318
  null = @config[:null]
268
319
  if null.nil?
@@ -2,9 +2,6 @@
2
2
 
3
3
  class CsvReader
4
4
 
5
- #######
6
- ## csv reader
7
-
8
5
  def self.open( path, mode=nil,
9
6
  sep: nil,
10
7
  converters: nil,
@@ -3,6 +3,12 @@
3
3
  class CsvHashReader
4
4
 
5
5
 
6
+ ## add convenience shortcuts / aliases for CsvReader support classes
7
+ Parser = CsvReader::Parser
8
+ Converter = CsvReader::Converter
9
+
10
+
11
+
6
12
  def self.open( path, mode=nil,
7
13
  headers: nil,
8
14
  sep: nil,
@@ -113,10 +119,10 @@ def initialize( data, headers: nil, sep: nil,
113
119
 
114
120
  @sep = sep
115
121
 
116
- @converters = CsvReader::Converter.create_converters( converters )
117
- @header_converters = CsvReader::Converter.create_header_converters( header_converters )
122
+ @converters = Converter.create_converters( converters )
123
+ @header_converters = Converter.create_header_converters( header_converters )
118
124
 
119
- @parser = parser.nil? ? CsvReader::Parser::DEFAULT : parser
125
+ @parser = parser.nil? ? Parser::DEFAULT : parser
120
126
  end
121
127
 
122
128
 
@@ -5,7 +5,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
5
5
 
6
6
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
7
7
  MINOR = 0
8
- PATCH = 0
8
+ PATCH = 1
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
 
@@ -7,6 +7,8 @@ require 'minitest/autorun'
7
7
 
8
8
  ## our own code
9
9
  require 'csvreader'
10
+ ## require 'csvreader/base' ## try modular version (that is, without Csv,CsvHash "top-level" shortcuts)
11
+
10
12
 
11
13
  ## add test_data_dir helper
12
14
  class CsvReader
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_numeric.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestParserNumeric < MiniTest::Test
12
+
13
+ def parser
14
+ CsvReader::Parser::NUMERIC
15
+ end
16
+
17
+
18
+ def test_parser_numeric
19
+ pp CsvReader::Parser::NUMERIC
20
+ pp CsvReader::Parser.numeric
21
+ assert true
22
+ end
23
+
24
+ def test_parse
25
+ assert_equal [[1.0,2.0,3.0],
26
+ [4.0,5.0,6.0]], parser.parse( "1,2,3\n4,5,6" )
27
+ assert_equal [[1.0,2.0,3.0],
28
+ ["4","5","6"]], parser.parse( %Q{ 1,2 , 3\n"4","5","6"} )
29
+ assert_equal [["a","b","c"]], parser.parse( %Q{"a","b","c"} )
30
+ end
31
+
32
+
33
+ def test_empty
34
+ assert_equal [[nil,nil,nil],
35
+ ["","",""]], parser.parse( %Q{,,\n"","",""} )
36
+ end
37
+
38
+ end # class TestParserNumeric
@@ -84,9 +84,11 @@ def test_enum
84
84
  assert_equal ["a","b","c"], enum.next
85
85
 
86
86
  ## test Csv == CsvReader class alias
87
- csv = Csv.new( "a,b,c" )
88
- enum = csv.to_enum
89
- assert_equal ["a","b","c"], enum.next
87
+ if defined?( Csv )
88
+ csv = Csv.new( "a,b,c" )
89
+ enum = csv.to_enum
90
+ assert_equal ["a","b","c"], enum.next
91
+ end
90
92
  end
91
93
 
92
94
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-09 00:00:00.000000000 Z
11
+ date: 2018-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -55,6 +55,7 @@ files:
55
55
  - README.md
56
56
  - Rakefile
57
57
  - lib/csvreader.rb
58
+ - lib/csvreader/base.rb
58
59
  - lib/csvreader/buffer.rb
59
60
  - lib/csvreader/builder.rb
60
61
  - lib/csvreader/converter.rb
@@ -77,6 +78,7 @@ files:
77
78
  - test/test_parser_formats.rb
78
79
  - test/test_parser_java.rb
79
80
  - test/test_parser_null.rb
81
+ - test/test_parser_numeric.rb
80
82
  - test/test_parser_strict.rb
81
83
  - test/test_parser_tab.rb
82
84
  - test/test_reader.rb