csvreader 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: aa8aec6ffb59bb3e27d09889ebd1294364d288eb
4
- data.tar.gz: 913002d3c342651381bf38fc952b56913f2554da
3
+ metadata.gz: 47c630dbfe75b03e4f2d03710cca0f4b3c66ea84
4
+ data.tar.gz: 7b604c9b9144190b1b8b4a4dcd848ea5b7f88aa4
5
5
  SHA512:
6
- metadata.gz: 23d5bedb995926f464a4bd95e62c52eb50dd8ee109ae883b934c616b62cbbf9b9239f184e89027dda2fdf14d41af63c7d41a26f869951544c90ed6ad662be8b3
7
- data.tar.gz: c212ad5acdc55f5105bd5c412fcfef36f18370282f84295b47ae517cbcf5f03d9bb78440709cf6137c657feb67028de266c18cd3cb577877547249413cc1783a
6
+ metadata.gz: 2dad1ae27b4273b8e5a22cf6eae60f141eca381229ed03bfbe6f403e99aae07a16aa4fc84e1f22a953cd9db5da1b9fa2fb6266666fdc6a756872e8bd4ec8dfb9
7
+ data.tar.gz: 38b0002ea3bdfff0b7ce994064d4fb4993e75a3b1225128a50ad9e18180c12cdae9d2a40b6f694e70ebccf4d65005fd5a2145450c20a565de8d3f5af7b398a58
@@ -4,6 +4,7 @@ Manifest.txt
4
4
  README.md
5
5
  Rakefile
6
6
  lib/csvreader.rb
7
+ lib/csvreader/base.rb
7
8
  lib/csvreader/buffer.rb
8
9
  lib/csvreader/builder.rb
9
10
  lib/csvreader/converter.rb
@@ -26,6 +27,7 @@ test/test_parser.rb
26
27
  test/test_parser_formats.rb
27
28
  test/test_parser_java.rb
28
29
  test/test_parser_null.rb
30
+ test/test_parser_numeric.rb
29
31
  test/test_parser_strict.rb
30
32
  test/test_parser_tab.rb
31
33
  test/test_reader.rb
data/README.md CHANGED
@@ -21,14 +21,14 @@ TXT
21
21
  records = Csv.parse( txt ) ## or CsvReader.parse
22
22
  pp records
23
23
  # => [["1","2","3"],
24
- # ["5","6","7"]]
24
+ # ["4","5","6"]]
25
25
 
26
26
  # -or-
27
27
 
28
28
  records = Csv.read( "values.csv" ) ## or CsvReader.read
29
29
  pp records
30
30
  # => [["1","2","3"],
31
- # ["5","6","7"]]
31
+ # ["4","5","6"]]
32
32
 
33
33
  # -or-
34
34
 
@@ -36,11 +36,11 @@ Csv.foreach( "values.csv" ) do |rec| ## or CsvReader.foreach
36
36
  pp rec
37
37
  end
38
38
  # => ["1","2","3"]
39
- # => ["5","6","7"]
39
+ # => ["4","5","6"]
40
40
  ```
41
41
 
42
42
 
43
- ### What about converters?
43
+ ### What about type inference and data converters?
44
44
 
45
45
  Use the converters keyword option to (auto-)convert strings to nulls, booleans, integers, floats, dates, etc.
46
46
  Example:
@@ -72,6 +72,18 @@ Built-in converters include:
72
72
  | `:all` | shortcut for `[:null, :boolean, :date_time, :numeric]` |
73
73
 
74
74
 
75
+ Or add your own converters. Example:
76
+
77
+ ``` ruby
78
+ Csv.parse( 'Ruby, 2020-03-01, 100', converters: [->(v) { Time.parse(v) rescue v }] )
79
+ #=> [["Ruby", 2020-03-01 00:00:00 +0200, "100"]]
80
+ ```
81
+
82
+ A custom converter is a method that gets the value passed in
83
+ and, if successful, returns a non-string type (e.g. integer, float, date, etc.);
84
+ otherwise it returns a string (for further processing with all other converters in the "pipeline" configuration).
85
+
86
+
75
87
 
76
88
  ### What about Enumerable?
77
89
 
@@ -94,7 +106,7 @@ it = csv.to_enum
94
106
  pp it.next
95
107
  # => ["1","2","3"]
96
108
  pp it.next
97
- # => ["5","6","7"]
109
+ # => ["4","5","6"]
98
110
  ```
99
111
 
100
112
 
@@ -150,7 +162,7 @@ end
150
162
 
151
163
  ### What about symbol keys for hashes?
152
164
 
153
- Yes, use can use the header_converters keyword option.
165
+ Yes, you can use the header_converters keyword option.
154
166
  Use `:symbol` for (auto-)converting header (strings) to symbols.
155
167
  Note: the symbol converter will also downcase all letters and
156
168
  remove all non-alphanumeric (e.g. `!?$%`) chars
@@ -169,6 +181,15 @@ records = CsvHash.parse( txt, :converters => :all, :header_converters => :symbol
169
181
  pp records
170
182
  # => [{a: 1, b: 2, c: 3},
171
183
  # {a: true, b: false, c: nil}]
184
+
185
+ # -or-
186
+ options = { :converters => :all,
187
+ :header_converters => :symbol }
188
+
189
+ records = CsvHash.parse( txt, options )
190
+ pp records
191
+ # => [{a: 1, b: 2, c: 3},
192
+ # {a: true, b: false, c: nil}]
172
193
  ```
173
194
 
174
195
  Built-in header converters include:
@@ -180,6 +201,91 @@ Built-in header converters include:
180
201
 
181
202
 
182
203
 
204
+ ### What about (typed) structs?
205
+
206
+ See the [csvrecord library »](https://github.com/csv11/csvrecord)
207
+
208
+ Example from the csvrecord docs:
209
+
210
+ Step 1: Define a (typed) struct for the comma-separated values (csv) records. Example:
211
+
212
+ ```ruby
213
+ require 'csvrecord'
214
+
215
+ Beer = CsvRecord.define do
216
+ field :brewery ## note: default type is :string
217
+ field :city
218
+ field :name
219
+ field :abv, Float ## allows type specified as class (or use :float)
220
+ end
221
+ ```
222
+
223
+ or in "classic" style:
224
+
225
+ ```ruby
226
+ class Beer < CsvRecord::Base
227
+ field :brewery
228
+ field :city
229
+ field :name
230
+ field :abv, Float
231
+ end
232
+ ```
233
+
234
+
235
+ Step 2: Read in the comma-separated values (csv) datafile. Example:
236
+
237
+ ```ruby
238
+ beers = Beer.read( 'beer.csv' )
239
+
240
+ puts "#{beers.size} beers:"
241
+ pp beers
242
+ ```
243
+
244
+ pretty prints (pp):
245
+
246
+ ```
247
+ 6 beers:
248
+ [#<Beer:0x302c760 @values=
249
+ ["Andechser Klosterbrauerei", "Andechs", "Doppelbock Dunkel", 7.0]>,
250
+ #<Beer:0x3026fe8 @values=
251
+ ["Augustiner Br\u00E4u M\u00FCnchen", "M\u00FCnchen", "Edelstoff", 5.6]>,
252
+ #<Beer:0x30257a0 @values=
253
+ ["Bayerische Staatsbrauerei Weihenstephan", "Freising", "Hefe Weissbier", 5.4]>,
254
+ ...
255
+ ]
256
+ ```
257
+
258
+ Or loop over the records. Example:
259
+
260
+ ``` ruby
261
+ Beer.read( 'beer.csv' ).each do |rec|
262
+ puts "#{rec.name} (#{rec.abv}%) by #{rec.brewery}, #{rec.city}"
263
+ end
264
+
265
+ # -or-
266
+
267
+ Beer.foreach( 'beer.csv' ) do |rec|
268
+ puts "#{rec.name} (#{rec.abv}%) by #{rec.brewery}, #{rec.city}"
269
+ end
270
+ ```
271
+
272
+
273
+ printing:
274
+
275
+ ```
276
+ Doppelbock Dunkel (7.0%) by Andechser Klosterbrauerei, Andechs
277
+ Edelstoff (5.6%) by Augustiner Bräu München, München
278
+ Hefe Weissbier (5.4%) by Bayerische Staatsbrauerei Weihenstephan, Freising
279
+ Rauchbier Märzen (5.1%) by Brauerei Spezial, Bamberg
280
+ Münchner Dunkel (5.0%) by Hacker-Pschorr Bräu, München
281
+ Hofbräu Oktoberfestbier (6.3%) by Staatliches Hofbräuhaus München, München
282
+ ```
283
+
284
+
285
+ ### What about tabular data packages with pre-defined types / schemas?
286
+
287
+ See the [csvpack library »](https://github.com/csv11/csvpack)
288
+
183
289
 
184
290
 
185
291
 
@@ -319,7 +425,7 @@ Csv.strict.read( ..., sep: "\t" )
319
425
 
320
426
  Two major design bugs and many many minor.
321
427
 
322
- (1) The CSV class uses [`line.split(',')`](https://github.com/ruby/csv/blob/master/lib/csv.rb#L1248) with some kludges (†) with the claim it's faster.
428
+ (1) The CSV class uses [`line.split(',')`](https://github.com/ruby/csv/blob/master/lib/csv.rb#L1255) with some kludges (†) with the claim it's faster.
323
429
  What?! The right way: CSV needs its own purpose-built parser. There's no other
324
430
  way you can handle all the (edge) cases with double quotes and escaped doubled up
325
431
  double quotes. Period.
@@ -1,138 +1,12 @@
1
- # encoding: utf-8
2
-
3
-
4
- require 'pp'
5
- require 'logger'
6
- require 'forwardable'
7
- require 'stringio'
8
- require 'date' ## use for Date.parse and DateTime.parse
9
-
10
-
11
- ###
12
- # our own code
13
- require 'csvreader/version' # let version always go first
14
- require 'csvreader/buffer'
15
- require 'csvreader/parser_std' # best practices pre-configured out-of-the-box
16
- require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space triming, blanks, etc.), configure for different formats/dialects
17
- require 'csvreader/parser_tab'
18
- require 'csvreader/parser'
19
- require 'csvreader/builder'
20
- require 'csvreader/reader'
21
- require 'csvreader/reader_hash'
22
- require 'csvreader/converter'
23
-
24
-
25
-
26
- class CsvReader
27
- class Parser
28
-
29
- ## use/allow different "backends" e.g. ParserStd, ParserStrict, ParserTab, etc.
30
- ## parser must support parse method (with and without block)
31
- ## e.g. records = parse( data )
32
- ## -or-
33
- ## parse( data ) do |record|
34
- ## end
35
-
36
- DEFAULT = ParserStd.new
37
-
38
- RFC4180 = ParserStrict.new
39
- STRICT = ParserStrict.new ## note: make strict its own instance (so you can change config without "breaking" rfc4180)
40
- EXCEL = ParserStrict.new ## note: make excel its own instance (so you can change configs without "breaking" rfc4180/strict)
41
-
42
- MYSQL = ParserStrict.new( sep: "\t",
43
- quote: false,
44
- escape: true,
45
- null: "\\N" )
46
-
47
- POSTGRES = POSTGRESQL = ParserStrict.new( doublequote: false,
48
- escape: true,
49
- null: "" )
50
-
51
- POSTGRES_TEXT = POSTGRESQL_TEXT = ParserStrict.new( sep: "\t",
52
- quote: false,
53
- escape: true,
54
- null: "\\N" )
55
-
56
- TAB = ParserTab.new
57
-
58
-
59
- def self.default() DEFAULT; end ## alternative alias for DEFAULT
60
- def self.strict() STRICT; end ## alternative alias for STRICT
61
- def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
62
- def self.excel() EXCEL; end ## alternative alias for EXCEL
63
- def self.mysql() MYSQL; end
64
- def self.postgresql() POSTGRESQL; end
65
- def self.postgres() postgresql; end
66
- def self.postgresql_text() POSTGRESQL_TEXT; end
67
- def self.postgres_text() postgresql_text; end
68
- def self.tab() TAB; end
69
- end # class Parser
70
- end # class CsvReader
71
-
72
-
73
-
74
- class CsvReader
75
- ### pre-define CsvReader (built-in) formats/dialect
76
- DEFAULT = CsvBuilder.new( Parser::DEFAULT )
77
-
78
- STRICT = CsvBuilder.new( Parser::STRICT )
79
- RFC4180 = CsvBuilder.new( Parser::RFC4180 )
80
- EXCEL = CsvBuilder.new( Parser::EXCEL )
81
-
82
- MYSQL = CsvBuilder.new( Parser::MYSQL )
83
- POSTGRES = POSTGRESQL = CsvBuilder.new( Parser::POSTGRESQL )
84
- POSTGRES_TEXT = POSTGRESQL_TEXT = CsvBuilder.new( Parser::POSTGRESQL_TEXT )
85
-
86
- TAB = CsvBuilder.new( Parser::TAB )
87
-
88
-
89
- def self.default() DEFAULT; end ## alternative alias for DEFAULT
90
- def self.strict() STRICT; end ## alternative alias for STRICT
91
- def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
92
- def self.excel() EXCEL; end ## alternative alias for EXCEL
93
- def self.mysql() MYSQL; end
94
- def self.postgresql() POSTGRESQL; end
95
- def self.postgres() postgresql; end
96
- def self.postgresql_text() POSTGRESQL_TEXT; end
97
- def self.postgres_text() postgresql_text; end
98
- def self.tab() TAB; end
99
- end # class CsvReader
100
-
101
-
102
- class CsvHashReader
103
- ### pre-define CsvReader (built-in) formats/dialect
104
- DEFAULT = CsvHashBuilder.new( CsvReader::Parser::DEFAULT )
105
-
106
- STRICT = CsvHashBuilder.new( CsvReader::Parser::STRICT )
107
- RFC4180 = CsvHashBuilder.new( CsvReader::Parser::RFC4180 )
108
- EXCEL = CsvHashBuilder.new( CsvReader::Parser::EXCEL )
109
-
110
- MYSQL = CsvHashBuilder.new( CsvReader::Parser::MYSQL )
111
- POSTGRES = POSTGRESQL = CsvHashBuilder.new( CsvReader::Parser::POSTGRESQL )
112
- POSTGRES_TEXT = POSTGRESQL_TEXT = CsvHashBuilder.new( CsvReader::Parser::POSTGRESQL_TEXT )
113
-
114
- TAB = CsvHashBuilder.new( CsvReader::Parser::TAB )
115
-
116
-
117
- def self.default() DEFAULT; end ## alternative alias for DEFAULT
118
- def self.strict() STRICT; end ## alternative alias for STRICT
119
- def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
120
- def self.excel() EXCEL; end ## alternative alias for EXCEL
121
- def self.mysql() MYSQL; end
122
- def self.postgresql() POSTGRESQL; end
123
- def self.postgres() postgresql; end
124
- def self.postgresql_text() POSTGRESQL_TEXT; end
125
- def self.postgres_text() postgresql_text; end
126
- def self.tab() TAB; end
127
- end # class CsvHashReader
128
-
129
-
130
-
131
-
132
- ## add convenience / shortcut alias
133
- Csv = CsvReader
134
- CsvHash = CsvHashReader
135
-
136
-
137
-
138
- puts CsvReader.banner # say hello
1
+ # encoding: utf-8
2
+
3
+
4
+ ## our own code (without "top-level" shortcuts e.g. "modular version")
5
+ require 'csvreader/base'
6
+
7
+
8
+ ###
9
+ # add convenience top-level shortcuts / aliases
10
+
11
+ Csv = CsvReader
12
+ CsvHash = CsvHashReader
@@ -0,0 +1,144 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ require 'pp'
5
+ require 'logger'
6
+ require 'forwardable'
7
+ require 'stringio'
8
+ require 'date' ## use for Date.parse and DateTime.parse
9
+
10
+
11
+ ###
12
+ # our own code
13
+ require 'csvreader/version' # let version always go first
14
+ require 'csvreader/buffer'
15
+ require 'csvreader/parser_std' # best practices pre-configured out-of-the-box
16
+ require 'csvreader/parser_strict' # flexible (strict - no leading/trailing space trimming, blanks, etc.), configure for different formats/dialects
17
+ require 'csvreader/parser_tab'
18
+ require 'csvreader/parser'
19
+ require 'csvreader/converter'
20
+ require 'csvreader/reader'
21
+ require 'csvreader/reader_hash'
22
+ require 'csvreader/builder'
23
+
24
+
25
+
26
+ class CsvReader
27
+ class Parser
28
+
29
+ ## use/allow different "backends" e.g. ParserStd, ParserStrict, ParserTab, etc.
30
+ ## parser must support parse method (with and without block)
31
+ ## e.g. records = parse( data )
32
+ ## -or-
33
+ ## parse( data ) do |record|
34
+ ## end
35
+
36
+ DEFAULT = ParserStd.new
37
+
38
+ RFC4180 = ParserStrict.new
39
+ STRICT = ParserStrict.new ## note: make strict its own instance (so you can change config without "breaking" rfc4180)
40
+ EXCEL = ParserStrict.new ## note: make excel its own instance (so you can change configs without "breaking" rfc4180/strict)
41
+
42
+ MYSQL = ParserStrict.new( sep: "\t",
43
+ quote: false,
44
+ escape: true,
45
+ null: "\\N" )
46
+
47
+ POSTGRES = POSTGRESQL = ParserStrict.new( doublequote: false,
48
+ escape: true,
49
+ null: "" )
50
+
51
+ POSTGRES_TEXT = POSTGRESQL_TEXT = ParserStrict.new( sep: "\t",
52
+ quote: false,
53
+ escape: true,
54
+ null: "\\N" )
55
+
56
+ NUMERIC = ParserStrict.new( numeric: true,
57
+ nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
58
+ null: "" )
59
+
60
+
61
+ TAB = ParserTab.new
62
+
63
+
64
+ def self.default() DEFAULT; end ## alternative alias for DEFAULT
65
+ def self.strict() STRICT; end ## alternative alias for STRICT
66
+ def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
67
+ def self.excel() EXCEL; end ## alternative alias for EXCEL
68
+ def self.mysql() MYSQL; end
69
+ def self.postgresql() POSTGRESQL; end
70
+ def self.postgres() postgresql; end
71
+ def self.postgresql_text() POSTGRESQL_TEXT; end
72
+ def self.postgres_text() postgresql_text; end
73
+ def self.numeric() NUMERIC; end
74
+ def self.tab() TAB; end
75
+ end # class Parser
76
+ end # class CsvReader
77
+
78
+
79
+
80
+ class CsvReader
81
+ ### pre-define CsvReader (built-in) formats/dialect
82
+ DEFAULT = Builder.new( Parser::DEFAULT )
83
+
84
+ STRICT = Builder.new( Parser::STRICT )
85
+ RFC4180 = Builder.new( Parser::RFC4180 )
86
+ EXCEL = Builder.new( Parser::EXCEL )
87
+
88
+ MYSQL = Builder.new( Parser::MYSQL )
89
+ POSTGRES = POSTGRESQL = Builder.new( Parser::POSTGRESQL )
90
+ POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
91
+
92
+ NUMERIC = Builder.new( Parser::NUMERIC )
93
+
94
+ TAB = Builder.new( Parser::TAB )
95
+
96
+
97
+ def self.default() DEFAULT; end ## alternative alias for DEFAULT
98
+ def self.strict() STRICT; end ## alternative alias for STRICT
99
+ def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
100
+ def self.excel() EXCEL; end ## alternative alias for EXCEL
101
+ def self.mysql() MYSQL; end
102
+ def self.postgresql() POSTGRESQL; end
103
+ def self.postgres() postgresql; end
104
+ def self.postgresql_text() POSTGRESQL_TEXT; end
105
+ def self.postgres_text() postgresql_text; end
106
+ def self.numeric() NUMERIC; end
107
+ def self.tab() TAB; end
108
+ end # class CsvReader
109
+
110
+
111
+ class CsvHashReader
112
+ ### pre-define CsvReader (built-in) formats/dialect
113
+ DEFAULT = Builder.new( Parser::DEFAULT )
114
+
115
+ STRICT = Builder.new( Parser::STRICT )
116
+ RFC4180 = Builder.new( Parser::RFC4180 )
117
+ EXCEL = Builder.new( Parser::EXCEL )
118
+
119
+ MYSQL = Builder.new( Parser::MYSQL )
120
+ POSTGRES = POSTGRESQL = Builder.new( Parser::POSTGRESQL )
121
+ POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
122
+
123
+ NUMERIC = Builder.new( Parser::NUMERIC )
124
+
125
+ TAB = Builder.new( Parser::TAB )
126
+
127
+
128
+ def self.default() DEFAULT; end ## alternative alias for DEFAULT
129
+ def self.strict() STRICT; end ## alternative alias for STRICT
130
+ def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
131
+ def self.excel() EXCEL; end ## alternative alias for EXCEL
132
+ def self.mysql() MYSQL; end
133
+ def self.postgresql() POSTGRESQL; end
134
+ def self.postgres() postgresql; end
135
+ def self.postgresql_text() POSTGRESQL_TEXT; end
136
+ def self.postgres_text() postgresql_text; end
137
+ def self.numeric() NUMERIC; end
138
+ def self.tab() TAB; end
139
+ end # class CsvHashReader
140
+
141
+
142
+
143
+
144
+ puts CsvReader.banner # say hello
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
-
4
- class CsvBuilder ## rename to CsvReaderBuilder - why? why not?
3
+ class CsvReader
4
+ class Builder ## rename to CsvReaderBuilder - why? why not?
5
5
 
6
6
 
7
7
  def initialize( parser )
@@ -53,12 +53,13 @@ class CsvBuilder ## rename to CsvReaderBuilder - why? why not?
53
53
  sep: sep, converters: converters,
54
54
  parser: @parser, &block )
55
55
  end
56
- end # class CsvBuilder
57
-
56
+ end # class Builder
57
+ end # class CsvReader
58
58
 
59
59
 
60
60
 
61
- class CsvHashBuilder ## rename to CsvHashReaderBuilder - why? why not?
61
+ class CsvHashReader
62
+ class Builder ## rename to CsvHashReaderBuilder - why? why not?
62
63
  def initialize( parser )
63
64
  @parser = parser
64
65
  end
@@ -117,4 +118,5 @@ class CsvHashBuilder ## rename to CsvHashReaderBuilder - why? why not?
117
118
  header_converters: header_converters,
118
119
  parser: @parser, &block )
119
120
  end
120
- end # class CsvHashBuilder
121
+ end # class Builder
122
+ end # class CsvHashReader
@@ -2,6 +2,12 @@
2
2
 
3
3
  class CsvReader
4
4
 
5
+ class Parser
6
+ ## "forward" reference,
7
+ ## see base.rb for more
8
+ end
9
+
10
+
5
11
  ####################################
6
12
  # define errors / exceptions
7
13
  # for all parsers for (re)use
@@ -36,7 +36,9 @@ def initialize( sep: ',',
36
36
  doublequote: true,
37
37
  escape: false, ## true/false
38
38
  null: nil, ## note: set to nil for no null vales / not availabe (na)
39
- comment: false ## note: comment char e.g. # or false/nil
39
+ comment: false, ## note: comment char e.g. # or false/nil
40
+ numeric: false, ## (auto-)convert all non-quoted values to float
41
+ nan: nil ## note: only if numeric - set mappings for Float::NAN (not a number) values
40
42
  )
41
43
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
42
44
  @config[:sep] = sep
@@ -45,8 +47,11 @@ def initialize( sep: ',',
45
47
  @config[:escape] = escape
46
48
  @config[:null] = null
47
49
  @config[:comment] = comment
50
+ @config[:numeric] = numeric
51
+ @config[:nan] = nan # not a number (NaN) e.g. Float::NAN
48
52
  end
49
53
 
54
+
50
55
  #########################################
51
56
  ## config convenience helpers
52
57
  ## e.g. use like Csv.mysql.sep = ',' etc. instead of
@@ -57,6 +62,8 @@ def doublequote=( value ) @config[:doublequote]=value; end
57
62
  def escape=( value ) @config[:escape]=value; end
58
63
  def null=( value ) @config[:null]=value; end
59
64
  def comment=( value ) @config[:comment]=value; end
65
+ def numeric=( value ) @config[:numeric]=value; end
66
+ def nan=( value ) @config[:nan]=value; end
60
67
 
61
68
 
62
69
 
@@ -149,14 +156,22 @@ end
149
156
  def parse_field( input, sep: )
150
157
  value = ""
151
158
 
152
- quote = config[:quote]
153
- escape = config[:escape]
159
+ quote = config[:quote]
160
+ escape = config[:escape]
161
+ numeric = config[:numeric]
154
162
 
155
163
  logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
156
164
 
157
165
  if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty unquoted field
158
- value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
159
- ## return value; do nothing
166
+ ## note: allows null = '' that is turn unquoted empty strings into null/nil
167
+ ## or if using numeric into NotANumber (NaN)
168
+ if is_null?( value )
169
+ value = nil
170
+ elsif numeric & is_nan?( value )
171
+ value = Float::NAN
172
+ else
173
+ # do nothing - keep value as is :-) e.g. "".
174
+ end
160
175
  elsif quote && input.peek == quote
161
176
  logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
162
177
  value << parse_quote( input, sep: sep )
@@ -174,7 +189,24 @@ def parse_field( input, sep: )
174
189
  end
175
190
  end
176
191
 
177
- value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
192
+
193
+ if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
194
+ value = nil
195
+ elsif numeric
196
+ if is_nan?( value )
197
+ value = Float::NAN
198
+ else
199
+ ## numeric - (auto-convert) non-quoted values (if NOT nil) to floats
200
+ if numeric.is_a?( Proc )
201
+ value = numeric.call( value ) ## allow custom converter proc (e.g. how to handle NaN and conversion errors?)
202
+ else
203
+ value = convert_to_float( value ) # default (fails silently) keep string value if cannot convert - change - why? why not?
204
+ end
205
+ end
206
+ else
207
+ # do nothing - keep value as is :-).
208
+ end
209
+
178
210
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
179
211
  end
180
212
 
@@ -182,6 +214,7 @@ def parse_field( input, sep: )
182
214
  end
183
215
 
184
216
 
217
+
185
218
  def parse_record( input, sep: )
186
219
  values = []
187
220
 
@@ -263,6 +296,24 @@ def parse_lines( input, sep:, &block )
263
296
  end # method parse_lines
264
297
 
265
298
 
299
+ def convert_to_float( value ) Float( value ) rescue value; end
300
+
301
+ def is_nan?( value )
302
+ nan = @config[:nan]
303
+ if nan.nil?
304
+ false ## nothing set; return always false (not NaN)
305
+ elsif nan.is_a?( Proc )
306
+ nan.call( value )
307
+ elsif nan.is_a?( Array )
308
+ nan.include?( value )
309
+ elsif nan.is_a?( String )
310
+ value == nan
311
+ else ## unknown config style / setting
312
+ ## todo: issue a warning or error - why? why not?
313
+ false ## nothing set; return always false (not nan)
314
+ end
315
+ end
316
+
266
317
  def is_null?( value )
267
318
  null = @config[:null]
268
319
  if null.nil?
@@ -2,9 +2,6 @@
2
2
 
3
3
  class CsvReader
4
4
 
5
- #######
6
- ## csv reader
7
-
8
5
  def self.open( path, mode=nil,
9
6
  sep: nil,
10
7
  converters: nil,
@@ -3,6 +3,12 @@
3
3
  class CsvHashReader
4
4
 
5
5
 
6
+ ## add convenience shortcuts / aliases for CsvReader support classes
7
+ Parser = CsvReader::Parser
8
+ Converter = CsvReader::Converter
9
+
10
+
11
+
6
12
  def self.open( path, mode=nil,
7
13
  headers: nil,
8
14
  sep: nil,
@@ -113,10 +119,10 @@ def initialize( data, headers: nil, sep: nil,
113
119
 
114
120
  @sep = sep
115
121
 
116
- @converters = CsvReader::Converter.create_converters( converters )
117
- @header_converters = CsvReader::Converter.create_header_converters( header_converters )
122
+ @converters = Converter.create_converters( converters )
123
+ @header_converters = Converter.create_header_converters( header_converters )
118
124
 
119
- @parser = parser.nil? ? CsvReader::Parser::DEFAULT : parser
125
+ @parser = parser.nil? ? Parser::DEFAULT : parser
120
126
  end
121
127
 
122
128
 
@@ -5,7 +5,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
5
5
 
6
6
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
7
7
  MINOR = 0
8
- PATCH = 0
8
+ PATCH = 1
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
 
@@ -7,6 +7,8 @@ require 'minitest/autorun'
7
7
 
8
8
  ## our own code
9
9
  require 'csvreader'
10
+ ## require 'csvreader/base' ## try modular version (that is, without Csv,CsvHash "top-level" shortcuts)
11
+
10
12
 
11
13
  ## add test_data_dir helper
12
14
  class CsvReader
@@ -0,0 +1,38 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_numeric.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestParserNumeric < MiniTest::Test
12
+
13
+ def parser
14
+ CsvReader::Parser::NUMERIC
15
+ end
16
+
17
+
18
+ def test_parser_numeric
19
+ pp CsvReader::Parser::NUMERIC
20
+ pp CsvReader::Parser.numeric
21
+ assert true
22
+ end
23
+
24
+ def test_parse
25
+ assert_equal [[1.0,2.0,3.0],
26
+ [4.0,5.0,6.0]], parser.parse( "1,2,3\n4,5,6" )
27
+ assert_equal [[1.0,2.0,3.0],
28
+ ["4","5","6"]], parser.parse( %Q{ 1,2 , 3\n"4","5","6"} )
29
+ assert_equal [["a","b","c"]], parser.parse( %Q{"a","b","c"} )
30
+ end
31
+
32
+
33
+ def test_empty
34
+ assert_equal [[nil,nil,nil],
35
+ ["","",""]], parser.parse( %Q{,,\n"","",""} )
36
+ end
37
+
38
+ end # class TestParserNumeric
@@ -84,9 +84,11 @@ def test_enum
84
84
  assert_equal ["a","b","c"], enum.next
85
85
 
86
86
  ## test Csv == CsvReader class alias
87
- csv = Csv.new( "a,b,c" )
88
- enum = csv.to_enum
89
- assert_equal ["a","b","c"], enum.next
87
+ if defined?( Csv )
88
+ csv = Csv.new( "a,b,c" )
89
+ enum = csv.to_enum
90
+ assert_equal ["a","b","c"], enum.next
91
+ end
90
92
  end
91
93
 
92
94
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-09 00:00:00.000000000 Z
11
+ date: 2018-10-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -55,6 +55,7 @@ files:
55
55
  - README.md
56
56
  - Rakefile
57
57
  - lib/csvreader.rb
58
+ - lib/csvreader/base.rb
58
59
  - lib/csvreader/buffer.rb
59
60
  - lib/csvreader/builder.rb
60
61
  - lib/csvreader/converter.rb
@@ -77,6 +78,7 @@ files:
77
78
  - test/test_parser_formats.rb
78
79
  - test/test_parser_java.rb
79
80
  - test/test_parser_null.rb
81
+ - test/test_parser_numeric.rb
80
82
  - test/test_parser_strict.rb
81
83
  - test/test_parser_tab.rb
82
84
  - test/test_reader.rb