csvreader 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47c630dbfe75b03e4f2d03710cca0f4b3c66ea84
4
- data.tar.gz: 7b604c9b9144190b1b8b4a4dcd848ea5b7f88aa4
3
+ metadata.gz: 8d41f765a3d0091d6f4fe392f014e386ef35a166
4
+ data.tar.gz: 268048267f364829801e12f7c41331153ce8c138
5
5
  SHA512:
6
- metadata.gz: 2dad1ae27b4273b8e5a22cf6eae60f141eca381229ed03bfbe6f403e99aae07a16aa4fc84e1f22a953cd9db5da1b9fa2fb6266666fdc6a756872e8bd4ec8dfb9
7
- data.tar.gz: 38b0002ea3bdfff0b7ce994064d4fb4993e75a3b1225128a50ad9e18180c12cdae9d2a40b6f694e70ebccf4d65005fd5a2145450c20a565de8d3f5af7b398a58
6
+ metadata.gz: d891d31e0447639dfc1d70895c81eba369028b0a32c0daf7aba605d6a91f8e1fe1304eda3d2a2efcff018a1ceb3e8f9f964301269b150771550e5e372cb45c48
7
+ data.tar.gz: b8d51ae12ce5a8dbba0772ee6222338c9e6a7091585b2bf6480c88fd27f4738c5e4472f329568c92ff6c31397a8c297b157fc0b1c1e916e9497213efb2ef7245
data/Manifest.txt CHANGED
@@ -9,6 +9,7 @@ lib/csvreader/buffer.rb
9
9
  lib/csvreader/builder.rb
10
10
  lib/csvreader/converter.rb
11
11
  lib/csvreader/parser.rb
12
+ lib/csvreader/parser_json.rb
12
13
  lib/csvreader/parser_std.rb
13
14
  lib/csvreader/parser_strict.rb
14
15
  lib/csvreader/parser_tab.rb
@@ -34,3 +35,4 @@ test/test_reader.rb
34
35
  test/test_reader_converters.rb
35
36
  test/test_reader_hash.rb
36
37
  test/test_reader_hash_converters.rb
38
+ test/test_samples.rb
@@ -34,6 +34,10 @@ class Parser
34
34
  ## end
35
35
 
36
36
  DEFAULT = ParserStd.new
37
+ NUMERIC = ParserStd.new( numeric: true,
38
+ nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
39
+ null: "" )
40
+
37
41
 
38
42
  RFC4180 = ParserStrict.new
39
43
  STRICT = ParserStrict.new ## note: make strict its own instance (so you can change config without "breaking" rfc4180)
@@ -53,15 +57,14 @@ class Parser
53
57
  escape: true,
54
58
  null: "\\N" )
55
59
 
56
- NUMERIC = ParserStrict.new( numeric: true,
57
- nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
58
- null: "" )
59
-
60
60
 
61
61
  TAB = ParserTab.new
62
62
 
63
63
 
64
64
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
65
+ def self.numeric() NUMERIC; end
66
+ def self.num() numeric; end
67
+ def self.n() numeric; end
65
68
  def self.strict() STRICT; end ## alternative alias for STRICT
66
69
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
67
70
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -70,7 +73,6 @@ class Parser
70
73
  def self.postgres() postgresql; end
71
74
  def self.postgresql_text() POSTGRESQL_TEXT; end
72
75
  def self.postgres_text() postgresql_text; end
73
- def self.numeric() NUMERIC; end
74
76
  def self.tab() TAB; end
75
77
  end # class Parser
76
78
  end # class CsvReader
@@ -80,6 +82,7 @@ end # class CsvReader
80
82
  class CsvReader
81
83
  ### pre-define CsvReader (built-in) formats/dialect
82
84
  DEFAULT = Builder.new( Parser::DEFAULT )
85
+ NUMERIC = Builder.new( Parser::NUMERIC )
83
86
 
84
87
  STRICT = Builder.new( Parser::STRICT )
85
88
  RFC4180 = Builder.new( Parser::RFC4180 )
@@ -89,12 +92,14 @@ class CsvReader
89
92
  POSTGRES = POSTGRESQL = Builder.new( Parser::POSTGRESQL )
90
93
  POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
91
94
 
92
- NUMERIC = Builder.new( Parser::NUMERIC )
93
95
 
94
96
  TAB = Builder.new( Parser::TAB )
95
97
 
96
98
 
97
99
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
100
+ def self.numeric() NUMERIC; end
101
+ def self.num() numeric; end
102
+ def self.n() numeric; end
98
103
  def self.strict() STRICT; end ## alternative alias for STRICT
99
104
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
100
105
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -103,14 +108,15 @@ class CsvReader
103
108
  def self.postgres() postgresql; end
104
109
  def self.postgresql_text() POSTGRESQL_TEXT; end
105
110
  def self.postgres_text() postgresql_text; end
106
- def self.numeric() NUMERIC; end
107
111
  def self.tab() TAB; end
108
112
  end # class CsvReader
109
113
 
110
114
 
115
+
111
116
  class CsvHashReader
112
117
  ### pre-define CsvReader (built-in) formats/dialect
113
118
  DEFAULT = Builder.new( Parser::DEFAULT )
119
+ NUMERIC = Builder.new( Parser::NUMERIC )
114
120
 
115
121
  STRICT = Builder.new( Parser::STRICT )
116
122
  RFC4180 = Builder.new( Parser::RFC4180 )
@@ -120,12 +126,14 @@ class CsvHashReader
120
126
  POSTGRES = POSTGRESQL = Builder.new( Parser::POSTGRESQL )
121
127
  POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
122
128
 
123
- NUMERIC = Builder.new( Parser::NUMERIC )
124
129
 
125
130
  TAB = Builder.new( Parser::TAB )
126
131
 
127
132
 
128
133
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
134
+ def self.numeric() NUMERIC; end
135
+ def self.num() numeric; end
136
+ def self.n() numeric; end
129
137
  def self.strict() STRICT; end ## alternative alias for STRICT
130
138
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
131
139
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -134,7 +142,6 @@ class CsvHashReader
134
142
  def self.postgres() postgresql; end
135
143
  def self.postgresql_text() POSTGRESQL_TEXT; end
136
144
  def self.postgres_text() postgresql_text; end
137
- def self.numeric() NUMERIC; end
138
145
  def self.tab() TAB; end
139
146
  end # class CsvHashReader
140
147
 
@@ -0,0 +1,2 @@
1
+ ##
2
+ ## todo: add parser for new CSV <3 JSON format
@@ -44,21 +44,29 @@ attr_reader :config ## todo/fix: change config to proper dialect class/struct
44
44
  ## null values - include NA - why? why not?
45
45
  ## make null values case sensitive or add an option for case sensitive
46
46
  ## or better allow a proc as option for checking too!!!
47
- def initialize( null: ['\N', 'NA'] ## note: set to nil for no null values / not available (na)
47
+ def initialize( null: ['\N', 'NA'], ## note: set to nil for no null values / not available (na)
48
+ numeric: false, ## (auto-)convert all non-quoted values to float
49
+ nan: nil ## note: only if numeric - set mappings for Float::NAN (not a number) values
48
50
  )
49
51
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
50
52
 
51
53
  ## note: null values must get handled by parser
52
54
  ## only get checked for unquoted strings (and NOT for quoted strings)
53
55
  ## "higher-level" code only knows about strings and has no longer any info if string was quoted or unquoted
54
- @config[:null] = null ## null values
56
+ @config[:null] = null ## null values
57
+ @config[:numeric] = numeric
58
+ @config[:nan] = nan # not a number (NaN) e.g. Float::NAN
55
59
  end
56
60
 
61
+
62
+
57
63
  #########################################
58
64
  ## config convenience helpers
59
65
  ## e.g. use like Csv.default.null = '\N' etc. instead of
60
66
  ## Csv.default.config[:null] = '\N'
61
67
  def null=( value ) @config[:null]=value; end
68
+ def numeric=( value ) @config[:numeric]=value; end
69
+ def nan=( value ) @config[:nan]=value; end
62
70
 
63
71
 
64
72
 
@@ -143,14 +151,25 @@ end
143
151
 
144
152
 
145
153
  def parse_field( input )
154
+ value = ""
155
+
156
+ numeric = config[:numeric]
157
+
146
158
  logger.debug "parse field" if logger.debug?
147
159
 
148
- value = ""
149
160
  skip_spaces( input ) ## strip leading spaces
150
161
 
162
+
151
163
  if (c=input.peek; c=="," || c==LF || c==CR || input.eof?) ## empty field
152
- value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
153
- ## return value; do nothing
164
+ ## note: allows null = '' that is turn unquoted empty strings into null/nil
165
+ ## or if using numeric into NotANumber (NaN)
166
+ if is_null?( value )
167
+ value = nil
168
+ elsif numeric && is_nan?( value ) ## todo: check - how to handle numeric? return nil, NaN, or "" ???
169
+ value = Float::NAN
170
+ else
171
+ # do nothing - keep value as is :-) e.g. "".
172
+ end
154
173
  elsif input.peek == DOUBLE_QUOTE
155
174
  logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
156
175
  value << parse_doublequote( input )
@@ -174,7 +193,23 @@ def parse_field( input )
174
193
  ## note: only strip **trailing** spaces (space and tab only)
175
194
  ## do NOT strip newlines etc. might have been added via escape! e.g. \\\n
176
195
  value = value.sub( /[ \t]+$/, '' )
177
- value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
196
+
197
+ if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
198
+ value = nil
199
+ elsif numeric
200
+ if is_nan?( value )
201
+ value = Float::NAN
202
+ else
203
+ ## numeric - (auto-convert) non-quoted values (if NOT nil) to floats
204
+ if numeric.is_a?( Proc )
205
+ value = numeric.call( value ) ## allow custom converter proc (e.g. how to handle NaN and conversion errors?)
206
+ else
207
+ value = convert_to_float( value ) # default (fails silently) keep string value if cannot convert - change - why? why not?
208
+ end
209
+ end
210
+ else
211
+ # do nothing - keep value as is :-).
212
+ end
178
213
 
179
214
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
180
215
  end
@@ -273,6 +308,25 @@ end # method parse_lines
273
308
 
274
309
 
275
310
 
311
+ def convert_to_float( value ) Float( value ) rescue value; end
312
+
313
+ def is_nan?( value )
314
+ nan = @config[:nan]
315
+ if nan.nil?
316
+ false ## nothing set; return always false (not NaN)
317
+ elsif nan.is_a?( Proc )
318
+ nan.call( value )
319
+ elsif nan.is_a?( Array )
320
+ nan.include?( value )
321
+ elsif nan.is_a?( String )
322
+ value == nan
323
+ else ## unknown config style / setting
324
+ ## todo: issue a warning or error - why? why not?
325
+ false ## nothing set; return always false (not nan)
326
+ end
327
+ end
328
+
329
+
276
330
  def is_null?( value )
277
331
  null = @config[:null]
278
332
  if null.nil?
@@ -36,9 +36,7 @@ def initialize( sep: ',',
36
36
  doublequote: true,
37
37
  escape: false, ## true/false
38
38
  null: nil, ## note: set to nil for no null values / not available (na)
39
- comment: false, ## note: comment char e.g. # or false/nil
40
- numeric: false, ## (auto-)convert all non-quoted values to float
41
- nan: nil ## note: only if numeric - set mappings for Float::NAN (not a number) values
39
+ comment: false ## note: comment char e.g. # or false/nil
42
40
  )
43
41
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
44
42
  @config[:sep] = sep
@@ -47,8 +45,6 @@ def initialize( sep: ',',
47
45
  @config[:escape] = escape
48
46
  @config[:null] = null
49
47
  @config[:comment] = comment
50
- @config[:numeric] = numeric
51
- @config[:nan] = nan # not a number (NaN) e.g. Float::NAN
52
48
  end
53
49
 
54
50
 
@@ -62,8 +58,6 @@ def doublequote=( value ) @config[:doublequote]=value; end
62
58
  def escape=( value ) @config[:escape]=value; end
63
59
  def null=( value ) @config[:null]=value; end
64
60
  def comment=( value ) @config[:comment]=value; end
65
- def numeric=( value ) @config[:numeric]=value; end
66
- def nan=( value ) @config[:nan]=value; end
67
61
 
68
62
 
69
63
 
@@ -158,20 +152,14 @@ def parse_field( input, sep: )
158
152
 
159
153
  quote = config[:quote]
160
154
  escape = config[:escape]
161
- numeric = config[:numeric]
162
155
 
163
156
  logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
164
157
 
165
158
  if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty unquoted field
166
159
  ## note: allows null = '' that is turn unquoted empty strings into null/nil
167
160
  ## or if using numeric into NotANumber (NaN)
168
- if is_null?( value )
169
- value = nil
170
- elsif numeric & is_nan?( value )
171
- value = Float::NAN
172
- else
173
- # do nothing - keep value as is :-) e.g. "".
174
- end
161
+ value = nil if is_null?( value )
162
+ ## do nothing - keep value as is :-) e.g. "".
175
163
  elsif quote && input.peek == quote
176
164
  logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
177
165
  value << parse_quote( input, sep: sep )
@@ -190,22 +178,8 @@ def parse_field( input, sep: )
190
178
  end
191
179
 
192
180
 
193
- if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
194
- value = nil
195
- elsif numeric
196
- if is_nan?( value )
197
- value = Float::NAN
198
- else
199
- ## numeric - (auto-convert) non-quoted values (if NOT nil) to floats
200
- if numeric.is_a?( Proc )
201
- value = numeric.call( value ) ## allow custom converter proc (e.g. how to handle NaN and conversion errors?)
202
- else
203
- value = convert_to_float( value ) # default (fails silently) keep string value if cannot convert - change - why? why not?
204
- end
205
- end
206
- else
207
- # do nothing - keep value as is :-).
208
- end
181
+ value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
182
+ # do nothing - keep value as is :-).
209
183
 
210
184
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
211
185
  end
@@ -296,24 +270,6 @@ def parse_lines( input, sep:, &block )
296
270
  end # method parse_lines
297
271
 
298
272
 
299
- def convert_to_float( value ) Float( value ) rescue value; end
300
-
301
- def is_nan?( value )
302
- nan = @config[:nan]
303
- if nan.nil?
304
- false ## nothing set; return always false (not NaN)
305
- elsif nan.is_a?( Proc )
306
- nan.call( value )
307
- elsif nan.is_a?( Array )
308
- nan.include?( value )
309
- elsif nan.is_a?( String )
310
- value == nan
311
- else ## unknown config style / setting
312
- ## todo: issue a warning or error - why? why not?
313
- false ## nothing set; return always false (not nan)
314
- end
315
- end
316
-
317
273
  def is_null?( value )
318
274
  null = @config[:null]
319
275
  if null.nil?
@@ -5,7 +5,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
5
5
 
6
6
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
7
7
  MINOR = 0
8
- PATCH = 1
8
+ PATCH = 2
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
 
@@ -26,6 +26,8 @@ def test_parse
26
26
  [4.0,5.0,6.0]], parser.parse( "1,2,3\n4,5,6" )
27
27
  assert_equal [[1.0,2.0,3.0],
28
28
  ["4","5","6"]], parser.parse( %Q{ 1,2 , 3\n"4","5","6"} )
29
+ assert_equal [[1.0,2.0,3.0],
30
+ ["4","5","6"]], parser.parse( %Q{ 1,2 , 3\n "4", "5" ,"6" } )
29
31
  assert_equal [["a","b","c"]], parser.parse( %Q{"a","b","c"} )
30
32
  end
31
33
 
@@ -0,0 +1,64 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_samples.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestSamples < MiniTest::Test
11
+
12
+
13
+ def test_cities11
14
+ records = CsvReader.read( "#{CsvReader.test_data_dir}/cities11.csv" )
15
+ pp records
16
+
17
+ assert_equal [["Los Angeles", "34°03'N", "118°15'W"],
18
+ ["New York City", %Q{40°42'46"N}, %Q{74°00'21"W}],
19
+ ["Paris", %Q{48°51'24"N}, %Q{2°21'03"E}]], records
20
+ end
21
+
22
+
23
+ def test_cars11
24
+ records = CsvReader.read( "#{CsvReader.test_data_dir}/cars11.csv" )
25
+ pp records
26
+
27
+ assert_equal [["Year", "Make", "Model", "Description", "Price"],
28
+ ["1997", "Ford", "E350", "ac, abs, moon", "3000.00"],
29
+ ["1999", "Chevy", %Q{Venture "Extended Edition"}, "", "4900.00"],
30
+ ["1999", "Chevy", %Q{Venture "Extended Edition, Very Large"}, "", "5000.00"],
31
+ ["1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"]], records
32
+ end
33
+
34
+
35
+ def test_customers11
36
+ records = CsvReader.read( "#{CsvReader.test_data_dir}/customers11.csv" )
37
+ pp records
38
+
39
+ assert_equal [["Name", "Times arrived", "Total $ spent", "Food feedback"],
40
+ ["Dan", "34", "2548", "Lovin it!"],
41
+ ["Maria", "55", "5054", "Good, delicious food"],
42
+ ["Carlos", "22", "4352", %Q{I am "pleased", but could be better}],
43
+ ["Stephany", "34", "6542", "I want bigger steaks!!!!!"],
44
+ ["James", "1", "43", "Not bad"],
45
+ ["Robin", "1", "56", "Fish is tasty"],
46
+ ["Anna", "1", "79", "Good, better, the best!"]], records
47
+ end
48
+
49
+ def test_shakespeare11
50
+ records = CsvReader.read( "#{CsvReader.test_data_dir}/shakespeare.csv" )
51
+ pp records
52
+
53
+ assert_equal [["Quote", "Play", "Cite"],
54
+ ["Sweet are the uses of adversity", "As You Like It", "Act 2, scene 1, 12"],
55
+ ["All the world's a stage", "As You Like It", "Act 2, scene 7, 139"],
56
+ ["We few, we happy few", "Henry V", ""],
57
+ [%Q{"Seems," madam! Nay it is; I know not "seems."}, "Hamlet", "(1.ii.76)"],
58
+ ["To be, or not to be", "Hamlet", "Act 3, scene 1, 55"],
59
+ ["What's in a name? That which we call a rose by any other name would smell as sweet.", "Romeo and Juliet", "(II, ii, 1-2)"],
60
+ ["O Romeo, Romeo, wherefore art thou Romeo?", "Romeo and Juliet", "Act 2, scene 2, 33"],
61
+ ["Tomorrow, and tomorrow, and tomorrow", "Macbeth", "Act 5, scene 5, 19"]], records
62
+ end
63
+
64
+ end # class TestSamples
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-11 00:00:00.000000000 Z
11
+ date: 2018-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -60,6 +60,7 @@ files:
60
60
  - lib/csvreader/builder.rb
61
61
  - lib/csvreader/converter.rb
62
62
  - lib/csvreader/parser.rb
63
+ - lib/csvreader/parser_json.rb
63
64
  - lib/csvreader/parser_std.rb
64
65
  - lib/csvreader/parser_strict.rb
65
66
  - lib/csvreader/parser_tab.rb
@@ -85,6 +86,7 @@ files:
85
86
  - test/test_reader_converters.rb
86
87
  - test/test_reader_hash.rb
87
88
  - test/test_reader_hash_converters.rb
89
+ - test/test_samples.rb
88
90
  homepage: https://github.com/csv11/csvreader
89
91
  licenses:
90
92
  - Public Domain