csvreader 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 47c630dbfe75b03e4f2d03710cca0f4b3c66ea84
4
- data.tar.gz: 7b604c9b9144190b1b8b4a4dcd848ea5b7f88aa4
3
+ metadata.gz: 8d41f765a3d0091d6f4fe392f014e386ef35a166
4
+ data.tar.gz: 268048267f364829801e12f7c41331153ce8c138
5
5
  SHA512:
6
- metadata.gz: 2dad1ae27b4273b8e5a22cf6eae60f141eca381229ed03bfbe6f403e99aae07a16aa4fc84e1f22a953cd9db5da1b9fa2fb6266666fdc6a756872e8bd4ec8dfb9
7
- data.tar.gz: 38b0002ea3bdfff0b7ce994064d4fb4993e75a3b1225128a50ad9e18180c12cdae9d2a40b6f694e70ebccf4d65005fd5a2145450c20a565de8d3f5af7b398a58
6
+ metadata.gz: d891d31e0447639dfc1d70895c81eba369028b0a32c0daf7aba605d6a91f8e1fe1304eda3d2a2efcff018a1ceb3e8f9f964301269b150771550e5e372cb45c48
7
+ data.tar.gz: b8d51ae12ce5a8dbba0772ee6222338c9e6a7091585b2bf6480c88fd27f4738c5e4472f329568c92ff6c31397a8c297b157fc0b1c1e916e9497213efb2ef7245
data/Manifest.txt CHANGED
@@ -9,6 +9,7 @@ lib/csvreader/buffer.rb
9
9
  lib/csvreader/builder.rb
10
10
  lib/csvreader/converter.rb
11
11
  lib/csvreader/parser.rb
12
+ lib/csvreader/parser_json.rb
12
13
  lib/csvreader/parser_std.rb
13
14
  lib/csvreader/parser_strict.rb
14
15
  lib/csvreader/parser_tab.rb
@@ -34,3 +35,4 @@ test/test_reader.rb
34
35
  test/test_reader_converters.rb
35
36
  test/test_reader_hash.rb
36
37
  test/test_reader_hash_converters.rb
38
+ test/test_samples.rb
@@ -34,6 +34,10 @@ class Parser
34
34
  ## end
35
35
 
36
36
  DEFAULT = ParserStd.new
37
+ NUMERIC = ParserStd.new( numeric: true,
38
+ nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
39
+ null: "" )
40
+
37
41
 
38
42
  RFC4180 = ParserStrict.new
39
43
  STRICT = ParserStrict.new ## note: make strict its own instance (so you can change config without "breaking" rfc4180)
@@ -53,15 +57,14 @@ class Parser
53
57
  escape: true,
54
58
  null: "\\N" )
55
59
 
56
- NUMERIC = ParserStrict.new( numeric: true,
57
- nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
58
- null: "" )
59
-
60
60
 
61
61
  TAB = ParserTab.new
62
62
 
63
63
 
64
64
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
65
+ def self.numeric() NUMERIC; end
66
+ def self.num() numeric; end
67
+ def self.n() numeric; end
65
68
  def self.strict() STRICT; end ## alternative alias for STRICT
66
69
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
67
70
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -70,7 +73,6 @@ class Parser
70
73
  def self.postgres() postgresql; end
71
74
  def self.postgresql_text() POSTGRESQL_TEXT; end
72
75
  def self.postgres_text() postgresql_text; end
73
- def self.numeric() NUMERIC; end
74
76
  def self.tab() TAB; end
75
77
  end # class Parser
76
78
  end # class CsvReader
@@ -80,6 +82,7 @@ end # class CsvReader
80
82
  class CsvReader
81
83
  ### pre-define CsvReader (built-in) formats/dialect
82
84
  DEFAULT = Builder.new( Parser::DEFAULT )
85
+ NUMERIC = Builder.new( Parser::NUMERIC )
83
86
 
84
87
  STRICT = Builder.new( Parser::STRICT )
85
88
  RFC4180 = Builder.new( Parser::RFC4180 )
@@ -89,12 +92,14 @@ class CsvReader
89
92
  POSTGRES = POSTGRESQL = Builder.new( Parser::POSTGRESQL )
90
93
  POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
91
94
 
92
- NUMERIC = Builder.new( Parser::NUMERIC )
93
95
 
94
96
  TAB = Builder.new( Parser::TAB )
95
97
 
96
98
 
97
99
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
100
+ def self.numeric() NUMERIC; end
101
+ def self.num() numeric; end
102
+ def self.n() numeric; end
98
103
  def self.strict() STRICT; end ## alternative alias for STRICT
99
104
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
100
105
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -103,14 +108,15 @@ class CsvReader
103
108
  def self.postgres() postgresql; end
104
109
  def self.postgresql_text() POSTGRESQL_TEXT; end
105
110
  def self.postgres_text() postgresql_text; end
106
- def self.numeric() NUMERIC; end
107
111
  def self.tab() TAB; end
108
112
  end # class CsvReader
109
113
 
110
114
 
115
+
111
116
  class CsvHashReader
112
117
  ### pre-define CsvReader (built-in) formats/dialect
113
118
  DEFAULT = Builder.new( Parser::DEFAULT )
119
+ NUMERIC = Builder.new( Parser::NUMERIC )
114
120
 
115
121
  STRICT = Builder.new( Parser::STRICT )
116
122
  RFC4180 = Builder.new( Parser::RFC4180 )
@@ -120,12 +126,14 @@ class CsvHashReader
120
126
  POSTGRES = POSTGRESQL = Builder.new( Parser::POSTGRESQL )
121
127
  POSTGRES_TEXT = POSTGRESQL_TEXT = Builder.new( Parser::POSTGRESQL_TEXT )
122
128
 
123
- NUMERIC = Builder.new( Parser::NUMERIC )
124
129
 
125
130
  TAB = Builder.new( Parser::TAB )
126
131
 
127
132
 
128
133
  def self.default() DEFAULT; end ## alternative alias for DEFAULT
134
+ def self.numeric() NUMERIC; end
135
+ def self.num() numeric; end
136
+ def self.n() numeric; end
129
137
  def self.strict() STRICT; end ## alternative alias for STRICT
130
138
  def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
131
139
  def self.excel() EXCEL; end ## alternative alias for EXCEL
@@ -134,7 +142,6 @@ class CsvHashReader
134
142
  def self.postgres() postgresql; end
135
143
  def self.postgresql_text() POSTGRESQL_TEXT; end
136
144
  def self.postgres_text() postgresql_text; end
137
- def self.numeric() NUMERIC; end
138
145
  def self.tab() TAB; end
139
146
  end # class CsvHashReader
140
147
 
@@ -0,0 +1,2 @@
1
+ ##
2
+ ## todo: add parser for new CSV <3 JSON format
@@ -44,21 +44,29 @@ attr_reader :config ## todo/fix: change config to proper dialect class/struct
44
44
  ## null values - include NA - why? why not?
45
45
  ## make null values case sensitive or add an option for case sensitive
46
46
  ## or better allow a proc as option for checking too!!!
47
- def initialize( null: ['\N', 'NA'] ## note: set to nil for no null vales / not availabe (na)
47
+ def initialize( null: ['\N', 'NA'], ## note: set to nil for no null vales / not availabe (na)
48
+ numeric: false, ## (auto-)convert all non-quoted values to float
49
+ nan: nil ## note: only if numeric - set mappings for Float::NAN (not a number) values
48
50
  )
49
51
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
50
52
 
51
53
  ## note: null values must get handled by parser
52
54
  ## only get checked for unquoted strings (and NOT for quoted strings)
53
55
  ## "higher-level" code only knows about strings and has no longer any info if string was quoted or unquoted
54
- @config[:null] = null ## null values
56
+ @config[:null] = null ## null values
57
+ @config[:numeric] = numeric
58
+ @config[:nan] = nan # not a number (NaN) e.g. Float::NAN
55
59
  end
56
60
 
61
+
62
+
57
63
  #########################################
58
64
  ## config convenience helpers
59
65
  ## e.g. use like Csv.default.null = '\N' etc. instead of
60
66
  ## Csv.default.config[:null] = '\N'
61
67
  def null=( value ) @config[:null]=value; end
68
+ def numeric=( value ) @config[:numeric]=value; end
69
+ def nan=( value ) @config[:nan]=value; end
62
70
 
63
71
 
64
72
 
@@ -143,14 +151,25 @@ end
143
151
 
144
152
 
145
153
  def parse_field( input )
154
+ value = ""
155
+
156
+ numeric = config[:numeric]
157
+
146
158
  logger.debug "parse field" if logger.debug?
147
159
 
148
- value = ""
149
160
  skip_spaces( input ) ## strip leading spaces
150
161
 
162
+
151
163
  if (c=input.peek; c=="," || c==LF || c==CR || input.eof?) ## empty field
152
- value = nil if is_null?( value ) ## note: allows null = '' that is turn unquoted empty strings into null/nil
153
- ## return value; do nothing
164
+ ## note: allows null = '' that is turn unquoted empty strings into null/nil
165
+ ## or if using numeric into NotANumber (NaN)
166
+ if is_null?( value )
167
+ value = nil
168
+ elsif numeric && is_nan?( value ) ## todo: check - how to handle numeric? return nil, NaN, or "" ???
169
+ value = Float::NAN
170
+ else
171
+ # do nothing - keep value as is :-) e.g. "".
172
+ end
154
173
  elsif input.peek == DOUBLE_QUOTE
155
174
  logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
156
175
  value << parse_doublequote( input )
@@ -174,7 +193,23 @@ def parse_field( input )
174
193
  ## note: only strip **trailing** spaces (space and tab only)
175
194
  ## do NOT strip newlines etc. might have been added via escape! e.g. \\\n
176
195
  value = value.sub( /[ \t]+$/, '' )
177
- value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
196
+
197
+ if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
198
+ value = nil
199
+ elsif numeric
200
+ if is_nan?( value )
201
+ value = Float::NAN
202
+ else
203
+ ## numeric - (auto-convert) non-quoted values (if NOT nil) to floats
204
+ if numeric.is_a?( Proc )
205
+ value = numeric.call( value ) ## allow custom converter proc (e.g. how to handle NaN and conversion errors?)
206
+ else
207
+ value = convert_to_float( value ) # default (fails silently) keep string value if cannot convert - change - why? why not?
208
+ end
209
+ end
210
+ else
211
+ # do nothing - keep value as is :-).
212
+ end
178
213
 
179
214
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
180
215
  end
@@ -273,6 +308,25 @@ end # method parse_lines
273
308
 
274
309
 
275
310
 
311
+ def convert_to_float( value ) Float( value ) rescue value; end
312
+
313
+ def is_nan?( value )
314
+ nan = @config[:nan]
315
+ if nan.nil?
316
+ false ## nothing set; return always false (not NaN)
317
+ elsif nan.is_a?( Proc )
318
+ nan.call( value )
319
+ elsif nan.is_a?( Array )
320
+ nan.include?( value )
321
+ elsif nan.is_a?( String )
322
+ value == nan
323
+ else ## unknown config style / setting
324
+ ## todo: issue a warning or error - why? why not?
325
+ false ## nothing set; return always false (not nan)
326
+ end
327
+ end
328
+
329
+
276
330
  def is_null?( value )
277
331
  null = @config[:null]
278
332
  if null.nil?
@@ -36,9 +36,7 @@ def initialize( sep: ',',
36
36
  doublequote: true,
37
37
  escape: false, ## true/false
38
38
  null: nil, ## note: set to nil for no null vales / not availabe (na)
39
- comment: false, ## note: comment char e.g. # or false/nil
40
- numeric: false, ## (auto-)convert all non-quoted values to float
41
- nan: nil ## note: only if numeric - set mappings for Float::NAN (not a number) values
39
+ comment: false ## note: comment char e.g. # or false/nil
42
40
  )
43
41
  @config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
44
42
  @config[:sep] = sep
@@ -47,8 +45,6 @@ def initialize( sep: ',',
47
45
  @config[:escape] = escape
48
46
  @config[:null] = null
49
47
  @config[:comment] = comment
50
- @config[:numeric] = numeric
51
- @config[:nan] = nan # not a number (NaN) e.g. Float::NAN
52
48
  end
53
49
 
54
50
 
@@ -62,8 +58,6 @@ def doublequote=( value ) @config[:doublequote]=value; end
62
58
  def escape=( value ) @config[:escape]=value; end
63
59
  def null=( value ) @config[:null]=value; end
64
60
  def comment=( value ) @config[:comment]=value; end
65
- def numeric=( value ) @config[:numeric]=value; end
66
- def nan=( value ) @config[:nan]=value; end
67
61
 
68
62
 
69
63
 
@@ -158,20 +152,14 @@ def parse_field( input, sep: )
158
152
 
159
153
  quote = config[:quote]
160
154
  escape = config[:escape]
161
- numeric = config[:numeric]
162
155
 
163
156
  logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
164
157
 
165
158
  if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty unquoted field
166
159
  ## note: allows null = '' that is turn unquoted empty strings into null/nil
167
160
  ## or if using numeric into NotANumber (NaN)
168
- if is_null?( value )
169
- value = nil
170
- elsif numeric & is_nan?( value )
171
- value = Float::NAN
172
- else
173
- # do nothing - keep value as is :-) e.g. "".
174
- end
161
+ value = nil if is_null?( value )
162
+ ## do nothing - keep value as is :-) e.g. "".
175
163
  elsif quote && input.peek == quote
176
164
  logger.debug "start quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
177
165
  value << parse_quote( input, sep: sep )
@@ -190,22 +178,8 @@ def parse_field( input, sep: )
190
178
  end
191
179
 
192
180
 
193
- if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
194
- value = nil
195
- elsif numeric
196
- if is_nan?( value )
197
- value = Float::NAN
198
- else
199
- ## numeric - (auto-convert) non-quoted values (if NOT nil) to floats
200
- if numeric.is_a?( Proc )
201
- value = numeric.call( value ) ## allow custom converter proc (e.g. how to handle NaN and conversion errors?)
202
- else
203
- value = convert_to_float( value ) # default (fails silently) keep string value if cannot convert - change - why? why not?
204
- end
205
- end
206
- else
207
- # do nothing - keep value as is :-).
208
- end
181
+ value = nil if is_null?( value ) ## note: null check only for UNQUOTED (not quoted/escaped) values
182
+ # do nothing - keep value as is :-).
209
183
 
210
184
  logger.debug "end reg field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
211
185
  end
@@ -296,24 +270,6 @@ def parse_lines( input, sep:, &block )
296
270
  end # method parse_lines
297
271
 
298
272
 
299
- def convert_to_float( value ) Float( value ) rescue value; end
300
-
301
- def is_nan?( value )
302
- nan = @config[:nan]
303
- if nan.nil?
304
- false ## nothing set; return always false (not NaN)
305
- elsif nan.is_a?( Proc )
306
- nan.call( value )
307
- elsif nan.is_a?( Array )
308
- nan.include?( value )
309
- elsif nan.is_a?( String )
310
- value == nan
311
- else ## unknown config style / setting
312
- ## todo: issue a warning or error - why? why not?
313
- false ## nothing set; return always false (not nan)
314
- end
315
- end
316
-
317
273
  def is_null?( value )
318
274
  null = @config[:null]
319
275
  if null.nil?
@@ -5,7 +5,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
5
5
 
6
6
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
7
7
  MINOR = 0
8
- PATCH = 1
8
+ PATCH = 2
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
 
@@ -26,6 +26,8 @@ def test_parse
26
26
  [4.0,5.0,6.0]], parser.parse( "1,2,3\n4,5,6" )
27
27
  assert_equal [[1.0,2.0,3.0],
28
28
  ["4","5","6"]], parser.parse( %Q{ 1,2 , 3\n"4","5","6"} )
29
+ assert_equal [[1.0,2.0,3.0],
30
+ ["4","5","6"]], parser.parse( %Q{ 1,2 , 3\n "4", "5" ,"6" } )
29
31
  assert_equal [["a","b","c"]], parser.parse( %Q{"a","b","c"} )
30
32
  end
31
33
 
@@ -0,0 +1,64 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_samples.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestSamples < MiniTest::Test
11
+
12
+
13
+ def test_cities11
14
+ records = CsvReader.read( "#{CsvReader.test_data_dir}/cities11.csv" )
15
+ pp records
16
+
17
+ assert_equal [["Los Angeles", "34°03'N", "118°15'W"],
18
+ ["New York City", %Q{40°42'46"N}, %Q{74°00'21"W}],
19
+ ["Paris", %Q{48°51'24"N}, %Q{2°21'03"E}]], records
20
+ end
21
+
22
+
23
+ def test_cars11
24
+ records = CsvReader.read( "#{CsvReader.test_data_dir}/cars11.csv" )
25
+ pp records
26
+
27
+ assert_equal [["Year", "Make", "Model", "Description", "Price"],
28
+ ["1997", "Ford", "E350", "ac, abs, moon", "3000.00"],
29
+ ["1999", "Chevy", %Q{Venture "Extended Edition"}, "", "4900.00"],
30
+ ["1999", "Chevy", %Q{Venture "Extended Edition, Very Large"}, "", "5000.00"],
31
+ ["1996", "Jeep", "Grand Cherokee", "MUST SELL!\nair, moon roof, loaded", "4799.00"]], records
32
+ end
33
+
34
+
35
+ def test_customers11
36
+ records = CsvReader.read( "#{CsvReader.test_data_dir}/customers11.csv" )
37
+ pp records
38
+
39
+ assert_equal [["Name", "Times arrived", "Total $ spent", "Food feedback"],
40
+ ["Dan", "34", "2548", "Lovin it!"],
41
+ ["Maria", "55", "5054", "Good, delicious food"],
42
+ ["Carlos", "22", "4352", %Q{I am "pleased", but could be better}],
43
+ ["Stephany", "34", "6542", "I want bigger steaks!!!!!"],
44
+ ["James", "1", "43", "Not bad"],
45
+ ["Robin", "1", "56", "Fish is tasty"],
46
+ ["Anna", "1", "79", "Good, better, the best!"]], records
47
+ end
48
+
49
+ def test_shakespeare11
50
+ records = CsvReader.read( "#{CsvReader.test_data_dir}/shakespeare.csv" )
51
+ pp records
52
+
53
+ assert_equal [["Quote", "Play", "Cite"],
54
+ ["Sweet are the uses of adversity", "As You Like It", "Act 2, scene 1, 12"],
55
+ ["All the world's a stage", "As You Like It", "Act 2, scene 7, 139"],
56
+ ["We few, we happy few", "Henry V", ""],
57
+ [%Q{"Seems," madam! Nay it is; I know not "seems."}, "Hamlet", "(1.ii.76)"],
58
+ ["To be, or not to be", "Hamlet", "Act 3, scene 1, 55"],
59
+ ["What's in a name? That which we call a rose by any other name would smell as sweet.", "Romeo and Juliet", "(II, ii, 1-2)"],
60
+ ["O Romeo, Romeo, wherefore art thou Romeo?", "Romeo and Juliet", "Act 2, scene 2, 33"],
61
+ ["Tomorrow, and tomorrow, and tomorrow", "Macbeth", "Act 5, scene 5, 19"]], records
62
+ end
63
+
64
+ end # class TestSamples
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-11 00:00:00.000000000 Z
11
+ date: 2018-10-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -60,6 +60,7 @@ files:
60
60
  - lib/csvreader/builder.rb
61
61
  - lib/csvreader/converter.rb
62
62
  - lib/csvreader/parser.rb
63
+ - lib/csvreader/parser_json.rb
63
64
  - lib/csvreader/parser_std.rb
64
65
  - lib/csvreader/parser_strict.rb
65
66
  - lib/csvreader/parser_tab.rb
@@ -85,6 +86,7 @@ files:
85
86
  - test/test_reader_converters.rb
86
87
  - test/test_reader_hash.rb
87
88
  - test/test_reader_hash_converters.rb
89
+ - test/test_samples.rb
88
90
  homepage: https://github.com/csv11/csvreader
89
91
  licenses:
90
92
  - Public Domain