csvreader 1.1.5 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +1 -0
- data/README.md +34 -6
- data/lib/csvreader/parser_std.rb +70 -29
- data/lib/csvreader/parser_table.rb +36 -2
- data/lib/csvreader/version.rb +2 -2
- data/test/data/test.csv +21 -0
- data/test/test_parser.rb +73 -0
- data/test/test_parser_table.rb +42 -0
- data/test/test_samples.rb +14 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e83a3e71ad1ec014c4744e80be07aa7b6caef10
|
4
|
+
data.tar.gz: c181a4d7f379f241e5a8c1a21af99523c8a0c9d3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be9862e8ff97642f27a18e8d9160d534e3197a862e2a8c2d94b2ffe01b264a47b2e0996693dbc57b5cc68bd6113813fce7ea75289fd2a0e227ad0adc476868b3
|
7
|
+
data.tar.gz: b7696e7342f7676a928c15f6f35b90c78c56e7519de3eb6de95e4fbb3dd3c7823136bda2676f46aea29930c5026ee9d7384e86fb26cffefb1603e9279fbe0ce7
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -11,6 +11,17 @@
|
|
11
11
|
|
12
12
|
## What's New?
|
13
13
|
|
14
|
+
**v1.2** Add support for alternative (non-space) separators (e.g. `;|^:`)
|
15
|
+
to the default parser (`ParserStd`).
|
16
|
+
|
17
|
+
|
18
|
+
**v1.1.5** Added built-in support for (optional) alternative space
|
19
|
+
character
|
20
|
+
(e.g. `_-+•`)
|
21
|
+
to the default parser (`ParserStd`) and the table parser (`ParserTable`).
|
22
|
+
Turns `Man_Utd` into `Man Utd`, for example. Default is turned off (`nil`).
|
23
|
+
|
24
|
+
|
14
25
|
**v1.1.4** Added new "classic" table parser (see `ParserTable`) for supporting fields separated by (one or more) spaces
|
15
26
|
e.g. `Csv.table.parse( txt )`.
|
16
27
|
|
@@ -484,20 +495,33 @@ and so on.
|
|
484
495
|
### Q: How can I change the separator to semicolon (`;`) or pipe (`|`) or tab (`\t`)?
|
485
496
|
|
486
497
|
Pass in the `sep` keyword option
|
487
|
-
to the
|
498
|
+
to the parser. Example:
|
488
499
|
|
489
500
|
``` ruby
|
490
|
-
Csv.
|
491
|
-
Csv.
|
501
|
+
Csv.parse( ..., sep: ';' )
|
502
|
+
Csv.read( ..., sep: ';' )
|
492
503
|
# ...
|
493
|
-
Csv.
|
494
|
-
Csv.
|
504
|
+
Csv.parse( ..., sep: '|' )
|
505
|
+
Csv.read( ..., sep: '|' )
|
495
506
|
# and so on
|
496
507
|
```
|
497
508
|
|
498
509
|
Note: If you use tab (`\t`) use the `TabReader`
|
499
510
|
(or for your convenience the built-in `Csv.tab` alias)!
|
500
|
-
|
511
|
+
If your fields are separated by the "classic" one or more spaces or tabs (`/[ \t]+/` regex),
|
512
|
+
use the `TableReader`
|
513
|
+
(or for your convenience the built-in `Csv.table` alias)!
|
514
|
+
|
515
|
+
|
516
|
+
Note: The default ("The Right Way") parser does NOT allow space or tab
|
517
|
+
as separator (because leading and trailing space always gets trimmed
|
518
|
+
unless inside quotes, etc.). Use the `strict` parser if you want
|
519
|
+
to make up your own format with space or tab as a separator
|
520
|
+
or if you want that every space or tab counts (is significant).
|
521
|
+
|
522
|
+
|
523
|
+
|
524
|
+
Aside: Why? Tab != CSV. Yes, tab is
|
501
525
|
its own (even) simpler format
|
502
526
|
(e.g. no escape rules, no newlines in values, etc.),
|
503
527
|
see [`TabReader` »](https://github.com/csvreader/tabreader).
|
@@ -506,6 +530,10 @@ see [`TabReader` »](https://github.com/csvreader/tabreader).
|
|
506
530
|
Csv.tab.parse( ... ) # note: "classic" strict tab format
|
507
531
|
Csv.tab.read( ... )
|
508
532
|
# ...
|
533
|
+
|
534
|
+
Csv.table.parse( ... ) # note: "classic" table format (fields separated by one or more spaces or tabs)
|
535
|
+
Csv.table.read( ... )
|
536
|
+
# ...
|
509
537
|
```
|
510
538
|
|
511
539
|
If you want double quote escape rules, newlines in quotes values, etc. use
|
data/lib/csvreader/parser_std.rb
CHANGED
@@ -49,12 +49,17 @@ attr_reader :meta
|
|
49
49
|
## null values - include NA - why? why not?
|
50
50
|
## make null values case sensitive or add an option for case sensitive
|
51
51
|
## or better allow a proc as option for checking too!!!
|
52
|
-
def initialize(
|
52
|
+
def initialize( sep: ',',
|
53
|
+
null: ['\N', 'NA'], ## note: set to nil for no null values / not available (na)
|
53
54
|
numeric: false, ## (auto-)convert all non-quoted values to float
|
54
|
-
nan: nil ## note: only if numeric - set mappings for Float::NAN (not a number) values
|
55
|
+
nan: nil, ## note: only if numeric - set mappings for Float::NAN (not a number) values
|
56
|
+
space: nil
|
55
57
|
)
|
56
58
|
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
57
59
|
|
60
|
+
check_sep( sep )
|
61
|
+
@config[:sep] = sep
|
62
|
+
|
58
63
|
## note: null values must get handled by parser
|
59
64
|
## only get checked for unquoted strings (and NOT for quoted strings)
|
60
65
|
## "higher-level" code only knows about strings and has no longer any info if string was quoted or unquoted
|
@@ -62,40 +67,66 @@ def initialize( null: ['\N', 'NA'], ## note: set to nil for no null vales /
|
|
62
67
|
@config[:numeric] = numeric
|
63
68
|
@config[:nan] = nan # not a number (NaN) e.g. Float::NAN
|
64
69
|
|
70
|
+
## e.g. treat/convert char to space e.g. _-+• etc
|
71
|
+
## Man_Utd => Man Utd
|
72
|
+
## or use it for leading and trailing spaces without quotes
|
73
|
+
## todo/check: only use for unquoted values? why? why not?
|
74
|
+
@config[:space] = space
|
75
|
+
|
76
|
+
|
65
77
|
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
66
78
|
end
|
67
79
|
|
68
80
|
|
81
|
+
SEPARATORS = ",;|^:"
|
82
|
+
|
83
|
+
def check_sep( sep )
|
84
|
+
## note: parse does NOT support space or tab as separator!!
|
85
|
+
## leading and trailing space or tab (whitespace) gets by default trimmed
|
86
|
+
## unless quoted (or alternative space char used e.g. _-+ if configured)
|
87
|
+
|
88
|
+
if SEPARATORS.include?( sep )
|
89
|
+
## everything ok
|
90
|
+
else
|
91
|
+
raise ArgumentError, "invalid/unsupported sep >#{sep}< - for now only >#{SEPARATORS}< allowed; sorry"
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
69
95
|
|
70
96
|
#########################################
|
71
97
|
## config convenience helpers
|
72
98
|
## e.g. use like Csv.default.null = '\N' etc. instead of
|
73
99
|
## Csv.default.config[:null] = '\N'
|
74
|
-
def
|
100
|
+
def sep=( value ) check_sep( value ); @config[:sep]=value; end
|
101
|
+
|
102
|
+
def null=( value ) @config[:null]=value; end
|
75
103
|
def numeric=( value ) @config[:numeric]=value; end
|
76
104
|
def nan=( value ) @config[:nan]=value; end
|
105
|
+
def space=( value ) @config[:space]=value; end
|
77
106
|
|
78
107
|
|
79
108
|
|
80
109
|
|
81
|
-
def parse(
|
110
|
+
def parse( str_or_readable, sep: config[:sep], &block )
|
111
|
+
|
112
|
+
check_sep( sep )
|
82
113
|
|
83
114
|
## note: data - will wrap either a String or IO object passed in data
|
84
115
|
## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
|
85
116
|
|
86
117
|
## make sure data (string or io) is wrapped into a Buffer!!!!!!
|
87
|
-
if
|
88
|
-
input =
|
118
|
+
if str_or_readable.is_a?( Buffer ) ### allow (re)use of Buffer if managed from "outside"
|
119
|
+
input = str_or_readable
|
89
120
|
else
|
90
|
-
input = Buffer.new(
|
121
|
+
input = Buffer.new( str_or_readable )
|
91
122
|
end
|
92
123
|
|
93
124
|
if block_given?
|
94
|
-
parse_lines( input, &block )
|
125
|
+
parse_lines( input, sep: sep, &block )
|
95
126
|
else
|
96
127
|
records = []
|
97
128
|
|
98
|
-
parse_lines( input ) do |record|
|
129
|
+
parse_lines( input, sep: sep ) do |record|
|
99
130
|
records << record
|
100
131
|
end
|
101
132
|
|
@@ -108,11 +139,11 @@ end ## method parse
|
|
108
139
|
|
109
140
|
private
|
110
141
|
|
111
|
-
def parse_escape( input )
|
142
|
+
def parse_escape( input, sep: )
|
112
143
|
value = ""
|
113
144
|
if input.peek == BACKSLASH
|
114
145
|
input.getc ## eat-up backslash
|
115
|
-
if (c=input.peek; c==BACKSLASH || c==LF || c==CR || c==
|
146
|
+
if (c=input.peek; c==BACKSLASH || c==LF || c==CR || c==sep || c==DOUBLE_QUOTE || c==SINGLE_QUOTE )
|
116
147
|
logger.debug " add escaped char >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
117
148
|
value << input.getc ## add escaped char (e.g. lf, cr, etc.)
|
118
149
|
else
|
@@ -128,7 +159,7 @@ end
|
|
128
159
|
|
129
160
|
|
130
161
|
|
131
|
-
def parse_quote( input, opening_quote:, closing_quote:)
|
162
|
+
def parse_quote( input, sep:, opening_quote:, closing_quote:)
|
132
163
|
value = ""
|
133
164
|
if input.peek == opening_quote
|
134
165
|
input.getc ## eat-up opening quote
|
@@ -141,7 +172,7 @@ def parse_quote( input, opening_quote:, closing_quote:)
|
|
141
172
|
if input.eof?
|
142
173
|
break
|
143
174
|
elsif input.peek == BACKSLASH
|
144
|
-
value << parse_escape( input )
|
175
|
+
value << parse_escape( input, sep: sep )
|
145
176
|
else ## assume input.peek == quote
|
146
177
|
input.getc ## eat-up quote
|
147
178
|
if opening_quote == closing_quote && input.peek == closing_quote
|
@@ -162,7 +193,7 @@ end
|
|
162
193
|
|
163
194
|
|
164
195
|
|
165
|
-
def parse_field( input )
|
196
|
+
def parse_field( input, sep: )
|
166
197
|
value = ""
|
167
198
|
|
168
199
|
numeric = config[:numeric]
|
@@ -172,7 +203,7 @@ def parse_field( input )
|
|
172
203
|
skip_spaces( input ) ## strip leading spaces
|
173
204
|
|
174
205
|
|
175
|
-
if (c=input.peek; c==
|
206
|
+
if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty field
|
176
207
|
## note: allows null = '' that is turn unquoted empty strings into null/nil
|
177
208
|
## or if using numeric into NotANumber (NaN)
|
178
209
|
if is_null?( value )
|
@@ -184,7 +215,8 @@ def parse_field( input )
|
|
184
215
|
end
|
185
216
|
elsif input.peek == DOUBLE_QUOTE
|
186
217
|
logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
187
|
-
value << parse_quote( input,
|
218
|
+
value << parse_quote( input, sep: sep,
|
219
|
+
opening_quote: DOUBLE_QUOTE,
|
188
220
|
closing_quote: DOUBLE_QUOTE )
|
189
221
|
|
190
222
|
## note: always eat-up all trailing spaces (" ") and tabs (\t)
|
@@ -192,26 +224,31 @@ def parse_field( input )
|
|
192
224
|
logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
193
225
|
elsif input.peek == SINGLE_QUOTE ## allow single quote too (by default)
|
194
226
|
logger.debug "start single_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
195
|
-
value << parse_quote( input,
|
227
|
+
value << parse_quote( input, sep: sep,
|
228
|
+
opening_quote: SINGLE_QUOTE,
|
196
229
|
closing_quote: SINGLE_QUOTE )
|
197
230
|
|
198
231
|
## note: always eat-up all trailing spaces (" ") and tabs (\t)
|
199
232
|
skip_spaces( input )
|
200
233
|
logger.debug "end single_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
201
234
|
elsif input.peek == "«"
|
202
|
-
value << parse_quote( input,
|
235
|
+
value << parse_quote( input, sep: sep,
|
236
|
+
opening_quote: "«",
|
203
237
|
closing_quote: "»" )
|
204
238
|
skip_spaces( input )
|
205
239
|
elsif input.peek == "»"
|
206
|
-
value << parse_quote( input,
|
240
|
+
value << parse_quote( input, sep: sep,
|
241
|
+
opening_quote: "»",
|
207
242
|
closing_quote: "«" )
|
208
243
|
skip_spaces( input )
|
209
244
|
elsif input.peek == "‹"
|
210
|
-
value << parse_quote( input,
|
245
|
+
value << parse_quote( input, sep: sep,
|
246
|
+
opening_quote: "‹",
|
211
247
|
closing_quote: "›" )
|
212
248
|
skip_spaces( input )
|
213
249
|
elsif input.peek == "›"
|
214
|
-
value << parse_quote( input,
|
250
|
+
value << parse_quote( input, sep: sep,
|
251
|
+
opening_quote: "›",
|
215
252
|
closing_quote: "‹" )
|
216
253
|
skip_spaces( input )
|
217
254
|
else
|
@@ -219,9 +256,9 @@ def parse_field( input )
|
|
219
256
|
## consume simple value
|
220
257
|
## until we hit the separator (sep, e.g. ",") or "\n" or "\r"
|
221
258
|
## note: will eat-up quotes too!!!
|
222
|
-
while (c=input.peek; !(c==
|
259
|
+
while (c=input.peek; !(c==sep || c==LF || c==CR || input.eof?))
|
223
260
|
if input.peek == BACKSLASH
|
224
|
-
value << parse_escape( input )
|
261
|
+
value << parse_escape( input, sep: sep )
|
225
262
|
else
|
226
263
|
logger.debug " add char >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
227
264
|
value << input.getc ## note: eat-up all spaces (" ") and tabs (\t) too (strip trailing spaces at the end)
|
@@ -256,11 +293,15 @@ end
|
|
256
293
|
|
257
294
|
|
258
295
|
|
259
|
-
def parse_record( input )
|
296
|
+
def parse_record( input, sep: )
|
260
297
|
values = []
|
261
298
|
|
299
|
+
space = config[:space]
|
300
|
+
|
262
301
|
loop do
|
263
|
-
value = parse_field( input )
|
302
|
+
value = parse_field( input, sep: sep )
|
303
|
+
value = value.tr( space, ' ' ) if space && value.is_a?( String )
|
304
|
+
|
264
305
|
logger.debug "value: »#{value}«" if logger.debug?
|
265
306
|
values << value
|
266
307
|
|
@@ -269,10 +310,10 @@ def parse_record( input )
|
|
269
310
|
elsif (c=input.peek; c==LF || c==CR)
|
270
311
|
skip_newline( input )
|
271
312
|
break
|
272
|
-
elsif input.peek ==
|
313
|
+
elsif input.peek == sep
|
273
314
|
input.getc ## eat-up FS(,)
|
274
315
|
else
|
275
|
-
raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - FS (
|
316
|
+
raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - FS (#{sep}) or RS (\\n) expected!!!!" )
|
276
317
|
end
|
277
318
|
end
|
278
319
|
|
@@ -375,7 +416,7 @@ end
|
|
375
416
|
|
376
417
|
|
377
418
|
|
378
|
-
def parse_lines( input, &block )
|
419
|
+
def parse_lines( input, sep:, &block )
|
379
420
|
## note: reset (optional) meta data block
|
380
421
|
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
381
422
|
|
@@ -426,7 +467,7 @@ def parse_lines( input, &block )
|
|
426
467
|
else
|
427
468
|
logger.debug "start record - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
428
469
|
|
429
|
-
record = parse_record( input )
|
470
|
+
record = parse_record( input, sep: sep )
|
430
471
|
record_num +=1
|
431
472
|
|
432
473
|
## note: requires block - enforce? how? why? why not?
|
data/lib/csvreader/parser_table.rb
CHANGED
@@ -22,12 +22,38 @@ def logger() self.class.logger; end
|
|
22
22
|
|
23
23
|
|
24
24
|
|
25
|
-
|
25
|
+
attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
|
26
|
+
|
27
|
+
##
|
28
|
+
## todo/check:
|
29
|
+
## null values - include NA - why? why not?
|
30
|
+
## make null values case sensitive or add an option for case sensitive
|
31
|
+
## or better allow a proc as option for checking too!!!
|
32
|
+
def initialize( space: nil )
|
33
|
+
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
34
|
+
|
35
|
+
## e.g. treat/convert char to space e.g. _-+• etc
|
36
|
+
## Man_Utd => Man Utd
|
37
|
+
## or use it for leading and trailing spaces without quotes
|
38
|
+
## todo/check: only use for unquoted values? why? why not?
|
39
|
+
@config[:space] = space
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
#########################################
|
44
|
+
## config convenience helpers
|
45
|
+
def space=( value ) @config[:space]=value; end
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
def parse( str_or_readable, **kwargs, &block )
|
26
52
|
|
27
53
|
## note: input: required each_line (string or io/file for example)
|
28
54
|
## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
|
29
55
|
|
30
|
-
input =
|
56
|
+
input = str_or_readable ## assume it's a string or io/file handle
|
31
57
|
|
32
58
|
if block_given?
|
33
59
|
parse_lines( input, &block )
|
@@ -48,6 +74,8 @@ private
|
|
48
74
|
|
49
75
|
def parse_lines( input, &block )
|
50
76
|
|
77
|
+
space = config[:space]
|
78
|
+
|
51
79
|
## note: each line only works with \n (unix) or \r\n (windows)
|
52
80
|
## will NOT work with \r (old mac, any others?) only!!!!
|
53
81
|
input.each_line do |line|
|
@@ -79,6 +107,12 @@ def parse_lines( input, &block )
|
|
79
107
|
values = line.split( /[ \t]+/ )
|
80
108
|
logger.debug values.pretty_inspect if logger.debug?
|
81
109
|
|
110
|
+
if space
|
111
|
+
## e.g. translate _-+ etc. if configured to space
|
112
|
+
## Man_Utd => Man Utd etc.
|
113
|
+
values = values.map {|value| value.tr(space,' ') }
|
114
|
+
end
|
115
|
+
|
82
116
|
## note: requires block - enforce? how? why? why not?
|
83
117
|
block.call( values )
|
84
118
|
end
|
data/lib/csvreader/version.rb
CHANGED
@@ -5,8 +5,8 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
|
|
5
5
|
|
6
6
|
module Version
|
7
7
|
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
8
|
-
MINOR =
|
9
|
-
PATCH =
|
8
|
+
MINOR = 2
|
9
|
+
PATCH = 0
|
10
10
|
|
11
11
|
## self.to_s - why? why not?
|
12
12
|
end
|
data/test/data/test.csv
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
##################################################
|
2
|
+
## Apache Commons CSV Reader Test Sample
|
3
|
+
## see https://github.com/apache/commons-csv/blob/master/src/test/resources/CSVFileParser/test.csv
|
4
|
+
|
5
|
+
|
6
|
+
A,B,C,"D"
|
7
|
+
# plain values
|
8
|
+
a,b,c,d
|
9
|
+
# spaces before and after
|
10
|
+
e ,f , g,h
|
11
|
+
# quoted: with spaces before and after
|
12
|
+
" i ", " j " , " k "," l "
|
13
|
+
# empty values
|
14
|
+
,,,
|
15
|
+
# empty quoted values
|
16
|
+
"","","",""
|
17
|
+
# 3 empty lines
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
# EOF on next line
|
data/test/test_parser.rb
CHANGED
@@ -41,6 +41,79 @@ def test_parse
|
|
41
41
|
end
|
42
42
|
|
43
43
|
|
44
|
+
def test_parse_space
|
45
|
+
records = [["1", "Man City"],
|
46
|
+
["2", "Liverpool"],
|
47
|
+
["3", "Chelsea"],
|
48
|
+
["4", "Arsenal"],
|
49
|
+
["8", "Man Utd"],
|
50
|
+
["13", "West Ham"],
|
51
|
+
["14", "Crystal Palace"]]
|
52
|
+
|
53
|
+
parser.space='_'
|
54
|
+
|
55
|
+
assert_equal records, parser.parse( <<TXT )
|
56
|
+
1, Man_City
|
57
|
+
2, Liverpool
|
58
|
+
3, Chelsea
|
59
|
+
4, Arsenal
|
60
|
+
8, Man_Utd
|
61
|
+
13, West_Ham
|
62
|
+
14, Crystal_Palace
|
63
|
+
TXT
|
64
|
+
|
65
|
+
assert_equal [[" "," "," "]], parser.parse( "_ , _ , __" )
|
66
|
+
|
67
|
+
|
68
|
+
parser.space='•'
|
69
|
+
|
70
|
+
assert_equal records, parser.parse( <<TXT )
|
71
|
+
1, Man•City
|
72
|
+
2, Liverpool
|
73
|
+
3, Chelsea
|
74
|
+
4, Arsenal
|
75
|
+
8, Man•Utd
|
76
|
+
13, West•Ham
|
77
|
+
14, Crystal•Palace
|
78
|
+
TXT
|
79
|
+
|
80
|
+
assert_equal [[" "," "," "]], parser.parse( "• , • , ••" )
|
81
|
+
|
82
|
+
parser.space = nil ## reset to default setting
|
83
|
+
end
|
84
|
+
|
85
|
+
def test_parse_semicolon
|
86
|
+
records = [["a", "b", "c"],
|
87
|
+
["1", "2", "3"],
|
88
|
+
["4", "5", "6"]]
|
89
|
+
|
90
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
91
|
+
assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6", sep: ';' )
|
92
|
+
assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6\n", sep: ';' )
|
93
|
+
assert_equal records, parser.parse( "a;b;c\r1;2;3\r4;5;6", sep: ';' )
|
94
|
+
assert_equal records, parser.parse( "a;b;c\r\n1;2;3\r\n4;5;6\r\n", sep: ';' )
|
95
|
+
|
96
|
+
assert_equal records, parser.parse( " a; b ; c\n1; 2; 3\n 4; 5;6 ", sep: ';' )
|
97
|
+
assert_equal records, parser.parse( "a; b; c\n 1; 2 ;3 \n4;5;6\n", sep: ';' )
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_parse_pipe # or bar e.g. |||
|
101
|
+
records = [["a", "b", "c"],
|
102
|
+
["1", "2", "3"],
|
103
|
+
["4", "5", "6"]]
|
104
|
+
|
105
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
106
|
+
assert_equal records, parser.parse( "a|b|c\n1|2|3\n4|5|6", sep: '|' )
|
107
|
+
assert_equal records, parser.parse( "a|b|c\n1|2|3\n4|5|6\n", sep: '|' )
|
108
|
+
assert_equal records, parser.parse( "a|b|c\r1|2|3\r4|5|6", sep: '|' )
|
109
|
+
assert_equal records, parser.parse( "a|b|c\r\n1|2|3\r\n4|5|6\r\n", sep: '|' )
|
110
|
+
|
111
|
+
assert_equal records, parser.parse( " a| b | c\n1| 2| 3\n 4| 5|6 ", sep: '|' )
|
112
|
+
assert_equal records, parser.parse( "a| b| c\n 1| 2 |3 \n4|5|6\n", sep: '|' )
|
113
|
+
end
|
114
|
+
|
115
|
+
|
116
|
+
|
44
117
|
def test_parse_quotes
|
45
118
|
records = [["a", "b", "c"],
|
46
119
|
["11 \n 11", "\"2\"", "3"]]
|
data/test/test_parser_table.rb
CHANGED
@@ -13,6 +13,48 @@ class TestParserTable < MiniTest::Test
|
|
13
13
|
def parser() CsvReader::Parser::TABLE; end
|
14
14
|
|
15
15
|
|
16
|
+
def test_space
|
17
|
+
records = [["1", "Man City", "10", "8", "2", "0", "27", "3", "24", "26"],
|
18
|
+
["2", "Liverpool", "10", "8", "2", "0", "20", "4", "16", "26"],
|
19
|
+
["3", "Chelsea", "10", "7", "3", "0", "24", "7", "17", "24"],
|
20
|
+
["4", "Arsenal", "10", "7", "1", "2", "24", "13", "11", "22"],
|
21
|
+
["8", "Man Utd", "10", "5", "2", "3", "17", "17", "0", "17"],
|
22
|
+
["13", "West Ham", "10", "2", "2", "6", "9", "15", "-6", "8"],
|
23
|
+
["14", "Crystal Palace", "10", "2", "2", "6", "7", "13", "-6", "8"]]
|
24
|
+
|
25
|
+
parser.space='_'
|
26
|
+
|
27
|
+
assert_equal records, parser.parse( <<TXT )
|
28
|
+
1 Man_City 10 8 2 0 27 3 24 26
|
29
|
+
2 Liverpool 10 8 2 0 20 4 16 26
|
30
|
+
3 Chelsea 10 7 3 0 24 7 17 24
|
31
|
+
4 Arsenal 10 7 1 2 24 13 11 22
|
32
|
+
8 Man_Utd 10 5 2 3 17 17 0 17
|
33
|
+
13 West_Ham 10 2 2 6 9 15 -6 8
|
34
|
+
14 Crystal_Palace 10 2 2 6 7 13 -6 8
|
35
|
+
TXT
|
36
|
+
|
37
|
+
assert_equal [[" "," "," "]], parser.parse( "_ _ __" )
|
38
|
+
|
39
|
+
|
40
|
+
parser.space='•'
|
41
|
+
|
42
|
+
assert_equal records, parser.parse( <<TXT )
|
43
|
+
1 Man•City 10 8 2 0 27 3 24 26
|
44
|
+
2 Liverpool 10 8 2 0 20 4 16 26
|
45
|
+
3 Chelsea 10 7 3 0 24 7 17 24
|
46
|
+
4 Arsenal 10 7 1 2 24 13 11 22
|
47
|
+
8 Man•Utd 10 5 2 3 17 17 0 17
|
48
|
+
13 West•Ham 10 2 2 6 9 15 -6 8
|
49
|
+
14 Crystal•Palace 10 2 2 6 7 13 -6 8
|
50
|
+
TXT
|
51
|
+
|
52
|
+
assert_equal [[" "," "," "]], parser.parse( "• • ••" )
|
53
|
+
|
54
|
+
parser.space = nil ## reset to default setting
|
55
|
+
end
|
56
|
+
|
57
|
+
|
16
58
|
def test_contacts
|
17
59
|
records = [["aa", "bbb"],
|
18
60
|
["cc", "dd", "ee"]]
|
data/test/test_samples.rb
CHANGED
@@ -61,4 +61,18 @@ def test_shakespeare11
|
|
61
61
|
["Tomorrow, and tomorrow, and tomorrow", "Macbeth", "Act 5, scene 5, 19"]], records
|
62
62
|
end
|
63
63
|
|
64
|
+
|
65
|
+
def test_test
|
66
|
+
records = CsvReader.read( "#{CsvReader.test_data_dir}/test.csv" )
|
67
|
+
pp records
|
68
|
+
|
69
|
+
assert_equal [["A", "B", "C", "D"],
|
70
|
+
["a", "b", "c", "d"],
|
71
|
+
["e", "f", "g", "h"],
|
72
|
+
[" i ", " j ", " k ", " l "],
|
73
|
+
["", "", "", ""],
|
74
|
+
["", "", "", ""]], records
|
75
|
+
end
|
76
|
+
|
77
|
+
|
64
78
|
end # class TestSamples
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -78,6 +78,7 @@ files:
|
|
78
78
|
- test/data/iris11.csv
|
79
79
|
- test/data/lcc.attrib.csv
|
80
80
|
- test/data/shakespeare.csv
|
81
|
+
- test/data/test.csv
|
81
82
|
- test/helper.rb
|
82
83
|
- test/test_buffer.rb
|
83
84
|
- test/test_converter.rb
|