csvreader 1.1.5 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +1 -0
- data/README.md +34 -6
- data/lib/csvreader/parser_std.rb +70 -29
- data/lib/csvreader/parser_table.rb +36 -2
- data/lib/csvreader/version.rb +2 -2
- data/test/data/test.csv +21 -0
- data/test/test_parser.rb +73 -0
- data/test/test_parser_table.rb +42 -0
- data/test/test_samples.rb +14 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5e83a3e71ad1ec014c4744e80be07aa7b6caef10
|
4
|
+
data.tar.gz: c181a4d7f379f241e5a8c1a21af99523c8a0c9d3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be9862e8ff97642f27a18e8d9160d534e3197a862e2a8c2d94b2ffe01b264a47b2e0996693dbc57b5cc68bd6113813fce7ea75289fd2a0e227ad0adc476868b3
|
7
|
+
data.tar.gz: b7696e7342f7676a928c15f6f35b90c78c56e7519de3eb6de95e4fbb3dd3c7823136bda2676f46aea29930c5026ee9d7384e86fb26cffefb1603e9279fbe0ce7
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -11,6 +11,17 @@
|
|
11
11
|
|
12
12
|
## What's New?
|
13
13
|
|
14
|
+
**v1.2** Add support for alternative (non-space) separators (e.g. `;|^:`)
|
15
|
+
to the default parser (`ParserStd`).
|
16
|
+
|
17
|
+
|
18
|
+
**v1.1.5** Added built-in support for (optional) alternative space
|
19
|
+
character
|
20
|
+
(e.g. `_-+•`)
|
21
|
+
to the default parser (`ParserStd`) and the table parser (`ParserTable`).
|
22
|
+
Turns `Man_Utd` into `Man Utd`, for example. Default is turned off (`nil`).
|
23
|
+
|
24
|
+
|
14
25
|
**v1.1.4** Added new "classic" table parser (see `ParserTable`) for supporting fields separated by (one or more) spaces
|
15
26
|
e.g. `Csv.table.parse( txt )`.
|
16
27
|
|
@@ -484,20 +495,33 @@ and so on.
|
|
484
495
|
### Q: How can I change the separator to semicolon (`;`) or pipe (`|`) or tab (`\t`)?
|
485
496
|
|
486
497
|
Pass in the `sep` keyword option
|
487
|
-
to the
|
498
|
+
to the parser. Example:
|
488
499
|
|
489
500
|
``` ruby
|
490
|
-
Csv.
|
491
|
-
Csv.
|
501
|
+
Csv.parse( ..., sep: ';' )
|
502
|
+
Csv.read( ..., sep: ';' )
|
492
503
|
# ...
|
493
|
-
Csv.
|
494
|
-
Csv.
|
504
|
+
Csv.parse( ..., sep: '|' )
|
505
|
+
Csv.read( ..., sep: '|' )
|
495
506
|
# and so on
|
496
507
|
```
|
497
508
|
|
498
509
|
Note: If you use tab (`\t`) use the `TabReader`
|
499
510
|
(or for your convenience the built-in `Csv.tab` alias)!
|
500
|
-
|
511
|
+
If you use the "classic" one or more space or tab (`/[ \t]+/`) regex
|
512
|
+
use the `TableReader`
|
513
|
+
(or for your convenience the built-in `Csv.table` alias)!
|
514
|
+
|
515
|
+
|
516
|
+
Note: The default ("The Right Way") parser does NOT allow space or tab
|
517
|
+
as separator (because leading and trailing space always gets trimmed
|
518
|
+
unless inside quotes, etc.). Use the `strict` parser if you want
|
519
|
+
to make up your own format with space or tab as a separator
|
520
|
+
or if you want that every space or tab counts (is significant).
|
521
|
+
|
522
|
+
|
523
|
+
|
524
|
+
Aside: Why? Tab != CSV. Yes, tab is
|
501
525
|
its own (even) simpler format
|
502
526
|
(e.g. no escape rules, no newlines in values, etc.),
|
503
527
|
see [`TabReader` »](https://github.com/csvreader/tabreader).
|
@@ -506,6 +530,10 @@ see [`TabReader` »](https://github.com/csvreader/tabreader).
|
|
506
530
|
Csv.tab.parse( ... ) # note: "classic" strict tab format
|
507
531
|
Csv.tab.read( ... )
|
508
532
|
# ...
|
533
|
+
|
534
|
+
Csv.table.parse( ... ) # note: "classic" table format (fields separated by one or more spaces or tabs)
|
535
|
+
Csv.table.read( ... )
|
536
|
+
# ...
|
509
537
|
```
|
510
538
|
|
511
539
|
If you want double quote escape rules, newlines in quoted values, etc. use
|
data/lib/csvreader/parser_std.rb
CHANGED
@@ -49,12 +49,17 @@ attr_reader :meta
|
|
49
49
|
## null values - include NA - why? why not?
|
50
50
|
## make null values case sensitive or add an option for case sensitive
|
51
51
|
## or better allow a proc as option for checking too!!!
|
52
|
-
def initialize(
|
52
|
+
def initialize( sep: ',',
|
53
|
+
null: ['\N', 'NA'], ## note: set to nil for no null values / not available (na)
|
53
54
|
numeric: false, ## (auto-)convert all non-quoted values to float
|
54
|
-
nan: nil ## note: only if numeric - set mappings for Float::NAN (not a number) values
|
55
|
+
nan: nil, ## note: only if numeric - set mappings for Float::NAN (not a number) values
|
56
|
+
space: nil
|
55
57
|
)
|
56
58
|
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
57
59
|
|
60
|
+
check_sep( sep )
|
61
|
+
@config[:sep] = sep
|
62
|
+
|
58
63
|
## note: null values must get handled by parser
|
59
64
|
## only get checked for unquoted strings (and NOT for quoted strings)
|
60
65
|
## "higher-level" code only knows about strings and has no longer any info if string was quoted or unquoted
|
@@ -62,40 +67,66 @@ def initialize( null: ['\N', 'NA'], ## note: set to nil for no null vales /
|
|
62
67
|
@config[:numeric] = numeric
|
63
68
|
@config[:nan] = nan # not a number (NaN) e.g. Float::NAN
|
64
69
|
|
70
|
+
## e.g. treat/convert char to space e.g. _-+• etc
|
71
|
+
## Man_Utd => Man Utd
|
72
|
+
## or use it for leading and trailing spaces without quotes
|
73
|
+
## todo/check: only use for unquoted values? why? why not?
|
74
|
+
@config[:space] = space
|
75
|
+
|
76
|
+
|
65
77
|
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
66
78
|
end
|
67
79
|
|
68
80
|
|
81
|
+
SEPARATORS = ",;|^:"
|
82
|
+
|
83
|
+
def check_sep( sep )
|
84
|
+
## note: parser does NOT support space or tab as separator!!
|
85
|
+
## leading and trailing space or tab (whitespace) gets by default trimmed
|
86
|
+
## unless quoted (or alternative space char used e.g. _-+ if configured)
|
87
|
+
|
88
|
+
if SEPARATORS.include?( sep )
|
89
|
+
## everything ok
|
90
|
+
else
|
91
|
+
raise ArgumentError, "invalid/unsupported sep >#{sep}< - for now only >#{SEPARATORS}< allowed; sorry"
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
69
95
|
|
70
96
|
#########################################
|
71
97
|
## config convenience helpers
|
72
98
|
## e.g. use like Csv.default.null = '\N' etc. instead of
|
73
99
|
## Csv.default.config[:null] = '\N'
|
74
|
-
def
|
100
|
+
def sep=( value ) check_sep( value ); @config[:sep]=value; end
|
101
|
+
|
102
|
+
def null=( value ) @config[:null]=value; end
|
75
103
|
def numeric=( value ) @config[:numeric]=value; end
|
76
104
|
def nan=( value ) @config[:nan]=value; end
|
105
|
+
def space=( value ) @config[:space]=value; end
|
77
106
|
|
78
107
|
|
79
108
|
|
80
109
|
|
81
|
-
def parse(
|
110
|
+
def parse( str_or_readable, sep: config[:sep], &block )
|
111
|
+
|
112
|
+
check_sep( sep )
|
82
113
|
|
83
114
|
## note: data - will wrap either a String or IO object passed in data
|
84
115
|
## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
|
85
116
|
|
86
117
|
## make sure data (string or io) is wrapped into a Buffer!!!!!!
|
87
|
-
if
|
88
|
-
input =
|
118
|
+
if str_or_readable.is_a?( Buffer ) ### allow (re)use of Buffer if managed from "outside"
|
119
|
+
input = str_or_readable
|
89
120
|
else
|
90
|
-
input = Buffer.new(
|
121
|
+
input = Buffer.new( str_or_readable )
|
91
122
|
end
|
92
123
|
|
93
124
|
if block_given?
|
94
|
-
parse_lines( input, &block )
|
125
|
+
parse_lines( input, sep: sep, &block )
|
95
126
|
else
|
96
127
|
records = []
|
97
128
|
|
98
|
-
parse_lines( input ) do |record|
|
129
|
+
parse_lines( input, sep: sep ) do |record|
|
99
130
|
records << record
|
100
131
|
end
|
101
132
|
|
@@ -108,11 +139,11 @@ end ## method parse
|
|
108
139
|
|
109
140
|
private
|
110
141
|
|
111
|
-
def parse_escape( input )
|
142
|
+
def parse_escape( input, sep: )
|
112
143
|
value = ""
|
113
144
|
if input.peek == BACKSLASH
|
114
145
|
input.getc ## eat-up backslash
|
115
|
-
if (c=input.peek; c==BACKSLASH || c==LF || c==CR || c==
|
146
|
+
if (c=input.peek; c==BACKSLASH || c==LF || c==CR || c==sep || c==DOUBLE_QUOTE || c==SINGLE_QUOTE )
|
116
147
|
logger.debug " add escaped char >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
117
148
|
value << input.getc ## add escaped char (e.g. lf, cr, etc.)
|
118
149
|
else
|
@@ -128,7 +159,7 @@ end
|
|
128
159
|
|
129
160
|
|
130
161
|
|
131
|
-
def parse_quote( input, opening_quote:, closing_quote:)
|
162
|
+
def parse_quote( input, sep:, opening_quote:, closing_quote:)
|
132
163
|
value = ""
|
133
164
|
if input.peek == opening_quote
|
134
165
|
input.getc ## eat-up opening quote
|
@@ -141,7 +172,7 @@ def parse_quote( input, opening_quote:, closing_quote:)
|
|
141
172
|
if input.eof?
|
142
173
|
break
|
143
174
|
elsif input.peek == BACKSLASH
|
144
|
-
value << parse_escape( input )
|
175
|
+
value << parse_escape( input, sep: sep )
|
145
176
|
else ## assume input.peek == quote
|
146
177
|
input.getc ## eat-up quote
|
147
178
|
if opening_quote == closing_quote && input.peek == closing_quote
|
@@ -162,7 +193,7 @@ end
|
|
162
193
|
|
163
194
|
|
164
195
|
|
165
|
-
def parse_field( input )
|
196
|
+
def parse_field( input, sep: )
|
166
197
|
value = ""
|
167
198
|
|
168
199
|
numeric = config[:numeric]
|
@@ -172,7 +203,7 @@ def parse_field( input )
|
|
172
203
|
skip_spaces( input ) ## strip leading spaces
|
173
204
|
|
174
205
|
|
175
|
-
if (c=input.peek; c==
|
206
|
+
if (c=input.peek; c==sep || c==LF || c==CR || input.eof?) ## empty field
|
176
207
|
## note: allows null = '' that is turn unquoted empty strings into null/nil
|
177
208
|
## or if using numeric into NotANumber (NaN)
|
178
209
|
if is_null?( value )
|
@@ -184,7 +215,8 @@ def parse_field( input )
|
|
184
215
|
end
|
185
216
|
elsif input.peek == DOUBLE_QUOTE
|
186
217
|
logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
187
|
-
value << parse_quote( input,
|
218
|
+
value << parse_quote( input, sep: sep,
|
219
|
+
opening_quote: DOUBLE_QUOTE,
|
188
220
|
closing_quote: DOUBLE_QUOTE )
|
189
221
|
|
190
222
|
## note: always eat-up all trailing spaces (" ") and tabs (\t)
|
@@ -192,26 +224,31 @@ def parse_field( input )
|
|
192
224
|
logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
193
225
|
elsif input.peek == SINGLE_QUOTE ## allow single quote too (by default)
|
194
226
|
logger.debug "start single_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
195
|
-
value << parse_quote( input,
|
227
|
+
value << parse_quote( input, sep: sep,
|
228
|
+
opening_quote: SINGLE_QUOTE,
|
196
229
|
closing_quote: SINGLE_QUOTE )
|
197
230
|
|
198
231
|
## note: always eat-up all trailing spaces (" ") and tabs (\t)
|
199
232
|
skip_spaces( input )
|
200
233
|
logger.debug "end single_quote field - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
201
234
|
elsif input.peek == "«"
|
202
|
-
value << parse_quote( input,
|
235
|
+
value << parse_quote( input, sep: sep,
|
236
|
+
opening_quote: "«",
|
203
237
|
closing_quote: "»" )
|
204
238
|
skip_spaces( input )
|
205
239
|
elsif input.peek == "»"
|
206
|
-
value << parse_quote( input,
|
240
|
+
value << parse_quote( input, sep: sep,
|
241
|
+
opening_quote: "»",
|
207
242
|
closing_quote: "«" )
|
208
243
|
skip_spaces( input )
|
209
244
|
elsif input.peek == "‹"
|
210
|
-
value << parse_quote( input,
|
245
|
+
value << parse_quote( input, sep: sep,
|
246
|
+
opening_quote: "‹",
|
211
247
|
closing_quote: "›" )
|
212
248
|
skip_spaces( input )
|
213
249
|
elsif input.peek == "›"
|
214
|
-
value << parse_quote( input,
|
250
|
+
value << parse_quote( input, sep: sep,
|
251
|
+
opening_quote: "›",
|
215
252
|
closing_quote: "‹" )
|
216
253
|
skip_spaces( input )
|
217
254
|
else
|
@@ -219,9 +256,9 @@ def parse_field( input )
|
|
219
256
|
## consume simple value
|
220
257
|
## until we hit the separator (sep e.g. ",") or "\n" or "\r"
|
221
258
|
## note: will eat-up quotes too!!!
|
222
|
-
while (c=input.peek; !(c==
|
259
|
+
while (c=input.peek; !(c==sep || c==LF || c==CR || input.eof?))
|
223
260
|
if input.peek == BACKSLASH
|
224
|
-
value << parse_escape( input )
|
261
|
+
value << parse_escape( input, sep: sep )
|
225
262
|
else
|
226
263
|
logger.debug " add char >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
227
264
|
value << input.getc ## note: eat-up all spaces (" ") and tabs (\t) too (strip trailing spaces at the end)
|
@@ -256,11 +293,15 @@ end
|
|
256
293
|
|
257
294
|
|
258
295
|
|
259
|
-
def parse_record( input )
|
296
|
+
def parse_record( input, sep: )
|
260
297
|
values = []
|
261
298
|
|
299
|
+
space = config[:space]
|
300
|
+
|
262
301
|
loop do
|
263
|
-
value = parse_field( input )
|
302
|
+
value = parse_field( input, sep: sep )
|
303
|
+
value = value.tr( space, ' ' ) if space && value.is_a?( String )
|
304
|
+
|
264
305
|
logger.debug "value: »#{value}«" if logger.debug?
|
265
306
|
values << value
|
266
307
|
|
@@ -269,10 +310,10 @@ def parse_record( input )
|
|
269
310
|
elsif (c=input.peek; c==LF || c==CR)
|
270
311
|
skip_newline( input )
|
271
312
|
break
|
272
|
-
elsif input.peek ==
|
313
|
+
elsif input.peek == sep
|
273
314
|
input.getc ## eat-up FS(,)
|
274
315
|
else
|
275
|
-
raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - FS (
|
316
|
+
raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - FS (#{sep}) or RS (\\n) expected!!!!" )
|
276
317
|
end
|
277
318
|
end
|
278
319
|
|
@@ -375,7 +416,7 @@ end
|
|
375
416
|
|
376
417
|
|
377
418
|
|
378
|
-
def parse_lines( input, &block )
|
419
|
+
def parse_lines( input, sep:, &block )
|
379
420
|
## note: reset (optional) meta data block
|
380
421
|
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
381
422
|
|
@@ -426,7 +467,7 @@ def parse_lines( input, &block )
|
|
426
467
|
else
|
427
468
|
logger.debug "start record - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
428
469
|
|
429
|
-
record = parse_record( input )
|
470
|
+
record = parse_record( input, sep: sep )
|
430
471
|
record_num +=1
|
431
472
|
|
432
473
|
## note: requires block - enforce? how? why? why not?
|
data/lib/csvreader/parser_table.rb
CHANGED
@@ -22,12 +22,38 @@ def logger() self.class.logger; end
|
|
22
22
|
|
23
23
|
|
24
24
|
|
25
|
-
|
25
|
+
attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
|
26
|
+
|
27
|
+
##
|
28
|
+
## todo/check:
|
29
|
+
## null values - include NA - why? why not?
|
30
|
+
## make null values case sensitive or add an option for case sensitive
|
31
|
+
## or better allow a proc as option for checking too!!!
|
32
|
+
def initialize( space: nil )
|
33
|
+
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
34
|
+
|
35
|
+
## e.g. treat/convert char to space e.g. _-+• etc
|
36
|
+
## Man_Utd => Man Utd
|
37
|
+
## or use it for leading and trailing spaces without quotes
|
38
|
+
## todo/check: only use for unquoted values? why? why not?
|
39
|
+
@config[:space] = space
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
#########################################
|
44
|
+
## config convenience helpers
|
45
|
+
def space=( value ) @config[:space]=value; end
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
|
50
|
+
|
51
|
+
def parse( str_or_readable, **kwargs, &block )
|
26
52
|
|
27
53
|
## note: input: requires each_line (string or io/file for example)
|
28
54
|
## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
|
29
55
|
|
30
|
-
input =
|
56
|
+
input = str_or_readable ## assume it's a string or io/file handle
|
31
57
|
|
32
58
|
if block_given?
|
33
59
|
parse_lines( input, &block )
|
@@ -48,6 +74,8 @@ private
|
|
48
74
|
|
49
75
|
def parse_lines( input, &block )
|
50
76
|
|
77
|
+
space = config[:space]
|
78
|
+
|
51
79
|
## note: each line only works with \n (unix) or \r\n (windows)
|
52
80
|
## will NOT work with \r (old mac, any others?) only!!!!
|
53
81
|
input.each_line do |line|
|
@@ -79,6 +107,12 @@ def parse_lines( input, &block )
|
|
79
107
|
values = line.split( /[ \t]+/ )
|
80
108
|
logger.debug values.pretty_inspect if logger.debug?
|
81
109
|
|
110
|
+
if space
|
111
|
+
## e.g. translate _-+ etc. if configured to space
|
112
|
+
## Man_Utd => Man Utd etc.
|
113
|
+
values = values.map {|value| value.tr(space,' ') }
|
114
|
+
end
|
115
|
+
|
82
116
|
## note: requires block - enforce? how? why? why not?
|
83
117
|
block.call( values )
|
84
118
|
end
|
data/lib/csvreader/version.rb
CHANGED
@@ -5,8 +5,8 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
|
|
5
5
|
|
6
6
|
module Version
|
7
7
|
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
8
|
-
MINOR =
|
9
|
-
PATCH =
|
8
|
+
MINOR = 2
|
9
|
+
PATCH = 0
|
10
10
|
|
11
11
|
## self.to_s - why? why not?
|
12
12
|
end
|
data/test/data/test.csv
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
##################################################
|
2
|
+
## Apache Commons CSV Reader Test Sample
|
3
|
+
## see https://github.com/apache/commons-csv/blob/master/src/test/resources/CSVFileParser/test.csv
|
4
|
+
|
5
|
+
|
6
|
+
A,B,C,"D"
|
7
|
+
# plain values
|
8
|
+
a,b,c,d
|
9
|
+
# spaces before and after
|
10
|
+
e ,f , g,h
|
11
|
+
# quoted: with spaces before and after
|
12
|
+
" i ", " j " , " k "," l "
|
13
|
+
# empty values
|
14
|
+
,,,
|
15
|
+
# empty quoted values
|
16
|
+
"","","",""
|
17
|
+
# 3 empty lines
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
# EOF on next line
|
data/test/test_parser.rb
CHANGED
@@ -41,6 +41,79 @@ def test_parse
|
|
41
41
|
end
|
42
42
|
|
43
43
|
|
44
|
+
def test_parse_space
|
45
|
+
records = [["1", "Man City"],
|
46
|
+
["2", "Liverpool"],
|
47
|
+
["3", "Chelsea"],
|
48
|
+
["4", "Arsenal"],
|
49
|
+
["8", "Man Utd"],
|
50
|
+
["13", "West Ham"],
|
51
|
+
["14", "Crystal Palace"]]
|
52
|
+
|
53
|
+
parser.space='_'
|
54
|
+
|
55
|
+
assert_equal records, parser.parse( <<TXT )
|
56
|
+
1, Man_City
|
57
|
+
2, Liverpool
|
58
|
+
3, Chelsea
|
59
|
+
4, Arsenal
|
60
|
+
8, Man_Utd
|
61
|
+
13, West_Ham
|
62
|
+
14, Crystal_Palace
|
63
|
+
TXT
|
64
|
+
|
65
|
+
assert_equal [[" "," "," "]], parser.parse( "_ , _ , __" )
|
66
|
+
|
67
|
+
|
68
|
+
parser.space='•'
|
69
|
+
|
70
|
+
assert_equal records, parser.parse( <<TXT )
|
71
|
+
1, Man•City
|
72
|
+
2, Liverpool
|
73
|
+
3, Chelsea
|
74
|
+
4, Arsenal
|
75
|
+
8, Man•Utd
|
76
|
+
13, West•Ham
|
77
|
+
14, Crystal•Palace
|
78
|
+
TXT
|
79
|
+
|
80
|
+
assert_equal [[" "," "," "]], parser.parse( "• , • , ••" )
|
81
|
+
|
82
|
+
parser.space = nil ## reset to default setting
|
83
|
+
end
|
84
|
+
|
85
|
+
def test_parse_semicolon
|
86
|
+
records = [["a", "b", "c"],
|
87
|
+
["1", "2", "3"],
|
88
|
+
["4", "5", "6"]]
|
89
|
+
|
90
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
91
|
+
assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6", sep: ';' )
|
92
|
+
assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6\n", sep: ';' )
|
93
|
+
assert_equal records, parser.parse( "a;b;c\r1;2;3\r4;5;6", sep: ';' )
|
94
|
+
assert_equal records, parser.parse( "a;b;c\r\n1;2;3\r\n4;5;6\r\n", sep: ';' )
|
95
|
+
|
96
|
+
assert_equal records, parser.parse( " a; b ; c\n1; 2; 3\n 4; 5;6 ", sep: ';' )
|
97
|
+
assert_equal records, parser.parse( "a; b; c\n 1; 2 ;3 \n4;5;6\n", sep: ';' )
|
98
|
+
end
|
99
|
+
|
100
|
+
def test_parse_pipe # or bar e.g. |||
|
101
|
+
records = [["a", "b", "c"],
|
102
|
+
["1", "2", "3"],
|
103
|
+
["4", "5", "6"]]
|
104
|
+
|
105
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
106
|
+
assert_equal records, parser.parse( "a|b|c\n1|2|3\n4|5|6", sep: '|' )
|
107
|
+
assert_equal records, parser.parse( "a|b|c\n1|2|3\n4|5|6\n", sep: '|' )
|
108
|
+
assert_equal records, parser.parse( "a|b|c\r1|2|3\r4|5|6", sep: '|' )
|
109
|
+
assert_equal records, parser.parse( "a|b|c\r\n1|2|3\r\n4|5|6\r\n", sep: '|' )
|
110
|
+
|
111
|
+
assert_equal records, parser.parse( " a| b | c\n1| 2| 3\n 4| 5|6 ", sep: '|' )
|
112
|
+
assert_equal records, parser.parse( "a| b| c\n 1| 2 |3 \n4|5|6\n", sep: '|' )
|
113
|
+
end
|
114
|
+
|
115
|
+
|
116
|
+
|
44
117
|
def test_parse_quotes
|
45
118
|
records = [["a", "b", "c"],
|
46
119
|
["11 \n 11", "\"2\"", "3"]]
|
data/test/test_parser_table.rb
CHANGED
@@ -13,6 +13,48 @@ class TestParserTable < MiniTest::Test
|
|
13
13
|
def parser() CsvReader::Parser::TABLE; end
|
14
14
|
|
15
15
|
|
16
|
+
def test_space
|
17
|
+
records = [["1", "Man City", "10", "8", "2", "0", "27", "3", "24", "26"],
|
18
|
+
["2", "Liverpool", "10", "8", "2", "0", "20", "4", "16", "26"],
|
19
|
+
["3", "Chelsea", "10", "7", "3", "0", "24", "7", "17", "24"],
|
20
|
+
["4", "Arsenal", "10", "7", "1", "2", "24", "13", "11", "22"],
|
21
|
+
["8", "Man Utd", "10", "5", "2", "3", "17", "17", "0", "17"],
|
22
|
+
["13", "West Ham", "10", "2", "2", "6", "9", "15", "-6", "8"],
|
23
|
+
["14", "Crystal Palace", "10", "2", "2", "6", "7", "13", "-6", "8"]]
|
24
|
+
|
25
|
+
parser.space='_'
|
26
|
+
|
27
|
+
assert_equal records, parser.parse( <<TXT )
|
28
|
+
1 Man_City 10 8 2 0 27 3 24 26
|
29
|
+
2 Liverpool 10 8 2 0 20 4 16 26
|
30
|
+
3 Chelsea 10 7 3 0 24 7 17 24
|
31
|
+
4 Arsenal 10 7 1 2 24 13 11 22
|
32
|
+
8 Man_Utd 10 5 2 3 17 17 0 17
|
33
|
+
13 West_Ham 10 2 2 6 9 15 -6 8
|
34
|
+
14 Crystal_Palace 10 2 2 6 7 13 -6 8
|
35
|
+
TXT
|
36
|
+
|
37
|
+
assert_equal [[" "," "," "]], parser.parse( "_ _ __" )
|
38
|
+
|
39
|
+
|
40
|
+
parser.space='•'
|
41
|
+
|
42
|
+
assert_equal records, parser.parse( <<TXT )
|
43
|
+
1 Man•City 10 8 2 0 27 3 24 26
|
44
|
+
2 Liverpool 10 8 2 0 20 4 16 26
|
45
|
+
3 Chelsea 10 7 3 0 24 7 17 24
|
46
|
+
4 Arsenal 10 7 1 2 24 13 11 22
|
47
|
+
8 Man•Utd 10 5 2 3 17 17 0 17
|
48
|
+
13 West•Ham 10 2 2 6 9 15 -6 8
|
49
|
+
14 Crystal•Palace 10 2 2 6 7 13 -6 8
|
50
|
+
TXT
|
51
|
+
|
52
|
+
assert_equal [[" "," "," "]], parser.parse( "• • ••" )
|
53
|
+
|
54
|
+
parser.space = nil ## reset to default setting
|
55
|
+
end
|
56
|
+
|
57
|
+
|
16
58
|
def test_contacts
|
17
59
|
records = [["aa", "bbb"],
|
18
60
|
["cc", "dd", "ee"]]
|
data/test/test_samples.rb
CHANGED
@@ -61,4 +61,18 @@ def test_shakespeare11
|
|
61
61
|
["Tomorrow, and tomorrow, and tomorrow", "Macbeth", "Act 5, scene 5, 19"]], records
|
62
62
|
end
|
63
63
|
|
64
|
+
|
65
|
+
def test_test
|
66
|
+
records = CsvReader.read( "#{CsvReader.test_data_dir}/test.csv" )
|
67
|
+
pp records
|
68
|
+
|
69
|
+
assert_equal [["A", "B", "C", "D"],
|
70
|
+
["a", "b", "c", "d"],
|
71
|
+
["e", "f", "g", "h"],
|
72
|
+
[" i ", " j ", " k ", " l "],
|
73
|
+
["", "", "", ""],
|
74
|
+
["", "", "", ""]], records
|
75
|
+
end
|
76
|
+
|
77
|
+
|
64
78
|
end # class TestSamples
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-11-
|
11
|
+
date: 2018-11-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -78,6 +78,7 @@ files:
|
|
78
78
|
- test/data/iris11.csv
|
79
79
|
- test/data/lcc.attrib.csv
|
80
80
|
- test/data/shakespeare.csv
|
81
|
+
- test/data/test.csv
|
81
82
|
- test/helper.rb
|
82
83
|
- test/test_buffer.rb
|
83
84
|
- test/test_converter.rb
|