csvreader 0.4.0 → 0.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/README.md +32 -4
- data/lib/csvreader.rb +1 -0
- data/lib/csvreader/buffer.rb +2 -13
- data/lib/csvreader/parser.rb +220 -107
- data/lib/csvreader/reader.rb +81 -140
- data/lib/csvreader/version.rb +1 -1
- data/test/test_parser.rb +38 -24
- data/test/test_parser_formats.rb +69 -0
- data/test/test_parser_rfc4180.rb +95 -0
- data/test/test_reader.rb +4 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea1d667219773e3a355c81f815d91e92340d61a1
|
4
|
+
data.tar.gz: ba7a43ccb5e110fc1f6eca76ca2a74a62f1131fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0543a4338d2d12e36da16acdad9abff28633e519baa1d92044d1ca8f5e3472d835d00a10d8b19c24561b06e0d724f87414495600f4c83eef7c9e033474b4c09e
|
7
|
+
data.tar.gz: 8df669bc86f2066b2650a67bda5698fae7b6d58766b9c318f47958b0499671d0a4d39e862b8d3af842105a2777a3cf7ad05168380c338a3053fd3d363697abfb
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -164,17 +164,15 @@ see [`TabReader` »](https://github.com/datatxt/tabreader).
|
|
164
164
|
|
165
165
|
Two major design bugs and many many minor.
|
166
166
|
|
167
|
-
(1) The CSV class uses `line.split(',')` with some kludges (†) with the claim
|
167
|
+
(1) The CSV class uses [`line.split(',')`](https://github.com/ruby/csv/blob/master/lib/csv.rb#L1248) with some kludges (†) with the claim it's faster.
|
168
168
|
What?! The right way: CSV needs its own purpose-built parser. There's no other
|
169
169
|
way you can handle all the (edge) cases with double quotes and escaped doubled up
|
170
170
|
double quotes. Period.
|
171
171
|
|
172
|
-
For example, the CSV class cannot handle leading or trailing spaces
|
172
|
+
For example, the CSV class cannot handle leading or trailing spaces
|
173
173
|
for double quoted values `1,•"2","3"•`.
|
174
174
|
Or handling double quotes inside values and so on and on.
|
175
175
|
|
176
|
-
(†): kludge - a workaround or quick-and-dirty solution that is clumsy, inelegant, inefficient, difficult to extend and hard to maintain
|
177
|
-
|
178
176
|
(2) The CSV class returns `nil` for `,,` but an empty string (`""`)
|
179
177
|
for `"","",""`. The right way: All values are always strings. Period.
|
180
178
|
|
@@ -182,6 +180,36 @@ If you want to use `nil` you MUST configure a string (or strings)
|
|
182
180
|
such as `NA`, `n/a`, `\N`, or similar that map to `nil`.
|
183
181
|
|
184
182
|
|
183
|
+
(†): kludge - a workaround or quick-and-dirty solution that is clumsy, inelegant, inefficient, difficult to extend and hard to maintain
|
184
|
+
|
185
|
+
Appendix: Simple examples the standard csv library cannot read:
|
186
|
+
|
187
|
+
Quoted values with leading or trailing spaces e.g.
|
188
|
+
|
189
|
+
```
|
190
|
+
1, "2","3" , "4" ,5
|
191
|
+
```
|
192
|
+
|
193
|
+
=>
|
194
|
+
|
195
|
+
``` ruby
|
196
|
+
["1", "2", "3", "4", "5"]
|
197
|
+
```
|
198
|
+
|
199
|
+
"Auto-fix" unambiguous quotes in "unquoted" values e.g.
|
200
|
+
|
201
|
+
```
|
202
|
+
value with "quotes", another value
|
203
|
+
```
|
204
|
+
|
205
|
+
=>
|
206
|
+
|
207
|
+
``` ruby
|
208
|
+
["value with \"quotes\"", "another value"]
|
209
|
+
```
|
210
|
+
|
211
|
+
and some more.
|
212
|
+
|
185
213
|
|
186
214
|
|
187
215
|
|
data/lib/csvreader.rb
CHANGED
data/lib/csvreader/buffer.rb
CHANGED
@@ -18,22 +18,10 @@ class BufferIO ## todo: find a better name - why? why not? is really just for
|
|
18
18
|
end
|
19
19
|
end # method getc
|
20
20
|
|
21
|
-
|
22
|
-
def ungetc( c )
|
23
|
-
## add upfront as first char in buffer
|
24
|
-
## last in/first out queue!!!!
|
25
|
-
@buf.unshift( c )
|
26
|
-
## puts "ungetc - >#{c} (#{c.ord})< => >#{@buf}<"
|
27
|
-
end
|
28
|
-
|
29
|
-
|
30
21
|
def peek
|
31
|
-
## todo/fix:
|
32
|
-
## use Hexadecimal code: 1A, U+001A for eof char - why? why not?
|
33
22
|
if @buf.size == 0 && @io.eof?
|
34
23
|
puts "peek - hitting eof!!!"
|
35
|
-
## return
|
36
|
-
return "\0"
|
24
|
+
return "\0" ## return NUL char (0) for now
|
37
25
|
end
|
38
26
|
|
39
27
|
if @buf.size == 0
|
@@ -44,5 +32,6 @@ class BufferIO ## todo: find a better name - why? why not? is really just for
|
|
44
32
|
|
45
33
|
@buf.first
|
46
34
|
end # method peek
|
35
|
+
|
47
36
|
end # class BufferIO
|
48
37
|
end # class CsvReader
|
data/lib/csvreader/parser.rb
CHANGED
@@ -1,74 +1,92 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
class CsvReader
|
4
|
-
class Parser
|
5
|
-
|
6
4
|
|
7
|
-
## char constants
|
8
|
-
DOUBLE_QUOTE = "\""
|
9
|
-
COMMENT = "#" ## use COMMENT_HASH or HASH or ??
|
10
|
-
SPACE = " "
|
11
|
-
TAB = "\t"
|
12
|
-
LF = "\n" ## 0A (hex) 10 (dec)
|
13
|
-
CR = "\r" ## 0D (hex) 13 (dec)
|
14
5
|
|
15
6
|
|
16
|
-
def self.parse( data )
|
17
|
-
puts "parse:"
|
18
|
-
pp data
|
19
7
|
|
20
|
-
parser = new
|
21
|
-
parser.parse( data )
|
22
|
-
end
|
23
8
|
|
24
|
-
|
25
|
-
puts "parse_line:"
|
9
|
+
class Parser
|
26
10
|
|
27
|
-
parser = new
|
28
|
-
records = parser.parse( data, limit: 1 )
|
29
11
|
|
30
|
-
|
31
|
-
|
32
|
-
|
12
|
+
## char constants
|
13
|
+
DOUBLE_QUOTE = "\""
|
14
|
+
BACKSLASH = "\\" ## use BACKSLASH_ESCAPE ??
|
15
|
+
COMMENT = "#" ## use COMMENT_HASH or HASH or ??
|
16
|
+
SPACE = " " ## \s == ASCII 32 (dec) = (Space)
|
17
|
+
TAB = "\t" ## \t == ASCII 0x09 (hex) = HT (Tab/horizontal tab)
|
18
|
+
LF = "\n" ## \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
|
19
|
+
CR = "\r" ## \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)
|
20
|
+
|
21
|
+
|
22
|
+
###################################
|
23
|
+
## add simple logger with debug flag/switch
|
24
|
+
#
|
25
|
+
# use Parser.debug = true # to turn on
|
26
|
+
#
|
27
|
+
# todo/fix: use logutils instead of std logger - why? why not?
|
28
|
+
|
29
|
+
def self.logger() @@logger ||= Logger.new( STDOUT ); end
|
30
|
+
def logger() self.class.logger; end
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
|
35
|
+
|
36
|
+
def initialize( sep: ',',
|
37
|
+
quote: DOUBLE_QUOTE, ## note: set to nil for no quote
|
38
|
+
doublequote: true,
|
39
|
+
escape: BACKSLASH, ## note: set to nil for no escapes
|
40
|
+
trim: true, ## note: will toggle between human/default and strict mode parser!!!
|
41
|
+
na: ['\N', 'NA'], ## note: set to nil for no null values / not available (na)
|
42
|
+
quoted_empty: '', ## note: only available in strict mode (e.g. trim=false)
|
43
|
+
unquoted_empty: '' ## note: only available in strict mode (e.g. trim=false)
|
44
|
+
)
|
45
|
+
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
46
|
+
@config[:sep] = sep
|
47
|
+
@config[:quote] = quote
|
48
|
+
@config[:doublequote] = doublequote
|
49
|
+
@config[:escape] = escape
|
50
|
+
@config[:trim] = trim
|
51
|
+
@config[:na] = na
|
52
|
+
@config[:quoted_empty] = quoted_empty
|
53
|
+
@config[:unquoted_empty] = unquoted_empty
|
33
54
|
end
|
34
55
|
|
35
56
|
|
36
57
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
42
|
-
end
|
58
|
+
def strict?
|
59
|
+
## note: use trim for separating two different parsers / code paths:
|
60
|
+
## - human with trim leading and trailing whitespace and
|
61
|
+
## - strict with no leading and trailing whitespaces allowed
|
43
62
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
parser.foreach( file, &block )
|
48
|
-
end
|
63
|
+
## for now use - trim == false for strict version flag alias
|
64
|
+
## todo/fix: add strict flag - why? why not?
|
65
|
+
@config[:trim] ? false : true
|
49
66
|
end
|
50
67
|
|
51
|
-
def self.parse_lines( data, &block )
|
52
|
-
parser = new
|
53
|
-
parser.parse_lines( data, &block )
|
54
|
-
end
|
55
68
|
|
69
|
+
DEFAULT = new( sep: ',', trim: true )
|
70
|
+
RFC4180 = new( sep: ',', trim: false )
|
71
|
+
EXCEL = new( sep: ',', trim: false )
|
56
72
|
|
73
|
+
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
74
|
+
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
75
|
+
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
57
76
|
|
58
77
|
|
59
78
|
|
60
|
-
|
79
|
+
|
80
|
+
def parse_field( io, sep: )
|
81
|
+
logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
|
82
|
+
|
61
83
|
value = ""
|
62
|
-
|
84
|
+
skip_spaces( io ) ## strip leading spaces
|
63
85
|
|
64
86
|
if (c=io.peek; c=="," || c==LF || c==CR || io.eof?) ## empty field
|
65
|
-
value = value.strip if trim ## strip all spaces
|
66
87
|
## return value; do nothing
|
67
88
|
elsif io.peek == DOUBLE_QUOTE
|
68
|
-
|
69
|
-
value = value.strip ## note always strip/trim leading spaces in quoted value
|
70
|
-
|
71
|
-
puts "start double_quote field - peek >#{io.peek}< (#{io.peek.ord})"
|
89
|
+
logger.debug "start double_quote field - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
72
90
|
io.getc ## eat-up double_quote
|
73
91
|
|
74
92
|
loop do
|
@@ -89,18 +107,18 @@ def parse_field( io, trim: true )
|
|
89
107
|
|
90
108
|
## note: always eat-up all trailing spaces (" ") and tabs (\t)
|
91
109
|
skip_spaces( io )
|
92
|
-
|
110
|
+
logger.debug "end double_quote field - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
93
111
|
else
|
94
|
-
|
112
|
+
logger.debug "start reg field - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
95
113
|
## consume simple value
|
96
114
|
## until we hit "," or "\n" or "\r"
|
97
115
|
## note: will eat-up quotes too!!!
|
98
116
|
while (c=io.peek; !(c=="," || c==LF || c==CR || io.eof?))
|
99
|
-
|
117
|
+
logger.debug " add char >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
100
118
|
value << io.getc ## eat-up all spaces (" ") and tabs (\t)
|
101
119
|
end
|
102
|
-
value = value.strip
|
103
|
-
|
120
|
+
value = value.strip ## strip all trailing spaces
|
121
|
+
logger.debug "end reg field - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
104
122
|
end
|
105
123
|
|
106
124
|
value
|
@@ -108,12 +126,60 @@ end
|
|
108
126
|
|
109
127
|
|
110
128
|
|
111
|
-
|
129
|
+
|
130
|
+
def parse_field_strict( io, sep: )
|
131
|
+
logger.debug "parse field (strict) - sep: >#{sep}< (#{sep.ord})" if logger.debug?
|
132
|
+
|
133
|
+
value = ""
|
134
|
+
|
135
|
+
if (c=io.peek; c==sep || c==LF || c==CR || io.eof?) ## empty unquoted field
|
136
|
+
value = config[:unquoted_empty] ## defaults to "" (might be set to nil if needed)
|
137
|
+
## return value; do nothing
|
138
|
+
elsif config[:quote] && io.peek == config[:quote]
|
139
|
+
logger.debug "start quote field (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
140
|
+
io.getc ## eat-up double_quote
|
141
|
+
|
142
|
+
loop do
|
143
|
+
while (c=io.peek; !(c==config[:quote] || io.eof?))
|
144
|
+
value << io.getc ## eat-up everything until quote (")
|
145
|
+
end
|
146
|
+
|
147
|
+
break if io.eof?
|
148
|
+
|
149
|
+
io.getc ## eat-up double_quote
|
150
|
+
|
151
|
+
if config[:doublequote] && io.peek == config[:quote] ## doubled up quote?
|
152
|
+
value << io.getc ## add double quote and continue!!!!
|
153
|
+
else
|
154
|
+
break
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
value = config[:quoted_empty] if value == "" ## defaults to "" (might be set to nil if needed)
|
159
|
+
|
160
|
+
logger.debug "end double_quote field (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
161
|
+
else
|
162
|
+
logger.debug "start reg field (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
163
|
+
## consume simple value
|
164
|
+
## until we hit "," or "\n" or "\r" or stray "\"" double quote
|
165
|
+
while (c=io.peek; !(c==sep || c==LF || c==CR || c==config[:quote] || io.eof?))
|
166
|
+
logger.debug " add char >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
167
|
+
value << io.getc
|
168
|
+
end
|
169
|
+
logger.debug "end reg field (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
170
|
+
end
|
171
|
+
|
172
|
+
value
|
173
|
+
end
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
def parse_record( io, sep: )
|
112
178
|
values = []
|
113
179
|
|
114
180
|
loop do
|
115
|
-
value = parse_field( io,
|
116
|
-
|
181
|
+
value = parse_field( io, sep: sep )
|
182
|
+
logger.debug "value: »#{value}«" if logger.debug?
|
117
183
|
values << value
|
118
184
|
|
119
185
|
if io.eof?
|
@@ -133,6 +199,33 @@ def parse_record( io, trim: true )
|
|
133
199
|
end
|
134
200
|
|
135
201
|
|
202
|
+
|
203
|
+
def parse_record_strict( io, sep: )
|
204
|
+
values = []
|
205
|
+
|
206
|
+
loop do
|
207
|
+
value = parse_field_strict( io, sep: sep )
|
208
|
+
logger.debug "value: »#{value}«" if logger.debug?
|
209
|
+
values << value
|
210
|
+
|
211
|
+
if io.eof?
|
212
|
+
break
|
213
|
+
elsif (c=io.peek; c==LF || c==CR)
|
214
|
+
skip_newline( io ) ## note: singular / single newline only (NOT plural)
|
215
|
+
break
|
216
|
+
elsif io.peek == sep
|
217
|
+
io.getc ## eat-up FS (,)
|
218
|
+
else
|
219
|
+
puts "*** csv parse error (strict): found >#{io.peek} (#{io.peek.ord})< - FS (,) or RS (\\n) expected!!!!"
|
220
|
+
exit(1)
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
values
|
225
|
+
end
|
226
|
+
|
227
|
+
|
228
|
+
|
136
229
|
def skip_newlines( io )
|
137
230
|
return if io.eof?
|
138
231
|
|
@@ -142,6 +235,22 @@ def skip_newlines( io )
|
|
142
235
|
end
|
143
236
|
|
144
237
|
|
238
|
+
def skip_newline( io ) ## note: singular (strict) version
|
239
|
+
return if io.eof?
|
240
|
+
|
241
|
+
## only skip CR LF or LF or CR
|
242
|
+
if io.peek == CR
|
243
|
+
io.getc ## eat-up
|
244
|
+
io.getc if io.peek == LF
|
245
|
+
elsif io.peek == LF
|
246
|
+
io.getc ## eat-up
|
247
|
+
else
|
248
|
+
# do nothing
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
|
253
|
+
|
145
254
|
def skip_until_eol( io )
|
146
255
|
return if io.eof?
|
147
256
|
|
@@ -161,91 +270,95 @@ end
|
|
161
270
|
|
162
271
|
|
163
272
|
|
164
|
-
def parse_spaces( io ) ## helper method
|
165
|
-
spaces = ""
|
166
|
-
## add leading spaces
|
167
|
-
while (c=io.peek; c==SPACE || c==TAB)
|
168
|
-
spaces << io.getc ## eat-up all spaces (" ") and tabs (\t)
|
169
|
-
end
|
170
|
-
spaces
|
171
|
-
end
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
def parse_lines( io_maybe, trim: true,
|
177
|
-
comments: true,
|
178
|
-
blanks: true, &block )
|
179
273
|
|
180
|
-
## find a better name for io_maybe
|
181
|
-
## make sure io is a wrapped into BufferIO!!!!!!
|
182
|
-
if io_maybe.is_a?( BufferIO ) ### allow (re)use of BufferIO if managed from "outside"
|
183
|
-
io = io_maybe
|
184
|
-
else
|
185
|
-
io = BufferIO.new( io_maybe )
|
186
|
-
end
|
187
274
|
|
275
|
+
def parse_lines_human( io, sep:, &block )
|
188
276
|
|
189
277
|
loop do
|
190
278
|
break if io.eof?
|
191
279
|
|
192
|
-
|
193
|
-
## check for comments or blank lines
|
194
|
-
if comments || blanks
|
195
|
-
spaces = parse_spaces( io )
|
196
|
-
end
|
280
|
+
skip_spaces( io )
|
197
281
|
|
198
|
-
if
|
199
|
-
|
282
|
+
if io.peek == COMMENT ## comment line
|
283
|
+
logger.debug "skipping comment - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
200
284
|
skip_until_eol( io )
|
201
285
|
skip_newlines( io )
|
202
|
-
elsif
|
203
|
-
|
286
|
+
elsif (c=io.peek; c==LF || c==CR || io.eof?)
|
287
|
+
logger.debug "skipping blank - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
204
288
|
skip_newlines( io )
|
205
|
-
else
|
206
|
-
|
207
|
-
|
208
|
-
if comments || blanks
|
209
|
-
## note: MUST ungetc in "reverse" order
|
210
|
-
## ## buffer is last in/first out queue!!!!
|
211
|
-
spaces.reverse.each_char { |space| io.ungetc( space ) }
|
212
|
-
end
|
213
|
-
|
214
|
-
record = parse_record( io, trim: trim )
|
289
|
+
else
|
290
|
+
logger.debug "start record - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
215
291
|
|
292
|
+
record = parse_record( io, sep: sep )
|
216
293
|
## note: requires block - enforce? how? why? why not?
|
217
294
|
block.call( record ) ## yield( record )
|
218
295
|
end
|
219
296
|
end # loop
|
220
|
-
end # method
|
297
|
+
end # method parse_lines_human
|
298
|
+
|
299
|
+
|
300
|
+
|
301
|
+
def parse_lines_strict( io, sep:, &block )
|
302
|
+
|
303
|
+
## no leading and trailing whitespaces trimmed/stripped
|
304
|
+
## no comments skipped
|
305
|
+
## no blanks skipped
|
306
|
+
## - follows strict rules of
|
307
|
+
## note: this csv format is NOT recommended;
|
308
|
+
## please, use a format with comments, leading and trailing whitespaces, etc.
|
309
|
+
## only added for checking compatibility
|
310
|
+
|
311
|
+
loop do
|
312
|
+
break if io.eof?
|
313
|
+
|
314
|
+
logger.debug "start record (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
315
|
+
|
316
|
+
record = parse_record_strict( io, sep: sep )
|
317
|
+
|
318
|
+
## note: requires block - enforce? how? why? why not?
|
319
|
+
block.call( record ) ## yield( record )
|
320
|
+
end # loop
|
321
|
+
end # method parse_lines_strict
|
322
|
+
|
221
323
|
|
222
324
|
|
325
|
+
def parse_lines( io_maybe, sep: config[:sep], &block )
|
326
|
+
## find a better name for io_maybe
|
327
|
+
## make sure io is wrapped into a BufferIO!!!!!!
|
328
|
+
if io_maybe.is_a?( BufferIO ) ### allow (re)use of BufferIO if managed from "outside"
|
329
|
+
io = io_maybe
|
330
|
+
else
|
331
|
+
io = BufferIO.new( io_maybe )
|
332
|
+
end
|
223
333
|
|
334
|
+
if strict?
|
335
|
+
parse_lines_strict( io, sep: sep, &block )
|
336
|
+
else
|
337
|
+
parse_lines_human( io, sep: sep, &block )
|
338
|
+
end
|
339
|
+
end ## parse_lines
|
340
|
+
|
341
|
+
|
342
|
+
|
343
|
+
## fix: add optional block - lets you use it like foreach!!!
|
344
|
+
## make foreach an alias of parse with block - why? why not?
|
345
|
+
##
|
346
|
+
## unify with (make one) parse and parse_lines!!!! - why? why not?
|
224
347
|
|
225
|
-
def parse( io_maybe,
|
226
|
-
comments: true,
|
227
|
-
blanks: true,
|
228
|
-
limit: nil )
|
348
|
+
def parse( io_maybe, sep: config[:sep], limit: nil )
|
229
349
|
records = []
|
230
350
|
|
231
|
-
parse_lines( io_maybe,
|
351
|
+
parse_lines( io_maybe, sep: sep ) do |record|
|
232
352
|
records << record
|
233
353
|
|
234
354
|
## set limit to 1 for processing "single" line (that is, get one record)
|
235
|
-
|
355
|
+
break if limit && limit >= records.size
|
236
356
|
end
|
237
357
|
|
238
358
|
records
|
239
359
|
end ## method parse
|
240
360
|
|
241
361
|
|
242
|
-
def foreach( io_maybe, trim: true,
|
243
|
-
comments: true,
|
244
|
-
blanks: true, &block )
|
245
|
-
parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks, &block )
|
246
|
-
end
|
247
|
-
|
248
|
-
|
249
362
|
|
250
363
|
end # class Parser
|
251
364
|
end # class CsvReader
|
data/lib/csvreader/reader.rb
CHANGED
@@ -1,150 +1,98 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
|
4
|
-
module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
|
5
4
|
|
5
|
+
class CsvReader
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
# :tab -> uses TabReader(!)
|
11
|
-
# :strict|:rfc4180
|
12
|
-
# :unix -> uses unix-style escapes e.g. \n \" etc.
|
13
|
-
# :windows|:excel
|
14
|
-
# :guess|:auto -> guess (auto-detect) separator - why? why not?
|
15
|
-
|
16
|
-
## e.g. use Dialect.registry[:unix] = { ... } etc.
|
17
|
-
## note use @@ - there is only one registry
|
18
|
-
def self.registry() @@registry ||={} end
|
19
|
-
|
20
|
-
## add built-in dialects:
|
21
|
-
## trim - use strip? why? why not? use alias?
|
22
|
-
registry[:tab] = {} ##{ class: TabReader }
|
23
|
-
registry[:strict] = { strict: true, trim: false } ## add no comments, blank lines, etc. ???
|
24
|
-
registry[:rfc4180] = :strict ## alternative name
|
25
|
-
registry[:windows] = {}
|
26
|
-
registry[:excel] = :windows
|
27
|
-
registry[:unix] = {}
|
28
|
-
|
29
|
-
## todo: add some more
|
30
|
-
end # class Dialect
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
class Configuration
|
35
|
-
|
7
|
+
def initialize( parser )
|
8
|
+
@parser = parser
|
9
|
+
end
|
36
10
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
attr_accessor :blanks
|
41
|
-
attr_accessor :comments
|
42
|
-
attr_accessor :dialect
|
11
|
+
DEFAULT = new( Parser::DEFAULT )
|
12
|
+
RFC4180 = new( Parser::RFC4180 )
|
13
|
+
EXCEL = new( Parser::EXCEL )
|
43
14
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
@comments = true
|
48
|
-
@trim = true
|
49
|
-
## note: do NOT add headers as global - should ALWAYS be explicit
|
50
|
-
## headers (true/false) - changes resultset and requires different processing!!!
|
15
|
+
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
16
|
+
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
17
|
+
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
51
18
|
|
52
|
-
self ## return self for chaining
|
53
|
-
end
|
54
19
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def comments?() @comments; end
|
64
|
-
|
65
|
-
|
66
|
-
## built-in (default) options
|
67
|
-
## todo: find a better name?
|
68
|
-
def default_options
|
69
|
-
## note:
|
70
|
-
## do NOT include sep character and
|
71
|
-
## do NOT include headers true/false here
|
72
|
-
##
|
73
|
-
## make default sep its own "global" default config
|
74
|
-
## e.g. Csv.config.sep =
|
75
|
-
|
76
|
-
## common options
|
77
|
-
## skip comments starting with #
|
78
|
-
## skip blank lines
|
79
|
-
## strip leading and trailing spaces
|
80
|
-
## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
|
81
|
-
defaults = {
|
82
|
-
blanks: @blanks, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
|
83
|
-
comments: @comments,
|
84
|
-
trim: @trim
|
85
|
-
## :converters => :strip
|
86
|
-
}
|
87
|
-
defaults
|
88
|
-
end
|
89
|
-
end # class Configuration
|
20
|
+
#####################
|
21
|
+
## convenience helpers defaulting to default csv dialect/format reader
|
22
|
+
##
|
23
|
+
## CsvReader.parse_line is the same as
|
24
|
+
## CsvReader::DEFAULT.parse_line or CsvReader.default.parse_line
|
25
|
+
##
|
90
26
|
|
27
|
+
def self.parse_line( data, sep: nil,
|
28
|
+
converters: nil )
|
29
|
+
DEFAULT.parse_line( data, sep: sep, converters: converters )
|
30
|
+
end
|
91
31
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
32
|
+
def self.parse( data, sep: nil,
|
33
|
+
converters: nil )
|
34
|
+
DEFAULT.parse( data, sep: sep, converters: converters )
|
35
|
+
end
|
96
36
|
|
97
|
-
|
98
|
-
|
37
|
+
#### fix!!! remove - replace with parse with (optional) block!!!!!
|
38
|
+
def self.parse_lines( data, sep: nil,
|
39
|
+
converters: nil, &block )
|
40
|
+
DEFAULT.parse_lines( data, sep: sep, converters: nil, &block )
|
99
41
|
end
|
100
42
|
|
101
|
-
def self.
|
102
|
-
|
43
|
+
def self.read( path, sep: nil,
|
44
|
+
converters: nil )
|
45
|
+
DEFAULT.read( path, sep: sep, converters: converters )
|
103
46
|
end
|
104
|
-
end # module Csvv
|
105
47
|
|
48
|
+
def self.header( path, sep: nil )
|
49
|
+
DEFAULT.header( path, sep: sep )
|
50
|
+
end
|
106
51
|
|
52
|
+
def self.foreach( path, sep: nil,
|
53
|
+
converters: nil, &block )
|
54
|
+
DEFAULT.foreach( path, sep: sep, converters: converters, &block )
|
55
|
+
end
|
107
56
|
|
108
|
-
####
|
109
|
-
## use our own wrapper
|
110
57
|
|
111
|
-
class CsvReader
|
112
58
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
|
119
|
-
csv_options = Csv.config.default_options.merge(
|
120
|
-
col_sep: sep
|
121
|
-
)
|
122
|
-
## pp csv_options
|
123
|
-
Parser.parse_line( txt ) ##, csv_options )
|
124
|
-
end
|
59
|
+
#############################
|
60
|
+
## all "high-level" reader methods
|
61
|
+
##
|
62
|
+
## note: allow "overriding" of separator
|
63
|
+
## if sep is not nil otherwise use default dialect/format separator
|
125
64
|
|
126
65
|
|
127
66
|
##
|
128
67
|
## todo/fix: "unify" parse and parse_lines !!!
|
129
68
|
## check for block_given? - why? why not?
|
130
69
|
|
131
|
-
def
|
132
|
-
|
133
|
-
|
134
|
-
)
|
135
|
-
|
136
|
-
|
70
|
+
def parse( data, sep: nil, limit: nil,
|
71
|
+
converters: nil )
|
72
|
+
sep = @parser.config[:sep] if sep.nil?
|
73
|
+
@parser.parse( data, sep: sep, limit: limit )
|
74
|
+
end
|
75
|
+
|
76
|
+
#### fix!!! remove - replace with parse with (optional) block!!!!!
|
77
|
+
def parse_lines( data, sep: nil,
|
78
|
+
converters: nil, &block )
|
79
|
+
sep = @parser.config[:sep] if sep.nil?
|
80
|
+
@parser.parse_lines( data, sep: sep, &block )
|
137
81
|
end
|
138
82
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
83
|
+
|
84
|
+
|
85
|
+
def parse_line( data, sep: nil,
|
86
|
+
converters: nil )
|
87
|
+
records = parse( data, sep: sep, limit: 1 )
|
88
|
+
|
89
|
+
## unwrap record if empty return nil - why? why not?
|
90
|
+
## return empty record e.g. [] - why? why not?
|
91
|
+
records.size == 0 ? nil : records.first
|
145
92
|
end
|
146
93
|
|
147
|
-
def
|
94
|
+
def read( path, sep: nil,
|
95
|
+
converters: nil )
|
148
96
|
## note: use our own file.open
|
149
97
|
## always use utf-8 for now
|
150
98
|
## check/todo: add skip option bom too - why? why not?
|
@@ -152,33 +100,26 @@ class CsvReader
|
|
152
100
|
parse( txt, sep: sep )
|
153
101
|
end
|
154
102
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
Parser.foreach( path, &block ) ###, csv_options )
|
103
|
+
def foreach( path, sep: nil,
|
104
|
+
converters: nil, &block )
|
105
|
+
File.open( path, 'r:bom|utf-8' ) do |file|
|
106
|
+
parse_lines( file, sep: sep, &block )
|
107
|
+
end
|
162
108
|
end
|
163
109
|
|
164
110
|
|
165
|
-
def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
|
166
|
-
# read first lines (only)
|
167
|
-
# and parse with csv to get header from csv library itself
|
168
|
-
#
|
169
|
-
# check - if there's an easier or built-in way for the csv library
|
170
111
|
|
171
|
-
|
172
|
-
|
173
|
-
|
112
|
+
def header( path, sep: nil ) ## use header or headers - or use both (with alias)?
|
113
|
+
# read first lines (only)
|
114
|
+
# and parse with csv to get header from csv library itself
|
174
115
|
|
175
116
|
record = nil
|
176
117
|
File.open( path, 'r:bom|utf-8' ) do |file|
|
177
|
-
record =
|
118
|
+
record = parse_line( file, sep: sep )
|
178
119
|
end
|
179
120
|
|
180
|
-
record ## todo/fix:
|
181
|
-
|
121
|
+
record ## todo/fix: returns nil for empty - why? why not?
|
122
|
+
end # method header
|
182
123
|
|
183
124
|
end # class CsvReader
|
184
125
|
|
@@ -188,13 +129,13 @@ end # class CsvReader
|
|
188
129
|
class CsvHashReader
|
189
130
|
|
190
131
|
|
191
|
-
def self.parse(
|
132
|
+
def self.parse( data, sep: nil, headers: nil )
|
192
133
|
|
193
134
|
## pass in headers as array e.g. ['A', 'B', 'C']
|
194
135
|
names = headers ? headers : nil
|
195
136
|
|
196
137
|
records = []
|
197
|
-
CsvReader.parse_lines(
|
138
|
+
CsvReader.parse_lines( data ) do |values| # sep: sep
|
198
139
|
if names.nil?
|
199
140
|
names = values ## store header row / a.k.a. field/column names
|
200
141
|
else
|
@@ -206,13 +147,13 @@ def self.parse( txt, sep: Csv.config.sep, headers: nil )
|
|
206
147
|
end
|
207
148
|
|
208
149
|
|
209
|
-
def self.read( path, sep:
|
150
|
+
def self.read( path, sep: nil, headers: nil )
|
210
151
|
txt = File.open( path, 'r:bom|utf-8' ).read
|
211
152
|
parse( txt, sep: sep, headers: headers )
|
212
153
|
end
|
213
154
|
|
214
155
|
|
215
|
-
def self.foreach( path, sep:
|
156
|
+
def self.foreach( path, sep: nil, headers: nil, &block )
|
216
157
|
|
217
158
|
## pass in headers as array e.g. ['A', 'B', 'C']
|
218
159
|
names = headers ? headers : nil
|
@@ -228,7 +169,7 @@ def self.foreach( path, sep: Csv.config.sep, headers: nil, &block )
|
|
228
169
|
end
|
229
170
|
|
230
171
|
|
231
|
-
def self.header( path, sep:
|
172
|
+
def self.header( path, sep: nil ) ## add header too? why? why not?
|
232
173
|
## same as "classic" header method - delegate/reuse :-)
|
233
174
|
CsvReader.header( path, sep: sep )
|
234
175
|
end
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_parser.rb
CHANGED
@@ -9,24 +9,38 @@ require 'helper'
|
|
9
9
|
|
10
10
|
class TestParser < MiniTest::Test
|
11
11
|
|
12
|
+
def setup
|
13
|
+
CsvReader::Parser.logger.level = :debug ## turn on "global" logging - move to helper - why? why not?
|
14
|
+
end
|
15
|
+
|
16
|
+
def parser
|
17
|
+
parser = CsvReader::Parser::DEFAULT
|
18
|
+
end
|
19
|
+
|
12
20
|
|
13
|
-
def
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
21
|
+
def test_parser_default
|
22
|
+
pp CsvReader::Parser::DEFAULT
|
23
|
+
pp CsvReader::Parser.default
|
24
|
+
assert true
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_parse
|
28
|
+
records = [["a", "b", "c"],
|
29
|
+
["1", "2", "3"],
|
30
|
+
["4", "5", "6"]]
|
31
|
+
|
32
|
+
## don't care about newlines (\r\n)
|
33
|
+
assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6" )
|
34
|
+
assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6\n" )
|
35
|
+
assert_equal records, parser.parse( "a,b,c\r1,2,3\r4,5,6" )
|
36
|
+
assert_equal records, parser.parse( "a,b,c\r\n1,2,3\r\n4,5,6\r\n" )
|
37
|
+
|
38
|
+
## or leading and trailing spaces
|
39
|
+
assert_equal records, parser.parse( " \n a , b , c \n 1,2 ,3 \n 4,5,6 " )
|
40
|
+
assert_equal records, parser.parse( "\n\na, b,c \n 1, 2, 3\n 4, 5, 6" )
|
41
|
+
assert_equal records, parser.parse( " \"a\" , b , \"c\" \n1, 2,\"3\" \n4,5, \"6\"" )
|
42
|
+
assert_equal records, parser.parse( "a, b, c\n1, 2,3\n\n\n4,5,6\n\n\n" )
|
43
|
+
assert_equal records, parser.parse( " a, b ,c \n 1 , 2 , 3 \n4,5,6 " )
|
30
44
|
end
|
31
45
|
|
32
46
|
|
@@ -34,19 +48,19 @@ def test_parse_quotes
|
|
34
48
|
records = [["a", "b", "c"],
|
35
49
|
["11 \n 11", "\"2\"", "3"]]
|
36
50
|
|
37
|
-
assert_equal records,
|
38
|
-
assert_equal records,
|
51
|
+
assert_equal records, parser.parse( " a, b ,c \n\"11 \n 11\", \"\"\"2\"\"\" , 3 \n" )
|
52
|
+
assert_equal records, parser.parse( "\n\n \"a\", \"b\" ,\"c\" \n \"11 \n 11\" , \"\"\"2\"\"\" , 3 \n" )
|
39
53
|
end
|
40
54
|
|
41
55
|
def test_parse_empties
|
42
56
|
records = [["", "", ""]]
|
43
57
|
|
44
|
-
assert_equal records,
|
45
|
-
assert_equal records,
|
58
|
+
assert_equal records, parser.parse( ",," )
|
59
|
+
assert_equal records, parser.parse( <<TXT )
|
46
60
|
"","",""
|
47
61
|
TXT
|
48
62
|
|
49
|
-
assert_equal [],
|
63
|
+
assert_equal [], parser.parse( "" )
|
50
64
|
end
|
51
65
|
|
52
66
|
|
@@ -54,7 +68,7 @@ def test_parse_comments
|
|
54
68
|
records = [["a", "b", "c"],
|
55
69
|
["1", "2", "3"]]
|
56
70
|
|
57
|
-
assert_equal records,
|
71
|
+
assert_equal records, parser.parse( <<TXT )
|
58
72
|
# comment
|
59
73
|
# comment
|
60
74
|
## comment
|
@@ -64,7 +78,7 @@ a, b, c
|
|
64
78
|
|
65
79
|
TXT
|
66
80
|
|
67
|
-
assert_equal records,
|
81
|
+
assert_equal records, parser.parse( <<TXT )
|
68
82
|
a, b, c
|
69
83
|
1, 2, 3
|
70
84
|
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_formats.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestParserFormats < MiniTest::Test
|
11
|
+
|
12
|
+
def setup
|
13
|
+
CsvReader::Parser.logger.level = :debug ## turn on "global" logging - move to helper - why? why not?
|
14
|
+
end
|
15
|
+
|
16
|
+
def parser
|
17
|
+
CsvReader::Parser
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
def test_parse_whitespace
|
22
|
+
records = [["a", "b", "c"],
|
23
|
+
["1", "2", "3"]]
|
24
|
+
|
25
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
26
|
+
assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
|
27
|
+
assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
|
28
|
+
assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
|
29
|
+
assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
|
30
|
+
|
31
|
+
assert_equal [["a", "b", "c"],
|
32
|
+
[""],
|
33
|
+
["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
|
34
|
+
assert_equal [["", ""],
|
35
|
+
[""],
|
36
|
+
["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
|
37
|
+
|
38
|
+
|
39
|
+
## strict rfc4180 - no trim leading or trailing spaces or blank lines
|
40
|
+
assert_equal records, parser.rfc4180.parse( "a,b,c\n1,2,3" )
|
41
|
+
assert_equal [["a", "b", "c"],
|
42
|
+
[""],
|
43
|
+
["1", "2", "3"]], parser.rfc4180.parse( "a,b,c\n\n1,2,3" )
|
44
|
+
assert_equal [[" a", " b ", "c "],
|
45
|
+
[""],
|
46
|
+
["1", "2", "3"]], parser.rfc4180.parse( " a, b ,c \n\n1,2,3" )
|
47
|
+
assert_equal [[" a", " b ", "c "],
|
48
|
+
[" "],
|
49
|
+
["",""],
|
50
|
+
["1", "2", "3"]], parser.rfc4180.parse( " a, b ,c \n \n,\n1,2,3" )
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
def test_parse_empties
|
55
|
+
assert_equal [], parser.default.parse( "\n \n \n" )
|
56
|
+
|
57
|
+
## strict rfc4180 - no trim leading or trailing spaces or blank lines
|
58
|
+
assert_equal [[""],
|
59
|
+
[" "],
|
60
|
+
[" "]], parser.rfc4180.parse( "\n \n \n" )
|
61
|
+
assert_equal [[""],
|
62
|
+
[" "],
|
63
|
+
[" "]], parser.rfc4180.parse( "\n \n " )
|
64
|
+
|
65
|
+
assert_equal [[""]], parser.rfc4180.parse( "\n" )
|
66
|
+
assert_equal [], parser.rfc4180.parse( "" )
|
67
|
+
end
|
68
|
+
|
69
|
+
end # class TestParserFormats
|
@@ -0,0 +1,95 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_rfc4180.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestParserRfc4180 < MiniTest::Test
|
11
|
+
|
12
|
+
def setup
|
13
|
+
CsvReader::Parser.logger.level = :debug ## turn on "global" logging - move to helper - why? why not?
|
14
|
+
end
|
15
|
+
|
16
|
+
def parser
|
17
|
+
CsvReader::Parser::RFC4180
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
def test_parser_rfc4180
|
22
|
+
pp CsvReader::Parser::RFC4180
|
23
|
+
pp CsvReader::Parser.rfc4180
|
24
|
+
assert true
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_parse
|
28
|
+
records = [["a", "b", "c"],
|
29
|
+
["1", "2", "3"],
|
30
|
+
["4", "5", "6"]]
|
31
|
+
|
32
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
33
|
+
assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6" )
|
34
|
+
assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6\n" )
|
35
|
+
assert_equal records, parser.parse( "a,b,c\r1,2,3\r4,5,6" )
|
36
|
+
assert_equal records, parser.parse( "a,b,c\r\n1,2,3\r\n4,5,6\r\n" )
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_parse_semicolon
|
40
|
+
records = [["a", "b", "c"],
|
41
|
+
["1", "2", "3"],
|
42
|
+
["4", "5", "6"]]
|
43
|
+
|
44
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
45
|
+
assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6", sep: ';' )
|
46
|
+
assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6\n", sep: ';' )
|
47
|
+
assert_equal records, parser.parse( "a;b;c\r1;2;3\r4;5;6", sep: ';' )
|
48
|
+
assert_equal records, parser.parse( "a;b;c\r\n1;2;3\r\n4;5;6\r\n", sep: ';' )
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_parse_tab
|
52
|
+
records = [["a", "b", "c"],
|
53
|
+
["1", "2", "3"],
|
54
|
+
["4", "5", "6"]]
|
55
|
+
|
56
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
57
|
+
assert_equal records, parser.parse( "a\tb\tc\n1\t2\t3\n4\t5\t6", sep: "\t" )
|
58
|
+
assert_equal records, parser.parse( "a\tb\tc\n1\t2\t3\n4\t5\t6\n", sep: "\t" )
|
59
|
+
assert_equal records, parser.parse( "a\tb\tc\r1\t2\t3\r4\t5\t6", sep: "\t" )
|
60
|
+
assert_equal records, parser.parse( "a\tb\tc\r\n1\t2\t3\r\n4\t5\t6\r\n", sep: "\t" )
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
def test_parse_empties
|
66
|
+
assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
|
67
|
+
|
68
|
+
parser.config[:quoted_empty] = nil
|
69
|
+
|
70
|
+
assert_nil parser.config[:quoted_empty]
|
71
|
+
assert_equal "", parser.config[:unquoted_empty]
|
72
|
+
|
73
|
+
assert_equal [[nil,nil,nil," "],["","",""," "]], parser.parse( %Q{"","",""," "\n,,, } )
|
74
|
+
|
75
|
+
|
76
|
+
parser.config[:unquoted_empty] = nil
|
77
|
+
|
78
|
+
assert_nil parser.config[:quoted_empty]
|
79
|
+
assert_nil parser.config[:unquoted_empty]
|
80
|
+
|
81
|
+
assert_equal [[nil,nil,nil," "],[nil,nil,nil," "]], parser.parse( %Q{"","",""," "\n,,, } )
|
82
|
+
|
83
|
+
|
84
|
+
## reset to defaults
|
85
|
+
parser.config[:quoted_empty] = ""
|
86
|
+
parser.config[:unquoted_empty] = ""
|
87
|
+
|
88
|
+
assert_equal "", parser.config[:quoted_empty]
|
89
|
+
assert_equal "", parser.config[:unquoted_empty]
|
90
|
+
|
91
|
+
assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
end # class TestParserRfc4180
|
data/test/test_reader.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -64,6 +64,8 @@ files:
|
|
64
64
|
- test/data/shakespeare.csv
|
65
65
|
- test/helper.rb
|
66
66
|
- test/test_parser.rb
|
67
|
+
- test/test_parser_formats.rb
|
68
|
+
- test/test_parser_rfc4180.rb
|
67
69
|
- test/test_reader.rb
|
68
70
|
- test/test_reader_hash.rb
|
69
71
|
homepage: https://github.com/csv11/csvreader
|