csvreader 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/README.md +32 -4
- data/lib/csvreader.rb +1 -0
- data/lib/csvreader/buffer.rb +2 -13
- data/lib/csvreader/parser.rb +220 -107
- data/lib/csvreader/reader.rb +81 -140
- data/lib/csvreader/version.rb +1 -1
- data/test/test_parser.rb +38 -24
- data/test/test_parser_formats.rb +69 -0
- data/test/test_parser_rfc4180.rb +95 -0
- data/test/test_reader.rb +4 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ea1d667219773e3a355c81f815d91e92340d61a1
|
4
|
+
data.tar.gz: ba7a43ccb5e110fc1f6eca76ca2a74a62f1131fb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0543a4338d2d12e36da16acdad9abff28633e519baa1d92044d1ca8f5e3472d835d00a10d8b19c24561b06e0d724f87414495600f4c83eef7c9e033474b4c09e
|
7
|
+
data.tar.gz: 8df669bc86f2066b2650a67bda5698fae7b6d58766b9c318f47958b0499671d0a4d39e862b8d3af842105a2777a3cf7ad05168380c338a3053fd3d363697abfb
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -164,17 +164,15 @@ see [`TabReader` »](https://github.com/datatxt/tabreader).
|
|
164
164
|
|
165
165
|
Two major design bugs and many, many minor ones.
|
166
166
|
|
167
|
-
(1) The CSV class uses `line.split(',')` with some kludges (†) with the claim
|
167
|
+
(1) The CSV class uses [`line.split(',')`](https://github.com/ruby/csv/blob/master/lib/csv.rb#L1248) with some kludges (†) with the claim it's faster.
|
168
168
|
What?! The right way: CSV needs its own purpose-built parser. There's no other
|
169
169
|
way you can handle all the (edge) cases with double quotes and escaped doubled up
|
170
170
|
double quotes. Period.
|
171
171
|
|
172
|
-
For example, the CSV class cannot handle leading or trailing spaces
|
172
|
+
For example, the CSV class cannot handle leading or trailing spaces
|
173
173
|
for double quoted values `1,•"2","3"•`.
|
174
174
|
Or handling double quotes inside values and so on and on.
|
175
175
|
|
176
|
-
(†): kludge - a workaround or quick-and-dirty solution that is clumsy, inelegant, inefficient, difficult to extend and hard to maintain
|
177
|
-
|
178
176
|
(2) The CSV class returns `nil` for `,,` but an empty string (`""`)
|
179
177
|
for `"","",""`. The right way: All values are always strings. Period.
|
180
178
|
|
@@ -182,6 +180,36 @@ If you want to use `nil` you MUST configure a string (or strings)
|
|
182
180
|
such as `NA`, `n/a`, `\N`, or similar that map to `nil`.
|
183
181
|
|
184
182
|
|
183
|
+
(†): kludge - a workaround or quick-and-dirty solution that is clumsy, inelegant, inefficient, difficult to extend and hard to maintain
|
184
|
+
|
185
|
+
Appendix: Simple examples the standard csv library cannot read:
|
186
|
+
|
187
|
+
Quoted values with leading or trailing spaces e.g.
|
188
|
+
|
189
|
+
```
|
190
|
+
1, "2","3" , "4" ,5
|
191
|
+
```
|
192
|
+
|
193
|
+
=>
|
194
|
+
|
195
|
+
``` ruby
|
196
|
+
["1", "2", "3", "4" ,"5"]
|
197
|
+
```
|
198
|
+
|
199
|
+
"Auto-fix" unambiguous quotes in "unquoted" values e.g.
|
200
|
+
|
201
|
+
```
|
202
|
+
value with "quotes", another value
|
203
|
+
```
|
204
|
+
|
205
|
+
=>
|
206
|
+
|
207
|
+
``` ruby
|
208
|
+
["value with \"quotes\"", "another value"]
|
209
|
+
```
|
210
|
+
|
211
|
+
and some more.
|
212
|
+
|
185
213
|
|
186
214
|
|
187
215
|
|
data/lib/csvreader.rb
CHANGED
data/lib/csvreader/buffer.rb
CHANGED
@@ -18,22 +18,10 @@ class BufferIO ## todo: find a better name - why? why not? is really just for
|
|
18
18
|
end
|
19
19
|
end # method getc
|
20
20
|
|
21
|
-
|
22
|
-
def ungetc( c )
|
23
|
-
## add upfront as first char in buffer
|
24
|
-
## last in/first out queue!!!!
|
25
|
-
@buf.unshift( c )
|
26
|
-
## puts "ungetc - >#{c} (#{c.ord})< => >#{@buf}<"
|
27
|
-
end
|
28
|
-
|
29
|
-
|
30
21
|
def peek
|
31
|
-
## todo/fix:
|
32
|
-
## use Hexadecimal code: 1A, U+001A for eof char - why? why not?
|
33
22
|
if @buf.size == 0 && @io.eof?
|
34
23
|
puts "peek - hitting eof!!!"
|
35
|
-
## return
|
36
|
-
return "\0"
|
24
|
+
return "\0" ## return NUL char (0) for now
|
37
25
|
end
|
38
26
|
|
39
27
|
if @buf.size == 0
|
@@ -44,5 +32,6 @@ class BufferIO ## todo: find a better name - why? why not? is really just for
|
|
44
32
|
|
45
33
|
@buf.first
|
46
34
|
end # method peek
|
35
|
+
|
47
36
|
end # class BufferIO
|
48
37
|
end # class CsvReader
|
data/lib/csvreader/parser.rb
CHANGED
@@ -1,74 +1,92 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
class CsvReader
|
4
|
-
class Parser
|
5
|
-
|
6
4
|
|
7
|
-
## char constants
|
8
|
-
DOUBLE_QUOTE = "\""
|
9
|
-
COMMENT = "#" ## use COMMENT_HASH or HASH or ??
|
10
|
-
SPACE = " "
|
11
|
-
TAB = "\t"
|
12
|
-
LF = "\n" ## 0A (hex) 10 (dec)
|
13
|
-
CR = "\r" ## 0D (hex) 13 (dec)
|
14
5
|
|
15
6
|
|
16
|
-
def self.parse( data )
|
17
|
-
puts "parse:"
|
18
|
-
pp data
|
19
7
|
|
20
|
-
parser = new
|
21
|
-
parser.parse( data )
|
22
|
-
end
|
23
8
|
|
24
|
-
|
25
|
-
puts "parse_line:"
|
9
|
+
class Parser
|
26
10
|
|
27
|
-
parser = new
|
28
|
-
records = parser.parse( data, limit: 1 )
|
29
11
|
|
30
|
-
|
31
|
-
|
32
|
-
|
12
|
+
## char constants
|
13
|
+
DOUBLE_QUOTE = "\""
|
14
|
+
BACKSLASH = "\\" ## use BACKSLASH_ESCAPE ??
|
15
|
+
COMMENT = "#" ## use COMMENT_HASH or HASH or ??
|
16
|
+
SPACE = " " ## \s == ASCII 32 (dec) = (Space)
|
17
|
+
TAB = "\t" ## \t == ASCII 0x09 (hex) = HT (Tab/horizontal tab)
|
18
|
+
LF = "\n" ## \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
|
19
|
+
CR = "\r" ## \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)
|
20
|
+
|
21
|
+
|
22
|
+
###################################
|
23
|
+
## add simple logger with debug flag/switch
|
24
|
+
#
|
25
|
+
# use Parser.debug = true # to turn on
|
26
|
+
#
|
27
|
+
# todo/fix: use logutils instead of std logger - why? why not?
|
28
|
+
|
29
|
+
def self.logger() @@logger ||= Logger.new( STDOUT ); end
|
30
|
+
def logger() self.class.logger; end
|
31
|
+
|
32
|
+
|
33
|
+
|
34
|
+
attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
|
35
|
+
|
36
|
+
def initialize( sep: ',',
|
37
|
+
quote: DOUBLE_QUOTE, ## note: set to nil for no quote
|
38
|
+
doublequote: true,
|
39
|
+
escape: BACKSLASH, ## note: set to nil for no escapes
|
40
|
+
trim: true, ## note: will toggle between human/default and strict mode parser!!!
|
41
|
+
na: ['\N', 'NA'], ## note: set to nil for no null vales / not availabe (na)
|
42
|
+
quoted_empty: '', ## note: only available in strict mode (e.g. trim=false)
|
43
|
+
unquoted_empty: '' ## note: only available in strict mode (e.g. trim=false)
|
44
|
+
)
|
45
|
+
@config = {} ## todo/fix: change config to proper dialect class/struct - why? why not?
|
46
|
+
@config[:sep] = sep
|
47
|
+
@config[:quote] = quote
|
48
|
+
@config[:doublequote] = doublequote
|
49
|
+
@config[:escape] = escape
|
50
|
+
@config[:trim] = trim
|
51
|
+
@config[:na] = na
|
52
|
+
@config[:quoted_empty] = quoted_empty
|
53
|
+
@config[:unquoted_empty] = unquoted_empty
|
33
54
|
end
|
34
55
|
|
35
56
|
|
36
57
|
|
37
|
-
def
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
end
|
42
|
-
end
|
58
|
+
def strict?
|
59
|
+
## note: use trim for separating two different parsers / code paths:
|
60
|
+
## - human with trim leading and trailing whitespace and
|
61
|
+
## - strict with no leading and trailing whitespaces allowed
|
43
62
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
parser.foreach( file, &block )
|
48
|
-
end
|
63
|
+
## for now use - trim == false for strict version flag alias
|
64
|
+
## todo/fix: add strict flag - why? why not?
|
65
|
+
@config[:trim] ? false : true
|
49
66
|
end
|
50
67
|
|
51
|
-
def self.parse_lines( data, &block )
|
52
|
-
parser = new
|
53
|
-
parser.parse_lines( data, &block )
|
54
|
-
end
|
55
68
|
|
69
|
+
DEFAULT = new( sep: ',', trim: true )
|
70
|
+
RFC4180 = new( sep: ',', trim: false )
|
71
|
+
EXCEL = new( sep: ',', trim: false )
|
56
72
|
|
73
|
+
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
74
|
+
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
75
|
+
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
57
76
|
|
58
77
|
|
59
78
|
|
60
|
-
|
79
|
+
|
80
|
+
def parse_field( io, sep: )
|
81
|
+
logger.debug "parse field - sep: >#{sep}< (#{sep.ord})" if logger.debug?
|
82
|
+
|
61
83
|
value = ""
|
62
|
-
|
84
|
+
skip_spaces( io ) ## strip leading spaces
|
63
85
|
|
64
86
|
if (c=io.peek; c=="," || c==LF || c==CR || io.eof?) ## empty field
|
65
|
-
value = value.strip if trim ## strip all spaces
|
66
87
|
## return value; do nothing
|
67
88
|
elsif io.peek == DOUBLE_QUOTE
|
68
|
-
|
69
|
-
value = value.strip ## note always strip/trim leading spaces in quoted value
|
70
|
-
|
71
|
-
puts "start double_quote field - peek >#{io.peek}< (#{io.peek.ord})"
|
89
|
+
logger.debug "start double_quote field - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
72
90
|
io.getc ## eat-up double_quote
|
73
91
|
|
74
92
|
loop do
|
@@ -89,18 +107,18 @@ def parse_field( io, trim: true )
|
|
89
107
|
|
90
108
|
## note: always eat-up all trailing spaces (" ") and tabs (\t)
|
91
109
|
skip_spaces( io )
|
92
|
-
|
110
|
+
logger.debug "end double_quote field - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
93
111
|
else
|
94
|
-
|
112
|
+
logger.debug "start reg field - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
95
113
|
## consume simple value
|
96
114
|
## until we hit "," or "\n" or "\r"
|
97
115
|
## note: will eat-up quotes too!!!
|
98
116
|
while (c=io.peek; !(c=="," || c==LF || c==CR || io.eof?))
|
99
|
-
|
117
|
+
logger.debug " add char >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
100
118
|
value << io.getc ## eat-up all spaces (" ") and tabs (\t)
|
101
119
|
end
|
102
|
-
value = value.strip
|
103
|
-
|
120
|
+
value = value.strip ## strip all trailing spaces
|
121
|
+
logger.debug "end reg field - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
104
122
|
end
|
105
123
|
|
106
124
|
value
|
@@ -108,12 +126,60 @@ end
|
|
108
126
|
|
109
127
|
|
110
128
|
|
111
|
-
|
129
|
+
|
130
|
+
def parse_field_strict( io, sep: )
|
131
|
+
logger.debug "parse field (strict) - sep: >#{sep}< (#{sep.ord})" if logger.debug?
|
132
|
+
|
133
|
+
value = ""
|
134
|
+
|
135
|
+
if (c=io.peek; c==sep || c==LF || c==CR || io.eof?) ## empty unquoted field
|
136
|
+
value = config[:unquoted_empty] ## defaults to "" (might be set to nil if needed)
|
137
|
+
## return value; do nothing
|
138
|
+
elsif config[:quote] && io.peek == config[:quote]
|
139
|
+
logger.debug "start quote field (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
140
|
+
io.getc ## eat-up double_quote
|
141
|
+
|
142
|
+
loop do
|
143
|
+
while (c=io.peek; !(c==config[:quote] || io.eof?))
|
144
|
+
value << io.getc ## eat-up everything unit quote (")
|
145
|
+
end
|
146
|
+
|
147
|
+
break if io.eof?
|
148
|
+
|
149
|
+
io.getc ## eat-up double_quote
|
150
|
+
|
151
|
+
if config[:doublequote] && io.peek == config[:quote] ## doubled up quote?
|
152
|
+
value << io.getc ## add doube quote and continue!!!!
|
153
|
+
else
|
154
|
+
break
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
value = config[:quoted_empty] if value == "" ## defaults to "" (might be set to nil if needed)
|
159
|
+
|
160
|
+
logger.debug "end double_quote field (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
161
|
+
else
|
162
|
+
logger.debug "start reg field (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
163
|
+
## consume simple value
|
164
|
+
## until we hit "," or "\n" or "\r" or a stray "\"" double quote
|
165
|
+
while (c=io.peek; !(c==sep || c==LF || c==CR || c==config[:quote] || io.eof?))
|
166
|
+
logger.debug " add char >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
167
|
+
value << io.getc
|
168
|
+
end
|
169
|
+
logger.debug "end reg field (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
170
|
+
end
|
171
|
+
|
172
|
+
value
|
173
|
+
end
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
def parse_record( io, sep: )
|
112
178
|
values = []
|
113
179
|
|
114
180
|
loop do
|
115
|
-
value = parse_field( io,
|
116
|
-
|
181
|
+
value = parse_field( io, sep: sep )
|
182
|
+
logger.debug "value: »#{value}«" if logger.debug?
|
117
183
|
values << value
|
118
184
|
|
119
185
|
if io.eof?
|
@@ -133,6 +199,33 @@ def parse_record( io, trim: true )
|
|
133
199
|
end
|
134
200
|
|
135
201
|
|
202
|
+
|
203
|
+
def parse_record_strict( io, sep: )
|
204
|
+
values = []
|
205
|
+
|
206
|
+
loop do
|
207
|
+
value = parse_field_strict( io, sep: sep )
|
208
|
+
logger.debug "value: »#{value}«" if logger.debug?
|
209
|
+
values << value
|
210
|
+
|
211
|
+
if io.eof?
|
212
|
+
break
|
213
|
+
elsif (c=io.peek; c==LF || c==CR)
|
214
|
+
skip_newline( io ) ## note: singular / single newline only (NOT plural)
|
215
|
+
break
|
216
|
+
elsif io.peek == sep
|
217
|
+
io.getc ## eat-up FS (,)
|
218
|
+
else
|
219
|
+
puts "*** csv parse error (strict): found >#{io.peek} (#{io.peek.ord})< - FS (,) or RS (\\n) expected!!!!"
|
220
|
+
exit(1)
|
221
|
+
end
|
222
|
+
end
|
223
|
+
|
224
|
+
values
|
225
|
+
end
|
226
|
+
|
227
|
+
|
228
|
+
|
136
229
|
def skip_newlines( io )
|
137
230
|
return if io.eof?
|
138
231
|
|
@@ -142,6 +235,22 @@ def skip_newlines( io )
|
|
142
235
|
end
|
143
236
|
|
144
237
|
|
238
|
+
def skip_newline( io ) ## note: singular (strict) version
|
239
|
+
return if io.eof?
|
240
|
+
|
241
|
+
## only skip CR LF or LF or CR
|
242
|
+
if io.peek == CR
|
243
|
+
io.getc ## eat-up
|
244
|
+
io.getc if io.peek == LF
|
245
|
+
elsif io.peek == LF
|
246
|
+
io.getc ## eat-up
|
247
|
+
else
|
248
|
+
# do nothing
|
249
|
+
end
|
250
|
+
end
|
251
|
+
|
252
|
+
|
253
|
+
|
145
254
|
def skip_until_eol( io )
|
146
255
|
return if io.eof?
|
147
256
|
|
@@ -161,91 +270,95 @@ end
|
|
161
270
|
|
162
271
|
|
163
272
|
|
164
|
-
def parse_spaces( io ) ## helper method
|
165
|
-
spaces = ""
|
166
|
-
## add leading spaces
|
167
|
-
while (c=io.peek; c==SPACE || c==TAB)
|
168
|
-
spaces << io.getc ## eat-up all spaces (" ") and tabs (\t)
|
169
|
-
end
|
170
|
-
spaces
|
171
|
-
end
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
def parse_lines( io_maybe, trim: true,
|
177
|
-
comments: true,
|
178
|
-
blanks: true, &block )
|
179
273
|
|
180
|
-
## find a better name for io_maybe
|
181
|
-
## make sure io is a wrapped into BufferIO!!!!!!
|
182
|
-
if io_maybe.is_a?( BufferIO ) ### allow (re)use of BufferIO if managed from "outside"
|
183
|
-
io = io_maybe
|
184
|
-
else
|
185
|
-
io = BufferIO.new( io_maybe )
|
186
|
-
end
|
187
274
|
|
275
|
+
def parse_lines_human( io, sep:, &block )
|
188
276
|
|
189
277
|
loop do
|
190
278
|
break if io.eof?
|
191
279
|
|
192
|
-
|
193
|
-
## check for comments or blank lines
|
194
|
-
if comments || blanks
|
195
|
-
spaces = parse_spaces( io )
|
196
|
-
end
|
280
|
+
skip_spaces( io )
|
197
281
|
|
198
|
-
if
|
199
|
-
|
282
|
+
if io.peek == COMMENT ## comment line
|
283
|
+
logger.debug "skipping comment - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
200
284
|
skip_until_eol( io )
|
201
285
|
skip_newlines( io )
|
202
|
-
elsif
|
203
|
-
|
286
|
+
elsif (c=io.peek; c==LF || c==CR || io.eof?)
|
287
|
+
logger.debug "skipping blank - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
204
288
|
skip_newlines( io )
|
205
|
-
else
|
206
|
-
|
207
|
-
|
208
|
-
if comments || blanks
|
209
|
-
## note: MUST ungetc in "reverse" order
|
210
|
-
## ## buffer is last in/first out queue!!!!
|
211
|
-
spaces.reverse.each_char { |space| io.ungetc( space ) }
|
212
|
-
end
|
213
|
-
|
214
|
-
record = parse_record( io, trim: trim )
|
289
|
+
else
|
290
|
+
logger.debug "start record - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
215
291
|
|
292
|
+
record = parse_record( io, sep: sep )
|
216
293
|
## note: requires block - enforce? how? why? why not?
|
217
294
|
block.call( record ) ## yield( record )
|
218
295
|
end
|
219
296
|
end # loop
|
220
|
-
end # method
|
297
|
+
end # method parse_lines_human
|
298
|
+
|
299
|
+
|
300
|
+
|
301
|
+
def parse_lines_strict( io, sep:, &block )
|
302
|
+
|
303
|
+
## no leading and trailing whitespaces trimmed/stripped
|
304
|
+
## no comments skipped
|
305
|
+
## no blanks skipped
|
306
|
+
## - follows strict rules of
|
307
|
+
## note: this csv format is NOT recommended;
|
308
|
+
## please, use a format with comments, leading and trailing whitespaces, etc.
|
309
|
+
## only added for checking compatibility
|
310
|
+
|
311
|
+
loop do
|
312
|
+
break if io.eof?
|
313
|
+
|
314
|
+
logger.debug "start record (strict) - peek >#{io.peek}< (#{io.peek.ord})" if logger.debug?
|
315
|
+
|
316
|
+
record = parse_record_strict( io, sep: sep )
|
317
|
+
|
318
|
+
## note: requires block - enforce? how? why? why not?
|
319
|
+
block.call( record ) ## yield( record )
|
320
|
+
end # loop
|
321
|
+
end # method parse_lines_strict
|
322
|
+
|
221
323
|
|
222
324
|
|
325
|
+
def parse_lines( io_maybe, sep: config[:sep], &block )
|
326
|
+
## find a better name for io_maybe
|
327
|
+
## make sure io is a wrapped into BufferIO!!!!!!
|
328
|
+
if io_maybe.is_a?( BufferIO ) ### allow (re)use of BufferIO if managed from "outside"
|
329
|
+
io = io_maybe
|
330
|
+
else
|
331
|
+
io = BufferIO.new( io_maybe )
|
332
|
+
end
|
223
333
|
|
334
|
+
if strict?
|
335
|
+
parse_lines_strict( io, sep: sep, &block )
|
336
|
+
else
|
337
|
+
parse_lines_human( io, sep: sep, &block )
|
338
|
+
end
|
339
|
+
end ## parse_lines
|
340
|
+
|
341
|
+
|
342
|
+
|
343
|
+
## fix: add optional block - lets you use it like foreach!!!
|
344
|
+
## make foreach an alias of parse with block - why? why not?
|
345
|
+
##
|
346
|
+
## unify with (make one) parse and parse_lines!!!! - why? why not?
|
224
347
|
|
225
|
-
def parse( io_maybe,
|
226
|
-
comments: true,
|
227
|
-
blanks: true,
|
228
|
-
limit: nil )
|
348
|
+
def parse( io_maybe, sep: config[:sep], limit: nil )
|
229
349
|
records = []
|
230
350
|
|
231
|
-
parse_lines( io_maybe,
|
351
|
+
parse_lines( io_maybe, sep: sep ) do |record|
|
232
352
|
records << record
|
233
353
|
|
234
354
|
## set limit to 1 for processing "single" line (that is, get one record)
|
235
|
-
|
355
|
+
break if limit && limit >= records.size
|
236
356
|
end
|
237
357
|
|
238
358
|
records
|
239
359
|
end ## method parse
|
240
360
|
|
241
361
|
|
242
|
-
def foreach( io_maybe, trim: true,
|
243
|
-
comments: true,
|
244
|
-
blanks: true, &block )
|
245
|
-
parse_lines( io_maybe, trim: trim, comments: comments, blanks: blanks, &block )
|
246
|
-
end
|
247
|
-
|
248
|
-
|
249
362
|
|
250
363
|
end # class Parser
|
251
364
|
end # class CsvReader
|
data/lib/csvreader/reader.rb
CHANGED
@@ -1,150 +1,98 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
3
|
|
4
|
-
module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
|
5
4
|
|
5
|
+
class CsvReader
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
# :tab -> uses TabReader(!)
|
11
|
-
# :strict|:rfc4180
|
12
|
-
# :unix -> uses unix-style escapes e.g. \n \" etc.
|
13
|
-
# :windows|:excel
|
14
|
-
# :guess|:auto -> guess (auto-detect) separator - why? why not?
|
15
|
-
|
16
|
-
## e.g. use Dialect.registry[:unix] = { ... } etc.
|
17
|
-
## note use @@ - there is only one registry
|
18
|
-
def self.registry() @@registry ||={} end
|
19
|
-
|
20
|
-
## add built-in dialects:
|
21
|
-
## trim - use strip? why? why not? use alias?
|
22
|
-
registry[:tab] = {} ##{ class: TabReader }
|
23
|
-
registry[:strict] = { strict: true, trim: false } ## add no comments, blank lines, etc. ???
|
24
|
-
registry[:rfc4180] = :strict ## alternative name
|
25
|
-
registry[:windows] = {}
|
26
|
-
registry[:excel] = :windows
|
27
|
-
registry[:unix] = {}
|
28
|
-
|
29
|
-
## todo: add some more
|
30
|
-
end # class Dialect
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
class Configuration
|
35
|
-
|
7
|
+
def initialize( parser )
|
8
|
+
@parser = parser
|
9
|
+
end
|
36
10
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
attr_accessor :blanks
|
41
|
-
attr_accessor :comments
|
42
|
-
attr_accessor :dialect
|
11
|
+
DEFAULT = new( Parser::DEFAULT )
|
12
|
+
RFC4180 = new( Parser::RFC4180 )
|
13
|
+
EXCEL = new( Parser::EXCEL )
|
43
14
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
@comments = true
|
48
|
-
@trim = true
|
49
|
-
## note: do NOT add headers as global - should ALWAYS be explicit
|
50
|
-
## headers (true/false) - changes resultset and requires different processing!!!
|
15
|
+
def self.default() DEFAULT; end ## alternative alias for DEFAULT
|
16
|
+
def self.rfc4180() RFC4180; end ## alternative alias for RFC4180
|
17
|
+
def self.excel() EXCEL; end ## alternative alias for EXCEL
|
51
18
|
|
52
|
-
self ## return self for chaining
|
53
|
-
end
|
54
19
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
def comments?() @comments; end
|
64
|
-
|
65
|
-
|
66
|
-
## built-in (default) options
|
67
|
-
## todo: find a better name?
|
68
|
-
def default_options
|
69
|
-
## note:
|
70
|
-
## do NOT include sep character and
|
71
|
-
## do NOT include headers true/false here
|
72
|
-
##
|
73
|
-
## make default sep its own "global" default config
|
74
|
-
## e.g. Csv.config.sep =
|
75
|
-
|
76
|
-
## common options
|
77
|
-
## skip comments starting with #
|
78
|
-
## skip blank lines
|
79
|
-
## strip leading and trailing spaces
|
80
|
-
## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
|
81
|
-
defaults = {
|
82
|
-
blanks: @blanks, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
|
83
|
-
comments: @comments,
|
84
|
-
trim: @trim
|
85
|
-
## :converters => :strip
|
86
|
-
}
|
87
|
-
defaults
|
88
|
-
end
|
89
|
-
end # class Configuration
|
20
|
+
#####################
|
21
|
+
## convenience helpers defaulting to default csv dialect/format reader
|
22
|
+
##
|
23
|
+
## CsvReader.parse_line is the same as
|
24
|
+
## CsvReader::DEFAULT.parse_line or CsvReader.default.parse_line
|
25
|
+
##
|
90
26
|
|
27
|
+
def self.parse_line( data, sep: nil,
|
28
|
+
converters: nil )
|
29
|
+
DEFAULT.parse_line( data, sep: sep, converters: converters )
|
30
|
+
end
|
91
31
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
32
|
+
def self.parse( data, sep: nil,
|
33
|
+
converters: nil )
|
34
|
+
DEFAULT.parse( data, sep: sep, converters: converters )
|
35
|
+
end
|
96
36
|
|
97
|
-
|
98
|
-
|
37
|
+
#### fix!!! remove - replace with parse with (optional) block!!!!!
|
38
|
+
def self.parse_lines( data, sep: nil,
|
39
|
+
converters: nil, &block )
|
40
|
+
DEFAULT.parse_lines( data, sep: sep, converters: nil, &block )
|
99
41
|
end
|
100
42
|
|
101
|
-
def self.
|
102
|
-
|
43
|
+
def self.read( path, sep: nil,
|
44
|
+
converters: nil )
|
45
|
+
DEFAULT.read( path, sep: sep, converters: converters )
|
103
46
|
end
|
104
|
-
end # module Csvv
|
105
47
|
|
48
|
+
def self.header( path, sep: nil )
|
49
|
+
DEFAULT.header( path, sep: sep )
|
50
|
+
end
|
106
51
|
|
52
|
+
def self.foreach( path, sep: nil,
|
53
|
+
converters: nil, &block )
|
54
|
+
DEFAULT.foreach( path, sep: sep, converters: converters, &block )
|
55
|
+
end
|
107
56
|
|
108
|
-
####
|
109
|
-
## use our own wrapper
|
110
57
|
|
111
|
-
class CsvReader
|
112
58
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
|
119
|
-
csv_options = Csv.config.default_options.merge(
|
120
|
-
col_sep: sep
|
121
|
-
)
|
122
|
-
## pp csv_options
|
123
|
-
Parser.parse_line( txt ) ##, csv_options )
|
124
|
-
end
|
59
|
+
#############################
|
60
|
+
## all "high-level" reader methods
|
61
|
+
##
|
62
|
+
## note: allow "overriding" of separator
|
63
|
+
## if sep is not nil otherwise use default dialect/format separator
|
125
64
|
|
126
65
|
|
127
66
|
##
|
128
67
|
## todo/fix: "unify" parse and parse_lines !!!
|
129
68
|
## check for block_given? - why? why not?
|
130
69
|
|
131
|
-
def
|
132
|
-
|
133
|
-
|
134
|
-
)
|
135
|
-
|
136
|
-
|
70
|
+
def parse( data, sep: nil, limit: nil,
|
71
|
+
converters: nil )
|
72
|
+
sep = @parser.config[:sep] if sep.nil?
|
73
|
+
@parser.parse( data, sep: sep, limit: limit )
|
74
|
+
end
|
75
|
+
|
76
|
+
#### fix!!! remove - replace with parse with (optional) block!!!!!
|
77
|
+
def parse_lines( data, sep: nil,
|
78
|
+
converters: nil, &block )
|
79
|
+
sep = @parser.config[:sep] if sep.nil?
|
80
|
+
@parser.parse_lines( data, sep: sep, &block )
|
137
81
|
end
|
138
82
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
83
|
+
|
84
|
+
|
85
|
+
def parse_line( data, sep: nil,
|
86
|
+
converters: nil )
|
87
|
+
records = parse( data, sep: sep, limit: 1 )
|
88
|
+
|
89
|
+
## unwrap record if empty return nil - why? why not?
|
90
|
+
## return empty record e.g. [] - why? why not?
|
91
|
+
records.size == 0 ? nil : records.first
|
145
92
|
end
|
146
93
|
|
147
|
-
def
|
94
|
+
def read( path, sep: nil,
|
95
|
+
converters: nil )
|
148
96
|
## note: use our own file.open
|
149
97
|
## always use utf-8 for now
|
150
98
|
## check/todo: add skip option bom too - why? why not?
|
@@ -152,33 +100,26 @@ class CsvReader
|
|
152
100
|
parse( txt, sep: sep )
|
153
101
|
end
|
154
102
|
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
Parser.foreach( path, &block ) ###, csv_options )
|
103
|
+
def foreach( path, sep: nil,
|
104
|
+
converters: nil, &block )
|
105
|
+
File.open( path, 'r:bom|utf-8' ) do |file|
|
106
|
+
parse_lines( file, sep: sep, &block )
|
107
|
+
end
|
162
108
|
end
|
163
109
|
|
164
110
|
|
165
|
-
def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
|
166
|
-
# read first lines (only)
|
167
|
-
# and parse with csv to get header from csv library itself
|
168
|
-
#
|
169
|
-
# check - if there's an easier or built-in way for the csv library
|
170
111
|
|
171
|
-
|
172
|
-
|
173
|
-
|
112
|
+
def header( path, sep: nil ) ## use header or headers - or use both (with alias)?
|
113
|
+
# read first lines (only)
|
114
|
+
# and parse with csv to get header from csv library itself
|
174
115
|
|
175
116
|
record = nil
|
176
117
|
File.open( path, 'r:bom|utf-8' ) do |file|
|
177
|
-
record =
|
118
|
+
record = parse_line( file, sep: sep )
|
178
119
|
end
|
179
120
|
|
180
|
-
record ## todo/fix:
|
181
|
-
|
121
|
+
record ## todo/fix: returns nil for empty - why? why not?
|
122
|
+
end # method self.header
|
182
123
|
|
183
124
|
end # class CsvReader
|
184
125
|
|
@@ -188,13 +129,13 @@ end # class CsvReader
|
|
188
129
|
class CsvHashReader
|
189
130
|
|
190
131
|
|
191
|
-
def self.parse(
|
132
|
+
def self.parse( data, sep: nil, headers: nil )
|
192
133
|
|
193
134
|
## pass in headers as array e.g. ['A', 'B', 'C']
|
194
135
|
names = headers ? headers : nil
|
195
136
|
|
196
137
|
records = []
|
197
|
-
CsvReader.parse_lines(
|
138
|
+
CsvReader.parse_lines( data ) do |values| # sep: sep
|
198
139
|
if names.nil?
|
199
140
|
names = values ## store header row / a.k.a. field/column names
|
200
141
|
else
|
@@ -206,13 +147,13 @@ def self.parse( txt, sep: Csv.config.sep, headers: nil )
|
|
206
147
|
end
|
207
148
|
|
208
149
|
|
209
|
-
def self.read( path, sep:
|
150
|
+
def self.read( path, sep: nil, headers: nil )
|
210
151
|
txt = File.open( path, 'r:bom|utf-8' ).read
|
211
152
|
parse( txt, sep: sep, headers: headers )
|
212
153
|
end
|
213
154
|
|
214
155
|
|
215
|
-
def self.foreach( path, sep:
|
156
|
+
def self.foreach( path, sep: nil, headers: nil, &block )
|
216
157
|
|
217
158
|
## pass in headers as array e.g. ['A', 'B', 'C']
|
218
159
|
names = headers ? headers : nil
|
@@ -228,7 +169,7 @@ def self.foreach( path, sep: Csv.config.sep, headers: nil, &block )
|
|
228
169
|
end
|
229
170
|
|
230
171
|
|
231
|
-
def self.header( path, sep:
|
172
|
+
def self.header( path, sep: nil ) ## add header too? why? why not?
|
232
173
|
## same as "classic" header method - delegate/reuse :-)
|
233
174
|
CsvReader.header( path, sep: sep )
|
234
175
|
end
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_parser.rb
CHANGED
@@ -9,24 +9,38 @@ require 'helper'
|
|
9
9
|
|
10
10
|
class TestParser < MiniTest::Test
|
11
11
|
|
12
|
+
def setup
|
13
|
+
CsvReader::Parser.logger.level = :debug ## turn on "global" logging - move to helper - why? why not?
|
14
|
+
end
|
15
|
+
|
16
|
+
def parser
|
17
|
+
parser = CsvReader::Parser::DEFAULT
|
18
|
+
end
|
19
|
+
|
12
20
|
|
13
|
-
def
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
21
|
+
def test_parser_default
|
22
|
+
pp CsvReader::Parser::DEFAULT
|
23
|
+
pp CsvReader::Parser.default
|
24
|
+
assert true
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_parse
|
28
|
+
records = [["a", "b", "c"],
|
29
|
+
["1", "2", "3"],
|
30
|
+
["4", "5", "6"]]
|
31
|
+
|
32
|
+
## don't care about newlines (\r\n)
|
33
|
+
assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6" )
|
34
|
+
assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6\n" )
|
35
|
+
assert_equal records, parser.parse( "a,b,c\r1,2,3\r4,5,6" )
|
36
|
+
assert_equal records, parser.parse( "a,b,c\r\n1,2,3\r\n4,5,6\r\n" )
|
37
|
+
|
38
|
+
## or leading and trailing spaces
|
39
|
+
assert_equal records, parser.parse( " \n a , b , c \n 1,2 ,3 \n 4,5,6 " )
|
40
|
+
assert_equal records, parser.parse( "\n\na, b,c \n 1, 2, 3\n 4, 5, 6" )
|
41
|
+
assert_equal records, parser.parse( " \"a\" , b , \"c\" \n1, 2,\"3\" \n4,5, \"6\"" )
|
42
|
+
assert_equal records, parser.parse( "a, b, c\n1, 2,3\n\n\n4,5,6\n\n\n" )
|
43
|
+
assert_equal records, parser.parse( " a, b ,c \n 1 , 2 , 3 \n4,5,6 " )
|
30
44
|
end
|
31
45
|
|
32
46
|
|
@@ -34,19 +48,19 @@ def test_parse_quotes
|
|
34
48
|
records = [["a", "b", "c"],
|
35
49
|
["11 \n 11", "\"2\"", "3"]]
|
36
50
|
|
37
|
-
assert_equal records,
|
38
|
-
assert_equal records,
|
51
|
+
assert_equal records, parser.parse( " a, b ,c \n\"11 \n 11\", \"\"\"2\"\"\" , 3 \n" )
|
52
|
+
assert_equal records, parser.parse( "\n\n \"a\", \"b\" ,\"c\" \n \"11 \n 11\" , \"\"\"2\"\"\" , 3 \n" )
|
39
53
|
end
|
40
54
|
|
41
55
|
def test_parse_empties
|
42
56
|
records = [["", "", ""]]
|
43
57
|
|
44
|
-
assert_equal records,
|
45
|
-
assert_equal records,
|
58
|
+
assert_equal records, parser.parse( ",," )
|
59
|
+
assert_equal records, parser.parse( <<TXT )
|
46
60
|
"","",""
|
47
61
|
TXT
|
48
62
|
|
49
|
-
assert_equal [],
|
63
|
+
assert_equal [], parser.parse( "" )
|
50
64
|
end
|
51
65
|
|
52
66
|
|
@@ -54,7 +68,7 @@ def test_parse_comments
|
|
54
68
|
records = [["a", "b", "c"],
|
55
69
|
["1", "2", "3"]]
|
56
70
|
|
57
|
-
assert_equal records,
|
71
|
+
assert_equal records, parser.parse( <<TXT )
|
58
72
|
# comment
|
59
73
|
# comment
|
60
74
|
## comment
|
@@ -64,7 +78,7 @@ a, b, c
|
|
64
78
|
|
65
79
|
TXT
|
66
80
|
|
67
|
-
assert_equal records,
|
81
|
+
assert_equal records, parser.parse( <<TXT )
|
68
82
|
a, b, c
|
69
83
|
1, 2, 3
|
70
84
|
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_formats.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestParserFormats < MiniTest::Test
|
11
|
+
|
12
|
+
def setup
|
13
|
+
CsvReader::Parser.logger.level = :debug ## turn on "global" logging - move to helper - why? why not?
|
14
|
+
end
|
15
|
+
|
16
|
+
def parser
|
17
|
+
CsvReader::Parser
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
def test_parse_whitespace
|
22
|
+
records = [["a", "b", "c"],
|
23
|
+
["1", "2", "3"]]
|
24
|
+
|
25
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
26
|
+
assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
|
27
|
+
assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
|
28
|
+
assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
|
29
|
+
assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
|
30
|
+
|
31
|
+
assert_equal [["a", "b", "c"],
|
32
|
+
[""],
|
33
|
+
["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
|
34
|
+
assert_equal [["", ""],
|
35
|
+
[""],
|
36
|
+
["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
|
37
|
+
|
38
|
+
|
39
|
+
## strict rfc4180 - no trim leading or trailing spaces or blank lines
|
40
|
+
assert_equal records, parser.rfc4180.parse( "a,b,c\n1,2,3" )
|
41
|
+
assert_equal [["a", "b", "c"],
|
42
|
+
[""],
|
43
|
+
["1", "2", "3"]], parser.rfc4180.parse( "a,b,c\n\n1,2,3" )
|
44
|
+
assert_equal [[" a", " b ", "c "],
|
45
|
+
[""],
|
46
|
+
["1", "2", "3"]], parser.rfc4180.parse( " a, b ,c \n\n1,2,3" )
|
47
|
+
assert_equal [[" a", " b ", "c "],
|
48
|
+
[" "],
|
49
|
+
["",""],
|
50
|
+
["1", "2", "3"]], parser.rfc4180.parse( " a, b ,c \n \n,\n1,2,3" )
|
51
|
+
end
|
52
|
+
|
53
|
+
|
54
|
+
def test_parse_empties
|
55
|
+
assert_equal [], parser.default.parse( "\n \n \n" )
|
56
|
+
|
57
|
+
## strict rfc4180 - no trim leading or trailing spaces or blank lines
|
58
|
+
assert_equal [[""],
|
59
|
+
[" "],
|
60
|
+
[" "]], parser.rfc4180.parse( "\n \n \n" )
|
61
|
+
assert_equal [[""],
|
62
|
+
[" "],
|
63
|
+
[" "]], parser.rfc4180.parse( "\n \n " )
|
64
|
+
|
65
|
+
assert_equal [[""]], parser.rfc4180.parse( "\n" )
|
66
|
+
assert_equal [], parser.rfc4180.parse( "" )
|
67
|
+
end
|
68
|
+
|
69
|
+
end # class TestParserFormats
|
@@ -0,0 +1,95 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_rfc4180.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestParserRfc4180 < MiniTest::Test
|
11
|
+
|
12
|
+
def setup
|
13
|
+
CsvReader::Parser.logger.level = :debug ## turn on "global" logging - move to helper - why? why not?
|
14
|
+
end
|
15
|
+
|
16
|
+
def parser
|
17
|
+
CsvReader::Parser::RFC4180
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
def test_parser_rfc4180
|
22
|
+
pp CsvReader::Parser::RFC4180
|
23
|
+
pp CsvReader::Parser.rfc4180
|
24
|
+
assert true
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_parse
|
28
|
+
records = [["a", "b", "c"],
|
29
|
+
["1", "2", "3"],
|
30
|
+
["4", "5", "6"]]
|
31
|
+
|
32
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
33
|
+
assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6" )
|
34
|
+
assert_equal records, parser.parse( "a,b,c\n1,2,3\n4,5,6\n" )
|
35
|
+
assert_equal records, parser.parse( "a,b,c\r1,2,3\r4,5,6" )
|
36
|
+
assert_equal records, parser.parse( "a,b,c\r\n1,2,3\r\n4,5,6\r\n" )
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_parse_semicolon
|
40
|
+
records = [["a", "b", "c"],
|
41
|
+
["1", "2", "3"],
|
42
|
+
["4", "5", "6"]]
|
43
|
+
|
44
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
45
|
+
assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6", sep: ';' )
|
46
|
+
assert_equal records, parser.parse( "a;b;c\n1;2;3\n4;5;6\n", sep: ';' )
|
47
|
+
assert_equal records, parser.parse( "a;b;c\r1;2;3\r4;5;6", sep: ';' )
|
48
|
+
assert_equal records, parser.parse( "a;b;c\r\n1;2;3\r\n4;5;6\r\n", sep: ';' )
|
49
|
+
end
|
50
|
+
|
51
|
+
def test_parse_tab
|
52
|
+
records = [["a", "b", "c"],
|
53
|
+
["1", "2", "3"],
|
54
|
+
["4", "5", "6"]]
|
55
|
+
|
56
|
+
## don't care about newlines (\r\n) ??? - fix? why? why not?
|
57
|
+
assert_equal records, parser.parse( "a\tb\tc\n1\t2\t3\n4\t5\t6", sep: "\t" )
|
58
|
+
assert_equal records, parser.parse( "a\tb\tc\n1\t2\t3\n4\t5\t6\n", sep: "\t" )
|
59
|
+
assert_equal records, parser.parse( "a\tb\tc\r1\t2\t3\r4\t5\t6", sep: "\t" )
|
60
|
+
assert_equal records, parser.parse( "a\tb\tc\r\n1\t2\t3\r\n4\t5\t6\r\n", sep: "\t" )
|
61
|
+
end
|
62
|
+
|
63
|
+
|
64
|
+
|
65
|
+
def test_parse_empties
|
66
|
+
assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
|
67
|
+
|
68
|
+
parser.config[:quoted_empty] = nil
|
69
|
+
|
70
|
+
assert_nil parser.config[:quoted_empty]
|
71
|
+
assert_equal "", parser.config[:unquoted_empty]
|
72
|
+
|
73
|
+
assert_equal [[nil,nil,nil," "],["","",""," "]], parser.parse( %Q{"","",""," "\n,,, } )
|
74
|
+
|
75
|
+
|
76
|
+
parser.config[:unquoted_empty] = nil
|
77
|
+
|
78
|
+
assert_nil parser.config[:quoted_empty]
|
79
|
+
assert_nil parser.config[:unquoted_empty]
|
80
|
+
|
81
|
+
assert_equal [[nil,nil,nil," "],[nil,nil,nil," "]], parser.parse( %Q{"","",""," "\n,,, } )
|
82
|
+
|
83
|
+
|
84
|
+
## reset to defaults
|
85
|
+
parser.config[:quoted_empty] = ""
|
86
|
+
parser.config[:unquoted_empty] = ""
|
87
|
+
|
88
|
+
assert_equal "", parser.config[:quoted_empty]
|
89
|
+
assert_equal "", parser.config[:unquoted_empty]
|
90
|
+
|
91
|
+
assert_equal [["","",""],["","",""]], parser.parse( %Q{"","",""\n,,} )
|
92
|
+
end
|
93
|
+
|
94
|
+
|
95
|
+
end # class TestParserRfc4180
|
data/test/test_reader.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -64,6 +64,8 @@ files:
|
|
64
64
|
- test/data/shakespeare.csv
|
65
65
|
- test/helper.rb
|
66
66
|
- test/test_parser.rb
|
67
|
+
- test/test_parser_formats.rb
|
68
|
+
- test/test_parser_rfc4180.rb
|
67
69
|
- test/test_reader.rb
|
68
70
|
- test/test_reader_hash.rb
|
69
71
|
homepage: https://github.com/csv11/csvreader
|