csvreader 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/lib/csvreader/base.rb +2 -0
- data/lib/csvreader/buffer.rb +31 -6
- data/lib/csvreader/parser_std.rb +78 -3
- data/lib/csvreader/version.rb +1 -1
- data/test/test_buffer.rb +39 -0
- data/test/test_parser_meta.rb +71 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46909e44ebe97a9bbc19c95f1979821d001aac93
|
4
|
+
data.tar.gz: 4820888344741534cfc391d1b610a71a4c73f922
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5cb35f119810de48868c758bd60d53e621d093e7dbc0c7b1459818f187b4ce5785e04d232432f32e54bb683c0585c97df56eb0a3352ae07aa8bfe990bdf2f6f8
|
7
|
+
data.tar.gz: 507245efca585926a7e21675ebf0fa997c5be792cb00d47647ee082a68482b1d96d3b9aec6b8ddf4b1278145604931b399a0dc82b956e939c52911680c9ac10c
|
data/Manifest.txt
CHANGED
@@ -23,10 +23,12 @@ test/data/cities11.csv
|
|
23
23
|
test/data/customers11.csv
|
24
24
|
test/data/shakespeare.csv
|
25
25
|
test/helper.rb
|
26
|
+
test/test_buffer.rb
|
26
27
|
test/test_converter.rb
|
27
28
|
test/test_parser.rb
|
28
29
|
test/test_parser_formats.rb
|
29
30
|
test/test_parser_java.rb
|
31
|
+
test/test_parser_meta.rb
|
30
32
|
test/test_parser_null.rb
|
31
33
|
test/test_parser_numeric.rb
|
32
34
|
test/test_parser_strict.rb
|
data/lib/csvreader/base.rb
CHANGED
data/lib/csvreader/buffer.rb
CHANGED
@@ -23,20 +23,45 @@ class Buffer ## todo: find a better name:
|
|
23
23
|
end
|
24
24
|
end # method getc
|
25
25
|
|
26
|
-
|
26
|
+
|
27
|
+
def peekn( lookahead )
|
28
|
+
## todo/check: use a new method peekstr or match or something
|
29
|
+
## for more than
|
30
|
+
if @buf.size == 0 && @io.eof?
|
31
|
+
## puts "peek - hitting eof!!!"
|
32
|
+
return "\0" ## return NUL char (0) for now
|
33
|
+
end
|
34
|
+
|
35
|
+
while @buf.size < lookahead do
|
36
|
+
## todo/check: add/append NUL char (0) - why? why not?
|
37
|
+
break if @io.eof? ## nothing more to read; break out of filling up buffer
|
38
|
+
|
39
|
+
c = @io.getc
|
40
|
+
@buf.push( c )
|
41
|
+
## puts "peek - fill buffer >#{c}< (#{c.ord})"
|
42
|
+
end
|
43
|
+
|
44
|
+
@buf[0,lookahead].join
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
def peek1
|
27
49
|
if @buf.size == 0 && @io.eof?
|
28
50
|
## puts "peek - hitting eof!!!"
|
29
51
|
return "\0" ## return NUL char (0) for now
|
30
52
|
end
|
31
53
|
|
32
54
|
if @buf.size == 0
|
33
|
-
|
34
|
-
|
35
|
-
|
55
|
+
c = @io.getc
|
56
|
+
@buf.push( c )
|
57
|
+
## puts "peek - fill buffer >#{c}< (#{c.ord})"
|
36
58
|
end
|
37
59
|
|
38
|
-
@buf.first
|
39
|
-
end # method
|
60
|
+
@buf[0] ## @buf.first
|
61
|
+
end # method peek1
|
62
|
+
alias :peek :peek1 ## for now alias for peek1
|
63
|
+
|
64
|
+
|
40
65
|
|
41
66
|
end # class Buffer
|
42
67
|
end # class CsvReader
|
data/lib/csvreader/parser_std.rb
CHANGED
@@ -37,7 +37,8 @@ def logger() self.class.logger; end
|
|
37
37
|
|
38
38
|
|
39
39
|
|
40
|
-
attr_reader
|
40
|
+
attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
|
41
|
+
attr_reader :meta
|
41
42
|
|
42
43
|
##
|
43
44
|
## todo/check:
|
@@ -56,6 +57,8 @@ def initialize( null: ['\N', 'NA'], ## note: set to nil for no null vales /
|
|
56
57
|
@config[:null] = null ## null values
|
57
58
|
@config[:numeric] = numeric
|
58
59
|
@config[:nan] = nan # not a number (NaN) e.g. Float::NAN
|
60
|
+
|
61
|
+
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
59
62
|
end
|
60
63
|
|
61
64
|
|
@@ -244,6 +247,58 @@ end
|
|
244
247
|
|
245
248
|
|
246
249
|
|
250
|
+
def parse_meta( input )
|
251
|
+
## todo/check:
|
252
|
+
## check again for input.peekn(4) =~ /^---[\n\r \t]$/ - why? why not?
|
253
|
+
|
254
|
+
input.getc ## eat-up (add document header ---) - skip "---"
|
255
|
+
input.getc
|
256
|
+
input.getc
|
257
|
+
|
258
|
+
## todo/fix: make peekn(4)=~/^---[\n\r \t]$/ "more strict"
|
259
|
+
## use match() or something to always match regexp
|
260
|
+
skip_spaces( input ) # eat-up optional whitespaces in header line
|
261
|
+
skip_newline( input )
|
262
|
+
|
263
|
+
buf = "---\n" ## note: start buffer with yaml header line - why?
|
264
|
+
## YAML.load("") return false !!!
|
265
|
+
## YAML.load("---\n") returns nil -- yes!! if we get nil return empty hash {}
|
266
|
+
|
267
|
+
newline = true
|
268
|
+
|
269
|
+
## eat-up until we hit "---" again
|
270
|
+
loop do
|
271
|
+
if input.eof?
|
272
|
+
raise ParseError.new( "end of input/stream - meta block footer >---< expected!!!!" )
|
273
|
+
elsif (c=input.peek; c==LF || c==CR)
|
274
|
+
while (c=input.peek; c==LF || c==CR ) ## add newlines
|
275
|
+
buf << input.getc ## eat-up all until end of line
|
276
|
+
end
|
277
|
+
newline = true
|
278
|
+
elsif newline && input.peekn(4) =~ /^---[\n\r \t]?$/ ## check if meta block end marker?
|
279
|
+
## todo/fix/check: allow (ignore) spaces after --- why? why not?
|
280
|
+
input.getc ## eat-up (add document header ---) - skip "---"
|
281
|
+
input.getc
|
282
|
+
input.getc
|
283
|
+
skip_spaces( input ) # eat-up optional whitespaces in header line
|
284
|
+
skip_newline( input )
|
285
|
+
break
|
286
|
+
else
|
287
|
+
buf << input.getc
|
288
|
+
newline = false
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
data = YAML.load( buf )
|
293
|
+
## todo: check edge cases - always should return a hash or nil
|
294
|
+
## what to do with just integer, string or array etc. ???
|
295
|
+
|
296
|
+
data = {} if data.nil? ## note: if nil return empty hash e.g. {}
|
297
|
+
data
|
298
|
+
end ## parse_meta
|
299
|
+
|
300
|
+
|
301
|
+
|
247
302
|
def skip_newline( input ) ## note: singular (strict) version
|
248
303
|
return if input.eof?
|
249
304
|
|
@@ -268,12 +323,17 @@ def skip_until_eol( input )
|
|
268
323
|
end
|
269
324
|
end
|
270
325
|
|
326
|
+
|
271
327
|
def skip_spaces( input )
|
272
|
-
return if input.eof?
|
328
|
+
return 0 if input.eof?
|
273
329
|
|
330
|
+
## note: return number of spaces skipped (e.g. 0,1,2,etc.)
|
331
|
+
spaces_count = 0
|
274
332
|
while (c=input.peek; c==SPACE || c==TAB)
|
275
333
|
input.getc ## note: always eat-up all spaces (" ") and tabs (\t)
|
334
|
+
spaces_count += 1
|
276
335
|
end
|
336
|
+
spaces_count
|
277
337
|
end
|
278
338
|
|
279
339
|
|
@@ -282,11 +342,17 @@ end
|
|
282
342
|
|
283
343
|
|
284
344
|
def parse_lines( input, &block )
|
345
|
+
## note: reset (optional) meta data block
|
346
|
+
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
347
|
+
|
348
|
+
## note: track number of records
|
349
|
+
## used for meta block (can only start before any records e.g. if record_num == 0)
|
350
|
+
record_num = 0
|
285
351
|
|
286
352
|
loop do
|
287
353
|
break if input.eof?
|
288
354
|
|
289
|
-
skip_spaces( input )
|
355
|
+
skipped_spaces = skip_spaces( input )
|
290
356
|
|
291
357
|
if input.peek == COMMENT ## comment line
|
292
358
|
logger.debug "skipping comment - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
@@ -295,10 +361,19 @@ def parse_lines( input, &block )
|
|
295
361
|
elsif (c=input.peek; c==LF || c==CR || input.eof?)
|
296
362
|
logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
297
363
|
skip_newline( input )
|
364
|
+
elsif record_num == 0 && skipped_spaces == 0 && meta.nil? && input.peekn(4) =~ /^---[\n\r \t]$/
|
365
|
+
## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
|
366
|
+
logger.debug "start meta block" if logger.debug?
|
367
|
+
## note: meta gets stored as object attribute (state/state/state!!)
|
368
|
+
## use meta attribute to get meta data after reading first record
|
369
|
+
@meta = parse_meta( input ) ## note: assumes a hash gets returned
|
370
|
+
logger.debug " meta: >#{meta.inspect}<" if logger.debug?
|
298
371
|
else
|
299
372
|
logger.debug "start record - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
300
373
|
|
301
374
|
record = parse_record( input )
|
375
|
+
record_num +=1
|
376
|
+
|
302
377
|
## note: requires block - enforce? how? why? why not?
|
303
378
|
block.call( record ) ## yield( record )
|
304
379
|
end
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_buffer.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_buffer.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestBuffer < MiniTest::Test
|
12
|
+
|
13
|
+
|
14
|
+
def test_peek
|
15
|
+
|
16
|
+
buf = CsvReader::Buffer.new( <<TXT )
|
17
|
+
# hello
|
18
|
+
1,2,3
|
19
|
+
TXT
|
20
|
+
|
21
|
+
assert_equal '#', buf.peek
|
22
|
+
assert_equal '#', buf.peek1
|
23
|
+
assert_equal '#', buf.peekn(1)
|
24
|
+
assert_equal '# ', buf.peekn(2)
|
25
|
+
assert_equal '# h', buf.peekn(3)
|
26
|
+
assert_equal '# he', buf.peekn(4)
|
27
|
+
|
28
|
+
buf.getc ## eat first char
|
29
|
+
|
30
|
+
assert_equal ' ', buf.peek
|
31
|
+
assert_equal ' ', buf.peek1
|
32
|
+
assert_equal ' ', buf.peekn(1)
|
33
|
+
assert_equal ' h', buf.peekn(2)
|
34
|
+
assert_equal ' he', buf.peekn(3)
|
35
|
+
assert_equal ' hel', buf.peekn(4)
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
end # class TestBuffer
|
data/test/test_parser_meta.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_meta.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestParserMeta < MiniTest::Test
|
11
|
+
|
12
|
+
|
13
|
+
def parser
|
14
|
+
parser = CsvReader::Parser::DEFAULT
|
15
|
+
end
|
16
|
+
|
17
|
+
|
18
|
+
def test_parse
|
19
|
+
records = [["a", "b", "c"],
|
20
|
+
["1", "2", "3"]]
|
21
|
+
|
22
|
+
assert_equal records, parser.parse( <<TXT )
|
23
|
+
# with meta data
|
24
|
+
## see https://blog.datacite.org/using-yaml-frontmatter-with-csv/
|
25
|
+
---
|
26
|
+
columns:
|
27
|
+
- title: Purchase Date
|
28
|
+
type: date
|
29
|
+
- title: Item
|
30
|
+
type: string
|
31
|
+
- title: Amount (€)
|
32
|
+
type: float
|
33
|
+
---
|
34
|
+
a,b,c
|
35
|
+
1,2,3
|
36
|
+
TXT
|
37
|
+
|
38
|
+
pp parser.meta
|
39
|
+
meta = { "columns"=>
|
40
|
+
[{"title"=>"Purchase Date", "type"=>"date"},
|
41
|
+
{"title"=>"Item", "type"=>"string"},
|
42
|
+
{"title"=>"Amount (€)", "type"=>"float"}]
|
43
|
+
}
|
44
|
+
assert_equal meta, parser.meta
|
45
|
+
|
46
|
+
|
47
|
+
assert_equal records, parser.parse( <<TXT )
|
48
|
+
# with (empty) meta data
|
49
|
+
---
|
50
|
+
---
|
51
|
+
a,b,c
|
52
|
+
1,2,3
|
53
|
+
TXT
|
54
|
+
|
55
|
+
pp parser.meta
|
56
|
+
meta = {}
|
57
|
+
assert_equal meta, parser.meta
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
assert_equal records, parser.parse( <<TXT )
|
62
|
+
# without meta data
|
63
|
+
a,b,c
|
64
|
+
1,2,3
|
65
|
+
TXT
|
66
|
+
|
67
|
+
assert_nil parser.meta
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
end # class TestParserMeta
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -74,10 +74,12 @@ files:
|
|
74
74
|
- test/data/customers11.csv
|
75
75
|
- test/data/shakespeare.csv
|
76
76
|
- test/helper.rb
|
77
|
+
- test/test_buffer.rb
|
77
78
|
- test/test_converter.rb
|
78
79
|
- test/test_parser.rb
|
79
80
|
- test/test_parser_formats.rb
|
80
81
|
- test/test_parser_java.rb
|
82
|
+
- test/test_parser_meta.rb
|
81
83
|
- test/test_parser_null.rb
|
82
84
|
- test/test_parser_numeric.rb
|
83
85
|
- test/test_parser_strict.rb
|