csvreader 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/lib/csvreader/base.rb +2 -0
- data/lib/csvreader/buffer.rb +31 -6
- data/lib/csvreader/parser_std.rb +78 -3
- data/lib/csvreader/version.rb +1 -1
- data/test/test_buffer.rb +39 -0
- data/test/test_parser_meta.rb +71 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 46909e44ebe97a9bbc19c95f1979821d001aac93
|
4
|
+
data.tar.gz: 4820888344741534cfc391d1b610a71a4c73f922
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5cb35f119810de48868c758bd60d53e621d093e7dbc0c7b1459818f187b4ce5785e04d232432f32e54bb683c0585c97df56eb0a3352ae07aa8bfe990bdf2f6f8
|
7
|
+
data.tar.gz: 507245efca585926a7e21675ebf0fa997c5be792cb00d47647ee082a68482b1d96d3b9aec6b8ddf4b1278145604931b399a0dc82b956e939c52911680c9ac10c
|
data/Manifest.txt
CHANGED
@@ -23,10 +23,12 @@ test/data/cities11.csv
|
|
23
23
|
test/data/customers11.csv
|
24
24
|
test/data/shakespeare.csv
|
25
25
|
test/helper.rb
|
26
|
+
test/test_buffer.rb
|
26
27
|
test/test_converter.rb
|
27
28
|
test/test_parser.rb
|
28
29
|
test/test_parser_formats.rb
|
29
30
|
test/test_parser_java.rb
|
31
|
+
test/test_parser_meta.rb
|
30
32
|
test/test_parser_null.rb
|
31
33
|
test/test_parser_numeric.rb
|
32
34
|
test/test_parser_strict.rb
|
data/lib/csvreader/base.rb
CHANGED
data/lib/csvreader/buffer.rb
CHANGED
@@ -23,20 +23,45 @@ class Buffer ## todo: find a better name:
|
|
23
23
|
end
|
24
24
|
end # method getc
|
25
25
|
|
26
|
-
|
26
|
+
|
27
|
+
def peekn( lookahead )
|
28
|
+
## todo/check: use a new method peekstr or match or something
|
29
|
+
## for more than
|
30
|
+
if @buf.size == 0 && @io.eof?
|
31
|
+
## puts "peek - hitting eof!!!"
|
32
|
+
return "\0" ## return NUL char (0) for now
|
33
|
+
end
|
34
|
+
|
35
|
+
while @buf.size < lookahead do
|
36
|
+
## todo/check: add/append NUL char (0) - why? why not?
|
37
|
+
break if @io.eof? ## nothing more to read; break out of filling up buffer
|
38
|
+
|
39
|
+
c = @io.getc
|
40
|
+
@buf.push( c )
|
41
|
+
## puts "peek - fill buffer >#{c}< (#{c.ord})"
|
42
|
+
end
|
43
|
+
|
44
|
+
@buf[0,lookahead].join
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
def peek1
|
27
49
|
if @buf.size == 0 && @io.eof?
|
28
50
|
## puts "peek - hitting eof!!!"
|
29
51
|
return "\0" ## return NUL char (0) for now
|
30
52
|
end
|
31
53
|
|
32
54
|
if @buf.size == 0
|
33
|
-
|
34
|
-
|
35
|
-
|
55
|
+
c = @io.getc
|
56
|
+
@buf.push( c )
|
57
|
+
## puts "peek - fill buffer >#{c}< (#{c.ord})"
|
36
58
|
end
|
37
59
|
|
38
|
-
@buf.first
|
39
|
-
end # method
|
60
|
+
@buf[0] ## @buf.first
|
61
|
+
end # method peek1
|
62
|
+
alias :peek :peek1 ## for now alias for peek1
|
63
|
+
|
64
|
+
|
40
65
|
|
41
66
|
end # class Buffer
|
42
67
|
end # class CsvReader
|
data/lib/csvreader/parser_std.rb
CHANGED
@@ -37,7 +37,8 @@ def logger() self.class.logger; end
|
|
37
37
|
|
38
38
|
|
39
39
|
|
40
|
-
attr_reader
|
40
|
+
attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
|
41
|
+
attr_reader :meta
|
41
42
|
|
42
43
|
##
|
43
44
|
## todo/check:
|
@@ -56,6 +57,8 @@ def initialize( null: ['\N', 'NA'], ## note: set to nil for no null vales /
|
|
56
57
|
@config[:null] = null ## null values
|
57
58
|
@config[:numeric] = numeric
|
58
59
|
@config[:nan] = nan # not a number (NaN) e.g. Float::NAN
|
60
|
+
|
61
|
+
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
59
62
|
end
|
60
63
|
|
61
64
|
|
@@ -244,6 +247,58 @@ end
|
|
244
247
|
|
245
248
|
|
246
249
|
|
250
|
+
def parse_meta( input )
|
251
|
+
## todo/check:
|
252
|
+
## check again for input.peekn(4) =~ /^---[\n\r \t]$/ - why? why not?
|
253
|
+
|
254
|
+
input.getc ## eat-up (add document header ---) - skip "---"
|
255
|
+
input.getc
|
256
|
+
input.getc
|
257
|
+
|
258
|
+
## todo/fix: make peekn(4)=~/^---[\n\r \t]$/ "more strict"
|
259
|
+
## use match() or something to always match regexp
|
260
|
+
skip_spaces( input ) # eat-up optional whitespaces in header line
|
261
|
+
skip_newline( input )
|
262
|
+
|
263
|
+
buf = "---\n" ## note: start buffer with yaml header line - why?
|
264
|
+
## YAML.load("") return false !!!
|
265
|
+
## YAML.load("---\n") returns nil -- yes!! if we get nil return empty hash {}
|
266
|
+
|
267
|
+
newline = true
|
268
|
+
|
269
|
+
## eat-up until we hit "---" again
|
270
|
+
loop do
|
271
|
+
if input.eof?
|
272
|
+
raise ParseError.new( "end of input/stream - meta block footer >---< expected!!!!" )
|
273
|
+
elsif (c=input.peek; c==LF || c==CR)
|
274
|
+
while (c=input.peek; c==LF || c==CR ) ## add newlines
|
275
|
+
buf << input.getc ## eat-up all until end of line
|
276
|
+
end
|
277
|
+
newline = true
|
278
|
+
elsif newline && input.peekn(4) =~ /^---[\n\r \t]?$/ ## check if meta block end marker?
|
279
|
+
## todo/fix/check: allow (ignore) spaces after --- why? why not?
|
280
|
+
input.getc ## eat-up (add document header ---) - skip "---"
|
281
|
+
input.getc
|
282
|
+
input.getc
|
283
|
+
skip_spaces( input ) # eat-up optional whitespaces in header line
|
284
|
+
skip_newline( input )
|
285
|
+
break
|
286
|
+
else
|
287
|
+
buf << input.getc
|
288
|
+
newline = false
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
data = YAML.load( buf )
|
293
|
+
## todo: check edge cases - always should return a hash or nil
|
294
|
+
## what to do with just integer, string or array etc. ???
|
295
|
+
|
296
|
+
data = {} if data.nil? ## note: if nil return empty hash e.g. {}
|
297
|
+
data
|
298
|
+
end ## parse_meta
|
299
|
+
|
300
|
+
|
301
|
+
|
247
302
|
def skip_newline( input ) ## note: singular (strict) version
|
248
303
|
return if input.eof?
|
249
304
|
|
@@ -268,12 +323,17 @@ def skip_until_eol( input )
|
|
268
323
|
end
|
269
324
|
end
|
270
325
|
|
326
|
+
|
271
327
|
def skip_spaces( input )
|
272
|
-
return if input.eof?
|
328
|
+
return 0 if input.eof?
|
273
329
|
|
330
|
+
## note: return number of spaces skipped (e.g. 0,1,2,etc.)
|
331
|
+
spaces_count = 0
|
274
332
|
while (c=input.peek; c==SPACE || c==TAB)
|
275
333
|
input.getc ## note: always eat-up all spaces (" ") and tabs (\t)
|
334
|
+
spaces_count += 1
|
276
335
|
end
|
336
|
+
spaces_count
|
277
337
|
end
|
278
338
|
|
279
339
|
|
@@ -282,11 +342,17 @@ end
|
|
282
342
|
|
283
343
|
|
284
344
|
def parse_lines( input, &block )
|
345
|
+
## note: reset (optional) meta data block
|
346
|
+
@meta = nil ## no meta data block (use empty hash {} - why? why not?)
|
347
|
+
|
348
|
+
## note: track number of records
|
349
|
+
## used for meta block (can only start before any records e.g. if record_num == 0)
|
350
|
+
record_num = 0
|
285
351
|
|
286
352
|
loop do
|
287
353
|
break if input.eof?
|
288
354
|
|
289
|
-
skip_spaces( input )
|
355
|
+
skipped_spaces = skip_spaces( input )
|
290
356
|
|
291
357
|
if input.peek == COMMENT ## comment line
|
292
358
|
logger.debug "skipping comment - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
@@ -295,10 +361,19 @@ def parse_lines( input, &block )
|
|
295
361
|
elsif (c=input.peek; c==LF || c==CR || input.eof?)
|
296
362
|
logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
297
363
|
skip_newline( input )
|
364
|
+
elsif record_num == 0 && skipped_spaces == 0 && meta.nil? && input.peekn(4) =~ /^---[\n\r \t]$/
|
365
|
+
## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
|
366
|
+
logger.debug "start meta block" if logger.debug?
|
367
|
+
## note: meta gets stored as object attribute (state/state/state!!)
|
368
|
+
## use meta attribute to get meta data after reading first record
|
369
|
+
@meta = parse_meta( input ) ## note: assumes a hash gets returned
|
370
|
+
logger.debug " meta: >#{meta.inspect}<" if logger.debug?
|
298
371
|
else
|
299
372
|
logger.debug "start record - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
|
300
373
|
|
301
374
|
record = parse_record( input )
|
375
|
+
record_num +=1
|
376
|
+
|
302
377
|
## note: requires block - enforce? how? why? why not?
|
303
378
|
block.call( record ) ## yield( record )
|
304
379
|
end
|
data/lib/csvreader/version.rb
CHANGED
data/test/test_buffer.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_buffer.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
|
11
|
+
class TestBuffer < MiniTest::Test
|
12
|
+
|
13
|
+
|
14
|
+
def test_peek
|
15
|
+
|
16
|
+
buf = CsvReader::Buffer.new( <<TXT )
|
17
|
+
# hello
|
18
|
+
1,2,3
|
19
|
+
TXT
|
20
|
+
|
21
|
+
assert_equal '#', buf.peek
|
22
|
+
assert_equal '#', buf.peek1
|
23
|
+
assert_equal '#', buf.peekn(1)
|
24
|
+
assert_equal '# ', buf.peekn(2)
|
25
|
+
assert_equal '# h', buf.peekn(3)
|
26
|
+
assert_equal '# he', buf.peekn(4)
|
27
|
+
|
28
|
+
buf.getc ## eat first char
|
29
|
+
|
30
|
+
assert_equal ' ', buf.peek
|
31
|
+
assert_equal ' ', buf.peek1
|
32
|
+
assert_equal ' ', buf.peekn(1)
|
33
|
+
assert_equal ' h', buf.peekn(2)
|
34
|
+
assert_equal ' he', buf.peekn(3)
|
35
|
+
assert_equal ' hel', buf.peekn(4)
|
36
|
+
end
|
37
|
+
|
38
|
+
|
39
|
+
end # class TestBuffer
|
data/test/test_parser_meta.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_parser_meta.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestParserMeta < MiniTest::Test
|
11
|
+
|
12
|
+
|
13
|
+
def parser
|
14
|
+
parser = CsvReader::Parser::DEFAULT
|
15
|
+
end
|
16
|
+
|
17
|
+
|
18
|
+
def test_parse
|
19
|
+
records = [["a", "b", "c"],
|
20
|
+
["1", "2", "3"]]
|
21
|
+
|
22
|
+
assert_equal records, parser.parse( <<TXT )
|
23
|
+
# with meta data
|
24
|
+
## see https://blog.datacite.org/using-yaml-frontmatter-with-csv/
|
25
|
+
---
|
26
|
+
columns:
|
27
|
+
- title: Purchase Date
|
28
|
+
type: date
|
29
|
+
- title: Item
|
30
|
+
type: string
|
31
|
+
- title: Amount (€)
|
32
|
+
type: float
|
33
|
+
---
|
34
|
+
a,b,c
|
35
|
+
1,2,3
|
36
|
+
TXT
|
37
|
+
|
38
|
+
pp parser.meta
|
39
|
+
meta = { "columns"=>
|
40
|
+
[{"title"=>"Purchase Date", "type"=>"date"},
|
41
|
+
{"title"=>"Item", "type"=>"string"},
|
42
|
+
{"title"=>"Amount (€)", "type"=>"float"}]
|
43
|
+
}
|
44
|
+
assert_equal meta, parser.meta
|
45
|
+
|
46
|
+
|
47
|
+
assert_equal records, parser.parse( <<TXT )
|
48
|
+
# with (empty) meta data
|
49
|
+
---
|
50
|
+
---
|
51
|
+
a,b,c
|
52
|
+
1,2,3
|
53
|
+
TXT
|
54
|
+
|
55
|
+
pp parser.meta
|
56
|
+
meta = {}
|
57
|
+
assert_equal meta, parser.meta
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
assert_equal records, parser.parse( <<TXT )
|
62
|
+
# without meta data
|
63
|
+
a,b,c
|
64
|
+
1,2,3
|
65
|
+
TXT
|
66
|
+
|
67
|
+
assert_nil parser.meta
|
68
|
+
end
|
69
|
+
|
70
|
+
|
71
|
+
end # class TestParserMeta
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.2
|
4
|
+
version: 1.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-10-
|
11
|
+
date: 2018-10-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -74,10 +74,12 @@ files:
|
|
74
74
|
- test/data/customers11.csv
|
75
75
|
- test/data/shakespeare.csv
|
76
76
|
- test/helper.rb
|
77
|
+
- test/test_buffer.rb
|
77
78
|
- test/test_converter.rb
|
78
79
|
- test/test_parser.rb
|
79
80
|
- test/test_parser_formats.rb
|
80
81
|
- test/test_parser_java.rb
|
82
|
+
- test/test_parser_meta.rb
|
81
83
|
- test/test_parser_null.rb
|
82
84
|
- test/test_parser_numeric.rb
|
83
85
|
- test/test_parser_strict.rb
|