csvreader 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8d41f765a3d0091d6f4fe392f014e386ef35a166
4
- data.tar.gz: 268048267f364829801e12f7c41331153ce8c138
3
+ metadata.gz: 46909e44ebe97a9bbc19c95f1979821d001aac93
4
+ data.tar.gz: 4820888344741534cfc391d1b610a71a4c73f922
5
5
  SHA512:
6
- metadata.gz: d891d31e0447639dfc1d70895c81eba369028b0a32c0daf7aba605d6a91f8e1fe1304eda3d2a2efcff018a1ceb3e8f9f964301269b150771550e5e372cb45c48
7
- data.tar.gz: b8d51ae12ce5a8dbba0772ee6222338c9e6a7091585b2bf6480c88fd27f4738c5e4472f329568c92ff6c31397a8c297b157fc0b1c1e916e9497213efb2ef7245
6
+ metadata.gz: 5cb35f119810de48868c758bd60d53e621d093e7dbc0c7b1459818f187b4ce5785e04d232432f32e54bb683c0585c97df56eb0a3352ae07aa8bfe990bdf2f6f8
7
+ data.tar.gz: 507245efca585926a7e21675ebf0fa997c5be792cb00d47647ee082a68482b1d96d3b9aec6b8ddf4b1278145604931b399a0dc82b956e939c52911680c9ac10c
data/Manifest.txt CHANGED
@@ -23,10 +23,12 @@ test/data/cities11.csv
23
23
  test/data/customers11.csv
24
24
  test/data/shakespeare.csv
25
25
  test/helper.rb
26
+ test/test_buffer.rb
26
27
  test/test_converter.rb
27
28
  test/test_parser.rb
28
29
  test/test_parser_formats.rb
29
30
  test/test_parser_java.rb
31
+ test/test_parser_meta.rb
30
32
  test/test_parser_null.rb
31
33
  test/test_parser_numeric.rb
32
34
  test/test_parser_strict.rb
@@ -6,6 +6,8 @@ require 'logger'
6
6
  require 'forwardable'
7
7
  require 'stringio'
8
8
  require 'date' ## use for Date.parse and DateTime.parse
9
+ require 'yaml' ## used for (optional) meta data blocks
10
+
9
11
 
10
12
 
11
13
  ###
@@ -23,20 +23,45 @@ class Buffer ## todo: find a better name:
23
23
  end
24
24
  end # method getc
25
25
 
26
- def peek
26
+
27
+ def peekn( lookahead )
28
+ ## todo/check: use a new method peekstr or match or something
29
+ ## for more than one char of lookahead
30
+ if @buf.size == 0 && @io.eof?
31
+ ## puts "peek - hitting eof!!!"
32
+ return "\0" ## return NUL char (0) for now
33
+ end
34
+
35
+ while @buf.size < lookahead do
36
+ ## todo/check: add/append NUL char (0) - why? why not?
37
+ break if @io.eof? ## nothing more to read; break out of filling up buffer
38
+
39
+ c = @io.getc
40
+ @buf.push( c )
41
+ ## puts "peek - fill buffer >#{c}< (#{c.ord})"
42
+ end
43
+
44
+ @buf[0,lookahead].join
45
+ end
46
+
47
+
48
+ def peek1
27
49
  if @buf.size == 0 && @io.eof?
28
50
  ## puts "peek - hitting eof!!!"
29
51
  return "\0" ## return NUL char (0) for now
30
52
  end
31
53
 
32
54
  if @buf.size == 0
33
- c = @io.getc
34
- @buf.push( c )
35
- ## puts "peek - fill buffer >#{c}< (#{c.ord})"
55
+ c = @io.getc
56
+ @buf.push( c )
57
+ ## puts "peek - fill buffer >#{c}< (#{c.ord})"
36
58
  end
37
59
 
38
- @buf.first
39
- end # method peek
60
+ @buf[0] ## @buf.first
61
+ end # method peek1
62
+ alias :peek :peek1 ## for now alias for peek1
63
+
64
+
40
65
 
41
66
  end # class Buffer
42
67
  end # class CsvReader
@@ -37,7 +37,8 @@ def logger() self.class.logger; end
37
37
 
38
38
 
39
39
 
40
- attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
40
+ attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
41
+ attr_reader :meta
41
42
 
42
43
  ##
43
44
  ## todo/check:
@@ -56,6 +57,8 @@ def initialize( null: ['\N', 'NA'], ## note: set to nil for no null vales /
56
57
  @config[:null] = null ## null values
57
58
  @config[:numeric] = numeric
58
59
  @config[:nan] = nan # not a number (NaN) e.g. Float::NAN
60
+
61
+ @meta = nil ## no meta data block (use empty hash {} - why? why not?)
59
62
  end
60
63
 
61
64
 
@@ -244,6 +247,58 @@ end
244
247
 
245
248
 
246
249
 
250
+ def parse_meta( input )
251
+ ## todo/check:
252
+ ## check again for input.peekn(4) =~ /^---[\n\r \t]$/ - why? why not?
253
+
254
+ input.getc ## eat-up (add document header ---) - skip "---"
255
+ input.getc
256
+ input.getc
257
+
258
+ ## todo/fix: make peekn(4)=~/^---[\n\r \t]$/ "more strict"
259
+ ## use match() or something to always match regexp
260
+ skip_spaces( input ) # eat-up optional whitespaces in header line
261
+ skip_newline( input )
262
+
263
+ buf = "---\n" ## note: start buffer with yaml header line - why?
264
+ ## YAML.load("") returns false !!!
265
+ ## YAML.load("---\n") returns nil -- yes!! if we get nil return empty hash {}
266
+
267
+ newline = true
268
+
269
+ ## eat-up until we hit "---" again
270
+ loop do
271
+ if input.eof?
272
+ raise ParseError.new( "end of input/stream - meta block footer >---< expected!!!!" )
273
+ elsif (c=input.peek; c==LF || c==CR)
274
+ while (c=input.peek; c==LF || c==CR ) ## add newlines
275
+ buf << input.getc ## eat-up all until end of line
276
+ end
277
+ newline = true
278
+ elsif newline && input.peekn(4) =~ /^---[\n\r \t]?$/ ## check if meta block end marker?
279
+ ## todo/fix/check: allow (ignore) spaces after --- why? why not?
280
+ input.getc ## eat-up meta block end marker (document footer ---) - skip "---"
281
+ input.getc
282
+ input.getc
283
+ skip_spaces( input ) # eat-up optional whitespaces in header line
284
+ skip_newline( input )
285
+ break
286
+ else
287
+ buf << input.getc
288
+ newline = false
289
+ end
290
+ end
291
+
292
+ data = YAML.load( buf )
293
+ ## todo: check edge cases - always should return a hash or nil
294
+ ## what to do with just integer, string or array etc. ???
295
+
296
+ data = {} if data.nil? ## note: if nil return empty hash e.g. {}
297
+ data
298
+ end ## parse_meta
299
+
300
+
301
+
247
302
  def skip_newline( input ) ## note: singular (strict) version
248
303
  return if input.eof?
249
304
 
@@ -268,12 +323,17 @@ def skip_until_eol( input )
268
323
  end
269
324
  end
270
325
 
326
+
271
327
  def skip_spaces( input )
272
- return if input.eof?
328
+ return 0 if input.eof?
273
329
 
330
+ ## note: returns number of spaces and tabs skipped (e.g. 0,1,2,etc.)
331
+ spaces_count = 0
274
332
  while (c=input.peek; c==SPACE || c==TAB)
275
333
  input.getc ## note: always eat-up all spaces (" ") and tabs (\t)
334
+ spaces_count += 1
276
335
  end
336
+ spaces_count
277
337
  end
278
338
 
279
339
 
@@ -282,11 +342,17 @@ end
282
342
 
283
343
 
284
344
  def parse_lines( input, &block )
345
+ ## note: reset (optional) meta data block
346
+ @meta = nil ## no meta data block (use empty hash {} - why? why not?)
347
+
348
+ ## note: track number of records
349
+ ## used for meta block (can only start before any records e.g. if record_num == 0)
350
+ record_num = 0
285
351
 
286
352
  loop do
287
353
  break if input.eof?
288
354
 
289
- skip_spaces( input )
355
+ skipped_spaces = skip_spaces( input )
290
356
 
291
357
  if input.peek == COMMENT ## comment line
292
358
  logger.debug "skipping comment - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
@@ -295,10 +361,19 @@ def parse_lines( input, &block )
295
361
  elsif (c=input.peek; c==LF || c==CR || input.eof?)
296
362
  logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
297
363
  skip_newline( input )
364
+ elsif record_num == 0 && skipped_spaces == 0 && meta.nil? && input.peekn(4) =~ /^---[\n\r \t]$/
365
+ ## note: assume "---" (MUST BE) followed by newline (\r or \n), space or tab starts a meta block
366
+ logger.debug "start meta block" if logger.debug?
367
+ ## note: meta gets stored as object attribute (state/state/state!!)
368
+ ## use meta attribute to get meta data after reading first record
369
+ @meta = parse_meta( input ) ## note: assumes a hash gets returned
370
+ logger.debug " meta: >#{meta.inspect}<" if logger.debug?
298
371
  else
299
372
  logger.debug "start record - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
300
373
 
301
374
  record = parse_record( input )
375
+ record_num +=1
376
+
302
377
  ## note: requires block - enforce? how? why? why not?
303
378
  block.call( record ) ## yield( record )
304
379
  end
@@ -5,7 +5,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
5
5
 
6
6
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
7
7
  MINOR = 0
8
- PATCH = 2
8
+ PATCH = 3
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
 
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_buffer.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
+ class TestBuffer < MiniTest::Test
12
+
13
+
14
+ def test_peek
15
+
16
+ buf = CsvReader::Buffer.new( <<TXT )
17
+ # hello
18
+ 1,2,3
19
+ TXT
20
+
21
+ assert_equal '#', buf.peek
22
+ assert_equal '#', buf.peek1
23
+ assert_equal '#', buf.peekn(1)
24
+ assert_equal '# ', buf.peekn(2)
25
+ assert_equal '# h', buf.peekn(3)
26
+ assert_equal '# he', buf.peekn(4)
27
+
28
+ buf.getc ## eat first char
29
+
30
+ assert_equal ' ', buf.peek
31
+ assert_equal ' ', buf.peek1
32
+ assert_equal ' ', buf.peekn(1)
33
+ assert_equal ' h', buf.peekn(2)
34
+ assert_equal ' he', buf.peekn(3)
35
+ assert_equal ' hel', buf.peekn(4)
36
+ end
37
+
38
+
39
+ end # class TestBuffer
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_meta.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestParserMeta < MiniTest::Test
11
+
12
+
13
+ def parser
14
+ parser = CsvReader::Parser::DEFAULT
15
+ end
16
+
17
+
18
+ def test_parse
19
+ records = [["a", "b", "c"],
20
+ ["1", "2", "3"]]
21
+
22
+ assert_equal records, parser.parse( <<TXT )
23
+ # with meta data
24
+ ## see https://blog.datacite.org/using-yaml-frontmatter-with-csv/
25
+ ---
26
+ columns:
27
+ - title: Purchase Date
28
+ type: date
29
+ - title: Item
30
+ type: string
31
+ - title: Amount (€)
32
+ type: float
33
+ ---
34
+ a,b,c
35
+ 1,2,3
36
+ TXT
37
+
38
+ pp parser.meta
39
+ meta = { "columns"=>
40
+ [{"title"=>"Purchase Date", "type"=>"date"},
41
+ {"title"=>"Item", "type"=>"string"},
42
+ {"title"=>"Amount (€)", "type"=>"float"}]
43
+ }
44
+ assert_equal meta, parser.meta
45
+
46
+
47
+ assert_equal records, parser.parse( <<TXT )
48
+ # with (empty) meta data
49
+ ---
50
+ ---
51
+ a,b,c
52
+ 1,2,3
53
+ TXT
54
+
55
+ pp parser.meta
56
+ meta = {}
57
+ assert_equal meta, parser.meta
58
+
59
+
60
+
61
+ assert_equal records, parser.parse( <<TXT )
62
+ # without meta data
63
+ a,b,c
64
+ 1,2,3
65
+ TXT
66
+
67
+ assert_nil parser.meta
68
+ end
69
+
70
+
71
+ end # class TestParserMeta
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-21 00:00:00.000000000 Z
11
+ date: 2018-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -74,10 +74,12 @@ files:
74
74
  - test/data/customers11.csv
75
75
  - test/data/shakespeare.csv
76
76
  - test/helper.rb
77
+ - test/test_buffer.rb
77
78
  - test/test_converter.rb
78
79
  - test/test_parser.rb
79
80
  - test/test_parser_formats.rb
80
81
  - test/test_parser_java.rb
82
+ - test/test_parser_meta.rb
81
83
  - test/test_parser_null.rb
82
84
  - test/test_parser_numeric.rb
83
85
  - test/test_parser_strict.rb