csvreader 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8d41f765a3d0091d6f4fe392f014e386ef35a166
4
- data.tar.gz: 268048267f364829801e12f7c41331153ce8c138
3
+ metadata.gz: 46909e44ebe97a9bbc19c95f1979821d001aac93
4
+ data.tar.gz: 4820888344741534cfc391d1b610a71a4c73f922
5
5
  SHA512:
6
- metadata.gz: d891d31e0447639dfc1d70895c81eba369028b0a32c0daf7aba605d6a91f8e1fe1304eda3d2a2efcff018a1ceb3e8f9f964301269b150771550e5e372cb45c48
7
- data.tar.gz: b8d51ae12ce5a8dbba0772ee6222338c9e6a7091585b2bf6480c88fd27f4738c5e4472f329568c92ff6c31397a8c297b157fc0b1c1e916e9497213efb2ef7245
6
+ metadata.gz: 5cb35f119810de48868c758bd60d53e621d093e7dbc0c7b1459818f187b4ce5785e04d232432f32e54bb683c0585c97df56eb0a3352ae07aa8bfe990bdf2f6f8
7
+ data.tar.gz: 507245efca585926a7e21675ebf0fa997c5be792cb00d47647ee082a68482b1d96d3b9aec6b8ddf4b1278145604931b399a0dc82b956e939c52911680c9ac10c
data/Manifest.txt CHANGED
@@ -23,10 +23,12 @@ test/data/cities11.csv
23
23
  test/data/customers11.csv
24
24
  test/data/shakespeare.csv
25
25
  test/helper.rb
26
+ test/test_buffer.rb
26
27
  test/test_converter.rb
27
28
  test/test_parser.rb
28
29
  test/test_parser_formats.rb
29
30
  test/test_parser_java.rb
31
+ test/test_parser_meta.rb
30
32
  test/test_parser_null.rb
31
33
  test/test_parser_numeric.rb
32
34
  test/test_parser_strict.rb
@@ -6,6 +6,8 @@ require 'logger'
6
6
  require 'forwardable'
7
7
  require 'stringio'
8
8
  require 'date' ## use for Date.parse and DateTime.parse
9
+ require 'yaml' ## used for (optional) meta data blocks
10
+
9
11
 
10
12
 
11
13
  ###
@@ -23,20 +23,45 @@ class Buffer ## todo: find a better name:
23
23
  end
24
24
  end # method getc
25
25
 
26
- def peek
26
+
27
##
# Look ahead up to +lookahead+ chars without consuming them.
#
# Fills @buf from @io as needed and returns the first +lookahead+
# buffered chars joined into one string. The result is shorter than
# +lookahead+ when the io runs out first.
#
# Returns the NUL char ("\0") when nothing is buffered and the io is
# at end-of-file (same end-of-input signal as the single-char peek).
def peekn( lookahead )
  ## nothing buffered and nothing left to read - signal end-of-input
  return "\0"  if @buf.empty? && @io.eof?

  ## top up the buffer; stop early once the io has no more chars
  ##  note: eof? only gets asked while the buffer is still too short
  until @buf.size >= lookahead || @io.eof?
    @buf << @io.getc
  end

  @buf.slice( 0, lookahead ).join
end
46
+
47
+
48
##
# Look at the next char without consuming it.
#
# Reads one char from @io into @buf when the buffer is empty.
# Returns the NUL char ("\0") when the buffer is empty and the io is
# at end-of-file; otherwise returns the first buffered char.
def peek1
  if @buf.empty?
    ## end-of-input gets signalled with the NUL char (0) for now
    return "\0"  if @io.eof?

    @buf << @io.getc    ## fetch exactly one char of lookahead
  end

  @buf.first
end # method peek1
alias :peek :peek1   ## for now alias for peek1
63
+
64
+
40
65
 
41
66
  end # class Buffer
42
67
  end # class CsvReader
@@ -37,7 +37,8 @@ def logger() self.class.logger; end
37
37
 
38
38
 
39
39
 
40
- attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
40
+ attr_reader :config ## todo/fix: change config to proper dialect class/struct - why? why not?
41
+ attr_reader :meta
41
42
 
42
43
  ##
43
44
  ## todo/check:
@@ -56,6 +57,8 @@ def initialize( null: ['\N', 'NA'], ## note: set to nil for no null vales /
56
57
  @config[:null] = null ## null values
57
58
  @config[:numeric] = numeric
58
59
  @config[:nan] = nan # not a number (NaN) e.g. Float::NAN
60
+
61
+ @meta = nil ## no meta data block (use empty hash {} - why? why not?)
59
62
  end
60
63
 
61
64
 
@@ -244,6 +247,58 @@ end
244
247
 
245
248
 
246
249
 
250
##
# Read a meta data block - YAML text enclosed by a "---" header line
# and a "---" footer line - and return the loaded data.
#
# Expects +input+ to be positioned on the opening "---"; the three
# dashes get consumed without being re-checked here. On return the
# footer line (including trailing spaces/tabs and one newline) has
# been consumed too.
#
# Returns whatever YAML.load produces for the block; an empty block
# (nil) gets turned into an empty hash {}.
# Raises ParseError if the input ends before the "---" footer.
def parse_meta( input )
  ## header: eat-up "---" plus optional trailing whitespace and the newline
  3.times { input.getc }
  skip_spaces( input )
  skip_newline( input )

  ## note: seed with the yaml document header line - why?
  ##   YAML.load("")      returns false
  ##   YAML.load("---\n") returns nil   (mapped to {} below)
  yaml = "---\n"

  at_line_start = true   ## footer marker is only recognized at the start of a line

  loop do
    raise ParseError.new( "end of input/stream - meta block footer >---< expected!!!!" )  if input.eof?

    if [LF, CR].include?( input.peek )
      ## keep all newline chars as is and remember that a new line starts
      yaml << input.getc   while [LF, CR].include?( input.peek )
      at_line_start = true
      next
    end

    if at_line_start && input.peekn(4) =~ /^---[\n\r \t]?$/
      ## footer: eat-up "---" plus optional trailing whitespace and the newline
      3.times { input.getc }
      skip_spaces( input )
      skip_newline( input )
      break
    end

    yaml << input.getc
    at_line_start = false
  end

  ## NOTE(review): YAML.load on file content - with older psych versions this
  ##   may instantiate arbitrary objects; consider YAML.safe_load if the
  ##   input may be untrusted
  ## todo: check edge cases - result is not guaranteed to be a hash
  ##   (e.g. plain integer, string or array documents)
  loaded = YAML.load( yaml )
  loaded.nil? ? {} : loaded
end ## parse_meta
299
+
300
+
301
+
247
302
  def skip_newline( input ) ## note: singular (strict) version
248
303
  return if input.eof?
249
304
 
@@ -268,12 +323,17 @@ def skip_until_eol( input )
268
323
  end
269
324
  end
270
325
 
326
+
271
327
  def skip_spaces( input )
272
- return if input.eof?
328
+ return 0 if input.eof?
273
329
 
330
+ ## note: return number of spaces skipped (e.g. 0,1,2,etc.)
331
+ spaces_count = 0
274
332
  while (c=input.peek; c==SPACE || c==TAB)
275
333
  input.getc ## note: always eat-up all spaces (" ") and tabs (\t)
334
+ spaces_count += 1
276
335
  end
336
+ spaces_count
277
337
  end
278
338
 
279
339
 
@@ -282,11 +342,17 @@ end
282
342
 
283
343
 
284
344
  def parse_lines( input, &block )
345
+ ## note: reset (optional) meta data block
346
+ @meta = nil ## no meta data block (use empty hash {} - why? why not?)
347
+
348
+ ## note: track number of records
349
+ ## used for meta block (can only start before any records e.g. if record_num == 0)
350
+ record_num = 0
285
351
 
286
352
  loop do
287
353
  break if input.eof?
288
354
 
289
- skip_spaces( input )
355
+ skipped_spaces = skip_spaces( input )
290
356
 
291
357
  if input.peek == COMMENT ## comment line
292
358
  logger.debug "skipping comment - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
@@ -295,10 +361,19 @@ def parse_lines( input, &block )
295
361
  elsif (c=input.peek; c==LF || c==CR || input.eof?)
296
362
  logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
297
363
  skip_newline( input )
364
+ elsif record_num == 0 && skipped_spaces == 0 && meta.nil? && input.peekn(4) =~ /^---[\n\r \t]$/
365
+ ## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
366
+ logger.debug "start meta block" if logger.debug?
367
+ ## note: meta gets stored as object attribute (state/state/state!!)
368
+ ## use meta attribute to get meta data after reading first record
369
+ @meta = parse_meta( input ) ## note: assumes a hash gets returned
370
+ logger.debug " meta: >#{meta.inspect}<" if logger.debug?
298
371
  else
299
372
  logger.debug "start record - peek >#{input.peek}< (#{input.peek.ord})" if logger.debug?
300
373
 
301
374
  record = parse_record( input )
375
+ record_num +=1
376
+
302
377
  ## note: requires block - enforce? how? why? why not?
303
378
  block.call( record ) ## yield( record )
304
379
  end
@@ -5,7 +5,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
5
5
 
6
6
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
7
7
  MINOR = 0
8
- PATCH = 2
8
+ PATCH = 3
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
 
@@ -0,0 +1,39 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_buffer.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+
11
##
# Tests for the lookahead methods of CsvReader::Buffer
#   (peek, peek1 and peekn).
#
# note: uses Minitest::Test - the legacy MiniTest constant is no longer
#   defined by default in newer minitest versions
class TestBuffer < Minitest::Test


def test_peek

  buf = CsvReader::Buffer.new( <<TXT )
# hello
1,2,3
TXT

  ## peeking never consumes - all variants see the same leading chars
  assert_equal '#',    buf.peek
  assert_equal '#',    buf.peek1
  assert_equal '#',    buf.peekn(1)
  assert_equal '# ',   buf.peekn(2)
  assert_equal '# h',  buf.peekn(3)
  assert_equal '# he', buf.peekn(4)

  buf.getc   ## eat first char

  ## the lookahead window moves on by one char
  assert_equal ' ',    buf.peek
  assert_equal ' ',    buf.peek1
  assert_equal ' ',    buf.peekn(1)
  assert_equal ' h',   buf.peekn(2)
  assert_equal ' he',  buf.peekn(3)
  assert_equal ' hel', buf.peekn(4)
end


end # class TestBuffer
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_parser_meta.rb
6
+
7
+
8
+ require 'helper'
9
+
10
##
# Tests for the (optional) yaml meta data block in front of the records.
#
# note: uses Minitest::Test - the legacy MiniTest constant is no longer
#   defined by default in newer minitest versions
class TestParserMeta < Minitest::Test


def parser
  CsvReader::Parser::DEFAULT
end


## note: the three cases run in sequence on the same parser on purpose -
##   the last case checks that meta is nil again after earlier parse calls
##   have set it
def test_parse
  records = [["a", "b", "c"],
             ["1", "2", "3"]]

  assert_equal records, parser.parse( <<TXT )
# with meta data
## see https://blog.datacite.org/using-yaml-frontmatter-with-csv/
---
columns:
- title: Purchase Date
  type: date
- title: Item
  type: string
- title: Amount (€)
  type: float
---
a,b,c
1,2,3
TXT

  meta = { "columns"=>
            [{"title"=>"Purchase Date", "type"=>"date"},
             {"title"=>"Item",          "type"=>"string"},
             {"title"=>"Amount (€)",    "type"=>"float"}]
         }
  assert_equal meta, parser.meta


  assert_equal records, parser.parse( <<TXT )
# with (empty) meta data
---
---
a,b,c
1,2,3
TXT

  ## an empty meta block results in an empty hash (not nil)
  meta = {}
  assert_equal meta, parser.meta


  assert_equal records, parser.parse( <<TXT )
# without meta data
a,b,c
1,2,3
TXT

  assert_nil parser.meta
end


end # class TestParserMeta
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 1.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-10-21 00:00:00.000000000 Z
11
+ date: 2018-10-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdoc
@@ -74,10 +74,12 @@ files:
74
74
  - test/data/customers11.csv
75
75
  - test/data/shakespeare.csv
76
76
  - test/helper.rb
77
+ - test/test_buffer.rb
77
78
  - test/test_converter.rb
78
79
  - test/test_parser.rb
79
80
  - test/test_parser_formats.rb
80
81
  - test/test_parser_java.rb
82
+ - test/test_parser_meta.rb
81
83
  - test/test_parser_null.rb
82
84
  - test/test_parser_numeric.rb
83
85
  - test/test_parser_strict.rb