RubyGems - csvreader - Versions diffs - 1.0.2 → 1.0.3 - Mend

csvreader 1.0.2 → 1.0.3

Files changed (9) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 8d41f765a3d0091d6f4fe392f014e386ef35a166
-  data.tar.gz: 268048267f364829801e12f7c41331153ce8c138
+  metadata.gz: 46909e44ebe97a9bbc19c95f1979821d001aac93
+  data.tar.gz: 4820888344741534cfc391d1b610a71a4c73f922
 SHA512:
-  metadata.gz: d891d31e0447639dfc1d70895c81eba369028b0a32c0daf7aba605d6a91f8e1fe1304eda3d2a2efcff018a1ceb3e8f9f964301269b150771550e5e372cb45c48
-  data.tar.gz: b8d51ae12ce5a8dbba0772ee6222338c9e6a7091585b2bf6480c88fd27f4738c5e4472f329568c92ff6c31397a8c297b157fc0b1c1e916e9497213efb2ef7245
+  metadata.gz: 5cb35f119810de48868c758bd60d53e621d093e7dbc0c7b1459818f187b4ce5785e04d232432f32e54bb683c0585c97df56eb0a3352ae07aa8bfe990bdf2f6f8
+  data.tar.gz: 507245efca585926a7e21675ebf0fa997c5be792cb00d47647ee082a68482b1d96d3b9aec6b8ddf4b1278145604931b399a0dc82b956e939c52911680c9ac10c

data/Manifest.txt CHANGED Viewed

@@ -23,10 +23,12 @@ test/data/cities11.csv
 test/data/customers11.csv
 test/data/shakespeare.csv
 test/helper.rb
+test/test_buffer.rb
 test/test_converter.rb
 test/test_parser.rb
 test/test_parser_formats.rb
 test/test_parser_java.rb
+test/test_parser_meta.rb
 test/test_parser_null.rb
 test/test_parser_numeric.rb
 test/test_parser_strict.rb

data/lib/csvreader/base.rb CHANGED Viewed

@@ -6,6 +6,8 @@ require 'logger'
 require 'forwardable'
 require 'stringio'
 require 'date'    ## use for Date.parse and DateTime.parse
+require 'yaml'    ## used for (optional) meta data blocks
 ###

data/lib/csvreader/buffer.rb CHANGED Viewed

@@ -23,20 +23,45 @@ class Buffer   ## todo: find a better name:
     end
   end # method getc
-  def peek
+  def peekn( lookahead )   ## look ahead up to lookahead chars (returned as a string); nothing gets removed from the buffer
+    ## todo/check:  use a new method peekstr or match or something
+    ##    for more than one char of lookahead (lookahead > 1) - why? why not?
+      if @buf.size == 0 && @io.eof?
+        ## puts "peek - hitting eof!!!"
+        return  "\0"   ## nothing buffered and nothing more to read: return NUL char (0) for now
+      end
+      while @buf.size < lookahead do   ## fill up buffer char by char
+         ## todo/check: add/append NUL char (0) - why? why not?
+         break if @io.eof?    ## nothing more to read; break out of filling up buffer
+         c = @io.getc
+         @buf.push( c )
+         ## puts "peek - fill buffer >#{c}< (#{c.ord})"
+      end
+      @buf[0,lookahead].join   ## note: result is shorter than lookahead if input ran out while filling up
+  end
+  def peek1   ## look at the next char (single-char lookahead); char stays in the buffer
     if @buf.size == 0 && @io.eof?
       ## puts "peek - hitting eof!!!"
       return  "\0"   ## return NUL char (0) for now
     end
     if @buf.size == 0
-       c = @io.getc
-       @buf.push( c )
-       ## puts "peek - fill buffer >#{c}< (#{c.ord})"
+        c = @io.getc
+        @buf.push( c )
+        ## puts "peek - fill buffer >#{c}< (#{c.ord})"
     end
-    @buf.first
-  end # method peek
+    @buf[0]    ## first buffered char - same as @buf.first
+  end # method peek1
+  alias :peek :peek1  ## for now peek is an alias for peek1 (callers using peek keep working)
 end # class Buffer
 end # class CsvReader

data/lib/csvreader/parser_std.rb CHANGED Viewed

@@ -37,7 +37,8 @@ def logger()  self.class.logger; end
-attr_reader :config   ## todo/fix: change config to proper dialect class/struct - why? why not?
+attr_reader   :config   ## todo/fix: change config to proper dialect class/struct - why? why not?
+attr_reader   :meta
 ##
 ##  todo/check:
@@ -56,6 +57,8 @@ def initialize( null:     ['\N', 'NA'],  ## note: set to nil for no null vales /
   @config[:null]    = null   ## null values
   @config[:numeric] = numeric
   @config[:nan]     = nan   # not a number (NaN) e.g. Float::NAN
+  @meta  = nil     ## no meta data block   (use empty hash {} - why? why not?)
 end
@@ -244,6 +247,58 @@ end
+def parse_meta( input )   ## parse yaml meta data block enclosed in "---" lines; returns loaded data or {} if block is empty
+  ## todo/check:
+  ##  check again for input.peekn(4) =~ /^---[\n\r \t]$/ - why? why not?
+  input.getc   ## eat-up (add document header ---) - skip "---"
+  input.getc
+  input.getc
+  ## todo/fix: make peekn(4)=~/^---[\n\r \t]$/ "more strict"
+  ##    use match() or something to always match regexp
+  skip_spaces( input )   # eat-up optional whitespaces in header line
+  skip_newline( input )
+  buf = "---\n"    ## note: start buffer with yaml header line - why?
+  ##   YAML.load("")        return false !!!
+  ##   YAML.load("---\n")   returns nil -- yes!!  if we get nil return empty hash {}
+  newline = true   ## true if at the start of a line; footer marker only gets checked there
+  ## eat-up until we hit "---" again
+  loop do
+    if input.eof?
+      raise ParseError.new( "end of input/stream - meta block footer >---< expected!!!!" )
+    elsif (c=input.peek; c==LF || c==CR)
+      while (c=input.peek; c==LF || c==CR )   ## add newlines
+        buf << input.getc    ## append newline char(s) to buffer (kept for yaml parsing)
+      end
+      newline = true
+    elsif newline && input.peekn(4) =~ /^---[\n\r \t]?$/   ## check if meta block end marker? (4th char is optional e.g. if input ends right after ---)
+      ## todo/fix/check: allow (ignore) spaces after ---  why? why not?
+      input.getc   ## eat-up (skip) meta block footer "---"
+      input.getc
+      input.getc
+      skip_spaces( input )   # eat-up optional whitespaces in footer line
+      skip_newline( input )
+      break
+    else
+      buf << input.getc
+      newline = false
+    end
+  end
+  data = YAML.load( buf )   ## NOTE(review): YAML.load on external input - consider YAML.safe_load if input is untrusted
+  ## todo: check edge cases - always should return a hash or nil
+  ##     what to do with just integer, string or array etc. ???
+  data = {}   if data.nil?     ## note: if nil return empty hash e.g. {}
+  data
+end  ## parse_meta
 def skip_newline( input )    ## note: singular (strict) version
   return if input.eof?
@@ -268,12 +323,17 @@ def skip_until_eol( input )
   end
 end
 def skip_spaces( input )
-  return if input.eof?
+  return 0   if input.eof?   ## nothing to skip at end of input
+  ## note: return number of spaces and tabs skipped (e.g. 0,1,2,etc.)
+  spaces_count = 0
   while (c=input.peek; c==SPACE || c==TAB)
     input.getc   ## note: always eat-up all spaces (" ") and tabs (\t)
+    spaces_count += 1   ## count every skipped char (space or tab)
   end
+  spaces_count   ## 0 if nothing got skipped
 end
@@ -282,11 +342,17 @@ end
 def parse_lines( input, &block )
+  ## note: reset (optional) meta data block
+  @meta  = nil     ## no meta data block   (use empty hash {} - why? why not?)
+  ## note: track number of records
+  ##   used for meta block (can only start before any records e.g. if record_num == 0)
+  record_num = 0
   loop do
     break if input.eof?
-    skip_spaces( input )
+    skipped_spaces = skip_spaces( input )
     if input.peek == COMMENT        ## comment line
       logger.debug "skipping comment - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
@@ -295,10 +361,19 @@ def parse_lines( input, &block )
     elsif (c=input.peek; c==LF || c==CR || input.eof?)
       logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
       skip_newline( input )
+    elsif record_num == 0 && skipped_spaces == 0 && meta.nil? && input.peekn(4) =~ /^---[\n\r \t]$/
+      ## note: assume "---" (MUST BE) followed by newline (\r or \n), space or tab starts a meta block
+      logger.debug "start meta block"  if logger.debug?
+      ## note: meta gets stored as object attribute (state/state/state!!)
+      ##   use meta attribute to get meta data after reading first record
+      @meta = parse_meta( input )   ## note: assumes a hash gets returned
+      logger.debug "  meta: >#{meta.inspect}<"  if logger.debug?
     else
       logger.debug "start record - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
       record = parse_record( input )
+      record_num +=1
       ## note: requires block - enforce? how? why? why not?
       block.call( record )   ## yield( record )
     end

data/lib/csvreader/version.rb CHANGED Viewed

@@ -5,7 +5,7 @@ class CsvReader   ## note: uses a class for now - change to module - why? why no
   MAJOR = 1    ## todo: namespace inside version or something - why? why not??
   MINOR = 0
-  PATCH = 2
+  PATCH = 3
   VERSION = [MAJOR,MINOR,PATCH].join('.')

data/test/test_buffer.rb ADDED Viewed

@@ -0,0 +1,39 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_buffer.rb
+require 'helper'
+class TestBuffer < MiniTest::Test   ## tests for buffer lookahead (peek, peek1 and peekn)
+def test_peek   ## peeking before and after eating one char with getc
+  buf = CsvReader::Buffer.new( <<TXT )
+# hello
+1,2,3
+TXT
+  assert_equal '#',    buf.peek      ## note: repeated peeks expect the same leading char(s)
+  assert_equal '#',    buf.peek1
+  assert_equal '#',    buf.peekn(1)
+  assert_equal '# ',   buf.peekn(2)
+  assert_equal '# h',  buf.peekn(3)
+  assert_equal '# he', buf.peekn(4)
+  buf.getc   ## eat first char
+  assert_equal ' ',    buf.peek      ## lookahead now expected to start at second char
+  assert_equal ' ',    buf.peek1
+  assert_equal ' ',    buf.peekn(1)
+  assert_equal ' h',   buf.peekn(2)
+  assert_equal ' he',  buf.peekn(3)
+  assert_equal ' hel', buf.peekn(4)
+end
+end # class TestBuffer

data/test/test_parser_meta.rb ADDED Viewed

@@ -0,0 +1,71 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_parser_meta.rb
+require 'helper'
+class TestParserMeta < MiniTest::Test   ## tests for (optional) yaml meta data blocks
+def parser
+  parser = CsvReader::Parser::DEFAULT   ## note: local assignment is redundant; value is the method's return value
+end
+def test_parse   ## three cases: with meta data, with empty meta data, without meta data
+  records = [["a", "b", "c"],
+             ["1", "2", "3"]]
+  assert_equal records, parser.parse( <<TXT )
+# with meta data
+## see https://blog.datacite.org/using-yaml-frontmatter-with-csv/
+---
+columns:
+- title: Purchase Date
+  type: date
+- title: Item
+  type: string
+- title: Amount (€)
+  type: float
+---
+a,b,c
+1,2,3
+TXT
+  pp parser.meta   ## debug output
+  meta = { "columns"=>
+             [{"title"=>"Purchase Date", "type"=>"date"},
+              {"title"=>"Item",          "type"=>"string"},
+              {"title"=>"Amount (€)",    "type"=>"float"}]
+         }
+  assert_equal meta, parser.meta   ## meta block expected as hash with string keys
+  assert_equal records, parser.parse( <<TXT )
+# with (empty) meta data
+---
+---
+a,b,c
+1,2,3
+TXT
+  pp parser.meta   ## debug output
+  meta = {}
+  assert_equal meta, parser.meta   ## empty meta block expected as empty hash (not nil)
+  assert_equal records, parser.parse( <<TXT )
+# without meta data
+a,b,c
+1,2,3
+TXT
+  assert_nil parser.meta   ## no meta block: meta expected to be nil (not left over from earlier parse)
+end
+end # class TestParserMeta

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: csvreader
 version: !ruby/object:Gem::Version
-  version: 1.0.2
+  version: 1.0.3
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-10-21 00:00:00.000000000 Z
+date: 2018-10-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rdoc
@@ -74,10 +74,12 @@ files:
 - test/data/customers11.csv
 - test/data/shakespeare.csv
 - test/helper.rb
+- test/test_buffer.rb
 - test/test_converter.rb
 - test/test_parser.rb
 - test/test_parser_formats.rb
 - test/test_parser_java.rb
+- test/test_parser_meta.rb
 - test/test_parser_null.rb
 - test/test_parser_numeric.rb
 - test/test_parser_strict.rb