RubyGems - csvreader - Versions diffs - 1.1.1 → 1.1.2 - Mend

csvreader 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

checksums.yaml +4 -4
data/Manifest.txt +3 -0
data/README.md +30 -0
data/lib/csvreader/base.rb +2 -1
data/lib/csvreader/parser_std.rb +56 -21
data/lib/csvreader/version.rb +1 -1
data/test/data/iris.attrib.csv +25 -0
data/test/data/lcc.attrib.csv +14 -0
data/test/test_parser_directive.rb +68 -0
metadata +5 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c522e332ef3c1fead487b99d5fe147ba43ad2090
-  data.tar.gz: 51dd6d88ef8dc35615513961bab7e0e1c3b3512b
+  metadata.gz: cf620967ec1983a211f8e2436a4b50aca3bbe023
+  data.tar.gz: 76da0bbce4a76c4b60e37f1cb93be23d2aec504e
 SHA512:
-  metadata.gz: 7e563f75e916829e8de1b0a3b1208dd089de9a7907d010e3ba2cd23f1a70fedcb8d98c95e65c15ab7d3ad8705ae41a4ad6cd543ba20d6a72dc67f27b0060286b
-  data.tar.gz: 57036e2457b4dc1837748538062150650b47abef3d2493f4c4f42db4291fdd3001cb6fb218eca38c7c11816c67360cfea74030e137ff7edf8de1fb9e47f991ec
+  metadata.gz: 6024f630a6c982beffd597107cfa75c1e2d6e86e174408632f4e31aa8d4c5a2ea6be8608f678f64da6bd6ba914e9f3ed55fce044a25593bd92757a82bb0d082e
+  data.tar.gz: 98bed6e7938399640d942d5c8d9f420d01f4d048d06c09dec2f1e6e7e833a8c38c42419a520445b13166743615de7bd120eec20a4c607d377ebf40a0109bcc47

data/Manifest.txt CHANGED Viewed

@@ -22,12 +22,15 @@ test/data/beer11.csv
 test/data/cars11.csv
 test/data/cities11.csv
 test/data/customers11.csv
+test/data/iris.attrib.csv
 test/data/iris11.csv
+test/data/lcc.attrib.csv
 test/data/shakespeare.csv
 test/helper.rb
 test/test_buffer.rb
 test/test_converter.rb
 test/test_parser.rb
+test/test_parser_directive.rb
 test/test_parser_fixed.rb
 test/test_parser_formats.rb
 test/test_parser_java.rb

data/README.md CHANGED Viewed

@@ -8,6 +8,36 @@
 * forum :: [wwwmake](http://groups.google.com/group/wwwmake)
+## What's New?
+**v1.1.2**: Added built-in support for single quotes (`'`) to default parser ("The Right Way").
+Now you can use both, that is, single (`'...'`) or double quotes (`"..."`)
+like in ruby (or javascript or html or ...) :-).
+**v1.1.1**: Added built-in support for (optional) alternative comments (`%`) - used by
+ARFF (attribute relation file format) -
+and support for (optional) directives (`@`) in header (that is, before any records)
+to default parser ("The Right Way").
+Now you can use either `#` or `%` for comments, the first one "wins" - you CANNOT use both.
+Now you can use either a front matter (`---`) block
+or directives (e.g. `@attribute`, `@relation`, etc.)
+for meta data, the first one "wins" - you CANNOT use both.
+**v1.1.0**: Added new fixed width field (fwf) parser (see `ParserFixed`) for supporting fields with fixed width (and no separator)
+e.g.`Csv.fixed.parse( txt, width: [8,-2,8,-3,32,-2,14] )`.
+**v1.0.3**: Added built-in support for an (optional) front matter (`---`) meta data block
+in header (that is, before any records)
+to default parser ("The Right Way"). See [CSVY.org](http://csvy.org) for more.
+Use `Csv.parser.meta` to get the parsed meta data block hash (or `nil` if none).
 ## Usage

data/lib/csvreader/base.rb CHANGED Viewed

@@ -165,4 +165,5 @@ end # class CsvHashReader
-puts CsvReader.banner   # say hello
+# say hello
+puts CsvReader.banner    if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)

data/lib/csvreader/parser_std.rb CHANGED Viewed

@@ -10,13 +10,17 @@ class ParserStd
 ## char constants
-DOUBLE_QUOTE = "\""
-BACKSLASH    = "\\"    ## use BACKSLASH_ESCAPE ??
-COMMENT      = "#"      ## use COMMENT_HASH or HASH or ??
-SPACE        = " "      ##   \s == ASCII 32 (dec)            =    (Space)
-TAB          = "\t"     ##   \t == ASCII 0x09 (hex)          = HT (Tab/horizontal tab)
-LF	         = "\n"     ##   \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
-CR	         = "\r"     ##   \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)
+DOUBLE_QUOTE  = "\""
+SINGLE_QUOTE  = "'"
+BACKSLASH     = "\\"    ## use BACKSLASH_ESCAPE ??
+COMMENT1      = "#"      ## use COMMENT_HASH or HASH or ??
+COMMENT2      = "%"      ## use COMMENT_PERCENT or PERCENT or ??
+DIRECTIVE     = "@"     ## use a different name e.g. AT or ??
+SPACE         = " "      ##   \s == ASCII 32 (dec)            =    (Space)
+TAB           = "\t"     ##   \t == ASCII 0x09 (hex)          = HT (Tab/horizontal tab)
+LF	          = "\n"     ##   \n == ASCII 0x0A (hex) 10 (dec) = LF (Newline/line feed)
+CR	          = "\r"     ##   \r == ASCII 0x0D (hex) 13 (dec) = CR (Carriage return)
 ###################################
@@ -101,13 +105,14 @@ end ## method parse
 private
 def parse_escape( input )
   value = ""
   if input.peek == BACKSLASH
     input.getc ## eat-up backslash
-    if (c=input.peek; c==BACKSLASH || c==LF || c==CR || c==',' || c=='"' )
+    if (c=input.peek; c==BACKSLASH || c==LF || c==CR || c==',' || c==DOUBLE_QUOTE || c==SINGLE_QUOTE )
       logger.debug "  add escaped char >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
       value << input.getc     ## add escaped char (e.g. lf, cr, etc.)
     else
@@ -122,23 +127,24 @@ def parse_escape( input )
 end
-def parse_doublequote( input )
+def parse_quote( input, quote:)
   value = ""
-  if input.peek == DOUBLE_QUOTE
-    input.getc  ## eat-up double_quote
+  if input.peek == quote
+    input.getc  ## eat-up quote
     loop do
-      while (c=input.peek; !(c==DOUBLE_QUOTE || c==BACKSLASH || input.eof?))
-        value << input.getc   ## eat-up everything until hitting double_quote (") or backslash (escape)
+      while (c=input.peek; !(c==quote || c==BACKSLASH || input.eof?))
+        value << input.getc   ## eat-up everything until hitting quote (e.g. " or ') or backslash (escape)
       end
       if input.eof?
         break
       elsif input.peek == BACKSLASH
         value << parse_escape( input )
-      else   ## assume input.peek == DOUBLE_QUOTE
-        input.getc ## eat-up double_quote
-        if input.peek == DOUBLE_QUOTE  ## doubled up quote?
+      else   ## assume input.peek == quote
+        input.getc ## eat-up quote
+        if input.peek == quote  ## doubled up quote?
           value << input.getc   ## add doubled-up quote and continue!!!!
         else
           break
@@ -146,13 +152,14 @@ def parse_doublequote( input )
       end
     end
   else
-    raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - DOUBLE_QUOTE (\") expected in parse_double_quote!!!!" )
+    raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - QUOTE (#{quote}) expected in parse_quote!!!!" )
   end
   value
 end
 def parse_field( input )
   value = ""
@@ -175,11 +182,18 @@ def parse_field( input )
     end
   elsif input.peek == DOUBLE_QUOTE
     logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
-    value << parse_doublequote( input )
+    value << parse_quote( input, quote: DOUBLE_QUOTE )
     ## note: always eat-up all trailing spaces (" ") and tabs (\t)
     skip_spaces( input )
     logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
+  elsif input.peek == SINGLE_QUOTE    ## allow single quote too (by default)
+    logger.debug "start single_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
+    value << parse_quote( input, quote: SINGLE_QUOTE )
+    ## note: always eat-up all trailing spaces (" ") and tabs (\t)
+    skip_spaces( input )
+    logger.debug "end single_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
   else
     logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
     ## consume simple value
@@ -349,20 +363,41 @@ def parse_lines( input, &block )
   ##   used for meta block (can only start before any records e.g. if record_num == 0)
   record_num = 0
+  ## note: can either use '#' or '%' but NOT both; first one "wins"
+  comment = nil
+  ## note: can either use directives (@) or frontmatter (---) block; first one "wins"
+  has_seen_directive   = false
+  has_seen_frontmatter = false   ## - rename to  has_seen_dash (---) - why? why not???
   loop do
     break if input.eof?
     skipped_spaces = skip_spaces( input )
-    if input.peek == COMMENT        ## comment line
-      logger.debug "skipping comment - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
+    if comment.nil? && (c=input.peek; c==COMMENT1 || c==COMMENT2)
+      logger.debug "skipping comment (first) - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
+      comment = input.getc  ## first comment line (determines/fixes "allowed" comment-style)
+      skip_until_eol( input )
+      skip_newline( input )
+    elsif comment && input.peek == comment        ## (another) comment line
+      logger.debug "skipping comment (follow-up) - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
       skip_until_eol( input )
       skip_newline( input )
     elsif (c=input.peek; c==LF || c==CR || input.eof?)
       logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
       skip_newline( input )
-    elsif record_num == 0 && skipped_spaces == 0 && meta.nil? && input.peekn(4) =~ /^---[\n\r \t]$/
+    elsif record_num == 0 && has_seen_frontmatter == false && input.peek==DIRECTIVE
+      ## note: "skip" directives for now
+      has_seen_directive = true
+      logger.debug "skip directive"  if logger.debug?
+      skip_until_eol( input )
+      skip_newline( input )
+    elsif record_num == 0 && has_seen_directive == false && has_seen_frontmatter == false &&
+          skipped_spaces == 0 && input.peekn(4) =~ /^---[\n\r \t]$/
       ## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
+      has_seen_frontmatter = true
       logger.debug "start meta block"  if logger.debug?
       ## note: meta gets stored as object attribute (state/state/state!!)
       ##   use meta attribute to get meta data after reading first record

data/lib/csvreader/version.rb CHANGED Viewed

@@ -5,7 +5,7 @@ class CsvReader   ## note: uses a class for now - change to module - why? why no
   MAJOR = 1    ## todo: namespace inside version or something - why? why not??
   MINOR = 1
-  PATCH = 1
+  PATCH = 2
   VERSION = [MAJOR,MINOR,PATCH].join('.')

data/test/data/iris.attrib.csv ADDED Viewed

@@ -0,0 +1,25 @@
+% 1. Title: Iris Plants Database
+%
+% 2. Sources:
+% (a) Creator: R.A. Fisher
+@RELATION iris
+@ATTRIBUTE sepallength NUMERIC
+@ATTRIBUTE sepalwidth NUMERIC
+@ATTRIBUTE petallength NUMERIC
+@ATTRIBUTE petalwidth NUMERIC
+@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
+@DATA
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+4.7,3.2,1.3,0.2,Iris-setosa
+4.6,3.1,1.5,0.2,Iris-setosa
+5.0,3.6,1.4,0.2,Iris-setosa
+5.4,3.9,1.7,0.4,Iris-setosa
+4.6,3.4,1.4,0.3,Iris-setosa
+5.0,3.4,1.5,0.2,Iris-setosa
+4.4,2.9,1.4,0.2,Iris-setosa
+4.9,3.1,1.5,0.1,Iris-setosa

data/test/data/lcc.attrib.csv ADDED Viewed

@@ -0,0 +1,14 @@
+%  Attribute-Relation File Format (ARFF) Example
+%    see https://www.cs.waikato.ac.nz/ml/weka/arff.html
+@relation LCCvsLCSH
+@attribute LCC string
+@attribute LCSH string
+@data
+AG5,   'Encyclopedias and dictionaries.;Twentieth century.'
+AS262, 'Science -- Soviet Union -- History.'
+AE5,   'Encyclopedias and dictionaries.'
+AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Phases.'
+AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Tables.'

data/test/test_parser_directive.rb ADDED Viewed

@@ -0,0 +1,68 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_parser_directive.rb
+require 'helper'
+class TestParserDirective < MiniTest::Test
+def parser
+  parser = CsvReader::Parser::DEFAULT
+end
+def test_iris
+  records = [["5.1","3.5","1.4","0.2","Iris-setosa"],
+             ["4.9","3.0","1.4","0.2","Iris-setosa"]]
+  assert_equal records, parser.parse( <<TXT )
+% with meta data - arff (attribute relation file format)-style
+%
+@RELATION iris
+@ATTRIBUTE sepallength NUMERIC
+@ATTRIBUTE sepalwidth NUMERIC
+@ATTRIBUTE petallength NUMERIC
+@ATTRIBUTE petalwidth NUMERIC
+@ATTRIBUTE class {Iris-setosa,Iris-versicolor,Iris-virginica}
+@DATA
+5.1,3.5,1.4,0.2,Iris-setosa
+4.9,3.0,1.4,0.2,Iris-setosa
+TXT
+end
+def test_lcc
+  records = [['AG5',   'Encyclopedias and dictionaries.;Twentieth century.'],
+             ['AS262', 'Science -- Soviet Union -- History.'],
+             ['AE5',   'Encyclopedias and dictionaries.'],
+             ['AS281', 'Astronomy, Assyro-Babylonian.;Moon -- Phases.'],
+             ['AS281', 'Astronomy, Assyro-Babylonian.;Moon -- Tables.']]
+  assert_equal records, parser.parse( <<TXT )
+%  Attribute-Relation File Format (ARFF) Example
+%    see https://www.cs.waikato.ac.nz/ml/weka/arff.html
+@relation LCCvsLCSH
+@attribute LCC string
+@attribute LCSH string
+@data
+AG5,   'Encyclopedias and dictionaries.;Twentieth century.'
+AS262, 'Science -- Soviet Union -- History.'
+AE5,   'Encyclopedias and dictionaries.'
+AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Phases.'
+AS281, 'Astronomy, Assyro-Babylonian.;Moon -- Tables.'
+TXT
+end
+end # class TestParserDirective

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: csvreader
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.1.2
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-10-23 00:00:00.000000000 Z
+date: 2018-10-24 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rdoc
@@ -73,12 +73,15 @@ files:
 - test/data/cars11.csv
 - test/data/cities11.csv
 - test/data/customers11.csv
+- test/data/iris.attrib.csv
 - test/data/iris11.csv
+- test/data/lcc.attrib.csv
 - test/data/shakespeare.csv
 - test/helper.rb
 - test/test_buffer.rb
 - test/test_converter.rb
 - test/test_parser.rb
+- test/test_parser_directive.rb
 - test/test_parser_fixed.rb
 - test/test_parser_formats.rb
 - test/test_parser_java.rb