RubyGems - csvreader - Versions diffs - 1.1.2 → 1.1.3 - Mend

csvreader 1.1.2 → 1.1.3

Files changed (8) hide show

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: cf620967ec1983a211f8e2436a4b50aca3bbe023
-  data.tar.gz: 76da0bbce4a76c4b60e37f1cb93be23d2aec504e
+  metadata.gz: a920108ec183cff7c7cad8c0d967390b4f2bd38f
+  data.tar.gz: 2a32715b6e1eb3e83b3837de1d151169d8b3455f
 SHA512:
-  metadata.gz: 6024f630a6c982beffd597107cfa75c1e2d6e86e174408632f4e31aa8d4c5a2ea6be8608f678f64da6bd6ba914e9f3ed55fce044a25593bd92757a82bb0d082e
-  data.tar.gz: 98bed6e7938399640d942d5c8d9f420d01f4d048d06c09dec2f1e6e7e833a8c38c42419a520445b13166743615de7bd120eec20a4c607d377ebf40a0109bcc47
+  metadata.gz: f2264455eda5136261628cc77de24494d9ea11bb116c9ca5e36495f4f4b90101356444c9da75c37b6d5b9419b57ce4a145830bd1d6919ce0cbdb2ef05673bfad
+  data.tar.gz: 9c539db1ccac369ae23113587e9d529a95de0b080f3c12687e237d46bea1bdbb157b57f7b2f61d72f637cd914aecb11fbd8daeec11ecb38f49b65669a004e774

data/Manifest.txt CHANGED

@@ -37,6 +37,7 @@ test/test_parser_java.rb
 test/test_parser_meta.rb
 test/test_parser_null.rb
 test/test_parser_numeric.rb
+test/test_parser_quotes.rb
 test/test_parser_strict.rb
 test/test_parser_tab.rb
 test/test_reader.rb

data/README.md CHANGED

@@ -10,15 +10,25 @@
 ## What's New?
+**v1.1.3**: Added built-in support for French single and double quotes / guillemets (`‹› «»`) to the default parser ("The Right Way").
+Now you can use both, that is, single (`‹...›` or `›...‹`)
+or double (`«...»` or `»...«`).
+Note: A quote only "kicks-in" if it's the first (non-whitespace)
+character of the value (otherwise it's just a "vanilla" literal character).
 **v1.1.2**: Added built-in support for single quotes (`'`) to default parser ("The Right Way").
 Now you can use both, that is, single (`'...'`) or double quotes (`"..."`)
 like in ruby (or javascript or html or ...) :-).
+Note: A quote only "kicks-in" if it's the first (non-whitespace)
+character of the value (otherwise it's just a "vanilla" literal character)
+e.g. `48°51'24"N` needs no quote :-).
+With the "strict" parser you will get a firework of "stray" quote errors / exceptions.
 **v1.1.1**: Added built-in support for (optional) alternative comments (`%`) - used by
-ARFF (attribute relation file format) -
+[ARFF (attribute-relation file format)](https://waikato.github.io/weka-wiki/arff/) -
 and support for (optional) directives (`@`) in header (that is, before any records)
 to default parser ("The Right Way").
 Now you can use either `#` or `%` for comments, the first one "wins" - you CANNOT use both.
@@ -33,12 +43,13 @@ e.g.`Csv.fixed.parse( txt, width: [8,-2,8,-3,32,-2,14] )`.
 **v1.0.3**: Added built-in support for an (optional) front matter (`---`) meta data block
 in header (that is, before any records)
-to default parser ("The Right Way"). See [CSVY.org](http://csvy.org) for more.
+to default parser ("The Right Way") - used by [CSVY (yaml front matter for csv file format)](http://csvy.org).
 Use `Csv.parser.meta` to get the parsed meta data block hash (or `nil` if none).
 ## Usage
@@ -359,6 +370,32 @@ Staatliches Hofbräuhaus München,München,Hofbräu Oktoberfestbier,6.3%
 ```
+Or use the ARFF (attribute-relation file format)-like alternative style
+with `%` for comments and `@`-directives
+for "meta data" in the header (before any records):
+```
+%%%%%%%%%%%%%%%%%%
+% try with some comments
+%   and blank lines even before @-directives in header
+@RELATION Beer
+@ATTRIBUTE Brewery
+@ATTRIBUTE City
+@ATTRIBUTE Name
+@ATTRIBUTE Abv
+@DATA
+Andechser Klosterbrauerei,Andechs,Doppelbock Dunkel,7%
+Augustiner Bräu München,München,Edelstoff,5.6%
+Bayerische Staatsbrauerei Weihenstephan,  Freising,  Hefe Weissbier,   5.4%
+Brauerei Spezial,                         Bamberg,   Rauchbier Märzen, 5.1%
+Hacker-Pschorr Bräu,                      München,   Münchner Dunkel,  5.0%
+Staatliches Hofbräuhaus München,          München,   Hofbräu Oktoberfestbier, 6.3%
+```
 ### Q: How can I change the default format / dialect?

data/lib/csvreader/base.rb CHANGED

@@ -166,4 +166,4 @@ end # class CsvHashReader
 # say hello
-puts CsvReader.banner    if $DEBUG || (defined?($RUBYLIBS_DEBUG) && $RUBYLIBS_DEBUG)
+puts CsvReader.banner    if $DEBUG || (defined?($RUBYCOCO_DEBUG) && $RUBYCOCO_DEBUG)

data/lib/csvreader/parser_std.rb CHANGED

@@ -128,13 +128,13 @@ end
-def parse_quote( input, quote:)
+def parse_quote( input, opening_quote:, closing_quote:)
   value = ""
-  if input.peek == quote
-    input.getc  ## eat-up quote
+  if input.peek == opening_quote
+    input.getc  ## eat-up opening quote
     loop do
-      while (c=input.peek; !(c==quote || c==BACKSLASH || input.eof?))
+      while (c=input.peek; !(c==closing_quote || c==BACKSLASH || input.eof?))
         value << input.getc   ## eat-up everything until hitting quote (e.g. " or ') or backslash (escape)
       end
@@ -144,7 +144,9 @@ def parse_quote( input, quote:)
         value << parse_escape( input )
       else   ## assume input.peek == quote
         input.getc ## eat-up quote
-        if input.peek == quote  ## doubled up quote?
+        if opening_quote == closing_quote && input.peek == closing_quote
+          ## doubled up quote?
+          #   note: only works (enabled) for "" or '' and NOT for «»,‹›.. (if opening and closing differ)
           value << input.getc   ## add doube quote and continue!!!!
         else
           break
@@ -152,7 +154,7 @@ def parse_quote( input, quote:)
       end
     end
   else
-    raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - QUOTE (#{quote}) expected in parse_quote!!!!" )
+    raise ParseError.new( "found >#{input.peek} (#{input.peek.ord})< - CLOSING QUOTE (#{closing_quote}) expected in parse_quote!!!!" )
   end
   value
 end
@@ -182,18 +184,36 @@ def parse_field( input )
     end
   elsif input.peek == DOUBLE_QUOTE
     logger.debug "start double_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
-    value << parse_quote( input, quote: DOUBLE_QUOTE )
+    value << parse_quote( input, opening_quote: DOUBLE_QUOTE,
+                                 closing_quote: DOUBLE_QUOTE )
     ## note: always eat-up all trailing spaces (" ") and tabs (\t)
     skip_spaces( input )
     logger.debug "end double_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
   elsif input.peek == SINGLE_QUOTE    ## allow single quote too (by default)
     logger.debug "start single_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
-    value << parse_quote( input, quote: SINGLE_QUOTE )
+    value << parse_quote( input, opening_quote: SINGLE_QUOTE,
+                                 closing_quote: SINGLE_QUOTE )
     ## note: always eat-up all trailing spaces (" ") and tabs (\t)
     skip_spaces( input )
     logger.debug "end single_quote field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
+  elsif input.peek == "«"
+    value << parse_quote( input, opening_quote: "«",
+                                 closing_quote: "»" )
+    skip_spaces( input )
+  elsif input.peek == "»"
+    value << parse_quote( input, opening_quote: "»",
+                                 closing_quote: "«" )
+    skip_spaces( input )
+  elsif input.peek == "‹"
+    value << parse_quote( input, opening_quote: "‹",
+                                 closing_quote: "›" )
+    skip_spaces( input )
+  elsif input.peek == "›"
+    value << parse_quote( input, opening_quote: "›",
+                                 closing_quote: "‹" )
+    skip_spaces( input )
   else
     logger.debug "start reg field - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
     ## consume simple value

data/lib/csvreader/version.rb CHANGED

@@ -5,7 +5,7 @@ class CsvReader   ## note: uses a class for now - change to module - why? why no
   MAJOR = 1    ## todo: namespace inside version or something - why? why not??
   MINOR = 1
-  PATCH = 2
+  PATCH = 3
   VERSION = [MAJOR,MINOR,PATCH].join('.')

data/test/test_parser_quotes.rb ADDED

@@ -0,0 +1,53 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_parser_quotes.rb
+require 'helper'
+class TestParserQuotes < MiniTest::Test
+def parser
+  CsvReader::Parser::DEFAULT
+end
+def test_french_single
+  assert_equal [[ "a", "b", "c" ]],
+               parser.parse( " ‹a›, ‹b›, ›c‹ " )
+  assert_equal [[ "a,1", " b,2", "c, 3" ]],
+               parser.parse( " ‹a,1›, ‹ b,2›, ›c, 3‹ " )
+  assert_equal [[ %Q{"a"}, %Q{'b'}, %Q{c'"'"} ]],
+               parser.parse( %Q{ ‹"a"›, ‹'b'›, ›c'"'"‹} )
+  # note: quote matches only if first non-whitespace char
+  assert_equal [[ "_‹a›", "_‹b›", "›c‹" ]],
+               parser.parse( %Q{ _‹a›, _‹b›, "›c‹"} )
+end
+def test_french_double
+  assert_equal [[ "a", "b", "c" ]],
+               parser.parse( " «a», «b», »c« " )
+  assert_equal [[ "a,1", " b,2", "c, 3" ]],
+               parser.parse( " «a,1», « b,2», »c, 3« " )
+  assert_equal [[ %Q{"a"}, %Q{'b'}, %Q{c'"'"} ]],
+               parser.parse( %Q{ «"a"», «'b'», »c'"'"«} )
+  # note: quote matches only if first non-whitespace char
+  assert_equal [[ "_«a»", "_«b»", "»c«" ]],
+               parser.parse( %Q{ _«a», _«b», "»c«"} )
+end
+end # class TestParserQuotes

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: csvreader
 version: !ruby/object:Gem::Version
-  version: 1.1.2
+  version: 1.1.3
 platform: ruby
 authors:
 - Gerald Bauer
@@ -88,6 +88,7 @@ files:
 - test/test_parser_meta.rb
 - test/test_parser_null.rb
 - test/test_parser_numeric.rb
+- test/test_parser_quotes.rb
 - test/test_parser_strict.rb
 - test/test_parser_tab.rb
 - test/test_reader.rb