RubyGems - csvreader - Versions diffs - 1.2.0 → 1.2.1 - Mend

csvreader 1.2.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

checksums.yaml +4 -4
data/README.md +11 -2
data/lib/csvreader/base.rb +10 -0
data/lib/csvreader/parser_std.rb +26 -11
data/lib/csvreader/version.rb +1 -1
data/test/test_parser_meta.rb +2 -0
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 5e83a3e71ad1ec014c4744e80be07aa7b6caef10
-  data.tar.gz: c181a4d7f379f241e5a8c1a21af99523c8a0c9d3
+  metadata.gz: e61cda6f5b0fae762451efa0b0819e53b6da9966
+  data.tar.gz: e6dadbde1d714247046603fbdcb1fbd348cacc4c
 SHA512:
-  metadata.gz: be9862e8ff97642f27a18e8d9160d534e3197a862e2a8c2d94b2ffe01b264a47b2e0996693dbc57b5cc68bd6113813fce7ea75289fd2a0e227ad0adc476868b3
-  data.tar.gz: b7696e7342f7676a928c15f6f35b90c78c56e7519de3eb6de95e4fbb3dd3c7823136bda2676f46aea29930c5026ee9d7384e86fb26cffefb1603e9279fbe0ce7
+  metadata.gz: 6494cb0052000592cff4766946c3b7db0ec026db220f2e3857563d6070f282089034a8405c3eb4d8807f1a2dbe4cce67bd789b2794f803765f5fe9702d62a856
+  data.tar.gz: b47bd4cc6a342c5cc5e01e5ec1a67c7ae2fc3a7fc6fb4c1bcbcac018ab0291c6af076444aafb7ecac2f83ba814a502366583bbb61fa36c961e7009b217a3819b

data/README.md CHANGED

@@ -9,9 +9,18 @@
 ## What's News?
-**v1.2** Add support for alternative (non-space) separators (e.g. `;|^:`)
+**v1.2.1** Added support for an (optional) hashtag row
+to the default parser (`ParserStd`) for
+supporting the [Humanitarian eXchange Language (HXL)](http://hxlstandard.org).
+The default is turned off (`false`). Use `Csv.human`
+or `Csv.hum` or `Csv.hxl` for the pre-defined parser with hashtag turned on.
+**v1.2** Added support for alternative (non-space) separators (e.g. `;|^:`)
 to the default parser (`ParserStd`).
@@ -531,7 +540,7 @@ Csv.tab.parse( ... )  # note: "classic" strict tab format
 Csv.tab.read( ... )
 # ...
-Csv.table.parse( ... )  # note: "classic" strict tab format
+Csv.table.parse( ... )  # note: "classic" one or more space (or tab) table format
 Csv.table.read( ... )
 # ...
 ```

data/lib/csvreader/base.rb CHANGED

@@ -42,6 +42,7 @@ class Parser
   NUMERIC = ParserStd.new( numeric: true,
                            nan: ['#NAN', 'NAN', 'NaN', 'nan' ],
                            null: "" )
+  HUMAN   = ParserStd.new( hashtag: true )
   RFC4180 = ParserStrict.new
@@ -72,6 +73,10 @@ class Parser
   def self.numeric()         NUMERIC;         end
   def self.num()             numeric;         end
   def self.n()               numeric;         end
+  def self.human()           HUMAN;           end
+  def self.hum()             human;           end
+  def self.hxl()             human;           end  ## HXL (humanitarian eXchange language)
   def self.strict()          STRICT;          end ## alternative alias for STRICT
   def self.rfc4180()         RFC4180;         end ## alternative alias for RFC4180
   def self.excel()           EXCEL;           end ## alternative alias for EXCEL
@@ -94,6 +99,7 @@ class CsvReader
   ### pre-define CsvReader (built-in) formats/dialect
   DEFAULT = Builder.new( Parser::DEFAULT )
   NUMERIC = Builder.new( Parser::NUMERIC )
+  HUMAN   = Builder.new( Parser::HUMAN )
   STRICT  = Builder.new( Parser::STRICT )
   RFC4180 = Builder.new( Parser::RFC4180 )
@@ -113,6 +119,10 @@ class CsvReader
   def self.numeric()         NUMERIC;         end
   def self.num()             numeric;         end
   def self.n()               numeric;         end
+  def self.human()           HUMAN;           end
+  def self.hum()             human;           end
+  def self.hxl()             human;           end  ## HXL (humanitarian eXchange language)
   def self.strict()          STRICT;          end ## alternative alias for STRICT
   def self.rfc4180()         RFC4180;         end ## alternative alias for RFC4180
   def self.excel()           EXCEL;           end ## alternative alias for EXCEL

data/lib/csvreader/parser_std.rb CHANGED

@@ -13,8 +13,8 @@ class ParserStd
 DOUBLE_QUOTE  = "\""
 SINGLE_QUOTE  = "'"
 BACKSLASH     = "\\"    ## use BACKSLASH_ESCAPE ??
-COMMENT1      = "#"      ## use COMMENT_HASH or HASH or ??
-COMMENT2      = "%"      ## use COMMENT_PERCENT or PERCENT or ??
+COMMENT_HASH    = "#"      ## use COMMENT1 or COMMENT_HASH or HASH or ??
+COMMENT_PERCENT = "%"      ## use COMMENT2 or COMMENT_PERCENT or PERCENT or ??
 DIRECTIVE     = "@"     ## use a different name e.g. AT or ??
 SPACE         = " "      ##   \s == ASCII 32 (dec)            =    (Space)
 TAB           = "\t"     ##   \t == ASCII 0x09 (hex)          = HT (Tab/horizontal tab)
@@ -53,7 +53,8 @@ def initialize( sep:      ',',
                 null:     ['\N', 'NA'],  ## note: set to nil for no null values / not available (na)
                 numeric:  false,   ## (auto-)convert all non-quoted values to float
                 nan:      nil,      ## note: only if numeric - set mappings for Float::NAN (not a number) values
-                space:    nil
+                space:    nil,
+                hashtag:  false
               )
   @config = {}   ## todo/fix: change config to proper dialect class/struct - why? why not?
@@ -73,6 +74,10 @@ def initialize( sep:      ',',
   ##  todo/check: only use for unquoted values? why? why not?
   @config[:space]   = space
+  ## hxl - humanitarian eXchange language uses a hashtag row for "meta data"
+  ##  e.g. #sector+en,#subsector,#org,#country,#sex+#targeted,#sex+#targeted,#adm1
+  ##  do NOT treat # as a comment (always use % for now)
+  @config[:hashtag] = hashtag
   @meta  = nil     ## no meta data block   (use empty hash {} - why? why not?)
 end
@@ -103,6 +108,7 @@ def null=( value )        @config[:null]=value; end
 def numeric=( value )     @config[:numeric]=value; end
 def nan=( value )         @config[:nan]=value; end
 def space=( value )       @config[:space]=value; end
+def hashtag=( value )     @config[:hashtag]=value; end
@@ -296,7 +302,7 @@ end
 def parse_record( input, sep: )
   values = []
-  space = config[:space]
+  space   = config[:space]
   loop do
      value = parse_field( input, sep: sep )
@@ -424,20 +430,29 @@ def parse_lines( input, sep:, &block )
   ##   used for meta block (can only start before any records e.g. if record_num == 0)
   record_num = 0
-  ## note: can either use '#' or '%' but NOT both; first one "wins"
-  comment = nil
-  ## note: can either use directives (@) or frontmatter (---) block; first one "wins"
+  hashtag = config[:hashtag]
+  if hashtag
+    comment = COMMENT_PERCENT
+    ## todo/check: use a "heuristic" to check if it's a comment or a hashtag line? why? why not?
+  else
+    ## note: can either use '#' or '%' but NOT both; first one "wins"
+    comment = nil
+  end
   has_seen_directive   = false
   has_seen_frontmatter = false   ## - rename to  has_seen_dash (---) - why? why not???
+  ## note: can either use directives (@) or frontmatter (---) block; first one "wins"
   loop do
     break if input.eof?
     skipped_spaces = skip_spaces( input )
-    if comment.nil? && (c=input.peek; c==COMMENT1 || c==COMMENT2)
+    if comment.nil? && (c=input.peek; c==COMMENT_HASH || c==COMMENT_PERCENT)
       logger.debug "skipping comment (first) - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
       comment = input.getc  ## first comment line (determines/fixes "allowed" comment-style)
       skip_until_eol( input )
@@ -449,13 +464,13 @@ def parse_lines( input, sep:, &block )
     elsif (c=input.peek; c==LF || c==CR || input.eof?)
       logger.debug "skipping blank - peek >#{input.peek}< (#{input.peek.ord})"  if logger.debug?
       skip_newline( input )
-    elsif record_num == 0 && has_seen_frontmatter == false && input.peek==DIRECTIVE
+    elsif record_num == 0 && hashtag == false && has_seen_frontmatter == false && input.peek==DIRECTIVE
       ## note: "skip" directives for now
       has_seen_directive = true
       logger.debug "skip directive"  if logger.debug?
       skip_until_eol( input )
       skip_newline( input )
-    elsif record_num == 0 && has_seen_directive == false && has_seen_frontmatter == false &&
+    elsif record_num == 0 && hashtag == false && has_seen_directive == false && has_seen_frontmatter == false &&
           skipped_spaces == 0 && input.peekn(4) =~ /^---[\n\r \t]$/
       ## note: assume "---" (MUST BE) followed by newline (\r or \n) or space starts a meta block
       has_seen_frontmatter = true

data/lib/csvreader/version.rb CHANGED

@@ -6,7 +6,7 @@ class CsvReader   ## note: uses a class for now - change to module - why? why no
   module Version
     MAJOR = 1    ## todo: namespace inside version or something - why? why not??
     MINOR = 2
-    PATCH = 0
+    PATCH = 1
     ## self.to_s  - why? why not?
   end

data/test/test_parser_meta.rb CHANGED

@@ -16,6 +16,8 @@ end
 def test_parse
+  pp parser.config
   records = [["a", "b", "c"],
              ["1", "2", "3"]]

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: csvreader
 version: !ruby/object:Gem::Version
-  version: 1.2.0
+  version: 1.2.1
 platform: ruby
 authors:
 - Gerald Bauer
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-11-05 00:00:00.000000000 Z
+date: 2018-11-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rdoc