RubyGems - csvreader - Versions diffs - 1.2.1 → 1.2.5 - Mend

csvreader 1.2.1 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

checksums.yaml +5 -5
data/{HISTORY.md → CHANGELOG.md} +3 -3
data/Manifest.txt +13 -12
data/README.md +682 -677
data/Rakefile +33 -26
data/{test/data → datasets}/beer.csv +0 -0
data/{test/data → datasets}/beer11.csv +0 -0
data/{test/data → datasets}/cars11.csv +10 -10
data/{test/data → datasets}/cities11.csv +12 -12
data/{test/data → datasets}/customers11.csv +13 -13
data/{test/data → datasets}/iris.attrib.csv +25 -25
data/{test/data → datasets}/iris11.csv +163 -163
data/{test/data → datasets}/lcc.attrib.csv +14 -14
data/{test/data → datasets}/shakespeare.csv +9 -9
data/{test/data → datasets}/test.csv +0 -0
data/lib/csvreader/base.rb +36 -2
data/lib/csvreader/buffer.rb +0 -1
data/lib/csvreader/builder.rb +0 -1
data/lib/csvreader/converter.rb +0 -1
data/lib/csvreader/parser.rb +32 -33
data/lib/csvreader/parser_fixed.rb +105 -106
data/lib/csvreader/parser_json.rb +23 -5
data/lib/csvreader/parser_std.rb +582 -534
data/lib/csvreader/parser_strict.rb +290 -291
data/lib/csvreader/parser_tab.rb +22 -62
data/lib/csvreader/parser_table.rb +122 -123
data/lib/csvreader/parser_yaml.rb +23 -0
data/lib/csvreader/reader.rb +2 -3
data/lib/csvreader/reader_hash.rb +3 -2
data/lib/csvreader/version.rb +30 -32
data/lib/csvreader.rb +0 -1
data/test/helper.rb +1 -1
data/test/test_parser_autofix.rb +28 -0
data/test/test_parser_formats.rb +66 -66
data/test/test_parser_java.rb +208 -208
metadata +72 -25
data/LICENSE.md +0 -116

data/lib/csvreader/parser_tab.rb CHANGED Viewed

@@ -1,62 +1,22 @@
-# encoding: utf-8
-class CsvReader
-class ParserTab
-def parse( data, **kwargs, &block )
-  ## note: input: required each_line (string or io/file for example)
-  ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
-  input = data   ## assume it's a string or io/file handle
-  if block_given?
-    parse_lines( input, &block )
-  else
-    records = []
-    parse_lines( input ) do |record|
-      records << record
-    end
-    records
-  end
-end ## method parse
-private
-def parse_lines( input, &block )
-  ## note: each line only works with \n (windows) or \r\n (unix)
-  ##   will NOT work with \r (old mac, any others?) only!!!!
-  input.each_line do |line|
-    ## puts "line:"
-    ## pp line
-    ##  note: chomp('') if is an empty string,
-    ##    it will remove all trailing newlines from the string.
-    ##    use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
-    line = line.chomp( '' )
-    ## pp line
-    # note: trailing empty fields get (auto-)trimmed by split !!!!!!!
-    #  Solution!!  change split( "\t" ) to split( "\t", -1 )
-    #    If the limit parameter is omitted, trailing null fields are suppressed.
-    #     If limit is a positive number, at most that number of fields will be returned
-    #     (if limit is 1, the entire string is returned as the only entry in an array).
-    #     If negative, there is no limit to the number of fields returned, and trailing null fields are not suppressed.
-    values = line.split( "\t", -1 )
-    ## pp values
-    ## note: requires block - enforce? how? why? why not?
-    block.call( values )
-  end
-end # method parse_lines
-end # class ParserTab
-end # class CsvReader
+class CsvReader
+class ParserTab
+def parse( data, **kwargs, &block )
+  ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
+  ## note: input: required each_line (string or io/file for example)
+  ## assume data is a string or io/file handle
+  tab = TabReader.new( data )
+  if block_given?
+    tab.each( &block )
+  else
+    tab.to_a
+  end
+end ## method parse
+end # class ParserTab
+end # class CsvReader

data/lib/csvreader/parser_table.rb CHANGED Viewed

@@ -1,123 +1,122 @@
-# encoding: utf-8
-class CsvReader
-class ParserTable
-###################################
-## add simple logger with debug flag/switch
-#
-#  use Parser.debug = true   # to turn on
-#
-#  todo/fix: use logutils instead of std logger - why? why not?
-def self.build_logger()
-  l = Logger.new( STDOUT )
-  l.level = :info    ## set to :info on start; note: is 0 (debug) by default
-  l
-end
-def self.logger() @@logger ||= build_logger; end
-def logger()  self.class.logger; end
-attr_reader   :config   ## todo/fix: change config to proper dialect class/struct - why? why not?
-##
-##  todo/check:
-##    null values - include NA - why? why not?
-##        make null values case sensitive or add an option for case sensitive
-##        or better allow a proc as option for checking too!!!
-def initialize( space: nil )
-  @config = {}   ## todo/fix: change config to proper dialect class/struct - why? why not?
-  ## e.g. treat/convert char to space e.g. _-+• etc
-  ##   Man_Utd   => Man Utd
-  ##  or use it for leading and trailing spaces without quotes
-  ##  todo/check: only use for unquoted values? why? why not?
-  @config[:space]   = space
-end
-#########################################
-## config convenience helpers
-def space=( value )       @config[:space]=value; end
-def parse( str_or_readable, **kwargs, &block )
-  ## note: input: required each_line (string or io/file for example)
-  ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
-  input = str_or_readable   ## assume it's a string or io/file handle
-  if block_given?
-    parse_lines( input, &block )
-  else
-    records = []
-    parse_lines( input ) do |record|
-      records << record
-    end
-    records
-  end
-end ## method parse
-private
-def parse_lines( input, &block )
-  space = config[:space]
-  ## note: each line only works with \n (windows) or \r\n (unix)
-  ##   will NOT work with \r (old mac, any others?) only!!!!
-  input.each_line do |line|
-    logger.debug  "line:"             if logger.debug?
-    logger.debug line.pretty_inspect  if logger.debug?
-    ##  note: chomp('') if is an empty string,
-    ##    it will remove all trailing newlines from the string.
-    ##    use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
-    line = line.chomp( '' )
-    line = line.strip         ## strip leading and trailing whitespaces (space/tab) too
-    logger.debug line.pretty_inspect    if logger.debug?
-    if line.empty?             ## skip blank lines
-      logger.debug "skip blank line"    if logger.debug?
-      next
-    end
-    if line.start_with?( "#" )  ## skip comment lines
-      logger.debug "skip comment line"   if logger.debug?
-      next
-    end
-    # note: string.split defaults to split by space (e.g. /\s+/) :-)
-    #          for  just make it "explicit" with /[ \t]+/
-    values = line.split( /[ \t]+/ )
-    logger.debug values.pretty_inspect   if logger.debug?
-    if space
-      ## e.g. translate _-+ etc. if configured to space
-      ##  Man_Utd => Man Utd etc.
-       values = values.map {|value| value.tr(space,' ') }
-    end
-    ## note: requires block - enforce? how? why? why not?
-    block.call( values )
-  end
-end # method parse_lines
-end # class ParserTable
-end # class CsvReader
+class CsvReader
+class ParserTable
+###################################
+## add simple logger with debug flag/switch
+#
+#  use Parser.debug = true   # to turn on
+#
+#  todo/fix: use logutils instead of std logger - why? why not?
+def self.build_logger()
+  l = Logger.new( STDOUT )
+  l.level = :info    ## set to :info on start; note: is 0 (debug) by default
+  l
+end
+def self.logger() @@logger ||= build_logger; end
+def logger()  self.class.logger; end
+attr_reader   :config   ## todo/fix: change config to proper dialect class/struct - why? why not?
+##
+##  todo/check:
+##    null values - include NA - why? why not?
+##        make null values case sensitive or add an option for case sensitive
+##        or better allow a proc as option for checking too!!!
+def initialize( space: nil )
+  @config = {}   ## todo/fix: change config to proper dialect class/struct - why? why not?
+  ## e.g. treat/convert char to space e.g. _-+• etc
+  ##   Man_Utd   => Man Utd
+  ##  or use it for leading and trailing spaces without quotes
+  ##  todo/check: only use for unquoted values? why? why not?
+  @config[:space]   = space
+end
+#########################################
+## config convenience helpers
+def space=( value )       @config[:space]=value; end
+def parse( str_or_readable, **kwargs, &block )
+  ## note: input: required each_line (string or io/file for example)
+  ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
+  input = str_or_readable   ## assume it's a string or io/file handle
+  if block_given?
+    parse_lines( input, &block )
+  else
+    records = []
+    parse_lines( input ) do |record|
+      records << record
+    end
+    records
+  end
+end ## method parse
+private
+def parse_lines( input, &block )
+  space = config[:space]
+  ## note: each line only works with \n (windows) or \r\n (unix)
+  ##   will NOT work with \r (old mac, any others?) only!!!!
+  input.each_line do |line|
+    logger.debug  "line:"             if logger.debug?
+    logger.debug line.pretty_inspect  if logger.debug?
+    ##  note: chomp('') if is an empty string,
+    ##    it will remove all trailing newlines from the string.
+    ##    use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
+    line = line.chomp( '' )
+    line = line.strip         ## strip leading and trailing whitespaces (space/tab) too
+    logger.debug line.pretty_inspect    if logger.debug?
+    if line.empty?             ## skip blank lines
+      logger.debug "skip blank line"    if logger.debug?
+      next
+    end
+    if line.start_with?( "#" )  ## skip comment lines
+      logger.debug "skip comment line"   if logger.debug?
+      next
+    end
+    # note: string.split defaults to split by space (e.g. /\s+/) :-)
+    #          for  just make it "explicit" with /[ \t]+/
+    values = line.split( /[ \t]+/ )
+    logger.debug values.pretty_inspect   if logger.debug?
+    if space
+      ## e.g. translate _-+ etc. if configured to space
+      ##  Man_Utd => Man Utd etc.
+       values = values.map {|value| value.tr(space,' ') }
+    end
+    ## note: requires block - enforce? how? why? why not?
+    block.call( values )
+  end
+end # method parse_lines
+end # class ParserTable
+end # class CsvReader

data/lib/csvreader/parser_yaml.rb ADDED Viewed

@@ -0,0 +1,23 @@
+class CsvReader
+class ParserYaml
+def parse( data, **kwargs, &block )
+  ## note: kwargs NOT used for now (but required for "protocol/interface" by other parsers)
+  ## note: input: required each_line (string or io/file for example)
+  ## assume data is a string or io/file handle
+  csv = CsvYaml.new( data )
+  if block_given?
+    csv.each( &block )
+  else
+    csv.to_a
+  end
+end ## method parse
+end # class ParserYaml
+end # class CsvReader

data/lib/csvreader/reader.rb CHANGED Viewed

@@ -1,4 +1,3 @@
-# encoding: utf-8
 class CsvReader
@@ -155,11 +154,11 @@ class CsvReader
          ## check array / pipeline of converters is empty (size=0 e.g. is [])
          if @converters.empty?
-           @parser.parse( @io, kwargs, &block )
+           @parser.parse( @io, **kwargs, &block )
          else
            ## add "post"-processing with converters pipeline
            ##   that is, convert all strings to integer, float, date, ... if wanted
-           @parser.parse( @io, kwargs ) do |raw_record|
+           @parser.parse( @io, **kwargs ) do |raw_record|
              record = []
              raw_record.each_with_index do | value, i |
                record << @converters.convert( value, i )

data/lib/csvreader/reader_hash.rb CHANGED Viewed

@@ -1,4 +1,3 @@
-# encoding: utf-8
 class CsvHashReader
@@ -6,6 +5,8 @@ class CsvHashReader
 ## add convenience shortcuts / aliases for CsvReader support classes
 Parser      = CsvReader::Parser
 ParserFixed = CsvReader::ParserFixed
+ParserJson  = CsvReader::ParserJson
+ParserYaml  = CsvReader::ParserYaml
 Converter   = CsvReader::Converter
@@ -167,7 +168,7 @@ def_delegators :@io,
      kwargs[:width] = @kwargs[:width]    if @parser.is_a?( ParserFixed )
-     @parser.parse( @io, kwargs ) do |raw_values|     # sep: sep
+     @parser.parse( @io, **kwargs ) do |raw_values|     # sep: sep
         if @names.nil?    ## check for (first) headers row
           if @header_converters.empty?
             @names = raw_values   ## store header row / a.k.a. field/column names

data/lib/csvreader/version.rb CHANGED Viewed

@@ -1,32 +1,30 @@
-# encoding: utf-8
-class CsvReader   ## note: uses a class for now - change to module - why? why not?
-  module Version
-    MAJOR = 1    ## todo: namespace inside version or something - why? why not??
-    MINOR = 2
-    PATCH = 1
-    ## self.to_s  - why? why not?
-  end
-  VERSION = [Version::MAJOR,
-             Version::MINOR,
-             Version::PATCH].join('.')
-  def self.version   ## keep (as an alternative to VERSION) - why? why not?
-    VERSION
-  end
-  def self.banner
-    "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
-  end
-  def self.root
-    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
-  end
-end # class CsvReader
+class CsvReader   ## note: uses a class for now - change to module - why? why not?
+  module Version
+    MAJOR = 1    ## todo: namespace inside version or something - why? why not??
+    MINOR = 2
+    PATCH = 5
+    ## self.to_s  - why? why not?
+  end
+  VERSION = [Version::MAJOR,
+             Version::MINOR,
+             Version::PATCH].join('.')
+  def self.version   ## keep (as an alternative to VERSION) - why? why not?
+    VERSION
+  end
+  def self.banner
+    "csvreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}] in (#{root})"
+  end
+  def self.root
+    File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
+  end
+end # class CsvReader

data/lib/csvreader.rb CHANGED Viewed

@@ -1,4 +1,3 @@
-# encoding: utf-8
 ## our own code (without "top-level" shortcuts e.g. "modular version")

data/test/helper.rb CHANGED Viewed

@@ -13,7 +13,7 @@ require 'csvreader'
 ## add test_data_dir helper
 class CsvReader
   def self.test_data_dir
-    "#{root}/test/data"
+    "#{root}/datasets"
   end
 end

data/test/test_parser_autofix.rb ADDED Viewed

@@ -0,0 +1,28 @@
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_parser_autofix.rb
+require 'helper'
+class TestParserAutofix < MiniTest::Test
+def parser
+  CsvReader::Parser::DEFAULT
+end
+def test_quote_with_trailing_value
+  recs = [[ "Farrokh", "\"Freddy\" Mercury", "Bulsara" ]]
+  assert_equal recs, parser.parse( %Q{Farrokh,"Freddy" Mercury,Bulsara} )
+  assert_equal recs, parser.parse( %Q{  Farrokh , "Freddy" Mercury  , Bulsara } )
+  assert_equal recs, parser.parse( %Q{Farrokh,  "Freddy" Mercury   ,Bulsara} )
+end
+end # class TestParserAutofix

data/test/test_parser_formats.rb CHANGED Viewed

@@ -1,66 +1,66 @@
-# encoding: utf-8
-###
-#  to run use
-#     ruby -I ./lib -I ./test test/test_parser_formats.rb
-require 'helper'
-class TestParserFormats < MiniTest::Test
-def parser
-  CsvReader::Parser
-end
-def test_parse_whitespace
-   records = [["a", "b", "c"],
-              ["1", "2", "3"]]
-   ## don't care about newlines (\r\n) ??? - fix? why? why not?
-   assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
-   assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
-   assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
-   assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
-   assert_equal [["a", "b", "c"],
-                 [""],
-                 ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
-   assert_equal [["", ""],
-                 [""],
-                 ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
-   ## strict rfc4180 - no trim leading or trailing spaces or blank lines
-   assert_equal records,   parser.strict.parse( "a,b,c\n1,2,3" )
-   assert_equal [["a", "b", "c"],
-                 [""],
-                 ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
-   assert_equal [[" a", " b ", "c "],
-                 [""],
-                 ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
-    assert_equal [[" a", " b ", "c "],
-                  [" "],
-                  ["",""],
-                  ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
-end
-def test_parse_empties
-    assert_equal [], parser.default.parse( "\n \n \n" )
-    ## strict rfc4180 - no trim leading or trailing spaces or blank lines
-    assert_equal [[""],
-                  [" "],
-                  [" "]], parser.strict.parse( "\n \n \n" )
-    assert_equal [[""],
-                  [" "],
-                  [" "]], parser.strict.parse( "\n \n " )
-    assert_equal [[""]], parser.strict.parse( "\n" )
-    assert_equal [],     parser.strict.parse( "" )
-end
-end # class TestParserFormats
+# encoding: utf-8
+###
+#  to run use
+#     ruby -I ./lib -I ./test test/test_parser_formats.rb
+require 'helper'
+class TestParserFormats < MiniTest::Test
+def parser
+  CsvReader::Parser
+end
+def test_parse_whitespace
+   records = [["a", "b", "c"],
+              ["1", "2", "3"]]
+   ## don't care about newlines (\r\n) ??? - fix? why? why not?
+   assert_equal records, parser.default.parse( "a,b,c\n1,2,3" )
+   assert_equal records, parser.default.parse( "a,b,c\n1,2,3\n" )
+   assert_equal records, parser.default.parse( " a, b ,c \n\n1,2,3\n" )
+   assert_equal records, parser.default.parse( " a, b ,c \n \n1,2,3\n" )
+   assert_equal [["a", "b", "c"],
+                 [""],
+                 ["1", "2", "3"]], parser.default.parse( %Q{a,b,c\n""\n1,2,3\n} )
+   assert_equal [["", ""],
+                 [""],
+                 ["", "", ""]], parser.default.parse( %Q{,\n""\n"","",""\n} )
+   ## strict rfc4180 - no trim leading or trailing spaces or blank lines
+   assert_equal records,   parser.strict.parse( "a,b,c\n1,2,3" )
+   assert_equal [["a", "b", "c"],
+                 [""],
+                 ["1", "2", "3"]], parser.strict.parse( "a,b,c\n\n1,2,3" )
+   assert_equal [[" a", " b ", "c "],
+                 [""],
+                 ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n\n1,2,3" )
+    assert_equal [[" a", " b ", "c "],
+                  [" "],
+                  ["",""],
+                  ["1", "2", "3"]], parser.strict.parse( " a, b ,c \n \n,\n1,2,3" )
+end
+def test_parse_empties
+    assert_equal [], parser.default.parse( "\n \n \n" )
+    ## strict rfc4180 - no trim leading or trailing spaces or blank lines
+    assert_equal [[""],
+                  [" "],
+                  [" "]], parser.strict.parse( "\n \n \n" )
+    assert_equal [[""],
+                  [" "],
+                  [" "]], parser.strict.parse( "\n \n " )
+    assert_equal [[""]], parser.strict.parse( "\n" )
+    assert_equal [],     parser.strict.parse( "" )
+end
+end # class TestParserFormats