RubyGems - csvutils - Versions diffs - 0.1.0 → 0.1.1 - Mend

csvutils 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

checksums.yaml +4 -4
data/Manifest.txt +3 -0
data/README.md +1 -1
data/Rakefile +1 -1
data/lib/csvutils.rb +109 -0
data/lib/csvutils/cut.rb +47 -47
data/lib/csvutils/head.rb +25 -0
data/lib/csvutils/header.rb +28 -0
data/lib/csvutils/split.rb +107 -107
data/lib/csvutils/stat.rb +86 -0
data/lib/csvutils/test.rb +1 -1
data/lib/csvutils/utils.rb +13 -29
data/lib/csvutils/version.rb +1 -1
data/test/data/at-austria/AUT.csv +363 -363
data/test/data/eng-england/2017-18/E0.csv +381 -381
data/test/test_headers.rb +50 -41
data/test/test_misc.rb +12 -1
data/test/test_version.rb +20 -20
metadata +6 -3

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 91b99040998dec29903bb139c7650aec805981c9
-  data.tar.gz: 4ca0a379e0a99dc01f35957ca021ccb4b56aaef3
+  metadata.gz: 435468b42345511466981b9f470e39e1dc78bfea
+  data.tar.gz: f2b4c42ec30da76fe929942d6d0706420dd63f63
 SHA512:
-  metadata.gz: 7cae580d42247f3df41c846880cdfc78f4f410fc553acff75b57cf85d542a80e97c6ef4ec7e1e924b1cdd85934596addc4d0941bdab9e31478bf0e012386eaf2
-  data.tar.gz: 8398dabd6e1bf01134ba31016b77a866c308c7a75ef426b1ebc995d043c11ec757c15c07bc5b8ba2f81764fa796ad96013055b19af30b70504647aff3915b570
+  metadata.gz: 1b1edc99a8b3e7257a899df34ed2b144467c1268f21877676194b138e59113d69733990ba7a0c70b1a4c51cb2861dff6e3c429cbc9f6be4a0a83c9f08a52fd22
+  data.tar.gz: b57408a1a5d2743649538c79a269480ce2a20d21df483c8133772bbb46589d7dbff6d1c9b22d4ec329bf331f70bd5a207650bedfd83821ba2cdeb7787595621b

data/Manifest.txt CHANGED

@@ -4,7 +4,10 @@ README.md
 Rakefile
 lib/csvutils.rb
 lib/csvutils/cut.rb
+lib/csvutils/head.rb
+lib/csvutils/header.rb
 lib/csvutils/split.rb
+lib/csvutils/stat.rb
 lib/csvutils/test.rb
 lib/csvutils/utils.rb
 lib/csvutils/version.rb

data/README.md CHANGED

@@ -1,4 +1,4 @@
-# csvutils - tools 'n' scripts for working with comma-separated values (csv) datafiles - the world's most popular tabular data interchange format in text"
+# csvutils - tools 'n' scripts for working with comma-separated values (csv) datafiles - the world's most popular tabular data interchange format in text
 * home  :: [github.com/csv11/csvutils](https://github.com/csv11/csvutils)

data/Rakefile CHANGED

@@ -11,7 +11,7 @@ Hoe.spec 'csvutils' do
   self.urls = ['https://github.com/csv11/csvutils']
   self.author = 'Gerald Bauer'
-  self.email = 'opensport@googlegroups.com'
+  self.email = 'wwwmake@googlegroups.com'
   # switch extension to .markdown for github formatting
   self.readme_file = 'README.md'

data/lib/csvutils.rb CHANGED

@@ -4,6 +4,8 @@ require 'pp'
 require 'csv'
 require 'date'
 require 'fileutils'
+require 'optparse'
 ###
@@ -13,7 +15,114 @@ require 'csvutils/utils'
 require 'csvutils/split'
 require 'csvutils/cut'
 require 'csvutils/test'
+require 'csvutils/stat'
+require 'csvutils/header'
+require 'csvutils/head'
+class CsvTool
+## command line tools
+def self.header( args )
+  config = {}
+  parser = OptionParser.new do |opts|
+     opts.banner = "Usage: csvheader [OPTS] datafile ..."
+     opts.on("-h", "--help", "Prints this help") do
+       puts opts
+       exit
+     end
+  end
+  parser.parse!( args )
+  ## pp config
+  ## pp args
+  args.each do |arg|
+    path = arg
+    puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
+    puts
+    CsvUtils.pp_header( CsvUtils.header( path ) )
+    puts
+  end # each arg
+end
+def self.stat( args )
+  config = { columns: [] }
+  parser = OptionParser.new do |opts|
+     opts.banner = "Usage: csvstat [OPTS] datafile ..."
+     opts.on("-c", "--columns=COLUMNS", "Name of header columns" ) do |columns|
+       config[:columns] = columns.split(/[,|;]/)   ## allow different separators
+     end
+     opts.on("-h", "--help", "Prints this help") do
+       puts opts
+       exit
+     end
+  end
+  parser.parse!( args )
+  ## pp config
+  ## pp args
+  args.each do |arg|
+    path = arg
+    columns = config[:columns]
+    puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
+    puts
+    CsvUtils.stat( path, *columns )
+    puts
+  end # each arg
+end
+def self.head( args )
+  config = { n: 4 }
+  parser = OptionParser.new do |opts|
+     opts.banner = "Usage: csvhead [OPTS] datafile ..."
+     opts.on("-n", "--num=NUM", "Number of rows" ) do |num|
+       config[:n] = num.to_i
+     end
+     opts.on("-h", "--help", "Prints this help") do
+       puts opts
+       exit
+     end
+  end
+  parser.parse!( args )
+  ## pp config
+  ## pp args
+  args.each do |arg|
+    path = arg
+    n = config[:n]
+    puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
+    puts
+    CsvUtils.head( path, n: n )
+    puts
+  end # each arg
+end
+end # class CsvTool
 puts CsvUtils.banner   # say hello

data/lib/csvutils/cut.rb CHANGED

@@ -1,47 +1,47 @@
-# encoding: utf-8
-## check/use class or module ???
-class CsvUtils
-  def self.cut( inpath, outpath, *columns, sep: ',' )
-    puts "cvscut in: >#{inpath}<  out: >#{outpath}<"
-    ##  ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
-    puts "columns:"
-    pp columns
-    text = File.open( inpath, 'r:utf-8' ).read   ## note: make sure to use (assume) utf-8
-    csv_options = { headers: true,
-                    col_sep: sep }
-    table = CSV.parse( text, csv_options )
-    ## for convenience - make sure parent folders/directories exist
-    FileUtils.mkdir_p( File.dirname( outpath ))  unless Dir.exists?( File.dirname( outpath ))
-    ## use wb mode - why? why not?
-    ##   assumes same encoding as input?
-    ##   fix/todo: better (always) use utf8!!!!
-    ## CSV.open( out_path, 'wb' ) do |out|
-    ## use just "regular" File for output - why? why not?
-    ##    downside will not encode comma (for now) if present ("Beethoven, van")
-    ##      all values will be unquoted etc. - keep it simple?
-    CSV.open( outpath, 'w:utf-8' ) do |out|
-      out << columns   ## for row add headers/columns
-      table.each do |row|
-        values = columns.map { |col| row[col].strip }  ## find data for column
-        out << values
-      end
-    end
-    puts 'Done.'
-  end  ## method self.cut
-end # class CsvUtils
+# encoding: utf-8
+## check/use class or module ???
+class CsvUtils
+  def self.cut( inpath, outpath, *columns, sep: ',' )
+    puts "cvscut in: >#{inpath}<  out: >#{outpath}<"
+    ##  ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
+    puts "columns:"
+    pp columns
+    text = File.open( inpath, 'r:utf-8' ).read   ## note: make sure to use (assume) utf-8
+    csv_options = { headers: true,
+                    col_sep: sep }
+    table = CSV.parse( text, csv_options )
+    ## for convenience - make sure parent folders/directories exist
+    FileUtils.mkdir_p( File.dirname( outpath ))  unless Dir.exists?( File.dirname( outpath ))
+    ## use wb mode - why? why not?
+    ##   assumes same encoding as input?
+    ##   fix/todo: better (always) use utf8!!!!
+    ## CSV.open( out_path, 'wb' ) do |out|
+    ## use just "regular" File for output - why? why not?
+    ##    downside will not encode comma (for now) if present ("Beethoven, van")
+    ##      all values will be unquoted etc. - keep it simple?
+    CSV.open( outpath, 'w:utf-8' ) do |out|
+      out << columns   ## for row add headers/columns
+      table.each do |row|
+        values = columns.map { |col| row[col].strip }  ## find data for column
+        out << values
+      end
+    end
+    puts 'Done.'
+  end  ## method self.cut
+end # class CsvUtils

data/lib/csvutils/head.rb ADDED

@@ -0,0 +1,25 @@
+# encoding: utf-8
+class CsvUtils
+  ## test or dry run to check if rows can get read/scanned
+  def self.head( path, sep: ',', n: 4 )
+    i = 0
+    csv_options = { headers: true,
+                    col_sep: sep,
+                    external_encoding: 'utf-8'  ## note:  always (auto-)add utf-8 external encoding!!!
+                   }
+    CSV.foreach( path, csv_options ) do |row|
+      i += 1
+      pp row
+      break if i >= n
+    end
+    puts " #{i} rows"
+  end
+end  # class CsvUtils

data/lib/csvutils/header.rb ADDED

@@ -0,0 +1,28 @@
+# encoding: utf-8
+class CsvUtils
+  def self.header( path, sep: ',', debug: false )   ## use header or headers - or use both (with alias)?
+    # read first line (only)
+    #  and parse with csv to get header from csv library itself
+    #
+    #  check - if there's an easier or built-in way for the csv library
+    line = File.open( path, 'r:utf-8' ) { |f| f.readline }
+    pp line   if debug
+    ## e.g.:
+    #  "Country,League,Season,Date,Time,Home,Away,HG,AG,Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA\n"
+    csv_options = {  col_sep: sep }
+    ## note: do NOT use headers: true to get "plain" data array (no hash records)
+    ##   hash record does NOT work for single line/row
+    rows = CSV.parse( line, csv_options )
+    pp rows   if debug
+    rows[0]   ## return first row
+  end  # method self.header
+end  # class CsvUtils

data/lib/csvutils/split.rb CHANGED

@@ -1,107 +1,107 @@
-# encoding: utf-8
-class CsvUtils
-  def self.split( path, *columns, sep: ',', &blk )
-    puts "cvssplit in: >#{path}<"
-    ##  ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
-    puts "columns:"
-    pp columns
-    text = File.open( path, 'r:utf-8' ).read   ## note: make sure to use (assume) utf-8
-    ## note: do NOT use headers
-    ##   for easy sorting use "plain" array of array for records
-    csv_options = { col_sep: sep }
-    data = CSV.parse( text, csv_options )
-    ## todo/check: (auto-) strip (remove all leading and trailing spaces)
-    ##     from all values - why? why not?
-    ##   check if CSV.parse has an option for it?
-    headers = data.shift   ## remove top array item (that is, row with headers)
-    header_mapping = {}
-    headers.each_with_index  { | header,i | header_mapping[header]=i }
-    pp header_mapping
-    ## map columns to array indices e.g. ['Season', 'Div'] => [1,2]
-    column_indices = columns.map { |col| header_mapping[col] }
-    pp column_indices
-    ###################################################
-    ## note: sort data by columns (before split)
-    data = data.sort do |row1,row2|
-       res = 0
-       column_indices.each do |col|
-         res = row1[col] <=> row2[col]    if res == 0
-       end
-       res
-    end
-    chunk = []
-    data.each_with_index do |row,i|
-      chunk << row
-      next_row = data[i+1]
-      changed = false
-      if next_row.nil?   ## end-of-file
-        changed = true
-      else
-        column_indices.each do |col|
-          if row[col] != next_row[col]
-             changed = true
-             break   ## out of each column_indices loop
-           end
-        end
-      end
-      if changed
-        puts "save new chunk:"
-        column_values = column_indices.map {|col| row[col] }
-        pp column_values
-        # note: add header(s) row upfront (as first row) to chunk (with unshift)
-        chunk_with_headers = chunk.unshift( headers )
-        if blk
-          yield( column_values, chunk_with_headers )
-        else
-          ## auto-save (write-to-file) by default - why? why not?
-          split_write( path, column_values, chunk_with_headers )
-        end
-        chunk = []   ## reset chunk for next batch of records
-      end
-    end
-    puts 'Done.'
-  end  ## method self.split
-  def self.split_write( inpath, values, chunk )
-    basename = File.basename( inpath, '.*' )
-    dirname  = File.dirname( inpath )
-    ## check/change invalid filename chars
-    ##  e.g. change 1990/91 to 1990-91
-    extraname = values.map {|value| value.tr('/','-')}.join('~')
-    outpath = "#{dirname}/#{basename}_#{extraname}.csv"
-    puts "saving >#{basename}_#{extraname}.csv<..."
-    CSV.open( outpath, 'w:utf-8' ) do |out|
-      chunk.each do |row|
-        out << row
-      end
-    end
-  end
-end # class CsvUtils
+# encoding: utf-8
+class CsvUtils
+  def self.split( path, *columns, sep: ',', &blk )
+    puts "cvssplit in: >#{path}<"
+    ##  ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
+    puts "columns:"
+    pp columns
+    text = File.open( path, 'r:utf-8' ).read   ## note: make sure to use (assume) utf-8
+    ## note: do NOT use headers
+    ##   for easy sorting use "plain" array of array for records
+    csv_options = { col_sep: sep }
+    data = CSV.parse( text, csv_options )
+    ## todo/check: (auto-) strip (remove all leading and trailing spaces)
+    ##     from all values - why? why not?
+    ##   check if CSV.parse has an option for it?
+    headers = data.shift   ## remove top array item (that is, row with headers)
+    header_mapping = {}
+    headers.each_with_index  { | header,i | header_mapping[header]=i }
+    pp header_mapping
+    ## map columns to array indices e.g. ['Season', 'Div'] => [1,2]
+    column_indices = columns.map { |col| header_mapping[col] }
+    pp column_indices
+    ###################################################
+    ## note: sort data by columns (before split)
+    data = data.sort do |row1,row2|
+       res = 0
+       column_indices.each do |col|
+         res = row1[col] <=> row2[col]    if res == 0
+       end
+       res
+    end
+    chunk = []
+    data.each_with_index do |row,i|
+      chunk << row
+      next_row = data[i+1]
+      changed = false
+      if next_row.nil?   ## end-of-file
+        changed = true
+      else
+        column_indices.each do |col|
+          if row[col] != next_row[col]
+             changed = true
+             break   ## out of each column_indices loop
+           end
+        end
+      end
+      if changed
+        puts "save new chunk:"
+        column_values = column_indices.map {|col| row[col] }
+        pp column_values
+        # note: add header(s) row upfront (as first row) to chunk (with unshift)
+        chunk_with_headers = chunk.unshift( headers )
+        if blk
+          yield( column_values, chunk_with_headers )
+        else
+          ## auto-save (write-to-file) by default - why? why not?
+          split_write( path, column_values, chunk_with_headers )
+        end
+        chunk = []   ## reset chunk for next batch of records
+      end
+    end
+    puts 'Done.'
+  end  ## method self.split
+  def self.split_write( inpath, values, chunk )
+    basename = File.basename( inpath, '.*' )
+    dirname  = File.dirname( inpath )
+    ## check/change invalid filename chars
+    ##  e.g. change 1990/91 to 1990-91
+    extraname = values.map {|value| value.tr('/','-')}.join('~')
+    outpath = "#{dirname}/#{basename}_#{extraname}.csv"
+    puts "saving >#{basename}_#{extraname}.csv<..."
+    CSV.open( outpath, 'w:utf-8' ) do |out|
+      chunk.each do |row|
+        out << row
+      end
+    end
+  end
+end # class CsvUtils