csvutils 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 91b99040998dec29903bb139c7650aec805981c9
4
- data.tar.gz: 4ca0a379e0a99dc01f35957ca021ccb4b56aaef3
3
+ metadata.gz: 435468b42345511466981b9f470e39e1dc78bfea
4
+ data.tar.gz: f2b4c42ec30da76fe929942d6d0706420dd63f63
5
5
  SHA512:
6
- metadata.gz: 7cae580d42247f3df41c846880cdfc78f4f410fc553acff75b57cf85d542a80e97c6ef4ec7e1e924b1cdd85934596addc4d0941bdab9e31478bf0e012386eaf2
7
- data.tar.gz: 8398dabd6e1bf01134ba31016b77a866c308c7a75ef426b1ebc995d043c11ec757c15c07bc5b8ba2f81764fa796ad96013055b19af30b70504647aff3915b570
6
+ metadata.gz: 1b1edc99a8b3e7257a899df34ed2b144467c1268f21877676194b138e59113d69733990ba7a0c70b1a4c51cb2861dff6e3c429cbc9f6be4a0a83c9f08a52fd22
7
+ data.tar.gz: b57408a1a5d2743649538c79a269480ce2a20d21df483c8133772bbb46589d7dbff6d1c9b22d4ec329bf331f70bd5a207650bedfd83821ba2cdeb7787595621b
@@ -4,7 +4,10 @@ README.md
4
4
  Rakefile
5
5
  lib/csvutils.rb
6
6
  lib/csvutils/cut.rb
7
+ lib/csvutils/head.rb
8
+ lib/csvutils/header.rb
7
9
  lib/csvutils/split.rb
10
+ lib/csvutils/stat.rb
8
11
  lib/csvutils/test.rb
9
12
  lib/csvutils/utils.rb
10
13
  lib/csvutils/version.rb
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # csvutils - tools 'n' scripts for working with comma-separated values (csv) datafiles - the world's most popular tabular date interchange format in text"
1
+ # csvutils - tools 'n' scripts for working with comma-separated values (csv) datafiles - the world's most popular tabular data interchange format in text
2
2
 
3
3
 
4
4
  * home :: [github.com/csv11/csvutils](https://github.com/csv11/csvutils)
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Hoe.spec 'csvutils' do
11
11
  self.urls = ['https://github.com/csv11/csvutils']
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'opensport@googlegroups.com'
14
+ self.email = 'wwwmake@googlegroups.com'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
17
  self.readme_file = 'README.md'
@@ -4,6 +4,8 @@ require 'pp'
4
4
  require 'csv'
5
5
  require 'date'
6
6
  require 'fileutils'
7
+ require 'optparse'
8
+
7
9
 
8
10
 
9
11
  ###
@@ -13,7 +15,114 @@ require 'csvutils/utils'
13
15
  require 'csvutils/split'
14
16
  require 'csvutils/cut'
15
17
  require 'csvutils/test'
18
+ require 'csvutils/stat'
19
+ require 'csvutils/header'
20
+ require 'csvutils/head'
21
+
22
+
23
+
24
+
25
## Command line front-ends (csvheader, csvstat, csvhead).
##
## Every entry point takes the raw command line arguments (e.g. ARGV),
## strips the option switches from the array in place (OptionParser#parse!)
## and treats whatever is left as datafile paths. The args array is returned.
class CsvTool

  ## csvheader - print the header row of every datafile.
  ##   args - array of command line arguments (modified in place)
  def self.header( args )
    parser = OptionParser.new do |opts|
      opts.banner = "Usage: csvheader [OPTS] datafile ..."
      add_help_option( opts )
    end
    parser.parse!( args )

    each_datafile( args ) do |path|
      CsvUtils.pp_header( CsvUtils.header( path ) )
    end
  end


  ## csvstat - run CsvUtils.stat on every datafile.
  ##   args - array of command line arguments (modified in place)
  ##
  ## -c/--columns takes a list of header column names;
  ## comma, pipe and semicolon are all accepted as list separators.
  def self.stat( args )
    columns = []

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: csvstat [OPTS] datafile ..."

      opts.on("-c", "--columns=COLUMNS", "Name of header columns" ) do |list|
        columns = list.split(/[,|;]/)   ## allow different separators
      end

      add_help_option( opts )
    end
    parser.parse!( args )

    each_datafile( args ) do |path|
      CsvUtils.stat( path, *columns )
    end
  end


  ## csvhead - print the first rows of every datafile.
  ##   args - array of command line arguments (modified in place)
  ##
  ## -n/--num is declared as an Integer option, so a non-numeric value
  ## (e.g. -n abc) raises OptionParser::InvalidArgument instead of silently
  ## turning into 0 as it would with String#to_i. Defaults to 4 rows.
  def self.head( args )
    n = 4

    parser = OptionParser.new do |opts|
      opts.banner = "Usage: csvhead [OPTS] datafile ..."

      opts.on("-n", "--num=NUM", Integer, "Number of rows" ) do |num|
        n = num
      end

      add_help_option( opts )
    end
    parser.parse!( args )

    each_datafile( args ) do |path|
      CsvUtils.head( path, n: n )
    end
  end


  ## Registers the shared -h/--help switch: prints the usage text and exits.
  def self.add_help_option( opts )
    opts.on("-h", "--help", "Prints this help") do
      puts opts
      exit
    end
  end

  ## Prints the "== file (dir) ==" banner for each path, yields the path
  ## and prints a blank line before and after the yielded output.
  def self.each_datafile( paths )
    paths.each do |path|
      puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
      puts
      yield path
      puts
    end
  end

  private_class_method :add_help_option, :each_datafile

end # class CsvTool
17
126
 
18
127
 
19
128
  puts CsvUtils.banner # say hello
@@ -1,47 +1,47 @@
1
- # encoding: utf-8
2
-
3
- ## check/use class or module ???
4
-
5
-
6
- class CsvUtils
7
-
8
- def self.cut( inpath, outpath, *columns, sep: ',' )
9
-
10
- puts "cvscut in: >#{inpath}< out: >#{outpath}<"
11
-
12
- ## ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
13
- puts "columns:"
14
- pp columns
15
-
16
- text = File.open( inpath, 'r:utf-8' ).read ## note: make sure to use (assume) utf-8
17
-
18
- csv_options = { headers: true,
19
- col_sep: sep }
20
-
21
- table = CSV.parse( text, csv_options )
22
-
23
-
24
- ## for convenience - make sure parent folders/directories exist
25
- FileUtils.mkdir_p( File.dirname( outpath )) unless Dir.exists?( File.dirname( outpath ))
26
-
27
- ## use wb mode - why? why not?
28
- ## assumes same encoding as input?
29
- ## fix/todo: better (always) use utf8!!!!
30
- ## CSV.open( out_path, 'wb' ) do |out|
31
-
32
- ## use just "regular" File for output - why? why not?
33
- ## downside will not encode comma (for now) if present ("Beethoven, van")
34
- ## all values will be unquoted etc. - keep it simple?
35
-
36
- CSV.open( outpath, 'w:utf-8' ) do |out|
37
- out << columns ## for row add headers/columns
38
- table.each do |row|
39
- values = columns.map { |col| row[col].strip } ## find data for column
40
- out << values
41
- end
42
- end
43
-
44
- puts 'Done.'
45
- end ## method self.cut
46
-
47
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+ ## check/use class or module ???
4
+
5
+
6
class CsvUtils

  ## Copies the selected columns of a csv datafile into a new csv datafile.
  ##
  ##   inpath   - datafile to read (read as utf-8; first row holds the headers)
  ##   outpath  - datafile to write (utf-8); missing parent directories get created
  ##   columns  - header names of the columns to keep, in output order
  ##              e.g. "Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG"
  ##   sep      - column separator of the input datafile
  ##
  ## Values get stripped of leading/trailing whitespace; an empty cell
  ## (or a column name not present in the input) is written as an empty value.
  def self.cut( inpath, outpath, *columns, sep: ',' )

    puts "cvscut in: >#{inpath}< out: >#{outpath}<"

    puts "columns:"
    pp columns

    ## note: make sure to use (assume) utf-8;
    ##       block form closes the file handle again
    text = File.open( inpath, 'r:utf-8' ) { |f| f.read }

    ## note: options must get passed as keywords (not as a positional hash)
    table = CSV.parse( text, headers: true, col_sep: sep )

    ## for convenience - make sure parent folders/directories exist
    ##   (mkdir_p is a no-op if the directory is already there)
    FileUtils.mkdir_p( File.dirname( outpath ))

    CSV.open( outpath, 'w:utf-8' ) do |out|
      out << columns     ## first row - add headers/columns
      table.each do |row|
        values = columns.map do |col|
          value = row[col]
          value ? value.strip : value    ## keep nil (empty cell) as is
        end
        out << values
      end
    end

    puts 'Done.'
  end  ## method self.cut

end # class CsvUtils
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+
4
class CsvUtils

  ## Test or dry run to check if rows can get read/scanned:
  ## pretty prints the first n records of a datafile
  ## followed by a line with the number of records printed.
  ##
  ##   path - datafile to read (first row holds the headers)
  ##   sep  - column separator
  ##   n    - max. number of records to print; nothing gets read for n <= 0
  def self.head( path, sep: ',', n: 4 )
    csv_options = { headers:           true,
                    col_sep:           sep,
                    external_encoding: 'utf-8'   ## note: always (auto-)add utf-8 external encoding!!!
                  }

    i = 0
    if n > 0
      ## note: options must get passed as keywords (**) - a positional hash
      ##         would get taken for the file mode argument
      CSV.foreach( path, **csv_options ) do |row|
        i += 1

        pp row

        break if i >= n
      end
    end

    puts " #{i} rows"
  end

end # class CsvUtils
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+
4
class CsvUtils

  ## Returns the header (column names) of a datafile as an array of strings.
  ##
  ##   path  - datafile to read (read as utf-8)
  ##   sep   - column separator
  ##   debug - if true, pretty prints the raw first line and the parsed rows
  ##
  ## Only the first line gets read and parsed with the csv library itself.
  ## An empty datafile raises EOFError (from readline).
  def self.header( path, sep: ',', debug: false )   ## use header or headers - or use both (with alias)?

    line = File.open( path, 'r:utf-8' ) { |f| f.readline }

    pp line   if debug
    ## e.g.:
    #  "Country,League,Season,Date,Time,Home,Away,HG,AG,Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA\n"

    ## note: do NOT use headers: true to get "plain" data array (no hash records)
    ##          hash record does NOT work for single line/row
    ## note: options must get passed as keywords (not as a positional hash)
    rows = CSV.parse( line, col_sep: sep )
    pp rows   if debug
    rows[0]  ## return first row
  end  # method self.header

end # class CsvUtils
@@ -1,107 +1,107 @@
1
- # encoding: utf-8
2
-
3
-
4
-
5
-
6
- class CsvUtils
7
-
8
- def self.split( path, *columns, sep: ',', &blk )
9
-
10
- puts "cvssplit in: >#{path}<"
11
-
12
- ## ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
13
- puts "columns:"
14
- pp columns
15
-
16
- text = File.open( path, 'r:utf-8' ).read ## note: make sure to use (assume) utf-8
17
-
18
- ## note: do NOT use headers
19
- ## for easy sorting use "plain" array of array for records
20
- csv_options = { col_sep: sep }
21
-
22
- data = CSV.parse( text, csv_options )
23
-
24
- ## todo/check: (auto-) strip (remove all leading and trailing spaces)
25
- ## from all values - why? why not?
26
- ## check if CSV.parse has an option for it?
27
-
28
- headers = data.shift ## remove top array item (that is, row with headers)
29
-
30
- header_mapping = {}
31
- headers.each_with_index { | header,i | header_mapping[header]=i }
32
- pp header_mapping
33
-
34
- ## map columns to array indices e.g. ['Season', 'Div'] => [1,2]
35
- column_indices = columns.map { |col| header_mapping[col] }
36
- pp column_indices
37
-
38
-
39
- ###################################################
40
- ## note: sort data by columns (before split)
41
- data = data.sort do |row1,row2|
42
- res = 0
43
- column_indices.each do |col|
44
- res = row1[col] <=> row2[col] if res == 0
45
- end
46
- res
47
- end
48
-
49
- chunk = []
50
- data.each_with_index do |row,i|
51
- chunk << row
52
-
53
- next_row = data[i+1]
54
-
55
- changed = false
56
- if next_row.nil? ## end-of-file
57
- changed = true
58
- else
59
- column_indices.each do |col|
60
- if row[col] != next_row[col]
61
- changed = true
62
- break ## out of each column_indices loop
63
- end
64
- end
65
- end
66
-
67
- if changed
68
- puts "save new chunk:"
69
- column_values = column_indices.map {|col| row[col] }
70
- pp column_values
71
-
72
- # note: add header(s) row upfront (as first row) to chunk (with unshift)
73
- chunk_with_headers = chunk.unshift( headers )
74
- if blk
75
- yield( column_values, chunk_with_headers )
76
- else
77
- ## auto-save (write-to-file) by default - why? why not?
78
- split_write( path, column_values, chunk_with_headers )
79
- end
80
-
81
- chunk = [] ## reset chunk for next batch of records
82
- end
83
- end
84
-
85
- puts 'Done.'
86
- end ## method self.split
87
-
88
-
89
- def self.split_write( inpath, values, chunk )
90
- basename = File.basename( inpath, '.*' )
91
- dirname = File.dirname( inpath )
92
-
93
- ## check/change invalid filename chars
94
- ## e.g. change 1990/91 to 1990-91
95
- extraname = values.map {|value| value.tr('/','-')}.join('~')
96
-
97
- outpath = "#{dirname}/#{basename}_#{extraname}.csv"
98
- puts "saving >#{basename}_#{extraname}.csv<..."
99
-
100
- CSV.open( outpath, 'w:utf-8' ) do |out|
101
- chunk.each do |row|
102
- out << row
103
- end
104
- end
105
- end
106
-
107
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+
4
+
5
+
6
class CsvUtils

  ## Splits a csv datafile into chunks - one chunk for every distinct
  ## combination of values in the given columns.
  ##
  ##   path    - datafile to read (read as utf-8; first row holds the headers)
  ##   columns - header names of the columns to split by e.g. "Season", "Div"
  ##   sep     - column separator
  ##   blk     - optional block; gets called once per chunk with
  ##             (column_values, rows) where rows starts with the header row.
  ##             Without a block every chunk gets saved with split_write.
  ##
  ## The records get sorted by the split columns first; empty cells (nil)
  ## sort and group like empty strings.
  ## Raises ArgumentError if a column name is not in the header row.
  def self.split( path, *columns, sep: ',', &blk )

    puts "cvssplit in: >#{path}<"

    puts "columns:"
    pp columns

    ## note: make sure to use (assume) utf-8;
    ##       block form closes the file handle again
    text = File.open( path, 'r:utf-8' ) { |f| f.read }

    ## note: do NOT use headers
    ##   for easy sorting use "plain" array of array for records
    ## note: options must get passed as keywords (not as a positional hash)
    data = CSV.parse( text, col_sep: sep )

    ## remove top array item (that is, row with headers);
    ##   an empty datafile has no header row at all
    headers = data.shift || []

    header_mapping = {}
    headers.each_with_index { |header,i| header_mapping[header] = i }
    pp header_mapping

    ## fail early (and readable) - otherwise a nil index blows up later in row[nil]
    missing = columns.reject { |col| header_mapping.key?( col ) }
    unless missing.empty?
      raise ArgumentError, "unknown column(s) #{missing.inspect} in >#{path}<"
    end

    ## map columns to array indices e.g. ['Season', 'Div'] => [1,2]
    column_indices = columns.map { |col| header_mapping[col] }
    pp column_indices

    ## sort/group key of a record - nil (empty cell) counts as ""
    ##   so that <=> always works
    key_for = lambda { |row| column_indices.map { |col| row[col].to_s } }

    ###################################################
    ## note: sort data by columns (before split)
    data = data.sort { |row1,row2| key_for.call( row1 ) <=> key_for.call( row2 ) }

    chunk = []
    data.each_with_index do |row,i|
      chunk << row

      next_row = data[i+1]

      ## chunk is complete at end-of-file or if the next record has a different key
      next unless next_row.nil? || key_for.call( row ) != key_for.call( next_row )

      puts "save new chunk:"
      column_values = column_indices.map { |col| row[col] }
      pp column_values

      # note: add header(s) row upfront (as first row) to chunk (with unshift)
      chunk_with_headers = chunk.unshift( headers )
      if blk
        blk.call( column_values, chunk_with_headers )
      else
        ## auto-save (write-to-file) by default - why? why not?
        split_write( path, column_values, chunk_with_headers )
      end

      chunk = []   ## reset chunk for next batch of records
    end

    puts 'Done.'
  end  ## method self.split


  ## Saves a chunk of rows next to the input datafile as
  ##   <dirname>/<basename>_<value1>~<value2>.csv
  ##
  ##   inpath - path of the datafile the chunk comes from
  ##   values - column values of the chunk (used for the filename); nil counts as ""
  ##   chunk  - rows to write (array of arrays)
  def self.split_write( inpath, values, chunk )
    basename = File.basename( inpath, '.*' )
    dirname  = File.dirname( inpath )

    ## check/change invalid filename chars
    ##  e.g. change 1990/91 to 1990-91
    extraname = values.map { |value| value.to_s.tr('/','-') }.join('~')

    outpath = "#{dirname}/#{basename}_#{extraname}.csv"
    puts "saving >#{basename}_#{extraname}.csv<..."

    CSV.open( outpath, 'w:utf-8' ) do |out|
      chunk.each { |row| out << row }
    end
  end

end # class CsvUtils