csvutils 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 91b99040998dec29903bb139c7650aec805981c9
4
- data.tar.gz: 4ca0a379e0a99dc01f35957ca021ccb4b56aaef3
3
+ metadata.gz: 435468b42345511466981b9f470e39e1dc78bfea
4
+ data.tar.gz: f2b4c42ec30da76fe929942d6d0706420dd63f63
5
5
  SHA512:
6
- metadata.gz: 7cae580d42247f3df41c846880cdfc78f4f410fc553acff75b57cf85d542a80e97c6ef4ec7e1e924b1cdd85934596addc4d0941bdab9e31478bf0e012386eaf2
7
- data.tar.gz: 8398dabd6e1bf01134ba31016b77a866c308c7a75ef426b1ebc995d043c11ec757c15c07bc5b8ba2f81764fa796ad96013055b19af30b70504647aff3915b570
6
+ metadata.gz: 1b1edc99a8b3e7257a899df34ed2b144467c1268f21877676194b138e59113d69733990ba7a0c70b1a4c51cb2861dff6e3c429cbc9f6be4a0a83c9f08a52fd22
7
+ data.tar.gz: b57408a1a5d2743649538c79a269480ce2a20d21df483c8133772bbb46589d7dbff6d1c9b22d4ec329bf331f70bd5a207650bedfd83821ba2cdeb7787595621b
@@ -4,7 +4,10 @@ README.md
4
4
  Rakefile
5
5
  lib/csvutils.rb
6
6
  lib/csvutils/cut.rb
7
+ lib/csvutils/head.rb
8
+ lib/csvutils/header.rb
7
9
  lib/csvutils/split.rb
10
+ lib/csvutils/stat.rb
8
11
  lib/csvutils/test.rb
9
12
  lib/csvutils/utils.rb
10
13
  lib/csvutils/version.rb
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # csvutils - tools 'n' scripts for working with comma-separated values (csv) datafiles - the world's most popular tabular date interchange format in text"
1
+ # csvutils - tools 'n' scripts for working with comma-separated values (csv) datafiles - the world's most popular tabular data interchange format in text
2
2
 
3
3
 
4
4
  * home :: [github.com/csv11/csvutils](https://github.com/csv11/csvutils)
data/Rakefile CHANGED
@@ -11,7 +11,7 @@ Hoe.spec 'csvutils' do
11
11
  self.urls = ['https://github.com/csv11/csvutils']
12
12
 
13
13
  self.author = 'Gerald Bauer'
14
- self.email = 'opensport@googlegroups.com'
14
+ self.email = 'wwwmake@googlegroups.com'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
17
  self.readme_file = 'README.md'
@@ -4,6 +4,8 @@ require 'pp'
4
4
  require 'csv'
5
5
  require 'date'
6
6
  require 'fileutils'
7
+ require 'optparse'
8
+
7
9
 
8
10
 
9
11
  ###
@@ -13,7 +15,114 @@ require 'csvutils/utils'
13
15
  require 'csvutils/split'
14
16
  require 'csvutils/cut'
15
17
  require 'csvutils/test'
18
+ require 'csvutils/stat'
19
+ require 'csvutils/header'
20
+ require 'csvutils/head'
21
+
22
+
23
+
24
+
25
+ class CsvTool
26
+
27
+ ## command line tools
28
+ def self.header( args )
29
+
30
+ config = {}
31
+
32
+ parser = OptionParser.new do |opts|
33
+ opts.banner = "Usage: csvheader [OPTS] datafile ..."
34
+
35
+ opts.on("-h", "--help", "Prints this help") do
36
+ puts opts
37
+ exit
38
+ end
39
+ end
40
+
41
+ parser.parse!( args )
42
+
43
+ ## pp config
44
+ ## pp args
45
+
46
+ args.each do |arg|
47
+ path = arg
48
+
49
+ puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
50
+ puts
51
+ CsvUtils.pp_header( CsvUtils.header( path ) )
52
+ puts
53
+ end # each arg
54
+ end
55
+
56
+
57
+ def self.stat( args )
58
+
59
+ config = { columns: [] }
60
+
61
+ parser = OptionParser.new do |opts|
62
+ opts.banner = "Usage: csvstat [OPTS] datafile ..."
63
+
64
+ opts.on("-c", "--columns=COLUMNS", "Name of header columns" ) do |columns|
65
+ config[:columns] = columns.split(/[,|;]/) ## allow different separators
66
+ end
67
+
68
+ opts.on("-h", "--help", "Prints this help") do
69
+ puts opts
70
+ exit
71
+ end
72
+ end
73
+
74
+ parser.parse!( args )
75
+
76
+ ## pp config
77
+ ## pp args
78
+
79
+ args.each do |arg|
80
+ path = arg
81
+ columns = config[:columns]
82
+
83
+ puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
84
+ puts
85
+ CsvUtils.stat( path, *columns )
86
+ puts
87
+ end # each arg
88
+ end
89
+
90
+
91
+ def self.head( args )
92
+
93
+ config = { n: 4 }
94
+
95
+ parser = OptionParser.new do |opts|
96
+ opts.banner = "Usage: csvhead [OPTS] datafile ..."
97
+
98
+ opts.on("-n", "--num=NUM", "Number of rows" ) do |num|
99
+ config[:n] = num.to_i
100
+ end
101
+
102
+ opts.on("-h", "--help", "Prints this help") do
103
+ puts opts
104
+ exit
105
+ end
106
+ end
107
+
108
+ parser.parse!( args )
109
+
110
+ ## pp config
111
+ ## pp args
112
+
113
+ args.each do |arg|
114
+ path = arg
115
+ n = config[:n]
116
+
117
+ puts "== #{File.basename(path)} (#{File.dirname(path)}) =="
118
+ puts
119
+ CsvUtils.head( path, n: n )
120
+ puts
121
+ end # each arg
122
+ end
123
+
16
124
 
125
+ end # class CsvTool
17
126
 
18
127
 
19
128
  puts CsvUtils.banner # say hello
@@ -1,47 +1,47 @@
1
- # encoding: utf-8
2
-
3
- ## check/use class or module ???
4
-
5
-
6
- class CsvUtils
7
-
8
- def self.cut( inpath, outpath, *columns, sep: ',' )
9
-
10
- puts "cvscut in: >#{inpath}< out: >#{outpath}<"
11
-
12
- ## ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
13
- puts "columns:"
14
- pp columns
15
-
16
- text = File.open( inpath, 'r:utf-8' ).read ## note: make sure to use (assume) utf-8
17
-
18
- csv_options = { headers: true,
19
- col_sep: sep }
20
-
21
- table = CSV.parse( text, csv_options )
22
-
23
-
24
- ## for convenience - make sure parent folders/directories exist
25
- FileUtils.mkdir_p( File.dirname( outpath )) unless Dir.exists?( File.dirname( outpath ))
26
-
27
- ## use wb mode - why? why not?
28
- ## assumes same encoding as input?
29
- ## fix/todo: better (always) use utf8!!!!
30
- ## CSV.open( out_path, 'wb' ) do |out|
31
-
32
- ## use just "regular" File for output - why? why not?
33
- ## downside will not encode comma (for now) if present ("Beethoven, van")
34
- ## all values will be unquoted etc. - keep it simple?
35
-
36
- CSV.open( outpath, 'w:utf-8' ) do |out|
37
- out << columns ## for row add headers/columns
38
- table.each do |row|
39
- values = columns.map { |col| row[col].strip } ## find data for column
40
- out << values
41
- end
42
- end
43
-
44
- puts 'Done.'
45
- end ## method self.cut
46
-
47
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+ ## check/use class or module ???
4
+
5
+
6
+ class CsvUtils
7
+
8
+ def self.cut( inpath, outpath, *columns, sep: ',' )
9
+
10
+ puts "cvscut in: >#{inpath}< out: >#{outpath}<"
11
+
12
+ ## ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
13
+ puts "columns:"
14
+ pp columns
15
+
16
+ text = File.open( inpath, 'r:utf-8' ).read ## note: make sure to use (assume) utf-8
17
+
18
+ csv_options = { headers: true,
19
+ col_sep: sep }
20
+
21
+ table = CSV.parse( text, csv_options )
22
+
23
+
24
+ ## for convenience - make sure parent folders/directories exist
25
+ FileUtils.mkdir_p( File.dirname( outpath )) unless Dir.exists?( File.dirname( outpath ))
26
+
27
+ ## use wb mode - why? why not?
28
+ ## assumes same encoding as input?
29
+ ## fix/todo: better (always) use utf8!!!!
30
+ ## CSV.open( out_path, 'wb' ) do |out|
31
+
32
+ ## use just "regular" File for output - why? why not?
33
+ ## downside will not encode comma (for now) if present ("Beethoven, van")
34
+ ## all values will be unquoted etc. - keep it simple?
35
+
36
+ CSV.open( outpath, 'w:utf-8' ) do |out|
37
+ out << columns ## for row add headers/columns
38
+ table.each do |row|
39
+ values = columns.map { |col| row[col].strip } ## find data for column
40
+ out << values
41
+ end
42
+ end
43
+
44
+ puts 'Done.'
45
+ end ## method self.cut
46
+
47
+ end # class CsvUtils
@@ -0,0 +1,25 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ class CsvUtils
5
+
6
+ ## test or dry run to check if rows can get read/scanned
7
+ def self.head( path, sep: ',', n: 4 )
8
+ i = 0
9
+ csv_options = { headers: true,
10
+ col_sep: sep,
11
+ external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding!!!
12
+ }
13
+
14
+ CSV.foreach( path, csv_options ) do |row|
15
+ i += 1
16
+
17
+ pp row
18
+
19
+ break if i >= n
20
+ end
21
+
22
+ puts " #{i} rows"
23
+ end
24
+
25
+ end # class CsvUtils
@@ -0,0 +1,28 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ class CsvUtils
5
+
6
+ def self.header( path, sep: ',', debug: false ) ## use header or headers - or use both (with alias)?
7
+
8
+ # read first line (only)
9
+ # and parse with csv to get header from csv library itself
10
+ #
11
+ # check - if there's an easier or built-in way for the csv library
12
+
13
+ line = File.open( path, 'r:utf-8' ) { |f| f.readline }
14
+
15
+ pp line if debug
16
+ ## e.g.:
17
+ # "Country,League,Season,Date,Time,Home,Away,HG,AG,Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA\n"
18
+
19
+ csv_options = { col_sep: sep }
20
+
21
+ ## note: do NOT use headers: true to get "plain" data array (no hash records)
22
+ ## hash record does NOT work for single line/row
23
+ rows = CSV.parse( line, csv_options )
24
+ pp rows if debug
25
+ rows[0] ## return first row
26
+ end # method self.header
27
+
28
+ end # class CsvUtils
@@ -1,107 +1,107 @@
1
- # encoding: utf-8
2
-
3
-
4
-
5
-
6
- class CsvUtils
7
-
8
- def self.split( path, *columns, sep: ',', &blk )
9
-
10
- puts "cvssplit in: >#{path}<"
11
-
12
- ## ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
13
- puts "columns:"
14
- pp columns
15
-
16
- text = File.open( path, 'r:utf-8' ).read ## note: make sure to use (assume) utf-8
17
-
18
- ## note: do NOT use headers
19
- ## for easy sorting use "plain" array of array for records
20
- csv_options = { col_sep: sep }
21
-
22
- data = CSV.parse( text, csv_options )
23
-
24
- ## todo/check: (auto-) strip (remove all leading and trailing spaces)
25
- ## from all values - why? why not?
26
- ## check if CSV.parse has an option for it?
27
-
28
- headers = data.shift ## remove top array item (that is, row with headers)
29
-
30
- header_mapping = {}
31
- headers.each_with_index { | header,i | header_mapping[header]=i }
32
- pp header_mapping
33
-
34
- ## map columns to array indices e.g. ['Season', 'Div'] => [1,2]
35
- column_indices = columns.map { |col| header_mapping[col] }
36
- pp column_indices
37
-
38
-
39
- ###################################################
40
- ## note: sort data by columns (before split)
41
- data = data.sort do |row1,row2|
42
- res = 0
43
- column_indices.each do |col|
44
- res = row1[col] <=> row2[col] if res == 0
45
- end
46
- res
47
- end
48
-
49
- chunk = []
50
- data.each_with_index do |row,i|
51
- chunk << row
52
-
53
- next_row = data[i+1]
54
-
55
- changed = false
56
- if next_row.nil? ## end-of-file
57
- changed = true
58
- else
59
- column_indices.each do |col|
60
- if row[col] != next_row[col]
61
- changed = true
62
- break ## out of each column_indices loop
63
- end
64
- end
65
- end
66
-
67
- if changed
68
- puts "save new chunk:"
69
- column_values = column_indices.map {|col| row[col] }
70
- pp column_values
71
-
72
- # note: add header(s) row upfront (as first row) to chunk (with unshift)
73
- chunk_with_headers = chunk.unshift( headers )
74
- if blk
75
- yield( column_values, chunk_with_headers )
76
- else
77
- ## auto-save (write-to-file) by default - why? why not?
78
- split_write( path, column_values, chunk_with_headers )
79
- end
80
-
81
- chunk = [] ## reset chunk for next batch of records
82
- end
83
- end
84
-
85
- puts 'Done.'
86
- end ## method self.split
87
-
88
-
89
- def self.split_write( inpath, values, chunk )
90
- basename = File.basename( inpath, '.*' )
91
- dirname = File.dirname( inpath )
92
-
93
- ## check/change invalid filename chars
94
- ## e.g. change 1990/91 to 1990-91
95
- extraname = values.map {|value| value.tr('/','-')}.join('~')
96
-
97
- outpath = "#{dirname}/#{basename}_#{extraname}.csv"
98
- puts "saving >#{basename}_#{extraname}.csv<..."
99
-
100
- CSV.open( outpath, 'w:utf-8' ) do |out|
101
- chunk.each do |row|
102
- out << row
103
- end
104
- end
105
- end
106
-
107
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+
4
+
5
+
6
+ class CsvUtils
7
+
8
+ def self.split( path, *columns, sep: ',', &blk )
9
+
10
+ puts "cvssplit in: >#{path}<"
11
+
12
+ ## ["Date", "HomeTeam", "AwayTeam", "FTHG", "FTAG", "HTHG", "HTAG"]
13
+ puts "columns:"
14
+ pp columns
15
+
16
+ text = File.open( path, 'r:utf-8' ).read ## note: make sure to use (assume) utf-8
17
+
18
+ ## note: do NOT use headers
19
+ ## for easy sorting use "plain" array of array for records
20
+ csv_options = { col_sep: sep }
21
+
22
+ data = CSV.parse( text, csv_options )
23
+
24
+ ## todo/check: (auto-) strip (remove all leading and trailing spaces)
25
+ ## from all values - why? why not?
26
+ ## check if CSV.parse has an option for it?
27
+
28
+ headers = data.shift ## remove top array item (that is, row with headers)
29
+
30
+ header_mapping = {}
31
+ headers.each_with_index { | header,i | header_mapping[header]=i }
32
+ pp header_mapping
33
+
34
+ ## map columns to array indices e.g. ['Season', 'Div'] => [1,2]
35
+ column_indices = columns.map { |col| header_mapping[col] }
36
+ pp column_indices
37
+
38
+
39
+ ###################################################
40
+ ## note: sort data by columns (before split)
41
+ data = data.sort do |row1,row2|
42
+ res = 0
43
+ column_indices.each do |col|
44
+ res = row1[col] <=> row2[col] if res == 0
45
+ end
46
+ res
47
+ end
48
+
49
+ chunk = []
50
+ data.each_with_index do |row,i|
51
+ chunk << row
52
+
53
+ next_row = data[i+1]
54
+
55
+ changed = false
56
+ if next_row.nil? ## end-of-file
57
+ changed = true
58
+ else
59
+ column_indices.each do |col|
60
+ if row[col] != next_row[col]
61
+ changed = true
62
+ break ## out of each column_indices loop
63
+ end
64
+ end
65
+ end
66
+
67
+ if changed
68
+ puts "save new chunk:"
69
+ column_values = column_indices.map {|col| row[col] }
70
+ pp column_values
71
+
72
+ # note: add header(s) row upfront (as first row) to chunk (with unshift)
73
+ chunk_with_headers = chunk.unshift( headers )
74
+ if blk
75
+ yield( column_values, chunk_with_headers )
76
+ else
77
+ ## auto-save (write-to-file) by default - why? why not?
78
+ split_write( path, column_values, chunk_with_headers )
79
+ end
80
+
81
+ chunk = [] ## reset chunk for next batch of records
82
+ end
83
+ end
84
+
85
+ puts 'Done.'
86
+ end ## method self.split
87
+
88
+
89
+ def self.split_write( inpath, values, chunk )
90
+ basename = File.basename( inpath, '.*' )
91
+ dirname = File.dirname( inpath )
92
+
93
+ ## check/change invalid filename chars
94
+ ## e.g. change 1990/91 to 1990-91
95
+ extraname = values.map {|value| value.tr('/','-')}.join('~')
96
+
97
+ outpath = "#{dirname}/#{basename}_#{extraname}.csv"
98
+ puts "saving >#{basename}_#{extraname}.csv<..."
99
+
100
+ CSV.open( outpath, 'w:utf-8' ) do |out|
101
+ chunk.each do |row|
102
+ out << row
103
+ end
104
+ end
105
+ end
106
+
107
+ end # class CsvUtils