csvutils 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,22 +1,19 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvUtils
5
-
6
- ## test or dry run to check if rows can get read/scanned
7
- def self.test( path, sep: ',' )
8
- i = 0
9
- csv_options = { headers: true,
10
- col_sep: sep,
11
- external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding!!!
12
- }
13
-
14
- CSV.foreach( path, csv_options ) do |row|
15
- i += 1
16
- print '.' if i % 100 == 0
17
- end
18
-
19
- puts " #{i} rows"
20
- end
21
-
22
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+
4
+ class CsvUtils
5
+
6
+ ## test or dry run to check if rows can get read/scanned
7
+ def self.test( path, sep: ',' )
8
+ i = 0
9
+ csv_options = { sep: sep }
10
+
11
+ CsvHash.foreach( path, csv_options ) do |rec|
12
+ i += 1
13
+ print '.' if i % 100 == 0
14
+ end
15
+
16
+ puts " #{i} rows"
17
+ end
18
+
19
+ end # class CsvUtils
@@ -1,13 +1,29 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvUtils
5
-
6
- def self.pp_header( headers ) ## check: rename to print_headers or prettyprint_header - why? why not?
7
- puts "#{headers.size} columns:"
8
- headers.each_with_index do |header,i|
9
- puts " #{i+1}: #{header}"
10
- end
11
- end
12
-
13
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+
4
+ class CsvUtils
5
+
6
+ def self.pp_header( headers ) ## check: rename to print_headers or prettyprint_header - why? why not?
7
+ puts "#{headers.size} columns:"
8
+ headers.each_with_index do |header,i|
9
+ puts " #{i+1}: #{header}"
10
+ end
11
+ end
12
+
13
+
14
+ ###################
15
+ ## (simple) helper for "csv-encoding" values / row
16
+ ##
17
+ ## todo: check for newline in value too? why? why not?
18
+ def self.csv_row( *values, sep: ',' )
19
+ values.map do |value|
20
+ if value && (value.index( sep ) || value.index('"'))
21
+ ## escape embedded double quotes by doubling them, then enclose the value in double quotes
22
+ value = %Q{"#{value.gsub('"', '""')}"}
23
+ else
24
+ value
25
+ end
26
+ end
27
+ end
28
+
29
+ end # class CsvUtils
@@ -1,24 +1,24 @@
1
- # encoding: utf-8
2
-
3
-
4
- ## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
5
- class CsvUtils
6
-
7
- MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
- MINOR = 2
9
- PATCH = 2
10
- VERSION = [MAJOR,MINOR,PATCH].join('.')
11
-
12
- def self.version
13
- VERSION
14
- end
15
-
16
- def self.banner
17
- "csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18
- end
19
-
20
- def self.root
21
- File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
22
- end
23
-
24
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+
4
+ ## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
5
+ class CsvUtils
6
+
7
+ MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
+ MINOR = 3
9
+ PATCH = 0
10
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
11
+
12
+ def self.version
13
+ VERSION
14
+ end
15
+
16
+ def self.banner
17
+ "csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18
+ end
19
+
20
+ def self.root
21
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
22
+ end
23
+
24
+ end # class CsvUtils
@@ -1,16 +1,16 @@
1
- ## $:.unshift(File.dirname(__FILE__))
2
-
3
- ## minitest setup
4
-
5
- require 'minitest/autorun'
6
-
7
-
8
- ## our own code
9
- require 'csvutils'
10
-
11
- ## add test_data_dir helper
12
- class CsvUtils
13
- def self.test_data_dir
14
- "#{root}/test/data"
15
- end
16
- end
1
+ ## $:.unshift(File.dirname(__FILE__))
2
+
3
+ ## minitest setup
4
+
5
+ require 'minitest/autorun'
6
+
7
+
8
+ ## our own code
9
+ require 'csvutils'
10
+
11
+ ## add test_data_dir helper
12
+ class CsvUtils
13
+ def self.test_data_dir
14
+ "#{root}/datasets"
15
+ end
16
+ end
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_cut.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestCut < MiniTest::Test
11
+
12
+ def test_eng
13
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
+ columns = [ 'HomeTeam', 'FTHG', 'FTAG', 'AwayTeam', 'Date' ]
15
+ CsvUtils.cut( path, *columns, output: './tmp/cut_test_eng.csv' )
16
+ end
17
+
18
+ def test_at
19
+ path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
20
+ columns = [ 'Home', 'HG', 'AG', 'Away', 'Date', 'Time' ]
21
+ CsvUtils.cut( path, *columns, output: './tmp/cut_test_at.csv' )
22
+ end
23
+
24
+ def test_de
25
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
26
+ columns = ['Saison', 'Spieltag',
27
+ 'Heim', 'Ergebnis', 'Gast', 'Datum', 'Uhrzeit' ]
28
+ CsvUtils.cut( path, *columns, sep: ';', output: './tmp/cut_test_de.csv' )
29
+ end
30
+
31
+ end # class TestCut
@@ -0,0 +1,30 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_head.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestHead < MiniTest::Test
11
+
12
+ def test_eng
13
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
+
15
+ CsvUtils.head( path )
16
+ end
17
+
18
+ def test_at
19
+ path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
20
+
21
+ CsvUtils.head( path )
22
+ end
23
+
24
+ def test_de
25
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
26
+
27
+ CsvUtils.head( path, sep: ';' )
28
+ end
29
+
30
+ end # class TestHead
@@ -1,50 +1,50 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_headers.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestHeaders < MiniTest::Test
11
-
12
-
13
- ##
14
- # Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
15
- # Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
16
- # B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
17
- # WHH,WHD,WHA,VCH,VCD,VCA,
18
- # Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
19
- # BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
20
- def test_eng
21
- path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
22
-
23
- headers = CsvUtils.header( path )
24
- pp headers
25
-
26
- assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
27
- end
28
-
29
- ###
30
- # Country,League,Season,Date,Time,Home,Away,HG,AG,
31
- # Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
32
- def test_at
33
- path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
34
-
35
- headers = CsvUtils.header( path )
36
- pp headers
37
-
38
- assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
39
- end
40
-
41
- def test_de
42
- path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
43
-
44
- headers = CsvUtils.header( path, sep: ';' )
45
- pp headers
46
-
47
- assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
48
- end
49
-
50
- end # class TestHeaders
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_header.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestHeader < MiniTest::Test
11
+
12
+
13
+ ##
14
+ # Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
15
+ # Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
16
+ # B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
17
+ # WHH,WHD,WHA,VCH,VCD,VCA,
18
+ # Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
19
+ # BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
20
+ def test_eng
21
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
22
+
23
+ headers = CsvUtils.header( path )
24
+ pp headers
25
+
26
+ assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
27
+ end
28
+
29
+ ###
30
+ # Country,League,Season,Date,Time,Home,Away,HG,AG,
31
+ # Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
32
+ def test_at
33
+ path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
34
+
35
+ headers = CsvUtils.header( path )
36
+ pp headers
37
+
38
+ assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
39
+ end
40
+
41
+ def test_de
42
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
43
+
44
+ headers = CsvUtils.header( path, sep: ';' )
45
+ pp headers
46
+
47
+ assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
48
+ end
49
+
50
+ end # class TestHeader
@@ -1,44 +1,44 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_misc.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestMiscellaneous < MiniTest::Test
11
-
12
- def test_eng
13
- path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
-
15
- CsvUtils.test( path )
16
-
17
- CsvUtils.stat( path )
18
- CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )
19
-
20
- assert true
21
- end
22
-
23
- def test_test_de
24
- path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
25
-
26
- CsvUtils.test( path, sep: ';' )
27
-
28
- CsvUtils.stat( path, sep: ';' )
29
- CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )
30
-
31
- assert true
32
- end
33
-
34
- def test_test_at
35
- path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
36
-
37
- CsvUtils.test( path )
38
-
39
- CsvUtils.stat( path )
40
- CsvUtils.stat( path, 'Season', 'Home', 'Away' )
41
- assert true
42
- end
43
-
44
- end # class TestMiscellaneous
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_misc.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestMiscellaneous < MiniTest::Test
11
+
12
+ def test_eng
13
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
+
15
+ CsvUtils.test( path )
16
+
17
+ CsvUtils.stat( path )
18
+ CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )
19
+
20
+ assert true
21
+ end
22
+
23
+ def test_test_de
24
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
25
+
26
+ CsvUtils.test( path, sep: ';' )
27
+
28
+ CsvUtils.stat( path, sep: ';' )
29
+ CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )
30
+
31
+ assert true
32
+ end
33
+
34
+ def test_test_at
35
+ path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
36
+
37
+ CsvUtils.test( path )
38
+
39
+ CsvUtils.stat( path )
40
+ CsvUtils.stat( path, 'Season', 'Home', 'Away' )
41
+ assert true
42
+ end
43
+
44
+ end # class TestMiscellaneous
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_split.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestSplit < MiniTest::Test
11
+
12
+ def test_eng
13
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
+ columns = [ 'HomeTeam' ]
15
+ CsvUtils.split( path, *columns ) do |values, chunk|
16
+ pp values
17
+ pp chunk
18
+ end
19
+ end
20
+
21
+
22
+ def test_de
23
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
24
+ columns = ['Saison', 'Spieltag' ]
25
+ CsvUtils.split( path, *columns, sep: ';' ) do |values, chunk|
26
+ pp values
27
+ pp chunk
28
+ end
29
+ end
30
+
31
+ end # class TestSplit