csvutils 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,19 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvUtils
5
-
6
- ## test or dry run to check if rows can get read/scanned
7
- def self.test( path, sep: ',' )
8
- i = 0
9
- csv_options = { headers: true,
10
- col_sep: sep,
11
- external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding!!!
12
- }
13
-
14
- CSV.foreach( path, csv_options ) do |row|
15
- i += 1
16
- print '.' if i % 100 == 0
17
- end
18
-
19
- puts " #{i} rows"
20
- end
21
-
22
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+
4
+ class CsvUtils
5
+
6
+ ## test or dry run to check if rows can get read/scanned
7
+ def self.test( path, sep: ',' )
8
+ i = 0
9
+ csv_options = { sep: sep }
10
+
11
+ CsvHash.foreach( path, csv_options ) do |rec|
12
+ i += 1
13
+ print '.' if i % 100 == 0
14
+ end
15
+
16
+ puts " #{i} rows"
17
+ end
18
+
19
+ end # class CsvUtils
@@ -1,13 +1,29 @@
1
- # encoding: utf-8
2
-
3
-
4
- class CsvUtils
5
-
6
- def self.pp_header( headers ) ## check: rename to print_headers or prettyprint_header - why? why not?
7
- puts "#{headers.size} columns:"
8
- headers.each_with_index do |header,i|
9
- puts " #{i+1}: #{header}"
10
- end
11
- end
12
-
13
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+
4
+ class CsvUtils
5
+
6
+ def self.pp_header( headers ) ## check: rename to print_headers or prettyprint_header - why? why not?
7
+ puts "#{headers.size} columns:"
8
+ headers.each_with_index do |header,i|
9
+ puts " #{i+1}: #{header}"
10
+ end
11
+ end
12
+
13
+
14
+ ###################
15
+ ## (simple) helper for "csv-encoding" values / row
16
+ ##
17
+ ## todo: check for newline in value too? why? why not?
18
+ def self.csv_row( *values, sep: ',' )
19
+ values.map do |value|
20
+ if value && (value.index( sep ) || value.index('"'))
21
+ ## double up any embedded double quotes and enclose the value in double quotes
22
+ value = %Q{"#{value.gsub('"', '""')}"}
23
+ else
24
+ value
25
+ end
26
+ end
27
+ end
28
+
29
+ end # class CsvUtils
@@ -1,24 +1,24 @@
1
- # encoding: utf-8
2
-
3
-
4
- ## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
5
- class CsvUtils
6
-
7
- MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
- MINOR = 2
9
- PATCH = 2
10
- VERSION = [MAJOR,MINOR,PATCH].join('.')
11
-
12
- def self.version
13
- VERSION
14
- end
15
-
16
- def self.banner
17
- "csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18
- end
19
-
20
- def self.root
21
- File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
22
- end
23
-
24
- end # class CsvUtils
1
+ # encoding: utf-8
2
+
3
+
4
+ ## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
5
+ class CsvUtils
6
+
7
+ MAJOR = 0 ## todo: namespace inside version or something - why? why not??
8
+ MINOR = 3
9
+ PATCH = 0
10
+ VERSION = [MAJOR,MINOR,PATCH].join('.')
11
+
12
+ def self.version
13
+ VERSION
14
+ end
15
+
16
+ def self.banner
17
+ "csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
18
+ end
19
+
20
+ def self.root
21
+ File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
22
+ end
23
+
24
+ end # class CsvUtils
@@ -1,16 +1,16 @@
1
- ## $:.unshift(File.dirname(__FILE__))
2
-
3
- ## minitest setup
4
-
5
- require 'minitest/autorun'
6
-
7
-
8
- ## our own code
9
- require 'csvutils'
10
-
11
- ## add test_data_dir helper
12
- class CsvUtils
13
- def self.test_data_dir
14
- "#{root}/test/data"
15
- end
16
- end
1
+ ## $:.unshift(File.dirname(__FILE__))
2
+
3
+ ## minitest setup
4
+
5
+ require 'minitest/autorun'
6
+
7
+
8
+ ## our own code
9
+ require 'csvutils'
10
+
11
+ ## add test_data_dir helper
12
+ class CsvUtils
13
+ def self.test_data_dir
14
+ "#{root}/datasets"
15
+ end
16
+ end
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_cut.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestCut < MiniTest::Test
11
+
12
+ def test_eng
13
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
+ columns = [ 'HomeTeam', 'FTHG', 'FTAG', 'AwayTeam', 'Date' ]
15
+ CsvUtils.cut( path, *columns, output: './tmp/cut_test_eng.csv' )
16
+ end
17
+
18
+ def test_at
19
+ path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
20
+ columns = [ 'Home', 'HG', 'AG', 'Away', 'Date', 'Time' ]
21
+ CsvUtils.cut( path, *columns, output: './tmp/cut_test_at.csv' )
22
+ end
23
+
24
+ def test_de
25
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
26
+ columns = ['Saison', 'Spieltag',
27
+ 'Heim', 'Ergebnis', 'Gast', 'Datum', 'Uhrzeit' ]
28
+ CsvUtils.cut( path, *columns, sep: ';', output: './tmp/cut_test_de.csv' )
29
+ end
30
+
31
+ end # class TestCut
@@ -0,0 +1,30 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_head.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestHead < MiniTest::Test
11
+
12
+ def test_eng
13
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
+
15
+ CsvUtils.head( path )
16
+ end
17
+
18
+ def test_at
19
+ path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
20
+
21
+ CsvUtils.head( path )
22
+ end
23
+
24
+ def test_de
25
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
26
+
27
+ CsvUtils.head( path, sep: ';' )
28
+ end
29
+
30
+ end # class TestHead
@@ -1,50 +1,50 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_headers.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestHeaders < MiniTest::Test
11
-
12
-
13
- ##
14
- # Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
15
- # Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
16
- # B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
17
- # WHH,WHD,WHA,VCH,VCD,VCA,
18
- # Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
19
- # BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
20
- def test_eng
21
- path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
22
-
23
- headers = CsvUtils.header( path )
24
- pp headers
25
-
26
- assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
27
- end
28
-
29
- ###
30
- # Country,League,Season,Date,Time,Home,Away,HG,AG,
31
- # Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
32
- def test_at
33
- path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
34
-
35
- headers = CsvUtils.header( path )
36
- pp headers
37
-
38
- assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
39
- end
40
-
41
- def test_de
42
- path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
43
-
44
- headers = CsvUtils.header( path, sep: ';' )
45
- pp headers
46
-
47
- assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
48
- end
49
-
50
- end # class TestHeaders
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_header.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestHeader < MiniTest::Test
11
+
12
+
13
+ ##
14
+ # Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
15
+ # Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
16
+ # B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
17
+ # WHH,WHD,WHA,VCH,VCD,VCA,
18
+ # Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
19
+ # BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
20
+ def test_eng
21
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
22
+
23
+ headers = CsvUtils.header( path )
24
+ pp headers
25
+
26
+ assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
27
+ end
28
+
29
+ ###
30
+ # Country,League,Season,Date,Time,Home,Away,HG,AG,
31
+ # Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
32
+ def test_at
33
+ path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
34
+
35
+ headers = CsvUtils.header( path )
36
+ pp headers
37
+
38
+ assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
39
+ end
40
+
41
+ def test_de
42
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
43
+
44
+ headers = CsvUtils.header( path, sep: ';' )
45
+ pp headers
46
+
47
+ assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
48
+ end
49
+
50
+ end # class TestHeader
@@ -1,44 +1,44 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_misc.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestMiscellaneous < MiniTest::Test
11
-
12
- def test_eng
13
- path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
-
15
- CsvUtils.test( path )
16
-
17
- CsvUtils.stat( path )
18
- CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )
19
-
20
- assert true
21
- end
22
-
23
- def test_test_de
24
- path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
25
-
26
- CsvUtils.test( path, sep: ';' )
27
-
28
- CsvUtils.stat( path, sep: ';' )
29
- CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )
30
-
31
- assert true
32
- end
33
-
34
- def test_test_at
35
- path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
36
-
37
- CsvUtils.test( path )
38
-
39
- CsvUtils.stat( path )
40
- CsvUtils.stat( path, 'Season', 'Home', 'Away' )
41
- assert true
42
- end
43
-
44
- end # class TestMiscellaneous
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_misc.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestMiscellaneous < MiniTest::Test
11
+
12
+ def test_eng
13
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
+
15
+ CsvUtils.test( path )
16
+
17
+ CsvUtils.stat( path )
18
+ CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )
19
+
20
+ assert true
21
+ end
22
+
23
+ def test_test_de
24
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
25
+
26
+ CsvUtils.test( path, sep: ';' )
27
+
28
+ CsvUtils.stat( path, sep: ';' )
29
+ CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )
30
+
31
+ assert true
32
+ end
33
+
34
+ def test_test_at
35
+ path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
36
+
37
+ CsvUtils.test( path )
38
+
39
+ CsvUtils.stat( path )
40
+ CsvUtils.stat( path, 'Season', 'Home', 'Away' )
41
+ assert true
42
+ end
43
+
44
+ end # class TestMiscellaneous
@@ -0,0 +1,31 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_split.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestSplit < MiniTest::Test
11
+
12
+ def test_eng
13
+ path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
14
+ columns = [ 'HomeTeam' ]
15
+ CsvUtils.split( path, *columns ) do |values, chunk|
16
+ pp values
17
+ pp chunk
18
+ end
19
+ end
20
+
21
+
22
+ def test_de
23
+ path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
24
+ columns = ['Saison', 'Spieltag' ]
25
+ CsvUtils.split( path, *columns, sep: ';' ) do |values, chunk|
26
+ pp values
27
+ pp chunk
28
+ end
29
+ end
30
+
31
+ end # class TestSplit