csvutils 0.2.2 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +3 -3
- data/LICENSE.md +116 -0
- data/Manifest.txt +8 -4
- data/README.md +302 -286
- data/Rakefile +30 -26
- data/bin/csvcut +17 -17
- data/bin/csvhead +17 -17
- data/bin/csvheader +17 -17
- data/bin/csvsplit +17 -17
- data/bin/csvstat +17 -17
- data/{test/data → datasets}/at-austria/AUT.csv +363 -363
- data/{test/data → datasets}/de-deutschland/bundesliga.csv +481 -481
- data/{test/data → datasets}/eng-england/2017-18/E0.csv +381 -381
- data/lib/csvutils.rb +32 -31
- data/lib/csvutils/commands/cut.rb +43 -43
- data/lib/csvutils/commands/head.rb +40 -40
- data/lib/csvutils/commands/header.rb +35 -35
- data/lib/csvutils/commands/split.rb +41 -41
- data/lib/csvutils/commands/stat.rb +41 -41
- data/lib/csvutils/cut.rb +43 -50
- data/lib/csvutils/head.rb +22 -25
- data/lib/csvutils/header.rb +16 -28
- data/lib/csvutils/split.rb +106 -107
- data/lib/csvutils/stat.rb +81 -86
- data/lib/csvutils/test.rb +19 -22
- data/lib/csvutils/utils.rb +29 -13
- data/lib/csvutils/version.rb +24 -24
- data/test/helper.rb +16 -16
- data/test/test_cut.rb +31 -0
- data/test/test_head.rb +30 -0
- data/test/{test_headers.rb → test_header.rb} +50 -50
- data/test/test_misc.rb +44 -44
- data/test/test_split.rb +31 -0
- data/test/test_version.rb +20 -20
- metadata +28 -9
data/lib/csvutils/test.rb
CHANGED
@@ -1,22 +1,19 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
class CsvUtils
|
5
|
-
|
6
|
-
## test or dry run to check if rows can get read/scanned
|
7
|
-
def self.test( path, sep: ',' )
|
8
|
-
i = 0
|
9
|
-
csv_options = {
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
end
|
21
|
-
|
22
|
-
end # class CsvUtils
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvUtils
|
5
|
+
|
6
|
+
## test or dry run to check if rows can get read/scanned
|
7
|
+
def self.test( path, sep: ',' )
|
8
|
+
i = 0
|
9
|
+
csv_options = { sep: sep }
|
10
|
+
|
11
|
+
CsvHash.foreach( path, csv_options ) do |rec|
|
12
|
+
i += 1
|
13
|
+
print '.' if i % 100 == 0
|
14
|
+
end
|
15
|
+
|
16
|
+
puts " #{i} rows"
|
17
|
+
end
|
18
|
+
|
19
|
+
end # class CsvUtils
|
data/lib/csvutils/utils.rb
CHANGED
@@ -1,13 +1,29 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
class CsvUtils
|
5
|
-
|
6
|
-
def self.pp_header( headers ) ## check: rename to print_headers or prettyprint_header - why? why not?
|
7
|
-
puts "#{headers.size} columns:"
|
8
|
-
headers.each_with_index do |header,i|
|
9
|
-
puts " #{i+1}: #{header}"
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvUtils
|
5
|
+
|
6
|
+
def self.pp_header( headers ) ## check: rename to print_headers or prettyprint_header - why? why not?
|
7
|
+
puts "#{headers.size} columns:"
|
8
|
+
headers.each_with_index do |header,i|
|
9
|
+
puts " #{i+1}: #{header}"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
|
14
|
+
###################
|
15
|
+
## (simple) helper for "csv-encoding" values / row
|
16
|
+
##
|
17
|
+
## todo: check for newline in value too? why? why not?
|
18
|
+
def self.csv_row( *values, sep: ',' )
|
19
|
+
values.map do |value|
|
20
|
+
if value && (value.index( sep ) || value.index('"'))
|
21
|
+
## double up any embedded double quotes and enclose value in double quotes
|
22
|
+
value = %Q{"#{value.gsub('"', '""')}"}
|
23
|
+
else
|
24
|
+
value
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end # class CsvUtils
|
data/lib/csvutils/version.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
|
5
|
-
class CsvUtils
|
6
|
-
|
7
|
-
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
8
|
-
MINOR = 2
|
9
|
-
PATCH = 2
|
10
|
-
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
11
|
-
|
12
|
-
def self.version
|
13
|
-
VERSION
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.banner
|
17
|
-
"csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
18
|
-
end
|
19
|
-
|
20
|
-
def self.root
|
21
|
-
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
22
|
-
end
|
23
|
-
|
24
|
-
end # class CsvUtils
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
|
5
|
+
class CsvUtils
|
6
|
+
|
7
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
8
|
+
MINOR = 3
|
9
|
+
PATCH = 0
|
10
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
11
|
+
|
12
|
+
def self.version
|
13
|
+
VERSION
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.banner
|
17
|
+
"csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.root
|
21
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
22
|
+
end
|
23
|
+
|
24
|
+
end # class CsvUtils
|
data/test/helper.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
## $:.unshift(File.dirname(__FILE__))
|
2
|
-
|
3
|
-
## minitest setup
|
4
|
-
|
5
|
-
require 'minitest/autorun'
|
6
|
-
|
7
|
-
|
8
|
-
## our own code
|
9
|
-
require 'csvutils'
|
10
|
-
|
11
|
-
## add test_data_dir helper
|
12
|
-
class CsvUtils
|
13
|
-
def self.test_data_dir
|
14
|
-
"#{root}/test/data"
|
15
|
-
end
|
16
|
-
end
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
## minitest setup
|
4
|
+
|
5
|
+
require 'minitest/autorun'
|
6
|
+
|
7
|
+
|
8
|
+
## our own code
|
9
|
+
require 'csvutils'
|
10
|
+
|
11
|
+
## add test_data_dir helper
|
12
|
+
class CsvUtils
|
13
|
+
def self.test_data_dir
|
14
|
+
"#{root}/datasets"
|
15
|
+
end
|
16
|
+
end
|
data/test/test_cut.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_cut.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestCut < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
+
columns = [ 'HomeTeam', 'FTHG', 'FTAG', 'AwayTeam', 'Date' ]
|
15
|
+
CsvUtils.cut( path, *columns, output: './tmp/cut_test_eng.csv' )
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_at
|
19
|
+
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
20
|
+
columns = [ 'Home', 'HG', 'AG', 'Away', 'Date', 'Time' ]
|
21
|
+
CsvUtils.cut( path, *columns, output: './tmp/cut_test_at.csv' )
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_de
|
25
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
26
|
+
columns = ['Saison', 'Spieltag',
|
27
|
+
'Heim', 'Ergebnis', 'Gast', 'Datum', 'Uhrzeit' ]
|
28
|
+
CsvUtils.cut( path, *columns, sep: ';', output: './tmp/cut_test_de.csv' )
|
29
|
+
end
|
30
|
+
|
31
|
+
end # class TestCut
|
data/test/test_head.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_head.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestHead < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
+
|
15
|
+
CsvUtils.head( path )
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_at
|
19
|
+
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
20
|
+
|
21
|
+
CsvUtils.head( path )
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_de
|
25
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
26
|
+
|
27
|
+
CsvUtils.head( path, sep: ';' )
|
28
|
+
end
|
29
|
+
|
30
|
+
end # class TestHead
|
data/test/{test_headers.rb → test_header.rb}
@@ -1,50 +1,50 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
###
|
4
|
-
# to run use
|
5
|
-
# ruby -I ./lib -I ./test test/test_headers.rb
|
6
|
-
|
7
|
-
|
8
|
-
require 'helper'
|
9
|
-
|
10
|
-
class
|
11
|
-
|
12
|
-
|
13
|
-
##
|
14
|
-
# Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
|
15
|
-
# Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
|
16
|
-
# B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
|
17
|
-
# WHH,WHD,WHA,VCH,VCD,VCA,
|
18
|
-
# Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
|
19
|
-
# BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
|
20
|
-
def test_eng
|
21
|
-
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
22
|
-
|
23
|
-
headers = CsvUtils.header( path )
|
24
|
-
pp headers
|
25
|
-
|
26
|
-
assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
|
27
|
-
end
|
28
|
-
|
29
|
-
###
|
30
|
-
# Country,League,Season,Date,Time,Home,Away,HG,AG,
|
31
|
-
# Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
|
32
|
-
def test_at
|
33
|
-
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
34
|
-
|
35
|
-
headers = CsvUtils.header( path )
|
36
|
-
pp headers
|
37
|
-
|
38
|
-
assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_de
|
42
|
-
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
43
|
-
|
44
|
-
headers = CsvUtils.header( path, sep: ';' )
|
45
|
-
pp headers
|
46
|
-
|
47
|
-
assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
|
48
|
-
end
|
49
|
-
|
50
|
-
end # class
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_header.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestHeader < MiniTest::Test
|
11
|
+
|
12
|
+
|
13
|
+
##
|
14
|
+
# Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
|
15
|
+
# Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
|
16
|
+
# B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
|
17
|
+
# WHH,WHD,WHA,VCH,VCD,VCA,
|
18
|
+
# Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
|
19
|
+
# BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
|
20
|
+
def test_eng
|
21
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
22
|
+
|
23
|
+
headers = CsvUtils.header( path )
|
24
|
+
pp headers
|
25
|
+
|
26
|
+
assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
|
27
|
+
end
|
28
|
+
|
29
|
+
###
|
30
|
+
# Country,League,Season,Date,Time,Home,Away,HG,AG,
|
31
|
+
# Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
|
32
|
+
def test_at
|
33
|
+
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
34
|
+
|
35
|
+
headers = CsvUtils.header( path )
|
36
|
+
pp headers
|
37
|
+
|
38
|
+
assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_de
|
42
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
43
|
+
|
44
|
+
headers = CsvUtils.header( path, sep: ';' )
|
45
|
+
pp headers
|
46
|
+
|
47
|
+
assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
|
48
|
+
end
|
49
|
+
|
50
|
+
end # class TestHeader
|
data/test/test_misc.rb
CHANGED
@@ -1,44 +1,44 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
###
|
4
|
-
# to run use
|
5
|
-
# ruby -I ./lib -I ./test test/test_misc.rb
|
6
|
-
|
7
|
-
|
8
|
-
require 'helper'
|
9
|
-
|
10
|
-
class TestMiscellaneous < MiniTest::Test
|
11
|
-
|
12
|
-
def test_eng
|
13
|
-
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
-
|
15
|
-
CsvUtils.test( path )
|
16
|
-
|
17
|
-
CsvUtils.stat( path )
|
18
|
-
CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )
|
19
|
-
|
20
|
-
assert true
|
21
|
-
end
|
22
|
-
|
23
|
-
def test_test_de
|
24
|
-
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
25
|
-
|
26
|
-
CsvUtils.test( path, sep: ';' )
|
27
|
-
|
28
|
-
CsvUtils.stat( path, sep: ';' )
|
29
|
-
CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )
|
30
|
-
|
31
|
-
assert true
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_test_at
|
35
|
-
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
36
|
-
|
37
|
-
CsvUtils.test( path )
|
38
|
-
|
39
|
-
CsvUtils.stat( path )
|
40
|
-
CsvUtils.stat( path, 'Season', 'Home', 'Away' )
|
41
|
-
assert true
|
42
|
-
end
|
43
|
-
|
44
|
-
end # class TestMiscellaneous
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_misc.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestMiscellaneous < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
+
|
15
|
+
CsvUtils.test( path )
|
16
|
+
|
17
|
+
CsvUtils.stat( path )
|
18
|
+
CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )
|
19
|
+
|
20
|
+
assert true
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_test_de
|
24
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
25
|
+
|
26
|
+
CsvUtils.test( path, sep: ';' )
|
27
|
+
|
28
|
+
CsvUtils.stat( path, sep: ';' )
|
29
|
+
CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )
|
30
|
+
|
31
|
+
assert true
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_test_at
|
35
|
+
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
36
|
+
|
37
|
+
CsvUtils.test( path )
|
38
|
+
|
39
|
+
CsvUtils.stat( path )
|
40
|
+
CsvUtils.stat( path, 'Season', 'Home', 'Away' )
|
41
|
+
assert true
|
42
|
+
end
|
43
|
+
|
44
|
+
end # class TestMiscellaneous
|
data/test/test_split.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_split.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestSplit < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
+
columns = [ 'HomeTeam' ]
|
15
|
+
CsvUtils.split( path, *columns ) do |values, chunk|
|
16
|
+
pp values
|
17
|
+
pp chunk
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
def test_de
|
23
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
24
|
+
columns = ['Saison', 'Spieltag' ]
|
25
|
+
CsvUtils.split( path, *columns, sep: ';' ) do |values, chunk|
|
26
|
+
pp values
|
27
|
+
pp chunk
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
end # class TestSplit
|