csvutils 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +3 -3
- data/LICENSE.md +116 -0
- data/Manifest.txt +8 -4
- data/README.md +302 -286
- data/Rakefile +30 -26
- data/bin/csvcut +17 -17
- data/bin/csvhead +17 -17
- data/bin/csvheader +17 -17
- data/bin/csvsplit +17 -17
- data/bin/csvstat +17 -17
- data/{test/data → datasets}/at-austria/AUT.csv +363 -363
- data/{test/data → datasets}/de-deutschland/bundesliga.csv +481 -481
- data/{test/data → datasets}/eng-england/2017-18/E0.csv +381 -381
- data/lib/csvutils.rb +32 -31
- data/lib/csvutils/commands/cut.rb +43 -43
- data/lib/csvutils/commands/head.rb +40 -40
- data/lib/csvutils/commands/header.rb +35 -35
- data/lib/csvutils/commands/split.rb +41 -41
- data/lib/csvutils/commands/stat.rb +41 -41
- data/lib/csvutils/cut.rb +43 -50
- data/lib/csvutils/head.rb +22 -25
- data/lib/csvutils/header.rb +16 -28
- data/lib/csvutils/split.rb +106 -107
- data/lib/csvutils/stat.rb +81 -86
- data/lib/csvutils/test.rb +19 -22
- data/lib/csvutils/utils.rb +29 -13
- data/lib/csvutils/version.rb +24 -24
- data/test/helper.rb +16 -16
- data/test/test_cut.rb +31 -0
- data/test/test_head.rb +30 -0
- data/test/{test_headers.rb → test_header.rb} +50 -50
- data/test/test_misc.rb +44 -44
- data/test/test_split.rb +31 -0
- data/test/test_version.rb +20 -20
- metadata +28 -9
data/lib/csvutils/test.rb
CHANGED
@@ -1,22 +1,19 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
class CsvUtils
|
5
|
-
|
6
|
-
## test or dry run to check if rows can get read/scanned
|
7
|
-
def self.test( path, sep: ',' )
|
8
|
-
i = 0
|
9
|
-
csv_options = {
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
end
|
21
|
-
|
22
|
-
end # class CsvUtils
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvUtils
|
5
|
+
|
6
|
+
## test or dry run to check if rows can get read/scanned
|
7
|
+
def self.test( path, sep: ',' )
|
8
|
+
i = 0
|
9
|
+
csv_options = { sep: sep }
|
10
|
+
|
11
|
+
CsvHash.foreach( path, csv_options ) do |rec|
|
12
|
+
i += 1
|
13
|
+
print '.' if i % 100 == 0
|
14
|
+
end
|
15
|
+
|
16
|
+
puts " #{i} rows"
|
17
|
+
end
|
18
|
+
|
19
|
+
end # class CsvUtils
|
data/lib/csvutils/utils.rb
CHANGED
@@ -1,13 +1,29 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
class CsvUtils
|
5
|
-
|
6
|
-
def self.pp_header( headers ) ## check: rename to print_headers or prettyprint_header - why? why not?
|
7
|
-
puts "#{headers.size} columns:"
|
8
|
-
headers.each_with_index do |header,i|
|
9
|
-
puts " #{i+1}: #{header}"
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
class CsvUtils
|
5
|
+
|
6
|
+
def self.pp_header( headers ) ## check: rename to print_headers or prettyprint_header - why? why not?
|
7
|
+
puts "#{headers.size} columns:"
|
8
|
+
headers.each_with_index do |header,i|
|
9
|
+
puts " #{i+1}: #{header}"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
|
14
|
+
###################
|
15
|
+
## (simple) helper for "csv-encoding" values / row
|
16
|
+
##
|
17
|
+
## todo: check for newline in value too? why? why not?
|
18
|
+
def self.csv_row( *values, sep: ',' )
|
19
|
+
values.map do |value|
|
20
|
+
if value && (value.index( sep ) || value.index('"'))
|
21
|
+
## double up any embedded quotes and enclose the value in double quotes
|
22
|
+
value = %Q{"#{value.gsub('"', '""')}"}
|
23
|
+
else
|
24
|
+
value
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end # class CsvUtils
|
data/lib/csvutils/version.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
|
5
|
-
class CsvUtils
|
6
|
-
|
7
|
-
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
8
|
-
MINOR = 2
|
9
|
-
PATCH = 2
|
10
|
-
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
11
|
-
|
12
|
-
def self.version
|
13
|
-
VERSION
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.banner
|
17
|
-
"csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
18
|
-
end
|
19
|
-
|
20
|
-
def self.root
|
21
|
-
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
22
|
-
end
|
23
|
-
|
24
|
-
end # class CsvUtils
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
## note: for now CsvUtils is a class!!! NOT a module - change - why? why not?
|
5
|
+
class CsvUtils
|
6
|
+
|
7
|
+
MAJOR = 0 ## todo: namespace inside version or something - why? why not??
|
8
|
+
MINOR = 3
|
9
|
+
PATCH = 0
|
10
|
+
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
11
|
+
|
12
|
+
def self.version
|
13
|
+
VERSION
|
14
|
+
end
|
15
|
+
|
16
|
+
def self.banner
|
17
|
+
"csvutils/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
|
18
|
+
end
|
19
|
+
|
20
|
+
def self.root
|
21
|
+
File.expand_path( File.dirname(File.dirname(File.dirname(__FILE__))) )
|
22
|
+
end
|
23
|
+
|
24
|
+
end # class CsvUtils
|
data/test/helper.rb
CHANGED
@@ -1,16 +1,16 @@
|
|
1
|
-
## $:.unshift(File.dirname(__FILE__))
|
2
|
-
|
3
|
-
## minitest setup
|
4
|
-
|
5
|
-
require 'minitest/autorun'
|
6
|
-
|
7
|
-
|
8
|
-
## our own code
|
9
|
-
require 'csvutils'
|
10
|
-
|
11
|
-
## add test_data_dir helper
|
12
|
-
class CsvUtils
|
13
|
-
def self.test_data_dir
|
14
|
-
"#{root}/test/data"
|
15
|
-
end
|
16
|
-
end
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
2
|
+
|
3
|
+
## minitest setup
|
4
|
+
|
5
|
+
require 'minitest/autorun'
|
6
|
+
|
7
|
+
|
8
|
+
## our own code
|
9
|
+
require 'csvutils'
|
10
|
+
|
11
|
+
## add test_data_dir helper
|
12
|
+
class CsvUtils
|
13
|
+
def self.test_data_dir
|
14
|
+
"#{root}/datasets"
|
15
|
+
end
|
16
|
+
end
|
data/test/test_cut.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_cut.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestCut < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
+
columns = [ 'HomeTeam', 'FTHG', 'FTAG', 'AwayTeam', 'Date' ]
|
15
|
+
CsvUtils.cut( path, *columns, output: './tmp/cut_test_eng.csv' )
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_at
|
19
|
+
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
20
|
+
columns = [ 'Home', 'HG', 'AG', 'Away', 'Date', 'Time' ]
|
21
|
+
CsvUtils.cut( path, *columns, output: './tmp/cut_test_at.csv' )
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_de
|
25
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
26
|
+
columns = ['Saison', 'Spieltag',
|
27
|
+
'Heim', 'Ergebnis', 'Gast', 'Datum', 'Uhrzeit' ]
|
28
|
+
CsvUtils.cut( path, *columns, sep: ';', output: './tmp/cut_test_de.csv' )
|
29
|
+
end
|
30
|
+
|
31
|
+
end # class TestCut
|
data/test/test_head.rb
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_head.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestHead < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
+
|
15
|
+
CsvUtils.head( path )
|
16
|
+
end
|
17
|
+
|
18
|
+
def test_at
|
19
|
+
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
20
|
+
|
21
|
+
CsvUtils.head( path )
|
22
|
+
end
|
23
|
+
|
24
|
+
def test_de
|
25
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
26
|
+
|
27
|
+
CsvUtils.head( path, sep: ';' )
|
28
|
+
end
|
29
|
+
|
30
|
+
end # class TestHead
|
data/test/{test_headers.rb → test_header.rb}
@@ -1,50 +1,50 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
###
|
4
|
-
# to run use
|
5
|
-
# ruby -I ./lib -I ./test test/
|
6
|
-
|
7
|
-
|
8
|
-
require 'helper'
|
9
|
-
|
10
|
-
class
|
11
|
-
|
12
|
-
|
13
|
-
##
|
14
|
-
# Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
|
15
|
-
# Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
|
16
|
-
# B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
|
17
|
-
# WHH,WHD,WHA,VCH,VCD,VCA,
|
18
|
-
# Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
|
19
|
-
# BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
|
20
|
-
def test_eng
|
21
|
-
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
22
|
-
|
23
|
-
headers = CsvUtils.header( path )
|
24
|
-
pp headers
|
25
|
-
|
26
|
-
assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
|
27
|
-
end
|
28
|
-
|
29
|
-
###
|
30
|
-
# Country,League,Season,Date,Time,Home,Away,HG,AG,
|
31
|
-
# Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
|
32
|
-
def test_at
|
33
|
-
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
34
|
-
|
35
|
-
headers = CsvUtils.header( path )
|
36
|
-
pp headers
|
37
|
-
|
38
|
-
assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
|
39
|
-
end
|
40
|
-
|
41
|
-
def test_de
|
42
|
-
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
43
|
-
|
44
|
-
headers = CsvUtils.header( path, sep: ';' )
|
45
|
-
pp headers
|
46
|
-
|
47
|
-
assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
|
48
|
-
end
|
49
|
-
|
50
|
-
end # class
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_header.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestHeader < MiniTest::Test
|
11
|
+
|
12
|
+
|
13
|
+
##
|
14
|
+
# Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,
|
15
|
+
# Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,
|
16
|
+
# B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,LBH,LBD,LBA,PSH,PSD,PSA,
|
17
|
+
# WHH,WHD,WHA,VCH,VCD,VCA,
|
18
|
+
# Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,
|
19
|
+
# BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,PSCH,PSCD,PSCA
|
20
|
+
def test_eng
|
21
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
22
|
+
|
23
|
+
headers = CsvUtils.header( path )
|
24
|
+
pp headers
|
25
|
+
|
26
|
+
assert_equal ['Date','HomeTeam','AwayTeam','FTHG','FTAG','HTHG','HTAG'], headers
|
27
|
+
end
|
28
|
+
|
29
|
+
###
|
30
|
+
# Country,League,Season,Date,Time,Home,Away,HG,AG,
|
31
|
+
# Res,PH,PD,PA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA
|
32
|
+
def test_at
|
33
|
+
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
34
|
+
|
35
|
+
headers = CsvUtils.header( path )
|
36
|
+
pp headers
|
37
|
+
|
38
|
+
assert_equal ['Season','Date','Time','Home','Away','HG','AG'], headers
|
39
|
+
end
|
40
|
+
|
41
|
+
def test_de
|
42
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
43
|
+
|
44
|
+
headers = CsvUtils.header( path, sep: ';' )
|
45
|
+
pp headers
|
46
|
+
|
47
|
+
assert_equal ['Spielzeit','Saison','Spieltag','Datum','Uhrzeit','Heim','Gast','Ergebnis','Halbzeit'], headers
|
48
|
+
end
|
49
|
+
|
50
|
+
end # class TestHeader
|
data/test/test_misc.rb
CHANGED
@@ -1,44 +1,44 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
###
|
4
|
-
# to run use
|
5
|
-
# ruby -I ./lib -I ./test test/test_misc.rb
|
6
|
-
|
7
|
-
|
8
|
-
require 'helper'
|
9
|
-
|
10
|
-
class TestMiscellaneous < MiniTest::Test
|
11
|
-
|
12
|
-
def test_eng
|
13
|
-
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
-
|
15
|
-
CsvUtils.test( path )
|
16
|
-
|
17
|
-
CsvUtils.stat( path )
|
18
|
-
CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )
|
19
|
-
|
20
|
-
assert true
|
21
|
-
end
|
22
|
-
|
23
|
-
def test_test_de
|
24
|
-
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
25
|
-
|
26
|
-
CsvUtils.test( path, sep: ';' )
|
27
|
-
|
28
|
-
CsvUtils.stat( path, sep: ';' )
|
29
|
-
CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )
|
30
|
-
|
31
|
-
assert true
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_test_at
|
35
|
-
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
36
|
-
|
37
|
-
CsvUtils.test( path )
|
38
|
-
|
39
|
-
CsvUtils.stat( path )
|
40
|
-
CsvUtils.stat( path, 'Season', 'Home', 'Away' )
|
41
|
-
assert true
|
42
|
-
end
|
43
|
-
|
44
|
-
end # class TestMiscellaneous
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_misc.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestMiscellaneous < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
+
|
15
|
+
CsvUtils.test( path )
|
16
|
+
|
17
|
+
CsvUtils.stat( path )
|
18
|
+
CsvUtils.stat( path, 'HomeTeam', 'AwayTeam' )
|
19
|
+
|
20
|
+
assert true
|
21
|
+
end
|
22
|
+
|
23
|
+
def test_test_de
|
24
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
25
|
+
|
26
|
+
CsvUtils.test( path, sep: ';' )
|
27
|
+
|
28
|
+
CsvUtils.stat( path, sep: ';' )
|
29
|
+
CsvUtils.stat( path, 'Spielzeit', 'Saison', 'Heim', 'Gast', sep: ';' )
|
30
|
+
|
31
|
+
assert true
|
32
|
+
end
|
33
|
+
|
34
|
+
def test_test_at
|
35
|
+
path = "#{CsvUtils.test_data_dir}/at-austria/AUT.csv"
|
36
|
+
|
37
|
+
CsvUtils.test( path )
|
38
|
+
|
39
|
+
CsvUtils.stat( path )
|
40
|
+
CsvUtils.stat( path, 'Season', 'Home', 'Away' )
|
41
|
+
assert true
|
42
|
+
end
|
43
|
+
|
44
|
+
end # class TestMiscellaneous
|
data/test/test_split.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_split.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestSplit < MiniTest::Test
|
11
|
+
|
12
|
+
def test_eng
|
13
|
+
path = "#{CsvUtils.test_data_dir}/eng-england/2017-18/E0.csv"
|
14
|
+
columns = [ 'HomeTeam' ]
|
15
|
+
CsvUtils.split( path, *columns ) do |values, chunk|
|
16
|
+
pp values
|
17
|
+
pp chunk
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
|
22
|
+
def test_de
|
23
|
+
path = "#{CsvUtils.test_data_dir}/de-deutschland/bundesliga.csv"
|
24
|
+
columns = ['Saison', 'Spieltag' ]
|
25
|
+
CsvUtils.split( path, *columns, sep: ';' ) do |values, chunk|
|
26
|
+
pp values
|
27
|
+
pp chunk
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
end # class TestSplit
|