sportdb-config 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/config/leagues/fr.txt +9 -9
- data/config/leagues/gr.txt +7 -7
- data/config/leagues/sco.txt +19 -19
- data/config/world/eng.txt +162 -162
- data/lib/sportdb/config/club_reader.rb +278 -278
- data/lib/sportdb/config/clubs.rb +7 -0
- data/lib/sportdb/config/config.rb +123 -123
- data/lib/sportdb/config/league.rb +118 -118
- data/lib/sportdb/config/league_reader.rb +65 -65
- data/lib/sportdb/config/league_utils.rb +24 -24
- data/lib/sportdb/config/variants.rb +91 -81
- data/lib/sportdb/config/version.rb +1 -1
- data/test/test_club_reader.rb +150 -150
- data/test/test_league_reader.rb +54 -54
- data/test/test_league_utils.rb +46 -46
- data/test/test_season_utils.rb +29 -29
- data/test/test_variants.rb +14 -0
- metadata +12 -6
data/lib/sportdb/config/clubs.rb
CHANGED
@@ -93,6 +93,10 @@ class ClubIndex
|
|
93
93
|
attr_reader :errors
|
94
94
|
def errors?() @errors.empty? == false; end
|
95
95
|
|
96
|
+
def mappings() @clubs_by_name; end ## todo/check: rename to index or something - why? why not?
|
97
|
+
def clubs() @clubs.values; end
|
98
|
+
|
99
|
+
|
96
100
|
|
97
101
|
def add( rec_or_recs ) ## add club record / alt_names
|
98
102
|
recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
|
@@ -202,6 +206,9 @@ class ClubIndex
|
|
202
206
|
|
203
207
|
|
204
208
|
def dump_duplicates # debug helper - report duplicate club name records
|
209
|
+
|
210
|
+
## todo/fix: remove club.duplicates - already included in reports - see TeamDuplicatePart
|
211
|
+
## more a "feature" of Clubs than ClubIndex class - why? why not?
|
205
212
|
@clubs.values.each do |club|
|
206
213
|
if club.duplicates?
|
207
214
|
duplicates = club.duplicates
|
@@ -1,123 +1,123 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
|
7
|
-
class Configuration
|
8
|
-
|
9
|
-
##
|
10
|
-
## todo: allow configure of countries_dir like clubs_dir
|
11
|
-
## "fallback" and use a default built-in world/countries.txt
|
12
|
-
|
13
|
-
## todo/check: rename to country_mappings/index - why? why not?
|
14
|
-
## or countries_by_code or countries_by_key
|
15
|
-
def countries
|
16
|
-
@countries ||= build_country_index
|
17
|
-
@countries
|
18
|
-
end
|
19
|
-
|
20
|
-
def build_country_index ## todo/check: rename to setup_country_index or read_country_index - why? why not?
|
21
|
-
recs = read_csv( "#{SportDb::Boot.data_dir}/world/countries.txt" )
|
22
|
-
CountryIndex.new( recs )
|
23
|
-
end
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
def clubs
|
28
|
-
@clubs ||= build_club_index
|
29
|
-
@clubs
|
30
|
-
end
|
31
|
-
|
32
|
-
####
|
33
|
-
# todo/fix: find a better way to configure club / team datasets
|
34
|
-
attr_accessor :clubs_dir
|
35
|
-
def clubs_dir() @clubs_dir ||= './clubs'; end
|
36
|
-
|
37
|
-
|
38
|
-
CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
39
|
-
(?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.txt
|
40
|
-
clubs\.txt$
|
41
|
-
}x
|
42
|
-
|
43
|
-
def find_clubs_datafiles( path )
|
44
|
-
datafiles = [] ## note: [country, path] pairs for now
|
45
|
-
|
46
|
-
## check all txt files as candidates (MUST include country code for now)
|
47
|
-
candidates = Dir.glob( "#{path}/**/*.txt" )
|
48
|
-
pp candidates
|
49
|
-
candidates.each do |candidate|
|
50
|
-
datafiles << candidate if CLUBS_REGEX.match( candidate )
|
51
|
-
end
|
52
|
-
|
53
|
-
pp datafiles
|
54
|
-
datafiles
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
def build_club_index
|
59
|
-
## unify team names; team (builtin/known/shared) name mappings
|
60
|
-
## cleanup team names - use local ("native") name with umlaut etc.
|
61
|
-
recs = []
|
62
|
-
|
63
|
-
## todo/fix: pass along / use country code too
|
64
|
-
## note: country code no longer needed in path (is now expected as heading inside the file)
|
65
|
-
|
66
|
-
## todo/fix: add to teamreader
|
67
|
-
## check that name and alt_names for a club are all unique (not duplicates)
|
68
|
-
datafiles = find_clubs_datafiles( clubs_dir )
|
69
|
-
datafiles.each do |datafile|
|
70
|
-
recs += ClubReader.read( datafile )
|
71
|
-
end
|
72
|
-
|
73
|
-
|
74
|
-
clubs = ClubIndex.new
|
75
|
-
clubs.add( recs )
|
76
|
-
|
77
|
-
if clubs.errors?
|
78
|
-
puts ""
|
79
|
-
puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
80
|
-
puts " #{clubs.errors.size} errors:"
|
81
|
-
pp clubs.errors
|
82
|
-
## exit 1
|
83
|
-
end
|
84
|
-
|
85
|
-
clubs
|
86
|
-
end # method build_club_index
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
def leagues
|
92
|
-
read_leagues() if @leagues.nil?
|
93
|
-
@leagues
|
94
|
-
end
|
95
|
-
|
96
|
-
def read_leagues
|
97
|
-
#####
|
98
|
-
# add / read-in leagues config
|
99
|
-
@leagues = LeagueConfig.new
|
100
|
-
|
101
|
-
self ## return self for chaining
|
102
|
-
end
|
103
|
-
end # class Configuration
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
## lets you use
|
110
|
-
## SportDb::Import.configure do |config|
|
111
|
-
## config.hello = 'World'
|
112
|
-
## end
|
113
|
-
|
114
|
-
def self.configure
|
115
|
-
yield( config )
|
116
|
-
end
|
117
|
-
|
118
|
-
def self.config
|
119
|
-
@config ||= Configuration.new
|
120
|
-
end
|
121
|
-
|
122
|
-
end # module Import
|
123
|
-
end # module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
module Import
|
5
|
+
|
6
|
+
|
7
|
+
class Configuration
|
8
|
+
|
9
|
+
##
|
10
|
+
## todo: allow configure of countries_dir like clubs_dir
|
11
|
+
## "fallback" and use a default built-in world/countries.txt
|
12
|
+
|
13
|
+
## todo/check: rename to country_mappings/index - why? why not?
|
14
|
+
## or countries_by_code or countries_by_key
|
15
|
+
def countries
|
16
|
+
@countries ||= build_country_index
|
17
|
+
@countries
|
18
|
+
end
|
19
|
+
|
20
|
+
def build_country_index ## todo/check: rename to setup_country_index or read_country_index - why? why not?
|
21
|
+
recs = read_csv( "#{SportDb::Boot.data_dir}/world/countries.txt" )
|
22
|
+
CountryIndex.new( recs )
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
def clubs
|
28
|
+
@clubs ||= build_club_index
|
29
|
+
@clubs
|
30
|
+
end
|
31
|
+
|
32
|
+
####
|
33
|
+
# todo/fix: find a better way to configure club / team datasets
|
34
|
+
attr_accessor :clubs_dir
|
35
|
+
def clubs_dir() @clubs_dir ||= './clubs'; end
|
36
|
+
|
37
|
+
|
38
|
+
CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
39
|
+
(?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.txt
|
40
|
+
clubs\.txt$
|
41
|
+
}x
|
42
|
+
|
43
|
+
def find_clubs_datafiles( path )
|
44
|
+
datafiles = [] ## note: [country, path] pairs for now
|
45
|
+
|
46
|
+
## check all txt files as candidates (MUST include country code for now)
|
47
|
+
candidates = Dir.glob( "#{path}/**/*.txt" )
|
48
|
+
pp candidates
|
49
|
+
candidates.each do |candidate|
|
50
|
+
datafiles << candidate if CLUBS_REGEX.match( candidate )
|
51
|
+
end
|
52
|
+
|
53
|
+
pp datafiles
|
54
|
+
datafiles
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
def build_club_index
|
59
|
+
## unify team names; team (builtin/known/shared) name mappings
|
60
|
+
## cleanup team names - use local ("native") name with umlaut etc.
|
61
|
+
recs = []
|
62
|
+
|
63
|
+
## todo/fix: pass along / use country code too
|
64
|
+
## note: country code no longer needed in path (is now expected as heading inside the file)
|
65
|
+
|
66
|
+
## todo/fix: add to teamreader
|
67
|
+
## check that name and alt_names for a club are all unique (not duplicates)
|
68
|
+
datafiles = find_clubs_datafiles( clubs_dir )
|
69
|
+
datafiles.each do |datafile|
|
70
|
+
recs += ClubReader.read( datafile )
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
clubs = ClubIndex.new
|
75
|
+
clubs.add( recs )
|
76
|
+
|
77
|
+
if clubs.errors?
|
78
|
+
puts ""
|
79
|
+
puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
80
|
+
puts " #{clubs.errors.size} errors:"
|
81
|
+
pp clubs.errors
|
82
|
+
## exit 1
|
83
|
+
end
|
84
|
+
|
85
|
+
clubs
|
86
|
+
end # method build_club_index
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
def leagues
|
92
|
+
read_leagues() if @leagues.nil?
|
93
|
+
@leagues
|
94
|
+
end
|
95
|
+
|
96
|
+
def read_leagues
|
97
|
+
#####
|
98
|
+
# add / read-in leagues config
|
99
|
+
@leagues = LeagueConfig.new
|
100
|
+
|
101
|
+
self ## return self for chaining
|
102
|
+
end
|
103
|
+
end # class Configuration
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
## lets you use
|
110
|
+
## SportDb::Import.configure do |config|
|
111
|
+
## config.hello = 'World'
|
112
|
+
## end
|
113
|
+
|
114
|
+
def self.configure
|
115
|
+
yield( config )
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.config
|
119
|
+
@config ||= Configuration.new
|
120
|
+
end
|
121
|
+
|
122
|
+
end # module Import
|
123
|
+
end # module SportDb
|
@@ -1,118 +1,118 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
|
7
|
-
class LeagueConfig ## use LeagueInfo or LeagueMap or LeagueHash or similar
|
8
|
-
|
9
|
-
def initialize
|
10
|
-
|
11
|
-
## just use leagues without latest for latest - why? why not?
|
12
|
-
@leagues_latest = {
|
13
|
-
'es' => { '1' => 'liga', # spanish liga 1
|
14
|
-
'2' => 'liga2', # spanish liga 2
|
15
|
-
},
|
16
|
-
'it' => { '1' => 'seriea', # italian serie a
|
17
|
-
'2' => 'serieb', # italian serie b
|
18
|
-
},
|
19
|
-
'de' => { '1' => 'bundesliga', # german bundesliga
|
20
|
-
'2' => 'bundesliga2', # german 2. bundesliga
|
21
|
-
},
|
22
|
-
'nl' => { '1' => 'eredivisie' }, # dutch eredivisie
|
23
|
-
'be' => { '1' => 'proleague' }, # belgian pro league
|
24
|
-
'pt' => { '1' => 'liga' }, # portuguese Primeira Liga
|
25
|
-
'tr' => { '1' => 'superlig' }, # turkish Süper Lig
|
26
|
-
|
27
|
-
# note: eng now read from txt
|
28
|
-
# 'eng' => { '1' => 'premierleague', # english premier league
|
29
|
-
# '2' => 'championship', # english championship league
|
30
|
-
# '3' => 'league1', # english league 1
|
31
|
-
# },
|
32
|
-
}
|
33
|
-
|
34
|
-
## change history to past or changes/changelog something - why? why not?
|
35
|
-
@leagues_history = {
|
36
|
-
|
37
|
-
# note: eng now read from txt
|
38
|
-
# 'eng' => {
|
39
|
-
# ## until (including) 2003-04 season
|
40
|
-
# '2003-04' => { '1' => 'premierleague', # english premier league
|
41
|
-
# '2' => 'division1', # english division 1
|
42
|
-
# },
|
43
|
-
# ## until (including) 1991-92 season
|
44
|
-
# '1991-92' => { '1' => 'division1', # english division 1
|
45
|
-
# '2' => 'division2', # english division 2
|
46
|
-
# }
|
47
|
-
# }
|
48
|
-
}
|
49
|
-
|
50
|
-
pp @leagues_latest
|
51
|
-
pp @leagues_history
|
52
|
-
|
53
|
-
%w(eng sco fr gr).each do |country|
|
54
|
-
hash = LeagueReader.read( "#{Boot.data_dir}/leagues/#{country}.txt" )
|
55
|
-
pp hash
|
56
|
-
|
57
|
-
hash.each do |season,league_hash|
|
58
|
-
if season == '*' ## assume latest / default season
|
59
|
-
@leagues_latest[ country ] = league_hash
|
60
|
-
else
|
61
|
-
@leagues_history[ country ] ||= {}
|
62
|
-
@leagues_history[ country ][ season ] = league_hash
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
pp @leagues_latest
|
68
|
-
pp @leagues_history
|
69
|
-
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
def basename( league, country:, season: )
|
74
|
-
## todo/check: rename league: to key: - why? why not?
|
75
|
-
|
76
|
-
if country.include?( '-' ) ## assume package name e.g. eng-england etc.
|
77
|
-
## cut off country code from package name
|
78
|
-
cc = country.split( '-' )[0] # use first part
|
79
|
-
else
|
80
|
-
cc = country
|
81
|
-
end
|
82
|
-
|
83
|
-
if season
|
84
|
-
puts " checking season >#{season}<"
|
85
|
-
## check history if season is provided / supplied / known
|
86
|
-
history = @leagues_history[ cc ]
|
87
|
-
if history
|
88
|
-
season_start_year = SeasonUtils.start_year( season ).to_i
|
89
|
-
##
|
90
|
-
## todo: sort season keys - why? why not? -- assume reverse chronological order for now
|
91
|
-
history.keys.reverse.each do |key|
|
92
|
-
history_season_start_year = SeasonUtils.start_year( key ).to_i
|
93
|
-
puts " #{season_start_year} <= #{history_season_start_year} - #{season_start_year <= history_season_start_year}"
|
94
|
-
if season_start_year <= history_season_start_year
|
95
|
-
result = history[ key ][ league ]
|
96
|
-
if result
|
97
|
-
return "#{league}-#{result}"
|
98
|
-
else
|
99
|
-
return nil
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
latest = @leagues_latest[ cc ]
|
107
|
-
if latest
|
108
|
-
result = latest[ league ]
|
109
|
-
return "#{league}-#{result}" if result
|
110
|
-
end
|
111
|
-
|
112
|
-
nil
|
113
|
-
end # method basename
|
114
|
-
end # class LeagueConfig
|
115
|
-
|
116
|
-
|
117
|
-
end ## module Import
|
118
|
-
end ## module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
module Import
|
5
|
+
|
6
|
+
|
7
|
+
class LeagueConfig ## use LeagueInfo or LeagueMap or LeagueHash or similar
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
|
11
|
+
## just use leagues without latest for latest - why? why not?
|
12
|
+
@leagues_latest = {
|
13
|
+
'es' => { '1' => 'liga', # spanish liga 1
|
14
|
+
'2' => 'liga2', # spanish liga 2
|
15
|
+
},
|
16
|
+
'it' => { '1' => 'seriea', # italian serie a
|
17
|
+
'2' => 'serieb', # italian serie b
|
18
|
+
},
|
19
|
+
'de' => { '1' => 'bundesliga', # german bundesliga
|
20
|
+
'2' => 'bundesliga2', # german 2. bundesliga
|
21
|
+
},
|
22
|
+
'nl' => { '1' => 'eredivisie' }, # dutch eredivisie
|
23
|
+
'be' => { '1' => 'proleague' }, # belgian pro league
|
24
|
+
'pt' => { '1' => 'liga' }, # portuguese Primeira Liga
|
25
|
+
'tr' => { '1' => 'superlig' }, # turkish Süper Lig
|
26
|
+
|
27
|
+
# note: eng now read from txt
|
28
|
+
# 'eng' => { '1' => 'premierleague', # english premier league
|
29
|
+
# '2' => 'championship', # english championship league
|
30
|
+
# '3' => 'league1', # english league 1
|
31
|
+
# },
|
32
|
+
}
|
33
|
+
|
34
|
+
## change history to past or changes/changelog something - why? why not?
|
35
|
+
@leagues_history = {
|
36
|
+
|
37
|
+
# note: eng now read from txt
|
38
|
+
# 'eng' => {
|
39
|
+
# ## until (including) 2003-04 season
|
40
|
+
# '2003-04' => { '1' => 'premierleague', # english premier league
|
41
|
+
# '2' => 'division1', # english division 1
|
42
|
+
# },
|
43
|
+
# ## until (including) 1991-92 season
|
44
|
+
# '1991-92' => { '1' => 'division1', # english division 1
|
45
|
+
# '2' => 'division2', # english division 2
|
46
|
+
# }
|
47
|
+
# }
|
48
|
+
}
|
49
|
+
|
50
|
+
pp @leagues_latest
|
51
|
+
pp @leagues_history
|
52
|
+
|
53
|
+
%w(eng sco fr gr).each do |country|
|
54
|
+
hash = LeagueReader.read( "#{Boot.data_dir}/leagues/#{country}.txt" )
|
55
|
+
pp hash
|
56
|
+
|
57
|
+
hash.each do |season,league_hash|
|
58
|
+
if season == '*' ## assume latest / default season
|
59
|
+
@leagues_latest[ country ] = league_hash
|
60
|
+
else
|
61
|
+
@leagues_history[ country ] ||= {}
|
62
|
+
@leagues_history[ country ][ season ] = league_hash
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
pp @leagues_latest
|
68
|
+
pp @leagues_history
|
69
|
+
end
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
def basename( league, country:, season: )
|
74
|
+
## todo/check: rename league: to key: - why? why not?
|
75
|
+
|
76
|
+
if country.include?( '-' ) ## assume package name e.g. eng-england etc.
|
77
|
+
## cut off country code from package name
|
78
|
+
cc = country.split( '-' )[0] # use first part
|
79
|
+
else
|
80
|
+
cc = country
|
81
|
+
end
|
82
|
+
|
83
|
+
if season
|
84
|
+
puts " checking season >#{season}<"
|
85
|
+
## check history if season is provided / supplied / known
|
86
|
+
history = @leagues_history[ cc ]
|
87
|
+
if history
|
88
|
+
season_start_year = SeasonUtils.start_year( season ).to_i
|
89
|
+
##
|
90
|
+
## todo: sort season keys - why? why not? -- assume reverse chronological order for now
|
91
|
+
history.keys.reverse.each do |key|
|
92
|
+
history_season_start_year = SeasonUtils.start_year( key ).to_i
|
93
|
+
puts " #{season_start_year} <= #{history_season_start_year} - #{season_start_year <= history_season_start_year}"
|
94
|
+
if season_start_year <= history_season_start_year
|
95
|
+
result = history[ key ][ league ]
|
96
|
+
if result
|
97
|
+
return "#{league}-#{result}"
|
98
|
+
else
|
99
|
+
return nil
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
latest = @leagues_latest[ cc ]
|
107
|
+
if latest
|
108
|
+
result = latest[ league ]
|
109
|
+
return "#{league}-#{result}" if result
|
110
|
+
end
|
111
|
+
|
112
|
+
nil
|
113
|
+
end # method basename
|
114
|
+
end # class LeagueConfig
|
115
|
+
|
116
|
+
|
117
|
+
end ## module Import
|
118
|
+
end ## module SportDb
|