sportdb-config 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/config/leagues/fr.txt +9 -9
- data/config/leagues/gr.txt +7 -7
- data/config/leagues/sco.txt +19 -19
- data/config/world/eng.txt +162 -162
- data/lib/sportdb/config/club_reader.rb +278 -278
- data/lib/sportdb/config/clubs.rb +7 -0
- data/lib/sportdb/config/config.rb +123 -123
- data/lib/sportdb/config/league.rb +118 -118
- data/lib/sportdb/config/league_reader.rb +65 -65
- data/lib/sportdb/config/league_utils.rb +24 -24
- data/lib/sportdb/config/variants.rb +91 -81
- data/lib/sportdb/config/version.rb +1 -1
- data/test/test_club_reader.rb +150 -150
- data/test/test_league_reader.rb +54 -54
- data/test/test_league_utils.rb +46 -46
- data/test/test_season_utils.rb +29 -29
- data/test/test_variants.rb +14 -0
- metadata +12 -6
data/lib/sportdb/config/clubs.rb
CHANGED
@@ -93,6 +93,10 @@ class ClubIndex
|
|
93
93
|
attr_reader :errors
|
94
94
|
def errors?() @errors.empty? == false; end
|
95
95
|
|
96
|
+
def mappings() @clubs_by_name; end ## todo/check: rename to index or something - why? why not?
|
97
|
+
def clubs() @clubs.values; end
|
98
|
+
|
99
|
+
|
96
100
|
|
97
101
|
def add( rec_or_recs ) ## add club record / alt_names
|
98
102
|
recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
|
@@ -202,6 +206,9 @@ class ClubIndex
|
|
202
206
|
|
203
207
|
|
204
208
|
def dump_duplicates # debug helper - report duplicate club name records
|
209
|
+
|
210
|
+
## todo/fix: remove club.duplicates - already included in reports - see TeamDuplicatePart
|
211
|
+
## more a "feature" of Clubs than ClubIndex class - why? why not?
|
205
212
|
@clubs.values.each do |club|
|
206
213
|
if club.duplicates?
|
207
214
|
duplicates = club.duplicates
|
@@ -1,123 +1,123 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
|
7
|
-
class Configuration
|
8
|
-
|
9
|
-
##
|
10
|
-
## todo: allow configure of countries_dir like clubs_dir
|
11
|
-
## "fallback" and use a default built-in world/countries.txt
|
12
|
-
|
13
|
-
## todo/check: rename to country_mappings/index - why? why not?
|
14
|
-
## or countries_by_code or countries_by_key
|
15
|
-
def countries
|
16
|
-
@countries ||= build_country_index
|
17
|
-
@countries
|
18
|
-
end
|
19
|
-
|
20
|
-
def build_country_index ## todo/check: rename to setup_country_index or read_country_index - why? why not?
|
21
|
-
recs = read_csv( "#{SportDb::Boot.data_dir}/world/countries.txt" )
|
22
|
-
CountryIndex.new( recs )
|
23
|
-
end
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
def clubs
|
28
|
-
@clubs ||= build_club_index
|
29
|
-
@clubs
|
30
|
-
end
|
31
|
-
|
32
|
-
####
|
33
|
-
# todo/fix: find a better way to configure club / team datasets
|
34
|
-
attr_accessor :clubs_dir
|
35
|
-
def clubs_dir() @clubs_dir ||= './clubs'; end
|
36
|
-
|
37
|
-
|
38
|
-
CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
39
|
-
(?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.txt
|
40
|
-
clubs\.txt$
|
41
|
-
}x
|
42
|
-
|
43
|
-
def find_clubs_datafiles( path )
|
44
|
-
datafiles = [] ## note: [country, path] pairs for now
|
45
|
-
|
46
|
-
## check all txt files as candidates (MUST include country code for now)
|
47
|
-
candidates = Dir.glob( "#{path}/**/*.txt" )
|
48
|
-
pp candidates
|
49
|
-
candidates.each do |candidate|
|
50
|
-
datafiles << candidate if CLUBS_REGEX.match( candidate )
|
51
|
-
end
|
52
|
-
|
53
|
-
pp datafiles
|
54
|
-
datafiles
|
55
|
-
end
|
56
|
-
|
57
|
-
|
58
|
-
def build_club_index
|
59
|
-
## unify team names; team (builtin/known/shared) name mappings
|
60
|
-
## cleanup team names - use local ("native") name with umlaut etc.
|
61
|
-
recs = []
|
62
|
-
|
63
|
-
## todo/fix: pass along / use country code too
|
64
|
-
## note: country code no longer needed in path (is now expected as heading inside the file)
|
65
|
-
|
66
|
-
## todo/fix: add to teamreader
|
67
|
-
## check that name and alt_names for a club are all unique (not duplicates)
|
68
|
-
datafiles = find_clubs_datafiles( clubs_dir )
|
69
|
-
datafiles.each do |datafile|
|
70
|
-
recs += ClubReader.read( datafile )
|
71
|
-
end
|
72
|
-
|
73
|
-
|
74
|
-
clubs = ClubIndex.new
|
75
|
-
clubs.add( recs )
|
76
|
-
|
77
|
-
if clubs.errors?
|
78
|
-
puts ""
|
79
|
-
puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
80
|
-
puts " #{clubs.errors.size} errors:"
|
81
|
-
pp clubs.errors
|
82
|
-
## exit 1
|
83
|
-
end
|
84
|
-
|
85
|
-
clubs
|
86
|
-
end # method build_club_index
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
def leagues
|
92
|
-
read_leagues() if @leagues.nil?
|
93
|
-
@leagues
|
94
|
-
end
|
95
|
-
|
96
|
-
def read_leagues
|
97
|
-
#####
|
98
|
-
# add / read-in leagues config
|
99
|
-
@leagues = LeagueConfig.new
|
100
|
-
|
101
|
-
self ## return self for chaining
|
102
|
-
end
|
103
|
-
end # class Configuration
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
## lets you use
|
110
|
-
## SportDb::Import.configure do |config|
|
111
|
-
## config.hello = 'World'
|
112
|
-
## end
|
113
|
-
|
114
|
-
def self.configure
|
115
|
-
yield( config )
|
116
|
-
end
|
117
|
-
|
118
|
-
def self.config
|
119
|
-
@config ||= Configuration.new
|
120
|
-
end
|
121
|
-
|
122
|
-
end # module Import
|
123
|
-
end # module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
module Import
|
5
|
+
|
6
|
+
|
7
|
+
class Configuration
|
8
|
+
|
9
|
+
##
|
10
|
+
## todo: allow configure of countries_dir like clubs_dir
|
11
|
+
## "fallback" and use a default built-in world/countries.txt
|
12
|
+
|
13
|
+
## todo/check: rename to country_mappings/index - why? why not?
|
14
|
+
## or countries_by_code or countries_by_key
|
15
|
+
def countries
|
16
|
+
@countries ||= build_country_index
|
17
|
+
@countries
|
18
|
+
end
|
19
|
+
|
20
|
+
def build_country_index ## todo/check: rename to setup_country_index or read_country_index - why? why not?
|
21
|
+
recs = read_csv( "#{SportDb::Boot.data_dir}/world/countries.txt" )
|
22
|
+
CountryIndex.new( recs )
|
23
|
+
end
|
24
|
+
|
25
|
+
|
26
|
+
|
27
|
+
def clubs
|
28
|
+
@clubs ||= build_club_index
|
29
|
+
@clubs
|
30
|
+
end
|
31
|
+
|
32
|
+
####
|
33
|
+
# todo/fix: find a better way to configure club / team datasets
|
34
|
+
attr_accessor :clubs_dir
|
35
|
+
def clubs_dir() @clubs_dir ||= './clubs'; end
|
36
|
+
|
37
|
+
|
38
|
+
CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
39
|
+
(?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.txt
|
40
|
+
clubs\.txt$
|
41
|
+
}x
|
42
|
+
|
43
|
+
def find_clubs_datafiles( path )
|
44
|
+
datafiles = [] ## note: [country, path] pairs for now
|
45
|
+
|
46
|
+
## check all txt files as candidates (MUST include country code for now)
|
47
|
+
candidates = Dir.glob( "#{path}/**/*.txt" )
|
48
|
+
pp candidates
|
49
|
+
candidates.each do |candidate|
|
50
|
+
datafiles << candidate if CLUBS_REGEX.match( candidate )
|
51
|
+
end
|
52
|
+
|
53
|
+
pp datafiles
|
54
|
+
datafiles
|
55
|
+
end
|
56
|
+
|
57
|
+
|
58
|
+
def build_club_index
|
59
|
+
## unify team names; team (builtin/known/shared) name mappings
|
60
|
+
## cleanup team names - use local ("native") name with umlaut etc.
|
61
|
+
recs = []
|
62
|
+
|
63
|
+
## todo/fix: pass along / use country code too
|
64
|
+
## note: country code no longer needed in path (is now expected as heading inside the file)
|
65
|
+
|
66
|
+
## todo/fix: add to teamreader
|
67
|
+
## check that name and alt_names for a club are all unique (not duplicates)
|
68
|
+
datafiles = find_clubs_datafiles( clubs_dir )
|
69
|
+
datafiles.each do |datafile|
|
70
|
+
recs += ClubReader.read( datafile )
|
71
|
+
end
|
72
|
+
|
73
|
+
|
74
|
+
clubs = ClubIndex.new
|
75
|
+
clubs.add( recs )
|
76
|
+
|
77
|
+
if clubs.errors?
|
78
|
+
puts ""
|
79
|
+
puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
80
|
+
puts " #{clubs.errors.size} errors:"
|
81
|
+
pp clubs.errors
|
82
|
+
## exit 1
|
83
|
+
end
|
84
|
+
|
85
|
+
clubs
|
86
|
+
end # method build_club_index
|
87
|
+
|
88
|
+
|
89
|
+
|
90
|
+
|
91
|
+
def leagues
|
92
|
+
read_leagues() if @leagues.nil?
|
93
|
+
@leagues
|
94
|
+
end
|
95
|
+
|
96
|
+
def read_leagues
|
97
|
+
#####
|
98
|
+
# add / read-in leagues config
|
99
|
+
@leagues = LeagueConfig.new
|
100
|
+
|
101
|
+
self ## return self for chaining
|
102
|
+
end
|
103
|
+
end # class Configuration
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
## lets you use
|
110
|
+
## SportDb::Import.configure do |config|
|
111
|
+
## config.hello = 'World'
|
112
|
+
## end
|
113
|
+
|
114
|
+
def self.configure
|
115
|
+
yield( config )
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.config
|
119
|
+
@config ||= Configuration.new
|
120
|
+
end
|
121
|
+
|
122
|
+
end # module Import
|
123
|
+
end # module SportDb
|
@@ -1,118 +1,118 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
|
7
|
-
class LeagueConfig ## use LeagueInfo or LeagueMap or LeagueHash or similar
|
8
|
-
|
9
|
-
def initialize
|
10
|
-
|
11
|
-
## just use leagues without latest for latest - why? why not?
|
12
|
-
@leagues_latest = {
|
13
|
-
'es' => { '1' => 'liga', # spanish liga 1
|
14
|
-
'2' => 'liga2', # spanish liga 2
|
15
|
-
},
|
16
|
-
'it' => { '1' => 'seriea', # italian serie a
|
17
|
-
'2' => 'serieb', # italian serie b
|
18
|
-
},
|
19
|
-
'de' => { '1' => 'bundesliga', # german bundesliga
|
20
|
-
'2' => 'bundesliga2', # german 2. bundesliga
|
21
|
-
},
|
22
|
-
'nl' => { '1' => 'eredivisie' }, # dutch eredivisie
|
23
|
-
'be' => { '1' => 'proleague' }, # belgian pro league
|
24
|
-
'pt' => { '1' => 'liga' }, # portugese Primeira Liga
|
25
|
-
'tr' => { '1' => 'superlig' }, # turkish Süper Lig
|
26
|
-
|
27
|
-
# note: eng now read from txt
|
28
|
-
# 'eng' => { '1' => 'premierleague', # english premier league
|
29
|
-
# '2' => 'championship', # english championship league
|
30
|
-
# '3' => 'league1', # english league 1
|
31
|
-
# },
|
32
|
-
}
|
33
|
-
|
34
|
-
## change history to past or changes/changelog something - why? why not?
|
35
|
-
@leagues_history = {
|
36
|
-
|
37
|
-
# note: eng now read from txt
|
38
|
-
# 'eng' => {
|
39
|
-
# ## until (including) 2003-04 season
|
40
|
-
# '2003-04' => { '1' => 'premierleague', # english premier league
|
41
|
-
# '2' => 'division1', # english division 1
|
42
|
-
# },
|
43
|
-
# ## until (including) 1991-92} season
|
44
|
-
# '1991-92' => { '1' => 'division1', # english division 1
|
45
|
-
# '2' => 'division2', # english division 2
|
46
|
-
# }
|
47
|
-
# }
|
48
|
-
}
|
49
|
-
|
50
|
-
pp @leagues_latest
|
51
|
-
pp @leagues_history
|
52
|
-
|
53
|
-
%w(eng sco fr gr).each do |country|
|
54
|
-
hash = LeagueReader.read( "#{Boot.data_dir}/leagues/#{country}.txt" )
|
55
|
-
pp hash
|
56
|
-
|
57
|
-
hash.each do |season,league_hash|
|
58
|
-
if season == '*' ## assume latest / default season
|
59
|
-
@leagues_latest[ country ] = league_hash
|
60
|
-
else
|
61
|
-
@leagues_history[ country ] ||= {}
|
62
|
-
@leagues_history[ country ][ season ] = league_hash
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
pp @leagues_latest
|
68
|
-
pp @leagues_history
|
69
|
-
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
def basename( league, country:, season: )
|
74
|
-
## todo/check: rename league: to key: - why? why not?
|
75
|
-
|
76
|
-
if country.include?( '-' ) ## assume package name e.g. eng-england etc.
|
77
|
-
## cut off country code from package name
|
78
|
-
cc = country.split( '-' )[0] # use first part
|
79
|
-
else
|
80
|
-
cc = country
|
81
|
-
end
|
82
|
-
|
83
|
-
if season
|
84
|
-
puts " checking season >#{season}<"
|
85
|
-
## check history if season is provided / supplied / known
|
86
|
-
history = @leagues_history[ cc ]
|
87
|
-
if history
|
88
|
-
season_start_year = SeasonUtils.start_year( season ).to_i
|
89
|
-
##
|
90
|
-
## todo: sorty season keys - why? why not? -- assume reverse chronological order for now
|
91
|
-
history.keys.reverse.each do |key|
|
92
|
-
history_season_start_year = SeasonUtils.start_year( key ).to_i
|
93
|
-
puts " #{season_start_year} <= #{history_season_start_year} - #{season_start_year <= history_season_start_year}"
|
94
|
-
if season_start_year <= history_season_start_year
|
95
|
-
result = history[ key ][ league ]
|
96
|
-
if result
|
97
|
-
return "#{league}-#{result}"
|
98
|
-
else
|
99
|
-
return nil
|
100
|
-
end
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|
105
|
-
|
106
|
-
latest = @leagues_latest[ cc ]
|
107
|
-
if latest
|
108
|
-
result = latest[ league ]
|
109
|
-
return "#{league}-#{result}" if result
|
110
|
-
end
|
111
|
-
|
112
|
-
nil
|
113
|
-
end # method basename
|
114
|
-
end # class LeagueConfig
|
115
|
-
|
116
|
-
|
117
|
-
end ## module Import
|
118
|
-
end ## module SportDb
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
module SportDb
|
4
|
+
module Import
|
5
|
+
|
6
|
+
|
7
|
+
class LeagueConfig ## use LeagueInfo or LeagueMap or LeagueHash or similar
|
8
|
+
|
9
|
+
def initialize
|
10
|
+
|
11
|
+
## just use leagues without latest for latest - why? why not?
|
12
|
+
@leagues_latest = {
|
13
|
+
'es' => { '1' => 'liga', # spanish liga 1
|
14
|
+
'2' => 'liga2', # spanish liga 2
|
15
|
+
},
|
16
|
+
'it' => { '1' => 'seriea', # italian serie a
|
17
|
+
'2' => 'serieb', # italian serie b
|
18
|
+
},
|
19
|
+
'de' => { '1' => 'bundesliga', # german bundesliga
|
20
|
+
'2' => 'bundesliga2', # german 2. bundesliga
|
21
|
+
},
|
22
|
+
'nl' => { '1' => 'eredivisie' }, # dutch eredivisie
|
23
|
+
'be' => { '1' => 'proleague' }, # belgian pro league
|
24
|
+
'pt' => { '1' => 'liga' }, # portugese Primeira Liga
|
25
|
+
'tr' => { '1' => 'superlig' }, # turkish Süper Lig
|
26
|
+
|
27
|
+
# note: eng now read from txt
|
28
|
+
# 'eng' => { '1' => 'premierleague', # english premier league
|
29
|
+
# '2' => 'championship', # english championship league
|
30
|
+
# '3' => 'league1', # english league 1
|
31
|
+
# },
|
32
|
+
}
|
33
|
+
|
34
|
+
## change history to past or changes/changelog something - why? why not?
|
35
|
+
@leagues_history = {
|
36
|
+
|
37
|
+
# note: eng now read from txt
|
38
|
+
# 'eng' => {
|
39
|
+
# ## until (including) 2003-04 season
|
40
|
+
# '2003-04' => { '1' => 'premierleague', # english premier league
|
41
|
+
# '2' => 'division1', # english division 1
|
42
|
+
# },
|
43
|
+
# ## until (including) 1991-92 season
|
44
|
+
# '1991-92' => { '1' => 'division1', # english division 1
|
45
|
+
# '2' => 'division2', # english division 2
|
46
|
+
# }
|
47
|
+
# }
|
48
|
+
}
|
49
|
+
|
50
|
+
pp @leagues_latest
|
51
|
+
pp @leagues_history
|
52
|
+
|
53
|
+
%w(eng sco fr gr).each do |country|
|
54
|
+
hash = LeagueReader.read( "#{Boot.data_dir}/leagues/#{country}.txt" )
|
55
|
+
pp hash
|
56
|
+
|
57
|
+
hash.each do |season,league_hash|
|
58
|
+
if season == '*' ## assume latest / default season
|
59
|
+
@leagues_latest[ country ] = league_hash
|
60
|
+
else
|
61
|
+
@leagues_history[ country ] ||= {}
|
62
|
+
@leagues_history[ country ][ season ] = league_hash
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
pp @leagues_latest
|
68
|
+
pp @leagues_history
|
69
|
+
end
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
def basename( league, country:, season: )
|
74
|
+
## todo/check: rename league: to key: - why? why not?
|
75
|
+
|
76
|
+
if country.include?( '-' ) ## assume package name e.g. eng-england etc.
|
77
|
+
## cut off country code from package name
|
78
|
+
cc = country.split( '-' )[0] # use first part
|
79
|
+
else
|
80
|
+
cc = country
|
81
|
+
end
|
82
|
+
|
83
|
+
if season
|
84
|
+
puts " checking season >#{season}<"
|
85
|
+
## check history if season is provided / supplied / known
|
86
|
+
history = @leagues_history[ cc ]
|
87
|
+
if history
|
88
|
+
season_start_year = SeasonUtils.start_year( season ).to_i
|
89
|
+
##
|
90
|
+
## todo: sort season keys - why? why not? -- assume reverse chronological order for now
|
91
|
+
history.keys.reverse.each do |key|
|
92
|
+
history_season_start_year = SeasonUtils.start_year( key ).to_i
|
93
|
+
puts " #{season_start_year} <= #{history_season_start_year} - #{season_start_year <= history_season_start_year}"
|
94
|
+
if season_start_year <= history_season_start_year
|
95
|
+
result = history[ key ][ league ]
|
96
|
+
if result
|
97
|
+
return "#{league}-#{result}"
|
98
|
+
else
|
99
|
+
return nil
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
latest = @leagues_latest[ cc ]
|
107
|
+
if latest
|
108
|
+
result = latest[ league ]
|
109
|
+
return "#{league}-#{result}" if result
|
110
|
+
end
|
111
|
+
|
112
|
+
nil
|
113
|
+
end # method basename
|
114
|
+
end # class LeagueConfig
|
115
|
+
|
116
|
+
|
117
|
+
end ## module Import
|
118
|
+
end ## module SportDb
|