sportdb-config 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -93,6 +93,10 @@ class ClubIndex
93
93
  attr_reader :errors
94
94
  def errors?() @errors.empty? == false; end
95
95
 
96
+ def mappings() @clubs_by_name; end ## todo/check: rename to index or something - why? why not?
97
+ def clubs() @clubs.values; end
98
+
99
+
96
100
 
97
101
  def add( rec_or_recs ) ## add club record / alt_names
98
102
  recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
@@ -202,6 +206,9 @@ class ClubIndex
202
206
 
203
207
 
204
208
  def dump_duplicates # debug helper - report duplicate club name records
209
+
210
+ ## todo/fix: remove club.duplicates - already included in reports - see TeamDuplicatePart
211
+ ## more a "feature" of Clubs than ClubIndex class - why? why not?
205
212
  @clubs.values.each do |club|
206
213
  if club.duplicates?
207
214
  duplicates = club.duplicates
@@ -1,123 +1,123 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
-
7
- class Configuration
8
-
9
- ##
10
- ## todo: allow configure of countries_dir like clubs_dir
11
- ## "fallback" and use a default built-in world/countries.txt
12
-
13
- ## todo/check: rename to country_mappings/index - why? why not?
14
- ## or countries_by_code or countries_by_key
15
- def countries
16
- @countries ||= build_country_index
17
- @countries
18
- end
19
-
20
- def build_country_index ## todo/check: rename to setup_country_index or read_country_index - why? why not?
21
- recs = read_csv( "#{SportDb::Boot.data_dir}/world/countries.txt" )
22
- CountryIndex.new( recs )
23
- end
24
-
25
-
26
-
27
- def clubs
28
- @clubs ||= build_club_index
29
- @clubs
30
- end
31
-
32
- ####
33
- # todo/fix: find a better way to configure club / team datasets
34
- attr_accessor :clubs_dir
35
- def clubs_dir() @clubs_dir ||= './clubs'; end
36
-
37
-
38
- CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
39
- (?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.txt
40
- clubs\.txt$
41
- }x
42
-
43
- def find_clubs_datafiles( path )
44
- datafiles = [] ## note: [country, path] pairs for now
45
-
46
- ## check all txt files as candidates (MUST include country code for now)
47
- candidates = Dir.glob( "#{path}/**/*.txt" )
48
- pp candidates
49
- candidates.each do |candidate|
50
- datafiles << candidate if CLUBS_REGEX.match( candidate )
51
- end
52
-
53
- pp datafiles
54
- datafiles
55
- end
56
-
57
-
58
- def build_club_index
59
- ## unify team names; team (builtin/known/shared) name mappings
60
- ## cleanup team names - use local ("native") name with umlaut etc.
61
- recs = []
62
-
63
- ## todo/fix: pass along / use country code too
64
- ## note: country code no longer needed in path (is now expected as heading inside the file)
65
-
66
- ## todo/fix: add to teamreader
67
- ## check that name and alt_names for a club are all unique (not duplicates)
68
- datafiles = find_clubs_datafiles( clubs_dir )
69
- datafiles.each do |datafile|
70
- recs += ClubReader.read( datafile )
71
- end
72
-
73
-
74
- clubs = ClubIndex.new
75
- clubs.add( recs )
76
-
77
- if clubs.errors?
78
- puts ""
79
- puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
80
- puts " #{clubs.errors.size} errors:"
81
- pp clubs.errors
82
- ## exit 1
83
- end
84
-
85
- clubs
86
- end # method build_club_index
87
-
88
-
89
-
90
-
91
- def leagues
92
- read_leagues() if @leagues.nil?
93
- @leagues
94
- end
95
-
96
- def read_leagues
97
- #####
98
- # add / read-in leagues config
99
- @leagues = LeagueConfig.new
100
-
101
- self ## return self for chaining
102
- end
103
- end # class Configuration
104
-
105
-
106
-
107
-
108
-
109
- ## lets you use
110
- ## SportDb::Import.configure do |config|
111
- ## config.hello = 'World'
112
- ## end
113
-
114
- def self.configure
115
- yield( config )
116
- end
117
-
118
- def self.config
119
- @config ||= Configuration.new
120
- end
121
-
122
- end # module Import
123
- end # module SportDb
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
+ class Configuration
8
+
9
+ ##
10
+ ## todo: allow configure of countries_dir like clubs_dir
11
+ ## "fallback" and use a default built-in world/countries.txt
12
+
13
+ ## todo/check: rename to country_mappings/index - why? why not?
14
+ ## or countries_by_code or countries_by_key
15
+ def countries
16
+ @countries ||= build_country_index
17
+ @countries
18
+ end
19
+
20
+ def build_country_index ## todo/check: rename to setup_country_index or read_country_index - why? why not?
21
+ recs = read_csv( "#{SportDb::Boot.data_dir}/world/countries.txt" )
22
+ CountryIndex.new( recs )
23
+ end
24
+
25
+
26
+
27
+ def clubs
28
+ @clubs ||= build_club_index
29
+ @clubs
30
+ end
31
+
32
+ ####
33
+ # todo/fix: find a better way to configure club / team datasets
34
+ attr_accessor :clubs_dir
35
+ def clubs_dir() @clubs_dir ||= './clubs'; end
36
+
37
+
38
+ CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
39
+ (?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.txt
40
+ clubs\.txt$
41
+ }x
42
+
43
+ def find_clubs_datafiles( path )
44
+ datafiles = [] ## note: [country, path] pairs for now
45
+
46
+ ## check all txt files as candidates (MUST include country code for now)
47
+ candidates = Dir.glob( "#{path}/**/*.txt" )
48
+ pp candidates
49
+ candidates.each do |candidate|
50
+ datafiles << candidate if CLUBS_REGEX.match( candidate )
51
+ end
52
+
53
+ pp datafiles
54
+ datafiles
55
+ end
56
+
57
+
58
+ def build_club_index
59
+ ## unify team names; team (builtin/known/shared) name mappings
60
+ ## cleanup team names - use local ("native") name with umlaut etc.
61
+ recs = []
62
+
63
+ ## todo/fix: pass along / use country code too
64
+ ## note: country code no longer needed in path (is now expected as heading inside the file)
65
+
66
+ ## todo/fix: add to teamreader
67
+ ## check that name and alt_names for a club are all unique (not duplicates)
68
+ datafiles = find_clubs_datafiles( clubs_dir )
69
+ datafiles.each do |datafile|
70
+ recs += ClubReader.read( datafile )
71
+ end
72
+
73
+
74
+ clubs = ClubIndex.new
75
+ clubs.add( recs )
76
+
77
+ if clubs.errors?
78
+ puts ""
79
+ puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
80
+ puts " #{clubs.errors.size} errors:"
81
+ pp clubs.errors
82
+ ## exit 1
83
+ end
84
+
85
+ clubs
86
+ end # method build_club_index
87
+
88
+
89
+
90
+
91
+ def leagues
92
+ read_leagues() if @leagues.nil?
93
+ @leagues
94
+ end
95
+
96
+ def read_leagues
97
+ #####
98
+ # add / read-in leagues config
99
+ @leagues = LeagueConfig.new
100
+
101
+ self ## return self for chaining
102
+ end
103
+ end # class Configuration
104
+
105
+
106
+
107
+
108
+
109
+ ## lets you use
110
+ ## SportDb::Import.configure do |config|
111
+ ## config.hello = 'World'
112
+ ## end
113
+
114
+ def self.configure
115
+ yield( config )
116
+ end
117
+
118
+ def self.config
119
+ @config ||= Configuration.new
120
+ end
121
+
122
+ end # module Import
123
+ end # module SportDb
@@ -1,118 +1,118 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
-
7
- class LeagueConfig ## use LeagueInfo or LeagueMap or LeagueHash or similar
8
-
9
- def initialize
10
-
11
- ## just use leagues without latest for latest - why? why not?
12
- @leagues_latest = {
13
- 'es' => { '1' => 'liga', # spanish liga 1
14
- '2' => 'liga2', # spanish liga 2
15
- },
16
- 'it' => { '1' => 'seriea', # italian serie a
17
- '2' => 'serieb', # italian serie b
18
- },
19
- 'de' => { '1' => 'bundesliga', # german bundesliga
20
- '2' => 'bundesliga2', # german 2. bundesliga
21
- },
22
- 'nl' => { '1' => 'eredivisie' }, # dutch eredivisie
23
- 'be' => { '1' => 'proleague' }, # belgian pro league
24
- 'pt' => { '1' => 'liga' }, # portugese Primeira Liga
25
- 'tr' => { '1' => 'superlig' }, # turkish Süper Lig
26
-
27
- # note: eng now read from txt
28
- # 'eng' => { '1' => 'premierleague', # english premier league
29
- # '2' => 'championship', # english championship league
30
- # '3' => 'league1', # english league 1
31
- # },
32
- }
33
-
34
- ## change history to past or changes/changelog something - why? why not?
35
- @leagues_history = {
36
-
37
- # note: eng now read from txt
38
- # 'eng' => {
39
- # ## until (including) 2003-04 season
40
- # '2003-04' => { '1' => 'premierleague', # english premier league
41
- # '2' => 'division1', # english division 1
42
- # },
43
- # ## until (including) 1991-92 season
44
- # '1991-92' => { '1' => 'division1', # english division 1
45
- # '2' => 'division2', # english division 2
46
- # }
47
- # }
48
- }
49
-
50
- pp @leagues_latest
51
- pp @leagues_history
52
-
53
- %w(eng sco fr gr).each do |country|
54
- hash = LeagueReader.read( "#{Boot.data_dir}/leagues/#{country}.txt" )
55
- pp hash
56
-
57
- hash.each do |season,league_hash|
58
- if season == '*' ## assume latest / default season
59
- @leagues_latest[ country ] = league_hash
60
- else
61
- @leagues_history[ country ] ||= {}
62
- @leagues_history[ country ][ season ] = league_hash
63
- end
64
- end
65
- end
66
-
67
- pp @leagues_latest
68
- pp @leagues_history
69
- end
70
-
71
-
72
-
73
- def basename( league, country:, season: )
74
- ## todo/check: rename league: to key: - why? why not?
75
-
76
- if country.include?( '-' ) ## assume package name e.g. eng-england etc.
77
- ## cut off country code from package name
78
- cc = country.split( '-' )[0] # use first part
79
- else
80
- cc = country
81
- end
82
-
83
- if season
84
- puts " checking season >#{season}<"
85
- ## check history if season is provided / supplied / known
86
- history = @leagues_history[ cc ]
87
- if history
88
- season_start_year = SeasonUtils.start_year( season ).to_i
89
- ##
90
- ## todo: sort season keys - why? why not? -- assume reverse chronological order for now
91
- history.keys.reverse.each do |key|
92
- history_season_start_year = SeasonUtils.start_year( key ).to_i
93
- puts " #{season_start_year} <= #{history_season_start_year} - #{season_start_year <= history_season_start_year}"
94
- if season_start_year <= history_season_start_year
95
- result = history[ key ][ league ]
96
- if result
97
- return "#{league}-#{result}"
98
- else
99
- return nil
100
- end
101
- end
102
- end
103
- end
104
- end
105
-
106
- latest = @leagues_latest[ cc ]
107
- if latest
108
- result = latest[ league ]
109
- return "#{league}-#{result}" if result
110
- end
111
-
112
- nil
113
- end # method basename
114
- end # class LeagueConfig
115
-
116
-
117
- end ## module Import
118
- end ## module SportDb
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
+ class LeagueConfig ## use LeagueInfo or LeagueMap or LeagueHash or similar
8
+
9
+ def initialize
10
+
11
+ ## just use leagues without latest for latest - why? why not?
12
+ @leagues_latest = {
13
+ 'es' => { '1' => 'liga', # spanish liga 1
14
+ '2' => 'liga2', # spanish liga 2
15
+ },
16
+ 'it' => { '1' => 'seriea', # italian serie a
17
+ '2' => 'serieb', # italian serie b
18
+ },
19
+ 'de' => { '1' => 'bundesliga', # german bundesliga
20
+ '2' => 'bundesliga2', # german 2. bundesliga
21
+ },
22
+ 'nl' => { '1' => 'eredivisie' }, # dutch eredivisie
23
+ 'be' => { '1' => 'proleague' }, # belgian pro league
24
+ 'pt' => { '1' => 'liga' }, # portugese Primeira Liga
25
+ 'tr' => { '1' => 'superlig' }, # turkish Süper Lig
26
+
27
+ # note: eng now read from txt
28
+ # 'eng' => { '1' => 'premierleague', # english premier league
29
+ # '2' => 'championship', # english championship league
30
+ # '3' => 'league1', # english league 1
31
+ # },
32
+ }
33
+
34
+ ## change history to past or changes/changelog something - why? why not?
35
+ @leagues_history = {
36
+
37
+ # note: eng now read from txt
38
+ # 'eng' => {
39
+ # ## until (including) 2003-04 season
40
+ # '2003-04' => { '1' => 'premierleague', # english premier league
41
+ # '2' => 'division1', # english division 1
42
+ # },
43
+ # ## until (including) 1991-92 season
44
+ # '1991-92' => { '1' => 'division1', # english division 1
45
+ # '2' => 'division2', # english division 2
46
+ # }
47
+ # }
48
+ }
49
+
50
+ pp @leagues_latest
51
+ pp @leagues_history
52
+
53
+ %w(eng sco fr gr).each do |country|
54
+ hash = LeagueReader.read( "#{Boot.data_dir}/leagues/#{country}.txt" )
55
+ pp hash
56
+
57
+ hash.each do |season,league_hash|
58
+ if season == '*' ## assume latest / default season
59
+ @leagues_latest[ country ] = league_hash
60
+ else
61
+ @leagues_history[ country ] ||= {}
62
+ @leagues_history[ country ][ season ] = league_hash
63
+ end
64
+ end
65
+ end
66
+
67
+ pp @leagues_latest
68
+ pp @leagues_history
69
+ end
70
+
71
+
72
+
73
+ def basename( league, country:, season: )
74
+ ## todo/check: rename league: to key: - why? why not?
75
+
76
+ if country.include?( '-' ) ## assume package name e.g. eng-england etc.
77
+ ## cut off country code from package name
78
+ cc = country.split( '-' )[0] # use first part
79
+ else
80
+ cc = country
81
+ end
82
+
83
+ if season
84
+ puts " checking season >#{season}<"
85
+ ## check history if season is provided / supplied / known
86
+ history = @leagues_history[ cc ]
87
+ if history
88
+ season_start_year = SeasonUtils.start_year( season ).to_i
89
+ ##
90
+ ## todo: sort season keys - why? why not? -- assume reverse chronological order for now
91
+ history.keys.reverse.each do |key|
92
+ history_season_start_year = SeasonUtils.start_year( key ).to_i
93
+ puts " #{season_start_year} <= #{history_season_start_year} - #{season_start_year <= history_season_start_year}"
94
+ if season_start_year <= history_season_start_year
95
+ result = history[ key ][ league ]
96
+ if result
97
+ return "#{league}-#{result}"
98
+ else
99
+ return nil
100
+ end
101
+ end
102
+ end
103
+ end
104
+ end
105
+
106
+ latest = @leagues_latest[ cc ]
107
+ if latest
108
+ result = latest[ league ]
109
+ return "#{league}-#{result}" if result
110
+ end
111
+
112
+ nil
113
+ end # method basename
114
+ end # class LeagueConfig
115
+
116
+
117
+ end ## module Import
118
+ end ## module SportDb