sportdb-config 0.4.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -93,6 +93,10 @@ class ClubIndex
93
93
  attr_reader :errors
94
94
  def errors?() @errors.empty? == false; end
95
95
 
96
+ def mappings() @clubs_by_name; end ## todo/check: rename to index or something - why? why not?
97
+ def clubs() @clubs.values; end
98
+
99
+
96
100
 
97
101
  def add( rec_or_recs ) ## add club record / alt_names
98
102
  recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
@@ -202,6 +206,9 @@ class ClubIndex
202
206
 
203
207
 
204
208
  def dump_duplicates # debug helper - report duplicate club name records
209
+
210
+ ## todo/fix: remove club.duplicates - already included in reports - see TeamDuplicatePart
211
+ ## more a "feature" of Clubs than ClubIndex class - why? why not?
205
212
  @clubs.values.each do |club|
206
213
  if club.duplicates?
207
214
  duplicates = club.duplicates
@@ -1,123 +1,123 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
-
7
- class Configuration
8
-
9
- ##
10
- ## todo: allow configure of countries_dir like clubs_dir
11
- ## "fallback" and use a default built-in world/countries.txt
12
-
13
- ## todo/check: rename to country_mappings/index - why? why not?
14
- ## or countries_by_code or countries_by_key
15
- def countries
16
- @countries ||= build_country_index
17
- @countries
18
- end
19
-
20
- def build_country_index ## todo/check: rename to setup_country_index or read_country_index - why? why not?
21
- recs = read_csv( "#{SportDb::Boot.data_dir}/world/countries.txt" )
22
- CountryIndex.new( recs )
23
- end
24
-
25
-
26
-
27
- def clubs
28
- @clubs ||= build_club_index
29
- @clubs
30
- end
31
-
32
- ####
33
- # todo/fix: find a better way to configure club / team datasets
34
- attr_accessor :clubs_dir
35
- def clubs_dir() @clubs_dir ||= './clubs'; end
36
-
37
-
38
- CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
39
- (?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.txt
40
- clubs\.txt$
41
- }x
42
-
43
- def find_clubs_datafiles( path )
44
- datafiles = [] ## note: [country, path] pairs for now
45
-
46
- ## check all txt files as candidates (MUST include country code for now)
47
- candidates = Dir.glob( "#{path}/**/*.txt" )
48
- pp candidates
49
- candidates.each do |candidate|
50
- datafiles << candidate if CLUBS_REGEX.match( candidate )
51
- end
52
-
53
- pp datafiles
54
- datafiles
55
- end
56
-
57
-
58
- def build_club_index
59
- ## unify team names; team (builtin/known/shared) name mappings
60
- ## cleanup team names - use local ("native") name with umlaut etc.
61
- recs = []
62
-
63
- ## todo/fix: pass along / use country code too
64
- ## note: country code no longer needed in path (is now expected as heading inside the file)
65
-
66
- ## todo/fix: add to teamreader
67
- ## check that name and alt_names for a club are all unique (not duplicates)
68
- datafiles = find_clubs_datafiles( clubs_dir )
69
- datafiles.each do |datafile|
70
- recs += ClubReader.read( datafile )
71
- end
72
-
73
-
74
- clubs = ClubIndex.new
75
- clubs.add( recs )
76
-
77
- if clubs.errors?
78
- puts ""
79
- puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
80
- puts " #{clubs.errors.size} errors:"
81
- pp clubs.errors
82
- ## exit 1
83
- end
84
-
85
- clubs
86
- end # method build_club_index
87
-
88
-
89
-
90
-
91
- def leagues
92
- read_leagues() if @leagues.nil?
93
- @leagues
94
- end
95
-
96
- def read_leagues
97
- #####
98
- # add / read-in leagues config
99
- @leagues = LeagueConfig.new
100
-
101
- self ## return self for chaining
102
- end
103
- end # class Configuration
104
-
105
-
106
-
107
-
108
-
109
- ## lets you use
110
- ## SportDb::Import.configure do |config|
111
- ## config.hello = 'World'
112
- ## end
113
-
114
- def self.configure
115
- yield( config )
116
- end
117
-
118
- def self.config
119
- @config ||= Configuration.new
120
- end
121
-
122
- end # module Import
123
- end # module SportDb
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
+ class Configuration
8
+
9
+ ##
10
+ ## todo: allow configure of countries_dir like clubs_dir
11
+ ## "fallback" and use a default built-in world/countries.txt
12
+
13
+ ## todo/check: rename to country_mappings/index - why? why not?
14
+ ## or countries_by_code or countries_by_key
15
+ def countries
16
+ @countries ||= build_country_index
17
+ @countries
18
+ end
19
+
20
+ def build_country_index ## todo/check: rename to setup_country_index or read_country_index - why? why not?
21
+ recs = read_csv( "#{SportDb::Boot.data_dir}/world/countries.txt" )
22
+ CountryIndex.new( recs )
23
+ end
24
+
25
+
26
+
27
+ def clubs
28
+ @clubs ||= build_club_index
29
+ @clubs
30
+ end
31
+
32
+ ####
33
+ # todo/fix: find a better way to configure club / team datasets
34
+ attr_accessor :clubs_dir
35
+ def clubs_dir() @clubs_dir ||= './clubs'; end
36
+
37
+
38
+ CLUBS_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
39
+ (?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.txt
40
+ clubs\.txt$
41
+ }x
42
+
43
+ def find_clubs_datafiles( path )
44
+ datafiles = [] ## note: list of matching datafile paths (plain path strings, no country code)
45
+
46
+ ## check all txt files as candidates (MUST include country code for now)
47
+ candidates = Dir.glob( "#{path}/**/*.txt" )
48
+ pp candidates
49
+ candidates.each do |candidate|
50
+ datafiles << candidate if CLUBS_REGEX.match( candidate )
51
+ end
52
+
53
+ pp datafiles
54
+ datafiles
55
+ end
56
+
57
+
58
+ def build_club_index
59
+ ## unify team names; team (builtin/known/shared) name mappings
60
+ ## cleanup team names - use local ("native") name with umlaut etc.
61
+ recs = []
62
+
63
+ ## todo/fix: pass along / use country code too
64
+ ## note: country code no longer needed in path (is now expected as heading inside the file)
65
+
66
+ ## todo/fix: add to teamreader
67
+ ## check that name and alt_names for a club are all unique (not duplicates)
68
+ datafiles = find_clubs_datafiles( clubs_dir )
69
+ datafiles.each do |datafile|
70
+ recs += ClubReader.read( datafile )
71
+ end
72
+
73
+
74
+ clubs = ClubIndex.new
75
+ clubs.add( recs )
76
+
77
+ if clubs.errors?
78
+ puts ""
79
+ puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
80
+ puts " #{clubs.errors.size} errors:"
81
+ pp clubs.errors
82
+ ## exit 1
83
+ end
84
+
85
+ clubs
86
+ end # method build_club_index
87
+
88
+
89
+
90
+
91
+ def leagues
92
+ read_leagues() if @leagues.nil?
93
+ @leagues
94
+ end
95
+
96
+ def read_leagues
97
+ #####
98
+ # add / read-in leagues config
99
+ @leagues = LeagueConfig.new
100
+
101
+ self ## return self for chaining
102
+ end
103
+ end # class Configuration
104
+
105
+
106
+
107
+
108
+
109
+ ## lets you use
110
+ ## SportDb::Import.configure do |config|
111
+ ## config.hello = 'World'
112
+ ## end
113
+
114
+ def self.configure
115
+ yield( config )
116
+ end
117
+
118
+ def self.config
119
+ @config ||= Configuration.new
120
+ end
121
+
122
+ end # module Import
123
+ end # module SportDb
@@ -1,118 +1,118 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
-
7
- class LeagueConfig ## use LeagueInfo or LeagueMap or LeagueHash or similar
8
-
9
- def initialize
10
-
11
- ## just use leagues without latest for latest - why? why not?
12
- @leagues_latest = {
13
- 'es' => { '1' => 'liga', # spanish liga 1
14
- '2' => 'liga2', # spanish liga 2
15
- },
16
- 'it' => { '1' => 'seriea', # italian serie a
17
- '2' => 'serieb', # italian serie b
18
- },
19
- 'de' => { '1' => 'bundesliga', # german bundesliga
20
- '2' => 'bundesliga2', # german 2. bundesliga
21
- },
22
- 'nl' => { '1' => 'eredivisie' }, # dutch eredivisie
23
- 'be' => { '1' => 'proleague' }, # belgian pro league
24
- 'pt' => { '1' => 'liga' }, # portugese Primeira Liga
25
- 'tr' => { '1' => 'superlig' }, # turkish Süper Lig
26
-
27
- # note: eng now read from txt
28
- # 'eng' => { '1' => 'premierleague', # english premier league
29
- # '2' => 'championship', # english championship league
30
- # '3' => 'league1', # english league 1
31
- # },
32
- }
33
-
34
- ## change history to past or changes/changelog something - why? why not?
35
- @leagues_history = {
36
-
37
- # note: eng now read from txt
38
- # 'eng' => {
39
- # ## until (including) 2003-04 season
40
- # '2003-04' => { '1' => 'premierleague', # english premier league
41
- # '2' => 'division1', # english division 1
42
- # },
43
- # ## until (including) 1991-92} season
44
- # '1991-92' => { '1' => 'division1', # english division 1
45
- # '2' => 'division2', # english division 2
46
- # }
47
- # }
48
- }
49
-
50
- pp @leagues_latest
51
- pp @leagues_history
52
-
53
- %w(eng sco fr gr).each do |country|
54
- hash = LeagueReader.read( "#{Boot.data_dir}/leagues/#{country}.txt" )
55
- pp hash
56
-
57
- hash.each do |season,league_hash|
58
- if season == '*' ## assume latest / default season
59
- @leagues_latest[ country ] = league_hash
60
- else
61
- @leagues_history[ country ] ||= {}
62
- @leagues_history[ country ][ season ] = league_hash
63
- end
64
- end
65
- end
66
-
67
- pp @leagues_latest
68
- pp @leagues_history
69
- end
70
-
71
-
72
-
73
- def basename( league, country:, season: )
74
- ## todo/check: rename league: to key: - why? why not?
75
-
76
- if country.include?( '-' ) ## assume package name e.g. eng-england etc.
77
- ## cut off country code from package name
78
- cc = country.split( '-' )[0] # use first part
79
- else
80
- cc = country
81
- end
82
-
83
- if season
84
- puts " checking season >#{season}<"
85
- ## check history if season is provided / supplied / known
86
- history = @leagues_history[ cc ]
87
- if history
88
- season_start_year = SeasonUtils.start_year( season ).to_i
89
- ##
90
- ## todo: sorty season keys - why? why not? -- assume reverse chronological order for now
91
- history.keys.reverse.each do |key|
92
- history_season_start_year = SeasonUtils.start_year( key ).to_i
93
- puts " #{season_start_year} <= #{history_season_start_year} - #{season_start_year <= history_season_start_year}"
94
- if season_start_year <= history_season_start_year
95
- result = history[ key ][ league ]
96
- if result
97
- return "#{league}-#{result}"
98
- else
99
- return nil
100
- end
101
- end
102
- end
103
- end
104
- end
105
-
106
- latest = @leagues_latest[ cc ]
107
- if latest
108
- result = latest[ league ]
109
- return "#{league}-#{result}" if result
110
- end
111
-
112
- nil
113
- end # method basename
114
- end # class LeagueConfig
115
-
116
-
117
- end ## module Import
118
- end ## module SportDb
1
+ # encoding: utf-8
2
+
3
+ module SportDb
4
+ module Import
5
+
6
+
7
+ class LeagueConfig ## use LeagueInfo or LeagueMap or LeagueHash or similar
8
+
9
+ def initialize
10
+
11
+ ## just use leagues without latest for latest - why? why not?
12
+ @leagues_latest = {
13
+ 'es' => { '1' => 'liga', # spanish liga 1
14
+ '2' => 'liga2', # spanish liga 2
15
+ },
16
+ 'it' => { '1' => 'seriea', # italian serie a
17
+ '2' => 'serieb', # italian serie b
18
+ },
19
+ 'de' => { '1' => 'bundesliga', # german bundesliga
20
+ '2' => 'bundesliga2', # german 2. bundesliga
21
+ },
22
+ 'nl' => { '1' => 'eredivisie' }, # dutch eredivisie
23
+ 'be' => { '1' => 'proleague' }, # belgian pro league
24
+ 'pt' => { '1' => 'liga' }, # portuguese Primeira Liga
25
+ 'tr' => { '1' => 'superlig' }, # turkish Süper Lig
26
+
27
+ # note: eng now read from txt
28
+ # 'eng' => { '1' => 'premierleague', # english premier league
29
+ # '2' => 'championship', # english championship league
30
+ # '3' => 'league1', # english league 1
31
+ # },
32
+ }
33
+
34
+ ## change history to past or changes/changelog something - why? why not?
35
+ @leagues_history = {
36
+
37
+ # note: eng now read from txt
38
+ # 'eng' => {
39
+ # ## until (including) 2003-04 season
40
+ # '2003-04' => { '1' => 'premierleague', # english premier league
41
+ # '2' => 'division1', # english division 1
42
+ # },
43
+ # ## until (including) 1991-92 season
44
+ # '1991-92' => { '1' => 'division1', # english division 1
45
+ # '2' => 'division2', # english division 2
46
+ # }
47
+ # }
48
+ }
49
+
50
+ pp @leagues_latest
51
+ pp @leagues_history
52
+
53
+ %w(eng sco fr gr).each do |country|
54
+ hash = LeagueReader.read( "#{Boot.data_dir}/leagues/#{country}.txt" )
55
+ pp hash
56
+
57
+ hash.each do |season,league_hash|
58
+ if season == '*' ## assume latest / default season
59
+ @leagues_latest[ country ] = league_hash
60
+ else
61
+ @leagues_history[ country ] ||= {}
62
+ @leagues_history[ country ][ season ] = league_hash
63
+ end
64
+ end
65
+ end
66
+
67
+ pp @leagues_latest
68
+ pp @leagues_history
69
+ end
70
+
71
+
72
+
73
+ def basename( league, country:, season: )
74
+ ## todo/check: rename league: to key: - why? why not?
75
+
76
+ if country.include?( '-' ) ## assume package name e.g. eng-england etc.
77
+ ## cut off country code from package name
78
+ cc = country.split( '-' )[0] # use first part
79
+ else
80
+ cc = country
81
+ end
82
+
83
+ if season
84
+ puts " checking season >#{season}<"
85
+ ## check history if season is provided / supplied / known
86
+ history = @leagues_history[ cc ]
87
+ if history
88
+ season_start_year = SeasonUtils.start_year( season ).to_i
89
+ ##
90
+ ## todo: sort season keys - why? why not? -- assume reverse chronological order for now
91
+ history.keys.reverse.each do |key|
92
+ history_season_start_year = SeasonUtils.start_year( key ).to_i
93
+ puts " #{season_start_year} <= #{history_season_start_year} - #{season_start_year <= history_season_start_year}"
94
+ if season_start_year <= history_season_start_year
95
+ result = history[ key ][ league ]
96
+ if result
97
+ return "#{league}-#{result}"
98
+ else
99
+ return nil
100
+ end
101
+ end
102
+ end
103
+ end
104
+ end
105
+
106
+ latest = @leagues_latest[ cc ]
107
+ if latest
108
+ result = latest[ league ]
109
+ return "#{league}-#{result}" if result
110
+ end
111
+
112
+ nil
113
+ end # method basename
114
+ end # class LeagueConfig
115
+
116
+
117
+ end ## module Import
118
+ end ## module SportDb