sportdb-formats 1.1.6 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +37 -1
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +102 -12
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -1,178 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
  module Import

    ##
    # In-memory index of league records that can be searched by name.
    #
    # Every record handed to #add is kept in insertion order and is
    # additionally registered under the normalized form of its canonical
    # name, its alt names and its auto-generated alt names.
    class LeagueIndex

      ##
      # Builds an index from all league datafiles found in a package.
      #
      # path - handed unchanged to Package.new
      #        (lets us use a directory or a zip archive)
      #
      # Returns the populated LeagueIndex.
      def self.build( path )
        pack = Package.new( path )

        recs = []
        pack.each_leagues do |entry|
          recs += League.parse( entry.read )
        end

        leagues = new
        leagues.add( recs )
        leagues
      end


      def catalog() Import.catalog; end

      def initialize
        @leagues         = []   ## all league records (in the order added)
        @leagues_by_name = {}   ## normalized name => array of league records
        @errors          = []   ## warning messages collected by #add
      end

      attr_reader :errors
      def errors?() !@errors.empty?; end

      def mappings() @leagues_by_name; end   ## todo/check: rename to index or something - why? why not?

      ## note: @leagues is an array (see #initialize); the previous
      ##   @leagues.values raised NoMethodError.
      ##   a copy gets returned so that callers cannot change the index by accident
      def leagues() @leagues.dup; end
      alias_method :all, :leagues   ## use ActiveRecord-like alias for leagues


      ## helpers from club - use a helper module for includes - why? why not?
      include NameHelper
      ## incl. strip_lang( name )
      ##       normalize( name )


      ##
      # Adds one league record or an array of league records.
      #
      # Aborts the process (exit 1) if the hand-typed names of a record
      # (name + alt_names) contain duplicates. Conflicts between
      # normalized names only get reported (printed and appended to #errors).
      def add( rec_or_recs )
        ## note: do NOT use Array() here - it would split up struct-like records
        recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs]

        recs.each do |rec|
          ### step 1) add record
          @leagues << rec

          ## step 2) add all names (canonical name + alt names + alt names (auto))
          names = [rec.name] + rec.alt_names

          ## check for duplicates - simple check for now - fix/improve
          ## todo/fix: (auto)remove duplicates - why? why not?
          duplicates = names.size - names.uniq.size
          if duplicates != 0
            puts "** !!! ERROR !!! - #{duplicates} duplicate name(s):"
            pp names
            pp rec
            exit 1
          end

          ## todo/fix: move alt_names_auto up for check unique names
          ##    e.g. remove/avoid auto-generated duplicates ENG 1, AUT 1, etc
          names += rec.alt_names_auto

          names.each do |name|
            ## check lang codes e.g. [en], [fr], etc.
            ## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
            name = strip_lang( name )
            norm = normalize( name )

            alt_recs = @leagues_by_name[ norm ]
            if alt_recs.nil?
              @leagues_by_name[ norm ] = [rec]
            elsif alt_recs.include?( rec )
              ## note: do NOT include the same record twice
              msg = "** !!! WARN !!! - (norm) name conflict/duplicate for league - >#{name}< normalized to >#{norm}< already included >#{rec.name}, #{rec.country ? rec.country.key : '?'}<"
              puts msg
              @errors << msg
            else
              ## note: the record gets appended (both records stay in the index)
              msg = "** !!! WARN !!! - name conflict/duplicate - >#{name}< will overwrite >#{alt_recs[0].name}, #{alt_recs[0].country ? alt_recs[0].country.key : '?'}< with >#{rec.name}, #{rec.country ? rec.country.key : '?'}<"
              puts msg
              @errors << msg
              alt_recs << rec
            end
          end
        end
      end # method add


      ##
      # Helper to always convert a (possible) country key to a country record.
      #
      # A String or Symbol gets looked up via catalog.countries.find;
      # an unknown key aborts the process (exit 1).
      # Any other value gets returned as is (no need to run the lookup again).
      #
      ## todo: make private - why? why not?
      def country( country )
        return country unless country.is_a?( String ) || country.is_a?( Symbol )

        rec = catalog.countries.find( country.to_s )
        if rec.nil?
          puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
          exit 1
        end
        rec
      end


      ##
      # Returns all league records registered under the normalized name.
      # note: returns an empty array if no match and NOT nil
      def match( name )
        @leagues_by_name[ normalize( name ) ] || []
      end

      ##
      # Same as #match with an optional filter by country.
      #
      # country - country key or country record; if nil (default) no filter
      #           gets applied (same default as in ClubIndex#match_by)
      #
      # note: leagues without a country (e.g. champions league etc.)
      #   get skipped if a country filter is in use
      def match_by( name:, country: nil )
        ## note: match must for now always include name
        m = match( name )
        if country
          country_rec = country( country )
          m = m.select do |league|
            league.country && league.country.key == country_rec.key
          end
        end
        m
      end


      ##
      # Same as #find but aborts the process (exit 1) if there is no match.
      def find!( name )
        league = find( name )
        if league.nil?
          puts "** !!! ERROR - no league match found for >#{name}<, add to leagues table; sorry"
          exit 1
        end
        league
      end

      ##
      # Returns the single league record matching name or nil if no match.
      # Aborts the process (exit 1) if more than one record matches.
      def find( name )
        m = match( name )

        if m.size > 1
          puts "** !!! ERROR - ambigious league name; too many leagues (#{m.size}) found:"
          pp m
          exit 1
        end

        m[0]   ## note: nil if no match
      end


      ## debug helper - report normalized names shared by more than one league record
      def dump_duplicates
        @leagues_by_name.each do |name, leagues|
          next unless leagues.size > 1

          puts "#{leagues.size} matching leagues duplicates for >#{name}<:"
          pp leagues
        end
      end
    end # class LeagueIndex

  end # module Import
end # module SportDb
|
@@ -1,338 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
  module Import

    ##
    # In-memory index of club records.
    #
    # Records are stored by canonical name (must be unique) and are
    # additionally registered under the normalized form of every name
    # (canonical name, alt names, names with the year stripped,
    # auto-generated alt names).
    class ClubIndex

      ## (re)try countries for #find_by - quick hacks for known leagues
      ##   country key => second country key to try if there is no match
      ## todo/fix: add league flag to activate!!! - why? why not
      SECOND_COUNTRY = {
        'eng' => 'wal',
        'sco' => 'eng',
        'ie'  => 'nir',
        'fr'  => 'mc',
        'ch'  => 'li',
        'us'  => 'ca',
        'au'  => 'nz',
      }.freeze


      ##
      # Builds an index from all club datafiles and all club wiki
      # datafiles found in a package.
      #
      # path - handed unchanged to Package.new
      #        (lets us use a directory or a zip archive)
      #
      # Returns the populated ClubIndex.
      def self.build( path )
        pack = Package.new( path )

        recs = []
        pack.each_clubs do |entry|
          recs += Club.parse( entry.read )
        end

        clubs = new
        clubs.add( recs )

        ## add wiki(pedia) anchored links
        wiki_recs = []
        pack.each_clubs_wiki do |entry|
          wiki_recs += WikiReader.parse( entry.read )
        end

        ## note: the (debug) dump of all wiki records got removed
        clubs.add_wiki( wiki_recs )
        clubs
      end


      def catalog() Import.catalog; end

      def initialize
        @clubs         = {}   ## clubs (indexed) by canonical name
        @clubs_by_name = {}   ## normalized name => array of club records
        @errors        = []   ## warning messages collected by #add
      end

      attr_reader :errors
      def errors?() !@errors.empty?; end

      def mappings() @clubs_by_name; end   ## todo/check: rename to index or something - why? why not?
      def clubs()    @clubs.values;  end
      alias_method :all, :clubs   ## use ActiveRecord-like alias for clubs


      ## helpers from club - use a helper module for includes - why? why not?
      include NameHelper
      ## incl. strip_year( name )
      ##       has_year?( name)
      ##       strip_lang( name )
      ##       normalize( name )


      ##
      # Strips the disambiguation qualifier from a wikipedia page name
      # if present.
      #
      # note: only removes year and foot... for now
      #   e.g. FC Wacker Innsbruck (2002)  => FC Wacker Innsbruck
      #        Willem II (football club)   => Willem II
      #
      #   e.g. do NOT strip others !! e.g.
      #        América Futebol Clube (MG)
      #   only add more "special" cases on demand (that, is) if we find more
      #
      ## todo/check: rename to strip_wikipedia_en or strip_wiki_qualifier - why? why not?
      def strip_wiki( name )
        name = name.gsub( /\([12][^\)]+?\)/, '' ).strip   ## starting with a digit 1 or 2 (assuming year)
        name = name.gsub( /\(foot[^\)]+?\)/, '' ).strip   ## starting with foot (assuming football ...)
        name
      end

      ##
      # Adds one wiki(pedia) record or an array of wiki(pedia) records.
      #
      # For every record the club matching the (stripped) page name and
      # country gets looked up and its wikipedia attribute gets set to the
      # (unstripped) page name.
      #
      # Aborts the process (exit 1) if there is no matching club or more
      # than one matching club.
      def add_wiki( rec_or_recs )
        recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs]

        recs.each do |rec|
          name = strip_wiki( rec.name )

          m = match_by( name: name, country: rec.country )

          ## note: match_by returns an empty array (and NOT nil) if no match;
          ##   checking for nil only never reported the error and
          ##   failed with a NoMethodError on nil further down
          if m.nil? || m.empty?
            puts "** !!! ERROR !!! - no matching club found for wiki(pedia) name >#{name}, #{rec.country.name} (#{rec.country.key})<; sorry - to fix add name to clubs"
            exit 1
          end

          if m.size > 1
            puts "** !!! ERROR !!! - too many (greater than one) matching clubs found for wiki(pedia) name >#{name}, #{rec.country.name} (#{rec.country.key})<"
            pp m
            exit 1
          end

          club = m[0]
          club.wikipedia = rec.name
        end
      end # method add_wiki


      ##
      # Adds one club record or an array of club records.
      #
      # Aborts the process (exit 1) if the canonical name is already taken
      # or if the hand-typed names of a record contain duplicates.
      # Duplicates caused by auto-generated names and conflicts between
      # normalized names only get reported.
      def add( rec_or_recs )
        ## note: do NOT use Array() here - it would split up struct-like records
        recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs]

        recs.each do |rec|
          ### step 1) add canonical name
          old_rec = @clubs[ rec.name ]
          if old_rec
            puts "** !!! ERROR !!! - (canonical) name conflict - duplicate - >#{rec.name}< will overwrite >#{old_rec.name}<:"
            pp old_rec
            pp rec
            exit 1
          end
          @clubs[ rec.name ] = rec

          ## step 2) add all names (canonical name + alt names + alt names (auto))
          names = [rec.name] + rec.alt_names

          ## check "hand-typed" names for year (auto-add)
          ##   check for year(s) e.g. (1887-1911), (-2013),
          ##                          (1946-2001,2013-) etc.
          names += names.select { |name| has_year?( name ) }
                        .map    { |name| strip_year( name ) }

          ## check for duplicates - simple check for now - fix/improve
          ## todo/fix: (auto)remove duplicates - why? why not?
          duplicates = names.size - names.uniq.size
          if duplicates != 0
            puts "** !!! ERROR !!! - #{duplicates} duplicate name(s):"
            pp names
            pp rec
            exit 1
          end

          ## check with auto-names just warn for now and do not exit
          names += rec.alt_names_auto
          duplicates = names.size - names.uniq.size
          if duplicates != 0
            puts "** !!! WARN !!! - #{duplicates} duplicate name(s):"
            pp names
            pp rec
          end

          names.each do |name|
            ## check lang codes e.g. [en], [fr], etc.
            ## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
            name = strip_lang( name )
            norm = normalize( name )

            alt_recs = @clubs_by_name[ norm ]
            if alt_recs.nil?
              @clubs_by_name[ norm ] = [rec]
            elsif alt_recs.include?( rec )
              ## note: do NOT include the same club record twice
              msg = "** !!! WARN !!! - (norm) name conflict/duplicate for club - >#{name}< normalized to >#{norm}< already included >#{rec.name}, #{rec.country.name}<"
              puts msg
              @errors << msg
            else
              ## note: the record gets appended (both records stay in the index)
              msg = "** !!! WARN !!! - name conflict/duplicate - >#{name}< will overwrite >#{alt_recs[0].name}, #{alt_recs[0].country.name}< with >#{rec.name}, #{rec.country.name}<"
              puts msg
              @errors << msg
              alt_recs << rec
            end
          end
        end
      end # method add


      ## lookup by canonical name only; prints a warning on every call
      ## todo/fix/check: use rename to find_canon or find_canonical() or something??
      ##   remove (getting used?) - why? why not?
      def []( name )
        puts "WARN!! do not use ClubIndex#[] for lookup >#{name}< - will get removed!!!"
        @clubs[ name ]
      end


      ##
      # Returns all club records registered under the normalized name.
      # note: returns an empty array (e.g. []) if no match and NOT nil
      def match( name )
        name = normalize( name )
        m = @clubs_by_name[ name ] || []

        ## no match - retry with unaccented variant if different
        ##    e.g. example is Preussen Münster  (with mixed accent and unaccented letters) that would go unmatched for now
        ##      Preussen Münster => preussenmünster (norm) => preussenmunster (norm+unaccent)
        if m.empty?
          name2 = unaccent( name )
          m = @clubs_by_name[ name2 ] || []   if name2 != name
        end
        m
      end


      ##
      # Helper to always convert a (possible) country key to a country record.
      #
      # A String or Symbol gets looked up via catalog.countries.find;
      # an unknown key aborts the process (exit 1).
      # Any other value gets returned as is (no need to run the lookup again).
      #
      ## todo: make private - why? why not?
      def country( country )
        return country unless country.is_a?( String ) || country.is_a?( Symbol )

        rec = catalog.countries.find( country.to_s )
        if rec.nil?
          puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
          exit 1
        end
        rec
      end


      ##
      # Same as #match with an optional filter by country.
      #
      # country - country key or country record; if nil no filter gets applied
      #
      # note: always returns an array (empty if no match) and NOT nil
      def match_by( name:, country: nil )
        ## note: match must for now always include name
        m = match( name )

        if country
          country_rec = country( country )
          m = m.select { |club| club.country.key == country_rec.key }
        end
        m
      end


      def find( name )   find_by( name: name, country: nil ); end
      def find!( name )  find_by!( name: name, country: nil ); end

      ##
      # Same as #find_by but aborts the process (exit 1) if there is no match.
      def find_by!( name:, country: nil )    ## todo/fix: add international or league flag?
        club = find_by( name: name, country: country )

        if club.nil?
          puts "** !!! ERROR - no match for club >#{name}<"
          exit 1
        end

        club
      end


      ##
      # Returns the single club record matching name (and country)
      # or nil if no match.
      # Aborts the process (exit 1) if more than one record matches.
      #
      # country - country key or country record; if nil all clubs get searched.
      #           if there is no match for the country a second
      #           country gets tried (see SECOND_COUNTRY)
      def find_by( name:, country: nil )    ## todo/fix: add international or league flag?
        m = if country
              country_rec = country( country )
              m1 = match_by( name: name, country: country_rec )

              second = m1.empty? ? SECOND_COUNTRY[ country_rec.key ] : nil
              second ? match_by( name: name, country: second ) : m1
            else   ## try "global" search - no country passed in
              match( name )
            end

        if m.size > 1
          puts "** !!! ERROR - too many matches (#{m.size}) for club >#{name}<:"
          pp m
          exit 1
        end

        m[0]   ## note: nil if no match
      end


      ##
      # Converts a hash of club name mods into a name => club record lookup.
      #
      #  e.g.
      #    { 'Arsenal   | Arsenal FC'    => 'Arsenal, ENG',
      #      'Liverpool | Liverpool FC'  => 'Liverpool, ENG',
      #      'Barcelona'                 => 'Barcelona, ESP',
      #      'Valencia'                  => 'Valencia, ESP' }
      #
      # Every name on the left-hand side (split by |) gets mapped to the
      # club record found (via #find_by!) for the right-hand side.
      def build_mods( mods )
        mods.each_with_object( {} ) do |(club_names, club_line), h|
          ## todo/fix: make sure country is present !!!!
          club_name, country_name = club_line.split( ',' ).map( &:strip )
          club = find_by!( name: club_name, country: country_name )

          club_names.split( '|' ).map( &:strip ).each do |alt_name|
            h[alt_name] = club
          end
        end
      end


      ## debug helper - report normalized names shared by more than one club record
      def dump_duplicates
        @clubs_by_name.each do |name, clubs|
          next unless clubs.size > 1

          puts "#{clubs.size} matching club duplicates for >#{name}<:"
          pp clubs
        end
      end
    end # class ClubIndex

  end # module Import
end # module SportDb
|
@@ -1,114 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
  module Import

    ##
    # In-memory index of national team records with lookup by code
    # and by (normalized) name.
    class NationalTeamIndex

      attr_reader :teams    ## all (national) team records

      def initialize( recs )
        @teams         = []
        @teams_by_code = {}   ## downcased code => team record
        @teams_by_name = {}   ## normalized name => team record

        add( recs )
      end

      include NameHelper
      ## incl. strip_year( name )
      ##       has_year?( name)
      ##       strip_lang( name )
      ##       normalize( name )


      ##
      # Adds all records to the index.
      #
      # Aborts the process (exit 1) if a code is already taken, if the
      # names of a record contain duplicates or if a normalized name
      # is already taken.
      def add( recs )
        recs.each do |rec|
          @teams << rec

          ## add code lookup (always downcased)
          code = rec.code.downcase
          if @teams_by_code[ code ]
            puts "** !! ERROR !! national team code (code) >#{rec.code}< already exits!!"
            exit 1
          end
          @teams_by_code[ code ] = rec

          ## add all names (canonical name + alt names)
          names = [rec.name] + rec.alt_names

          ## check "hand-typed" names for year (auto-add)
          ##   check for year(s) e.g. (1887-1911), (-2013),
          ##                          (1946-2001,2013-) etc.
          names += names.select { |name| has_year?( name ) }
                        .map    { |name| strip_year( name ) }

          ## check for duplicates - simple check for now - fix/improve
          ## todo/fix: (auto)remove duplicates - why? why not?
          duplicates = names.size - names.uniq.size
          unless duplicates == 0
            puts "** !!! ERROR !!! - #{duplicates} duplicate name(s) in national teams:"
            pp names
            pp rec
            exit 1
          end

          names.each do |name|
            ## check lang codes e.g. [en], [fr], etc.
            ## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
            name = strip_lang( name )
            norm = normalize( name )

            ## check if team name already is included or is new team rec
            old_rec = @teams_by_name[ norm ]
            if old_rec
              msg = "** !!! ERROR !!! - national team name conflict/duplicate - >#{name}< will overwrite >#{old_rec.name}< with >#{rec.name}<"
              puts msg
              exit 1
            end
            @teams_by_name[ norm ] = rec
          end
        end  ## each record
      end # method add

      ## fix/todo: add find_by (alias for find_by_name/find_by_code)

      ## lookup by code; allows symbols (and always downcases e.g. AUT to aut etc.)
      def find_by_code( code )
        @teams_by_code[ code.to_s.downcase ]
      end

      ## lookup by (normalized) name; allows symbols too (e.g. uses to_s first)
      def find_by_name( name )
        @teams_by_name[ normalize( name.to_s ) ]
      end

      ##
      # Returns the team record matching q by name or (if there is no
      # match by name) by code; nil if no match.
      def find( q )
        ## check longest match first (assume name is longer than code)
        by_name = find_by_name( q )
        by_name.nil? ? find_by_code( q ) : by_name
      end

      ##
      # Same as #find but aborts the process (exit 1) if there is no match.
      def find!( q )
        team = find( q )
        return team unless team.nil?

        puts "** !!! ERROR - no match for national team >#{q}< found"
        exit 1
      end
    end # class NationalTeamIndex

  end # module Import
end # module SportDb
|
114
|
-
|
@@ -1,43 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
  module Import

    ##
    # "Virtual" index that lets you search clubs and/or national teams
    # (don't care) - the league decides where to look.
    class TeamIndex

      def catalog() Import.catalog; end

      ##
      # Looks up the team record(s) for one name or an array of names.
      #
      # name   - single name or array of names
      # league - league record; must respond to key, clubs?, intl? and country
      # mods   - optional lookup of overrides (league key => name => team record)
      #
      # Returns a single record for a single name and
      # an array of records (in the same order) for an array of names.
      #
      ## todo/check: rename to/use map_by! for array version - why? why not?
      def find_by!( name:, league:, mods: nil )
        if name.is_a?( Array )
          name.map { |q| __find_by!( name: q, league: league, mods: mods ) }
        else  ## assume single name
          __find_by!( name: name, league: league, mods: mods )
        end
      end

      ##
      # Looks up the team record for a single name.
      #
      # An entry in mods (for the league key and name) wins; otherwise
      # the lookup gets handed on to catalog.clubs or catalog.national_teams.
      def __find_by!( name:, league:, mods: nil )
        mod = mods && mods[ league.key ] && mods[ league.key ][ name ]
        return mod if mod

        if !league.clubs?     ## assume national teams (not clubs)
          catalog.national_teams.find!( name )
        elsif league.intl?    ## todo/fix: add intl? to ActiveRecord league!!!
          catalog.clubs.find!( name )
        else                  ## assume clubs in domestic/national league tournament
          catalog.clubs.find_by!( name: name, country: league.country )
        end
      end # method __find_by!

    end # class TeamIndex

  end # module Import
end # module SportDb
|