sportdb-formats 1.1.6 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +4 -25
- data/Rakefile +1 -1
- data/lib/sportdb/formats/country/country_reader.rb +142 -142
- data/lib/sportdb/formats/datafile.rb +59 -59
- data/lib/sportdb/formats/event/event_reader.rb +184 -183
- data/lib/sportdb/formats/goals.rb +37 -1
- data/lib/sportdb/formats/ground/ground_reader.rb +289 -0
- data/lib/sportdb/formats/league/league_reader.rb +152 -168
- data/lib/sportdb/formats/lines_reader.rb +47 -0
- data/lib/sportdb/formats/match/match_parser.rb +102 -12
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +270 -202
- data/lib/sportdb/formats/outline_reader.rb +0 -1
- data/lib/sportdb/formats/package.rb +394 -374
- data/lib/sportdb/formats/search/sport.rb +357 -0
- data/lib/sportdb/formats/search/world.rb +139 -0
- data/lib/sportdb/formats/team/club_index_history.rb +134 -134
- data/lib/sportdb/formats/team/club_reader.rb +318 -350
- data/lib/sportdb/formats/team/club_reader_history.rb +203 -203
- data/lib/sportdb/formats/team/wiki_reader.rb +108 -108
- data/lib/sportdb/formats/version.rb +4 -7
- data/lib/sportdb/formats.rb +60 -27
- metadata +13 -35
- data/lib/sportdb/formats/country/country_index.rb +0 -192
- data/lib/sportdb/formats/event/event_index.rb +0 -141
- data/lib/sportdb/formats/league/league_index.rb +0 -178
- data/lib/sportdb/formats/team/club_index.rb +0 -338
- data/lib/sportdb/formats/team/national_team_index.rb +0 -114
- data/lib/sportdb/formats/team/team_index.rb +0 -43
- data/test/helper.rb +0 -132
- data/test/test_club_index.rb +0 -183
- data/test/test_club_index_history.rb +0 -107
- data/test/test_club_reader.rb +0 -201
- data/test/test_club_reader_history.rb +0 -212
- data/test/test_club_reader_props.rb +0 -54
- data/test/test_country_index.rb +0 -63
- data/test/test_country_reader.rb +0 -89
- data/test/test_datafile.rb +0 -30
- data/test/test_datafile_package.rb +0 -46
- data/test/test_goals.rb +0 -113
- data/test/test_league_index.rb +0 -157
- data/test/test_league_outline_reader.rb +0 -55
- data/test/test_league_reader.rb +0 -72
- data/test/test_outline_reader.rb +0 -31
- data/test/test_package.rb +0 -78
- data/test/test_package_match.rb +0 -102
- data/test/test_regex.rb +0 -67
- data/test/test_wiki_reader.rb +0 -77
@@ -1,178 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
class LeagueIndex
|
7
|
-
|
8
|
-
def self.build( path )
|
9
|
-
pack = Package.new( path ) ## lets us use direcotry or zip archive
|
10
|
-
|
11
|
-
recs = []
|
12
|
-
pack.each_leagues do |entry|
|
13
|
-
recs += League.parse( entry.read )
|
14
|
-
end
|
15
|
-
recs
|
16
|
-
|
17
|
-
leagues = new
|
18
|
-
leagues.add( recs )
|
19
|
-
leagues
|
20
|
-
end
|
21
|
-
|
22
|
-
|
23
|
-
## shortcut - hands back whatever Import.catalog returns
def catalog
  Import.catalog
end
|
24
|
-
|
25
|
-
def initialize
|
26
|
-
@leagues = [] ## leagues by canonical name
|
27
|
-
@leagues_by_name = {}
|
28
|
-
@errors = []
|
29
|
-
end
|
30
|
-
|
31
|
-
attr_reader :errors
|
32
|
-
## true if at least one message has been recorded in @errors
def errors?
  !@errors.empty?
end
|
33
|
-
|
34
|
-
## lookup table - normalized name => array of league records
## todo/check: rename to index or something - why? why not?
def mappings
  @leagues_by_name
end
|
35
|
-
## all league records in the order they were added
## note: @leagues is an array (see initialize / add) and
##   arrays do NOT respond to values - return the array itself
def leagues
  @leagues
end
|
36
|
-
alias_method :all, :leagues ## use ActiveRecord-like alias for leagues
|
37
|
-
|
38
|
-
|
39
|
-
## helpers from club - use a helper module for includes - why? why not?
|
40
|
-
include NameHelper
|
41
|
-
## incl. strip_lang( name )
|
42
|
-
## normalize( name )
|
43
|
-
|
44
|
-
|
45
|
-
def add( rec_or_recs ) ## add club record / alt_names
|
46
|
-
recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
|
47
|
-
|
48
|
-
recs.each do |rec|
|
49
|
-
## puts "adding:"
|
50
|
-
## pp rec
|
51
|
-
### step 1) add canonical name
|
52
|
-
@leagues << rec
|
53
|
-
|
54
|
-
## step 2) add all names (canonical name + alt names + alt names (auto))
|
55
|
-
names = [rec.name] + rec.alt_names
|
56
|
-
## check for duplicates - simple check for now - fix/improve
|
57
|
-
## todo/fix: (auto)remove duplicates - why? why not?
|
58
|
-
count = names.size
|
59
|
-
count_uniq = names.uniq.size
|
60
|
-
if count != count_uniq
|
61
|
-
puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s):"
|
62
|
-
pp names
|
63
|
-
pp rec
|
64
|
-
exit 1
|
65
|
-
end
|
66
|
-
|
67
|
-
## todo/fix: move alt_names_auto up for check unique names
|
68
|
-
## e.g. remove/avoid auto-generated duplicates ENG 1, AUT 1, etc
|
69
|
-
names += rec.alt_names_auto
|
70
|
-
|
71
|
-
names.each_with_index do |name,i|
|
72
|
-
## check lang codes e.g. [en], [fr], etc.
|
73
|
-
## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
|
74
|
-
name = strip_lang( name )
|
75
|
-
norm = normalize( name )
|
76
|
-
alt_recs = @leagues_by_name[ norm ]
|
77
|
-
if alt_recs
|
78
|
-
## check if include club rec already or is new club rec
|
79
|
-
if alt_recs.include?( rec )
|
80
|
-
## note: do NOT include duplicate club record
|
81
|
-
msg = "** !!! WARN !!! - (norm) name conflict/duplicate for league - >#{name}< normalized to >#{norm}< already included >#{rec.name}, #{rec.country ? rec.country.key : '?'}<"
|
82
|
-
puts msg
|
83
|
-
@errors << msg
|
84
|
-
else
|
85
|
-
msg = "** !!! WARN !!! - name conflict/duplicate - >#{name}< will overwrite >#{alt_recs[0].name}, #{alt_recs[0].country ? alt_recs[0].country.key : '?'}< with >#{rec.name}, #{rec.country ? rec.country.key : '?'}<"
|
86
|
-
puts msg
|
87
|
-
@errors << msg
|
88
|
-
alt_recs << rec
|
89
|
-
end
|
90
|
-
else
|
91
|
-
@leagues_by_name[ norm ] = [rec]
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
end # method add
|
96
|
-
|
97
|
-
|
98
|
-
## helper to always convert (possible) country key to existing country record
|
99
|
-
## todo: make private - why? why not?
|
100
|
-
def country( country )
|
101
|
-
if country.is_a?( String ) || country.is_a?( Symbol )
|
102
|
-
## note: use own "global" countries index setting for ClubIndex - why? why not?
|
103
|
-
rec = catalog.countries.find( country.to_s )
|
104
|
-
if rec.nil?
|
105
|
-
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
106
|
-
exit 1
|
107
|
-
end
|
108
|
-
rec
|
109
|
-
else
|
110
|
-
country ## (re)use country struct - no need to run lookup again
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
|
115
|
-
def match( name )
|
116
|
-
## note: returns empty array if no match and NOT nil
|
117
|
-
name = normalize( name )
|
118
|
-
@leagues_by_name[ name ] || []
|
119
|
-
end
|
120
|
-
|
121
|
-
def match_by( name:, country: )
|
122
|
-
## note: match must for now always include name
|
123
|
-
m = match( name )
|
124
|
-
if country ## filter by country
|
125
|
-
## note: country assumes / allows the country key or fifa code for now
|
126
|
-
## note: allow passing in of country struct too
|
127
|
-
country_rec = country( country )
|
128
|
-
|
129
|
-
## note: also skip international leagues & cups (e.g. champions league etc.) for now - why? why not?
|
130
|
-
m = m.select { |league| league.country &&
|
131
|
-
league.country.key == country_rec.key }
|
132
|
-
end
|
133
|
-
m
|
134
|
-
end
|
135
|
-
|
136
|
-
|
137
|
-
def find!( name )
|
138
|
-
league = find( name )
|
139
|
-
if league.nil?
|
140
|
-
puts "** !!! ERROR - no league match found for >#{name}<, add to leagues table; sorry"
|
141
|
-
exit 1
|
142
|
-
end
|
143
|
-
league
|
144
|
-
end
|
145
|
-
|
146
|
-
def find( name )
|
147
|
-
league = nil
|
148
|
-
m = match( name )
|
149
|
-
# pp m
|
150
|
-
|
151
|
-
if m.empty?
|
152
|
-
## fall through/do nothing
|
153
|
-
elsif m.size > 1
|
154
|
-
puts "** !!! ERROR - ambigious league name; too many leagues (#{m.size}) found:"
|
155
|
-
pp m
|
156
|
-
exit 1
|
157
|
-
else
|
158
|
-
league = m[0]
|
159
|
-
end
|
160
|
-
|
161
|
-
league
|
162
|
-
end
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
def dump_duplicates # debug helper - report duplicate club name records
|
168
|
-
@leagues_by_name.each do |name, leagues|
|
169
|
-
if leagues.size > 1
|
170
|
-
puts "#{leagues.size} matching leagues duplicates for >#{name}<:"
|
171
|
-
pp leagues
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end # class LeagueIndex
|
176
|
-
|
177
|
-
end # module Import
|
178
|
-
end # module SportDb
|
@@ -1,338 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
|
7
|
-
class ClubIndex
|
8
|
-
|
9
|
-
## build a club index from a datafile package
##   path - passed on as-is to Package.new
## returns the new index with all parsed club records added (add)
##   and the wiki(pedia) records linked (add_wiki)
def self.build( path )
  pack = Package.new( path )   ## lets us use directory or zip archive

  recs = []
  pack.each_clubs do |entry|
    recs += Club.parse( entry.read )
  end

  clubs = new
  clubs.add( recs )

  ## add wiki(pedia) anchored links
  recs = []
  pack.each_clubs_wiki do |entry|
    recs += WikiReader.parse( entry.read )
  end

  ## note: do NOT dump (pp) the wiki records here - keep build quiet
  clubs.add_wiki( recs )
  clubs
end
|
31
|
-
|
32
|
-
|
33
|
-
## shortcut - hands back whatever Import.catalog returns
def catalog
  Import.catalog
end
|
34
|
-
|
35
|
-
def initialize
|
36
|
-
@clubs = {} ## clubs (indexed) by canonical name
|
37
|
-
@clubs_by_name = {}
|
38
|
-
@errors = []
|
39
|
-
end
|
40
|
-
|
41
|
-
attr_reader :errors
|
42
|
-
## true if at least one message has been recorded in @errors
def errors?
  !@errors.empty?
end
|
43
|
-
|
44
|
-
## lookup table - normalized name => array of club records
## todo/check: rename to index or something - why? why not?
def mappings
  @clubs_by_name
end
|
45
|
-
## all club records - the values of the (canonical name => club) hash
def clubs
  @clubs.values
end
|
46
|
-
alias_method :all, :clubs ## use ActiveRecord-like alias for clubs
|
47
|
-
|
48
|
-
|
49
|
-
## helpers from club - use a helper module for includes - why? why not?
|
50
|
-
include NameHelper
|
51
|
-
## incl. strip_year( name )
|
52
|
-
## has_year?( name)
|
53
|
-
## strip_lang( name )
|
54
|
-
## normalize( name )
|
55
|
-
|
56
|
-
## strip the disambiguation qualifier from a wikipedia page name (if present)
##  note: only removes year and foot... qualifiers for now
##    e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
##         Willem II (football club)  => Willem II
##  does NOT strip others e.g.
##         América Futebol Clube (MG)
##  only add more "special" cases on demand (that is, if we find more)
##  todo/check: rename to strip_wikipedia_en or strip_wiki_qualifier - why? why not?
def strip_wiki( name )
  [
    /\([12][^\)]+?\)/,    ## starting with a digit 1 or 2 (assuming year)
    /\(foot[^\)]+?\)/,    ## starting with foot (assuming football ...)
  ].reduce( name ) { |text, qualifier| text.gsub( qualifier, '' ).strip }
end
|
70
|
-
|
71
|
-
## link wiki(pedia) page names to the matching club records
##   rec_or_recs - single wiki record or array of wiki records;
##                 every record gets asked for name and country
## note: prints an error message and exits (1) if no club or
##         more than one club matches the (stripped) page name
def add_wiki( rec_or_recs )
  recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs]   ## wrap (single) rec in array

  recs.each do |rec|
    ## note: strip qualifier () from wikipedia page name if present
    ##    e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
    ##         Willem II (football club)  => Willem II
    name = strip_wiki( rec.name )

    m = match_by( name: name, country: rec.country )
    ## note: match_by returns an empty array (and NOT nil) if nothing matches;
    ##   checking for nil only would fall through and crash on m[0] below
    if m.nil? || m.empty?
      puts "** !!! ERROR !!! - no matching club found for wiki(pedia) name >#{name}, #{rec.country.name} (#{rec.country.key})<; sorry - to fix add name to clubs"
      exit 1
    end
    if m.size > 1
      puts "** !!! ERROR !!! - too many (greater than one) matching clubs found for wiki(pedia) name >#{name}, #{rec.country.name} (#{rec.country.key})<"
      pp m
      exit 1
    end
    club = m[0]
    club.wikipedia = rec.name   ## note: keep the full (unstripped) page name
  end
end # method add_wiki
|
98
|
-
|
99
|
-
|
100
|
-
def add( rec_or_recs ) ## add club record / alt_names
|
101
|
-
recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
|
102
|
-
|
103
|
-
recs.each do |rec|
|
104
|
-
## puts "adding:"
|
105
|
-
## pp rec
|
106
|
-
### step 1) add canonical name
|
107
|
-
old_rec = @clubs[ rec.name ]
|
108
|
-
if old_rec
|
109
|
-
puts "** !!! ERROR !!! - (canonical) name conflict - duplicate - >#{rec.name}< will overwrite >#{old_rec.name}<:"
|
110
|
-
pp old_rec
|
111
|
-
pp rec
|
112
|
-
exit 1
|
113
|
-
else
|
114
|
-
@clubs[ rec.name ] = rec
|
115
|
-
end
|
116
|
-
|
117
|
-
## step 2) add all names (canonical name + alt names + alt names (auto))
|
118
|
-
names = [rec.name] + rec.alt_names
|
119
|
-
more_names = []
|
120
|
-
## check "hand-typed" names for year (auto-add)
|
121
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
122
|
-
## (1946-2001,2013-) etc.
|
123
|
-
names.each do |name|
|
124
|
-
if has_year?( name )
|
125
|
-
more_names << strip_year( name )
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
names += more_names
|
130
|
-
## check for duplicates - simple check for now - fix/improve
|
131
|
-
## todo/fix: (auto)remove duplicates - why? why not?
|
132
|
-
count = names.size
|
133
|
-
count_uniq = names.uniq.size
|
134
|
-
if count != count_uniq
|
135
|
-
puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s):"
|
136
|
-
pp names
|
137
|
-
pp rec
|
138
|
-
exit 1
|
139
|
-
end
|
140
|
-
|
141
|
-
## check with auto-names just warn for now and do not exit
|
142
|
-
names += rec.alt_names_auto
|
143
|
-
count = names.size
|
144
|
-
count_uniq = names.uniq.size
|
145
|
-
if count != count_uniq
|
146
|
-
puts "** !!! WARN !!! - #{count-count_uniq} duplicate name(s):"
|
147
|
-
pp names
|
148
|
-
pp rec
|
149
|
-
end
|
150
|
-
|
151
|
-
|
152
|
-
names.each_with_index do |name,i|
|
153
|
-
## check lang codes e.g. [en], [fr], etc.
|
154
|
-
## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
|
155
|
-
name = strip_lang( name )
|
156
|
-
norm = normalize( name )
|
157
|
-
alt_recs = @clubs_by_name[ norm ]
|
158
|
-
if alt_recs
|
159
|
-
## check if include club rec already or is new club rec
|
160
|
-
if alt_recs.include?( rec )
|
161
|
-
## note: do NOT include duplicate club record
|
162
|
-
msg = "** !!! WARN !!! - (norm) name conflict/duplicate for club - >#{name}< normalized to >#{norm}< already included >#{rec.name}, #{rec.country.name}<"
|
163
|
-
puts msg
|
164
|
-
@errors << msg
|
165
|
-
else
|
166
|
-
msg = "** !!! WARN !!! - name conflict/duplicate - >#{name}< will overwrite >#{alt_recs[0].name}, #{alt_recs[0].country.name}< with >#{rec.name}, #{rec.country.name}<"
|
167
|
-
puts msg
|
168
|
-
@errors << msg
|
169
|
-
alt_recs << rec
|
170
|
-
end
|
171
|
-
else
|
172
|
-
@clubs_by_name[ norm ] = [rec]
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end
|
176
|
-
end # method add
|
177
|
-
|
178
|
-
|
179
|
-
## todo/fix/check: use rename to find_canon or find_canonical() or something??
|
180
|
-
## remove (getting used?) - why? why not?
|
181
|
-
def []( name ) ## lookup by canoncial name only; todo/fix: add find alias why? why not?
|
182
|
-
puts "WARN!! do not use ClubIndex#[] for lookup >#{name}< - will get removed!!!"
|
183
|
-
@clubs[ name ]
|
184
|
-
end
|
185
|
-
|
186
|
-
|
187
|
-
def match( name )
|
188
|
-
# note: returns empty array (e.g. []) if no match and NOT nil
|
189
|
-
name = normalize( name )
|
190
|
-
m = @clubs_by_name[ name ] || []
|
191
|
-
|
192
|
-
## no match - retry with unaccented variant if different
|
193
|
-
## e.g. example is Preussen Münster (with mixed accent and unaccented letters) that would go unmatched for now
|
194
|
-
## Preussen Münster => preussenmünster (norm) => preussenmunster (norm+unaccent)
|
195
|
-
if m.empty?
|
196
|
-
name2 = unaccent( name )
|
197
|
-
if name2 != name
|
198
|
-
m = @clubs_by_name[ name2 ] || []
|
199
|
-
end
|
200
|
-
end
|
201
|
-
m
|
202
|
-
end
|
203
|
-
|
204
|
-
|
205
|
-
## helper to always convert (possible) country key to existing country record
|
206
|
-
## todo: make private - why? why not?
|
207
|
-
def country( country )
|
208
|
-
if country.is_a?( String ) || country.is_a?( Symbol )
|
209
|
-
## note: use own "global" countries index setting for ClubIndex - why? why not?
|
210
|
-
rec = catalog.countries.find( country.to_s )
|
211
|
-
if rec.nil?
|
212
|
-
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
213
|
-
exit 1
|
214
|
-
end
|
215
|
-
rec
|
216
|
-
else
|
217
|
-
country ## (re)use country struct - no need to run lookup again
|
218
|
-
end
|
219
|
-
end
|
220
|
-
|
221
|
-
|
222
|
-
## match_by - always returns an array (empty if there is no match) and NOT nil
|
223
|
-
def match_by( name:, country: nil )
|
224
|
-
## note: allow passing in of country key too (auto-counvert)
|
225
|
-
## and country struct too
|
226
|
-
## - country assumes / allows the country key or fifa code for now
|
227
|
-
m = match( name )
|
228
|
-
|
229
|
-
if country
|
230
|
-
country = country( country )
|
231
|
-
|
232
|
-
## note: match must for now always include name
|
233
|
-
## filter by country
|
234
|
-
m = m.select { |club| club.country.key == country.key }
|
235
|
-
end
|
236
|
-
m
|
237
|
-
end
|
238
|
-
|
239
|
-
## find by name only - passes on to find_by with no country (nil)
def find( name )
  find_by( name: name, country: nil )
end
|
240
|
-
## find! by name only - passes on to find_by! with no country (nil)
def find!( name )
  find_by!( name: name, country: nil )
end
|
241
|
-
|
242
|
-
## find - always returns a single record / match or nil
|
243
|
-
## if there is more than one match then find aborts / fails
|
244
|
-
def find_by!( name:, country: nil ) ## todo/fix: add international or league flag?
|
245
|
-
club = find_by( name: name, country: country )
|
246
|
-
|
247
|
-
if club.nil?
|
248
|
-
puts "** !!! ERROR - no match for club >#{name}<"
|
249
|
-
exit 1
|
250
|
-
end
|
251
|
-
|
252
|
-
club
|
253
|
-
end
|
254
|
-
|
255
|
-
|
256
|
-
def find_by( name:, country: nil ) ## todo/fix: add international or league flag?
|
257
|
-
## note: allow passing in of country key too (auto-counvert)
|
258
|
-
## and country struct too
|
259
|
-
## - country assumes / allows the country key or fifa code for now
|
260
|
-
m = nil
|
261
|
-
|
262
|
-
if country
|
263
|
-
country = country( country )
|
264
|
-
|
265
|
-
m = match_by( name: name, country: country )
|
266
|
-
|
267
|
-
if m.empty?
|
268
|
-
## (re)try with second country - quick hacks for known leagues
|
269
|
-
## todo/fix: add league flag to activate!!! - why? why not
|
270
|
-
m = match_by( name: name, country: 'wal' ) if country.key == 'eng'
|
271
|
-
m = match_by( name: name, country: 'eng' ) if country.key == 'sco'
|
272
|
-
m = match_by( name: name, country: 'nir' ) if country.key == 'ie'
|
273
|
-
m = match_by( name: name, country: 'mc' ) if country.key == 'fr'
|
274
|
-
m = match_by( name: name, country: 'li' ) if country.key == 'ch'
|
275
|
-
m = match_by( name: name, country: 'ca' ) if country.key == 'us'
|
276
|
-
m = match_by( name: name, country: 'nz' ) if country.key == 'au'
|
277
|
-
end
|
278
|
-
else ## try "global" search - no country passed in
|
279
|
-
m = match( name )
|
280
|
-
end
|
281
|
-
|
282
|
-
|
283
|
-
club = nil
|
284
|
-
if m.empty?
|
285
|
-
## puts "** !!! WARN !!! no match for club >#{name}<"
|
286
|
-
elsif m.size > 1
|
287
|
-
puts "** !!! ERROR - too many matches (#{m.size}) for club >#{name}<:"
|
288
|
-
pp m
|
289
|
-
exit 1
|
290
|
-
else # bingo; match - assume size == 1
|
291
|
-
club = m[0]
|
292
|
-
end
|
293
|
-
|
294
|
-
club
|
295
|
-
end
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
## build a (club name => club record) lookup hash from a mods hash
##  e.g.
##   { 'Arsenal   | Arsenal FC'    => 'Arsenal, ENG',
##     'Liverpool | Liverpool FC'  => 'Liverpool, ENG',
##     'Barcelona'                 => 'Barcelona, ESP',
##     'Valencia'                  => 'Valencia, ESP' }
##  keys   - one or more names separated by |
##  values - club name and country separated by , (looked up with find_by!)
def build_mods( mods )
  mods.each_with_object( {} ) do |(aliases, target), lookup|
    ## todo/fix: make sure country is present !!!!
    canonical, country_key = target.split( ',' ).map( &:strip )   ## strip all spaces
    club = find_by!( name: canonical, country: country_key )

    aliases.split( '|' ).map( &:strip ).each { |alt| lookup[alt] = club }
  end
end
|
324
|
-
|
325
|
-
|
326
|
-
def dump_duplicates # debug helper - report duplicate club name records
|
327
|
-
@clubs_by_name.each do |name, clubs|
|
328
|
-
if clubs.size > 1
|
329
|
-
puts "#{clubs.size} matching club duplicates for >#{name}<:"
|
330
|
-
pp clubs
|
331
|
-
end
|
332
|
-
end
|
333
|
-
end
|
334
|
-
end # class ClubIndex
|
335
|
-
|
336
|
-
|
337
|
-
end # module Import
|
338
|
-
end # module SportDb
|
@@ -1,114 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module Import
|
4
|
-
|
5
|
-
class NationalTeamIndex
|
6
|
-
|
7
|
-
attr_reader :teams ## all (national) team records
|
8
|
-
|
9
|
-
def initialize( recs )
|
10
|
-
@teams = []
|
11
|
-
@teams_by_code = {}
|
12
|
-
@teams_by_name = {}
|
13
|
-
|
14
|
-
add( recs )
|
15
|
-
end
|
16
|
-
|
17
|
-
include NameHelper
|
18
|
-
## incl. strip_year( name )
|
19
|
-
## has_year?( name)
|
20
|
-
## strip_lang( name )
|
21
|
-
## normalize( name )
|
22
|
-
|
23
|
-
|
24
|
-
def add( recs )
|
25
|
-
###########################################
|
26
|
-
## auto-fill national teams
|
27
|
-
## pp recs
|
28
|
-
recs.each do |rec|
|
29
|
-
@teams << rec
|
30
|
-
|
31
|
-
## add fifa code lookup
|
32
|
-
if @teams_by_code[ rec.code.downcase ]
|
33
|
-
puts "** !! ERROR !! national team code (code) >#{rec.code}< already exits!!"
|
34
|
-
exit 1
|
35
|
-
else
|
36
|
-
@teams_by_code[ rec.code.downcase ] = rec
|
37
|
-
end
|
38
|
-
|
39
|
-
|
40
|
-
## add all names (canonical name + alt names
|
41
|
-
names = [rec.name] + rec.alt_names
|
42
|
-
more_names = []
|
43
|
-
## check "hand-typed" names for year (auto-add)
|
44
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
45
|
-
## (1946-2001,2013-) etc.
|
46
|
-
names.each do |name|
|
47
|
-
if has_year?( name )
|
48
|
-
more_names << strip_year( name )
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
names += more_names
|
53
|
-
## check for duplicates - simple check for now - fix/improve
|
54
|
-
## todo/fix: (auto)remove duplicates - why? why not?
|
55
|
-
count = names.size
|
56
|
-
count_uniq = names.uniq.size
|
57
|
-
if count != count_uniq
|
58
|
-
puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s) in national teams:"
|
59
|
-
pp names
|
60
|
-
pp rec
|
61
|
-
exit 1
|
62
|
-
end
|
63
|
-
|
64
|
-
names.each_with_index do |name,i|
|
65
|
-
## check lang codes e.g. [en], [fr], etc.
|
66
|
-
## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
|
67
|
-
name = strip_lang( name )
|
68
|
-
norm = normalize( name )
|
69
|
-
old_rec = @teams_by_name[ norm ]
|
70
|
-
if old_rec
|
71
|
-
## check if tame name already is included or is new team rec
|
72
|
-
msg = "** !!! ERROR !!! - national team name conflict/duplicate - >#{name}< will overwrite >#{old_rec.name}< with >#{rec.name}<"
|
73
|
-
puts msg
|
74
|
-
exit 1
|
75
|
-
else
|
76
|
-
@teams_by_name[ norm ] = rec
|
77
|
-
end
|
78
|
-
end
|
79
|
-
end ## each record
|
80
|
-
end # method initialize
|
81
|
-
|
82
|
-
## fix/todo: add find_by (alias for find_by_name/find_by_code)
|
83
|
-
## lookup a team record by code
##   note: allows symbols too and always downcases e.g. AUT to aut etc.
## returns nil if the code is not in the index
def find_by_code( code )
  @teams_by_code[ code.to_s.downcase ]
end
|
87
|
-
|
88
|
-
## lookup a team record by (normalized) name
##   note: allows symbols too (converted with to_s first)
## returns nil if the normalized name is not in the index
def find_by_name( name )
  @teams_by_name[ normalize( name.to_s ) ]
end
|
92
|
-
|
93
|
-
## lookup a team record by name or code
##   check longest match first (assume name is longer than code), that is,
##   try the (normalized) name first and fall back to the code
def find( q )
  by_name = find_by_name( q )
  return by_name unless by_name.nil?

  find_by_code( q )
end
|
100
|
-
|
101
|
-
def find!( q )
|
102
|
-
team = find( q )
|
103
|
-
if team.nil?
|
104
|
-
puts "** !!! ERROR - no match for national team >#{q}< found"
|
105
|
-
exit 1
|
106
|
-
end
|
107
|
-
team
|
108
|
-
end
|
109
|
-
end # class NationalTeamIndex
|
110
|
-
|
111
|
-
|
112
|
-
end # module Import
|
113
|
-
end # module SportDb
|
114
|
-
|
@@ -1,43 +0,0 @@
|
|
1
|
-
|
2
|
-
module SportDb
|
3
|
-
module Import
|
4
|
-
|
5
|
-
|
6
|
-
class TeamIndex
|
7
|
-
## note: "virtual" index lets you search clubs and/or national_teams (don't care)
|
8
|
-
|
9
|
-
## shortcut - hands back whatever Import.catalog returns
def catalog
  Import.catalog
end
|
10
|
-
|
11
|
-
## todo/check: rename to/use map_by! for array version - why? why not?
|
12
|
-
## find team record(s) by name for a league
##   name - single name or array of names
## returns a single record for a single name and
##   an array of records (same order) for an array of names
## todo/check: rename to/use map_by! for array version - why? why not?
def find_by!( name:, league:, mods: nil )
  lookup = ->( q ) { __find_by!( name: q, league: league, mods: mods ) }

  if name.is_a?( Array )
    name.map { |q| lookup.call( q ) }
  else   ## assume single name
    lookup.call( name )
  end
end
|
23
|
-
|
24
|
-
## find a single team record by name for a league
##   1) use the mods entry for the league key and name (if present)
##   2) otherwise ask the catalog - clubs or national teams (depending on league)
def __find_by!( name:, league:, mods: nil )
  mapped = mods && mods[ league.key ] && mods[ league.key ][ name ]
  return mapped if mapped

  ## assume national teams (not clubs)
  return catalog.national_teams.find!( name )  unless league.clubs?

  ## todo/fix: add intl? to ActiveRecord league!!!
  return catalog.clubs.find!( name )           if league.intl?

  ## assume clubs in domestic/national league tournament
  catalog.clubs.find_by!( name: name, country: league.country )
end # method __find_by!
|
39
|
-
|
40
|
-
end # class TeamIndex
|
41
|
-
|
42
|
-
end # module Import
|
43
|
-
end # module SportDb
|