sportdb-config 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +6 -0
- data/README.md +108 -0
- data/config/world/ar.txt +11 -0
- data/config/world/at.txt +19 -0
- data/config/world/be.txt +18 -0
- data/config/world/de.txt +19 -0
- data/lib/sportdb/config.rb +1 -0
- data/lib/sportdb/config/club_reader.rb +1 -1
- data/lib/sportdb/config/clubs.rb +47 -5
- data/lib/sportdb/config/config.rb +21 -3
- data/lib/sportdb/config/version.rb +1 -1
- data/lib/sportdb/config/wiki_reader.rb +104 -0
- data/test/test_club_index.rb +14 -0
- data/test/test_config.rb +32 -7
- data/test/test_wiki_reader.rb +77 -0
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f264abfc487652f687a3e4d6dc4388594a587896
|
4
|
+
data.tar.gz: 32f24cc478d3db058ef5aeb943a279019e2d68d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e5a329a6027121d68c81d51dd683befec81eb348ba4ef4606437ca5b77bfd1346ae31e8781a4e5c086c93f5cdd053a417a359e9edefa440fbe580ee0345970d
|
7
|
+
data.tar.gz: a3b7f2e23f0cd8f9755a8c594ef7c520be6b0d131762dbefff81f5c6acee0dbb15ed80128fa292f72ae704e471af28b16152cd1fd71564cc38c4f0b5efbcdf82
|
data/Manifest.txt
CHANGED
@@ -6,7 +6,11 @@ config/leagues/eng.txt
|
|
6
6
|
config/leagues/fr.txt
|
7
7
|
config/leagues/gr.txt
|
8
8
|
config/leagues/sco.txt
|
9
|
+
config/world/ar.txt
|
10
|
+
config/world/at.txt
|
11
|
+
config/world/be.txt
|
9
12
|
config/world/countries.txt
|
13
|
+
config/world/de.txt
|
10
14
|
config/world/eng.txt
|
11
15
|
lib/sportdb/config.rb
|
12
16
|
lib/sportdb/config/club_reader.rb
|
@@ -19,6 +23,7 @@ lib/sportdb/config/league_utils.rb
|
|
19
23
|
lib/sportdb/config/season_utils.rb
|
20
24
|
lib/sportdb/config/variants.rb
|
21
25
|
lib/sportdb/config/version.rb
|
26
|
+
lib/sportdb/config/wiki_reader.rb
|
22
27
|
test/helper.rb
|
23
28
|
test/test_club_index.rb
|
24
29
|
test/test_club_reader.rb
|
@@ -29,3 +34,4 @@ test/test_league_reader.rb
|
|
29
34
|
test/test_league_utils.rb
|
30
35
|
test/test_season_utils.rb
|
31
36
|
test/test_variants.rb
|
37
|
+
test/test_wiki_reader.rb
|
data/README.md
CHANGED
@@ -10,8 +10,116 @@
|
|
10
10
|
|
11
11
|
## Usage
|
12
12
|
|
13
|
+
Let's use the /clubs datasets (1500+ football clubs from around the world)
|
14
|
+
to match name "variants", e.g. `Arsenal`, to canonical, globally unique
|
15
|
+
names e.g. `Arsenal FC, London, England`:
|
13
16
|
|
17
|
+
``` ruby
|
18
|
+
require 'sportdb/config'
|
14
19
|
|
20
|
+
## note: requires a local copy of the football.db clubs datasets
|
21
|
+
## see https://github.com/openfootball/clubs
|
22
|
+
SportDb::Import.config.clubs_dir = './clubs'
|
23
|
+
|
24
|
+
m = SportDb::Import.config.clubs.match( 'Arsenal' )
|
25
|
+
m.size # 3 club matches found
|
26
|
+
#=> 3
|
27
|
+
m[0].name; m[0].city; m[0].country
|
28
|
+
#=> "Arsenal FC", "London", "England"
|
29
|
+
m[1].name; m[1].city; m[1].country
|
30
|
+
#=> "Arsenal Tula", "Tula", "Russia"
|
31
|
+
m[2].name; m[2].city; m[2].country
|
32
|
+
#=> "Arsenal de Sarandí", "Sarandí", "Argentina"
|
33
|
+
|
34
|
+
|
35
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal', country: 'eng' )
|
36
|
+
# -or- try alternative names (and auto-generated spelling variants)
|
37
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal FC', country: 'eng' )
|
38
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal F.C.', country: 'eng' )
|
39
|
+
m = SportDb::Import.config.clubs.match_by( name: '...A.r.s.e.n.a.l... F.C...', country: 'eng' )
|
40
|
+
m.size # 1 club match found
|
41
|
+
#=> 1
|
42
|
+
m[0].name; m[0].city; m[0].country
|
43
|
+
#=> "Arsenal FC", "London", "England"
|
44
|
+
|
45
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal', country: 'ar' )
|
46
|
+
# -or- try alternative names (and auto-generated spelling variants)
|
47
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal Sarandí', country: 'ar' )
|
48
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal Sarandi', country: 'ar' )
|
49
|
+
m.size # 1 club match found
|
50
|
+
#=> 1
|
51
|
+
m[0].name; m[0].city; m[0].country
|
52
|
+
#=> "Arsenal de Sarandí", "Sarandí", "Argentina"
|
53
|
+
|
54
|
+
|
55
|
+
# try some more
|
56
|
+
m = SportDb::Import.config.clubs.match( 'AZ' )
|
57
|
+
m[0].name; m[0].city; m[0].country
|
58
|
+
#=> "AZ Alkmaar", "Alkmaar", "Netherlands"
|
59
|
+
|
60
|
+
m = SportDb::Import.config.clubs.match( 'Bayern' )
|
61
|
+
# -or- try alternative names (and auto-generated spelling variants)
|
62
|
+
m = SportDb::Import.config.clubs.match( 'Bayern München' )
|
63
|
+
m = SportDb::Import.config.clubs.match( 'Bayern Munchen' )
|
64
|
+
m = SportDb::Import.config.clubs.match( 'Bayern Muenchen' )
|
65
|
+
m[0].name; m[0].city; m[0].country
|
66
|
+
#=> "Bayern München", "München", "Germany"
|
67
|
+
|
68
|
+
# and so on
|
69
|
+
# ...
|
70
|
+
```
|
71
|
+
|
72
|
+
Let's print all names that match more than one club (duplicates):
|
73
|
+
|
74
|
+
``` ruby
|
75
|
+
SportDb::Import.config.clubs.mappings.each do |name, clubs|
|
76
|
+
if clubs.size > 1
|
77
|
+
puts "#{clubs.size} matching clubs for `#{name}`:"
|
78
|
+
clubs.each do |club|
|
79
|
+
puts " - #{club.name}, #{club.city}, #{club.country.name} (#{club.country.key})"
|
80
|
+
end
|
81
|
+
puts
|
82
|
+
end
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
resulting in:
|
87
|
+
|
88
|
+
```
|
89
|
+
2 matching clubs for `valencia`:
|
90
|
+
- Valencia FC, Léogâne, Haiti (ht)
|
91
|
+
- Valencia CF, Valencia, Spain (es)
|
92
|
+
|
93
|
+
2 matching clubs for `apollon`:
|
94
|
+
- Apollon Limassol FC, , Cyprus (cy)
|
95
|
+
- Apollon Smyrnis FC, Athens, Greece (gr)
|
96
|
+
|
97
|
+
3 matching clubs for `arsenal`:
|
98
|
+
- Arsenal FC, London, England (eng)
|
99
|
+
- Arsenal Tula, Tula, Russia (ru)
|
100
|
+
- Arsenal de Sarandí, Sarandí, Argentina (ar)
|
101
|
+
|
102
|
+
2 matching clubs for `liverpool`:
|
103
|
+
- Liverpool FC, Liverpool, England (eng)
|
104
|
+
- Liverpool Montevideo, Montevideo, Uruguay (uy)
|
105
|
+
|
106
|
+
2 matching clubs for `barcelona`:
|
107
|
+
- FC Barcelona, Barcelona, Spain (es)
|
108
|
+
- Barcelona Guayaquil, Guayaquil, Ecuador (ec)
|
109
|
+
|
110
|
+
3 matching clubs for `nacional`:
|
111
|
+
- CD Nacional Madeira, Funchal, Portugal (pt)
|
112
|
+
- Club Nacional, Asunción, Paraguay (py)
|
113
|
+
- Nacional de Montevideo, Montevideo, Uruguay (uy)
|
114
|
+
|
115
|
+
2 matching clubs for `sanjose`:
|
116
|
+
- San Jose Earthquakes, San Jose, United States (us)
|
117
|
+
- Club Deportivo San José, Oruro, Bolivia (bo)
|
118
|
+
|
119
|
+
...
|
120
|
+
```
|
121
|
+
|
122
|
+
That's it.
|
15
123
|
|
16
124
|
|
17
125
|
## License
|
data/config/world/ar.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
===========================
|
2
|
+
= Argentina (ar)
|
3
|
+
|
4
|
+
Buenos Aires
|
5
|
+
Santa Fe | Provincia Santa Fe
|
6
|
+
Córdoba | Provincia Córdoba
|
7
|
+
Mendoza | Provincia Mendoza
|
8
|
+
San Juan | Provincia San Juan
|
9
|
+
Tucumán | Provincia Tucumán
|
10
|
+
Entre Ríos | Provincia Entre Ríos
|
11
|
+
Misiones | Provincia Misiones
|
data/config/world/at.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
=================================
|
2
|
+
= Österreich • Austria (at)
|
3
|
+
|
4
|
+
|
5
|
+
Wien | Vienna [en]
|
6
|
+
Burgenland
|
7
|
+
Niederösterreich
|
8
|
+
Oberösterreich
|
9
|
+
Steiermark
|
10
|
+
Kärnten
|
11
|
+
Salzburg
|
12
|
+
Tirol
|
13
|
+
Vorarlberg
|
14
|
+
|
15
|
+
|
16
|
+
== Niederösterreich ==
|
17
|
+
|
18
|
+
Wr. Neustadt | Wiener Neustadt
|
19
|
+
St. Pölten | Sankt Pölten
|
data/config/world/be.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
==================================
|
2
|
+
= Belgium (be)
|
3
|
+
|
4
|
+
Brussels
|
5
|
+
|
6
|
+
Antwerpen › Vlaanderen | Antwerpen
|
7
|
+
Limburg › Vlaanderen | Limburg
|
8
|
+
Oost-Vlaanderen › Vlaanderen | Oost-Vlaanderen
|
9
|
+
West-Vlaanderen › Vlaanderen | West-Vlaanderen
|
10
|
+
|
11
|
+
Hainaut › Wallonie | Hainaut
|
12
|
+
Liège › Wallonie | Liège
|
13
|
+
|
14
|
+
|
15
|
+
== Hainaut › Wallonie ==
|
16
|
+
|
17
|
+
Mouscron [fr] | Moeskroen [nl]
|
18
|
+
Mons [fr] | Bergen [nl]
|
data/config/world/de.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
============================================
|
2
|
+
= Germany • Deutschland (de)
|
3
|
+
|
4
|
+
Bayern
|
5
|
+
Nordrhein-Westfalen
|
6
|
+
Saarland
|
7
|
+
Niedersachsen
|
8
|
+
Brandenburg
|
9
|
+
Berlin
|
10
|
+
Hamburg
|
11
|
+
Bremen
|
12
|
+
Baden-Württemberg
|
13
|
+
Hessen
|
14
|
+
Rheinland-Pfalz
|
15
|
+
Schleswig-Holstein
|
16
|
+
Mecklenburg-Vorpommern
|
17
|
+
Sachsen
|
18
|
+
Sachsen-Anhalt
|
19
|
+
Thüringen
|
data/lib/sportdb/config.rb
CHANGED
data/lib/sportdb/config/clubs.rb
CHANGED
@@ -17,10 +17,26 @@ class Club
|
|
17
17
|
|
18
18
|
## special import only attribs
|
19
19
|
attr_accessor :alt_names_auto ## auto-generated alt names
|
20
|
+
attr_accessor :wikipedia # wikipedia page name (for english (en))
|
20
21
|
|
21
22
|
def historic?() @year_end ? true : false; end
|
22
23
|
alias_method :past?, :historic?
|
23
24
|
|
25
|
+
|
26
|
+
## predicate: true if a wikipedia page name is set; always returns true/false (same convention as historic?)
def wikipedia?() @wikipedia ? true : false; end
|
27
|
+
## Returns the english (en) wikipedia page url built from the wikipedia page name
##   or nil if no wikipedia page name is set.
def wikipedia_url
  return nil unless @wikipedia

  ## note: replace spaces with underscores (_)
  ##   e.g. Club Brugge KV => Club_Brugge_KV
  ## todo/check/fix:
  ##   check if "plain" dash (-) needs to get replaced with typographic dash??
  page = @wikipedia.tr( ' ', '_' )
  "https://en.wikipedia.org/wiki/#{page}"
end
|
38
|
+
|
39
|
+
|
24
40
|
def initialize
|
25
41
|
@alt_names = []
|
26
42
|
@alt_names_auto = []
|
@@ -135,6 +151,25 @@ class ClubIndex
|
|
135
151
|
def strip_norm( name ) Club.strip_norm( name ); end
|
136
152
|
|
137
153
|
|
154
|
+
## Add wiki(pedia) club records / links to the matching (already indexed) clubs.
##
##  rec_or_recs - a single record or an array of records;
##                every record gets asked for name and country
##                (and the country for name and key in error messages)
##
##  Every record MUST match exactly one club (looked up via match_by);
##  otherwise an error message gets printed and the program exits with status 1.
##  On a match the record's name gets stored as the club's wikipedia page name.
def add_wiki( rec_or_recs )
  ## wrap (single) rec in array
  wiki_recs = if rec_or_recs.is_a?( Array )
                rec_or_recs
              else
                [rec_or_recs]
              end

  wiki_recs.each do |wiki|
    matches = match_by( name: wiki.name, country: wiki.country )

    if matches.nil?
      puts "** !!! ERROR !!! - no matching club found for wiki(pedia) name >#{wiki.name}, #{wiki.country.name} (#{wiki.country.key})<; sorry - to fix add name to clubs"
      exit 1
    elsif matches.size > 1
      puts "** !!! ERROR !!! - too many (greater than one) matching clubs found for wiki(pedia) name >#{wiki.name}, #{wiki.country.name} (#{wiki.country.key})<"
      pp matches
      exit 1
    end

    ## exactly one match - attach the wikipedia page name
    matches[0].wikipedia = wiki.name
  end
end # method add_wiki
|
172
|
+
|
138
173
|
|
139
174
|
def add( rec_or_recs ) ## add club record / alt_names
|
140
175
|
recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
|
@@ -230,11 +265,18 @@ class ClubIndex
|
|
230
265
|
m = match( name )
|
231
266
|
if m ## filter by country
|
232
267
|
## note: country assumes / allows the country key or fifa code for now
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
268
|
+
|
269
|
+
## note: allow passing in of country struct too
|
270
|
+
country_rec = if country.is_a?( SportDb::Import::Country )
|
271
|
+
country ## (re)use country struct - no need to run lookup again
|
272
|
+
else
|
273
|
+
rec = SportDb::Import.config.countries[ country ]
|
274
|
+
if rec.nil?
|
275
|
+
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
276
|
+
exit 1
|
277
|
+
end
|
278
|
+
rec
|
279
|
+
end
|
238
280
|
|
239
281
|
m = m.select { |club| club.country.key == country_rec.key }
|
240
282
|
m = nil if m.empty? ## note: reset to nil if no more matches
|
@@ -40,14 +40,21 @@ class Configuration
|
|
40
40
|
clubs\.txt$
|
41
41
|
}x
|
42
42
|
|
43
|
-
|
43
|
+
|
44
|
+
CLUBS_WIKI_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
45
|
+
(?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
46
|
+
clubs\.wiki\.txt$
|
47
|
+
}x
|
48
|
+
|
49
|
+
|
50
|
+
def find_clubs_datafiles( path, pattern )
|
44
51
|
datafiles = [] ## note: [country, path] pairs for now
|
45
52
|
|
46
53
|
## check all txt files as candidates (MUST include country code for now)
|
47
54
|
candidates = Dir.glob( "#{path}/**/*.txt" )
|
48
55
|
pp candidates
|
49
56
|
candidates.each do |candidate|
|
50
|
-
datafiles << candidate if
|
57
|
+
datafiles << candidate if pattern.match( candidate )
|
51
58
|
end
|
52
59
|
|
53
60
|
pp datafiles
|
@@ -65,7 +72,7 @@ class Configuration
|
|
65
72
|
|
66
73
|
## todo/fix: add to teamreader
|
67
74
|
## check that name and alt_names for a club are all unique (not duplicates)
|
68
|
-
datafiles = find_clubs_datafiles( clubs_dir )
|
75
|
+
datafiles = find_clubs_datafiles( clubs_dir, CLUBS_REGEX )
|
69
76
|
datafiles.each do |datafile|
|
70
77
|
recs += ClubReader.read( datafile )
|
71
78
|
end
|
@@ -74,6 +81,17 @@ class Configuration
|
|
74
81
|
clubs = ClubIndex.new
|
75
82
|
clubs.add( recs )
|
76
83
|
|
84
|
+
## add wiki(pedia) anchored links
|
85
|
+
recs = []
|
86
|
+
datafiles = find_clubs_datafiles( clubs_dir, CLUBS_WIKI_REGEX )
|
87
|
+
datafiles.each do |datafile|
|
88
|
+
recs += WikiReader.read( datafile )
|
89
|
+
end
|
90
|
+
|
91
|
+
pp recs
|
92
|
+
clubs.add_wiki( recs )
|
93
|
+
|
94
|
+
|
77
95
|
if clubs.errors?
|
78
96
|
puts ""
|
79
97
|
puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
module Import


## Reader for wiki(pedia) club page names in a plain text format.
##
##  Format (as handled by parse):
##    - blank lines and lines starting with # get skipped;
##      inline (until end-of-line) comments starting with # get stripped
##    - "decorative" lines made up of = only (e.g. ======) get skipped
##    - level 1 headings e.g. = Belgium (be) = start a country section;
##      the country code in parentheses gets looked up via
##      SportDb::Import.config.countries
##    - every other line is a wiki(pedia) page name for a club
##      e.g. Club_Brugge_KV or Club Brugge KV
##
##  note: on format errors a message gets printed and the program exits (status 1);
##        parsing also prints debug output (pp / puts) to stdout
class WikiReader     ## todo/check: rename to WikiClubReader - why? why not?

  ## (read-only) record holding a wiki(pedia) page name
  ##   and the country of the heading the name was listed under
  class WikiClub
    attr_reader :name, :country

    def initialize( name, country )
      @name, @country = name, country
    end
  end


  ## Read the datafile at path (as utf-8) and return the parsed records (see parse).
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    ## note: use the block form so that the file handle gets closed right after reading
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end


  ## Parse the text and return an array of WikiClub records (in reading order).
  def self.parse( txt )
    recs = []
    last_country = nil   ## note: supports only one level of headings for now (and that is a country)

    txt.each_line do |line|
      line = line.strip

      next if line.empty?
      next if line.start_with?( '#' )   ## skip comments too

      ## strip inline (until end-of-line) comments too
      ##   e.g  Eupen => KAS Eupen,   ## [de]
      ##     => Eupen => KAS Eupen,
      line = line.sub( /#.*/, '' ).strip
      pp line

      next if line =~ /^={1,}$/   ## skip "decorative" only heading e.g. ========

      ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
      ##  todo/check: allow === Text =-=-=-=-=-= too - why? why not?
      if line =~ /^(={1,})     ## leading ======
                   ([^=]+?)    ## text (note: for now no "inline" = allowed)
                   =*          ## (optional) trailing ====
                   $/x
        heading_level = $1.length   ## count number of = for heading level
        heading       = $2.strip

        puts "heading #{heading_level} >#{heading}<"

        if heading_level > 1
          puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
          exit 1
        end

        ## quick hack: if level is 1 assume country for now
        ##   and extract country code e.g.
        ##     Austria (at) => at
        ## todo/fix: allow code only e.g. at or aut without enclosing () too - why? why not?
        if heading =~ /\(([a-z]{2,3})\)/i   ## note allow (at) or (AUT) too
          country_code = $1

          ## check country code - MUST exist for now!!!!
          country = SportDb::Import.config.countries[ country_code ]
          if country.nil?
            puts "** !!! ERROR [wiki reader] !!! - unknown country with code >#{country_code}< - sorry - add country to config to fix"
            exit 1
          end

          last_country = country
        else
          puts "!!! error - heading level 1 - missing country code - >#{heading}<"
          exit 1
        end
        pp last_country
      else
        ## strip and squish (white)spaces
        #   e.g. New York   FC   (2011-) => New York FC (2011-)
        value = line.strip.gsub( /[ \t]+/, ' ' )

        ## normalize (allow underscore (_) - replace with space)
        ##  e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
        value = value.gsub( '_', ' ' )

        if last_country.nil?
          puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
          exit 1
        end

        recs << WikiClub.new( value, last_country )
      end
    end  # each_line
    recs
  end  # method parse

end # class WikiReader

end ## module Import
end ## module SportDb
|
data/test/test_club_index.rb
CHANGED
@@ -81,6 +81,20 @@ class TestClubIndex < MiniTest::Test
|
|
81
81
|
|
82
82
|
m = SportDb::Import.config.clubs.match( '...A.r.s.e.n.a.l... F.C...' )
|
83
83
|
assert_equal 2, m.size
|
84
|
+
|
85
|
+
|
86
|
+
##############################################
|
87
|
+
## test wikipedia names and links/urls
|
88
|
+
|
89
|
+
m = SportDb::Import.config.clubs.match( 'Club Brugge KV' )
|
90
|
+
assert_equal 1, m.size
|
91
|
+
assert_equal 'Club Brugge KV', m[0].wikipedia
|
92
|
+
assert_equal 'https://en.wikipedia.org/wiki/Club_Brugge_KV', m[0].wikipedia_url
|
93
|
+
|
94
|
+
m = SportDb::Import.config.clubs.match( 'RSC Anderlecht' )
|
95
|
+
assert_equal 1, m.size
|
96
|
+
assert_equal 'R.S.C. Anderlecht', m[0].wikipedia
|
97
|
+
assert_equal 'https://en.wikipedia.org/wiki/R.S.C._Anderlecht', m[0].wikipedia_url
|
84
98
|
end
|
85
99
|
|
86
100
|
end # class TestClubIndex
|
data/test/test_config.rb
CHANGED
@@ -9,15 +9,40 @@ require 'helper'
|
|
9
9
|
|
10
10
|
class TestConfig < MiniTest::Test
|
11
11
|
|
12
|
-
def
|
12
|
+
def match_clubs( txt ) SportDb::Import::Configuration::CLUBS_REGEX.match( txt ); end
|
13
|
+
def match_clubs_wiki( txt ) SportDb::Import::Configuration::CLUBS_WIKI_REGEX.match( txt ); end
|
13
14
|
|
14
15
|
def test_find_clubs
|
15
|
-
assert
|
16
|
-
assert
|
17
|
-
assert
|
18
|
-
assert
|
19
|
-
assert
|
20
|
-
assert
|
16
|
+
assert match_clubs( 'de.clubs.txt' )
|
17
|
+
assert match_clubs( 'deutschland/de.clubs.txt' )
|
18
|
+
assert match_clubs( 'europe/de-deutschland/clubs.txt' )
|
19
|
+
assert match_clubs( 'de-deutschland/clubs.txt' )
|
20
|
+
assert match_clubs( 'clubs.txt' )
|
21
|
+
assert match_clubs( 'deutschland/clubs.txt' )
|
22
|
+
|
23
|
+
assert !match_clubs( 'de.clubs.wiki.txt' )
|
24
|
+
assert !match_clubs( 'deutschland/de.clubs.wiki.txt' )
|
25
|
+
assert !match_clubs( 'europe/de-deutschland/clubs.wiki.txt' )
|
26
|
+
assert !match_clubs( 'de-deutschland/clubs.wiki.txt' )
|
27
|
+
assert !match_clubs( 'clubs.wiki.txt' )
|
28
|
+
assert !match_clubs( 'deutschland/clubs.wiki.txt' )
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_find_clubs_wiki
|
33
|
+
assert !match_clubs_wiki( 'de.clubs.txt' )
|
34
|
+
assert !match_clubs_wiki( 'deutschland/de.clubs.txt' )
|
35
|
+
assert !match_clubs_wiki( 'europe/de-deutschland/clubs.txt' )
|
36
|
+
assert !match_clubs_wiki( 'de-deutschland/clubs.txt' )
|
37
|
+
assert !match_clubs_wiki( 'clubs.txt' )
|
38
|
+
assert !match_clubs_wiki( 'deutschland/clubs.txt' )
|
39
|
+
|
40
|
+
assert match_clubs_wiki( 'de.clubs.wiki.txt' )
|
41
|
+
assert match_clubs_wiki( 'deutschland/de.clubs.wiki.txt' )
|
42
|
+
assert match_clubs_wiki( 'europe/de-deutschland/clubs.wiki.txt' )
|
43
|
+
assert match_clubs_wiki( 'de-deutschland/clubs.wiki.txt' )
|
44
|
+
assert match_clubs_wiki( 'clubs.wiki.txt' )
|
45
|
+
assert match_clubs_wiki( 'deutschland/clubs.wiki.txt' )
|
21
46
|
end
|
22
47
|
|
23
48
|
end # class TestConfig
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_wiki_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestWikiReader < MiniTest::Test
|
11
|
+
|
12
|
+
def test_parse_at
|
13
|
+
recs = SportDb::Import::WikiReader.parse( <<TXT )
|
14
|
+
===================================
|
15
|
+
= Albania (al)
|
16
|
+
|
17
|
+
FK Partizani Tirana
|
18
|
+
KF Tirana
|
19
|
+
FK Kukësi
|
20
|
+
KF Laçi
|
21
|
+
TXT
|
22
|
+
|
23
|
+
pp recs
|
24
|
+
|
25
|
+
assert_equal 4, recs.size
|
26
|
+
assert_equal 'FK Partizani Tirana', recs[0].name
|
27
|
+
assert_equal 'Albania', recs[0].country.name
|
28
|
+
assert_equal 'al', recs[0].country.key
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
def test_parse_be
|
33
|
+
recs = SportDb::Import::WikiReader.parse( <<TXT )
|
34
|
+
===========================
|
35
|
+
= Belgium (be)
|
36
|
+
|
37
|
+
R.S.C._Anderlecht
|
38
|
+
Royal_Antwerp_F.C.
|
39
|
+
Cercle_Brugge_K.S.V.
|
40
|
+
R._Charleroi_S.C.
|
41
|
+
Club_Brugge_KV
|
42
|
+
TXT
|
43
|
+
|
44
|
+
pp recs
|
45
|
+
|
46
|
+
assert_equal 5, recs.size
|
47
|
+
assert_equal 'R.S.C. Anderlecht', recs[0].name
|
48
|
+
assert_equal 'Belgium', recs[0].country.name
|
49
|
+
assert_equal 'be', recs[0].country.key
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_parse_world
|
53
|
+
recs = SportDb::Import::WikiReader.parse( <<TXT )
|
54
|
+
= Albania (al) =
|
55
|
+
|
56
|
+
FK Partizani Tirana
|
57
|
+
|
58
|
+
|
59
|
+
= Belgium (be) =
|
60
|
+
|
61
|
+
# some comments here
|
62
|
+
R.S.C._Anderlecht # some end-of-line comments here
|
63
|
+
TXT
|
64
|
+
|
65
|
+
pp recs
|
66
|
+
|
67
|
+
assert_equal 2, recs.size
|
68
|
+
assert_equal 'FK Partizani Tirana', recs[0].name
|
69
|
+
assert_equal 'Albania', recs[0].country.name
|
70
|
+
assert_equal 'al', recs[0].country.key
|
71
|
+
|
72
|
+
assert_equal 'R.S.C. Anderlecht', recs[1].name
|
73
|
+
assert_equal 'Belgium', recs[1].country.name
|
74
|
+
assert_equal 'be', recs[1].country.key
|
75
|
+
end
|
76
|
+
|
77
|
+
end # class TestWikiReader
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-config
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csvreader
|
@@ -64,7 +64,11 @@ extra_rdoc_files:
|
|
64
64
|
- config/leagues/fr.txt
|
65
65
|
- config/leagues/gr.txt
|
66
66
|
- config/leagues/sco.txt
|
67
|
+
- config/world/ar.txt
|
68
|
+
- config/world/at.txt
|
69
|
+
- config/world/be.txt
|
67
70
|
- config/world/countries.txt
|
71
|
+
- config/world/de.txt
|
68
72
|
- config/world/eng.txt
|
69
73
|
files:
|
70
74
|
- HISTORY.md
|
@@ -75,7 +79,11 @@ files:
|
|
75
79
|
- config/leagues/fr.txt
|
76
80
|
- config/leagues/gr.txt
|
77
81
|
- config/leagues/sco.txt
|
82
|
+
- config/world/ar.txt
|
83
|
+
- config/world/at.txt
|
84
|
+
- config/world/be.txt
|
78
85
|
- config/world/countries.txt
|
86
|
+
- config/world/de.txt
|
79
87
|
- config/world/eng.txt
|
80
88
|
- lib/sportdb/config.rb
|
81
89
|
- lib/sportdb/config/club_reader.rb
|
@@ -88,6 +96,7 @@ files:
|
|
88
96
|
- lib/sportdb/config/season_utils.rb
|
89
97
|
- lib/sportdb/config/variants.rb
|
90
98
|
- lib/sportdb/config/version.rb
|
99
|
+
- lib/sportdb/config/wiki_reader.rb
|
91
100
|
- test/helper.rb
|
92
101
|
- test/test_club_index.rb
|
93
102
|
- test/test_club_reader.rb
|
@@ -98,6 +107,7 @@ files:
|
|
98
107
|
- test/test_league_utils.rb
|
99
108
|
- test/test_season_utils.rb
|
100
109
|
- test/test_variants.rb
|
110
|
+
- test/test_wiki_reader.rb
|
101
111
|
homepage: https://github.com/sportdb/sport.db
|
102
112
|
licenses:
|
103
113
|
- Public Domain
|