sportdb-config 0.5.0 → 0.5.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +6 -0
- data/README.md +108 -0
- data/config/world/ar.txt +11 -0
- data/config/world/at.txt +19 -0
- data/config/world/be.txt +18 -0
- data/config/world/de.txt +19 -0
- data/lib/sportdb/config.rb +1 -0
- data/lib/sportdb/config/club_reader.rb +1 -1
- data/lib/sportdb/config/clubs.rb +47 -5
- data/lib/sportdb/config/config.rb +21 -3
- data/lib/sportdb/config/version.rb +1 -1
- data/lib/sportdb/config/wiki_reader.rb +104 -0
- data/test/test_club_index.rb +14 -0
- data/test/test_config.rb +32 -7
- data/test/test_wiki_reader.rb +77 -0
- metadata +12 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f264abfc487652f687a3e4d6dc4388594a587896
|
4
|
+
data.tar.gz: 32f24cc478d3db058ef5aeb943a279019e2d68d2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4e5a329a6027121d68c81d51dd683befec81eb348ba4ef4606437ca5b77bfd1346ae31e8781a4e5c086c93f5cdd053a417a359e9edefa440fbe580ee0345970d
|
7
|
+
data.tar.gz: a3b7f2e23f0cd8f9755a8c594ef7c520be6b0d131762dbefff81f5c6acee0dbb15ed80128fa292f72ae704e471af28b16152cd1fd71564cc38c4f0b5efbcdf82
|
data/Manifest.txt
CHANGED
@@ -6,7 +6,11 @@ config/leagues/eng.txt
|
|
6
6
|
config/leagues/fr.txt
|
7
7
|
config/leagues/gr.txt
|
8
8
|
config/leagues/sco.txt
|
9
|
+
config/world/ar.txt
|
10
|
+
config/world/at.txt
|
11
|
+
config/world/be.txt
|
9
12
|
config/world/countries.txt
|
13
|
+
config/world/de.txt
|
10
14
|
config/world/eng.txt
|
11
15
|
lib/sportdb/config.rb
|
12
16
|
lib/sportdb/config/club_reader.rb
|
@@ -19,6 +23,7 @@ lib/sportdb/config/league_utils.rb
|
|
19
23
|
lib/sportdb/config/season_utils.rb
|
20
24
|
lib/sportdb/config/variants.rb
|
21
25
|
lib/sportdb/config/version.rb
|
26
|
+
lib/sportdb/config/wiki_reader.rb
|
22
27
|
test/helper.rb
|
23
28
|
test/test_club_index.rb
|
24
29
|
test/test_club_reader.rb
|
@@ -29,3 +34,4 @@ test/test_league_reader.rb
|
|
29
34
|
test/test_league_utils.rb
|
30
35
|
test/test_season_utils.rb
|
31
36
|
test/test_variants.rb
|
37
|
+
test/test_wiki_reader.rb
|
data/README.md
CHANGED
@@ -10,8 +10,116 @@
|
|
10
10
|
|
11
11
|
## Usage
|
12
12
|
|
13
|
+
Let's use the /clubs datasets (1500+ football clubs from around the world)
|
14
|
+
to match name "variants" e.g. `Arsenal` to canonical globally unique
|
15
|
+
names e.g. `Arsenal FC, London, England`:
|
13
16
|
|
17
|
+
``` ruby
|
18
|
+
require 'sportdb/config'
|
14
19
|
|
20
|
+
## note: requires a local copy of the football.db clubs datasets
|
21
|
+
## see https://github.com/openfootball/clubs
|
22
|
+
SportDb::Import.config.clubs_dir = './clubs'
|
23
|
+
|
24
|
+
m = SportDb::Import.config.clubs.match( 'Arsenal' )
|
25
|
+
m.size # 3 club matches found
|
26
|
+
#=> 3
|
27
|
+
m[0].name; m[0].city; m[0].country
|
28
|
+
#=> "Arsenal FC", "London", "England"
|
29
|
+
m[1].name; m[1].city; m[1].country
|
30
|
+
#=> "Arsenal Tula", "Tula", "Russia"
|
31
|
+
m[2].name; m[2].city; m[2].country
|
32
|
+
#=> "Arsenal de Sarandí", "Sarandí", "Argentina"
|
33
|
+
|
34
|
+
|
35
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal', country: 'eng' )
|
36
|
+
# -or- try alternative names (and auto-generated spelling variants)
|
37
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal FC', country: 'eng' )
|
38
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal F.C.', country: 'eng' )
|
39
|
+
m = SportDb::Import.config.clubs.match_by( name: '...A.r.s.e.n.a.l... F.C...', country: 'eng' )
|
40
|
+
m.size # 1 club match found
|
41
|
+
#=> 1
|
42
|
+
m[0].name; m[0].city; m[0].country
|
43
|
+
#=> "Arsenal FC", "London", "England"
|
44
|
+
|
45
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal', country: 'ar' )
|
46
|
+
# -or- try alternative names (and auto-generated spelling variants)
|
47
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal Sarandí', country: 'ar' )
|
48
|
+
m = SportDb::Import.config.clubs.match_by( name: 'Arsenal Sarandi', country: 'ar' )
|
49
|
+
m.size # 1 club match found
|
50
|
+
#=> 1
|
51
|
+
m[0].name; m[0].city; m[0].country
|
52
|
+
#=> "Arsenal de Sarandí", "Sarandí", "Argentina"
|
53
|
+
|
54
|
+
|
55
|
+
# try some more
|
56
|
+
m = SportDb::Import.config.clubs.match( 'AZ' )
|
57
|
+
m[0].name; m[0].city; m[0].country
|
58
|
+
#=> "AZ Alkmaar", "Alkmaar", "Netherlands"
|
59
|
+
|
60
|
+
m = SportDb::Import.config.clubs.match( 'Bayern' )
|
61
|
+
# -or- try alternative names (and auto-generated spelling variants)
|
62
|
+
m = SportDb::Import.config.clubs.match( 'Bayern München' )
|
63
|
+
m = SportDb::Import.config.clubs.match( 'Bayern Munchen' )
|
64
|
+
m = SportDb::Import.config.clubs.match( 'Bayern Muenchen' )
|
65
|
+
m[0].name; m[0].city; m[0].country
|
66
|
+
#=> "Bayern München", "München", "Germany"
|
67
|
+
|
68
|
+
# and so on
|
69
|
+
# ...
|
70
|
+
```
|
71
|
+
|
72
|
+
Let's print all names that have duplicates (more than one matching club):
|
73
|
+
|
74
|
+
``` ruby
|
75
|
+
SportDb::Import.config.clubs.mappings.each do |name, clubs|
|
76
|
+
if clubs.size > 1
|
77
|
+
puts "#{clubs.size} matching clubs for `#{name}`:"
|
78
|
+
clubs.each do |club|
|
79
|
+
puts " - #{club.name}, #{club.city}, #{club.country.name} (#{club.country.key})"
|
80
|
+
end
|
81
|
+
puts
|
82
|
+
end
|
83
|
+
end
|
84
|
+
```
|
85
|
+
|
86
|
+
resulting in:
|
87
|
+
|
88
|
+
```
|
89
|
+
2 matching clubs for `valencia`:
|
90
|
+
- Valencia FC, Léogâne, Haiti (ht)
|
91
|
+
- Valencia CF, Valencia, Spain (es)
|
92
|
+
|
93
|
+
2 matching clubs for `apollon`:
|
94
|
+
- Apollon Limassol FC, , Cyprus (cy)
|
95
|
+
- Apollon Smyrnis FC, Athens, Greece (gr)
|
96
|
+
|
97
|
+
3 matching clubs for `arsenal`:
|
98
|
+
- Arsenal FC, London, England (eng)
|
99
|
+
- Arsenal Tula, Tula, Russia (ru)
|
100
|
+
- Arsenal de Sarandí, Sarandí, Argentina (ar)
|
101
|
+
|
102
|
+
2 matching clubs for `liverpool`:
|
103
|
+
- Liverpool FC, Liverpool, England (eng)
|
104
|
+
- Liverpool Montevideo, Montevideo, Uruguay (uy)
|
105
|
+
|
106
|
+
2 matching clubs for `barcelona`:
|
107
|
+
- FC Barcelona, Barcelona, Spain (es)
|
108
|
+
- Barcelona Guayaquil, Guayaquil, Ecuador (ec)
|
109
|
+
|
110
|
+
3 matching clubs for `nacional`:
|
111
|
+
- CD Nacional Madeira, Funchal, Portugal (pt)
|
112
|
+
- Club Nacional, Asunción, Paraguay (py)
|
113
|
+
- Nacional de Montevideo, Montevideo, Uruguay (uy)
|
114
|
+
|
115
|
+
2 matching clubs for `sanjose`:
|
116
|
+
- San Jose Earthquakes, San Jose, United States (us)
|
117
|
+
- Club Deportivo San José, Oruro, Bolivia (bo)
|
118
|
+
|
119
|
+
...
|
120
|
+
```
|
121
|
+
|
122
|
+
That's it.
|
15
123
|
|
16
124
|
|
17
125
|
## License
|
data/config/world/ar.txt
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
===========================
|
2
|
+
= Argentina (ar)
|
3
|
+
|
4
|
+
Buenos Aires
|
5
|
+
Santa Fe | Provincia Santa Fe
|
6
|
+
Córdoba | Provincia Córdoba
|
7
|
+
Mendoza | Provincia Mendoza
|
8
|
+
San Juan | Provincia San Juan
|
9
|
+
Tucumán | Provincia Tucumán
|
10
|
+
Entre Ríos | Provincia Entre Ríos
|
11
|
+
Misiones | Provincia Misiones
|
data/config/world/at.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
=================================
|
2
|
+
= Österreich • Austria (at)
|
3
|
+
|
4
|
+
|
5
|
+
Wien | Vienna [en]
|
6
|
+
Burgenland
|
7
|
+
Niederösterreich
|
8
|
+
Oberösterreich
|
9
|
+
Steiermark
|
10
|
+
Kärnten
|
11
|
+
Salzburg
|
12
|
+
Tirol
|
13
|
+
Vorarlberg
|
14
|
+
|
15
|
+
|
16
|
+
== Niederösterreich ==
|
17
|
+
|
18
|
+
Wr. Neustadt | Wiener Neustadt
|
19
|
+
St. Pölten | Sankt Pölten
|
data/config/world/be.txt
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
==================================
|
2
|
+
= Belgium (be)
|
3
|
+
|
4
|
+
Brussels
|
5
|
+
|
6
|
+
Antwerpen › Vlaanderen | Antwerpen
|
7
|
+
Limburg › Vlaanderen | Limburg
|
8
|
+
Oost-Vlaanderen › Vlaanderen | Oost-Vlaanderen
|
9
|
+
West-Vlaanderen › Vlaanderen | West-Vlaanderen
|
10
|
+
|
11
|
+
Hainaut › Wallonie | Hainaut
|
12
|
+
Liège › Wallonie | Liège
|
13
|
+
|
14
|
+
|
15
|
+
== Hainaut › Wallonie ==
|
16
|
+
|
17
|
+
Mouscron [fr] | Moeskroen [nl]
|
18
|
+
Mons [fr] | Bergen [nl]
|
data/config/world/de.txt
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
============================================
|
2
|
+
= Germany • Deutschland (de)
|
3
|
+
|
4
|
+
Bayern
|
5
|
+
Nordrhein-Westfalen
|
6
|
+
Saarland
|
7
|
+
Niedersachsen
|
8
|
+
Brandenburg
|
9
|
+
Berlin
|
10
|
+
Hamburg
|
11
|
+
Bremen
|
12
|
+
Baden-Württemberg
|
13
|
+
Hessen
|
14
|
+
Rheinland-Pfalz
|
15
|
+
Schleswig-Holstein
|
16
|
+
Mecklenburg-Vorpommern
|
17
|
+
Sachsen
|
18
|
+
Sachsen-Anhalt
|
19
|
+
Thüringen
|
data/lib/sportdb/config.rb
CHANGED
data/lib/sportdb/config/clubs.rb
CHANGED
@@ -17,10 +17,26 @@ class Club
|
|
17
17
|
|
18
18
|
## special import only attribs
|
19
19
|
attr_accessor :alt_names_auto ## auto-generated alt names
|
20
|
+
attr_accessor :wikipedia # wikipedia page name (for english (en))
|
20
21
|
|
21
22
|
def historic?() @year_end ? true : false; end
|
22
23
|
alias_method :past?, :historic?
|
23
24
|
|
25
|
+
|
26
|
+
def wikipedia?() @wikipedia; end
|
27
|
+
def wikipedia_url
|
28
|
+
if @wikipedia
|
29
|
+
## note: replace spaces with underscore (_)
|
30
|
+
## e.g. Club Brugge KV => Club_Brugge_KV
|
31
|
+
## todo/check/fix:
|
32
|
+
## check if "plain" dash (-) needs to get replaced with typographic dash??
|
33
|
+
"https://en.wikipedia.org/wiki/#{@wikipedia.gsub(' ','_')}"
|
34
|
+
else
|
35
|
+
nil
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
|
24
40
|
def initialize
|
25
41
|
@alt_names = []
|
26
42
|
@alt_names_auto = []
|
@@ -135,6 +151,25 @@ class ClubIndex
|
|
135
151
|
def strip_norm( name ) Club.strip_norm( name ); end
|
136
152
|
|
137
153
|
|
154
|
+
def add_wiki( rec_or_recs ) ## add wiki(pedia club record / links
|
155
|
+
recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
|
156
|
+
|
157
|
+
recs.each do |rec|
|
158
|
+
m = match_by( name: rec.name, country: rec.country )
|
159
|
+
if m.nil?
|
160
|
+
puts "** !!! ERROR !!! - no matching club found for wiki(pedia) name >#{rec.name}, #{rec.country.name} (#{rec.country.key})<; sorry - to fix add name to clubs"
|
161
|
+
exit 1
|
162
|
+
end
|
163
|
+
if m.size > 1
|
164
|
+
puts "** !!! ERROR !!! - too many (greater than one) matching clubs found for wiki(pedia) name >#{rec.name}, #{rec.country.name} (#{rec.country.key})<"
|
165
|
+
pp m
|
166
|
+
exit 1
|
167
|
+
end
|
168
|
+
club = m[0]
|
169
|
+
club.wikipedia = rec.name
|
170
|
+
end
|
171
|
+
end # method add_wiki
|
172
|
+
|
138
173
|
|
139
174
|
def add( rec_or_recs ) ## add club record / alt_names
|
140
175
|
recs = rec_or_recs.is_a?( Array ) ? rec_or_recs : [rec_or_recs] ## wrap (single) rec in array
|
@@ -230,11 +265,18 @@ class ClubIndex
|
|
230
265
|
m = match( name )
|
231
266
|
if m ## filter by country
|
232
267
|
## note: country assumes / allows the country key or fifa code for now
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
268
|
+
|
269
|
+
## note: allow passing in of country struct too
|
270
|
+
country_rec = if country.is_a?( SportDb::Import::Country )
|
271
|
+
country ## (re)use country struct - no need to run lookup again
|
272
|
+
else
|
273
|
+
rec = SportDb::Import.config.countries[ country ]
|
274
|
+
if rec.nil?
|
275
|
+
puts "** !!! ERROR !!! - unknown country >#{country}< - no match found, sorry - add to world/countries.txt in config"
|
276
|
+
exit 1
|
277
|
+
end
|
278
|
+
rec
|
279
|
+
end
|
238
280
|
|
239
281
|
m = m.select { |club| club.country.key == country_rec.key }
|
240
282
|
m = nil if m.empty? ## note: reset to nil if no more matches
|
@@ -40,14 +40,21 @@ class Configuration
|
|
40
40
|
clubs\.txt$
|
41
41
|
}x
|
42
42
|
|
43
|
-
|
43
|
+
|
44
|
+
CLUBS_WIKI_REGEX = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
45
|
+
(?:[a-z]{1,3}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
46
|
+
clubs\.wiki\.txt$
|
47
|
+
}x
|
48
|
+
|
49
|
+
|
50
|
+
def find_clubs_datafiles( path, pattern )
|
44
51
|
datafiles = [] ## note: [country, path] pairs for now
|
45
52
|
|
46
53
|
## check all txt files as candidates (MUST include country code for now)
|
47
54
|
candidates = Dir.glob( "#{path}/**/*.txt" )
|
48
55
|
pp candidates
|
49
56
|
candidates.each do |candidate|
|
50
|
-
datafiles << candidate if
|
57
|
+
datafiles << candidate if pattern.match( candidate )
|
51
58
|
end
|
52
59
|
|
53
60
|
pp datafiles
|
@@ -65,7 +72,7 @@ class Configuration
|
|
65
72
|
|
66
73
|
## todo/fix: add to teamreader
|
67
74
|
## check that name and alt_names for a club are all unique (not duplicates)
|
68
|
-
datafiles = find_clubs_datafiles( clubs_dir )
|
75
|
+
datafiles = find_clubs_datafiles( clubs_dir, CLUBS_REGEX )
|
69
76
|
datafiles.each do |datafile|
|
70
77
|
recs += ClubReader.read( datafile )
|
71
78
|
end
|
@@ -74,6 +81,17 @@ class Configuration
|
|
74
81
|
clubs = ClubIndex.new
|
75
82
|
clubs.add( recs )
|
76
83
|
|
84
|
+
## add wiki(pedia) anchored links
|
85
|
+
recs = []
|
86
|
+
datafiles = find_clubs_datafiles( clubs_dir, CLUBS_WIKI_REGEX )
|
87
|
+
datafiles.each do |datafile|
|
88
|
+
recs += WikiReader.read( datafile )
|
89
|
+
end
|
90
|
+
|
91
|
+
pp recs
|
92
|
+
clubs.add_wiki( recs )
|
93
|
+
|
94
|
+
|
77
95
|
if clubs.errors?
|
78
96
|
puts ""
|
79
97
|
puts "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
|
4
|
+
module SportDb
|
5
|
+
module Import
|
6
|
+
|
7
|
+
|
8
|
+
class WikiReader ## todo/check: rename to WikiClubReader - why? why not?
|
9
|
+
|
10
|
+
class WikiClub
|
11
|
+
attr_reader :name, :country
|
12
|
+
def initialize( name, country )
|
13
|
+
@name, @country = name, country
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
|
18
|
+
def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
|
19
|
+
txt = File.open( path, 'r:utf-8' ).read
|
20
|
+
parse( txt )
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
def self.parse( txt )
|
25
|
+
recs = []
|
26
|
+
last_country = nil ## note: supports only one level of headings for now (and that is a country)
|
27
|
+
|
28
|
+
txt.each_line do |line|
|
29
|
+
line = line.strip
|
30
|
+
|
31
|
+
next if line.empty?
|
32
|
+
next if line.start_with?( '#' ) ## skip comments too
|
33
|
+
|
34
|
+
## strip inline (until end-of-line) comments too
|
35
|
+
## e.g Eupen => KAS Eupen, ## [de]
|
36
|
+
## => Eupen => KAS Eupen,
|
37
|
+
line = line.sub( /#.*/, '' ).strip
|
38
|
+
pp line
|
39
|
+
|
40
|
+
|
41
|
+
next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
|
42
|
+
|
43
|
+
## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
|
44
|
+
## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
|
45
|
+
if line =~ /^(={1,}) ## leading ======
|
46
|
+
([^=]+?) ## text (note: for now no "inline" = allowed)
|
47
|
+
=* ## (optional) trailing ====
|
48
|
+
$/x
|
49
|
+
heading_marker = $1
|
50
|
+
heading_level = $1.length ## count number of = for heading level
|
51
|
+
heading = $2.strip
|
52
|
+
|
53
|
+
puts "heading #{heading_level} >#{heading}<"
|
54
|
+
|
55
|
+
if heading_level > 1
|
56
|
+
puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
|
57
|
+
exit 1
|
58
|
+
end
|
59
|
+
|
60
|
+
## quick hack: if level is 1 assume country for now
|
61
|
+
## and extract country code e.g.
|
62
|
+
## Austria (at) => at
|
63
|
+
## todo/fix: allow code only e.g. at or aut without enclosing () too - why? why not?
|
64
|
+
if heading =~ /\(([a-z]{2,3})\)/i ## note allow (at) or (AUT) too
|
65
|
+
country_code = $1
|
66
|
+
|
67
|
+
## check country code - MUST exist for now!!!!
|
68
|
+
country = SportDb::Import.config.countries[ country_code ]
|
69
|
+
if country.nil?
|
70
|
+
puts "** !!! ERROR [wiki reader] !!! - unknown country with code >#{country_code}< - sorry - add country to config to fix"
|
71
|
+
exit 1
|
72
|
+
end
|
73
|
+
|
74
|
+
last_country = country
|
75
|
+
else
|
76
|
+
puts "!!! error - heading level 1 - missing country code - >#{heading}<"
|
77
|
+
exit 1
|
78
|
+
end
|
79
|
+
pp last_country
|
80
|
+
else
|
81
|
+
## strip and squish (white)spaces
|
82
|
+
# e.g. New York FC (2011-) => New York FC (2011-)
|
83
|
+
value = line.strip.gsub( /[ \t]+/, ' ' )
|
84
|
+
|
85
|
+
## normalize (allow underscore (_) - replace with space)
|
86
|
+
## e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
|
87
|
+
value = value.gsub( '_', ' ' )
|
88
|
+
|
89
|
+
if last_country.nil?
|
90
|
+
puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
|
91
|
+
exit 1
|
92
|
+
end
|
93
|
+
|
94
|
+
rec = WikiClub.new( value, last_country )
|
95
|
+
recs << rec
|
96
|
+
end
|
97
|
+
end # each_line
|
98
|
+
recs
|
99
|
+
end # method read
|
100
|
+
|
101
|
+
end # class WikiReader
|
102
|
+
|
103
|
+
end ## module Import
|
104
|
+
end ## module SportDb
|
data/test/test_club_index.rb
CHANGED
@@ -81,6 +81,20 @@ class TestClubIndex < MiniTest::Test
|
|
81
81
|
|
82
82
|
m = SportDb::Import.config.clubs.match( '...A.r.s.e.n.a.l... F.C...' )
|
83
83
|
assert_equal 2, m.size
|
84
|
+
|
85
|
+
|
86
|
+
##############################################
|
87
|
+
## test wikipedia names and links/urls
|
88
|
+
|
89
|
+
m = SportDb::Import.config.clubs.match( 'Club Brugge KV' )
|
90
|
+
assert_equal 1, m.size
|
91
|
+
assert_equal 'Club Brugge KV', m[0].wikipedia
|
92
|
+
assert_equal 'https://en.wikipedia.org/wiki/Club_Brugge_KV', m[0].wikipedia_url
|
93
|
+
|
94
|
+
m = SportDb::Import.config.clubs.match( 'RSC Anderlecht' )
|
95
|
+
assert_equal 1, m.size
|
96
|
+
assert_equal 'R.S.C. Anderlecht', m[0].wikipedia
|
97
|
+
assert_equal 'https://en.wikipedia.org/wiki/R.S.C._Anderlecht', m[0].wikipedia_url
|
84
98
|
end
|
85
99
|
|
86
100
|
end # class TestClubIndex
|
data/test/test_config.rb
CHANGED
@@ -9,15 +9,40 @@ require 'helper'
|
|
9
9
|
|
10
10
|
class TestConfig < MiniTest::Test
|
11
11
|
|
12
|
-
def
|
12
|
+
def match_clubs( txt ) SportDb::Import::Configuration::CLUBS_REGEX.match( txt ); end
|
13
|
+
def match_clubs_wiki( txt ) SportDb::Import::Configuration::CLUBS_WIKI_REGEX.match( txt ); end
|
13
14
|
|
14
15
|
def test_find_clubs
|
15
|
-
assert
|
16
|
-
assert
|
17
|
-
assert
|
18
|
-
assert
|
19
|
-
assert
|
20
|
-
assert
|
16
|
+
assert match_clubs( 'de.clubs.txt' )
|
17
|
+
assert match_clubs( 'deutschland/de.clubs.txt' )
|
18
|
+
assert match_clubs( 'europe/de-deutschland/clubs.txt' )
|
19
|
+
assert match_clubs( 'de-deutschland/clubs.txt' )
|
20
|
+
assert match_clubs( 'clubs.txt' )
|
21
|
+
assert match_clubs( 'deutschland/clubs.txt' )
|
22
|
+
|
23
|
+
assert !match_clubs( 'de.clubs.wiki.txt' )
|
24
|
+
assert !match_clubs( 'deutschland/de.clubs.wiki.txt' )
|
25
|
+
assert !match_clubs( 'europe/de-deutschland/clubs.wiki.txt' )
|
26
|
+
assert !match_clubs( 'de-deutschland/clubs.wiki.txt' )
|
27
|
+
assert !match_clubs( 'clubs.wiki.txt' )
|
28
|
+
assert !match_clubs( 'deutschland/clubs.wiki.txt' )
|
29
|
+
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_find_clubs_wiki
|
33
|
+
assert !match_clubs_wiki( 'de.clubs.txt' )
|
34
|
+
assert !match_clubs_wiki( 'deutschland/de.clubs.txt' )
|
35
|
+
assert !match_clubs_wiki( 'europe/de-deutschland/clubs.txt' )
|
36
|
+
assert !match_clubs_wiki( 'de-deutschland/clubs.txt' )
|
37
|
+
assert !match_clubs_wiki( 'clubs.txt' )
|
38
|
+
assert !match_clubs_wiki( 'deutschland/clubs.txt' )
|
39
|
+
|
40
|
+
assert match_clubs_wiki( 'de.clubs.wiki.txt' )
|
41
|
+
assert match_clubs_wiki( 'deutschland/de.clubs.wiki.txt' )
|
42
|
+
assert match_clubs_wiki( 'europe/de-deutschland/clubs.wiki.txt' )
|
43
|
+
assert match_clubs_wiki( 'de-deutschland/clubs.wiki.txt' )
|
44
|
+
assert match_clubs_wiki( 'clubs.wiki.txt' )
|
45
|
+
assert match_clubs_wiki( 'deutschland/clubs.wiki.txt' )
|
21
46
|
end
|
22
47
|
|
23
48
|
end # class TestConfig
|
@@ -0,0 +1,77 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_wiki_reader.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestWikiReader < MiniTest::Test
|
11
|
+
|
12
|
+
def test_parse_at
|
13
|
+
recs = SportDb::Import::WikiReader.parse( <<TXT )
|
14
|
+
===================================
|
15
|
+
= Albania (al)
|
16
|
+
|
17
|
+
FK Partizani Tirana
|
18
|
+
KF Tirana
|
19
|
+
FK Kukësi
|
20
|
+
KF Laçi
|
21
|
+
TXT
|
22
|
+
|
23
|
+
pp recs
|
24
|
+
|
25
|
+
assert_equal 4, recs.size
|
26
|
+
assert_equal 'FK Partizani Tirana', recs[0].name
|
27
|
+
assert_equal 'Albania', recs[0].country.name
|
28
|
+
assert_equal 'al', recs[0].country.key
|
29
|
+
end
|
30
|
+
|
31
|
+
|
32
|
+
def test_parse_be
|
33
|
+
recs = SportDb::Import::WikiReader.parse( <<TXT )
|
34
|
+
===========================
|
35
|
+
= Belgium (be)
|
36
|
+
|
37
|
+
R.S.C._Anderlecht
|
38
|
+
Royal_Antwerp_F.C.
|
39
|
+
Cercle_Brugge_K.S.V.
|
40
|
+
R._Charleroi_S.C.
|
41
|
+
Club_Brugge_KV
|
42
|
+
TXT
|
43
|
+
|
44
|
+
pp recs
|
45
|
+
|
46
|
+
assert_equal 5, recs.size
|
47
|
+
assert_equal 'R.S.C. Anderlecht', recs[0].name
|
48
|
+
assert_equal 'Belgium', recs[0].country.name
|
49
|
+
assert_equal 'be', recs[0].country.key
|
50
|
+
end
|
51
|
+
|
52
|
+
def test_parse_world
|
53
|
+
recs = SportDb::Import::WikiReader.parse( <<TXT )
|
54
|
+
= Albania (al) =
|
55
|
+
|
56
|
+
FK Partizani Tirana
|
57
|
+
|
58
|
+
|
59
|
+
= Belgium (be) =
|
60
|
+
|
61
|
+
# some comments here
|
62
|
+
R.S.C._Anderlecht # some end-of-line comments here
|
63
|
+
TXT
|
64
|
+
|
65
|
+
pp recs
|
66
|
+
|
67
|
+
assert_equal 2, recs.size
|
68
|
+
assert_equal 'FK Partizani Tirana', recs[0].name
|
69
|
+
assert_equal 'Albania', recs[0].country.name
|
70
|
+
assert_equal 'al', recs[0].country.key
|
71
|
+
|
72
|
+
assert_equal 'R.S.C. Anderlecht', recs[1].name
|
73
|
+
assert_equal 'Belgium', recs[1].country.name
|
74
|
+
assert_equal 'be', recs[1].country.key
|
75
|
+
end
|
76
|
+
|
77
|
+
end # class TestWikiReader
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-config
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-08-
|
11
|
+
date: 2019-08-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: csvreader
|
@@ -64,7 +64,11 @@ extra_rdoc_files:
|
|
64
64
|
- config/leagues/fr.txt
|
65
65
|
- config/leagues/gr.txt
|
66
66
|
- config/leagues/sco.txt
|
67
|
+
- config/world/ar.txt
|
68
|
+
- config/world/at.txt
|
69
|
+
- config/world/be.txt
|
67
70
|
- config/world/countries.txt
|
71
|
+
- config/world/de.txt
|
68
72
|
- config/world/eng.txt
|
69
73
|
files:
|
70
74
|
- HISTORY.md
|
@@ -75,7 +79,11 @@ files:
|
|
75
79
|
- config/leagues/fr.txt
|
76
80
|
- config/leagues/gr.txt
|
77
81
|
- config/leagues/sco.txt
|
82
|
+
- config/world/ar.txt
|
83
|
+
- config/world/at.txt
|
84
|
+
- config/world/be.txt
|
78
85
|
- config/world/countries.txt
|
86
|
+
- config/world/de.txt
|
79
87
|
- config/world/eng.txt
|
80
88
|
- lib/sportdb/config.rb
|
81
89
|
- lib/sportdb/config/club_reader.rb
|
@@ -88,6 +96,7 @@ files:
|
|
88
96
|
- lib/sportdb/config/season_utils.rb
|
89
97
|
- lib/sportdb/config/variants.rb
|
90
98
|
- lib/sportdb/config/version.rb
|
99
|
+
- lib/sportdb/config/wiki_reader.rb
|
91
100
|
- test/helper.rb
|
92
101
|
- test/test_club_index.rb
|
93
102
|
- test/test_club_reader.rb
|
@@ -98,6 +107,7 @@ files:
|
|
98
107
|
- test/test_league_utils.rb
|
99
108
|
- test/test_season_utils.rb
|
100
109
|
- test/test_variants.rb
|
110
|
+
- test/test_wiki_reader.rb
|
101
111
|
homepage: https://github.com/sportdb/sport.db
|
102
112
|
licenses:
|
103
113
|
- Public Domain
|