sportdb-config 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/{HISTORY.md → CHANGELOG.md} +0 -0
- data/Manifest.txt +1 -11
- data/Rakefile +8 -4
- data/lib/sportdb/config.rb +9 -33
- data/lib/sportdb/config/config.rb +36 -4
- data/lib/sportdb/config/version.rb +1 -1
- data/test/test_clubs.rb +76 -20
- metadata +48 -16
- data/lib/sportdb/config/club.rb +0 -147
- data/lib/sportdb/config/club_index.rb +0 -212
- data/lib/sportdb/config/club_reader.rb +0 -278
- data/lib/sportdb/config/countries.rb +0 -58
- data/lib/sportdb/config/variants.rb +0 -185
- data/lib/sportdb/config/wiki_reader.rb +0 -104
- data/test/test_club_index.rb +0 -100
- data/test/test_club_reader.rb +0 -150
- data/test/test_variants.rb +0 -46
- data/test/test_wiki_reader.rb +0 -77
@@ -1,58 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
## built-in countries for (quick starter) auto-add
|
7
|
-
|
8
|
-
## note: (re)use the struct from the fifa country gem / library for now
|
9
|
-
Country = ::Fifa::Country
|
10
|
-
|
11
|
-
|
12
|
-
class CountryIndex
  ## Lookup index for country records by code.
  ##
  ## Every record is registered under its key and - if present and
  ## different from the key - under its fifa code. All codes are stored
  ## lowercased, matching the (always downcasing) lookup in #[].

  ## recs - enumerable of records responding to #key and #fifa
  ##        e.g. { key:'af', fifa:'AFG', name:'Afghanistan' }
  def initialize( recs )
    @countries         = []
    @countries_by_code = {}

    add( recs )
  end

  ## Add (auto-fill) more country records.
  ##
  ## Prints an error message and terminates with exit status 1 if a
  ## code (key or fifa) is already taken.
  def add( recs )
    recs.each do |rec|
      @countries << rec

      key  = normalize_code( rec.key )
      fifa = normalize_code( rec.fifa )   ## note: fifa code is optional (may be nil)

      register( key, rec, 'key' )

      ## add fifa code (only) if present and different from key
      register( fifa, rec, 'fifa' )  unless fifa.empty? || fifa == key
    end
  end # method add

  ## Find a country by code; allows symbols and any letter case
  ## (e.g. :at, 'AUT', 'aut'). Returns nil if the code is unknown.
  def []( key )
    @countries_by_code[ normalize_code( key ) ]
  end

  private

  ## Turn a code (string, symbol or nil) into its lowercase string form.
  def normalize_code( code )
    code.to_s.downcase
  end

  ## Store rec under code; report and exit if the code is already in use.
  def register( code, rec, kind )
    if @countries_by_code[ code ]
      puts "** !! ERROR !! country code (#{kind}) >#{code}< already exists!!"
      exit 1
    end

    @countries_by_code[ code ] = rec
  end
end # class CountryIndex
|
55
|
-
|
56
|
-
|
57
|
-
end # module Import
|
58
|
-
end # module SportDb
|
@@ -1,185 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
class Variant   ## (spelling) variant finder / builder for names

  ## Calculate the frequency table of letters, digits, etc. in name.
  ## Returns a hash (char => count) that answers 0 for characters
  ## that do not occur in name.
  def self.frequency_table( name )   ## todo/check: use/rename to char_frequency_table
    name.each_char.each_with_object( Hash.new(0) ) { |ch, freq| freq[ch] += 1 }
  end


  ## "simple" translation - special letter to plain a-z replacement
  ALPHA_SPECIALS = {
    'Ä'=>'A',  'ä'=>'a',
    'Á'=>'A',  'á'=>'a',
               'à'=>'a',
               'ã'=>'a',
               'â'=>'a',
    'Å'=>'A',  'å'=>'a',
               'æ'=>'ae',
               'ā'=>'a',
               'ă'=>'a',
               'ą'=>'a',

    'Ç'=>'C',  'ç'=>'c',
               'ć'=>'c',
    'Č'=>'C',  'č'=>'c',

    'É'=>'E',  'é'=>'e',
               'è'=>'e',
               'ê'=>'e',
               'ë'=>'e',
               'ė'=>'e',
               'ę'=>'e',

               'ğ'=>'g',

    'İ'=>'I',
    'Í'=>'I',  'í'=>'i',
               'î'=>'i',
               'ī'=>'i',
               'ı'=>'i',

    'Ł'=>'L',  'ł'=>'l',

               'ñ'=>'n',
               'ń'=>'n',
               'ň'=>'n',

    'Ö'=>'O',  'ö'=>'o',
               'ó'=>'o',
               'õ'=>'o',
               'ô'=>'o',
               'ø'=>'o',
               'ő'=>'o',

               'ř'=>'r',

    'Ś'=>'S',  'ś'=>'s',   ## lowercase needed too - downcase_i18n turns Ś into ś
    'Ş'=>'S',  'ş'=>'s',
    'Š'=>'S',  'š'=>'s',
               'ș'=>'s',   ## U+0219
               'ß'=>'ss',

               'ţ'=>'t',   ## U+0163
               'ț'=>'t',   ## U+021B
               'þ'=>'th',

    'Ü'=>'U',  'ü'=>'u',
    'Ú'=>'U',  'ú'=>'u',
               'ū'=>'u',

               'ý'=>'y',

               'ź'=>'z',
               'ż'=>'z',
    'Ž'=>'Z',  'ž'=>'z',
  }.freeze


  ## de,at,ch translation for umlauts
  ALPHA_SPECIALS_DE = {
    'Ä'=>'Ae',  'ä'=>'ae',
    'Ö'=>'Oe',  'ö'=>'oe',
    'Ü'=>'Ue',  'ü'=>'ue',
                'ß'=>'ss',
  }.freeze

  ## add ALPHA_SPECIALS_ES - why? why not?  is Espanyol catalan spelling or spanish (castillian)?
  #    'ñ'=>'ny',    ## e.g. Español => Espanyol

  ## uppercase to lowercase mapping - A-Z plus selected int'l letters
  ALPHA_DOWNCASE = ('A'..'Z').each_with_object( {} ) do |ch, h|
    h[ch] = ch.downcase
  end.merge(
    'Ä'=>'ä',
    'Á'=>'á',
    'Å'=>'å',

    'Ç'=>'ç',
    'Č'=>'č',

    'É'=>'é',

    'İ'=>'i',   ## note: Turkish dotted capital I lowercases to plain i
    'Í'=>'í',

    'Ł'=>'ł',

    'Ö'=>'ö',

    'Ś'=>'ś',
    'Ş'=>'ş',
    'Š'=>'š',

    'Ü'=>'ü',
    'Ú'=>'ú',

    'Ž'=>'ž',
  ).freeze


  ## Count how many characters of the frequency table freq
  ## have an entry in mapping (e.g. ALPHA_SPECIALS).
  def self.alpha_specials_count( freq, mapping )
    mapping.each_key.reduce( 0 ) { |count, ch| count + (freq[ch] || 0) }
  end

  ## Translate name char-by-char; characters without an entry
  ## in mapping are kept as is. Always returns a new string.
  def self.tr( name, mapping )
    name.each_char.map { |ch| mapping[ch] || ch }.join
  end


  ## Build the (unique) alternative spellings for name.
  ## Returns an empty array if name includes no special letters.
  def self.find( name )
    alt_names = []

    freq = frequency_table( name )

    if alpha_specials_count( freq, ALPHA_SPECIALS ) > 0    # check if includes äöü etc.
      alt_names << tr( name, ALPHA_SPECIALS )
    end

    if alpha_specials_count( freq, ALPHA_SPECIALS_DE ) > 0   ## todo/fix: add / pass-in language/country code and check - why? why not?
      alt_names << tr( name, ALPHA_SPECIALS_DE )
    end

    ## note: both translations may produce the same spelling
    ##   e.g. Preußen is Preussen, Preussen 2x
    alt_names.uniq
  end

  ## our very own downcase for int'l characters / letters
  def self.downcase_i18n( name )
    tr( name, ALPHA_DOWNCASE )
  end

end # class Variant
|
176
|
-
|
177
|
-
end ## module Import
|
178
|
-
end ## module SportDb
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
## "global" convenience helper
|
183
|
-
## Top-level shortcut: hands name over to
## SportDb::Import::Variant.downcase_i18n and returns its result.
def downcase_i18n( name )
  variant = SportDb::Import::Variant
  variant.downcase_i18n( name )
end # method downcase_i18n
|
@@ -1,104 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module SportDb
|
5
|
-
module Import
|
6
|
-
|
7
|
-
|
8
|
-
class WikiReader    ## todo/check: rename to WikiClubReader - why? why not?

  ## Record for a club name listed below a country heading.
  class WikiClub
    attr_reader :name, :country
    def initialize( name, country )
      @name, @country = name, country
    end
  end


  ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
  ## todo/check:  allow ===  Text  =-=-=-=-=-=   too - why? why not?
  HEADING_RE = /\A(={1,})     ## leading ======
                 ([^=]+?)     ##  text   (note: for now no "inline" = allowed)
                 =*           ## (optional) trailing ====
                 \z/x

  ## "decorative" only heading e.g. ========
  DECORATIVE_HEADING_RE = /\A={1,}\z/


  ## Read the (utf-8) text file at path and parse it; see parse.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    ## note: block form makes sure the file gets closed again
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end


  ## Parse txt line-by-line into an array of WikiClub records.
  ##
  ## - blank lines and comment lines (starting with #) get skipped
  ## - inline comments (# until end-of-line) get stripped
  ## - a level one heading e.g. = Austria (at) =  sets the country
  ##   for all club names that follow
  ## - any other line is a club name
  ##
  ## Prints an error message and terminates with exit status 1 on a
  ## heading deeper than level one, a heading without a (known) country
  ## code or a club name before the first country heading.
  def self.parse( txt )
    recs = []
    last_country = nil  ## note: supports only one level of headings for now (and that is a country)

    txt.each_line do |line|
      line = line.strip

      next if line.empty?
      next if line.start_with?( '#' )   ## skip comments too

      ## strip inline (until end-of-line) comments too
      ##  e.g Eupen => KAS Eupen,    ## [de]
      ##   => Eupen => KAS Eupen,
      line = line.sub( /#.*/, '' ).strip

      next if line =~ DECORATIVE_HEADING_RE

      m = HEADING_RE.match( line )
      if m
        ## count number of = for heading level
        last_country = country_for_heading( m[1].length, m[2].strip )
      else
        ## strip and  squish (white)spaces
        #   e.g. New York FC      (2011-)  => New York FC (2011-)
        value = line.strip.gsub( /[ \t]+/, ' ' )

        ## normalize (allow underscore (_) - replace with space)
        ##  e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
        value = value.gsub( '_', ' ' )

        if last_country.nil?
          puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
          exit 1
        end

        recs << WikiClub.new( value, last_country )
      end
    end  # each_line
    recs
  end  # method parse


  ## Resolve a heading to its country record (via the config countries lookup).
  ##
  ## quick hack: if level is 1 assume country for now
  ##  and extract country code e.g.
  ##    Austria (at)  => at
  ## todo/fix: allow code only e.g. at or aut without enclosing () too - why? why not?
  def self.country_for_heading( heading_level, heading )
    if heading_level > 1
      puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
      exit 1
    end

    m = /\(([a-z]{2,3})\)/i.match( heading )   ## note allow (at) or (AUT) too
    if m.nil?
      puts "!!! error - heading level 1 - missing country code - >#{heading}<"
      exit 1
    end

    country_code = m[1]

    ## check country code - MUST exist for now!!!!
    country = SportDb::Import.config.countries[ country_code ]
    if country.nil?
      puts "** !!! ERROR [wiki reader] !!! - unknown country with code >#{country_code}< - sorry - add country to config to fix"
      exit 1
    end

    country
  end
  private_class_method :country_for_heading

end # class WikiReader
|
102
|
-
|
103
|
-
end ## module Import
|
104
|
-
end ## module SportDb
|
data/test/test_club_index.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
###
|
4
|
-
# to run use
|
5
|
-
# ruby -I ./lib -I ./test test/test_club_index.rb
|
6
|
-
|
7
|
-
|
8
|
-
require 'helper'
|
9
|
-
|
10
|
-
## Tests for the club index - name matching (incl. normalization of
## dots, dashes, spaces and letter case), lookup by canonical name,
## matching narrowed by country and wikipedia names / urls.
class TestClubIndex < Minitest::Test   ## note: Minitest (not legacy MiniTest alias)

  ## all spellings expected to match SK Rapid Wien
  RAPID_WIEN_QUERIES = [
    'Rapid Wien',
    'rapid wien',
    '...r.a.p.i.d w.i.e.n...',     ## note: all dots (.) get always removed
    '--- r a p i d w i e n ---',   ## note: all spaces and dashes (-) get always removed
    'RAPID WIEN',
  ]

  def test_diagnostics
    ## print load errors and duplicates (for debugging only - no assertions)
    pp clubs.errors

    clubs.dump_duplicates
  end

  def test_match_rapid_wien
    RAPID_WIEN_QUERIES.each do |query|
      m = clubs.match( query )
      assert_club m[0], 'SK Rapid Wien', 'Austria', 'Wien'
    end
  end

  def test_canonical_name
    c = clubs[ 'SK Rapid Wien' ]    ## check canonical name match (only)
    assert_club c, 'SK Rapid Wien', 'Austria', 'Wien'
  end

  def test_match_arsenal
    assert_equal 3, clubs.match( 'Arsenal' ).size
    assert_equal 3, clubs.match( 'ARSENAL' ).size

    assert_equal 2, clubs.match( 'Arsenal FC' ).size
    assert_equal 2, clubs.match( 'Arsenal F.C.' ).size
    assert_equal 2, clubs.match( '...A.r.s.e.n.a.l... F.C...' ).size
  end

  def test_match_arsenal_by_country
    m = clubs.match_by( name: 'Arsenal', country: 'eng' )
    assert_equal 1, m.size
    assert_club m[0], 'Arsenal FC', 'England', 'London'

    m = clubs.match_by( name: 'Arsenal', country: 'ar' )
    assert_equal 1, m.size
    assert_club m[0], 'Arsenal de Sarandí', 'Argentina', 'Sarandí'

    m = clubs.match_by( name: 'Arsenal', country: 'ru' )
    assert_equal 1, m.size
    assert_club m[0], 'Arsenal Tula', 'Russia', 'Tula'
  end

  ##############################################
  ## test wikipedia names and links/urls
  def test_wikipedia
    m = clubs.match( 'Club Brugge KV' )
    assert_equal 1, m.size
    assert_equal 'Club Brugge KV', m[0].wikipedia
    assert_equal 'https://en.wikipedia.org/wiki/Club_Brugge_KV', m[0].wikipedia_url

    m = clubs.match( 'RSC Anderlecht' )
    assert_equal 1, m.size
    assert_equal 'R.S.C. Anderlecht', m[0].wikipedia
    assert_equal 'https://en.wikipedia.org/wiki/R.S.C._Anderlecht', m[0].wikipedia_url
  end

  private

  ## shortcut for the club index under test
  def clubs
    SportDb::Import.config.clubs
  end

  ## check name, country name and city of a club record in one go
  def assert_club( club, name, country_name, city )
    assert_equal name,         club.name
    assert_equal country_name, club.country.name
    assert_equal city,         club.city
  end

end # class TestClubIndex
|