sportdb-config 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,58 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
- ## built-in countries for (quick starter) auto-add
7
-
8
- ## note: (re)use the struct from the fifa country gem / library for now
9
- Country = ::Fifa::Country
10
-
11
-
12
## Lookup index for country records by (lowercased) country code.
##
## Every record gets registered under its key (e.g. 'af') and - if different
## from the key - under its lowercased fifa code (e.g. 'afg') too.
class CountryIndex

  ## recs - list of country records; every record must respond to key and fifa
  ##          e.g. { key:'af', fifa:'AFG', name:'Afghanistan'}
  def initialize( recs )
    @countries         = []
    @countries_by_code = {}

    add( recs )
  end

  ## Add (more) country records to the index.
  ##
  ## note: prints an error message and exits the process (exit 1)
  ##         if a code (key or fifa) is already taken
  def add( recs )
    recs.each do |rec|
      @countries << rec

      add_code( rec.key, rec, 'key' )

      ## add fifa code (only) if present and different from key
      ##   note: records without a fifa code get indexed by key only
      fifa = rec.fifa && rec.fifa.downcase
      add_code( fifa, rec, 'fifa' )   if fifa && fifa != rec.key
    end
  end

  ## Lookup a country record by code; returns nil if not found.
  def []( key )
    key = key.to_s.downcase    ## allow symbols (and always downcase e.g. AUT to aut etc.)
    @countries_by_code[ key ]
  end

private
  ## register rec under code; kind ('key' or 'fifa') is used for the error message only
  def add_code( code, rec, kind )
    if @countries_by_code[ code ]
      puts "** !! ERROR !! country code (#{kind}) >#{code}< already exists!!"
      exit 1
    end

    @countries_by_code[ code ] = rec
  end
end # class CountryIndex
55
-
56
-
57
- end # module Import
58
- end # module SportDb
@@ -1,185 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
-
7
-
8
## (Spelling) variant finder / builder for names.
##
##   e.g. Variant.find( 'München' )  => ['Munchen', 'Muenchen']
class Variant

  ## Calculate the frequency table of letters, digits, etc.
  ##   returns a hash (with default 0) mapping every char in name to its count
  def self.frequency_table( name )   ## todo/check: use/rename to char_frequency_table
    name.each_char.each_with_object( Hash.new(0) ) do |ch, freq|
      freq[ch] += 1
    end
  end


  ## "simple" translation
  ALPHA_SPECIALS = {
    'Ä'=>'A',  'ä'=>'a',
    'Á'=>'A',  'á'=>'a',
               'à'=>'a',
               'ã'=>'a',
               'â'=>'a',
    'Å'=>'A',  'å'=>'a',
               'æ'=>'ae',
               'ā'=>'a',
               'ă'=>'a',
               'ą'=>'a',

    'Ç'=>'C',  'ç'=>'c',
               'ć'=>'c',
    'Č'=>'C',  'č'=>'c',

    'É'=>'E',  'é'=>'e',
               'è'=>'e',
               'ê'=>'e',
               'ë'=>'e',
               'ė'=>'e',
               'ę'=>'e',

               'ğ'=>'g',

    'İ'=>'I',
    'Í'=>'I',  'í'=>'i',
               'î'=>'i',
               'ī'=>'i',
               'ı'=>'i',

    'Ł'=>'L',  'ł'=>'l',

               'ñ'=>'n',
               'ń'=>'n',
               'ň'=>'n',

    'Ö'=>'O',  'ö'=>'o',
               'ó'=>'o',
               'õ'=>'o',
               'ô'=>'o',
               'ø'=>'o',
               'ő'=>'o',

               'ř'=>'r',

    'Ś'=>'S',
    'Ş'=>'S',  'ş'=>'s',
    'Š'=>'S',  'š'=>'s',
               'ș'=>'s',   ## U+0219
               'ß'=>'ss',

               'ţ'=>'t',   ## U+0163
               'ț'=>'t',   ## U+021B
               'þ'=>'th',

    'Ü'=>'U',  'ü'=>'u',
    'Ú'=>'U',  'ú'=>'u',
               'ū'=>'u',

               'ý'=>'y',

               'ź'=>'z',
               'ż'=>'z',
    'Ž'=>'Z',  'ž'=>'z',
  }.freeze


  ## de,at,ch translation for umlauts
  ALPHA_SPECIALS_DE = {
    'Ä'=>'Ae',  'ä'=>'ae',
    'Ö'=>'Oe',  'ö'=>'oe',
    'Ü'=>'Ue',  'ü'=>'ue',
                'ß'=>'ss',
  }.freeze

  ## add ALPHA_SPECIALS_ES - why? why not? is Espanyol catalan spelling or spanish (castillian)?
  #    'ñ'=>'ny',    ## e.g. Español => Espanyol

  ## uppercase => lowercase table (A-Z plus the int'l uppercase letters listed below)
  ALPHA_DOWNCASE = ('A'..'Z').each_with_object( {} ) do |ch, h|
    h[ch] = ch.downcase
  end.merge(
    'Ä'=>'ä',
    'Á'=>'á',
    'Å'=>'å',

    'Ç'=>'ç',
    'Č'=>'č',

    'É'=>'é',

    'İ'=>'i',    ## U+0130 (capital I with dot above) - lowercase is the plain (dotted) i
    'Í'=>'í',

    'Ł'=>'ł',

    'Ö'=>'ö',

    'Ś'=>'ś',
    'Ş'=>'ş',
    'Š'=>'š',

    'Ü'=>'ü',
    'Ú'=>'ú',

    'Ž'=>'ž',
  ).freeze


  ## Count how many chars of a name (passed in as frequency table)
  ##   are covered by the mapping (that is, are keys of the mapping).
  def self.alpha_specials_count( freq, mapping )
    mapping.keys.reduce( 0 ) { |count, ch| count + freq[ch] }
  end

  ## Translate name char-by-char; chars without an entry in mapping are kept as is.
  ##   note: a char may get replaced by more than one char e.g. ß => ss
  def self.tr( name, mapping )
    buf = String.new
    name.each_char { |ch| buf << (mapping[ch] || ch) }
    buf
  end


  ## Find alternative spellings for name; returns an array of (unique) names
  ##   or an empty array if the name includes no special letters (äöü etc.)
  def self.find( name )
    freq = frequency_table( name )

    ## todo/fix: add / pass-in language/country code and check for ALPHA_SPECIALS_DE - why? why not?
    alt_names = [ALPHA_SPECIALS, ALPHA_SPECIALS_DE].each_with_object( [] ) do |mapping, names|
      names << tr( name, mapping )   if alpha_specials_count( freq, mapping ) > 0
    end

    ## make uniq e.g. Preußen is Preussen, Preussen 2x
    alt_names.uniq
  end

  ## our very own downcase for int'l characters / letters
  def self.downcase_i18n( name )
    tr( name, ALPHA_DOWNCASE )
  end

end # class Variant
176
-
177
- end ## module Import
178
- end ## module SportDb
179
-
180
-
181
-
182
- ## "global" convenience helper
183
## "global" convenience helper
##   forwards to SportDb::Import::Variant.downcase_i18n and returns its result
def downcase_i18n( name )
  variant = SportDb::Import::Variant
  variant.downcase_i18n( name )
end
@@ -1,104 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
## Reader for club names listed line-by-line under country headings, e.g.
##
##   = Austria (at) =
##   SK_Rapid_Wien
##
## note: on any format error (heading level too deep, missing or unknown country code,
##         club name without country heading) prints a message and exits the process (exit 1)
class WikiReader    ## todo/check: rename to WikiClubReader - why? why not?

  ## record for a club name and its country (as returned by the countries lookup)
  class WikiClub
    attr_reader :name, :country
    def initialize( name, country )
      @name, @country = name, country
    end
  end


  ## Read the (utf-8) text file at path and parse it; returns an array of WikiClub records.
  def self.read( path )   ## use - rename to read_file or from_file etc. - why? why not?
    ## note: use the block form - the file gets closed right after reading
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt )
  end


  ## Parse txt line-by-line; returns an array of WikiClub records.
  ##   the country for a heading gets looked up via SportDb::Import.config.countries
  def self.parse( txt )
    recs = []
    last_country = nil    ## note: supports only one level of headings for now (and that is a country)

    txt.each_line do |line|
      line = line.strip

      next if line.empty?
      next if line.start_with?( '#' )   ## skip comments too

      ## strip inline (until end-of-line) comments too
      ##  e.g Eupen => KAS Eupen,    ## [de]
      ##   => Eupen => KAS Eupen,
      line = line.sub( /#.*/, '' ).strip
      pp line


      next if line =~ /^={1,}$/          ## skip "decorative" only heading e.g. ========

      ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
      ##  todo/check: allow ===  Text  =-=-=-=-=-=   too - why? why not?
      if line =~ /^(={1,})       ## leading ======
                   ([^=]+?)      ##  text   (note: for now no "inline" = allowed)
                   =*            ## (optional) trailing ====
                   $/x
        heading_level = $1.length   ## count number of = for heading level
        heading       = $2.strip

        puts "heading #{heading_level} >#{heading}<"

        if heading_level > 1
          puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
          exit 1
        end

        ## quick hack: if level is 1 assume country for now
        ##                 and extract country code e.g.
        ##                  Austria (at) => at
        ## todo/fix:  allow code only e.g. at or aut without enclosing () too - why? why not?
        if heading =~ /\(([a-z]{2,3})\)/i    ## note allow (at) or (AUT) too
          country_code = $1

          ## check country code - MUST exist for now!!!!
          country = SportDb::Import.config.countries[ country_code ]
          if country.nil?
            puts "** !!! ERROR [wiki reader] !!! - unknown country with code >#{country_code}< - sorry - add country to config to fix"
            exit 1
          end

          last_country = country
        else
          puts "!!! error - heading level 1 - missing country code - >#{heading}<"
          exit 1
        end
        pp last_country
      else
        ## strip and squish (white)spaces
        #   e.g. New York FC      (2011-)  => New York FC (2011-)
        value = line.strip.gsub( /[ \t]+/, ' ' )

        ## normalize (allow underscore (_) - replace with space)
        ##  e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
        value = value.gsub( '_', ' ' )

        if last_country.nil?
          puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
          exit 1
        end

        recs << WikiClub.new( value, last_country )
      end
    end  # each_line
    recs
  end  # method parse

end # class WikiReader
102
-
103
- end ## module Import
104
- end ## module SportDb
@@ -1,100 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_club_index.rb
6
-
7
-
8
- require 'helper'
9
-
10
## Checks club lookups (by name, by name and country, by canonical name)
##   and the wikipedia names / urls via SportDb::Import.config.clubs.
##
## note: uses Minitest::Test - the old MiniTest alias is no longer
##         loaded by default in newer minitest versions
class TestClubIndex < Minitest::Test

  def test_clubs
    pp clubs.errors

    clubs.dump_duplicates

    ## note: all dots (.) get always removed
    ## note: all spaces and dashes (-) get always removed
    ['Rapid Wien',
     'rapid wien',
     '...r.a.p.i.d w.i.e.n...',
     '--- r a p i d w i e n ---',
     'RAPID WIEN'].each do |name|
      m = clubs.match( name )
      assert_club m[0], 'SK Rapid Wien', 'Austria', 'Wien'
    end


    c = clubs[ 'SK Rapid Wien' ]   ## check canonical name match (only)
    assert_club c, 'SK Rapid Wien', 'Austria', 'Wien'


    m = clubs.match( 'Arsenal' )
    assert_equal 3, m.size

    m = clubs.match( 'ARSENAL' )
    assert_equal 3, m.size

    [['eng', 'Arsenal FC',         'England',   'London'],
     ['ar',  'Arsenal de Sarandí', 'Argentina', 'Sarandí'],
     ['ru',  'Arsenal Tula',       'Russia',    'Tula']].each do |code, name, country, city|
      m = clubs.match_by( name: 'Arsenal', country: code )
      assert_equal 1, m.size
      assert_club m[0], name, country, city
    end


    ['Arsenal FC',
     'Arsenal F.C.',
     '...A.r.s.e.n.a.l... F.C...'].each do |name|
      m = clubs.match( name )
      assert_equal 2, m.size
    end


    ##############################################
    ## test wikipedia names and links/urls

    m = clubs.match( 'Club Brugge KV' )
    assert_equal 1, m.size
    assert_equal 'Club Brugge KV', m[0].wikipedia
    assert_equal 'https://en.wikipedia.org/wiki/Club_Brugge_KV', m[0].wikipedia_url

    m = clubs.match( 'RSC Anderlecht' )
    assert_equal 1, m.size
    assert_equal 'R.S.C. Anderlecht', m[0].wikipedia
    assert_equal 'https://en.wikipedia.org/wiki/R.S.C._Anderlecht', m[0].wikipedia_url
  end

private
  ## shortcut for the club index under test (looked up anew on every call)
  def clubs
    SportDb::Import.config.clubs
  end

  ## check (canonical) name, country name and city of a club record
  def assert_club( club, name, country, city )
    assert_equal name,    club.name
    assert_equal country, club.country.name
    assert_equal city,    club.city
  end

end # class TestClubIndex