sportdb-config 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,58 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
- ## built-in countries for (quick starter) auto-add
7
-
8
- ## note: (re)use the struct from the fifa country gem / library for now
9
- Country = ::Fifa::Country
10
-
11
-
12
- class CountryIndex
13
-
14
- def initialize( recs )
15
- @countries = []
16
- @countries_by_code = {}
17
-
18
- add( recs )
19
- end
20
-
21
- def add( recs )
22
- ###########################################
23
- ## auto-fill countries
24
- ## pp recs
25
- recs.each do |rec|
26
- ## rec e.g. { key:'af', fifa:'AFG', name:'Afghanistan'}
27
-
28
- @countries << rec
29
-
30
- ## add codes lookups - key, fifa, ...
31
- if @countries_by_code[ rec.key ]
32
- puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
33
- exit 1
34
- else
35
- @countries_by_code[ rec.key ] = rec
36
- end
37
-
38
- ## add fifa code (only) if different from key
39
- if rec.key != rec.fifa.downcase
40
- if @countries_by_code[ rec.fifa.downcase ]
41
- puts "** !! ERROR !! country code (fifa) >#{rec.fifa.downcase}< already exits!!"
42
- exit 1
43
- else
44
- @countries_by_code[ rec.fifa.downcase ] = rec
45
- end
46
- end
47
- end
48
- end # method initialize
49
-
50
- def []( key )
51
- key = key.to_s.downcase ## allow symbols (and always downcase e.g. AUT to aut etc.)
52
- @countries_by_code[ key ]
53
- end
54
- end # class CountryIndex
55
-
56
-
57
- end # module Import
58
- end # module SportDb
@@ -1,185 +0,0 @@
1
- # encoding: utf-8
2
-
3
- module SportDb
4
- module Import
5
-
6
-
7
-
8
- class Variant ## (spelling) variant finder / builder for names
9
-
10
-
11
- def self.frequency_table( name ) ## todo/check: use/rename to char_frequency_table
12
- ## calculate the frequency table of letters, digits, etc.
13
- freq = Hash.new(0)
14
- name.each_char do |ch|
15
- freq[ch] += 1
16
- end
17
- freq
18
- end
19
-
20
-
21
- ## "simple" translation
22
- ALPHA_SPECIALS = {
23
- 'Ä'=>'A', 'ä'=>'a',
24
- 'Á'=>'A', 'á'=>'a',
25
- 'à'=>'a',
26
- 'ã'=>'a',
27
- 'â'=>'a',
28
- 'Å'=>'A', 'å'=>'a',
29
- 'æ'=>'ae',
30
- 'ā'=>'a',
31
- 'ă'=>'a',
32
- 'ą'=>'a',
33
-
34
- 'Ç' =>'C', 'ç'=>'c',
35
- 'ć'=>'c',
36
- 'Č'=>'C', 'č'=>'c',
37
-
38
- 'É'=>'E', 'é'=>'e',
39
- 'è'=>'e',
40
- 'ê'=>'e',
41
- 'ë'=>'e',
42
- 'ė'=>'e',
43
- 'ę'=>'e',
44
-
45
- 'ğ'=>'g',
46
-
47
- 'İ'=>'I',
48
- 'Í'=>'I', 'í'=>'i',
49
- 'î'=>'i',
50
- 'ī'=>'i',
51
- 'ı'=>'i',
52
-
53
- 'Ł'=>'L', 'ł'=>'l',
54
-
55
- 'ñ'=>'n',
56
- 'ń'=>'n',
57
- 'ň'=>'n',
58
-
59
- 'Ö'=>'O', 'ö'=>'o',
60
- 'ó'=>'o',
61
- 'õ'=>'o',
62
- 'ô'=>'o',
63
- 'ø'=>'o',
64
- 'ő'=>'o',
65
-
66
- 'ř'=>'r',
67
-
68
- 'Ś'=>'S',
69
- 'Ş'=>'S', 'ş'=>'s',
70
- 'Š'=>'S', 'š'=>'s',
71
- 'ș'=>'s', ## U+0219
72
- 'ß'=>'ss',
73
-
74
- 'ţ'=>'t', ## U+0163
75
- 'ț'=>'t', ## U+021B
76
- 'þ'=>'th',
77
-
78
- 'Ü'=>'U', 'ü'=>'u',
79
- 'Ú'=>'U', 'ú'=>'u',
80
- 'ū'=>'u',
81
-
82
- 'ý'=>'y',
83
-
84
- 'ź'=>'z',
85
- 'ż'=>'z',
86
- 'Ž'=>'Z', 'ž'=>'z',
87
- }
88
-
89
-
90
- ## de,at,ch translation for umlauts
91
- ALPHA_SPECIALS_DE = {
92
- 'Ä'=>'Ae', 'ä'=>'ae',
93
- 'Ö'=>'Oe', 'ö'=>'oe',
94
- 'Ü'=>'Ue', 'ü'=>'ue',
95
- 'ß'=>'ss',
96
- }
97
-
98
- ## add ALPHA_SPECIALS_ES - why? why not? is Espanyol catalan spelling or spanish (castillian)?
99
- # 'ñ'=>'ny', ## e.g. Español => Espanyol
100
-
101
- ALPHA_DOWNCASE = %w[A B C D E F G H I J K L M N O P Q R S T U V W X Y Z].reduce({}) do |h,ch|
102
- h[ch] = ch.downcase
103
- h
104
- end.merge(
105
- 'Ä'=>'ä',
106
- 'Á'=>'á',
107
- 'Å'=>'å',
108
-
109
- 'Ç'=>'ç',
110
- 'Č'=>'č',
111
-
112
- 'É'=>'é',
113
-
114
- 'İ'=>'?', ## fix - add lowercase
115
- 'Í'=>'í',
116
-
117
- 'Ł'=>'ł',
118
-
119
- 'Ö'=>'ö',
120
-
121
- 'Ś'=>'?', ## fix - add lowercase
122
- 'Ş'=>'ş',
123
- 'Š'=>'š',
124
-
125
- 'Ü'=>'ü',
126
- 'Ú'=>'ú',
127
-
128
- 'Ž'=>'ž',
129
- )
130
-
131
-
132
- def self.alpha_specials_count( freq, mapping )
133
- mapping.keys.reduce(0) do |count,ch|
134
- count += freq[ch]
135
- count
136
- end
137
- end
138
-
139
- def self.tr( name, mapping )
140
- buf = String.new
141
- name.each_char do |ch|
142
- buf << if mapping[ch]
143
- mapping[ch]
144
- else
145
- ch
146
- end
147
- end
148
- buf
149
- end
150
-
151
-
152
-
153
- def self.find( name )
154
- alt_names = []
155
-
156
- freq = frequency_table( name )
157
-
158
- if alpha_specials_count( freq, ALPHA_SPECIALS ) > 0 # check if includes äöü etc.
159
- alt_names << tr( name, ALPHA_SPECIALS )
160
- end
161
-
162
- if alpha_specials_count( freq, ALPHA_SPECIALS_DE ) > 0 ## todo/fix: add / pass-in language/country code and check - why? why not?
163
- alt_names << tr( name, ALPHA_SPECIALS_DE )
164
- end
165
-
166
- ## todo - make uniq e.g. Preußen is Preussen, Preussen 2x
167
- alt_names = alt_names.uniq
168
- alt_names
169
- end
170
-
171
- def self.downcase_i18n( name ) ## our very own downcase for int'l characters / letters
172
- tr( name, ALPHA_DOWNCASE )
173
- end
174
-
175
- end # class Variant
176
-
177
- end ## module Import
178
- end ## module SportDb
179
-
180
-
181
-
182
- ## "global" convenience helper
183
- def downcase_i18n( name )
184
- SportDb::Import::Variant.downcase_i18n( name )
185
- end # Variant
@@ -1,104 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module SportDb
5
- module Import
6
-
7
-
8
- class WikiReader ## todo/check: rename to WikiClubReader - why? why not?
9
-
10
- class WikiClub
11
- attr_reader :name, :country
12
- def initialize( name, country )
13
- @name, @country = name, country
14
- end
15
- end
16
-
17
-
18
- def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
19
- txt = File.open( path, 'r:utf-8' ).read
20
- parse( txt )
21
- end
22
-
23
-
24
- def self.parse( txt )
25
- recs = []
26
- last_country = nil ## note: supports only one level of headings for now (and that is a country)
27
-
28
- txt.each_line do |line|
29
- line = line.strip
30
-
31
- next if line.empty?
32
- next if line.start_with?( '#' ) ## skip comments too
33
-
34
- ## strip inline (until end-of-line) comments too
35
- ## e.g Eupen => KAS Eupen, ## [de]
36
- ## => Eupen => KAS Eupen,
37
- line = line.sub( /#.*/, '' ).strip
38
- pp line
39
-
40
-
41
- next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
42
-
43
- ## note: like in wikimedia markup (and markdown) all optional trailing ==== too
44
- ## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
45
- if line =~ /^(={1,}) ## leading ======
46
- ([^=]+?) ## text (note: for now no "inline" = allowed)
47
- =* ## (optional) trailing ====
48
- $/x
49
- heading_marker = $1
50
- heading_level = $1.length ## count number of = for heading level
51
- heading = $2.strip
52
-
53
- puts "heading #{heading_level} >#{heading}<"
54
-
55
- if heading_level > 1
56
- puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
57
- exit 1
58
- end
59
-
60
- ## quick hack: if level is 1 assume country for now
61
- ## and extract country code e.g.
62
- ## Austria (at) => at
63
- ## todo/fix: allow code only e.g. at or aut without enclosing () too - why? why not?
64
- if heading =~ /\(([a-z]{2,3})\)/i ## note allow (at) or (AUT) too
65
- country_code = $1
66
-
67
- ## check country code - MUST exist for now!!!!
68
- country = SportDb::Import.config.countries[ country_code ]
69
- if country.nil?
70
- puts "** !!! ERROR [wiki reader] !!! - unknown country with code >#{country_code}< - sorry - add country to config to fix"
71
- exit 1
72
- end
73
-
74
- last_country = country
75
- else
76
- puts "!!! error - heading level 1 - missing country code - >#{heading}<"
77
- exit 1
78
- end
79
- pp last_country
80
- else
81
- ## strip and squish (white)spaces
82
- # e.g. New York FC (2011-) => New York FC (2011-)
83
- value = line.strip.gsub( /[ \t]+/, ' ' )
84
-
85
- ## normalize (allow underscore (-) - replace with space)
86
- ## e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
87
- value = value.gsub( '_', ' ' )
88
-
89
- if last_country.nil?
90
- puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
91
- exit 1
92
- end
93
-
94
- rec = WikiClub.new( value, last_country )
95
- recs << rec
96
- end
97
- end # each_line
98
- recs
99
- end # method read
100
-
101
- end # class WikiReader
102
-
103
- end ## module Import
104
- end ## module SportDb
@@ -1,100 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_club_index.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestClubIndex < MiniTest::Test
11
-
12
- def test_clubs
13
- pp SportDb::Import.config.clubs.errors
14
-
15
- SportDb::Import.config.clubs.dump_duplicates
16
-
17
- m = SportDb::Import.config.clubs.match( 'Rapid Wien' )
18
- assert_equal 'SK Rapid Wien', m[0].name
19
- assert_equal 'Austria', m[0].country.name
20
- assert_equal 'Wien', m[0].city
21
-
22
- m = SportDb::Import.config.clubs.match( 'rapid wien' )
23
- assert_equal 'SK Rapid Wien', m[0].name
24
- assert_equal 'Austria', m[0].country.name
25
- assert_equal 'Wien', m[0].city
26
-
27
- ## note: all dots (.) get always removed
28
- m = SportDb::Import.config.clubs.match( '...r.a.p.i.d w.i.e.n...' )
29
- assert_equal 'SK Rapid Wien', m[0].name
30
- assert_equal 'Austria', m[0].country.name
31
- assert_equal 'Wien', m[0].city
32
-
33
- ## note: all spaces and dashes (-) get always removed
34
- m = SportDb::Import.config.clubs.match( '--- r a p i d w i e n ---' )
35
- assert_equal 'SK Rapid Wien', m[0].name
36
- assert_equal 'Austria', m[0].country.name
37
- assert_equal 'Wien', m[0].city
38
-
39
- m = SportDb::Import.config.clubs.match( 'RAPID WIEN' )
40
- assert_equal 'SK Rapid Wien', m[0].name
41
- assert_equal 'Austria', m[0].country.name
42
- assert_equal 'Wien', m[0].city
43
-
44
-
45
- c = SportDb::Import.config.clubs[ 'SK Rapid Wien' ] ## check canoncial name match (only)
46
- assert_equal 'SK Rapid Wien', c.name
47
- assert_equal 'Austria', c.country.name
48
- assert_equal 'Wien', c.city
49
-
50
-
51
- m = SportDb::Import.config.clubs.match( 'Arsenal' )
52
- assert_equal 3, m.size
53
-
54
- m = SportDb::Import.config.clubs.match( 'ARSENAL' )
55
- assert_equal 3, m.size
56
-
57
- m = SportDb::Import.config.clubs.match_by( name: 'Arsenal', country: 'eng' )
58
- assert_equal 1, m.size
59
- assert_equal 'Arsenal FC', m[0].name
60
- assert_equal 'England', m[0].country.name
61
- assert_equal 'London', m[0].city
62
-
63
- m = SportDb::Import.config.clubs.match_by( name: 'Arsenal', country: 'ar' )
64
- assert_equal 1, m.size
65
- assert_equal 'Arsenal de Sarandí', m[0].name
66
- assert_equal 'Argentina', m[0].country.name
67
- assert_equal 'Sarandí', m[0].city
68
-
69
- m = SportDb::Import.config.clubs.match_by( name: 'Arsenal', country: 'ru' )
70
- assert_equal 1, m.size
71
- assert_equal 'Arsenal Tula', m[0].name
72
- assert_equal 'Russia', m[0].country.name
73
- assert_equal 'Tula', m[0].city
74
-
75
-
76
- m = SportDb::Import.config.clubs.match( 'Arsenal FC' )
77
- assert_equal 2, m.size
78
-
79
- m = SportDb::Import.config.clubs.match( 'Arsenal F.C.' )
80
- assert_equal 2, m.size
81
-
82
- m = SportDb::Import.config.clubs.match( '...A.r.s.e.n.a.l... F.C...' )
83
- assert_equal 2, m.size
84
-
85
-
86
- ##############################################
87
- ## test wikipedia names and links/urls
88
-
89
- m = SportDb::Import.config.clubs.match( 'Club Brugge KV' )
90
- assert_equal 1, m.size
91
- assert_equal 'Club Brugge KV', m[0].wikipedia
92
- assert_equal 'https://en.wikipedia.org/wiki/Club_Brugge_KV', m[0].wikipedia_url
93
-
94
- m = SportDb::Import.config.clubs.match( 'RSC Anderlecht' )
95
- assert_equal 1, m.size
96
- assert_equal 'R.S.C. Anderlecht', m[0].wikipedia
97
- assert_equal 'https://en.wikipedia.org/wiki/R.S.C._Anderlecht', m[0].wikipedia_url
98
- end
99
-
100
- end # class TestClubIndex