sportdb-formats 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,75 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class ClubPropsReader
9
+ ## reads club props rows (via parse_csv) and copies key & code over to the matching club record from the catalog
10
+ def catalog() Import.catalog; end
11
+
12
+ ## read the file at path (as utf-8 text) and parse it
13
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
14
+ txt = File.open( path, 'r:utf-8' ) {|f| f.read }
15
+ parse( txt )
16
+ end
17
+
18
+ def self.parse( txt )
19
+ new( txt ).parse
20
+ end
21
+
22
+
23
+ def initialize( txt )
24
+ @txt = txt
25
+ end
26
+ ## for every row (read with the keys :name, :key, :code): match the club by name and set key / code if present in the row; prints an error and exits (status 1) on a missing name, no match or a still ambiguous match
27
+ def parse
28
+ recs = parse_csv( @txt )
29
+ recs.each do |rec|
30
+ name = rec[:name]
31
+ if name.nil?
32
+ puts "** !!! ERROR !!! Name column required / missing / NOT found in row:"
33
+ pp rec
34
+ exit 1
35
+ end
36
+
37
+ ## find / match club by (canonical) name
38
+ m = catalog.clubs.match( name )
39
+ if m && m.size > 1
40
+ puts "** !!! WARN !!! ambigious (multiple) club matches (#{m.size}) for name >#{name}< in props row:"
41
+ pp rec
42
+ pp m
43
+
44
+ ## todo/fix: try filter by canonical name if more than one match
45
+ m = m.select { |club| club.name == name }
46
+ m = nil if m.empty? ## note: reset to nil if no more matches
47
+ end
48
+ ## NOTE(review): assumes match returns nil (not an empty array) if nothing matches - otherwise m[0] is nil in the last branch below - confirm
49
+ if m.nil?
50
+ puts "** !!! ERROR !!! no club match for (canonical) name >#{name}< in props row:"
51
+ pp rec
52
+ exit 1
53
+ elsif m.size > 1
54
+ puts "** !!! ERROR !!! ambigious (multiple) club matches (#{m.size}) for (canonical) name >#{name}< in props row:"
55
+ pp rec
56
+ pp m
57
+ exit 1
58
+ else ## assume size == 1, bingo!!!
59
+ club_rec = m[0]
60
+ ## todo/fix: warn if name differs from (canonical) name
61
+ ## todo/fix: also add props to in-memory structs/records!!!
62
+ ## todo/fix: only update "on-demand" from in-memory struct/records!!!!
63
+
64
+ ## update attributes
65
+ club_rec.key = rec[:key] if rec[:key]
66
+ club_rec.code = rec[:code] if rec[:code]
67
+ ## todo/fix: add (some) more props e.g. address, web, etc.
68
+ end
69
+ end
70
+ end # method parse
71
+
72
+ end # class ClubPropsReader
73
+
74
+ end ## module Import
75
+ end ## module SportDb
@@ -0,0 +1,114 @@
1
+
2
+ module SportDb
3
+ module Import
4
+
5
+ class NationalTeamIndex
6
+ ## in-memory index of national team records - lookup by (downcased) code or by normalized name
7
+ attr_reader :teams ## all (national) team records
8
+
9
+ def initialize( recs )
10
+ @teams = []
11
+ @teams_by_code = {}
12
+ @teams_by_name = {}
13
+
14
+ add( recs )
15
+ end
16
+
17
+ include NameHelper
18
+ ## incl. strip_year( name )
19
+ ## has_year?( name)
20
+ ## strip_lang( name )
21
+ ## normalize( name )
22
+
23
+ ## add records: index every record by its downcased code and by all of its normalized names (name + alt names + year-stripped variants); prints an error and exits (status 1) on a duplicate code or name
24
+ def add( recs )
25
+ ###########################################
26
+ ## auto-fill national teams
27
+ ## pp recs
28
+ recs.each do |rec|
29
+ @teams << rec
30
+
31
+ ## add fifa code lookup
32
+ if @teams_by_code[ rec.code.downcase ]
33
+ puts "** !! ERROR !! national team code (code) >#{rec.code}< already exits!!"
34
+ exit 1
35
+ else
36
+ @teams_by_code[ rec.code.downcase ] = rec
37
+ end
38
+
39
+
40
+ ## add all names (canonical name + alt names)
41
+ names = [rec.name] + rec.alt_names
42
+ more_names = []
43
+ ## check "hand-typed" names for year (auto-add)
44
+ ## check for year(s) e.g. (1887-1911), (-2013),
45
+ ## (1946-2001,2013-) etc.
46
+ names.each do |name|
47
+ if has_year?( name )
48
+ more_names << strip_year( name )
49
+ end
50
+ end
51
+
52
+ names += more_names
53
+ ## check for duplicates - simple check for now - fix/improve
54
+ ## todo/fix: (auto)remove duplicates - why? why not?
55
+ count = names.size
56
+ count_uniq = names.uniq.size
57
+ if count != count_uniq
58
+ puts "** !!! ERROR !!! - #{count-count_uniq} duplicate name(s) in national teams:"
59
+ pp names
60
+ pp rec
61
+ exit 1
62
+ end
63
+
64
+ names.each_with_index do |name,i|
65
+ ## check lang codes e.g. [en], [fr], etc.
66
+ ## todo/check/fix: move strip_lang up in the chain - check for duplicates (e.g. only lang code marker different etc.) - why? why not?
67
+ name = strip_lang( name )
68
+ norm = normalize( name )
69
+ old_rec = @teams_by_name[ norm ]
70
+ if old_rec
71
+ ## check if team name already is included or is new team rec - note: for now any existing entry is reported as a conflict (exit 1)
72
+ msg = "** !!! ERROR !!! - national team name conflict/duplicate - >#{name}< will overwrite >#{old_rec.name}< with >#{rec.name}<"
73
+ puts msg
74
+ exit 1
75
+ else
76
+ @teams_by_name[ norm ] = rec
77
+ end
78
+ end
79
+ end ## each record
80
+ end # method add
81
+
82
+ ## fix/todo: add find_by (alias for find_by_name/find_by_code)
83
+ def find_by_code( code )
84
+ code = code.to_s.downcase ## allow symbols (and always downcase e.g. AUT to aut etc.)
85
+ @teams_by_code[ code ]
86
+ end
87
+
88
+ def find_by_name( name )
89
+ name = normalize( name.to_s ) ## allow symbols too (e.g. use to_s first)
90
+ @teams_by_name[ name ]
91
+ end
92
+
93
+ def find( q )
94
+ ## check longest match first (assume name is longer than code)
95
+ ## try lookup / find by (normalized) name first
96
+ team = find_by_name( q )
97
+ team = find_by_code( q ) if team.nil?
98
+ team
99
+ end
100
+ ## like find but prints an error and exits (status 1) if there is no match
101
+ def find!( q )
102
+ team = find( q )
103
+ if team.nil?
104
+ puts "** !!! ERROR - no match for national team >#{q}< found"
105
+ exit 1
106
+ end
107
+ team
108
+ end
109
+ end # class NationalTeamIndex
110
+
111
+
112
+ end # module Import
113
+ end # module SportDb
114
+
@@ -0,0 +1,43 @@
1
+
2
+ module SportDb
3
+ module Import
4
+
5
+
6
+ class TeamIndex
7
+ ## note: "virtual" index lets you search clubs and/or national_teams (don't care)
8
+
9
+ def catalog() Import.catalog; end
10
+ ## find team record(s) by name for a league; an array of names returns an array of records (in the same order), a single name returns a single record
11
+ ## todo/check: rename to/use map_by! for array version - why? why not?
12
+ def find_by!( name:, league:, mods: nil )
13
+ if name.is_a?( Array )
14
+ recs = []
15
+ name.each do |q|
16
+ recs << __find_by!( name: q, league: league, mods: mods )
17
+ end
18
+ recs
19
+ else ## assume single name
20
+ __find_by!( name: name, league: league, mods: mods )
21
+ end
22
+ end
23
+ ## single name lookup: an entry in mods[ league.key ][ name ] wins; otherwise club leagues use the club index (by country unless league.intl?) and all other leagues use the national team index
24
+ def __find_by!( name:, league:, mods: nil )
25
+ if mods && mods[ league.key ] && mods[ league.key ][ name ]
26
+ mods[ league.key ][ name ]
27
+ else
28
+ if league.clubs?
29
+ if league.intl? ## todo/fix: add intl? to ActiveRecord league!!!
30
+ catalog.clubs.find!( name )
31
+ else ## assume clubs in domestic/national league tournament
32
+ catalog.clubs.find_by!( name: name, country: league.country )
33
+ end
34
+ else ## assume national teams (not clubs)
35
+ catalog.national_teams.find!( name )
36
+ end
37
+ end
38
+ end # method __find_by!
39
+
40
+ end # class TeamIndex
41
+
42
+ end # module Import
43
+ end # module SportDb
@@ -0,0 +1,108 @@
1
+ # encoding: utf-8
2
+
3
+
4
+ module SportDb
5
+ module Import
6
+
7
+
8
+ class WikiReader ## todo/check: rename to WikiClubReader - why? why not?
9
+ ## reads wiki club name lists - a country heading (e.g. = Austria) followed by one club name per line
10
+ class WikiClub # nested class
11
+ attr_reader :name, :country
12
+ def initialize( name, country )
13
+ @name, @country = name, country
14
+ end
15
+ end # (nested) class WikiClub
16
+
17
+
18
+ def catalog() Import.catalog; end
19
+
20
+ ## read the file at path (as utf-8 text) and parse it
21
+ def self.read( path ) ## use - rename to read_file or from_file etc. - why? why not?
22
+ txt = File.open( path, 'r:utf-8' ) { |f| f.read }
23
+ parse( txt )
24
+ end
25
+
26
+ def self.parse( txt )
27
+ new( txt ).parse
28
+ end
29
+
30
+ def initialize( txt )
31
+ @txt = txt
32
+ end
33
+ ## returns an array of WikiClub records (name + country of the last heading); prints an error and exits (status 1) on a heading deeper than level one, an unknown country or a name before any country heading
34
+ def parse
35
+ recs = []
36
+ last_country = nil ## note: supports only one level of headings for now (and that is a country)
37
+
38
+ @txt.each_line do |line|
39
+ line = line.strip
40
+
41
+ next if line.empty?
42
+ next if line.start_with?( '#' ) ## skip comments too
43
+
44
+ ## strip inline (until end-of-line) comments too
45
+ ## e.g Eupen => KAS Eupen, ## [de]
46
+ ## => Eupen => KAS Eupen,
47
+ line = line.sub( /#.*/, '' ).strip
48
+ pp line ## note: debug output - prints every remaining line
49
+
50
+
51
+ next if line =~ /^={1,}$/ ## skip "decorative" only heading e.g. ========
52
+
53
+ ## note: like in wikimedia markup (and markdown) allow optional trailing ==== too
54
+ ## todo/check: allow === Text =-=-=-=-=-= too - why? why not?
55
+ if line =~ /^(={1,}) ## leading ======
56
+ ([^=]+?) ## text (note: for now no "inline" = allowed)
57
+ =* ## (optional) trailing ====
58
+ $/x
59
+ heading_marker = $1 ## note: not used below
60
+ heading_level = $1.length ## count number of = for heading level
61
+ heading = $2.strip
62
+
63
+ puts "heading #{heading_level} >#{heading}<"
64
+
65
+ if heading_level > 1
66
+ puts "** !!! ERROR [wiki reader] !!! - - headings level too deep - only top / one level supported for now; sorry"
67
+ exit 1
68
+ end
69
+
70
+ ## assume country in heading; allow all "formats" supported by parse e.g.
71
+ ## Österreich • Austria (at)
72
+ ## Österreich • Austria
73
+ ## Austria
74
+ ## Deutschland (de) • Germany
75
+ country = catalog.countries.parse( heading )
76
+ ## check country code - MUST exist for now!!!!
77
+ if country.nil?
78
+ puts "!!! error [wiki reader] - unknown country >#{heading}< - sorry - add country to config to fix"
79
+ exit 1
80
+ end
81
+
82
+ last_country = country
83
+ pp last_country ## note: debug output
84
+ else
85
+ ## strip and squish (white)spaces
86
+ # e.g. New York FC (2011-) => New York FC (2011-)
87
+ value = line.strip.gsub( /[ \t]+/, ' ' )
88
+
89
+ ## normalize (allow underscore (_) - replace with space)
90
+ ## e.g. Cercle_Brugge_K.S.V. => Cercle Brugge K.S.V.
91
+ value = value.gsub( '_', ' ' )
92
+
93
+ if last_country.nil?
94
+ puts "** !!! ERROR [wiki reader] !!! - country heading missing for club name; sorry - add country heading to fix"
95
+ exit 1
96
+ end
97
+
98
+ rec = WikiClub.new( value, last_country )
99
+ recs << rec
100
+ end
101
+ end # each_line
102
+ recs
103
+ end # method parse
104
+
105
+ end # class WikiReader
106
+
107
+ end ## module Import
108
+ end ## module SportDb
@@ -6,7 +6,7 @@ module Formats
6
6
 
7
7
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
8
8
  MINOR = 0
9
- PATCH = 0
9
+ PATCH = 1
10
10
  VERSION = [MAJOR,MINOR,PATCH].join('.')
11
11
 
12
12
  def self.version
data/test/helper.rb CHANGED
@@ -10,6 +10,78 @@ require 'minitest/autorun'
10
10
  require 'sportdb/formats'
11
11
 
12
12
 
13
+
14
+ module SportDb
15
+ module Import
16
+
17
+ class TestCatalog ## catalog for tests - country, league and club indexes built from test data
18
+ def build_country_index
19
+ recs = CountryReader.read( "#{Test.data_dir}/world/countries.txt" )
20
+ index = CountryIndex.new( recs )
21
+ index
22
+ end
23
+ ## league index built from a small inline list (England and Scotland)
24
+ def build_league_index
25
+ recs = SportDb::Import::LeagueReader.parse( <<TXT )
26
+ = England =
27
+ 1 English Premier League
28
+ | ENG PL | England Premier League | Premier League
29
+ 2 English Championship
30
+ | ENG CS | England Championship | Championship
31
+ 3 English League One
32
+ | England League One | League One
33
+ 4 English League Two
34
+ 5 English National League
35
+
36
+ cup EFL Cup
37
+ | League Cup | Football League Cup
38
+ | ENG LC | England Liga Cup
39
+
40
+ = Scotland =
41
+ 1 Scottish Premiership
42
+ 2 Scottish Championship
43
+ 3 Scottish League One
44
+ 4 Scottish League Two
45
+ TXT
46
+
47
+ leagues = SportDb::Import::LeagueIndex.new
48
+ leagues.add( recs )
49
+ leagues
50
+ end
51
+ ## club index built from a small inline list of English clubs
52
+ def build_club_index
53
+ recs = ClubReader.parse( <<TXT )
54
+ = England
55
+
56
+ Chelsea FC
57
+ Arsenal FC
58
+ Tottenham Hotspur
59
+ West Ham United
60
+ Crystal Palace
61
+ Manchester United
62
+ Manchester City
63
+ TXT
64
+
65
+ index = ClubIndex.new
66
+ index.add( recs )
67
+ index
68
+ end
69
+
70
+ ## memoized readers - every index gets built on first access (and reused after)
71
+ def countries() @countries ||= build_country_index; end
72
+ def leagues() @leagues ||= build_league_index; end
73
+ def clubs() @clubs ||= build_club_index; end
74
+ end
75
+
76
+ configure do |config| ## plug in the test catalog - presumably what Import.catalog returns from here on - confirm
77
+ config.catalog = TestCatalog.new
78
+ end
79
+
80
+ end # module Import
81
+ end # module SportDb
82
+
83
+
84
+
13
85
  ################
14
86
  ## helper
15
87
 
@@ -0,0 +1,183 @@
1
+ # encoding: utf-8
2
+
3
+ ###
4
+ # to run use
5
+ # ruby -I ./lib -I ./test test/test_club_index.rb
6
+
7
+
8
+ require 'helper'
9
+
10
+ class TestClubIndex < MiniTest::Test
11
+ ## tests for ClubIndex name matching (match / match_by / find_by! / []) and for wikipedia names (add_wiki)
12
+ def test_match
13
+ ## fixtures: clubs in four countries - three of them share the alt name Arsenal
14
+ recs = SportDb::Import::ClubReader.parse( <<TXT )
15
+ =================================
16
+ = Austria
17
+
18
+ == Wien ==
19
+
20
+ FK Austria Wien, Wien (Favoriten)
21
+ | Austria Vienna | Austria Wien
22
+ SK Rapid Wien, Wien (Hütteldorf)
23
+ | Rapid Vienna | Rapid Wien
24
+
25
+
26
+ ====================================
27
+ = England
28
+
29
+ Arsenal FC, 1886, @ Emirates Stadium, London ## Greater London
30
+ | Arsenal | FC Arsenal
31
+ Chelsea FC, 1905, @ Stamford Bridge, London ## Greater London
32
+ | Chelsea | FC Chelsea
33
+
34
+
35
+ =====================================
36
+ = Russia
37
+
38
+ Arsenal Tula, Tula
39
+ | Arsenal | FC Arsenal Tula
40
+
41
+
42
+ ===========================
43
+ = Argentina
44
+
45
+ == Buenos Aires ==
46
+
47
+ Arsenal de Sarandí, Sarandí › Buenos Aires # Sarandí es una ciudad de la Zona Sur del Gran Buenos Aires
48
+ | Arsenal | Arsenal Sarandi
49
+ | Arsenal FC | Arsenal Fútbol Club
50
+ TXT
51
+
52
+ clubs = SportDb::Import::ClubIndex.new
53
+ clubs.add( recs )
54
+
55
+ pp clubs.errors
56
+
57
+ clubs.dump_duplicates
58
+
59
+ m = clubs.match( 'Rapid Wien' )
60
+ assert_equal 'SK Rapid Wien', m[0].name
61
+ assert_equal 'Austria', m[0].country.name
62
+ assert_equal 'Wien', m[0].city
63
+
64
+ m = clubs.match( 'rapid wien' )
65
+ assert_equal 'SK Rapid Wien', m[0].name
66
+ assert_equal 'Austria', m[0].country.name
67
+ assert_equal 'Wien', m[0].city
68
+
69
+ ## note: all dots (.) get always removed
70
+ m = clubs.match( '...r.a.p.i.d w.i.e.n...' )
71
+ assert_equal 'SK Rapid Wien', m[0].name
72
+ assert_equal 'Austria', m[0].country.name
73
+ assert_equal 'Wien', m[0].city
74
+
75
+ ## note: all spaces and dashes (-) get always removed
76
+ m = clubs.match( '--- r a p i d w i e n ---' )
77
+ assert_equal 'SK Rapid Wien', m[0].name
78
+ assert_equal 'Austria', m[0].country.name
79
+ assert_equal 'Wien', m[0].city
80
+
81
+ m = clubs.match( 'RAPID WIEN' )
82
+ assert_equal 'SK Rapid Wien', m[0].name
83
+ assert_equal 'Austria', m[0].country.name
84
+ assert_equal 'Wien', m[0].city
85
+
86
+
87
+ c = clubs[ 'SK Rapid Wien' ] ## check canonical name match (only)
88
+ assert_equal 'SK Rapid Wien', c.name
89
+ assert_equal 'Austria', c.country.name
90
+ assert_equal 'Wien', c.city
91
+
92
+ ## same (alt) name in three countries - narrow down with match_by / find_by! and a country code
93
+ m = clubs.match( 'Arsenal' )
94
+ assert_equal 3, m.size
95
+
96
+ m = clubs.match( 'ARSENAL' )
97
+ assert_equal 3, m.size
98
+
99
+ m = clubs.match_by( name: 'Arsenal', country: 'eng' )
100
+ assert_equal 1, m.size
101
+ assert_equal 'Arsenal FC', m[0].name
102
+ assert_equal 'England', m[0].country.name
103
+ assert_equal 'London', m[0].city
104
+
105
+ club = clubs.find_by!( name: 'Arsenal', country: 'eng' )
106
+ assert_equal 'Arsenal FC', club.name
107
+ assert_equal 'England', club.country.name
108
+ assert_equal 'London', club.city
109
+
110
+
111
+ m = clubs.match_by( name: 'Arsenal', country: 'ar' )
112
+ assert_equal 1, m.size
113
+ assert_equal 'Arsenal de Sarandí', m[0].name
114
+ assert_equal 'Argentina', m[0].country.name
115
+ assert_equal 'Sarandí', m[0].city
116
+
117
+ club = clubs.find_by!( name: 'Arsenal', country: 'ar' )
118
+ assert_equal 'Arsenal de Sarandí', club.name
119
+ assert_equal 'Argentina', club.country.name
120
+ assert_equal 'Sarandí', club.city
121
+
122
+
123
+ m = clubs.match_by( name: 'Arsenal', country: 'ru' )
124
+ assert_equal 1, m.size
125
+ assert_equal 'Arsenal Tula', m[0].name
126
+ assert_equal 'Russia', m[0].country.name
127
+ assert_equal 'Tula', m[0].city
128
+
129
+ ## Arsenal FC is a canonical name (England) and an alt name (Argentina) - expect two matches
130
+ m = clubs.match( 'Arsenal FC' )
131
+ assert_equal 2, m.size
132
+
133
+ m = clubs.match( 'Arsenal F.C.' )
134
+ assert_equal 2, m.size
135
+
136
+ m = clubs.match( '...A.r.s.e.n.a.l... F.C...' )
137
+ assert_equal 2, m.size
138
+ end
139
+
140
+
141
+ def test_wikipedia # test wikipedia names and links/urls
142
+
143
+ recs = SportDb::Import::ClubReader.parse( <<TXT )
144
+ ==================================
145
+ = Belgium
146
+
147
+ == Brussels ==
148
+
149
+ RSC Anderlecht, 1908, Brussels ## use (just) Anderlecht or Brussel-Anderlecht ??
150
+ | Anderlecht | R.S.C. Anderlecht | Royal Sporting Club Anderlecht
151
+
152
+ == West-Vlaanderen › Vlaanderen ==
153
+
154
+ Club Brugge, 1891, Brugge › West-Vlaanderen › Vlaanderen
155
+ | Club Brugge KV | Club Brugge Koninklijke Voetbalvereniging
156
+ TXT
157
+
158
+
159
+ clubs = SportDb::Import::ClubIndex.new
160
+ clubs.add( recs )
161
+
162
+ recs = SportDb::Import::WikiReader.parse( <<TXT )
163
+ ==================================
164
+ = Belgium
165
+
166
+ R.S.C. Anderlecht
167
+ Club Brugge KV
168
+ TXT
169
+ clubs.add_wiki( recs )
170
+
171
+ ## after add_wiki the matched clubs are expected to carry the wiki name and the derived wikipedia url
172
+ m = clubs.match( 'Club Brugge KV' )
173
+ assert_equal 1, m.size
174
+ assert_equal 'Club Brugge KV', m[0].wikipedia
175
+ assert_equal 'https://en.wikipedia.org/wiki/Club_Brugge_KV', m[0].wikipedia_url
176
+
177
+ m = clubs.match( 'RSC Anderlecht' )
178
+ assert_equal 1, m.size
179
+ assert_equal 'R.S.C. Anderlecht', m[0].wikipedia
180
+ assert_equal 'https://en.wikipedia.org/wiki/R.S.C._Anderlecht', m[0].wikipedia_url
181
+ end
182
+
183
+ end # class TestClubIndex