sportdb-search 0.0.1 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a675e2ad48a651c6b31ccc0f68f838907384a93078269dff90c87e5424472a56
4
- data.tar.gz: 8abf21926e4e3924ef71df5da72330c834fe0a54679efffdb6657e90361e4bac
3
+ metadata.gz: a25ed40d388f7a3ce09acc8a67a5a16aec359696141c3260bda097b5d221c130
4
+ data.tar.gz: ec681b64b842e733d874ab9c264e65797356129a0beccf11d9d40d55d1ea34fa
5
5
  SHA512:
6
- metadata.gz: db536b42aabda47e6a6eb7b2da08a7454f91c017801948a86ee7207762a5ffc9ebc5eb7a32e65378aeea4ad8a93af7c0db390458e667dd6cc350c2d3e9dfa24e
7
- data.tar.gz: 5b69bf93a1dd374d2dcf50907c26aeaf7781af34ba127d91830f09d8e9406dfcf1345b806222258ba6da7d7bf2df8590790d8f3763298c9a1909ca536a02097a
6
+ metadata.gz: 78d1cbcea722b03bbb6d6986fda9837b3d15e1aaf9dd8b5085d382435e56a67bf62c228f0b33ff4ff01845e5a78e4ea86163ba1a150e8e9166ac66e0d5a81867
7
+ data.tar.gz: 7e17104eb185b9c4f82e12eff407e9df4c352c6884194ae04036dd4bae690d0671c10ff723387bb12da54212a6033a3490a1763dfdbb55fd91cb916025d7199d
data/CHANGELOG.md CHANGED
@@ -1,3 +1,5 @@
1
+ ### 0.1.1
2
+
1
3
  ### 0.0.1 / 2024-08-25
2
4
 
3
5
  * Everything is new. First release.
data/Manifest.txt CHANGED
@@ -5,5 +5,5 @@ Rakefile
5
5
  lib/sportdb/search.rb
6
6
  lib/sportdb/search/sport.rb
7
7
  lib/sportdb/search/structs.rb
8
+ lib/sportdb/search/structs_world.rb
8
9
  lib/sportdb/search/version.rb
9
- lib/sportdb/search/world.rb
data/README.md CHANGED
@@ -7,10 +7,71 @@
7
7
 
8
8
 
9
9
 
10
+
10
11
  ## Usage
11
12
 
13
+ Let's use the [/clubs datasets](https://github.com/openfootball/clubs)
14
+ (3000+ football clubs from around the world)
15
+ to match name "variants" e.g. `Arsenal` to canonical globally unique
16
+ names e.g. `Arsenal FC, London, England`:
17
+
18
+ ``` ruby
19
+ require 'sportdb/search'
20
+
21
+
22
+ Club = Sports::Club
23
+
24
+ m = Club.match_by( name: 'Arsenal' )
25
+ m.size # 3 club matches found
26
+ #=> 3
27
+ m[0].name; m[0].city; m[0].country
28
+ #=> "Arsenal FC", "London", "England"
29
+ m[1].name; m[1].city; m[1].country
30
+ #=> "Arsenal Tula", "Tula", "Russia"
31
+ m[2].name; m[2].city; m[2].country
32
+ #=> "Arsenal de Sarandí", "Sarandí", "Argentina"
33
+
34
+
35
+ m = Club.match_by( name: 'Arsenal', country: 'eng' )
36
+ # -or- try alternative names (and auto-generated spelling variants)
37
+ m = Club.match_by( name: 'Arsenal FC', country: 'eng' )
38
+ m = Club.match_by( name: 'Arsenal F.C.', country: 'eng' )
39
+ m = Club.match_by( name: '...A.r.s.e.n.a.l... F.C...', country: 'eng' )
40
+ m.size # 1 club match found
41
+ #=> 1
42
+ m[0].name; m[0].city; m[0].country
43
+ #=> "Arsenal FC", "London", "England"
44
+
45
+ m = Club.match_by( name: 'Arsenal', country: 'ar' )
46
+ # -or- try alternative names (and auto-generated spelling variants)
47
+ m = Club.match_by( name: 'Arsenal Sarandí', country: 'ar' )
48
+ m = Club.match_by( name: 'Arsenal Sarandi', country: 'ar' )
49
+ m.size # 1 club match found
50
+ #=> 1
51
+ m[0].name; m[0].city; m[0].country
52
+ #=> "Arsenal de Sarandí", "Sarandí", "Argentina"
53
+
54
+
55
+ # try some more
56
+ m = Club.match_by( name: 'AZ' )
57
+ m[0].name; m[0].city; m[0].country
58
+ #=> "AZ Alkmaar", "Alkmaar", "Netherlands"
59
+
60
+ m = Club.match_by( name: 'Bayern' )
61
+ # -or- try alternative names (and auto-generated spelling variants)
62
+ m = Club.match_by( name: 'Bayern München' )
63
+ m = Club.match_by( name: 'Bayern Munchen' )
64
+ m = Club.match_by( name: 'Bayern Muenchen' )
65
+ m[0].name; m[0].city; m[0].country
66
+ #=> "Bayern München", "München", "Germany"
67
+
68
+ # and so on
69
+ # ...
70
+ ```
71
+
72
+
73
+ That's it.
12
74
 
13
- to be done
14
75
 
15
76
 
16
77
  ## License
@@ -302,7 +302,68 @@ class TeamSearch
302
302
  end
303
303
 
304
304
 
305
+
306
+ CLUB_NAME_RE = %r{^
307
+ (?<name>[^()]+?) ## non-greedy
308
+ (?:
309
+ \s+
310
+ \(
311
+ (?<code>[A-Z][A-Za-z]{2,3}) ## optional (country) code; support single code e.g. (A) - why? why not?
312
+ \)
313
+ )?
314
+ $}x ## note - allow (URU) and (Uru) - why? why not
315
+
316
+
317
+ ###
318
+ # note: missing teams will get
319
+ ## auto-created if possible
320
+ ## only ambiguous results (too many matches) raise an exception!!!
305
321
  def _find_by!( name:, league:, mods: nil )
322
+ if mods && mods[ league.key ] && mods[ league.key ][ name ]
323
+ mods[ league.key ][ name ]
324
+ else
325
+ if league.clubs?
326
+ if league.intl? ## todo/fix: add intl? to ActiveRecord league!!!
327
+ ###
328
+ ## get country code from name
329
+ ## e.g. Liverpool FC (ENG) or
330
+ ## Liverpool FC (URU) etc.
331
+
332
+ ## check for country code
333
+ if m=CLUB_NAME_RE.match( name )
334
+ if m[:code]
335
+ @clubs.find_by!( name: m[:name],
336
+ country: m[:code] )
337
+ else
338
+ @clubs.find!( name )
339
+ end
340
+ else
341
+ puts "!! PARSE ERROR - invalid club name; cannot match with CLUB_NAME_RE >#{team}<"
342
+ exit 1
343
+ end
344
+ else ## assume clubs in domestic/national league tournament
345
+ ## note - search by league countries (may incl. more than one country
346
+ ## e.g. us incl. ca, fr incl. mc, ch incl. li, etc.)
347
+ rec = @clubs.find_by( name: name, league: league )
348
+ if rec.nil?
349
+ puts "auto-create (missing) club #{name}"
350
+ ## todo/fix: add auto flag!!!!
351
+ ### like in rounds!!!
352
+ ## to track auto-created clubs
353
+ rec = SportDb::Import::Club.new( name: name )
354
+ rec.country = league.country ## fix: country kwarg not yet supported!!
355
+ pp rec
356
+ end
357
+ rec
358
+ end
359
+ else ## assume national teams (not clubs)
360
+ @national_teams.find!( name )
361
+ end
362
+ end
363
+ end # method _find_by!
364
+
365
+
366
+ def _find_by_v0!( name:, league:, mods: nil )
306
367
  if mods && mods[ league.key ] && mods[ league.key ][ name ]
307
368
  mods[ league.key ][ name ]
308
369
  else
@@ -10,47 +10,6 @@
10
10
 
11
11
 
12
12
  module Sports
13
-
14
- class Country
15
- def self._search #### use service/api or such - why? why not?
16
- SportDb::Import.world.countries
17
- end
18
- def self.find_by( code: nil, name: nil )
19
- _search.find_by( code: code, name: name )
20
- end
21
-
22
- def self.find( q ) ## find by code (first) or name (second)
23
- _search.find( q )
24
- end
25
-
26
- def self.parse_heading( line )
27
- ## fix - move parse code here from search - why? why not?
28
- _search.parse( line )
29
- end
30
-
31
- ## add alternate names/aliases
32
- class << self
33
- alias_method :[], :find ### keep shortcut - why? why not?
34
- alias_method :heading, :parse_heading
35
- end
36
-
37
-
38
- # open question - what name to use build or parse_line or ?
39
- # or parse_recs for CountryReader?
40
- # remove CountryReader helper methods - why? why not?
41
- # use parse_heading/heading for now !!!
42
- #
43
- # def self.parse( line ) or build( line ) ??
44
- # SportDb::Import.world.countries.parse( line )
45
- # end
46
- #
47
- # !!!! note - conflict with
48
- # def self.read( path ) CountryReader.read( path ); end
49
- # def self.parse( txt ) CountryReader.parse( txt ); end
50
- #
51
- end # class Country
52
-
53
-
54
13
  ###
55
14
  ## todo/fix - add find_by( code: ), find_by( name: )
56
15
  ## split - why? why not?
@@ -113,11 +72,38 @@ class Club
113
72
  end
114
73
 
115
74
  def self.build_mods( mods )
116
- _search_build_mods( mods )
75
+ _search.build_mods( mods )
117
76
  end
118
77
  end # class Club
119
78
 
120
79
 
80
+ class Team
81
+ def self._search
82
+ SportDb::Import.catalog.teams
83
+ end
84
+
85
+ ## todo/check: rename to/use map_by! for array version - why? why not?
86
+ def self.find_by!( name:, league:, mods: nil )
87
+ _search.find_by!( name: name,
88
+ league: league,
89
+ mods: mods )
90
+ end
91
+ end # class Team
92
+
93
+
94
+ class EventInfo
95
+ def self._search
96
+ SportDb::Import.catalog.events
97
+ end
98
+
99
+ def self.find_by( league:, season: )
100
+ _search.find_by( league: league,
101
+ season: season )
102
+ end
103
+ end # class EventInfo
104
+
105
+
106
+
121
107
  class Ground
122
108
  def self._search
123
109
  SportDb::Import.catalog.grounds
@@ -0,0 +1,127 @@
1
+ module Sports
2
+
3
+ class City
4
+ def self._search #### use service/api or such - why? why not?
5
+ SportDb::Import.world.cities
6
+ end
7
+
8
+ def self.match_by( name: )
9
+ _search.match_by( name: name )
10
+ end
11
+ end # class City
12
+
13
+
14
+ class Country
15
+ def self._search #### use service/api or such - why? why not?
16
+ SportDb::Import.world.countries
17
+ end
18
+
19
+ def self.find_by( code: nil, name: nil )
20
+ ## todo/fix upstream - change to find_by( code:, name:, ) too - why? why not?
21
+ if code && name.nil?
22
+ _search.find_by_code( code )
23
+ elsif name && code.nil?
24
+ _search.find_by_name( name )
25
+ else
26
+ raise ArgumentError, "CountrySearch#find_by - one (and only one arg) required - code: or name:"
27
+ end
28
+ end
29
+
30
+ def self.find( q ) ## find by code (first) or name (second)
31
+ _search.find_by_name_or_code( q )
32
+ end
33
+
34
+ ###
35
+ ## split/parse country line
36
+ ##
37
+ ## split on bullet e.g.
38
+ ## split into name and code with regex - make code optional
39
+ ##
40
+ ## Examples:
41
+ ## Österreich • Austria (at)
42
+ ## Österreich • Austria
43
+ ## Austria
44
+ ## Deutschland (de) • Germany
45
+ ##
46
+ ## todo/check: support more formats - why? why not?
47
+ ## e.g. Austria, AUT (e.g. with comma - why? why not?)
48
+ def self.parse_heading( line )
49
+ values = line.split( '•' ) ## use/support multi-lingual separator
50
+ country = nil
51
+ values.each do |value|
52
+ value = value.strip
53
+ ## check for trailing country code e.g. (at), (eng), etc
54
+ ## allow codes of 1 to 5 letters for now - northern cyprus (fifa) with 5 letters?
55
+ ## add/allow gb-eng, gb-wal (official iso2!!), in the future too - why? why not?
56
+ if value =~ /[ ]+\((?<code>[A-Za-z]{1,5})\)$/ ## e.g. Austria (at)
57
+ code = $~[:code]
58
+ name = value[0...(value.size-code.size-2)].strip ## note: add -2 for brackets
59
+ candidates = [ find_by( code: code ), find_by( name: name ) ]
60
+ if candidates[0].nil?
61
+ puts "** !!! ERROR Country.parse_heading - unknown code >#{code}< in line: #{line}"
62
+ pp line
63
+ exit 1
64
+ end
65
+ if candidates[1].nil?
66
+ puts "** !!! ERROR Country.parse_heading - unknown name >#{code}< in line: #{line}"
67
+ pp line
68
+ exit 1
69
+ end
70
+ if candidates[0] != candidates[1]
71
+ puts "** !!! ERROR Country.parse_heading - name and code do NOT match the same country:"
72
+ pp line
73
+ pp candidates
74
+ exit 1
75
+ end
76
+ if country && country != candidates[0]
77
+ puts "** !!! ERROR Country.parse_heading - names do NOT match the same country:"
78
+ pp line
79
+ pp country
80
+ pp candidates
81
+ exit 1
82
+ end
83
+ country = candidates[0]
84
+ else
85
+ ## just assume value is name or code
86
+ candidate = find( value )
87
+ if candidate.nil?
88
+ puts "** !!! ERROR Country.parse_heading - unknown name or code >#{value}< in line: #{line}"
89
+ pp line
90
+ exit 1
91
+ end
92
+ if country && country != candidate
93
+ puts "** !!! ERROR Country.parse_heading - names do NOT match the same country:"
94
+ pp line
95
+ pp country
96
+ pp candidate
97
+ exit 1
98
+ end
99
+ country = candidate
100
+ end
101
+ end
102
+ country
103
+ end # method parse_heading
104
+
105
+
106
+ ## add alternate names/aliases
107
+ class << self
108
+ alias_method :[], :find ### keep shortcut - why? why not?
109
+ alias_method :heading, :parse_heading
110
+ end
111
+
112
+
113
+ # open question - what name to use build or parse_line or ?
114
+ # or parse_recs for CountryReader?
115
+ # remove CountryReader helper methods - why? why not?
116
+ # use parse_heading/heading for now !!!
117
+ #
118
+ # def self.parse( line ) or build( line ) ??
119
+ # SportDb::Import.world.countries.parse( line )
120
+ # end
121
+ #
122
+ # !!!! note - conflict with
123
+ # def self.read( path ) CountryReader.read( path ); end
124
+ # def self.parse( txt ) CountryReader.parse( txt ); end
125
+ #
126
+ end # class Country
127
+ end # module Sports
@@ -1,9 +1,8 @@
1
1
  module SportDb
2
2
  module Module
3
3
  module Search
4
-
5
4
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
6
- MINOR = 0
5
+ MINOR = 1
7
6
  PATCH = 1
8
7
  VERSION = [MAJOR,MINOR,PATCH].join('.')
9
8
 
@@ -7,22 +7,17 @@ require 'sportdb/catalogs'
7
7
  ### shared basics for search
8
8
  class SportSearch
9
9
  class Search ## base search service - use/keep - why? why not?
10
+ attr_reader :service
10
11
  def initialize( service ) @service = service; end
11
12
  end # class Search
12
13
  end
13
14
 
14
- class WorldSearch
15
- class Search ## base search service - use/keep - why? why not?
16
- def initialize( service ) @service = service; end
17
- end # class Search
18
- end
19
15
 
20
16
 
21
17
 
22
18
  ## our own code
23
19
  require_relative 'search/version'
24
20
  require_relative 'search/sport'
25
- require_relative 'search/world'
26
21
 
27
22
 
28
23
  ########
@@ -76,8 +71,9 @@ class WorldSearch
76
71
  def initialize( countries:, cities: )
77
72
  ## change service to country_service or such - why? why not?
78
73
  ## add city_service and such later
79
- @countries = CountrySearch.new( countries )
80
- @cities = CitySearch.new( cities )
74
+
75
+ @countries = countries
76
+ @cities = cities
81
77
  end
82
78
 
83
79
  ####
@@ -168,35 +164,12 @@ end # module Sports
168
164
  ###
169
165
  ## add/augment core classes with search services
170
166
  require_relative 'search/structs'
167
+ require_relative 'search/structs_world'
171
168
 
172
169
 
173
170
 
174
171
  module SportDb
175
172
  module Import
176
- Season = ::Season ## add a convenience alias for top-level Season class
177
-
178
- ## add "old" convenience aliases for structs - why? why not?
179
- ## todo/check: just use include Sports !!!!
180
- Country = ::Sports::Country
181
- League = ::Sports::League
182
- Group = ::Sports::Group
183
- Round = ::Sports::Round
184
- Match = ::Sports::Match
185
- Matchlist = ::Sports::Matchlist
186
- Goal = ::Sports::Goal
187
- Team = ::Sports::Team
188
- NationalTeam = ::Sports::NationalTeam
189
- Club = ::Sports::Club
190
- Standings = ::Sports::Standings
191
- TeamUsage = ::Sports::TeamUsage
192
-
193
- Ground = ::Sports::Ground
194
-
195
- Player = ::Sports::Player
196
-
197
- EventInfo = ::Sports::EventInfo
198
-
199
-
200
173
  class Team
201
174
  ## add convenience lookup helper / method for name by season for now
202
175
  ## use clubs history - for now kept separate from struct - why? why not?
@@ -205,7 +178,6 @@ module SportDb
205
178
  SportDb::Import.catalog.clubs_history.find_name_by( name: name, season: season ) || name
206
179
  end
207
180
  end # class Team
208
-
209
181
  end # module Import
210
182
  end # module SportDb
211
183
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sportdb-search
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-08-25 00:00:00.000000000 Z
11
+ date: 2024-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sportdb-catalogs
@@ -74,8 +74,8 @@ files:
74
74
  - lib/sportdb/search.rb
75
75
  - lib/sportdb/search/sport.rb
76
76
  - lib/sportdb/search/structs.rb
77
+ - lib/sportdb/search/structs_world.rb
77
78
  - lib/sportdb/search/version.rb
78
- - lib/sportdb/search/world.rb
79
79
  homepage: https://github.com/sportdb/sport.db
80
80
  licenses:
81
81
  - Public Domain
@@ -1,127 +0,0 @@
1
- ###
2
- # world search service api for countries and more
3
- #
4
- # core api is:
5
- # - world.countries.find_by_code
6
- # - .find_by_name
7
-
8
-
9
- class WorldSearch
10
-
11
- class CitySearch < Search
12
- ###################
13
- ## core required delegates - use delegate generator - why? why not?
14
- def match_by( name: )
15
- @service.match_by( name: name )
16
- end
17
- end # class CitySearch
18
-
19
-
20
- class CountrySearch < Search
21
- ###################
22
- ## core required delegates - use delegate generator - why? why not?
23
- def find_by_code( code )
24
- puts "!! DEPRECATED - use CountrySearch#find_by( code: )"
25
- @service.find_by_code( code )
26
- end
27
- def find_by_name( name )
28
- puts "!! DEPRECATED - use CountrySearch#find_by( name: )"
29
- @service.find_by_name( name )
30
- end
31
-
32
- def find_by( code: nil, name: nil )
33
- ## todo/fix upstream - change to find_by( code:, name:, ) too
34
- if code && name.nil?
35
- @service.find_by_code( code )
36
- elsif name && code.nil?
37
- @service.find_by_name( name )
38
- else
39
- raise ArgumentError, "CountrySearch#find_by - one (and only one arg) required - code: or name:"
40
- end
41
- end
42
-
43
- def find( q )
44
- @service.find_by_name_or_code( q )
45
- end
46
- alias_method :[], :find ### keep shortcut - why? why not?
47
-
48
-
49
- ###############
50
- ### more deriv support functions / helpers
51
-
52
- ###
53
- ## split/parse country line
54
- ##
55
- ## split on bullet e.g.
56
- ## split into name and code with regex - make code optional
57
- ##
58
- ## Examples:
59
- ## Österreich • Austria (at)
60
- ## Österreich • Austria
61
- ## Austria
62
- ## Deutschland (de) • Germany
63
- ##
64
- ## todo/check: support more formats - why? why not?
65
- ## e.g. Austria, AUT (e.g. with comma - why? why not?)
66
- def parse( line )
67
- values = line.split( '•' ) ## use/support multi-lingual separator
68
- country = nil
69
- values.each do |value|
70
- value = value.strip
71
- ## check for trailing country code e.g. (at), (eng), etc
72
- ## allow code 1 to 5 for now - northern cyprus(fifa) with 5 letters?.
73
- ## add/allow gb-eng, gb-wal (official iso2!!), in the future too - why? why not?
74
- if value =~ /[ ]+\((?<code>[A-Za-z]{1,5})\)$/ ## e.g. Austria (at)
75
- code = $~[:code]
76
- name = value[0...(value.size-code.size-2)].strip ## note: add -2 for brackets
77
- candidates = [ find_by( code: code ), find_by( name: name ) ]
78
- if candidates[0].nil?
79
- puts "** !!! ERROR Country.parse_heading - unknown code >#{code}< in line: #{line}"
80
- pp line
81
- exit 1
82
- end
83
- if candidates[1].nil?
84
- puts "** !!! ERROR Country.parse_heading - unknown name >#{code}< in line: #{line}"
85
- pp line
86
- exit 1
87
- end
88
- if candidates[0] != candidates[1]
89
- puts "** !!! ERROR Country.parse_heading - name and code do NOT match the same country:"
90
- pp line
91
- pp candidates
92
- exit 1
93
- end
94
- if country && country != candidates[0]
95
- puts "** !!! ERROR Country.parse_heading - names do NOT match the same country:"
96
- pp line
97
- pp country
98
- pp candidates
99
- exit 1
100
- end
101
- country = candidates[0]
102
- else
103
- ## just assume value is name or code
104
- candidate = find( value )
105
- if candidate.nil?
106
- puts "** !!! ERROR Country.parse_heading - unknown name or code >#{value}< in line: #{line}"
107
- pp line
108
- exit 1
109
- end
110
- if country && country != candidate
111
- puts "** !!! ERROR Country.parse_heading - names do NOT match the same country:"
112
- pp line
113
- pp country
114
- pp candidate
115
- exit 1
116
- end
117
- country = candidate
118
- end
119
- end
120
- country
121
- end # method parse
122
- end # class CountrySearch
123
- end # class WorldSearch
124
-
125
-
126
-
127
-