sportdb-structs 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +2 -0
- data/Manifest.txt +1 -6
- data/README.md +3 -3
- data/Rakefile +1 -1
- data/lib/sportdb/structs/config.rb +39 -39
- data/lib/sportdb/structs/goal_parser_csv.rb +28 -28
- data/lib/sportdb/structs/match_parser_csv.rb +1 -1
- data/lib/sportdb/structs/match_status_parser.rb +90 -90
- data/lib/sportdb/structs/name_helper.rb +87 -87
- data/lib/sportdb/structs/structs/country.rb +28 -1
- data/lib/sportdb/structs/structs/goal.rb +231 -231
- data/lib/sportdb/structs/structs/ground.rb +78 -0
- data/lib/sportdb/structs/structs/group.rb +11 -3
- data/lib/sportdb/structs/structs/league.rb +2 -5
- data/lib/sportdb/structs/structs/match.rb +30 -29
- data/lib/sportdb/structs/structs/round.rb +14 -1
- data/lib/sportdb/structs/structs/standings.rb +271 -271
- data/lib/sportdb/structs/structs/team.rb +49 -48
- data/lib/sportdb/structs/structs/team_usage.rb +84 -84
- data/lib/sportdb/structs/version.rb +4 -4
- data/lib/sportdb/structs.rb +39 -0
- metadata +10 -16
- data/test/helper.rb +0 -13
- data/test/test_clubs.rb +0 -38
- data/test/test_csv_reader.rb +0 -30
- data/test/test_match.rb +0 -30
- data/test/test_match_status_parser.rb +0 -57
- data/test/test_name_helper.rb +0 -65
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
|
-
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: '009da80615ac77d643529d2feee7692ca31ce2c07c5f1dd45cf2667558af4b90'
|
|
4
|
+
data.tar.gz: 7368f410cfc3b4fa0af288c20399c1244841518cf5598ff443dcf17aaa881501
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 56acdb1dc22c5b70f51c398e1ae912ff6bcbf50f021391efa5eba75d3ddeb4c1d069255fd715a891f6c4451f25f4c0212eb8769bd129755ede88ca322cd4e4e6
|
|
7
|
+
data.tar.gz: 35dcad60e6c784859d5c7218e4771ec5979cb4a07b1e07ba6f598e64fc968c1cfd75ee5e48f2c22996fa570a37292051b8f4f4e195ea2b9527ecfceef4753b6c
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
|
@@ -10,6 +10,7 @@ lib/sportdb/structs/match_status_parser.rb
|
|
|
10
10
|
lib/sportdb/structs/name_helper.rb
|
|
11
11
|
lib/sportdb/structs/structs/country.rb
|
|
12
12
|
lib/sportdb/structs/structs/goal.rb
|
|
13
|
+
lib/sportdb/structs/structs/ground.rb
|
|
13
14
|
lib/sportdb/structs/structs/group.rb
|
|
14
15
|
lib/sportdb/structs/structs/league.rb
|
|
15
16
|
lib/sportdb/structs/structs/match.rb
|
|
@@ -19,9 +20,3 @@ lib/sportdb/structs/structs/standings.rb
|
|
|
19
20
|
lib/sportdb/structs/structs/team.rb
|
|
20
21
|
lib/sportdb/structs/structs/team_usage.rb
|
|
21
22
|
lib/sportdb/structs/version.rb
|
|
22
|
-
test/helper.rb
|
|
23
|
-
test/test_clubs.rb
|
|
24
|
-
test/test_csv_reader.rb
|
|
25
|
-
test/test_match.rb
|
|
26
|
-
test/test_match_status_parser.rb
|
|
27
|
-
test/test_name_helper.rb
|
data/README.md
CHANGED
|
@@ -22,8 +22,8 @@ The `sportdb-structs` scripts are dedicated to the public domain.
|
|
|
22
22
|
Use it as you please with no restrictions whatsoever.
|
|
23
23
|
|
|
24
24
|
|
|
25
|
+
|
|
25
26
|
## Questions? Comments?
|
|
26
27
|
|
|
27
|
-
|
|
28
|
-
[
|
|
29
|
-
Thanks!
|
|
28
|
+
Yes, you can. More than welcome.
|
|
29
|
+
See [Help & Support »](https://github.com/openfootball/help)
|
data/Rakefile
CHANGED
|
@@ -11,7 +11,7 @@ Hoe.spec 'sportdb-structs' do
|
|
|
11
11
|
self.urls = { home: 'https://github.com/sportdb/sport.db' }
|
|
12
12
|
|
|
13
13
|
self.author = 'Gerald Bauer'
|
|
14
|
-
self.email = '
|
|
14
|
+
self.email = 'gerald.bauer@gmail.com'
|
|
15
15
|
|
|
16
16
|
# switch extension to .markdown for github formatting
|
|
17
17
|
self.readme_file = 'README.md'
|
|
@@ -1,39 +1,39 @@
|
|
|
1
|
-
module SportDb
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
class Configuration
|
|
5
|
-
##
|
|
6
|
-
## todo: allow configure of countries_dir like clubs_dir
|
|
7
|
-
## "fallback" and use a default built-in world/countries.txt
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
##
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
##
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
def self.
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
end # module Import
|
|
38
|
-
end # module SportDb
|
|
39
|
-
|
|
1
|
+
module SportDb
|
|
2
|
+
module Import
|
|
3
|
+
|
|
4
|
+
class Configuration
|
|
5
|
+
##
|
|
6
|
+
## todo: allow configure of countries_dir like clubs_dir
|
|
7
|
+
## "fallback" and use a default built-in world/countries.txt
|
|
8
|
+
|
|
9
|
+
## note: catalog defined/added in sports-catalogs gem!!!
|
|
10
|
+
## attr_accessor :catalog
|
|
11
|
+
|
|
12
|
+
attr_reader :lang
|
|
13
|
+
def lang=(value)
|
|
14
|
+
## check/todo: always use to_sym - why? needed?
|
|
15
|
+
DateFormats.lang = value
|
|
16
|
+
ScoreFormats.lang = value
|
|
17
|
+
SportDb.lang.lang = value
|
|
18
|
+
|
|
19
|
+
## todo/fix: change SportDb.lang to SportDb.parser.lang
|
|
20
|
+
## or lang_parser or utils or something !!!!
|
|
21
|
+
## use SportDb.lang only as a read-only shortcut
|
|
22
|
+
# a la catalog for config.lang!!!!
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
end # class Configuration
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
## lets you use
|
|
29
|
+
## SportDb::Import.configure do |config|
|
|
30
|
+
## config.lang = 'it'
|
|
31
|
+
## end
|
|
32
|
+
def self.configure() yield( config ); end
|
|
33
|
+
|
|
34
|
+
def self.config() @config ||= Configuration.new; end
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
end # module Import
|
|
38
|
+
end # module SportDb
|
|
39
|
+
|
|
@@ -1,28 +1,28 @@
|
|
|
1
|
-
|
|
2
|
-
module SportDb
|
|
3
|
-
class CsvGoalParser
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
def self.read( path )
|
|
7
|
-
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
|
8
|
-
parse( txt )
|
|
9
|
-
end
|
|
10
|
-
|
|
11
|
-
def self.parse( txt )
|
|
12
|
-
new( txt ).parse
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
def initialize( txt )
|
|
17
|
-
@txt = txt
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
def parse
|
|
21
|
-
rows = parse_csv( @txt )
|
|
22
|
-
recs = rows.map { |row| Sports::GoalEvent.build( row ) }
|
|
23
|
-
## pp recs[0]
|
|
24
|
-
recs
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
end # class CsvGoalParser
|
|
28
|
-
end # module Sports
|
|
1
|
+
|
|
2
|
+
module SportDb
|
|
3
|
+
class CsvGoalParser
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def self.read( path )
|
|
7
|
+
txt = File.open( path, 'r:utf-8' ) {|f| f.read } ## note: make sure to use (assume) utf-8
|
|
8
|
+
parse( txt )
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def self.parse( txt )
|
|
12
|
+
new( txt ).parse
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def initialize( txt )
|
|
17
|
+
@txt = txt
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def parse
|
|
21
|
+
rows = parse_csv( @txt )
|
|
22
|
+
recs = rows.map { |row| Sports::GoalEvent.build( row ) }
|
|
23
|
+
## pp recs[0]
|
|
24
|
+
recs
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
end # class CsvGoalParser
|
|
28
|
+
end # module SportDb
|
|
@@ -116,7 +116,7 @@ module SportDb
|
|
|
116
116
|
|
|
117
117
|
header_league = find_header( headers, ['League'] )
|
|
118
118
|
headers_mapping[:league] = header_league if header_league
|
|
119
|
-
|
|
119
|
+
else
|
|
120
120
|
## else try footballdata.uk and others
|
|
121
121
|
headers_mapping[:team1] = find_header( headers, ['HomeTeam', 'HT', 'Home'] )
|
|
122
122
|
headers_mapping[:team2] = find_header( headers, ['AwayTeam', 'AT', 'Away'] )
|
|
@@ -1,90 +1,90 @@
|
|
|
1
|
-
#####################
|
|
2
|
-
# helpers for parsing & finding match status e.g.
|
|
3
|
-
# - cancelled / canceled
|
|
4
|
-
# - awarded
|
|
5
|
-
# - abandoned
|
|
6
|
-
# - replay
|
|
7
|
-
# etc.
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
module SportDb
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
### todo/fix: move Status inside Match struct - why? why not?
|
|
14
|
-
|
|
15
|
-
class Status
|
|
16
|
-
# note: use a class as an "enum"-like namespace for now - why? why not?
|
|
17
|
-
# move class into Match e.g. Match::Status - why? why not?
|
|
18
|
-
CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
|
|
19
|
-
AWARDED = 'AWARDED'
|
|
20
|
-
POSTPONED = 'POSTPONED'
|
|
21
|
-
ABANDONED = 'ABANDONED'
|
|
22
|
-
REPLAY = 'REPLAY'
|
|
23
|
-
end # class Status
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class StatusParser
|
|
28
|
-
|
|
29
|
-
def self.parse( str )
|
|
30
|
-
## note: returns nil if no match found
|
|
31
|
-
## note: english usage - cancelled (in UK), canceled (in US)
|
|
32
|
-
if str =~ /^(cancelled|
|
|
33
|
-
canceled|
|
|
34
|
-
can\.
|
|
35
|
-
)/xi
|
|
36
|
-
Status::CANCELLED
|
|
37
|
-
elsif str =~ /^(awarded|
|
|
38
|
-
awd\.
|
|
39
|
-
)/xi
|
|
40
|
-
Status::AWARDED
|
|
41
|
-
elsif str =~ /^(postponed
|
|
42
|
-
)/xi
|
|
43
|
-
Status::POSTPONED
|
|
44
|
-
elsif str =~ /^(abandoned|
|
|
45
|
-
abd\.
|
|
46
|
-
)/xi
|
|
47
|
-
Status::ABANDONED
|
|
48
|
-
elsif str =~ /^(replay
|
|
49
|
-
)/xi
|
|
50
|
-
Status::REPLAY
|
|
51
|
-
else
|
|
52
|
-
# no match
|
|
53
|
-
nil
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
RUN_RE = /\[
|
|
59
|
-
(?<text>[^\]]+)
|
|
60
|
-
\]
|
|
61
|
-
/x
|
|
62
|
-
def self.find!( line )
|
|
63
|
-
## for now check all "protected" text run blocks e.g. []
|
|
64
|
-
## puts "line: >#{line}<"
|
|
65
|
-
|
|
66
|
-
status = nil
|
|
67
|
-
|
|
68
|
-
str = line
|
|
69
|
-
while m = str.match( RUN_RE )
|
|
70
|
-
str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
|
|
71
|
-
|
|
72
|
-
## check for status match
|
|
73
|
-
match_str = m[0] ## keep a copy of the match string (for later sub)
|
|
74
|
-
text = m[:text].strip
|
|
75
|
-
## puts " text: >#{text}<"
|
|
76
|
-
|
|
77
|
-
status = parse( text )
|
|
78
|
-
|
|
79
|
-
if status
|
|
80
|
-
line.sub!( match_str, "[STATUS.#{status}]" )
|
|
81
|
-
break
|
|
82
|
-
end
|
|
83
|
-
end # while match
|
|
84
|
-
|
|
85
|
-
status
|
|
86
|
-
end # method find!
|
|
87
|
-
end # class StatusParser
|
|
88
|
-
|
|
89
|
-
end # module SportDb
|
|
90
|
-
|
|
1
|
+
#####################
|
|
2
|
+
# helpers for parsing & finding match status e.g.
|
|
3
|
+
# - cancelled / canceled
|
|
4
|
+
# - awarded
|
|
5
|
+
# - abandoned
|
|
6
|
+
# - replay
|
|
7
|
+
# etc.
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
module SportDb
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
### todo/fix: move Status inside Match struct - why? why not?
|
|
14
|
+
|
|
15
|
+
class Status
|
|
16
|
+
# note: use a class as an "enum"-like namespace for now - why? why not?
|
|
17
|
+
# move class into Match e.g. Match::Status - why? why not?
|
|
18
|
+
CANCELLED = 'CANCELLED' # canceled (US spelling), cancelled (UK spelling) - what to use?
|
|
19
|
+
AWARDED = 'AWARDED'
|
|
20
|
+
POSTPONED = 'POSTPONED'
|
|
21
|
+
ABANDONED = 'ABANDONED'
|
|
22
|
+
REPLAY = 'REPLAY'
|
|
23
|
+
end # class Status
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class StatusParser
|
|
28
|
+
|
|
29
|
+
def self.parse( str )
|
|
30
|
+
## note: returns nil if no match found
|
|
31
|
+
## note: english usage - cancelled (in UK), canceled (in US)
|
|
32
|
+
if str =~ /^(cancelled|
|
|
33
|
+
canceled|
|
|
34
|
+
can\.
|
|
35
|
+
)/xi
|
|
36
|
+
Status::CANCELLED
|
|
37
|
+
elsif str =~ /^(awarded|
|
|
38
|
+
awd\.
|
|
39
|
+
)/xi
|
|
40
|
+
Status::AWARDED
|
|
41
|
+
elsif str =~ /^(postponed
|
|
42
|
+
)/xi
|
|
43
|
+
Status::POSTPONED
|
|
44
|
+
elsif str =~ /^(abandoned|
|
|
45
|
+
abd\.
|
|
46
|
+
)/xi
|
|
47
|
+
Status::ABANDONED
|
|
48
|
+
elsif str =~ /^(replay
|
|
49
|
+
)/xi
|
|
50
|
+
Status::REPLAY
|
|
51
|
+
else
|
|
52
|
+
# no match
|
|
53
|
+
nil
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
RUN_RE = /\[
|
|
59
|
+
(?<text>[^\]]+)
|
|
60
|
+
\]
|
|
61
|
+
/x
|
|
62
|
+
def self.find!( line )
|
|
63
|
+
## for now check all "protected" text run blocks e.g. []
|
|
64
|
+
## puts "line: >#{line}<"
|
|
65
|
+
|
|
66
|
+
status = nil
|
|
67
|
+
|
|
68
|
+
str = line
|
|
69
|
+
while m = str.match( RUN_RE )
|
|
70
|
+
str = m.post_match ## keep on processing rest of line/str (a.k.a. post match string)
|
|
71
|
+
|
|
72
|
+
## check for status match
|
|
73
|
+
match_str = m[0] ## keep a copy of the match string (for later sub)
|
|
74
|
+
text = m[:text].strip
|
|
75
|
+
## puts " text: >#{text}<"
|
|
76
|
+
|
|
77
|
+
status = parse( text )
|
|
78
|
+
|
|
79
|
+
if status
|
|
80
|
+
line.sub!( match_str, "[STATUS.#{status}]" )
|
|
81
|
+
break
|
|
82
|
+
end
|
|
83
|
+
end # while match
|
|
84
|
+
|
|
85
|
+
status
|
|
86
|
+
end # method find!
|
|
87
|
+
end # class StatusParser
|
|
88
|
+
|
|
89
|
+
end # module SportDb
|
|
90
|
+
|
|
@@ -1,87 +1,87 @@
|
|
|
1
|
-
|
|
2
|
-
module SportDb
|
|
3
|
-
module NameHelper
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
## note: allow placeholder years to e.g. (-___) or (-????)
|
|
7
|
-
## for marking missing (to be filled in) years
|
|
8
|
-
## e.g. (1887-1911), (-2013),
|
|
9
|
-
## (1946-2001, 2013-) etc.
|
|
10
|
-
## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
|
|
11
|
-
YEAR_RE = %r{\(
|
|
12
|
-
[0-9, ?_-]+? # note: non-greedy (minimum/first) match
|
|
13
|
-
\)}x
|
|
14
|
-
|
|
15
|
-
def strip_year( name )
|
|
16
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
|
17
|
-
## (1946-2001, 2013-) etc.
|
|
18
|
-
## todo/check: only sub once (not global) - why? why not?
|
|
19
|
-
name.gsub( YEAR_RE, '' ).strip
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
def has_year?( name ) name =~ YEAR_RE; end
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
LANG_RE = %r{\[
|
|
26
|
-
[a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
|
|
27
|
-
\]}x
|
|
28
|
-
def strip_lang( name )
|
|
29
|
-
name.gsub( LANG_RE, '' ).strip
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
def has_lang?( name ) name =~ LANG_RE; end
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def sanitize( name )
|
|
36
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
|
37
|
-
## (1946-2001,2013-) etc.
|
|
38
|
-
name = strip_year( name )
|
|
39
|
-
## check lang codes e.g. [en], [fr], etc.
|
|
40
|
-
name = strip_lang( name )
|
|
41
|
-
name
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
## note: also add (),’,− etc. e.g.
|
|
46
|
-
## Estudiantes (LP) => Estudiantes LP
|
|
47
|
-
## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
|
|
48
|
-
## Myllykosken Pallo −47 => Myllykosken Pallo 47
|
|
49
|
-
##
|
|
50
|
-
## add & too!!
|
|
51
|
-
## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
|
|
52
|
-
|
|
53
|
-
NORM_RE = %r{
|
|
54
|
-
[.'’º/()&_−-]
|
|
55
|
-
}x # note: in [] dash (-) if last doesn't need to get escaped
|
|
56
|
-
## note: remove all dots (.), dash (-), ', º, /, etc.
|
|
57
|
-
# . U+002E (46) - FULL STOP
|
|
58
|
-
# ' U+0027 (39) - APOSTROPHE
|
|
59
|
-
# ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
|
|
60
|
-
# º U+00BA (186) - MASCULINE ORDINAL INDICATOR
|
|
61
|
-
# / U+002F (47) - SOLIDUS
|
|
62
|
-
# ( U+0028 (40) - LEFT PARENTHESIS
|
|
63
|
-
# ) U+0029 (41) - RIGHT PARENTHESIS
|
|
64
|
-
# − U+2212 (8722) - MINUS SIGN
|
|
65
|
-
# - U+002D (45) - HYPHEN-MINUS
|
|
66
|
-
|
|
67
|
-
## for norm(alizing) names
|
|
68
|
-
def strip_norm( name )
|
|
69
|
-
name.gsub( NORM_RE, '' )
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
def normalize( name )
|
|
73
|
-
# note: do NOT call sanitize here (keep normalize "atomic" for reuse)
|
|
74
|
-
name = strip_norm( name )
|
|
75
|
-
name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
|
|
76
|
-
|
|
77
|
-
## todo/check: use our own downcase - why? why not?
|
|
78
|
-
name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
|
|
79
|
-
name
|
|
80
|
-
end
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
def variants( name ) Variant.find( name ); end
|
|
84
|
-
|
|
85
|
-
end # module NameHelper
|
|
86
|
-
end # module SportDb
|
|
87
|
-
|
|
1
|
+
|
|
2
|
+
module SportDb
|
|
3
|
+
module NameHelper
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
## note: allow placeholder years too, e.g. (-___) or (-????)
|
|
7
|
+
## for marking missing (to be filled in) years
|
|
8
|
+
## e.g. (1887-1911), (-2013),
|
|
9
|
+
## (1946-2001, 2013-) etc.
|
|
10
|
+
## todo/check: make more strict e.g. only accept 4-digit years? - why? why not?
|
|
11
|
+
YEAR_RE = %r{\(
|
|
12
|
+
[0-9, ?_-]+? # note: non-greedy (minimum/first) match
|
|
13
|
+
\)}x
|
|
14
|
+
|
|
15
|
+
def strip_year( name )
|
|
16
|
+
## check for year(s) e.g. (1887-1911), (-2013),
|
|
17
|
+
## (1946-2001, 2013-) etc.
|
|
18
|
+
## todo/check: only sub once (not global) - why? why not?
|
|
19
|
+
name.gsub( YEAR_RE, '' ).strip
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def has_year?( name ) name =~ YEAR_RE; end
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
LANG_RE = %r{\[
|
|
26
|
+
[a-z]{1,2} # note also allow single-letter [a] or [d] or [e] - why? why not?
|
|
27
|
+
\]}x
|
|
28
|
+
def strip_lang( name )
|
|
29
|
+
name.gsub( LANG_RE, '' ).strip
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def has_lang?( name ) name =~ LANG_RE; end
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def sanitize( name )
|
|
36
|
+
## check for year(s) e.g. (1887-1911), (-2013),
|
|
37
|
+
## (1946-2001,2013-) etc.
|
|
38
|
+
name = strip_year( name )
|
|
39
|
+
## check lang codes e.g. [en], [fr], etc.
|
|
40
|
+
name = strip_lang( name )
|
|
41
|
+
name
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
## note: also add (),’,− etc. e.g.
|
|
46
|
+
## Estudiantes (LP) => Estudiantes LP
|
|
47
|
+
## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
|
|
48
|
+
## Myllykosken Pallo −47 => Myllykosken Pallo 47
|
|
49
|
+
##
|
|
50
|
+
## add & too!!
|
|
51
|
+
## e.g. Brighton & Hove Albion => Brighton Hove Albion -- and others in England
|
|
52
|
+
|
|
53
|
+
NORM_RE = %r{
|
|
54
|
+
[.'’º/()&_−-]
|
|
55
|
+
}x # note: in [] dash (-) if last doesn't need to get escaped
|
|
56
|
+
## note: remove all dots (.), dash (-), ', º, /, etc.
|
|
57
|
+
# . U+002E (46) - FULL STOP
|
|
58
|
+
# ' U+0027 (39) - APOSTROPHE
|
|
59
|
+
# ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
|
|
60
|
+
# º U+00BA (186) - MASCULINE ORDINAL INDICATOR
|
|
61
|
+
# / U+002F (47) - SOLIDUS
|
|
62
|
+
# ( U+0028 (40) - LEFT PARENTHESIS
|
|
63
|
+
# ) U+0029 (41) - RIGHT PARENTHESIS
|
|
64
|
+
# − U+2212 (8722) - MINUS SIGN
|
|
65
|
+
# - U+002D (45) - HYPHEN-MINUS
|
|
66
|
+
|
|
67
|
+
## for norm(alizing) names
|
|
68
|
+
def strip_norm( name )
|
|
69
|
+
name.gsub( NORM_RE, '' )
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def normalize( name )
|
|
73
|
+
# note: do NOT call sanitize here (keep normalize "atomic" for reuse)
|
|
74
|
+
name = strip_norm( name )
|
|
75
|
+
name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
|
|
76
|
+
|
|
77
|
+
## todo/check: use our own downcase - why? why not?
|
|
78
|
+
name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
|
|
79
|
+
name
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def variants( name ) Variant.find( name ); end
|
|
84
|
+
|
|
85
|
+
end # module NameHelper
|
|
86
|
+
end # module SportDb
|
|
87
|
+
|
|
@@ -4,6 +4,25 @@ module Sports
|
|
|
4
4
|
# note: check that shape/structure/fields/attributes match
|
|
5
5
|
# the ActiveRecord model !!!!
|
|
6
6
|
|
|
7
|
+
## add city here
|
|
8
|
+
## use module World - why? why not?
|
|
9
|
+
|
|
10
|
+
class City
|
|
11
|
+
attr_reader :key, :name, :country
|
|
12
|
+
attr_accessor :alt_names
|
|
13
|
+
|
|
14
|
+
def initialize( key: nil,
|
|
15
|
+
name:, country: )
|
|
16
|
+
## note: auto-generate key "on-the-fly" if missing for now - why? why not?
|
|
17
|
+
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
|
18
|
+
@key = key || unaccent(name).downcase.gsub( /[^a-z]/, '' ) + "_" + country.key
|
|
19
|
+
@name = name
|
|
20
|
+
@country = country
|
|
21
|
+
@alt_names = []
|
|
22
|
+
end
|
|
23
|
+
end # class City
|
|
24
|
+
|
|
25
|
+
|
|
7
26
|
class Country
|
|
8
27
|
|
|
9
28
|
## note: is read-only/immutable for now - why? why not?
|
|
@@ -14,7 +33,15 @@ class Country
|
|
|
14
33
|
def initialize( key: nil, name:, code:, tags: [] )
|
|
15
34
|
## note: auto-generate key "on-the-fly" if missing for now - why? why not?
|
|
16
35
|
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
|
17
|
-
@key =
|
|
36
|
+
@key = begin
|
|
37
|
+
if key
|
|
38
|
+
key
|
|
39
|
+
elsif code
|
|
40
|
+
code.downcase
|
|
41
|
+
else
|
|
42
|
+
unaccent( name ).downcase.gsub( /[^a-z]/, '' )
|
|
43
|
+
end
|
|
44
|
+
end
|
|
18
45
|
@name, @code = name, code
|
|
19
46
|
@alt_names = []
|
|
20
47
|
@tags = tags
|