sportdb-formats 1.0.3 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sportdb/formats/country/country_index.rb +7 -7
- data/lib/sportdb/formats/country/country_reader.rb +26 -6
- data/lib/sportdb/formats/league/league_reader.rb +3 -3
- data/lib/sportdb/formats/package.rb +29 -5
- data/lib/sportdb/formats/structs/country.rb +6 -3
- data/lib/sportdb/formats/version.rb +1 -1
- data/test/test_country_index.rb +4 -4
- data/test/test_country_reader.rb +34 -4
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5ccd241256a964b430d9c9aa515901641ed5a0e2
|
4
|
+
data.tar.gz: 1d997241f388000d7b2c19e26a92c9ba39c8e172
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1d7d1e6b71009ec598f01f59ead597269ea3c4b48bc3e2f5be0a7257879114f2b3110937dfb23c153962cd7dd0a61e23d0050a8eb40c6652acf34a8a1720071c
|
7
|
+
data.tar.gz: 392a8eaf8c0d1f14b021a45ca39ce491f8edceac0cf9ba33ffde920e638a2cf1fd2adf836e596dc2100c233f5ca1fe8aadf47526fb1837de855acaf84974b19a
|
@@ -30,11 +30,11 @@ class CountryIndex
|
|
30
30
|
## auto-fill countries
|
31
31
|
## pp recs
|
32
32
|
recs.each do |rec|
|
33
|
-
## rec e.g. { key:'af',
|
33
|
+
## rec e.g. { key:'af', code:'AFG', name:'Afghanistan'}
|
34
34
|
|
35
35
|
@countries << rec
|
36
36
|
|
37
|
-
## add codes lookups - key,
|
37
|
+
## add codes lookups - key, code, ...
|
38
38
|
if @countries_by_code[ rec.key ]
|
39
39
|
puts "** !! ERROR !! country code (key) >#{rec.key}< already exits!!"
|
40
40
|
exit 1
|
@@ -42,13 +42,13 @@ class CountryIndex
|
|
42
42
|
@countries_by_code[ rec.key ] = rec
|
43
43
|
end
|
44
44
|
|
45
|
-
## add
|
46
|
-
if rec.key != rec.
|
47
|
-
if @countries_by_code[ rec.
|
48
|
-
puts "** !! ERROR !! country code
|
45
|
+
## add code (only) if different from key
|
46
|
+
if rec.key != rec.code.downcase
|
47
|
+
if @countries_by_code[ rec.code.downcase ]
|
48
|
+
puts "** !! ERROR !! country code >#{rec.code.downcase}< already exits!!"
|
49
49
|
exit 1
|
50
50
|
else
|
51
|
-
@countries_by_code[ rec.
|
51
|
+
@countries_by_code[ rec.code.downcase ] = rec
|
52
52
|
end
|
53
53
|
end
|
54
54
|
|
@@ -43,11 +43,31 @@ def parse
|
|
43
43
|
# e.g. East Germany (-1989) => East Germany (-1989)
|
44
44
|
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
45
45
|
last_country.alt_names += values
|
46
|
+
elsif line =~ /^-[ ]*(\d{4})
|
47
|
+
[ ]+
|
48
|
+
(.+)$
|
49
|
+
/x ## check for historic lines e.g. -1989
|
50
|
+
year = $1.to_i
|
51
|
+
parts = $2.split( /=>|⇒/ )
|
52
|
+
values = parts[0].split( ',' )
|
53
|
+
values = values.map { |value| value.strip.gsub( /[ \t]+/, ' ' ) }
|
54
|
+
|
55
|
+
name = values[0]
|
56
|
+
code = values[1]
|
57
|
+
|
58
|
+
last_country = country = Country.new( name: "#{name} (-#{year})",
|
59
|
+
code: code )
|
60
|
+
country.alt_names << name ## note: do NOT forget - add name without year to alt_names!!!
|
61
|
+
|
62
|
+
countries << country
|
63
|
+
## todo/fix: add reference to country today (in parts[1] !!!!)
|
46
64
|
else
|
47
65
|
## assume "regular" line
|
48
66
|
## check if starts with id (todo/check: use a more "strict"/better regex capture pattern!!!)
|
49
67
|
## note: allow country codes upto 4 (!!) e.g. Northern Cyprus
|
50
|
-
if line =~ /^([a-z]{2,4})
|
68
|
+
if line =~ /^([a-z]{2,4})
|
69
|
+
[ ]+
|
70
|
+
(.+)$/x
|
51
71
|
key = $1
|
52
72
|
values = $2.split( ',' )
|
53
73
|
## strip and squish (white)spaces
|
@@ -60,14 +80,14 @@ def parse
|
|
60
80
|
geos = split_geo( values[0] )
|
61
81
|
name = geos[0] ## note: ignore all other geos for now
|
62
82
|
|
63
|
-
## note: allow
|
64
|
-
|
83
|
+
## note: allow country codes up to 4 (!!) e.g. Northern Cyprus
|
84
|
+
code = if values[1] && values[1] =~ /^[A-Z]{3,4}$/ ## note: also check format
|
65
85
|
values[1]
|
66
86
|
else
|
67
87
|
if values[1]
|
68
|
-
puts "** !!! ERROR !!! wrong
|
88
|
+
puts "** !!! ERROR !!! wrong code format >#{values[1]}<; expected three (or four)-letter all up-case"
|
69
89
|
else
|
70
|
-
puts "** !!! ERROR !!! missing
|
90
|
+
puts "** !!! ERROR !!! missing code for (canonical) country name"
|
71
91
|
end
|
72
92
|
exit 1
|
73
93
|
end
|
@@ -80,7 +100,7 @@ def parse
|
|
80
100
|
|
81
101
|
last_country = country = Country.new( key: key,
|
82
102
|
name: name,
|
83
|
-
|
103
|
+
code: code,
|
84
104
|
tags: tags )
|
85
105
|
countries << country
|
86
106
|
else
|
@@ -113,9 +113,9 @@ def parse
|
|
113
113
|
## add a list of (auto-)excluded country codes with conflicts? why? why not?
|
114
114
|
## cl - a) Chile b) Champions League
|
115
115
|
alt_names_auto << "#{country.key.upcase}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
116
|
-
if country.key.upcase != country.
|
117
|
-
alt_names_auto << "#{country.
|
118
|
-
alt_names_auto << "#{country.
|
116
|
+
if country.key.upcase != country.code
|
117
|
+
alt_names_auto << "#{country.code} #{league_key.upcase.gsub('.', ' ')}"
|
118
|
+
alt_names_auto << "#{country.code}" if league_key == '1' ## add shortcut for top level 1 (just country key)
|
119
119
|
end
|
120
120
|
alt_names_auto << "#{country.name} #{league_key}" if league_key =~ /^[0-9]+$/ ## if all numeric e.g. add Austria 1 etc.
|
121
121
|
else ## assume int'l (no country) e.g. champions league, etc.
|
@@ -4,31 +4,50 @@ module SportDb
|
|
4
4
|
|
5
5
|
## todo/fix: make all regexes case-insensitive with /i option - why? why not?
|
6
6
|
## e.g. .TXT and .txt
|
7
|
+
## yes!! use /i option!!!!!
|
7
8
|
|
8
9
|
CONF_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
9
10
|
\.conf\.txt$
|
10
11
|
}x
|
11
12
|
|
13
|
+
## leagues.txt or leagues_en.txt
|
14
|
+
## remove support for en.leagues.txt - why? why not?
|
12
15
|
LEAGUES_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
13
16
|
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.wiki.txt
|
14
|
-
leagues
|
17
|
+
leagues
|
18
|
+
(?:_[a-z0-9_-]+)?
|
19
|
+
\.txt$
|
15
20
|
}x
|
16
21
|
|
22
|
+
## clubs.txt or clubs_en.txt
|
23
|
+
## remove support for en.clubs.txt - why? why not?
|
17
24
|
CLUBS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
18
25
|
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.txt
|
19
|
-
clubs
|
26
|
+
clubs
|
27
|
+
(?:_[a-z0-9_-]+)?
|
28
|
+
\.txt$
|
20
29
|
}x
|
21
30
|
|
22
31
|
CLUBS_WIKI_RE = %r{ (?:^|/) # beginning (^) or beginning of path (/)
|
23
32
|
(?:[a-z]{1,4}\.)? # optional country code/key e.g. eng.clubs.wiki.txt
|
24
|
-
clubs
|
33
|
+
clubs
|
34
|
+
(?:_[a-z0-9_-]+)?
|
35
|
+
\.wiki\.txt$
|
25
36
|
}x
|
26
37
|
|
27
38
|
CLUB_PROPS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
28
39
|
(?: [a-z]{1,4}\. )? # optional country code/key e.g. eng.clubs.props.txt
|
29
|
-
clubs
|
40
|
+
clubs
|
41
|
+
(?:_[a-z0-9_-]+)?
|
42
|
+
\.props\.txt$
|
30
43
|
}x
|
31
44
|
|
45
|
+
## teams.txt or teams_history.txt
|
46
|
+
TEAMS_RE = %r{ (?: ^|/ ) # beginning (^) or beginning of path (/)
|
47
|
+
teams
|
48
|
+
(?:_[a-z0-9_-]+)?
|
49
|
+
\.txt$
|
50
|
+
}x
|
32
51
|
|
33
52
|
### season folder:
|
34
53
|
## e.g. /2019-20 or
|
@@ -36,7 +55,7 @@ module SportDb
|
|
36
55
|
## /2016--france
|
37
56
|
SEASON_RE = %r{ (?:
|
38
57
|
\d{4}-\d{2}
|
39
|
-
| \d{4}(--[
|
58
|
+
| \d{4}(--[a-z0-9_-]+)?
|
40
59
|
)
|
41
60
|
}x
|
42
61
|
SEASON = SEASON_RE.source ## "inline" helper for embedding in other regexes - keep? why? why not?
|
@@ -74,6 +93,8 @@ module SportDb
|
|
74
93
|
end
|
75
94
|
|
76
95
|
|
96
|
+
def self.find_teams( path, pattern: TEAMS_RE ) find( path, pattern ); end
|
97
|
+
def self.match_teams( path ) TEAMS_RE.match( path ); end
|
77
98
|
|
78
99
|
def self.find_clubs( path, pattern: CLUBS_RE ) find( path, pattern ); end
|
79
100
|
def self.find_clubs_wiki( path, pattern: CLUBS_WIKI_RE ) find( path, pattern ); end
|
@@ -98,6 +119,9 @@ module SportDb
|
|
98
119
|
## add match_match and match_match_csv - why? why not?
|
99
120
|
|
100
121
|
class << self
|
122
|
+
alias_method :match_teams?, :match_teams
|
123
|
+
alias_method :teams?, :match_teams
|
124
|
+
|
101
125
|
alias_method :match_clubs?, :match_clubs
|
102
126
|
alias_method :clubs?, :match_clubs
|
103
127
|
|
@@ -11,11 +11,14 @@ class Country
|
|
11
11
|
|
12
12
|
## note: is read-only/immutable for now - why? why not?
|
13
13
|
## add cities (array/list) - why? why not?
|
14
|
-
attr_reader :key, :name, :
|
14
|
+
attr_reader :key, :name, :code, :tags
|
15
15
|
attr_accessor :alt_names
|
16
16
|
|
17
|
-
def initialize( key
|
18
|
-
|
17
|
+
def initialize( key: nil, name:, code:, tags: [] )
|
18
|
+
## note: auto-generate key "on-the-fly" if missing for now - why? why not?
|
19
|
+
## note: quick hack - auto-generate key, that is, remove all non-ascii chars and downcase
|
20
|
+
@key = key || name.downcase.gsub( /[^a-z]/, '' )
|
21
|
+
@name, @code = name, code
|
19
22
|
@alt_names = []
|
20
23
|
@tags = tags
|
21
24
|
end
|
data/test/test_country_index.rb
CHANGED
@@ -16,12 +16,12 @@ class TestCountryIndex < MiniTest::Test
|
|
16
16
|
eng = countries[:eng]
|
17
17
|
assert_equal 'eng', eng.key
|
18
18
|
assert_equal 'England', eng.name
|
19
|
-
assert_equal 'ENG', eng.
|
19
|
+
assert_equal 'ENG', eng.code
|
20
20
|
|
21
21
|
at = countries[:at]
|
22
22
|
assert_equal 'at', at.key
|
23
23
|
assert_equal 'Austria', at.name
|
24
|
-
assert_equal 'AUT', at.
|
24
|
+
assert_equal 'AUT', at.code
|
25
25
|
assert_equal ['Österreich [de]'], at.alt_names
|
26
26
|
|
27
27
|
assert at == countries['AT']
|
@@ -48,13 +48,13 @@ class TestCountryIndex < MiniTest::Test
|
|
48
48
|
assert at == countries.parse( 'Österreich • Austria' )
|
49
49
|
assert at == countries.parse( 'Austria' )
|
50
50
|
assert at == countries.parse( 'at' ) ## (iso alpha2) country code
|
51
|
-
assert at == countries.parse( 'AUT' ) ## fifa code
|
51
|
+
assert at == countries.parse( 'AUT' ) ## (fifa) country code
|
52
52
|
|
53
53
|
|
54
54
|
de = countries[:de]
|
55
55
|
assert_equal 'de', de.key
|
56
56
|
assert_equal 'Germany', de.name
|
57
|
-
assert_equal 'GER', de.
|
57
|
+
assert_equal 'GER', de.code
|
58
58
|
assert_equal ['Deutschland [de]'], de.alt_names
|
59
59
|
|
60
60
|
assert de == countries.parse( 'Deutschland (de) • Germany' )
|
data/test/test_country_reader.rb
CHANGED
@@ -16,12 +16,12 @@ class TestCountryReader < MiniTest::Test
|
|
16
16
|
assert_equal 232, recs.size
|
17
17
|
|
18
18
|
assert_equal 'Albania', recs[0].name
|
19
|
-
assert_equal 'ALB', recs[0].
|
19
|
+
assert_equal 'ALB', recs[0].code
|
20
20
|
assert_equal 'al', recs[0].key
|
21
21
|
assert_equal ['fifa', 'uefa'], recs[0].tags
|
22
22
|
|
23
23
|
assert_equal 'Andorra', recs[1].name
|
24
|
-
assert_equal 'AND', recs[1].
|
24
|
+
assert_equal 'AND', recs[1].code
|
25
25
|
assert_equal 'ad', recs[1].key
|
26
26
|
assert_equal ['fifa', 'uefa'], recs[1].tags
|
27
27
|
end
|
@@ -44,16 +44,46 @@ TXT
|
|
44
44
|
|
45
45
|
assert_equal 4, recs.size
|
46
46
|
assert_equal 'Afghanistan', recs[0].name
|
47
|
-
assert_equal 'AFG', recs[0].
|
47
|
+
assert_equal 'AFG', recs[0].code
|
48
48
|
assert_equal 'af', recs[0].key
|
49
49
|
assert_equal [], recs[0].alt_names
|
50
50
|
assert_equal ['fifa', 'afc'], recs[0].tags
|
51
51
|
|
52
52
|
assert_equal 'American Samoa', recs[3].name
|
53
|
-
assert_equal 'ASA', recs[3].
|
53
|
+
assert_equal 'ASA', recs[3].code
|
54
54
|
assert_equal 'as', recs[3].key
|
55
55
|
assert_equal ['Am. Samoa'], recs[3].alt_names
|
56
56
|
assert_equal [], recs[3].tags
|
57
57
|
end
|
58
58
|
|
59
|
+
def test_parse_historic
|
60
|
+
recs = SportDb::Import::CountryReader.parse( <<TXT )
|
61
|
+
###########################################
|
62
|
+
# Former national teams
|
63
|
+
# with former FIFA country codes etc.
|
64
|
+
-1992 Czechoslovakia, TCH ⇒ Czech Republic
|
65
|
+
|
66
|
+
-1991 Soviet Union, URS ⇒ Russia
|
67
|
+
|
68
|
+
-1989 West Germany, FRG => Germany
|
69
|
+
-1989 East Germany, GDR => Germany
|
70
|
+
TXT
|
71
|
+
|
72
|
+
pp recs
|
73
|
+
|
74
|
+
assert_equal 4, recs.size
|
75
|
+
assert_equal 'Czechoslovakia (-1992)', recs[0].name
|
76
|
+
assert_equal 'TCH', recs[0].code
|
77
|
+
assert_equal 'czechoslovakia', recs[0].key
|
78
|
+
assert_equal ['Czechoslovakia'], recs[0].alt_names
|
79
|
+
assert_equal [], recs[0].tags
|
80
|
+
|
81
|
+
assert_equal 'East Germany (-1989)', recs[3].name
|
82
|
+
assert_equal 'GDR', recs[3].code
|
83
|
+
assert_equal 'eastgermany', recs[3].key
|
84
|
+
assert_equal ['East Germany'], recs[3].alt_names
|
85
|
+
assert_equal [], recs[3].tags
|
86
|
+
end
|
87
|
+
|
88
|
+
|
59
89
|
end # class TestCountryReader
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-formats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-05-
|
11
|
+
date: 2020-05-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: alphabets
|