sportdb-formats 0.4.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +24 -4
- data/Rakefile +3 -3
- data/lib/sportdb/formats.rb +25 -2
- data/lib/sportdb/formats/config.rb +40 -0
- data/lib/sportdb/formats/datafile.rb +42 -62
- data/lib/sportdb/formats/datafile_package.rb +160 -0
- data/lib/sportdb/formats/match/conf_parser.rb +120 -0
- data/lib/sportdb/formats/match/mapper.rb +319 -0
- data/lib/sportdb/formats/match/mapper_teams.rb +23 -0
- data/lib/sportdb/formats/match/match_parser.rb +659 -0
- data/lib/sportdb/formats/match/match_parser_auto_conf.rb +202 -0
- data/lib/sportdb/formats/name_helper.rb +84 -0
- data/lib/sportdb/formats/outline_reader.rb +53 -15
- data/lib/sportdb/formats/package.rb +172 -160
- data/lib/sportdb/formats/parser_helper.rb +81 -0
- data/lib/sportdb/formats/score/score_formats.rb +180 -0
- data/lib/sportdb/formats/score/score_parser.rb +196 -0
- data/lib/sportdb/formats/structs/country.rb +1 -43
- data/lib/sportdb/formats/structs/group.rb +25 -0
- data/lib/sportdb/formats/structs/league.rb +7 -26
- data/lib/sportdb/formats/structs/match.rb +72 -51
- data/lib/sportdb/formats/structs/round.rb +14 -4
- data/lib/sportdb/formats/structs/season.rb +3 -0
- data/lib/sportdb/formats/structs/team.rb +144 -0
- data/lib/sportdb/formats/version.rb +2 -2
- data/test/helper.rb +83 -1
- data/test/test_clubs.rb +3 -3
- data/test/test_conf.rb +65 -0
- data/test/test_datafile.rb +21 -30
- data/test/test_match.rb +0 -6
- data/test/test_match_auto.rb +72 -0
- data/test/test_match_auto_champs.rb +45 -0
- data/test/test_match_auto_euro.rb +37 -0
- data/test/test_match_auto_worldcup.rb +61 -0
- data/test/test_match_champs.rb +27 -0
- data/test/test_match_eng.rb +26 -0
- data/test/test_match_euro.rb +27 -0
- data/test/test_match_worldcup.rb +27 -0
- data/test/test_name_helper.rb +67 -0
- data/test/test_outline_reader.rb +3 -3
- data/test/test_package.rb +21 -2
- data/test/test_package_match.rb +78 -0
- data/test/test_scores.rb +67 -51
- metadata +32 -12
- data/lib/sportdb/formats/scores.rb +0 -253
- data/lib/sportdb/formats/structs/club.rb +0 -213
- data/test/test_club_helpers.rb +0 -63
- data/test/test_datafile_match.rb +0 -65
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sportdb-formats
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-05-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: alphabets
|
@@ -16,28 +16,28 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 1.0.0
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 0.
|
26
|
+
version: 1.0.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: date-formats
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - ">="
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 1.0.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - ">="
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
40
|
+
version: 1.0.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: csvreader
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.0
|
61
|
+
version: 0.1.0
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.0
|
68
|
+
version: 0.1.0
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: rubyzip
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,32 +122,52 @@ files:
|
|
122
122
|
- README.md
|
123
123
|
- Rakefile
|
124
124
|
- lib/sportdb/formats.rb
|
125
|
+
- lib/sportdb/formats/config.rb
|
125
126
|
- lib/sportdb/formats/datafile.rb
|
127
|
+
- lib/sportdb/formats/datafile_package.rb
|
126
128
|
- lib/sportdb/formats/goals.rb
|
129
|
+
- lib/sportdb/formats/match/conf_parser.rb
|
130
|
+
- lib/sportdb/formats/match/mapper.rb
|
131
|
+
- lib/sportdb/formats/match/mapper_teams.rb
|
132
|
+
- lib/sportdb/formats/match/match_parser.rb
|
133
|
+
- lib/sportdb/formats/match/match_parser_auto_conf.rb
|
134
|
+
- lib/sportdb/formats/name_helper.rb
|
127
135
|
- lib/sportdb/formats/outline_reader.rb
|
128
136
|
- lib/sportdb/formats/package.rb
|
129
|
-
- lib/sportdb/formats/
|
137
|
+
- lib/sportdb/formats/parser_helper.rb
|
138
|
+
- lib/sportdb/formats/score/score_formats.rb
|
139
|
+
- lib/sportdb/formats/score/score_parser.rb
|
130
140
|
- lib/sportdb/formats/season_utils.rb
|
131
|
-
- lib/sportdb/formats/structs/club.rb
|
132
141
|
- lib/sportdb/formats/structs/country.rb
|
142
|
+
- lib/sportdb/formats/structs/group.rb
|
133
143
|
- lib/sportdb/formats/structs/league.rb
|
134
144
|
- lib/sportdb/formats/structs/match.rb
|
135
145
|
- lib/sportdb/formats/structs/matchlist.rb
|
136
146
|
- lib/sportdb/formats/structs/round.rb
|
137
147
|
- lib/sportdb/formats/structs/season.rb
|
138
148
|
- lib/sportdb/formats/structs/standings.rb
|
149
|
+
- lib/sportdb/formats/structs/team.rb
|
139
150
|
- lib/sportdb/formats/structs/team_usage.rb
|
140
151
|
- lib/sportdb/formats/version.rb
|
141
152
|
- test/helper.rb
|
142
|
-
- test/test_club_helpers.rb
|
143
153
|
- test/test_clubs.rb
|
154
|
+
- test/test_conf.rb
|
144
155
|
- test/test_csv_reader.rb
|
145
156
|
- test/test_datafile.rb
|
146
|
-
- test/test_datafile_match.rb
|
147
157
|
- test/test_goals.rb
|
148
158
|
- test/test_match.rb
|
159
|
+
- test/test_match_auto.rb
|
160
|
+
- test/test_match_auto_champs.rb
|
161
|
+
- test/test_match_auto_euro.rb
|
162
|
+
- test/test_match_auto_worldcup.rb
|
163
|
+
- test/test_match_champs.rb
|
164
|
+
- test/test_match_eng.rb
|
165
|
+
- test/test_match_euro.rb
|
166
|
+
- test/test_match_worldcup.rb
|
167
|
+
- test/test_name_helper.rb
|
149
168
|
- test/test_outline_reader.rb
|
150
169
|
- test/test_package.rb
|
170
|
+
- test/test_package_match.rb
|
151
171
|
- test/test_scores.rb
|
152
172
|
- test/test_season.rb
|
153
173
|
homepage: https://github.com/sportdb/sport.db
|
@@ -1,253 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
|
5
|
-
class ScoresFinder
|
6
|
-
|
7
|
-
include LogUtils::Logging
|
8
|
-
|
9
|
-
|
10
|
-
## e.g. 3-4 pen. 2-2 a.e.t. (1-1, 1-1) or
|
11
|
-
## 3-4 pen. 2-2 a.e.t. (1-1, )
|
12
|
-
EN__P_ET_FT_HT__RE = /\b
|
13
|
-
(?<score1p>\d{1,2})
|
14
|
-
-
|
15
|
-
(?<score2p>\d{1,2})
|
16
|
-
\s* # allow optional spaces
|
17
|
-
(?:p|pen\.?|pso) # e.g. pen, pen., PSO, p etc.
|
18
|
-
\s* # allow optional spaces
|
19
|
-
(?<score1et>\d{1,2})
|
20
|
-
-
|
21
|
-
(?<score2et>\d{1,2})
|
22
|
-
\s* # allow optional spaces
|
23
|
-
(?:aet|a\.e\.t\.)
|
24
|
-
\s* # allow optional spaces
|
25
|
-
\(
|
26
|
-
(?<score1>\d{1,2})
|
27
|
-
-
|
28
|
-
(?<score2>\d{1,2})
|
29
|
-
\s*
|
30
|
-
,
|
31
|
-
\s*
|
32
|
-
(?:
|
33
|
-
(?<score1i>\d{1,2})
|
34
|
-
-
|
35
|
-
(?<score2i>\d{1,2})
|
36
|
-
)? # note: make half time (HT) score optional for now
|
37
|
-
\)
|
38
|
-
(?=[\s\]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
|
39
|
-
## note: \b works only after non-alphanum e.g. )
|
40
|
-
|
41
|
-
|
42
|
-
## e.g. 2-1 a.e.t. (1-1, 0-0) or
|
43
|
-
## 2-1 a.e.t. (1-1, )
|
44
|
-
EN__ET_FT_HT__RE = /\b
|
45
|
-
(?<score1et>\d{1,2})
|
46
|
-
-
|
47
|
-
(?<score2et>\d{1,2})
|
48
|
-
\s* # allow optional spaces
|
49
|
-
(?:aet|a\.e\.t\.)
|
50
|
-
\s* # allow optional spaces
|
51
|
-
\(
|
52
|
-
(?<score1>\d{1,2})
|
53
|
-
-
|
54
|
-
(?<score2>\d{1,2})
|
55
|
-
\s*
|
56
|
-
,
|
57
|
-
\s*
|
58
|
-
(?:
|
59
|
-
(?<score1i>\d{1,2})
|
60
|
-
-
|
61
|
-
(?<score2i>\d{1,2})
|
62
|
-
)? # note: make half time (HT) score optional for now
|
63
|
-
\)
|
64
|
-
(?=[\s\]]|$)/xi ## todo/check: remove lookahead assertion here - why require space?
|
65
|
-
## note: \b works only after non-alphanum e.g. )
|
66
|
-
|
67
|
-
|
68
|
-
## e.g. 2-1 (1-1)
|
69
|
-
EN__FT_HT__RE = /\b
|
70
|
-
(?<score1>\d{1,2})
|
71
|
-
-
|
72
|
-
(?<score2>\d{1,2})
|
73
|
-
\s*
|
74
|
-
\(
|
75
|
-
(?<score1i>\d{1,2})
|
76
|
-
-
|
77
|
-
(?<score2i>\d{1,2})
|
78
|
-
\)
|
79
|
-
(?=[\s\]]|$)/x ## todo/check: remove lookahead assertion here - why require space?
|
80
|
-
## note: \b works only after non-alphanum e.g. )
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
###################
|
85
|
-
# more
|
86
|
-
|
87
|
-
# e.g. 1:2 or 0:2 or 3:3 or
|
88
|
-
# 1-1 or 0-2 or 3-3 or
|
89
|
-
# 1x1 or 1X1 or 0x2 or 3x3 -- used in Brazil / Portugal
|
90
|
-
FT_RE = /\b
|
91
|
-
(?<score1>\d{1,2})
|
92
|
-
[:\-xX]
|
93
|
-
(?<score2>\d{1,2})
|
94
|
-
\b/x
|
95
|
-
|
96
|
-
|
97
|
-
# e.g. 1:2nV => after extra time a.e.t
|
98
|
-
|
99
|
-
# note: possible ending w/ . -> thus cannot use \b (will not work w/ .); use zero-width look-ahead
|
100
|
-
ET_RE = /\b
|
101
|
-
(?<score1>\d{1,2})
|
102
|
-
[:\-xX]
|
103
|
-
(?<score2>\d{1,2})
|
104
|
-
\s? # allow optional space
|
105
|
-
(?:nv|n\.v\.|aet|a\.e\.t\.) # allow optional . e.g. nV or n.V.
|
106
|
-
(?=[\s\)\]]|$)/xi
|
107
|
-
|
108
|
-
## todo: add/allow english markers e.g. pen or p ??
|
109
|
-
|
110
|
-
# e.g. 5:4iE => penalty / after penalty a.p
|
111
|
-
|
112
|
-
|
113
|
-
# note: possible ending w/ . -> thus cannot use \b (will not work w/ .); use zero-width look-ahead
|
114
|
-
P_RE = /\b
|
115
|
-
(?<score1>\d{1,2})
|
116
|
-
[:\-xX]
|
117
|
-
(?<score2>\d{1,2})
|
118
|
-
\s? # allow optional space
|
119
|
-
(?:iE|i\.E\.|p|pen|PSO) # allow optional . e.g. iE or i.E.
|
120
|
-
(?=[\s\)\]]|$)/xi
|
121
|
-
|
122
|
-
|
123
|
-
## todo: allow all-in-one "literal form a la kicker" e.g.
|
124
|
-
# 2:2 (1:1, 1:0) n.V. 5:1 i.E.
|
125
|
-
|
126
|
-
def initialize
|
127
|
-
# nothing here for now
|
128
|
-
end
|
129
|
-
|
130
|
-
def find!( line, opts={} )
|
131
|
-
|
132
|
-
### fix: add and match all-in-one literal first, followed by
|
133
|
-
|
134
|
-
# note: always call after find_dates !!!
|
135
|
-
# scores match date-like patterns!! e.g. 10-11 or 10:00 etc.
|
136
|
-
# -- note: score might have two digits too
|
137
|
-
|
138
|
-
### fix: depending on language allow 1:1 or 1-1
|
139
|
-
## do NOT allow mix and match
|
140
|
-
## e.g. default to en is 1-1
|
141
|
-
## de is 1:1 etc.
|
142
|
-
|
143
|
-
|
144
|
-
# extract score from line
|
145
|
-
# and return it
|
146
|
-
# note: side effect - replaces the matched score in the line string with a placeholder tag
|
147
|
-
|
148
|
-
|
149
|
-
score1i = nil # half time (ht) scores
|
150
|
-
score2i = nil
|
151
|
-
|
152
|
-
score1 = nil # full time (ft) scores
|
153
|
-
score2 = nil
|
154
|
-
|
155
|
-
score1et = nil # extra time (et) scores
|
156
|
-
score2et = nil
|
157
|
-
|
158
|
-
score1p = nil # penalty (p) scores
|
159
|
-
score2p = nil
|
160
|
-
|
161
|
-
|
162
|
-
if (m = EN__P_ET_FT_HT__RE.match( line ))
|
163
|
-
if m[:score1i] && m[:score2i] ## note: half time (HT) score is optional now
|
164
|
-
score1i = m[:score1i].to_i
|
165
|
-
score2i = m[:score2i].to_i
|
166
|
-
end
|
167
|
-
|
168
|
-
score1 = m[:score1].to_i
|
169
|
-
score2 = m[:score2].to_i
|
170
|
-
score1et = m[:score1et].to_i
|
171
|
-
score2et = m[:score2et].to_i
|
172
|
-
score1p = m[:score1p].to_i
|
173
|
-
score2p = m[:score2p].to_i
|
174
|
-
|
175
|
-
logger.debug " score.en__p_et_ft_ht: >#{score1p}-#{score2p} pen. #{score1et}-#{score2et} a.e.t. (#{score1}-#{score2}, #{score1i}-#{score2i})<"
|
176
|
-
|
177
|
-
line.sub!( m[0], '[SCORES.EN__P_ET_FT_HT]' )
|
178
|
-
|
179
|
-
elsif (m = EN__ET_FT_HT__RE.match( line ))
|
180
|
-
if m[:score1i] && m[:score2i] ## note: half time (HT) score is optional now
|
181
|
-
score1i = m[:score1i].to_i
|
182
|
-
score2i = m[:score2i].to_i
|
183
|
-
end
|
184
|
-
|
185
|
-
score1 = m[:score1].to_i
|
186
|
-
score2 = m[:score2].to_i
|
187
|
-
score1et = m[:score1et].to_i
|
188
|
-
score2et = m[:score2et].to_i
|
189
|
-
|
190
|
-
logger.debug " score.en__et_ft_ht: >#{score1et}-#{score2et} a.e.t. (#{score1}-#{score2}, #{score1i}-#{score2i})<"
|
191
|
-
|
192
|
-
line.sub!( m[0], '[SCORES.EN__ET_FT_HT]' )
|
193
|
-
|
194
|
-
elsif (m = EN__FT_HT__RE.match( line ))
|
195
|
-
score1i = m[:score1i].to_i
|
196
|
-
score2i = m[:score2i].to_i
|
197
|
-
score1 = m[:score1].to_i
|
198
|
-
score2 = m[:score2].to_i
|
199
|
-
|
200
|
-
logger.debug " score.en__ft_ht: >#{score1}-#{score2} (#{score1i}-#{score2i})<"
|
201
|
-
|
202
|
-
line.sub!( m[0], '[SCORES.EN__FT_HT]' )
|
203
|
-
else
|
204
|
-
#######################################################
|
205
|
-
## try "standard" generic patterns for fallbacks
|
206
|
-
|
207
|
-
if (m = ET_RE.match( line ))
|
208
|
-
score1et = m[:score1].to_i
|
209
|
-
score2et = m[:score2].to_i
|
210
|
-
|
211
|
-
logger.debug " score.et: >#{score1et}-#{score2et}<"
|
212
|
-
|
213
|
-
line.sub!( m[0], '[SCORE.ET]' )
|
214
|
-
end
|
215
|
-
|
216
|
-
if (m = P_RE.match( line ))
|
217
|
-
score1p = m[:score1].to_i
|
218
|
-
score2p = m[:score2].to_i
|
219
|
-
|
220
|
-
logger.debug " score.p: >#{score1p}-#{score2p}<"
|
221
|
-
|
222
|
-
line.sub!( m[0], '[SCORE.P]' )
|
223
|
-
end
|
224
|
-
|
225
|
-
## let full time (ft) standard regex go last - has no marker
|
226
|
-
|
227
|
-
if (m = FT_RE.match( line ))
|
228
|
-
score1 = m[:score1].to_i
|
229
|
-
score2 = m[:score2].to_i
|
230
|
-
|
231
|
-
logger.debug " score: >#{score1}-#{score2}<"
|
232
|
-
|
233
|
-
line.sub!( m[0], '[SCORE]' )
|
234
|
-
end
|
235
|
-
end
|
236
|
-
|
237
|
-
## todo: how to handle game w/o extra time
|
238
|
-
# but w/ optional penalty ??? e.g. used in Copa Libertadores, for example
|
239
|
-
# return 0,0 or nil,nil for extra time score ?? or -1, -1 ??
|
240
|
-
# for now use nil,nil
|
241
|
-
|
242
|
-
scores = []
|
243
|
-
scores += [score1i, score2i] if score1p || score2p || score1et || score2et || score1 || score2 || score1i || score2i
|
244
|
-
scores += [score1, score2] if score1p || score2p || score1et || score2et || score1 || score2
|
245
|
-
scores += [score1et, score2et] if score1p || score2p || score1et || score2et
|
246
|
-
scores += [score1p, score2p] if score1p || score2p
|
247
|
-
|
248
|
-
scores
|
249
|
-
end
|
250
|
-
|
251
|
-
end # class ScoresFinder
|
252
|
-
|
253
|
-
end # module SportDb
|
@@ -1,213 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
module SportDb
|
4
|
-
module Import
|
5
|
-
|
6
|
-
########
|
7
|
-
# more attribs - todo/fix - also add "upstream" to struct & model!!!!!
|
8
|
-
# district, geos, year_end, country, etc.
|
9
|
-
|
10
|
-
class Club
|
11
|
-
|
12
|
-
def self.create( **kwargs )
|
13
|
-
new.update( kwargs )
|
14
|
-
end
|
15
|
-
|
16
|
-
def update( **kwargs )
|
17
|
-
@name = kwargs[:name] if kwargs.has_key? :name
|
18
|
-
@alt_names = kwargs[:alt_names] if kwargs.has_key? :alt_names
|
19
|
-
@city = kwargs[:city] if kwargs.has_key? :city
|
20
|
-
## todo/fix: use city struct - why? why not?
|
21
|
-
## todo/fix: add country too or report unused keywords / attributes - why? why not?
|
22
|
-
|
23
|
-
self ## note - MUST return self for chaining
|
24
|
-
end
|
25
|
-
|
26
|
-
|
27
|
-
## todo: use just names for alt_names - why? why not?
|
28
|
-
attr_accessor :key, :name, :alt_names,
|
29
|
-
:code, ## code == abbreviation e.g. ARS etc.
|
30
|
-
:year, :year_end, ## todo/fix: change year_end to end_year (like in season)!!!
|
31
|
-
:ground
|
32
|
-
|
33
|
-
|
34
|
-
alias_method :title, :name ## add alias/compat - why? why not
|
35
|
-
|
36
|
-
def names
|
37
|
-
## todo/check: add alt_names_auto too? - why? why not?
|
38
|
-
[@name] + @alt_names
|
39
|
-
end ## all names
|
40
|
-
|
41
|
-
|
42
|
-
## special import only attribs
|
43
|
-
attr_accessor :alt_names_auto ## auto-generated alt names
|
44
|
-
attr_accessor :wikipedia # wikipedia page name (for english (en))
|
45
|
-
|
46
|
-
|
47
|
-
def historic?() @year_end ? true : false; end
|
48
|
-
alias_method :past?, :historic?
|
49
|
-
|
50
|
-
|
51
|
-
attr_accessor :a, :b
|
52
|
-
def a?() @a == nil; end ## is a (1st) team / club (i)? if a is NOT set
|
53
|
-
def b?() @a != nil; end ## is b (2nd/reserve/jr) team / club (ii) if a is set
|
54
|
-
|
55
|
-
## note: delegate/forward all geo attributes for team b for now (to team a) - keep - why? why not?
|
56
|
-
attr_writer :city, :district, :country, :geos
|
57
|
-
def city() @a == nil ? @city : @a.city; end
|
58
|
-
def district() @a == nil ? @district : @a.district; end
|
59
|
-
def country() @a == nil ? @country : @a.country; end
|
60
|
-
def geos() @a == nil ? @geos : @a.geos; end
|
61
|
-
|
62
|
-
|
63
|
-
def wikipedia?() @wikipedia; end
|
64
|
-
def wikipedia_url
|
65
|
-
if @wikipedia
|
66
|
-
## note: replace spaces with underscore (_)
|
67
|
-
## e.g. Club Brugge KV => Club_Brugge_KV
|
68
|
-
## todo/check/fix:
|
69
|
-
## check if "plain" dash (-) needs to get replaced with typographic dash??
|
70
|
-
"https://en.wikipedia.org/wiki/#{@wikipedia.gsub(' ','_')}"
|
71
|
-
else
|
72
|
-
nil
|
73
|
-
end
|
74
|
-
end
|
75
|
-
|
76
|
-
|
77
|
-
def initialize
|
78
|
-
@alt_names = []
|
79
|
-
@alt_names_auto = []
|
80
|
-
end
|
81
|
-
|
82
|
-
|
83
|
-
## helper methods for import only
|
84
|
-
## check for duplicates
|
85
|
-
def duplicates?
|
86
|
-
names = [name] + alt_names + alt_names_auto
|
87
|
-
names = names.map { |name| normalize( sanitize(name) ) }
|
88
|
-
|
89
|
-
names.size != names.uniq.size
|
90
|
-
end
|
91
|
-
|
92
|
-
def duplicates
|
93
|
-
names = [name] + alt_names + alt_names_auto
|
94
|
-
|
95
|
-
## calculate (count) frequency and select if greater than one
|
96
|
-
names.reduce( Hash.new ) do |h,name|
|
97
|
-
norm = normalize( sanitize(name) )
|
98
|
-
h[norm] ||= []
|
99
|
-
h[norm] << name; h
|
100
|
-
end.select { |norm,names| names.size > 1 }
|
101
|
-
end
|
102
|
-
|
103
|
-
def add_variants( name_or_names )
|
104
|
-
names = name_or_names.is_a?(Array) ? name_or_names : [name_or_names]
|
105
|
-
names.each do |name|
|
106
|
-
name = sanitize( name )
|
107
|
-
self.alt_names_auto += variants( name )
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
|
112
|
-
###################################
|
113
|
-
# "global" helper - move to ___ ? why? why not?
|
114
|
-
|
115
|
-
## note: allow placeholder years too, e.g. (-___) or (-????)
|
116
|
-
## for marking missing (to be filled in) years
|
117
|
-
YEAR_REGEX = /\([0-9, ?_-]+?\)/ # note: non-greedy (minimum/first) match
|
118
|
-
def self.strip_year( name )
|
119
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
120
|
-
## (1946-2001, 2013-) etc.
|
121
|
-
name.gsub( YEAR_REGEX, '' ).strip
|
122
|
-
end
|
123
|
-
|
124
|
-
def self.has_year?( name ) name =~ YEAR_REGEX; end
|
125
|
-
|
126
|
-
LANG_REGEX = /\[[a-z]{1,2}\]/ ## note also allow [a] or [d] or [e] - why? why not?
|
127
|
-
def self.strip_lang( name )
|
128
|
-
name.gsub( LANG_REGEX, '' ).strip
|
129
|
-
end
|
130
|
-
|
131
|
-
def self.has_lang?( name ) name =~ LANG_REGEX; end
|
132
|
-
|
133
|
-
def self.sanitize( name )
|
134
|
-
## check for year(s) e.g. (1887-1911), (-2013),
|
135
|
-
## (1946-2001,2013-) etc.
|
136
|
-
name = strip_year( name )
|
137
|
-
## check lang codes e.g. [en], [fr], etc.
|
138
|
-
name = strip_lang( name )
|
139
|
-
name
|
140
|
-
end
|
141
|
-
|
142
|
-
|
143
|
-
## note: also add (),’,− etc. e.g.
|
144
|
-
## Estudiantes (LP) => Estudiantes LP
|
145
|
-
## Saint Patrick’s Athletic FC => Saint Patricks Athletic FC
|
146
|
-
## Myllykosken Pallo −47 => Myllykosken Pallo 47
|
147
|
-
|
148
|
-
NORM_REGEX = %r{
|
149
|
-
[.'’º/()−-]
|
150
|
-
}x # note: in [] dash (-) if last doesn't need to get escaped
|
151
|
-
## note: remove all dots (.), dash (-), ', º, /, etc.
|
152
|
-
# . U+002E (46) - FULL STOP
|
153
|
-
# ' U+0027 (39) - APOSTROPHE
|
154
|
-
# ’ U+2019 (8217) - RIGHT SINGLE QUOTATION MARK
|
155
|
-
# º U+00BA (186) - MASCULINE ORDINAL INDICATOR
|
156
|
-
# / U+002F (47) - SOLIDUS
|
157
|
-
# ( U+0028 (40) - LEFT PARENTHESIS
|
158
|
-
# ) U+0029 (41) - RIGHT PARENTHESIS
|
159
|
-
# − U+2212 (8722) - MINUS SIGN
|
160
|
-
# - U+002D (45) - HYPHEN-MINUS
|
161
|
-
|
162
|
-
## for norm(alizing) names
|
163
|
-
def self.strip_norm( name )
|
164
|
-
name.gsub( NORM_REGEX, '' )
|
165
|
-
end
|
166
|
-
|
167
|
-
def self.normalize( name )
|
168
|
-
# note: do NOT call sanitize here (keep normalize "atomic" for reuse)
|
169
|
-
name = strip_norm( name )
|
170
|
-
name = name.gsub( ' ', '' ) # note: also remove all spaces!!!
|
171
|
-
|
172
|
-
## todo/fix: use our own downcase - why? why not?
|
173
|
-
name = downcase_i18n( name ) ## do NOT care about upper and lowercase for now
|
174
|
-
name
|
175
|
-
end
|
176
|
-
|
177
|
-
|
178
|
-
def self.strip_wiki( name ) # todo/check: rename to strip_wikipedia_en - why? why not?
|
179
|
-
## note: strip disambiguation qualifier from wikipedia page name if present
|
180
|
-
## note: only remove year and foot... for now
|
181
|
-
## e.g. FC Wacker Innsbruck (2002) => FC Wacker Innsbruck
|
182
|
-
## Willem II (football club) => Willem II
|
183
|
-
##
|
184
|
-
## e.g. do NOT strip others !! e.g.
|
185
|
-
## América Futebol Clube (MG)
|
186
|
-
## only add more "special" cases on demand (that is, if we find more)
|
187
|
-
name = name.gsub( /\([12][^\)]+?\)/, '' ).strip ## starting with a digit 1 or 2 (assuming year)
|
188
|
-
name = name.gsub( /\(foot[^\)]+?\)/, '' ).strip ## starting with foot (assuming football ...)
|
189
|
-
name
|
190
|
-
end
|
191
|
-
|
192
|
-
|
193
|
-
private
|
194
|
-
## private "shortcut" convenience helpers
|
195
|
-
def sanitize( name ) self.class.sanitize( name ); end
|
196
|
-
def normalize( name ) self.class.normalize( name ); end
|
197
|
-
|
198
|
-
def variants( name ) Variant.find( name ); end
|
199
|
-
end # class Club
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
############
|
204
|
-
# convenience
|
205
|
-
# Club and Team are for now alias
|
206
|
-
# in the future make
|
207
|
-
# Club > Team
|
208
|
-
# NationalTeam > Team - why? why not?
|
209
|
-
Team = Club
|
210
|
-
|
211
|
-
|
212
|
-
end # module Import
|
213
|
-
end # module SportDb
|