worldfootball 0.2.6 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -2
- data/Manifest.txt +11 -6
- data/README.md +45 -3
- data/Rakefile +1 -0
- data/bin/wfb +70 -14
- data/bin/wfbconf +55 -0
- data/bin/wfbconv +100 -0
- data/bin/wfbdump +76 -0
- data/bin/wfbgen +102 -0
- data/bin/wfbup +101 -0
- data/config/{leagues_america.csv → leagues/america.csv} +31 -2
- data/config/{leagues_europe.csv → leagues/europe.csv} +39 -5
- data/config/rounds.csv +6 -0
- data/config/stages.csv +16 -32
- data/lib/worldfootball/build-parse_score.rb +8 -3
- data/lib/worldfootball/convert.rb +52 -17
- data/lib/worldfootball/download.rb +6 -2
- data/lib/worldfootball/leagues.rb +23 -10
- data/lib/worldfootball/mods.rb +28 -3
- data/lib/worldfootball/page_schedule.rb +38 -15
- data/lib/worldfootball/version.rb +2 -2
- data/lib/worldfootball.rb +72 -1
- metadata +35 -11
- /data/config/{leagues_africa.csv → leagues/africa.csv} +0 -0
- /data/config/{leagues_asia.csv → leagues/asia.csv} +0 -0
- /data/config/{leagues_middle_east.csv → leagues/middle_east.csv} +0 -0
- /data/config/{leagues_pacific.csv → leagues/pacific.csv} +0 -0
data/bin/wfbup
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbup
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbup
|
7
|
+
|
8
|
+
|
9
|
+
##
|
10
|
+
## add offset for restart!!!!
|
11
|
+
## e.g. si.1 - maybe add season later!!!
|
12
|
+
## or better add an expiry option e.g. 24h or such !!!!
|
13
|
+
## - stopping at si.1 2017/18...
|
14
|
+
|
15
|
+
|
16
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
17
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
18
|
+
require 'worldfootball'
|
19
|
+
|
20
|
+
|
21
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
22
|
+
puts " setting web cache to >/sports/cache<"
|
23
|
+
'/sports/cache'
|
24
|
+
else
|
25
|
+
'./cache'
|
26
|
+
end
|
27
|
+
|
28
|
+
## convert (default) output directory
|
29
|
+
Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
30
|
+
puts " setting convert out_dir to >/sports/cache.wfb<"
|
31
|
+
'/sports/cache.wfb'
|
32
|
+
else
|
33
|
+
'./tmp' ## use tmp in working dir
|
34
|
+
end
|
35
|
+
|
36
|
+
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
37
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
38
|
+
'/sports/cache.wfb.txt'
|
39
|
+
else
|
40
|
+
'./tmp' ## use tmp in working dir
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
require 'optparse'
|
45
|
+
|
46
|
+
##
|
47
|
+
# by default update (download) all schedules with overwrite/force set to false
|
48
|
+
|
49
|
+
Webget.config.sleep = 2
|
50
|
+
|
51
|
+
args = ARGV
|
52
|
+
|
53
|
+
opts = {
|
54
|
+
force: false, # a.k.a. overwrite
|
55
|
+
}
|
56
|
+
|
57
|
+
|
58
|
+
parser = OptionParser.new do |parser|
|
59
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
60
|
+
|
61
|
+
|
62
|
+
parser.on( "--force",
|
63
|
+
"always overwrite (force) datafile - default is (#{opts[:force]})" ) do |force|
|
64
|
+
opts[:force] = true # true|false
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
parser.parse!( args )
|
70
|
+
|
71
|
+
puts "OPTS:"
|
72
|
+
p opts
|
73
|
+
puts "ARGV:"
|
74
|
+
p args
|
75
|
+
|
76
|
+
|
77
|
+
keys = if args.size == 0
|
78
|
+
Worldfootball::LEAGUES.keys
|
79
|
+
else
|
80
|
+
args
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
keys.each_with_index do |key, i|
|
86
|
+
league = Worldfootball::LEAGUES[key]
|
87
|
+
seasons = league.seasons
|
88
|
+
|
89
|
+
puts "==> #{i+1}/#{keys.size} #{key} - #{seasons.size} seasons(s)..."
|
90
|
+
|
91
|
+
seasons.each_with_index do |season_rec,j|
|
92
|
+
season = season_rec[0]
|
93
|
+
|
94
|
+
puts " #{j+1}/#{seasons.size} #{key} #{season}..."
|
95
|
+
Worldfootball.schedule( league: key, season: season,
|
96
|
+
overwrite: opts[:force] )
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
|
101
|
+
puts "bye"
|
@@ -23,10 +23,21 @@ ca.cup, can-canadian-championship-2024
|
|
23
23
|
# - Finale
|
24
24
|
# /mex-primera-division-2018-2019-clausura-playoffs/
|
25
25
|
mx.1, mex-primera-division-2024-2025-apertura_2
|
26
|
-
|
26
|
+
|
27
|
+
## merge expansion and ascenso into one (mx.2) - possible? why? why not?
|
28
|
+
|
29
|
+
## first expansion season -> 2020/21 Apertura
|
30
|
+
## last ascenso season -> 2019/20 Clausura
|
31
|
+
mx.2.expansion, mex-liga-de-expansion-2024-2025-apertura
|
32
|
+
mx.2.ascenso, mex-liga-de-ascenso-2019-2020-clausura
|
33
|
+
|
27
34
|
mx.cup, mex-copa-mx-2019-2020
|
28
35
|
|
29
36
|
|
37
|
+
mx.3.a, mex-lp-serie-a-2024-2025-apertura
|
38
|
+
mx.3.b, mex-lp-serie-b-2024-2025-apertura
|
39
|
+
|
40
|
+
|
30
41
|
|
31
42
|
## change to mls - why? why not?
|
32
43
|
us.1, usa-major-league-soccer-2024
|
@@ -34,6 +45,12 @@ us.2, usa-usl-championship-2024
|
|
34
45
|
us.cup, usa-u-s-open-cup-2024
|
35
46
|
|
36
47
|
|
48
|
+
concacaf.cl, concacaf-champions-league-2020
|
49
|
+
|
50
|
+
|
51
|
+
|
52
|
+
#############
|
53
|
+
### South America
|
37
54
|
|
38
55
|
br.1, bra-serie-a-2024
|
39
56
|
br.2, bra-serie-b-2024
|
@@ -78,4 +95,16 @@ uy.1, uru-primera-division-2024-clausura
|
|
78
95
|
uy.2, uru-segunda-division-2024-fase-regular
|
79
96
|
uy.cup, uru-copa-2024
|
80
97
|
|
81
|
-
ve.1, ven-primera-division-2024-clausura
|
98
|
+
ve.1, ven-primera-division-2024-clausura
|
99
|
+
|
100
|
+
|
101
|
+
copa.l, copa-libertadores-2020
|
102
|
+
|
103
|
+
|
104
|
+
###################
|
105
|
+
### Central America & Caribbean Islands
|
106
|
+
cr.1, crc-primera-division-2020-2021-apertura
|
107
|
+
sv.1, slv-primera-division-2020-2021-apertura
|
108
|
+
gt.1, gua-liga-nacional-2020-2021-apertura
|
109
|
+
hn.1, hon-liga-nacional-2020-2021-apertura
|
110
|
+
ni.1, nca-liga-primera-2020-2021-apertura
|
@@ -1,5 +1,9 @@
|
|
1
1
|
key, slug
|
2
2
|
|
3
|
+
|
4
|
+
###########
|
5
|
+
#### Central Europe
|
6
|
+
|
3
7
|
de.1, bundesliga-2024-2025
|
4
8
|
de.2, 2-bundesliga-2024-2025
|
5
9
|
de.3, 3-liga-2024-2025
|
@@ -29,13 +33,18 @@ hu.1, hun-nb-i-2024-2025
|
|
29
33
|
|
30
34
|
cz.1, cze-1-fotbalova-liga-2024-2025
|
31
35
|
cz.2, cze-2-fotbalova-liga-2024-2025
|
32
|
-
|
36
|
+
|
37
|
+
## commented out for now (three leagues in one - split !!)
|
38
|
+
## cz.3, cze-3-fotbalova-liga-2024-2025-cfl-a
|
39
|
+
|
33
40
|
|
34
41
|
sk.1, svk-super-liga-2024-2025
|
35
42
|
|
36
43
|
pl.1, pol-ekstraklasa-2024-2025
|
37
44
|
|
38
45
|
|
46
|
+
#########
|
47
|
+
### British Isles / Western Europe
|
39
48
|
|
40
49
|
eng.1, eng-premier-league-2024-2025
|
41
50
|
eng.2, eng-championship-2024-2025
|
@@ -45,19 +54,20 @@ eng.5, eng-national-league-2024-2025
|
|
45
54
|
eng.cup, eng-fa-cup-2023-2024 ### update to 2024-2025 later!!!
|
46
55
|
eng.cup.l, eng-league-cup-2024-2025
|
47
56
|
|
48
|
-
|
49
|
-
|
50
57
|
sco.1, sco-premiership-2024-2025
|
51
58
|
wal.1, wal-premier-league-2024-2025
|
52
59
|
nir.1, nir-premier-league-2024-2025
|
53
60
|
ie.1, irl-premier-division-2024
|
54
61
|
|
55
62
|
|
63
|
+
|
64
|
+
##############
|
65
|
+
### Benelux / Western Europe
|
66
|
+
|
56
67
|
fr.1, fra-ligue-1-2024-2025
|
57
68
|
fr.2, fra-ligue-2-2024-2025
|
58
69
|
fr.cup, fra-coupe-de-france-2023-2024 ### update to 2024-2025 later!!!
|
59
70
|
|
60
|
-
|
61
71
|
lu.1, lux-nationaldivision-2024-2025
|
62
72
|
|
63
73
|
nl.1, ned-eredivisie-2024-2025
|
@@ -69,6 +79,10 @@ be.2, bel-eerste-klasse-b-2024-2025
|
|
69
79
|
be.cup, bel-beker-van-belgie-2024-2025
|
70
80
|
|
71
81
|
|
82
|
+
|
83
|
+
######################
|
84
|
+
### Southern Europe
|
85
|
+
|
72
86
|
it.1, ita-serie-a-2024-2025
|
73
87
|
it.2, ita-serie-b-2024-2025
|
74
88
|
it.3.a, ita-serie-c-girone-a-2024-2025
|
@@ -91,11 +105,20 @@ ad.1, and-1a-divisio-2024-2025
|
|
91
105
|
gi.1, gib-premier-divison-2024-2025
|
92
106
|
|
93
107
|
|
108
|
+
|
109
|
+
###################
|
110
|
+
### Eastern Europe
|
111
|
+
|
94
112
|
ro.1, rou-liga-1-2024-2025
|
95
113
|
ro.cup, rou-cupa-romaniei-2024-2025
|
96
114
|
|
97
115
|
bg.1, bul-parva-liga-2024-2025
|
98
116
|
|
117
|
+
##
|
118
|
+
## note: ru - special (transition) league format for season 2011/12 (lasting 18 months!!)
|
119
|
+
# 1) rus-premier-liga-2011-2012/ -- 30 rounds
|
120
|
+
# 2a) rus-premier-liga-2011-2012-meisterschaft/ -- 2011/2012 Meisterschaft (rounds 31 to 44)
|
121
|
+
# b) rus-premier-liga-2011-2012-relegation/ -- 2011/2012 Relegation (rounds 31 to 44)
|
99
122
|
ru.1, rus-premier-liga-2024-2025
|
100
123
|
ru.2, rus-premier-liga-2024-2025
|
101
124
|
|
@@ -104,6 +127,12 @@ by.1, blr-cempionat-2024
|
|
104
127
|
ua.1, ukr-premyer-liga-2024-2025
|
105
128
|
|
106
129
|
|
130
|
+
## note: start with 2012/13 for now!!!
|
131
|
+
## in 2011/12 a new format was introduced, in which after the regular season
|
132
|
+
## two play-off groups were played to decide over the Champions League and Europa League starting rounds
|
133
|
+
## 1) - tur-sueperlig-2011-2012
|
134
|
+
## 2a) - tur-sueperlig-2012-meisterschaft -- 2012 Meisterschaft
|
135
|
+
## 2b) - tur-sueperlig-2012-platzierung -- 2012 Platzierung
|
107
136
|
|
108
137
|
tr.1, tur-sueperlig-2024-2025
|
109
138
|
tr.2, tur-1-lig-2024-2025
|
@@ -125,7 +154,7 @@ rs.1, srb-super-liga-2024-2025
|
|
125
154
|
si.1, svn-prvaliga-2024-2025
|
126
155
|
|
127
156
|
## todo/fix - change to kos.1 - why? why not?
|
128
|
-
xk.1, kos-superliga-2024-2025
|
157
|
+
## xk.1, kos-superliga-2024-2025
|
129
158
|
kos.1, kos-superliga-2024-2025
|
130
159
|
|
131
160
|
|
@@ -134,6 +163,11 @@ az.1, aze-premyer-liqasi-2024-2025
|
|
134
163
|
ge.1, geo-erovnuli-liga-2024
|
135
164
|
|
136
165
|
|
166
|
+
|
167
|
+
|
168
|
+
###################
|
169
|
+
### Northern Europe
|
170
|
+
|
137
171
|
is.1, isl-urvalsdeild-2024
|
138
172
|
fo.1, fro-effodeildin-2024
|
139
173
|
|
data/config/rounds.csv
CHANGED
data/config/stages.csv
CHANGED
@@ -1,51 +1,38 @@
|
|
1
1
|
key, name1, name2,
|
2
2
|
|
3
|
-
*, Meisterschaft,
|
4
|
-
*, Abstieg,
|
5
|
-
*, Relegation, Playoffs - Relegation
|
3
|
+
*, Meisterschaft, Championship
|
4
|
+
*, Abstieg, Relegation
|
6
5
|
|
6
|
+
## use Playoffs - Championship -- why? why not?
|
7
|
+
## use Playoffs - Relegation -- why? why not?
|
7
8
|
|
8
|
-
sco.1, Championship, Playoffs - Championship
|
9
|
-
sco.1, Relegation', Playoffs - Relegation
|
10
9
|
|
11
|
-
|
12
|
-
at.1,
|
13
|
-
at.1, Qualifikationsgruppe, Playoffs - Relegation
|
10
|
+
at.1, Meistergruppe, Championship
|
11
|
+
at.1, Qualifikationsgruppe, Relegation
|
14
12
|
at.1, Playoff, Europa League Finals
|
15
13
|
|
16
14
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
sk.1, Meisterschaft, Playoffs - Championship
|
22
|
-
sk.1, Abstieg, Playoffs - Relegation
|
15
|
+
sk.1, Meisterschaft, Championship
|
16
|
+
sk.1, Abstieg, Relegation
|
23
17
|
sk.1, Europa League, Europa League Finals
|
24
18
|
|
25
19
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
ru.1, Meisterschaft, Playoffs - Championship
|
30
|
-
ru.1, Relegation, Playoffs - Relegation
|
31
|
-
|
32
|
-
|
33
|
-
ua.1, Meisterschaft, Playoffs - Championship
|
34
|
-
ua.1, Abstieg, Playoffs - Relegation
|
20
|
+
ua.1, Meisterschaft, Championship
|
21
|
+
ua.1, Abstieg, Relegation
|
35
22
|
ua.1, Playoffs EL, Europa League Finals
|
36
23
|
|
37
24
|
|
38
|
-
fi.1, Meisterschaft,
|
39
|
-
fi.1, Abstieg,
|
25
|
+
fi.1, Meisterschaft, Championship
|
26
|
+
fi.1, Abstieg, Relegation
|
40
27
|
fi.1, Playoff EL, Europa League Finals
|
41
28
|
|
42
|
-
dk.1, Meisterschaft,
|
43
|
-
dk.1, Abstieg,
|
29
|
+
dk.1, Meisterschaft, Championship
|
30
|
+
dk.1, Abstieg, Relegation
|
44
31
|
dk.1, Europa League, Europa League Finals
|
45
32
|
|
46
33
|
|
47
|
-
gr.1, Meisterschaft,
|
48
|
-
gr.1, Abstieg,
|
34
|
+
gr.1, Meisterschaft, Championship
|
35
|
+
gr.1, Abstieg, Relegation
|
49
36
|
gr.1, Playoffs, Playoffs
|
50
37
|
gr.1, Spiel um Platz 6, Match 6th Place
|
51
38
|
|
@@ -55,9 +42,6 @@ mx.1, Apertura Playoffs, Apertura - Liguilla
|
|
55
42
|
mx.1, Clausura Playoffs, Clausura - Liguilla
|
56
43
|
|
57
44
|
|
58
|
-
kr.1, Meisterschaft, Playoffs - Championship
|
59
|
-
kr.1, Abstieg, Playoffs - Relegation
|
60
|
-
|
61
45
|
|
62
46
|
nz.1, Playoffs, Playoff Finals
|
63
47
|
|
@@ -31,13 +31,18 @@ def self.parse_score( score_str )
|
|
31
31
|
et = ''
|
32
32
|
pen = ''
|
33
33
|
|
34
|
+
##
|
35
|
+
## [085] 2021-10-21 | 22:00 | Metropolitanos FC | LALA FC | Aufg.
|
36
|
+
## !! ERROR - unsupported score format >Aufg.< - sorry; maybe add a score error fix/patch
|
37
|
+
## - handle with Aufg.
|
34
38
|
|
35
39
|
if score_str == '---' ## in the future (no score yet) - was -:-
|
36
40
|
ft = ''
|
37
41
|
ht = ''
|
38
42
|
elsif score_str == 'n.gesp.' || ## cancelled (british) / canceled (us)
|
39
43
|
score_str == 'ausg.' || ## todo/check: change to some other status ????
|
40
|
-
score_str == 'annull.'
|
44
|
+
score_str == 'annull.' || ## todo/check: change to some other status (see ie 2012) ????
|
45
|
+
score_str == 'Aufg.'
|
41
46
|
ft = '(*)'
|
42
47
|
ht = ''
|
43
48
|
comments = 'cancelled'
|
@@ -172,13 +177,13 @@ def self.parse_score( score_str )
|
|
172
177
|
puts "!! WARN - weird score n.V. only - >#{score_str}<"
|
173
178
|
elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
|
174
179
|
[ ]*
|
175
|
-
|
180
|
+
(?: i\.E\. | n\.P\. )
|
176
181
|
$/x
|
177
182
|
pen = "#{$1}-#{$2}"
|
178
183
|
et = ''
|
179
184
|
ht = ''
|
180
185
|
ft = ''
|
181
|
-
puts "!! WARN - weird score i.E. only - >#{score_str}<"
|
186
|
+
puts "!! WARN - weird score i.E. (n.P.) only - >#{score_str}<"
|
182
187
|
else
|
183
188
|
puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
|
184
189
|
exit 1
|
@@ -2,13 +2,23 @@
|
|
2
2
|
module Worldfootball
|
3
3
|
|
4
4
|
|
5
|
-
def self.convert( league:, season
|
5
|
+
def self.convert( league:, season:,
|
6
|
+
overwrite: true )
|
6
7
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
7
8
|
|
8
9
|
league = find_league!( league )
|
9
10
|
pages = league.pages!( season: season )
|
10
11
|
|
11
12
|
|
13
|
+
out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"
|
14
|
+
if !overwrite && File.exist?( out_path )
|
15
|
+
## skip generation
|
16
|
+
puts " OK #{league.key} #{season} (do NOT overwrite)"
|
17
|
+
return
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
|
12
22
|
## collect all teams
|
13
23
|
teams_by_ref = {}
|
14
24
|
|
@@ -48,17 +58,19 @@ def self.convert( league:, season: )
|
|
48
58
|
team_name = norm_team( h[:name] ) ## note: norm team name!!!
|
49
59
|
team_ref = h[:ref]
|
50
60
|
|
61
|
+
###
|
62
|
+
## quick fix for broken refs/links
|
63
|
+
## olympique-lyon => olympique-lyonnais
|
64
|
+
# team_ref = 'olympique-lyonnais' if team_ref == 'olympique-lyon'
|
65
|
+
|
51
66
|
## note: skip N.N. (place holder team)
|
52
67
|
## team_ref is nil etc.
|
53
68
|
next if team_name == 'N.N.'
|
54
69
|
|
55
70
|
team_stat = teams_by_ref[ team_ref ] ||= { count: 0,
|
56
|
-
|
71
|
+
names: [] }
|
57
72
|
team_stat[:count] += team_count
|
58
|
-
|
59
|
-
puts "!! ASSERT ERROR - team ref with differet names; expected #{team_stat[:name]} - got #{team_name}"
|
60
|
-
exit 1
|
61
|
-
end
|
73
|
+
team_stat[:names] << team_name unless team_stat[:names].include?( team_name )
|
62
74
|
end
|
63
75
|
|
64
76
|
|
@@ -73,9 +85,12 @@ def self.convert( league:, season: )
|
|
73
85
|
|
74
86
|
|
75
87
|
clubs_intl = ['uefa.cl', 'uefa.el', 'uefa.conf',
|
76
|
-
'
|
88
|
+
'uefa.cl.q', 'uefa.el.q', 'uefa.conf.q',
|
89
|
+
'copa.l',
|
90
|
+
'concacaf.cl',
|
77
91
|
'caf.cl',
|
78
|
-
'afl'
|
92
|
+
'afl',
|
93
|
+
].include?(league.key) ? true : false
|
79
94
|
|
80
95
|
####
|
81
96
|
# auto-add (fifa) country code if int'l club tournament
|
@@ -83,7 +98,6 @@ def self.convert( league:, season: )
|
|
83
98
|
##
|
84
99
|
## get country codes for team ref
|
85
100
|
teams_by_ref.each do |team_slug, h|
|
86
|
-
|
87
101
|
Metal.download_team( team_slug, cache: true )
|
88
102
|
team_page = Page::Team.from_cache( team_slug )
|
89
103
|
props = team_page.props
|
@@ -99,14 +113,20 @@ def self.convert( league:, season: )
|
|
99
113
|
|
100
114
|
## generate lookup by name
|
101
115
|
teams_by_name = teams_by_ref.reduce( {} ) do |h, (slug,rec)|
|
102
|
-
|
103
|
-
|
116
|
+
### todo/fix
|
117
|
+
## report warning if names size is > 1!!!!
|
118
|
+
##
|
119
|
+
rec[:names].each do |name|
|
120
|
+
h[ name ] = rec
|
121
|
+
end
|
122
|
+
h
|
104
123
|
end
|
105
124
|
|
125
|
+
|
106
126
|
#####
|
107
127
|
## dump team refs
|
108
128
|
puts " #{teams_by_ref.size} team(s) by ref:"
|
109
|
-
pp
|
129
|
+
pp teams_by_ref
|
110
130
|
|
111
131
|
## quick hack
|
112
132
|
## add country (fifa) codes to team names
|
@@ -129,10 +149,22 @@ def self.convert( league:, season: )
|
|
129
149
|
## note: sort matches by date before saving/writing!!!!
|
130
150
|
## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
|
131
151
|
## note: assume date is third column!!! (stage/round/date/...)
|
132
|
-
|
152
|
+
|
153
|
+
### note - do NOT sort for now
|
154
|
+
## keep "original" page order - why? why not?
|
155
|
+
## recs = recs.sort { |l,r| l[2] <=> r[2] }
|
156
|
+
|
157
|
+
|
133
158
|
## reformat date / beautify e.g. Sat Aug 7 1993
|
134
159
|
recs.each do |rec|
|
135
|
-
|
160
|
+
if rec[2]
|
161
|
+
if rec[2] =~ /^\d{4}-\d{1,2}-\d{1,2}$/
|
162
|
+
rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' )
|
163
|
+
else
|
164
|
+
## report unknown date format warning
|
165
|
+
puts "WARN - unsupported date format (cannot parse?) >#{rec[2]}<"
|
166
|
+
end
|
167
|
+
end
|
136
168
|
end
|
137
169
|
|
138
170
|
## remove unused columns (e.g. stage, et, p, etc.)
|
@@ -141,10 +173,13 @@ recs.each do |rec|
|
|
141
173
|
puts headers
|
142
174
|
pp recs[0] ## check first record
|
143
175
|
|
144
|
-
out_path
|
145
|
-
|
146
|
-
puts "write #{out_path}..."
|
176
|
+
puts " writing to >#{out_path}< - #{recs.size} record(s)..."
|
147
177
|
write_csv( out_path, recs, headers: headers )
|
178
|
+
|
179
|
+
## add to tmp too for debugging
|
180
|
+
out_path2 = "#{config.convert.out_dir}/tmp/#{league.key}/#{season.to_path}.csv"
|
181
|
+
puts " writing to >#{out_path2}< - #{recs.size} record(s)..."
|
182
|
+
write_csv( out_path2, recs, headers: headers )
|
148
183
|
end
|
149
184
|
end # module Worldfootball
|
150
185
|
|
@@ -4,12 +4,16 @@ module Worldfootball
|
|
4
4
|
|
5
5
|
#################
|
6
6
|
## porcelain "api"
|
7
|
-
def self.schedule( league:, season: )
|
7
|
+
def self.schedule( league:, season:, overwrite: true )
|
8
8
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
9
9
|
|
10
10
|
pages = find_league_pages!( league: league, season: season )
|
11
11
|
pages.each do |slug, _|
|
12
|
-
Metal.
|
12
|
+
if !overwrite && Webcache.cached?( Metal.schedule_url( slug ))
|
13
|
+
puts " OK #{league} #{season} - #{slug} (do NOT overwrite)"
|
14
|
+
else
|
15
|
+
Metal.download_schedule( slug )
|
16
|
+
end
|
13
17
|
end # each page
|
14
18
|
end
|
15
19
|
|
@@ -61,9 +61,12 @@ class LeagueItem # nested inside LeagueConfig
|
|
61
61
|
season, stage = text.split( ' ', 2 )
|
62
62
|
|
63
63
|
## todo/fix: add a warning here and auto log to logs.txt!!!!
|
64
|
-
|
65
|
-
|
66
|
-
|
64
|
+
if ['2019-2021',
|
65
|
+
'1958/1960',
|
66
|
+
'1955/1958' ].include?( season )
|
67
|
+
log( "!! WARN - seasons for league #{@key} incl. invalid season #{season} - slug #{slug}; skipping season" )
|
68
|
+
next ## note - skip invalid season entry
|
69
|
+
end
|
67
70
|
|
68
71
|
season = Season.parse( season )
|
69
72
|
|
@@ -106,6 +109,13 @@ class LeagueItem # nested inside LeagueConfig
|
|
106
109
|
recs = seasons[season.key]
|
107
110
|
recs ? recs.reverse : nil
|
108
111
|
end
|
112
|
+
|
113
|
+
def log( msg ) ### append to log
|
114
|
+
File.open( './logs.txt', 'a:utf-8' ) do |f|
|
115
|
+
f.write( msg )
|
116
|
+
f.write( "\n" )
|
117
|
+
end
|
118
|
+
end
|
109
119
|
end # class LeagueItem
|
110
120
|
|
111
121
|
|
@@ -122,14 +132,17 @@ def size() @table.size; end
|
|
122
132
|
end # class LeagueConfig
|
123
133
|
|
124
134
|
|
135
|
+
|
136
|
+
|
137
|
+
|
125
138
|
LEAGUES = LeagueConfig.new
|
126
|
-
['
|
127
|
-
'
|
128
|
-
'
|
129
|
-
'
|
130
|
-
'
|
131
|
-
'
|
132
|
-
recs = read_csv( "#{Worldfootball.root}/config/#{name}.csv" )
|
139
|
+
['africa',
|
140
|
+
'america',
|
141
|
+
'asia',
|
142
|
+
'europe',
|
143
|
+
'middle_east',
|
144
|
+
'pacific'].each do |name|
|
145
|
+
recs = read_csv( "#{Worldfootball.root}/config/leagues/#{name}.csv" )
|
133
146
|
pp recs
|
134
147
|
puts " #{recs.size} league(s) in #{name}"
|
135
148
|
LEAGUES.add( recs )
|
data/lib/worldfootball/mods.rb
CHANGED
@@ -10,11 +10,20 @@ module Worldfootball
|
|
10
10
|
def self.norm_team( team )
|
11
11
|
## clean team name and asciify (e.g. ’->' )
|
12
12
|
team = team.sub( '(old)', '' ).strip
|
13
|
-
team = team.gsub( '’', "'" ) ## e.g. Hawke’s Bay United FC
|
14
13
|
|
15
|
-
##
|
16
|
-
##
|
14
|
+
## e.g. Hawke’s Bay United FC or
|
15
|
+
## ASC Monts d`Or Chasselay or
|
16
|
+
## VV Heerlen ´16 / EMM ´15 / Wormer SV´30 / Swift ´36 / etc.
|
17
|
+
team = team.gsub( /[’´`]/, "'" )
|
18
|
+
|
19
|
+
|
20
|
+
## br
|
21
|
+
## Criciúma - SC => Criciúma SC
|
22
|
+
## Bahia - BA => Bahia BA
|
23
|
+
## cz
|
24
|
+
## Baník Most - Souš => Baník Most Souš
|
17
25
|
## replace inline dash ( - ) with single space
|
26
|
+
## to log
|
18
27
|
team = team.gsub( /[ ]+[-][ ]+/, ' ' )
|
19
28
|
|
20
29
|
|
@@ -24,6 +33,22 @@ def self.norm_team( team )
|
|
24
33
|
## others too? - move to mods instead of generic rule - why? why not?
|
25
34
|
team = team.sub( /[ ]+\(A\)/, ' II' )
|
26
35
|
|
36
|
+
##
|
37
|
+
## remove () - used/reserved for country code for now - why? why not?
|
38
|
+
## e.g. Lloyds FC (Sittingbourne) => Lloyds FC Sittingbourne
|
39
|
+
## August 1st (Army Team) => August 1st Army Team
|
40
|
+
##
|
41
|
+
## add warning - why? why not?
|
42
|
+
team = team.sub( /\(
|
43
|
+
([^)]+?) ## eat-up all non-greed to next )
|
44
|
+
\)/x, '\1' )
|
45
|
+
|
46
|
+
##
|
47
|
+
## strip special case
|
48
|
+
## MFK Frýdek-Místek, a.s. => MFK Frýdek-Místek
|
49
|
+
team = team.sub( ', a.s.', '' )
|
50
|
+
|
51
|
+
|
27
52
|
################
|
28
53
|
## quick hack - norm(alize) all N.N. to N.N.
|
29
54
|
## e.g.
|