worldfootball 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -1
- data/Manifest.txt +5 -12
- data/README.md +36 -1
- data/Rakefile +3 -5
- data/bin/wfb +75 -23
- data/config/leagues_asia.csv +6 -0
- data/config/leagues_europe.csv +75 -0
- data/lib/worldfootball/build-parse_score.rb +156 -0
- data/lib/worldfootball/build.rb +64 -117
- data/lib/worldfootball/cache.rb +46 -0
- data/lib/worldfootball/config.rb +85 -0
- data/lib/worldfootball/convert.rb +15 -116
- data/lib/worldfootball/download.rb +8 -18
- data/lib/worldfootball/leagues.rb +119 -171
- data/lib/worldfootball/mods.rb +9 -2
- data/lib/worldfootball/page.rb +9 -0
- data/lib/worldfootball/page_schedule.rb +60 -8
- data/lib/worldfootball/vacuum.rb +3 -1
- data/lib/worldfootball/version.rb +2 -2
- data/lib/worldfootball.rb +6 -16
- metadata +9 -30
- data/lib/worldfootball/generator.rb +0 -33
- data/lib/worldfootball/leagues/asia.rb +0 -53
- data/lib/worldfootball/leagues/europe--british_isles.rb +0 -64
- data/lib/worldfootball/leagues/europe--central.rb +0 -127
- data/lib/worldfootball/leagues/europe--eastern.rb +0 -82
- data/lib/worldfootball/leagues/europe--northern.rb +0 -57
- data/lib/worldfootball/leagues/europe--southern.rb +0 -86
- data/lib/worldfootball/leagues/europe--western.rb +0 -38
- data/lib/worldfootball/leagues/europe.rb +0 -13
- data/lib/worldfootball/leagues/north_america.rb +0 -44
- data/lib/worldfootball/leagues/pacific.rb +0 -21
- data/lib/worldfootball/leagues/south_america.rb +0 -11
data/lib/worldfootball/build.rb
CHANGED
@@ -7,6 +7,11 @@ ROUND_TO_EN = {
|
|
7
7
|
'2. Runde' => 'Round 2',
|
8
8
|
'3. Runde' => 'Round 3',
|
9
9
|
'4. Runde' => 'Round 4',
|
10
|
+
'5. Runde' => 'Round 5',
|
11
|
+
'6. Runde' => 'Round 6',
|
12
|
+
'7. Runde' => 'Round 7',
|
13
|
+
'8. Runde' => 'Round 8',
|
14
|
+
'9. Runde' => 'Round 9',
|
10
15
|
'Achtelfinale' => 'Round of 16',
|
11
16
|
'Viertelfinale' => 'Quarterfinals',
|
12
17
|
'Halbfinale' => 'Semifinals',
|
@@ -20,13 +25,17 @@ ROUND_TO_EN = {
|
|
20
25
|
def self.build( rows, season:, league:, stage: '' ) ## rename to fixup or such - why? why not?
|
21
26
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
22
27
|
|
23
|
-
|
28
|
+
## note: do NOT pass in league struct! pass in key (string)
|
29
|
+
raise ArgumentError, "league key as string expected" unless league.is_a?(String)
|
24
30
|
|
25
|
-
print " #{rows.size}
|
31
|
+
print " #{rows.size} row(s) - Worldfootball.build #{league} #{season}"
|
26
32
|
print " - #{stage}" unless stage.empty?
|
27
33
|
print "\n"
|
28
34
|
|
29
35
|
|
36
|
+
zone = find_zone!( league: league, season: season )
|
37
|
+
|
38
|
+
|
30
39
|
## note: use only first part from key for lookup
|
31
40
|
## e.g. at.1 => at
|
32
41
|
## eng.1 => eng
|
@@ -47,44 +56,49 @@ def self.build( rows, season:, league:, stage: '' ) ## rename to fixup or such
|
|
47
56
|
print '[%03d] ' % (i+1)
|
48
57
|
print row[:round]
|
49
58
|
|
50
|
-
if m = row[:round].match(
|
59
|
+
if (m = row[:round].match( /^(?<num>[0-9]+)\. Spieltag$/ ))
|
51
60
|
## todo/check: always use a string even if number (as a string eg. '1' etc.)
|
52
|
-
round = m[
|
61
|
+
round = m[:num] ## note: keep as string (NOT number)
|
53
62
|
print " => #{round}"
|
54
63
|
else
|
55
|
-
puts "!! ERROR: cannot find matchday number"
|
64
|
+
puts "!! ERROR: cannot find matchday number in >#{row[:round]}<:"
|
65
|
+
pp row
|
56
66
|
exit 1
|
57
67
|
end
|
58
68
|
print "\n"
|
59
|
-
|
69
|
+
|
70
|
+
## note - the pattern must match from the start of the line, e.g.
|
71
|
+
## do NOT match => Qual. 1. Runde (1. Runde)!!!
|
72
|
+
elsif row[:round] =~ /^(
|
73
|
+
[1-9]\.[ ]Runde|
|
60
74
|
Achtelfinale|
|
61
75
|
Viertelfinale|
|
62
76
|
Halbfinale|
|
63
77
|
Finale
|
64
|
-
|
78
|
+
)$
|
79
|
+
/x
|
65
80
|
puts
|
66
81
|
print '[%03d] ' % (i+1)
|
67
82
|
print row[:round]
|
68
83
|
|
84
|
+
round = ROUND_TO_EN[ row[:round] ]
|
85
|
+
print " => #{round}"
|
86
|
+
print "\n"
|
69
87
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
else
|
75
|
-
round = ROUND_TO_EN[ row[:round] ]
|
76
|
-
if round.nil?
|
77
|
-
puts "!! ERROR: no mapping for round to english (en) found >#{row[:round]}<:"
|
78
|
-
pp row
|
79
|
-
exit 1
|
80
|
-
end
|
81
|
-
print " => #{round}"
|
88
|
+
if round.nil?
|
89
|
+
puts "!! ERROR: no mapping for round to english (en) found >#{row[:round]}<:"
|
90
|
+
pp row
|
91
|
+
exit 1
|
82
92
|
end
|
83
|
-
print "\n"
|
84
93
|
else
|
85
|
-
puts
|
94
|
+
puts
|
95
|
+
print '[%03d] ' % (i+1)
|
96
|
+
print row[:round]
|
97
|
+
print "\n"
|
98
|
+
|
99
|
+
puts "!! WARN: unknown round >#{row[:round]}< for league >#{league}<:"
|
86
100
|
pp row
|
87
|
-
|
101
|
+
round = row[:round]
|
88
102
|
end
|
89
103
|
|
90
104
|
|
@@ -94,12 +108,10 @@ def self.build( rows, season:, league:, stage: '' ) ## rename to fixup or such
|
|
94
108
|
team2_str = row[:team2]
|
95
109
|
score_str = row[:score]
|
96
110
|
|
97
|
-
## convert date from string e.g. 2019-10-25
|
98
|
-
date = Date.strptime( date_str, '%Y-%m-%d' )
|
99
111
|
|
100
112
|
|
101
113
|
### check for score_error; first (step 1) lookup by date
|
102
|
-
score_error = score_errors[
|
114
|
+
score_error = score_errors[ date_str ]
|
103
115
|
if score_error
|
104
116
|
if team1_str == score_error[0] &&
|
105
117
|
team2_str == score_error[1]
|
@@ -123,8 +135,6 @@ def self.build( rows, season:, league:, stage: '' ) ## rename to fixup or such
|
|
123
135
|
print "\n"
|
124
136
|
|
125
137
|
|
126
|
-
## check for 0:3 Wert. - change Wert. to awd. (awarded)
|
127
|
-
score_str = score_str.sub( /Wert\./i, 'awd.' )
|
128
138
|
|
129
139
|
## clean team name (e.g. remove (old))
|
130
140
|
## and asciify (e.g. ’ to ' )
|
@@ -135,111 +145,48 @@ def self.build( rows, season:, league:, stage: '' ) ## rename to fixup or such
|
|
135
145
|
team2_str = mods[ team2_str ] if mods[ team2_str ]
|
136
146
|
|
137
147
|
|
148
|
+
ht, ft, et, pen, comments = parse_score( score_str )
|
149
|
+
|
138
150
|
|
151
|
+
###################
|
152
|
+
### calculate date & times
|
153
|
+
## convert date from string e.g. 2019-10-25
|
154
|
+
## date = Date.strptime( date_str, '%Y-%m-%d' )
|
139
155
|
|
140
|
-
|
156
|
+
if time_str.nil? || time_str.empty?
|
157
|
+
## no time
|
158
|
+
## assume 00:00:00T
|
159
|
+
time_str = ''
|
160
|
+
timezone = ''
|
161
|
+
utc = ''
|
162
|
+
else
|
163
|
+
## note - assume central european (summer) time (cet/cest) - UTC+1 or UTC+2
|
164
|
+
cet = CET.strptime( "#{date_str} #{time_str}", '%Y-%m-%d %H:%M' )
|
141
165
|
|
166
|
+
utc = cet.getutc ## convert to utc
|
167
|
+
local = zone.to_local( utc ) # convert to local via utc
|
168
|
+
## overwrite old with local
|
169
|
+
date_str = local.strftime( '%Y-%m-%d' )
|
170
|
+
time_str = local.strftime( '%H:%M' )
|
171
|
+
timezone = local.strftime( '%Z/%z' )
|
172
|
+
utc = utc.strftime( '%Y-%m-%dT%H:%MZ' )
|
173
|
+
end
|
142
174
|
|
143
175
|
|
144
176
|
recs << [stage,
|
145
177
|
round,
|
146
|
-
|
178
|
+
date_str,
|
147
179
|
time_str,
|
180
|
+
timezone,
|
148
181
|
team1_str,
|
149
182
|
ft,
|
150
183
|
ht,
|
151
184
|
team2_str,
|
152
185
|
et, # extra: incl. extra time
|
153
186
|
pen, # extra: incl. penalties
|
154
|
-
comments
|
187
|
+
comments,
|
188
|
+
utc]
|
155
189
|
end # each row
|
156
190
|
recs
|
157
191
|
end # build
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
def self.parse_score( score_str )
|
162
|
-
comments = String.new( '' ) ## check - rename to/use status or such - why? why not?
|
163
|
-
|
164
|
-
## split score
|
165
|
-
ft = ''
|
166
|
-
ht = ''
|
167
|
-
et = ''
|
168
|
-
pen = ''
|
169
|
-
if score_str == '---' ## in the future (no score yet) - was -:-
|
170
|
-
ft = ''
|
171
|
-
ht = ''
|
172
|
-
elsif score_str == 'n.gesp.' || ## cancelled (british) / canceled (us)
|
173
|
-
score_str == 'ausg.' || ## todo/check: change to some other status ????
|
174
|
-
score_str == 'annull.' ## todo/check: change to some other status (see ie 2012) ????
|
175
|
-
ft = '(*)'
|
176
|
-
ht = ''
|
177
|
-
comments = 'cancelled'
|
178
|
-
elsif score_str == 'abgebr.' ## abandoned -- waiting for replay?
|
179
|
-
ft = '(*)'
|
180
|
-
ht = ''
|
181
|
-
comments = 'abandoned'
|
182
|
-
elsif score_str == 'verl.' ## postponed
|
183
|
-
ft = ''
|
184
|
-
ht = ''
|
185
|
-
comments = 'postponed'
|
186
|
-
# 5-4 (0-0, 1-1, 2-2) i.E.
|
187
|
-
elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
|
188
|
-
[ ]*
|
189
|
-
\(([0-9]+) [ ]*-[ ]* ([0-9]+)
|
190
|
-
[ ]*,[ ]*
|
191
|
-
([0-9]+) [ ]*-[ ]* ([0-9]+)
|
192
|
-
[ ]*,[ ]*
|
193
|
-
([0-9]+) [ ]*-[ ]* ([0-9]+)\)
|
194
|
-
[ ]*
|
195
|
-
i\.E\.
|
196
|
-
/x
|
197
|
-
pen = "#{$1}-#{$2}"
|
198
|
-
ht = "#{$3}-#{$4}"
|
199
|
-
ft = "#{$5}-#{$6}"
|
200
|
-
et = "#{$7}-#{$8}"
|
201
|
-
# 2-1 (1-0, 1-1) n.V
|
202
|
-
elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
|
203
|
-
[ ]*
|
204
|
-
\(([0-9]+) [ ]*-[ ]* ([0-9]+)
|
205
|
-
[ ]*,[ ]*
|
206
|
-
([0-9]+) [ ]*-[ ]* ([0-9]+)
|
207
|
-
\)
|
208
|
-
[ ]*
|
209
|
-
n\.V\.
|
210
|
-
/x
|
211
|
-
et = "#{$1}-#{$2}"
|
212
|
-
ht = "#{$3}-#{$4}"
|
213
|
-
ft = "#{$5}-#{$6}"
|
214
|
-
elsif score_str =~ /([0-9]+)
|
215
|
-
[ ]*-[ ]*
|
216
|
-
([0-9]+)
|
217
|
-
[ ]*
|
218
|
-
\(([0-9]+)
|
219
|
-
[ ]*-[ ]*
|
220
|
-
([0-9]+)
|
221
|
-
\)
|
222
|
-
/x
|
223
|
-
ft = "#{$1}-#{$2}"
|
224
|
-
ht = "#{$3}-#{$4}"
|
225
|
-
elsif score_str =~ /([0-9]+)
|
226
|
-
[ ]*-[ ]*
|
227
|
-
([0-9]+)
|
228
|
-
[ ]*
|
229
|
-
([a-z.]+)
|
230
|
-
/x
|
231
|
-
ft = "#{$1}-#{$2} (*)"
|
232
|
-
ht = ''
|
233
|
-
comments = $3
|
234
|
-
elsif score_str =~ /^([0-9]+)-([0-9]+)$/
|
235
|
-
ft = "#{$1}-#{$2}" ## e.g. see luxemburg and others
|
236
|
-
ht = ''
|
237
|
-
else
|
238
|
-
puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
|
239
|
-
exit 1
|
240
|
-
end
|
241
|
-
|
242
|
-
[ht, ft, et, pen, comments]
|
243
|
-
end
|
244
|
-
|
245
192
|
end # module Worldfootball
|
@@ -0,0 +1,46 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
|
4
|
+
|
5
|
+
def self.list_pages ## todo/check - rename to/use list_cached_pages
|
6
|
+
start_time = Time.now ## todo: use Timer? t = Timer.start / stop / diff etc. - why? why not?
|
7
|
+
|
8
|
+
# pages = Dir.glob( './dl/at*' )
|
9
|
+
pages = Dir.glob( "#{Webcache.root}/www.weltfussball.de/alle_spiele/*.html" )
|
10
|
+
puts " #{pages.size} page(s)" #=> 576 pages
|
11
|
+
puts
|
12
|
+
|
13
|
+
|
14
|
+
leagues = {}
|
15
|
+
|
16
|
+
pages.each do |path|
|
17
|
+
basename = File.basename( path, File.extname( path ) )
|
18
|
+
print "%-50s" % basename
|
19
|
+
print " => "
|
20
|
+
|
21
|
+
page = Worldfootball.find_page( basename )
|
22
|
+
if page
|
23
|
+
league_key = page[:league]
|
24
|
+
season_key = page[:season]
|
25
|
+
|
26
|
+
print " "
|
27
|
+
print "%-12s" % league_key
|
28
|
+
print "| %-10s" % season_key
|
29
|
+
print "\n"
|
30
|
+
|
31
|
+
seasons = leagues[league_key] ||= []
|
32
|
+
seasons << season_key unless seasons.include?( season_key )
|
33
|
+
else
|
34
|
+
print "??"
|
35
|
+
print "\n"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
puts " #{pages.size} page(s)" #=> 576 pages
|
40
|
+
puts
|
41
|
+
|
42
|
+
end_time = Time.now
|
43
|
+
diff_time = end_time - start_time
|
44
|
+
puts "convert_all: done in #{diff_time} sec(s)"
|
45
|
+
end
|
46
|
+
end # module Worldfootball
|
@@ -0,0 +1,85 @@
|
|
1
|
+
module Worldfootball
|
2
|
+
|
3
|
+
|
4
|
+
####
|
5
|
+
# config for slug to local basename / directories
|
6
|
+
# e.g.
|
7
|
+
# aut-bundesliga-2023-2024 => austria/2023-24/1_bundesliga.txt
|
8
|
+
|
9
|
+
## add (timezone) offset here too - why? why not?
|
10
|
+
LEAGUE_SETUPS = {
|
11
|
+
## note - for now auto-generate path via name (downcased)
|
12
|
+
## e.g. Belgium => /belgium
|
13
|
+
|
14
|
+
## top five (europe)
|
15
|
+
'eng' => { code: 'eng', name: 'England' },
|
16
|
+
'es' => { code: 'esp', name: 'Spain' },
|
17
|
+
# 'fr' => { code: 'fra', name: 'France' },
|
18
|
+
# 'de' => { code: '???', name: 'Germany' },
|
19
|
+
'it' => { code: 'ita', name: 'Italy' },
|
20
|
+
|
21
|
+
|
22
|
+
'be' => { code: 'bel', name: 'Belgium' },
|
23
|
+
'at' => { code: 'aut', name: 'Austria' },
|
24
|
+
'hu' => { code: 'hun', name: 'Hungary' },
|
25
|
+
|
26
|
+
'tr' => { code: 'tur', name: 'Turkey' },
|
27
|
+
'nl' => { code: 'ned', name: 'Netherlands' },
|
28
|
+
'ch' => { code: 'sui', name: 'Switzerland' },
|
29
|
+
|
30
|
+
|
31
|
+
'cz' => { code: 'cze', name: 'Czech Republic' },
|
32
|
+
'dk' => { code: 'den', name: 'Denmark' },
|
33
|
+
'fi' => { code: 'fin', name: 'Finland' },
|
34
|
+
'gr' => { code: 'gre', name: 'Greece' },
|
35
|
+
|
36
|
+
'ie' => { code: 'irl', name: 'Ireland' },
|
37
|
+
'sco' => { code: 'sco', name: 'Scotland' },
|
38
|
+
|
39
|
+
'lu' => { code: 'lux', name: 'Luxembourg' },
|
40
|
+
'pl' => { code: 'pol', name: 'Poland' },
|
41
|
+
'pt' => { code: 'por', name: 'Portugal' },
|
42
|
+
'ro' => { code: 'rou', name: 'Romania' },
|
43
|
+
'ru' => { code: 'rus', name: 'Russia' },
|
44
|
+
'se' => { code: 'swe', name: 'Sweden' },
|
45
|
+
'ua' => { code: 'ukr', name: 'Ukraine' },
|
46
|
+
|
47
|
+
|
48
|
+
'eg' => { code: 'egy', name: 'Egypt' },
|
49
|
+
'jp' => { code: 'jpn', name: 'Japan' },
|
50
|
+
'cn' => { code: 'chn', name: 'China' },
|
51
|
+
|
52
|
+
## note - for now do NOT add United States to league name
|
53
|
+
## e.g. 1 - Major League Soccer
|
54
|
+
## 2 - USL Championship
|
55
|
+
## cup - U.S. Open Cup
|
56
|
+
'us' => { code: 'usa', name: nil, path: 'united-states' },
|
57
|
+
|
58
|
+
'mx' => { code: 'mex', name: 'Mexico' },
|
59
|
+
'ar' => { code: 'arg', name: 'Argentina' },
|
60
|
+
'br' => { code: 'bra', name: 'Brazil' },
|
61
|
+
|
62
|
+
'uy' => { code: 'uru', name: 'Uruguay' },
|
63
|
+
'pe' => { code: 'per', name: 'Peru' },
|
64
|
+
'ec' => { code: 'ecu', name: 'Ecuador' },
|
65
|
+
'bo' => { code: 'bol', name: 'Bolivia' },
|
66
|
+
'cl' => { code: 'chi', name: 'Chile' },
|
67
|
+
'co' => { code: 'col', name: 'Colombia' },
|
68
|
+
|
69
|
+
'cr' => { code: 'crc', name: 'Costa Rica' },
|
70
|
+
'gt' => { code: 'gua', name: 'Guatemala' },
|
71
|
+
'hn' => { code: 'hon', name: 'Honduras' },
|
72
|
+
'sv' => { code: 'slv', name: 'El Salvador' },
|
73
|
+
'ni' => { code: 'nca', name: 'Nicaragua' },
|
74
|
+
|
75
|
+
|
76
|
+
## int'l tournaments
|
77
|
+
'uefa.cl' => { code: nil, name: 'UEFA', path: 'europe' },
|
78
|
+
'uefa.el' => { code: nil, name: 'UEFA', path: 'europe' },
|
79
|
+
'concacaf.cl' => { code: nil, name: nil, path: 'north-america' },
|
80
|
+
'copa.l' => { code: nil, name: nil, path: 'south-america' },
|
81
|
+
}
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
end # module Worldfootball
|
@@ -2,84 +2,18 @@
|
|
2
2
|
module Worldfootball
|
3
3
|
|
4
4
|
|
5
|
-
|
6
|
-
# todo/fix - use timezone instead of offset !!!
|
7
|
-
# e.g
|
8
|
-
=begin
|
9
|
-
TIMEZONES = {
|
10
|
-
'eng.1' => 'Europe/London',
|
11
|
-
'eng.2' => 'Europe/London',
|
12
|
-
|
13
|
-
'es.1' => 'Europe/Madrid',
|
14
|
-
|
15
|
-
'de.1' => 'Europe/Berlin',
|
16
|
-
'fr.1' => 'Europe/Paris',
|
17
|
-
'it.1' => 'Europe/Rome',
|
18
|
-
'nl.1' => 'Europe/Amsterdam',
|
19
|
-
|
20
|
-
'pt.1' => 'Europe/Lisbon',
|
21
|
-
|
22
|
-
## todo/fix - pt.1
|
23
|
-
## one team in madeira!!! check for different timezone??
|
24
|
-
## CD Nacional da Madeira
|
25
|
-
|
26
|
-
'br.1' => 'America/Sao_Paulo',
|
27
|
-
## todo/fix - brazil has 4 timezones
|
28
|
-
## really only two in use for clubs
|
29
|
-
## west and east (amazonas et al)
|
30
|
-
## for now use west for all - why? why not?
|
31
|
-
}
|
32
|
-
=end
|
33
|
-
|
34
|
-
## todo - find "proper/classic" timezone ("winter time")
|
35
|
-
|
36
|
-
## Brasilia - Distrito Federal, Brasil (GMT-3) -- summer time?
|
37
|
-
## Ciudad de México, CDMX, México (GMT-5) -- summer time?
|
38
|
-
## Londres, Reino Unido (GMT+1)
|
39
|
-
## Madrid -- ?
|
40
|
-
## Lisboa -- ?
|
41
|
-
## Moscow -- ?
|
42
|
-
##
|
43
|
-
## todo/check - quick fix timezone offsets for leagues for now
|
44
|
-
## - find something better - why? why not?
|
45
|
-
## note: assume time is in GMT+1
|
46
|
-
OFFSETS = {
|
47
|
-
'eng.1' => -1,
|
48
|
-
'eng.2' => -1,
|
49
|
-
'eng.3' => -1,
|
50
|
-
'eng.4' => -1,
|
51
|
-
'eng.5' => -1,
|
52
|
-
|
53
|
-
'es.1' => -1,
|
54
|
-
'es.2' => -1,
|
55
|
-
|
56
|
-
'pt.1' => -1,
|
57
|
-
'pt.2' => -1,
|
58
|
-
|
59
|
-
'br.1' => -5,
|
60
|
-
'mx.1' => -7,
|
61
|
-
}
|
62
|
-
|
63
|
-
|
64
|
-
def self.convert( league:, season: )
|
5
|
+
def self.convert( league:, season: )
|
65
6
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
66
7
|
|
67
|
-
league = find_league( league )
|
8
|
+
league = find_league!( league )
|
9
|
+
pages = league.pages!( season: season )
|
68
10
|
|
69
|
-
pages = league.pages( season: season )
|
70
11
|
|
71
|
-
## check: rename (optional) offset to time_offset or such?
|
72
|
-
offset = OFFSETS[ league ]
|
73
|
-
|
74
|
-
|
75
|
-
# note: assume stages if pages is an array (of hash table/records)
|
76
|
-
# (and NOT a single hash table/record)
|
77
|
-
if pages.is_a?(Array)
|
78
12
|
recs = []
|
79
|
-
pages.each do |
|
80
|
-
|
81
|
-
|
82
|
-
|
13
|
+
pages.each do |slug, stage|
|
14
|
+
## note: stage might be nil
|
15
|
+
## todo/fix: report error/check if stage is nil!!!
|
16
|
+
stage ||= ''
|
83
17
|
|
84
18
|
print " parsing #{slug}..."
|
85
19
|
|
@@ -93,35 +27,24 @@ def self.convert( league:, season: )
|
|
93
27
|
print "\n"
|
94
28
|
|
95
29
|
rows = page.matches
|
96
|
-
stage_recs = build( rows,
|
30
|
+
stage_recs = build( rows,
|
31
|
+
season: season,
|
32
|
+
league: league.key,
|
33
|
+
stage: stage )
|
97
34
|
|
98
35
|
pp stage_recs[0] ## check first record
|
99
36
|
recs += stage_recs
|
100
37
|
end
|
101
|
-
else
|
102
|
-
page_meta = pages
|
103
|
-
slug = page_meta[:slug]
|
104
|
-
|
105
|
-
print " parsing #{slug}..."
|
106
|
-
|
107
|
-
page = Page::Schedule.from_cache( slug )
|
108
|
-
print " title=>#{page.title}<..."
|
109
|
-
print "\n"
|
110
38
|
|
111
|
-
rows = page.matches
|
112
|
-
recs = build( rows, season: season, league: league.key )
|
113
|
-
|
114
|
-
pp recs[0] ## check first record
|
115
|
-
end
|
116
|
-
|
117
|
-
recs = recs.map { |rec| fix_date( rec, offset ) } if offset
|
118
39
|
|
119
40
|
## note: sort matches by date before saving/writing!!!!
|
120
41
|
## note: for now assume the date is a string in 1999-11-30 format (allows sort by "simple" a-z)
|
121
42
|
## note: assume date is third column!!! (stage/round/date/...)
|
122
43
|
recs = recs.sort { |l,r| l[2] <=> r[2] }
|
123
44
|
## reformat date / beautify e.g. Sat Aug 7 1993
|
124
|
-
recs.each
|
45
|
+
recs.each do |rec|
|
46
|
+
rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' )
|
47
|
+
end
|
125
48
|
|
126
49
|
## remove unused columns (e.g. stage, et, p, etc.)
|
127
50
|
recs, headers = vacuum( recs )
|
@@ -134,29 +57,5 @@ recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b
|
|
134
57
|
puts "write #{out_path}..."
|
135
58
|
write_csv( out_path, recs, headers: headers )
|
136
59
|
end
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
## helper to fix dates to use local timezone (and not utc/london time)
|
141
|
-
def self.fix_date( row, offset )
|
142
|
-
return row if row[3].nil? || row[3].empty? ## note: time (column) required for fix
|
143
|
-
|
144
|
-
col = row[2]
|
145
|
-
if col =~ /^\d{4}-\d{2}-\d{2}$/
|
146
|
-
date_fmt = '%Y-%m-%d' # e.g. 2002-08-17
|
147
|
-
else
|
148
|
-
puts "!!! ERROR - wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
|
149
|
-
## todo/fix: add to errors/warns list - why? why not?
|
150
|
-
exit 1
|
151
|
-
end
|
152
|
-
|
153
|
-
date = DateTime.strptime( "#{row[2]} #{row[3]}", "#{date_fmt} %H:%M" )
|
154
|
-
## NOTE - MUST be -7/24.0!!!! or such to work
|
155
|
-
date = date + (offset/24.0)
|
156
|
-
|
157
|
-
row[2] = date.strftime( date_fmt ) ## overwrite "old"
|
158
|
-
row[3] = date.strftime( '%H:%M' )
|
159
|
-
row ## return row for possible pipelining - why? why not?
|
160
|
-
end
|
161
|
-
|
162
60
|
end # module Worldfootball
|
61
|
+
|
@@ -7,14 +7,9 @@ module Worldfootball
|
|
7
7
|
def self.schedule( league:, season: )
|
8
8
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
9
9
|
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
## if single (simple) page setup - wrap in array
|
15
|
-
pages = pages.is_a?(Array) ? pages : [pages]
|
16
|
-
pages.each do |page_meta|
|
17
|
-
Metal.download_schedule( page_meta[:slug] )
|
10
|
+
pages = find_league_pages!( league: league, season: season )
|
11
|
+
pages.each do |slug, _|
|
12
|
+
Metal.download_schedule( slug )
|
18
13
|
end # each page
|
19
14
|
end
|
20
15
|
|
@@ -22,14 +17,9 @@ end
|
|
22
17
|
def self.reports( league:, season:, cache: true ) ## todo/check: rename to reports_for_schedule or such - why? why not?
|
23
18
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
24
19
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
## if single (simple) page setup - wrap in array
|
30
|
-
pages = pages.is_a?(Array) ? pages : [pages]
|
31
|
-
pages.each do |page_meta|
|
32
|
-
Metal.download_reports_for_schedule( page_meta[:slug], cache: cache )
|
20
|
+
pages = find_league_pages!( league: league, season: season )
|
21
|
+
pages.each do |slug, _|
|
22
|
+
Metal.download_reports_for_schedule( slug, cache: cache )
|
33
23
|
end # each page
|
34
24
|
end
|
35
25
|
|
@@ -41,7 +31,7 @@ end
|
|
41
31
|
|
42
32
|
## todo/check: put in Downloader namespace/class - why? why not?
|
43
33
|
## or use Metal - no "porcelain" downloaders / machinery
|
44
|
-
class Metal
|
34
|
+
class Metal
|
45
35
|
|
46
36
|
BASE_URL = 'https://www.weltfussball.de'
|
47
37
|
|
@@ -117,7 +107,7 @@ class Metal
|
|
117
107
|
end
|
118
108
|
end
|
119
109
|
|
120
|
-
|
110
|
+
|
121
111
|
def self.download_page( url ) ## get & record/save to cache
|
122
112
|
response = Webget.page( url ) ## fetch (and cache) html page (via HTTP GET)
|
123
113
|
|