worldfootball 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -0
- data/Manifest.txt +29 -0
- data/README.md +27 -0
- data/Rakefile +34 -0
- data/bin/wfb +104 -0
- data/lib/worldfootball/build.rb +245 -0
- data/lib/worldfootball/convert.rb +162 -0
- data/lib/worldfootball/convert_reports.rb +107 -0
- data/lib/worldfootball/download.rb +131 -0
- data/lib/worldfootball/generator.rb +33 -0
- data/lib/worldfootball/leagues/asia.rb +53 -0
- data/lib/worldfootball/leagues/europe--british_isles.rb +64 -0
- data/lib/worldfootball/leagues/europe--central.rb +127 -0
- data/lib/worldfootball/leagues/europe--eastern.rb +82 -0
- data/lib/worldfootball/leagues/europe--northern.rb +57 -0
- data/lib/worldfootball/leagues/europe--southern.rb +86 -0
- data/lib/worldfootball/leagues/europe--western.rb +38 -0
- data/lib/worldfootball/leagues/europe.rb +13 -0
- data/lib/worldfootball/leagues/north_america.rb +44 -0
- data/lib/worldfootball/leagues/pacific.rb +21 -0
- data/lib/worldfootball/leagues/south_america.rb +11 -0
- data/lib/worldfootball/leagues.rb +200 -0
- data/lib/worldfootball/mods.rb +72 -0
- data/lib/worldfootball/page.rb +106 -0
- data/lib/worldfootball/page_report.rb +186 -0
- data/lib/worldfootball/page_schedule.rb +292 -0
- data/lib/worldfootball/vacuum.rb +66 -0
- data/lib/worldfootball/version.rb +20 -0
- data/lib/worldfootball.rb +66 -0
- metadata +169 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 170277c7714f9b75e93176eb5fff6242fb6efb85051bc8977f7f635dbebf0513
|
4
|
+
data.tar.gz: 5b15d132765c3ee2df4cbdd847b43ba6c7e7bd617ed55bb5fe4dfabc0becb0e0
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: c95b4b2becf545be2c208a207e8980d06369148d18b0657cddfb81470331c828ee8492649908ece372fb996b9d1a6dfc8eeaa45c54a3757eba7fb6d02e363bf0
|
7
|
+
data.tar.gz: ca127cb3f69c861dba48b049ed6da30b9ecde1f3935b30422d6184869b650b5cafc056be7fa37c7198bbd72ad65bdd262e56c82631c62629f4bbb4222992a9c9
|
data/CHANGELOG.md
ADDED
data/Manifest.txt
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
CHANGELOG.md
|
2
|
+
Manifest.txt
|
3
|
+
README.md
|
4
|
+
Rakefile
|
5
|
+
bin/wfb
|
6
|
+
lib/worldfootball.rb
|
7
|
+
lib/worldfootball/build.rb
|
8
|
+
lib/worldfootball/convert.rb
|
9
|
+
lib/worldfootball/convert_reports.rb
|
10
|
+
lib/worldfootball/download.rb
|
11
|
+
lib/worldfootball/generator.rb
|
12
|
+
lib/worldfootball/leagues.rb
|
13
|
+
lib/worldfootball/leagues/asia.rb
|
14
|
+
lib/worldfootball/leagues/europe--british_isles.rb
|
15
|
+
lib/worldfootball/leagues/europe--central.rb
|
16
|
+
lib/worldfootball/leagues/europe--eastern.rb
|
17
|
+
lib/worldfootball/leagues/europe--northern.rb
|
18
|
+
lib/worldfootball/leagues/europe--southern.rb
|
19
|
+
lib/worldfootball/leagues/europe--western.rb
|
20
|
+
lib/worldfootball/leagues/europe.rb
|
21
|
+
lib/worldfootball/leagues/north_america.rb
|
22
|
+
lib/worldfootball/leagues/pacific.rb
|
23
|
+
lib/worldfootball/leagues/south_america.rb
|
24
|
+
lib/worldfootball/mods.rb
|
25
|
+
lib/worldfootball/page.rb
|
26
|
+
lib/worldfootball/page_report.rb
|
27
|
+
lib/worldfootball/page_schedule.rb
|
28
|
+
lib/worldfootball/vacuum.rb
|
29
|
+
lib/worldfootball/version.rb
|
data/README.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# worldfootball - get world football (leagues, cups & more) match data via the worldfootball.net/weltfussball.de pages
|
2
|
+
|
3
|
+
|
4
|
+
* home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
|
5
|
+
* bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
|
6
|
+
* gem :: [rubygems.org/gems/worldfootball](https://rubygems.org/gems/worldfootball)
|
7
|
+
* rdoc :: [rubydoc.info/gems/worldfootball](http://rubydoc.info/gems/worldfootball)
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
|
14
|
+
To be done
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
## License
|
19
|
+
|
20
|
+
The `worldfootball` scripts are dedicated to the public domain.
|
21
|
+
Use it as you please with no restrictions whatsoever.
|
22
|
+
|
23
|
+
|
24
|
+
## Questions? Comments?
|
25
|
+
|
26
|
+
Yes, you can. More than welcome.
|
27
|
+
See [Help & Support »](https://github.com/openfootball/help)
|
data/Rakefile
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'hoe'
|
2
|
+
require './lib/worldfootball/version.rb'
|
3
|
+
|
4
|
+
## gem specification (via hoe) for the worldfootball gem
Hoe.spec 'worldfootball' do
  ## identity
  self.version     = Worldfootball::VERSION
  self.summary     = "worldfootball - get world football (leagues, cups & more) match data via the worldfootball.net/weltfussball.de pages"
  self.description = summary

  ## project links & maintainer
  self.urls   = { home: 'https://github.com/sportdb/sport.db' }
  self.author = 'Gerald Bauer'
  self.email  = 'gerald.bauer@gmail.com'

  ## use markdown (.md) files for github formatting
  self.readme_file  = 'README.md'
  self.history_file = 'CHANGELOG.md'

  ## runtime dependencies - one [name] entry per gem (no version constraints)
  ##   note: tzinfo is NOT (yet) a dependency
  ##         cocos - later pull in with sportsdb-writers
  self.extra_deps = %w[
    season-formats
    webget
    nokogiri
    cocos
  ].map { |gem_name| [gem_name] }

  self.licenses = ['Public Domain']

  self.spec_extras = { required_ruby_version: '>= 2.2.2' }
end
|
data/bin/wfb
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfb
|
5
|
+
|
6
|
+
require 'worldfootball'
|
7
|
+
|
8
|
+
|
9
|
+
## pick the web cache directory:
##   use the shared /sports/cache if it exists (and report it);
##   otherwise fall back to ./cache (relative to the working directory)
if File.exist?( '/sports/cache' )
  puts " setting web cache to >/sports/cache<"
  Webcache.root = '/sports/cache'
else
  Webcache.root = './cache'
end
|
15
|
+
|
16
|
+
|
17
|
+
require 'optparse'
|
18
|
+
|
19
|
+
|
20
|
+
module Worldfootball
  ## command line entry point for the wfb tool
  ##
  ##  usage:
  ##    wfb [options]                   - print all known league keys (and exit)
  ##    wfb [options] LEAGUE [SEASON]   - download (unless --cache/--cached/--offline)
  ##                                       and parse the schedule page(s)
  ##                                       for the league; season defaults to 2024/25
  ##
  ##  args - command line arguments (array of strings); defaults to ARGV
  ##         note: options get removed from args in-place (via parse!)
  def self.main( args=ARGV )

    opts = {}
    ## note: use a different name for the block param (do NOT shadow parser)
    parser = OptionParser.new do |o|
      o.banner = "Usage: #{$PROGRAM_NAME} [options]"

      ##
      ## check if git has an offline option?? (use same)
      ##  check for other tools - why? why not?

      o.on( "--cache", "--cached", "--offline",
            "use cached data in #{Webcache.root}" ) do |cached|
        opts[:cached] = cached
      end
    end
    parser.parse!( args )

    puts "OPTS:"
    p opts
    puts "ARGV:"
    p args


    ## no argument (or league/leagues/ls) - list all league keys and exit
    if ['league', 'leagues', 'ls'].include?( args[0] || 'leagues' )
      keys = LEAGUES.keys

      pp keys
      puts " #{keys.size} league(s)"

      # puts
      # puts " pages:"
      # pp Worldfootball::PAGES

      exit 0
    end


    league_code = args[0].downcase

    league = find_league( league_code )   ## league info lookup

    season = Season( args[1] || '2024/25' )

    pages = league.pages( season: season )

    pp pages

    ## wrap single page record into array
    ##   note: do NOT use Array( pages ) - a hash record would get
    ##           converted into an array of key/value pairs
    ##   note: wrap BEFORE reporting the count - otherwise the size of
    ##           a single page record (hash) is its number of keys
    pages = pages.is_a?( Array ) ? pages : [pages]
    puts " #{pages.size} page(s)"


    ## download to cache (skip if running cached/offline)
    unless opts[:cached]
      pages.each_with_index do |page_rec,i|
        slug = page_rec[:slug]
        puts "==> #{i+1}/#{pages.size} - #{league_code} @ #{slug}..."
        Metal.download_schedule( slug )
      end
    end


    ## parse page(s) from cache and report number of matches
    pages.each_with_index do |page_rec,i|
      slug = page_rec[:slug]

      puts "==> #{i+1}/#{pages.size} - #{league_code} @ #{slug}..."
      page = Page::Schedule.from_cache( slug )
      matches = page.matches

      puts " #{matches.size} match(es)"
    end

  end # def self.main
end # module Worldfootball
|
99
|
+
|
100
|
+
|
101
|
+
Worldfootball.main( ARGV )
|
102
|
+
|
103
|
+
|
104
|
+
puts "bye"
|
@@ -0,0 +1,245 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
|
4
|
+
|
5
|
+
## mapping of (knockout) round names from german (de) to english (en)
##   note: lookup is by exact round name; frozen - read-only lookup table
ROUND_TO_EN = {
  '1. Runde'      => 'Round 1',
  '2. Runde'      => 'Round 2',
  '3. Runde'      => 'Round 3',
  '4. Runde'      => 'Round 4',
  'Achtelfinale'  => 'Round of 16',
  'Viertelfinale' => 'Quarterfinals',
  'Halbfinale'    => 'Semifinals',
  'Finale'        => 'Final',
}.freeze
|
15
|
+
|
16
|
+
|
17
|
+
## todo/check: english league cup/trophy has NO ET - also support - make more flexible!!!
|
18
|
+
|
19
|
+
## build "standard" match records from "raw" table rows
|
20
|
+
## build "standard" match records from "raw" table rows
##
##  rows    - array of records (hash) with the keys
##              :round, :date, :time, :team1, :team2, :score
##              note: :date is expected in the format 2019-10-25 (%Y-%m-%d)
##  season: - season (gets cast via Season())
##  league: - league key as string (e.g. at.1, eng.1) - NOT a league struct
##  stage:  - (optional) stage name; nil or empty string if none
##
##  returns an array of records (arrays) with the columns
##    [stage, round, date, time, team1, ft, ht, team2, et, pen, comments]
##
##  raises ArgumentError if league is not a string
##  note: prints an error and exits (1) if the round is unknown
##          or has no matchday number / no english (en) mapping
def self.build( rows, season:, league:, stage: '' )   ## rename to fixup or such - why? why not?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  raise ArgumentError, "league key as string expected"  unless league.is_a?(String)  ## note: do NOT pass in league struct! pass in key (string)

  print " #{rows.size} rows - build #{league} #{season}"
  ## note: stage might be nil (e.g. page record without a stage) - handle like empty
  print " - #{stage}"   unless stage.nil? || stage.empty?
  print "\n"


  ## note: use only first part from key for lookup
  ##        e.g. at.1  => at
  ##             eng.1 => eng
  ##             and so on
  mods = MODS[ league.split('.')[0] ] || {}

  score_errors = SCORE_ERRORS[ league ] || {}


  i = 0
  recs = []
  rows.each do |row|
    ## NOTE(review): the printed row label is (i+1) AFTER the increment,
    ##   that is, the first row gets printed as [002] - presumably to
    ##   account for a header line; confirm
    i += 1


    if row[:round] =~ /Spieltag/
      puts
      print '[%03d] ' % (i+1)
      print row[:round]

      if m = row[:round].match( /([0-9]+)\. Spieltag/ )
        ## todo/check: always use a string even if number (as a string eg. '1' etc.)
        round = m[1]  ## note: keep as string (NOT number)
        print " => #{round}"
      else
        puts "!! ERROR: cannot find matchday number"
        exit 1
      end
      print "\n"
    elsif row[:round] =~ /[1-9]\.[ ]Runde|
                           Achtelfinale|
                           Viertelfinale|
                           Halbfinale|
                           Finale
                           /x
      puts
      print '[%03d] ' % (i+1)
      print row[:round]


      ## do NOT translate rounds (to english) - keep in german / deutsch (de)
      if ['at.cup', 'at.1',  ## at.1 - incl. europa league playoff
          'de.cup'].include?( league )
        round = row[:round]
      else
        round = ROUND_TO_EN[ row[:round] ]
        if round.nil?
          puts "!! ERROR: no mapping for round to english (en) found >#{row[:round]}<:"
          pp row
          exit 1
        end
        print " => #{round}"
      end
      print "\n"
    else
      puts "!! ERROR: unknown round >#{row[:round]}< for league >#{league}<:"
      pp row
      exit 1
    end


    date_str  = row[:date]
    time_str  = row[:time]
    team1_str = row[:team1]
    team2_str = row[:team2]
    score_str = row[:score]

    ## convert date from string e.g. 2019-10-25
    date     = Date.strptime( date_str, '%Y-%m-%d' )
    date_iso = date.strftime( '%Y-%m-%d' )    ## normalized (used for lookup & record)


    ### check for score_error; first (step 1) lookup by date
    ##    note: score error record is [team1, team2, [old score, new score]]
    score_error = score_errors[ date_iso ]
    if score_error
      if team1_str == score_error[0] &&
         team2_str == score_error[1]
         ## check if team names match too; if yes, apply fix/patch!!
         if score_str != score_error[2][0]
           puts "!! WARN - score fix changed? - expected #{score_error[2][0]}, got #{score_str} - fixing to #{score_error[2][1]}"
           pp row
         end
         puts "FIX - applying score error fix - from #{score_error[2][0]} to => #{score_error[2][1]}"
         score_str = score_error[2][1]
      end
    end


    print '[%03d] ' % (i+1)
    print "%-10s | " % date_str
    print "%-5s | " % time_str
    print "%-22s | " % team1_str
    print "%-22s | " % team2_str
    print score_str
    print "\n"


    ## check for 0:3 Wert. - change Wert. to awd. (awarded)
    score_str = score_str.sub( /Wert\./i, 'awd.' )

    ## clean team name (e.g. remove (old))
    ##   and asciify (e.g. ’ to ' )
    team1_str = norm_team( team1_str )
    team2_str = norm_team( team2_str )

    ## apply (optional) team name mods/patches
    team1_str = mods[ team1_str ]   if mods[ team1_str ]
    team2_str = mods[ team2_str ]   if mods[ team2_str ]


    ht, ft, et, pen, comments = parse_score( score_str )


    recs << [stage,
             round,
             date_iso,
             time_str,
             team1_str,
             ft,
             ht,
             team2_str,
             et,        # extra: incl. extra time
             pen,       # extra: incl. penalties
             comments]
  end # each row
  recs
end # build
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
## parse a (german) score string into its parts
##
##  supported formats (spaces around the dash are optional):
##    ---                         - no score yet (match in the future)
##    n.gesp. / ausg. / annull.   - cancelled
##    abgebr.                     - abandoned
##    verl.                       - postponed
##    5-4 (0-0, 1-1, 2-2) i.E.    - penalties (half-time, full-time, extra time)
##    2-1 (1-0, 1-1) n.V.         - after extra time (half-time, full-time)
##    2-1 (1-0)                   - full-time (half-time)
##    0-3 awd.                    - score with a status/comment
##    2-1                         - full-time only
##
##  returns [ht, ft, et, pen, comments] (all strings; empty if not available)
##  note: prints an error and exits (1) on an unsupported format
def self.parse_score( score_str )
  comments = String.new( '' )   ## check - rename to/use status or such - why? why not?

  ## split score
  ft  = ''
  ht  = ''
  et  = ''
  pen = ''

  case score_str
  when '---'      ## in the future (no score yet) - was -:-
    ## nothing to do - keep all parts empty
  when 'n.gesp.',   ## cancelled (british) / canceled (us)
       'ausg.',     ## todo/check: change to some other status ????
       'annull.'    ## todo/check: change to some other status (see ie 2012) ????
    ft = '(*)'
    comments = 'cancelled'
  when 'abgebr.'  ## abandoned -- waiting for replay?
    ft = '(*)'
    comments = 'abandoned'
  when 'verl.'    ## postponed
    comments = 'postponed'
  # 5-4 (0-0, 1-1, 2-2) i.E.
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
         [ ]*
         \(([0-9]+) [ ]*-[ ]* ([0-9]+)
           [ ]*,[ ]*
           ([0-9]+) [ ]*-[ ]* ([0-9]+)
           [ ]*,[ ]*
           ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
         [ ]*
         i\.E\.
        /x
    m   = Regexp.last_match
    pen = "#{m[1]}-#{m[2]}"
    ht  = "#{m[3]}-#{m[4]}"
    ft  = "#{m[5]}-#{m[6]}"
    et  = "#{m[7]}-#{m[8]}"
  # 2-1 (1-0, 1-1) n.V
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
         [ ]*
         \(([0-9]+) [ ]*-[ ]* ([0-9]+)
           [ ]*,[ ]*
           ([0-9]+) [ ]*-[ ]* ([0-9]+)
         \)
         [ ]*
         n\.V\.
        /x
    m  = Regexp.last_match
    et = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
    ft = "#{m[5]}-#{m[6]}"
  # 2-1 (1-0)
  when /([0-9]+)
         [ ]*-[ ]*
         ([0-9]+)
         [ ]*
         \(([0-9]+)
           [ ]*-[ ]*
           ([0-9]+)
         \)
        /x
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
  # 0-3 awd.  - score plus status (lower case letters and dots)
  when /([0-9]+)
         [ ]*-[ ]*
         ([0-9]+)
         [ ]*
         ([a-z.]+)
        /x
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]} (*)"
    comments = m[3]
  # 2-1  - note: allow (optional) spaces around the dash
  #               (same as in all other score formats)
  when /^([0-9]+)[ ]*-[ ]*([0-9]+)$/
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"     ## e.g. see luxemburg and others
  else
    puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
    exit 1
  end

  [ht, ft, et, pen, comments]
end
|
244
|
+
|
245
|
+
end # module Worldfootball
|
@@ -0,0 +1,162 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
|
4
|
+
|
5
|
+
#################
|
6
|
+
# todo/fix - use timezone instead of offset !!!
|
7
|
+
# e.g
|
8
|
+
=begin
|
9
|
+
TIMEZONES = {
|
10
|
+
'eng.1' => 'Europe/London',
|
11
|
+
'eng.2' => 'Europe/London',
|
12
|
+
|
13
|
+
'es.1' => 'Europe/Madrid',
|
14
|
+
|
15
|
+
'de.1' => 'Europe/Berlin',
|
16
|
+
'fr.1' => 'Europe/Paris',
|
17
|
+
'it.1' => 'Europe/Rome',
|
18
|
+
'nl.1' => 'Europe/Amsterdam',
|
19
|
+
|
20
|
+
'pt.1' => 'Europe/Lisbon',
|
21
|
+
|
22
|
+
## todo/fix - pt.1
|
23
|
+
## one team in madeira!!! check for different timezone??
|
24
|
+
## CD Nacional da Madeira
|
25
|
+
|
26
|
+
'br.1' => 'America/Sao_Paulo',
|
27
|
+
## todo/fix - brazil has 4 timezones
|
28
|
+
## really only two in use for clubs
|
29
|
+
## west and east (amazonas et al)
|
30
|
+
## for now use west for all - why? why not?
|
31
|
+
}
|
32
|
+
=end
|
33
|
+
|
34
|
+
## todo - find "proper/classic" timezone ("winter time")
|
35
|
+
|
36
|
+
## Brasilia - Distrito Federal, Brasil (GMT-3) -- summer time?
|
37
|
+
## Ciudad de México, CDMX, México (GMT-5) -- summer time?
|
38
|
+
## Londres, Reino Unido (GMT+1)
|
39
|
+
## Madrid -- ?
|
40
|
+
## Lisboa -- ?
|
41
|
+
## Moscow -- ?
|
42
|
+
##
|
43
|
+
## todo/check - quick fix timezone offsets for leagues for now
|
44
|
+
## - find something better - why? why not?
|
45
|
+
## note: assume time is in GMT+1
|
46
|
+
## time offsets in hours by league key
##   note: assume (source) time is in GMT+1
##   note: frozen - read-only lookup table
OFFSETS = {
  'eng.1' => -1,
  'eng.2' => -1,
  'eng.3' => -1,
  'eng.4' => -1,
  'eng.5' => -1,

  'es.1'  => -1,
  'es.2'  => -1,

  'pt.1'  => -1,
  'pt.2'  => -1,

  'br.1'  => -5,
  'mx.1'  => -7,
}.freeze
|
62
|
+
|
63
|
+
|
64
|
+
## convert the (cached) schedule page(s) for a league & season
##   into match records and write them out as a .csv datafile
##
##  league: - league (key); gets looked up via find_league
##  season: - season (gets cast via Season())
##
##  steps:
##    1) parse page(s) from cache and build the match records
##    2) fix date/time if the league has a time offset (see OFFSETS)
##    3) sort by date, reformat date and remove unused columns
##    4) write to <out_dir>/<season.path>/<league.key>.csv
def self.convert( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

  pages = league.pages( season: season )

  ## check: rename (optional) offset to time_offset or such?
  ##  note: OFFSETS is keyed by league key (string) -
  ##          lookup by key and NOT by league (info) record;
  ##          otherwise the offset is always nil (and never applied)
  offset = OFFSETS[ league.key ]


  # note: assume stages if pages is an array (of hash table/records)
  #         (and NOT a single hash table/record)
  staged     = pages.is_a?(Array)
  page_metas = staged ? pages : [pages]

  recs = []
  page_metas.each do |page_meta|
    slug = page_meta[:slug]

    print " parsing #{slug}..."

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    rows = page.matches
    page_recs = if staged
                  ## todo/fix: report error/check if stage.name is nil!!!
                  build( rows, season: season, league: league.key, stage: page_meta[:stage] )
                else
                  build( rows, season: season, league: league.key )
                end

    pp page_recs[0]   ## check first record
    recs += page_recs
  end

  recs = recs.map { |rec| fix_date( rec, offset ) }    if offset

  ##   note:  sort matches by date before saving/writing!!!!
  ##     note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
  ## note: assume date is third column!!! (stage/round/date/...)
  recs = recs.sort { |l,r| l[2] <=> r[2] }
  ## reformat date / beautify e.g. Sat Aug 7 1993
  recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' ) }

  ## remove unused columns (e.g. stage, et, p, etc.)
  recs, headers = vacuum( recs )

  puts headers
  pp recs[0]   ## check first record

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

  puts "write #{out_path}..."
  write_csv( out_path, recs, headers: headers )
end
|
137
|
+
|
138
|
+
|
139
|
+
|
140
|
+
## helper to fix dates to use local timezone (and not utc/london time)
|
141
|
+
## shift the date & time of a match record by an offset in hours
##
##  row    - match record (array) with the date (e.g. 2002-08-17) in the
##             third column and the time (e.g. 20:45) in the fourth column
##  offset - offset in hours (e.g. -1, -7)
##
##  returns the (very same) row with date & time overwritten in-place;
##    a row without a time (nil or empty) is returned unchanged
##  note: prints an error and exits (1) if the date is NOT in the 2002-08-17 format
def self.fix_date( row, offset )
  date_str, time_str = row[2], row[3]

  ## note: time (column) required for fix
  return row   if time_str.nil? || time_str.empty?

  unless date_str =~ /^\d{4}-\d{2}-\d{2}$/
    puts "!!! ERROR - wrong (unknown) date format >>#{date_str}<<; cannot continue; fix it; sorry"
    ## todo/fix: add to errors/warns list - why? why not?
    exit 1
  end

  iso_fmt = '%Y-%m-%d'    # e.g. 2002-08-17

  ## NOTE - the offset in days MUST be a float division (e.g. -7/24.0) to work
  ##          (the integer division -7/24 results in -1, that is, a full day)
  shifted = DateTime.strptime( "#{date_str} #{time_str}", "#{iso_fmt} %H:%M" ) + (offset/24.0)

  row[2] = shifted.strftime( iso_fmt )    ## overwrite "old"
  row[3] = shifted.strftime( '%H:%M' )
  row     ## return row for possible pipelining - why? why not?
end
|
161
|
+
|
162
|
+
end # module Worldfootball
|