worldfootball 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -0
- data/Manifest.txt +29 -0
- data/README.md +27 -0
- data/Rakefile +34 -0
- data/bin/wfb +104 -0
- data/lib/worldfootball/build.rb +245 -0
- data/lib/worldfootball/convert.rb +162 -0
- data/lib/worldfootball/convert_reports.rb +107 -0
- data/lib/worldfootball/download.rb +131 -0
- data/lib/worldfootball/generator.rb +33 -0
- data/lib/worldfootball/leagues/asia.rb +53 -0
- data/lib/worldfootball/leagues/europe--british_isles.rb +64 -0
- data/lib/worldfootball/leagues/europe--central.rb +127 -0
- data/lib/worldfootball/leagues/europe--eastern.rb +82 -0
- data/lib/worldfootball/leagues/europe--northern.rb +57 -0
- data/lib/worldfootball/leagues/europe--southern.rb +86 -0
- data/lib/worldfootball/leagues/europe--western.rb +38 -0
- data/lib/worldfootball/leagues/europe.rb +13 -0
- data/lib/worldfootball/leagues/north_america.rb +44 -0
- data/lib/worldfootball/leagues/pacific.rb +21 -0
- data/lib/worldfootball/leagues/south_america.rb +11 -0
- data/lib/worldfootball/leagues.rb +200 -0
- data/lib/worldfootball/mods.rb +72 -0
- data/lib/worldfootball/page.rb +106 -0
- data/lib/worldfootball/page_report.rb +186 -0
- data/lib/worldfootball/page_schedule.rb +292 -0
- data/lib/worldfootball/vacuum.rb +66 -0
- data/lib/worldfootball/version.rb +20 -0
- data/lib/worldfootball.rb +66 -0
- metadata +169 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 170277c7714f9b75e93176eb5fff6242fb6efb85051bc8977f7f635dbebf0513
|
|
4
|
+
data.tar.gz: 5b15d132765c3ee2df4cbdd847b43ba6c7e7bd617ed55bb5fe4dfabc0becb0e0
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: c95b4b2becf545be2c208a207e8980d06369148d18b0657cddfb81470331c828ee8492649908ece372fb996b9d1a6dfc8eeaa45c54a3757eba7fb6d02e363bf0
|
|
7
|
+
data.tar.gz: ca127cb3f69c861dba48b049ed6da30b9ecde1f3935b30422d6184869b650b5cafc056be7fa37c7198bbd72ad65bdd262e56c82631c62629f4bbb4222992a9c9
|
data/CHANGELOG.md
ADDED
data/Manifest.txt
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
CHANGELOG.md
|
|
2
|
+
Manifest.txt
|
|
3
|
+
README.md
|
|
4
|
+
Rakefile
|
|
5
|
+
bin/wfb
|
|
6
|
+
lib/worldfootball.rb
|
|
7
|
+
lib/worldfootball/build.rb
|
|
8
|
+
lib/worldfootball/convert.rb
|
|
9
|
+
lib/worldfootball/convert_reports.rb
|
|
10
|
+
lib/worldfootball/download.rb
|
|
11
|
+
lib/worldfootball/generator.rb
|
|
12
|
+
lib/worldfootball/leagues.rb
|
|
13
|
+
lib/worldfootball/leagues/asia.rb
|
|
14
|
+
lib/worldfootball/leagues/europe--british_isles.rb
|
|
15
|
+
lib/worldfootball/leagues/europe--central.rb
|
|
16
|
+
lib/worldfootball/leagues/europe--eastern.rb
|
|
17
|
+
lib/worldfootball/leagues/europe--northern.rb
|
|
18
|
+
lib/worldfootball/leagues/europe--southern.rb
|
|
19
|
+
lib/worldfootball/leagues/europe--western.rb
|
|
20
|
+
lib/worldfootball/leagues/europe.rb
|
|
21
|
+
lib/worldfootball/leagues/north_america.rb
|
|
22
|
+
lib/worldfootball/leagues/pacific.rb
|
|
23
|
+
lib/worldfootball/leagues/south_america.rb
|
|
24
|
+
lib/worldfootball/mods.rb
|
|
25
|
+
lib/worldfootball/page.rb
|
|
26
|
+
lib/worldfootball/page_report.rb
|
|
27
|
+
lib/worldfootball/page_schedule.rb
|
|
28
|
+
lib/worldfootball/vacuum.rb
|
|
29
|
+
lib/worldfootball/version.rb
|
data/README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# worldfootball - get world football (leagues, cups & more) match data via the worldfootball.net/weltfussball.de pages
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
* home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
|
|
5
|
+
* bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
|
|
6
|
+
* gem :: [rubygems.org/gems/worldfootball](https://rubygems.org/gems/worldfootball)
|
|
7
|
+
* rdoc :: [rubydoc.info/gems/worldfootball](http://rubydoc.info/gems/worldfootball)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
## Usage
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
To be done
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
## License
|
|
19
|
+
|
|
20
|
+
The `worldfootball` scripts are dedicated to the public domain.
|
|
21
|
+
Use it as you please with no restrictions whatsoever.
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
## Questions? Comments?
|
|
25
|
+
|
|
26
|
+
Yes, you can. More than welcome.
|
|
27
|
+
See [Help & Support »](https://github.com/openfootball/help)
|
data/Rakefile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
require 'hoe'
|
|
2
|
+
require './lib/worldfootball/version.rb'
|
|
3
|
+
|
|
4
|
+
Hoe.spec 'worldfootball' do
|
|
5
|
+
|
|
6
|
+
self.version = Worldfootball::VERSION
|
|
7
|
+
|
|
8
|
+
self.summary = "worldfootball - get world football (leagues, cups & more) match data via the worldfootball.net/weltfussball.de pages"
|
|
9
|
+
self.description = summary
|
|
10
|
+
|
|
11
|
+
self.urls = { home: 'https://github.com/sportdb/sport.db' }
|
|
12
|
+
|
|
13
|
+
self.author = 'Gerald Bauer'
|
|
14
|
+
self.email = 'gerald.bauer@gmail.com'
|
|
15
|
+
|
|
16
|
+
# switch extension to .markdown for github formatting
|
|
17
|
+
self.readme_file = 'README.md'
|
|
18
|
+
self.history_file = 'CHANGELOG.md'
|
|
19
|
+
|
|
20
|
+
self.extra_deps = [
|
|
21
|
+
## ['tzinfo'],
|
|
22
|
+
['season-formats'],
|
|
23
|
+
['webget'],
|
|
24
|
+
['nokogiri'],
|
|
25
|
+
['cocos'], ## later pull in with sportsdb-writers
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
self.licenses = ['Public Domain']
|
|
29
|
+
|
|
30
|
+
self.spec_extras = {
|
|
31
|
+
required_ruby_version: '>= 2.2.2'
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
end
|
data/bin/wfb
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
## tip: to test run:
|
|
4
|
+
## ruby -I ./lib bin/wfb
|
|
5
|
+
|
|
6
|
+
require 'worldfootball'
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
|
10
|
+
puts " setting web cache to >/sports/cache<"
|
|
11
|
+
'/sports/cache'
|
|
12
|
+
else
|
|
13
|
+
'./cache'
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
require 'optparse'
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
module Worldfootball
|
|
21
|
+
## Command-line entry point of the wfb tool.
##
##  args - command-line arguments (defaults to ARGV); option switches
##           get removed in place by the option parser
##
##  usage:
##    wfb [--cache|--cached|--offline]                        - list league keys
##    wfb [--cache|--cached|--offline] <league> [<season>]    - download (unless cached)
##                                                               & parse schedule pages
##
##  note: listing the leagues ends the process via exit( 0 );
##        the season defaults to 2024/25
def self.main( args=ARGV )

  opts = {}
  parser = OptionParser.new do |cmd|
    cmd.banner = "Usage: #{$PROGRAM_NAME} [options]"

    ##
    ## check if git has an offline option?? (use same)
    ##   check for other tools - why? why not?

    cmd.on( "--cache", "--cached", "--offline",
            "use cached data in #{Webcache.root}" ) do |cached|
      opts[:cached] = cached
    end
  end
  parser.parse!( args )

  puts "OPTS:"
  p opts
  puts "ARGV:"
  p args


  ## no argument (or league, leagues, ls) => list all known league keys
  if ['league', 'leagues', 'ls'].include?( args[0] || 'leagues' )
    keys = LEAGUES.keys

    pp keys
    puts " #{keys.size} league(s)"

    # puts
    # puts " pages:"
    # pp Worldfootball::PAGES

    exit 0
  end


  league_code = args[0].downcase

  league = find_league( league_code )   ## league info lookup

  season = Season( args[1] || '2024/25' )

  pages = league.pages( season: season )

  pp pages

  ## wrap single page record into array
  ##   note: do NOT use Array( pages ) - a single page record is a hash
  ##           and would get converted into an array of key/value pairs
  pages = pages.is_a?( Array ) ? pages : [pages]

  ## note: report the page count AFTER wrapping;
  ##         otherwise a single page record reports its number of keys
  puts " #{pages.size} page(s)"


  ## download to cache (unless cached/offline option is set)
  unless opts[:cached]
    pages.each_with_index do |page_rec,i|
      slug = page_rec[:slug]
      puts "==> #{i+1}/#{pages.size} - #{league_code} @ #{slug}..."
      Metal.download_schedule( slug )
    end
  end


  ## parse all pages from cache and report the match count
  pages.each_with_index do |page_rec,i|
    slug = page_rec[:slug]

    puts "==> #{i+1}/#{pages.size} - #{league_code} @ #{slug}..."
    page    = Page::Schedule.from_cache( slug )
    matches = page.matches

    puts " #{matches.size} match(es)"
  end

end # def self.main
|
|
98
|
+
end # module Worldfootball
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
Worldfootball.main( ARGV )
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
puts "bye"
|
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
|
|
2
|
+
module Worldfootball
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
## maps german (de) round names to english (en) round names;
##   used by build for lookup only - frozen to guard against accidental changes
ROUND_TO_EN = {
  '1. Runde'      => 'Round 1',
  '2. Runde'      => 'Round 2',
  '3. Runde'      => 'Round 3',
  '4. Runde'      => 'Round 4',
  'Achtelfinale'  => 'Round of 16',
  'Viertelfinale' => 'Quarterfinals',
  'Halbfinale'    => 'Semifinals',
  'Finale'        => 'Final',
}.freeze
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
## todo/check: english league cup/trophy has NO ET - also support - make more flexible!!!
|
|
18
|
+
|
|
19
|
+
## build "standard" match records from "raw" table rows
##
##  rows   - raw match rows; the keys read here are
##             :round, :date, :time, :team1, :team2 and :score
##  season - season (cast via Season() on entry; used for log output only)
##  league - league key as string e.g. 'at.1', 'eng.1'  (NOT a league struct)
##  stage  - optional stage name; copied into the first column of every record
##
##  returns an array of records (one array per row) with the columns
##     stage, round, date (YYYY-MM-DD), time, team1, ft, ht, team2, et, pen, comments
##
##  raises ArgumentError if league is not a string
##
##  note: an unknown round, a missing matchday number or a missing
##          round translation prints an error message and ends the process via exit( 1 )
def self.build( rows, season:, league:, stage: '' )   ## rename to fixup or such - why? why not?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  raise ArgumentError, "league key as string expected"  unless league.is_a?(String)  ## note: do NOT pass in league struct! pass in key (string)

  print " #{rows.size} rows - build #{league} #{season}"
  print " - #{stage}" unless stage.empty?
  print "\n"


  ## note: use only first part from key for lookup
  ##    e.g. at.1  => at
  ##         eng.1 => eng
  ##         and so on
  mods = MODS[ league.split('.')[0] ] || {}

  score_errors = SCORE_ERRORS[ league ] || {}


  row_no = 0    ## 1-based row number (for log output only)
  recs   = []
  rows.each do |row|
    row_no += 1

    round_str = row[:round]

    if round_str =~ /Spieltag/
      puts
      print '[%03d] ' % row_no
      print round_str

      if (m = round_str.match( /([0-9]+)\. Spieltag/ ))
        ## todo/check: always use a string even if number (as a string eg. '1' etc.)
        round = m[1]  ## note: keep as string (NOT number)
        print " => #{round}"
      else
        puts "!! ERROR: cannot find matchday number"
        exit 1
      end
      print "\n"
    elsif round_str =~ /[1-9]\.[ ]Runde|
                        Achtelfinale|
                        Viertelfinale|
                        Halbfinale|
                        Finale
                       /x
      puts
      print '[%03d] ' % row_no
      print round_str


      ## do NOT translate rounds (to english) - keep in german / deutsch (de)
      if ['at.cup', 'at.1',    ## at.1 - incl. europa league playoff
          'de.cup'].include?( league )
        round = round_str
      else
        round = ROUND_TO_EN[ round_str ]
        if round.nil?
          puts "!! ERROR: no mapping for round to english (en) found >#{round_str}<:"
          pp row
          exit 1
        end
        print " => #{round}"
      end
      print "\n"
    else
      puts "!! ERROR: unknown round >#{round_str}< for league >#{league}<:"
      pp row
      exit 1
    end


    date_str  = row[:date]
    time_str  = row[:time]
    team1_str = row[:team1]
    team2_str = row[:team2]
    score_str = row[:score]

    ## convert date from string e.g. 2019-10-25
    date     = Date.strptime( date_str, '%Y-%m-%d' )
    date_iso = date.strftime( '%Y-%m-%d' )    ## normalized; used for lookup and record


    ### check for score_error; first (step 1) lookup by date
    score_error = score_errors[ date_iso ]
    if score_error &&
       team1_str == score_error[0] &&
       team2_str == score_error[1]
      ## team names match too; apply fix/patch!!
      ##   note: the fix gets applied even if the (wrong) score
      ##           is no longer the expected one - only a warning gets printed
      if score_str != score_error[2][0]
        puts "!! WARN - score fix changed? - expected #{score_error[2][0]}, got #{score_str} - fixing to #{score_error[2][1]}"
        pp row
      end
      puts "FIX - applying score error fix - from #{score_error[2][0]} to => #{score_error[2][1]}"
      score_str = score_error[2][1]
    end


    print '[%03d] ' % row_no
    print "%-10s | " % date_str
    print "%-5s | " % time_str
    print "%-22s | " % team1_str
    print "%-22s | " % team2_str
    print score_str
    print "\n"


    ## check for 0:3 Wert. - change Wert. to awd. (awarded)
    score_str = score_str.sub( /Wert\./i, 'awd.' )

    ## clean team name (e.g. remove (old))
    ##   and asciify (e.g. ’ to ' )
    team1_str = norm_team( team1_str )
    team2_str = norm_team( team2_str )

    ## apply (optional) per-country team name mods
    team1_str = mods[ team1_str ] if mods[ team1_str ]
    team2_str = mods[ team2_str ] if mods[ team2_str ]


    ht, ft, et, pen, comments = parse_score( score_str )


    recs << [stage,
             round,
             date_iso,
             time_str,
             team1_str,
             ft,
             ht,
             team2_str,
             et,    # extra: incl. extra time
             pen,   # extra: incl. penalties
             comments]
  end # each row
  recs
end # build
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
## split a (german) score string into its parts
##
##  score_str - score text e.g.
##                 ---                          no score yet
##                 n.gesp. / ausg. / annull.    => comments 'cancelled'
##                 abgebr.                      => comments 'abandoned'
##                 verl.                        => comments 'postponed'
##                 5-4 (0-0, 1-1, 2-2) i.E.     => pen (ht, ft, et)
##                 2-1 (1-0, 1-1) n.V.          => et (ht, ft)
##                 3-1 (2-0)                    => ft (ht)
##                 0-3 awd.                     => ft marked with (*) + comments 'awd.'
##                 2-1                          => ft only
##
##  returns [ht, ft, et, pen, comments] - all strings; unused parts are empty
##
##  note: an unsupported format prints an error message and ends the process via exit( 1 )
##  note: the patterns are checked in order (most specific first) and
##          are NOT anchored (except for the plain score)
def self.parse_score( score_str )
  ht  = ''
  ft  = ''
  et  = ''
  pen = ''
  comments = String.new( '' )   ## check - rename to/use status or such - why? why not?

  case score_str
  when '---'    ## in the future (no score yet) - was -:-
    ## nothing to do - keep all parts empty
  when 'n.gesp.',   ## cancelled (british) / canceled (us)
       'ausg.',     ## todo/check: change to some other status ????
       'annull.'    ## todo/check: change to some other status (see ie 2012) ????
    ft = '(*)'
    comments = 'cancelled'
  when 'abgebr.'    ## abandoned -- waiting for replay?
    ft = '(*)'
    comments = 'abandoned'
  when 'verl.'      ## postponed
    comments = 'postponed'
  ## 5-4 (0-0, 1-1, 2-2) i.E.
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
        [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
        [ ]*,[ ]*
        ([0-9]+) [ ]*-[ ]* ([0-9]+)
        [ ]*,[ ]*
        ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
        [ ]*
        i\.E\.
       /x
    m   = Regexp.last_match
    pen = "#{m[1]}-#{m[2]}"
    ht  = "#{m[3]}-#{m[4]}"
    ft  = "#{m[5]}-#{m[6]}"
    et  = "#{m[7]}-#{m[8]}"
  ## 2-1 (1-0, 1-1) n.V.
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
        [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
        [ ]*,[ ]*
        ([0-9]+) [ ]*-[ ]* ([0-9]+)
        \)
        [ ]*
        n\.V\.
       /x
    m  = Regexp.last_match
    et = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
    ft = "#{m[5]}-#{m[6]}"
  ## 3-1 (2-0)
  when /([0-9]+)
        [ ]*-[ ]*
        ([0-9]+)
        [ ]*
        \(([0-9]+)
        [ ]*-[ ]*
        ([0-9]+)
        \)
       /x
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
  ## 0-3 awd.  - score followed by a (lowercase) remark
  when /([0-9]+)
        [ ]*-[ ]*
        ([0-9]+)
        [ ]*
        ([a-z.]+)
       /x
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]} (*)"
    comments = m[3]
  ## 2-1  - plain score  e.g. see luxemburg and others
  ##    note: allow (optional) spaces around the dash like all other patterns
  when /^([0-9]+)[ ]*-[ ]*([0-9]+)$/
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"
  else
    puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
    exit 1
  end

  [ht, ft, et, pen, comments]
end
|
|
244
|
+
|
|
245
|
+
end # module Worldfootball
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
|
|
2
|
+
module Worldfootball
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
#################
|
|
6
|
+
# todo/fix - use timezone instead of offset !!!
|
|
7
|
+
# e.g
|
|
8
|
+
=begin
|
|
9
|
+
TIMEZONES = {
|
|
10
|
+
'eng.1' => 'Europe/London',
|
|
11
|
+
'eng.2' => 'Europe/London',
|
|
12
|
+
|
|
13
|
+
'es.1' => 'Europe/Madrid',
|
|
14
|
+
|
|
15
|
+
'de.1' => 'Europe/Berlin',
|
|
16
|
+
'fr.1' => 'Europe/Paris',
|
|
17
|
+
'it.1' => 'Europe/Rome',
|
|
18
|
+
'nl.1' => 'Europe/Amsterdam',
|
|
19
|
+
|
|
20
|
+
'pt.1' => 'Europe/Lisbon',
|
|
21
|
+
|
|
22
|
+
## todo/fix - pt.1
|
|
23
|
+
## one team in madeira!!! check for different timezone??
|
|
24
|
+
## CD Nacional da Madeira
|
|
25
|
+
|
|
26
|
+
'br.1' => 'America/Sao_Paulo',
|
|
27
|
+
## todo/fix - brazil has 4 timezones
|
|
28
|
+
## really only two in use for clubs
|
|
29
|
+
## west and east (amazonas et al)
|
|
30
|
+
## for now use west for all - why? why not?
|
|
31
|
+
}
|
|
32
|
+
=end
|
|
33
|
+
|
|
34
|
+
## todo - find "proper/classic" timezone ("winter time")

##  Brasilia - Federal District, Brazil (GMT-3)  -- summer time?
##  Mexico City, CDMX, Mexico (GMT-5)  -- summer time?
##  London, United Kingdom (GMT+1)
##  Madrid -- ?
##  Lisbon -- ?
##  Moscow -- ?
##
## todo/check - quick fix timezone offsets for leagues for now
##   - find something better - why? why not?
## note: assume time is in GMT+1
##
## offsets in hours by league key; leagues not listed get no offset;
##   frozen to guard against accidental changes
OFFSETS = {
  'eng.1' => -1,
  'eng.2' => -1,
  'eng.3' => -1,
  'eng.4' => -1,
  'eng.5' => -1,

  'es.1'  => -1,
  'es.2'  => -1,

  'pt.1'  => -1,
  'pt.2'  => -1,

  'br.1'  => -5,
  'mx.1'  => -7,
}.freeze
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
## convert the (cached) schedule page(s) of a league season
##   into a match datafile in the comma-separated values (.csv) format
##
##  league - league key (string) e.g. 'eng.1'; looked up via find_league
##  season - season (cast via Season() on entry)
##
##  steps: parse page(s) from cache => build records => fix date/time by
##           (optional) league offset => sort by date => reformat date =>
##           remove unused columns (vacuum) => write csv
##
##  writes to <config.convert.out_dir>/<season.path>/<league.key>.csv
def self.convert( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

  pages = league.pages( season: season )

  ## check: rename (optional) offset to time_offset or such?
  ##   note: OFFSETS is keyed by league key (string);
  ##         league holds the league record by now - use league.key for lookup
  offset = OFFSETS[ league.key ]


  ## parse a single page from cache and return the raw match rows
  parse_page = lambda do |slug|
    print " parsing #{slug}..."

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    page.matches
  end


  # note: assume stages if pages is an array (of hash table/records)
  #         (and NOT a single hash table/record)
  if pages.is_a?(Array)
    recs = []
    pages.each do |page_meta|
      slug       = page_meta[:slug]
      stage_name = page_meta[:stage]
      ## todo/fix: report error/check if stage.name is nil!!!

      # unless File.exist?( path )
      #   puts "!! WARN - missing stage >#{stage_name}< source - >#{path}<"
      #   next
      # end

      rows = parse_page.call( slug )
      stage_recs = build( rows, season: season, league: league.key, stage: stage_name )

      pp stage_recs[0]   ## check first record
      recs += stage_recs
    end
  else
    rows = parse_page.call( pages[:slug] )
    recs = build( rows, season: season, league: league.key )

    pp recs[0]   ## check first record
  end

  recs = recs.map { |rec| fix_date( rec, offset ) }   if offset

  ## note: sort matches by date before saving/writing!!!!
  ##   note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
  ##   note: assume date is third column!!! (stage/round/date/...)
  recs = recs.sort { |l,r| l[2] <=> r[2] }
  ## reformat date / beautify e.g. Sat Aug 7 1993
  recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' ) }

  ## remove unused columns (e.g. stage, et, p, etc.)
  recs, headers = vacuum( recs )

  puts headers
  pp recs[0]   ## check first record

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

  puts "write #{out_path}..."
  write_csv( out_path, recs, headers: headers )
end
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
## helper to fix dates to use local timezone (and not utc/london time)
##
##  row    - match record; date (YYYY-MM-DD) expected in the third column (row[2])
##             and time (HH:MM) in the fourth column (row[3])
##  offset - offset in hours (may be negative) e.g. -1, -7
##
##  returns the row; date and time get overwritten in place
##            (a row with no or an empty time is returned unchanged)
##
##  note: an unknown date format prints an error message and ends the process via exit( 1 )
def self.fix_date( row, offset )
  return row  if row[3].nil? || row[3].empty?   ## note: time (column) required for fix

  col = row[2]
  unless col =~ /^\d{4}-\d{2}-\d{2}$/
    puts "!!! ERROR - wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
    ## todo/fix: add to errors/warns list - why? why not?
    exit 1
  end
  date_fmt = '%Y-%m-%d'   # e.g. 2002-08-17

  date = DateTime.strptime( "#{col} #{row[3]}", "#{date_fmt} %H:%M" )
  ## NOTE - the offset MUST be a fraction of a day
  ##   (-7/24 with integer division is -1, that is, a full day!)
  ##   use an exact rational (and not a float e.g. -7/24.0)
  date += Rational( offset, 24 )

  row[2] = date.strftime( date_fmt )   ## overwrite "old"
  row[3] = date.strftime( '%H:%M' )
  row   ## return row for possible pipelining - why? why not?
end
|
|
161
|
+
|
|
162
|
+
end # module Worldfootball
|