worldfootball 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +6 -0
- data/Manifest.txt +29 -0
- data/README.md +27 -0
- data/Rakefile +34 -0
- data/bin/wfb +104 -0
- data/lib/worldfootball/build.rb +245 -0
- data/lib/worldfootball/convert.rb +162 -0
- data/lib/worldfootball/convert_reports.rb +107 -0
- data/lib/worldfootball/download.rb +131 -0
- data/lib/worldfootball/generator.rb +33 -0
- data/lib/worldfootball/leagues/asia.rb +53 -0
- data/lib/worldfootball/leagues/europe--british_isles.rb +64 -0
- data/lib/worldfootball/leagues/europe--central.rb +127 -0
- data/lib/worldfootball/leagues/europe--eastern.rb +82 -0
- data/lib/worldfootball/leagues/europe--northern.rb +57 -0
- data/lib/worldfootball/leagues/europe--southern.rb +86 -0
- data/lib/worldfootball/leagues/europe--western.rb +38 -0
- data/lib/worldfootball/leagues/europe.rb +13 -0
- data/lib/worldfootball/leagues/north_america.rb +44 -0
- data/lib/worldfootball/leagues/pacific.rb +21 -0
- data/lib/worldfootball/leagues/south_america.rb +11 -0
- data/lib/worldfootball/leagues.rb +200 -0
- data/lib/worldfootball/mods.rb +72 -0
- data/lib/worldfootball/page.rb +106 -0
- data/lib/worldfootball/page_report.rb +186 -0
- data/lib/worldfootball/page_schedule.rb +292 -0
- data/lib/worldfootball/vacuum.rb +66 -0
- data/lib/worldfootball/version.rb +20 -0
- data/lib/worldfootball.rb +66 -0
- metadata +169 -0
@@ -0,0 +1,86 @@
|
|
1
|
+
|
2
|
+
module Worldfootball

##
#  Southern Europe - league key => page configuration
#
#    pages:   one page slug (string), a list of page slugs (array) or
#             a page slug => stage name mapping (hash)
#    season:  (optional) proc - returns the page number(s) (starting with 1)
#             to use for the season passed in

LEAGUES_EUROPE.update({

  ## Italy
  #   e.g. /ita-serie-a-2019-2020/
  #        /ita-serie-b-2020-2021/
  'it.1' => { pages: 'ita-serie-a' },
  'it.2' => { pages: 'ita-serie-b' },


  ## Portugal
  #   e.g. /por-primeira-liga-2019-2020/
  #          por-primeira-liga-2020-2021
  #          por-primeira-liga-2019-2020
  #          por-primeira-liga-2018-2019
  #          por-primeira-liga-2017-2018
  #          por-primeira-liga-2016-2017
  #          por-primeira-liga-2015-2016
  #          por-primeira-liga-2014-2015
  #          por-primeira-liga-2013-2014
  #          por-liga-zon-sagres-2012-2013
  #          por-liga-zon-sagres-2011-2012
  #          por-liga-sagres-2010-2011
  #          ...
  #        /por-segunda-liga-2019-2020/
  #
  #   note: sponsorship names by season
  #     2002–2005: SuperLiga GalpEnergia
  #     2005–2006: Liga betandwin.com
  #     2006–2008: BWINLIGA
  #     2008–2010: Liga Sagres
  #     2010–2014: Liga ZON Sagres
  #     2014–2020: Liga NOS
  'pt.1' => {
    pages:  [
              'por-primeira-liga',      # 1
              'por-liga-zon-sagres',    # 2
              'por-liga-sagres',        # 3
            ],
    season: ->( s ) {
              case s
              when Season('2013/14')..Season('2020/21')  then 1
              when Season('2011/12')..Season('2012/13')  then 2
              when Season('2010/11')                     then 3
              end
            },
  },
  'pt.2' => { pages: 'por-segunda-liga' },


  ## Spain
  #   e.g. /esp-primera-division-2019-2020/
  'es.1' => { pages: 'esp-primera-division' },
  'es.2' => { pages: 'esp-segunda-division' },


  ## Turkey
  #   e.g. /tur-sueperlig-2020-2021/
  'tr.1' => { pages: 'tur-sueperlig' },
  'tr.2' => { pages: 'tur-1-lig' },


  ## Greece
  #   e.g. /gre-super-league-2020-2021/
  'gr.1' => {
    pages:  {
              ## note: page slug changed from superleague to super-league !!!
              'gre-super-league'                            => 'Regular Season',            # 1
              'gre-super-league-{season}-meisterschaft'     => 'Playoffs - Championship',   # 2
              'gre-super-league-{season}-abstieg'           => 'Playoffs - Relegation',     # 3
              'gre-superleague'                             => 'Regular Season',            # 4
              'gre-superleague-{end_year}-playoffs'         => 'Playoffs',                  # 5
              'gre-superleague-{end_year}-spiel-um-platz-6' => 'Match 6th Place',           # 6
            },
    season: ->( s ) {
              case s
              when Season('2020/21')                     then [1]      ## just getting started
              when Season('2019/20')                     then [4,2,3]
              when Season('2017/18')..Season('2018/19')  then 4
              when Season('2013/14')..Season('2016/17')  then [4,5]
              when Season('2012/13')                     then [4,5,6]
              when Season('2010/11')..Season('2011/12')  then [4,5]
              end
            },
  },


  ## todo/check: add europe southeastern or balkans - why? why not?
  ## Croatia
  #   e.g. /cro-1-hnl-2020-2021/
  'hr.1' => { pages: 'cro-1-hnl' },

})

end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
|
2
|
+
module Worldfootball

##
#  Western Europe - league key => page configuration
#
#    pages:   one page slug (string) or
#             a page slug => stage name mapping (hash)
#    season:  (optional) proc - returns the page number(s) (starting with 1)
#             to use for the season passed in

LEAGUES_EUROPE.update({

  ## France
  'fr.1' => { pages: 'fra-ligue-1' },
  'fr.2' => { pages: 'fra-ligue-2' },


  ## Luxembourg
  #   e.g. /lux-nationaldivision-2020-2021/
  'lu.1' => { pages: 'lux-nationaldivision' },


  ## Netherlands
  #   e.g. /ned-eredivisie-2020-2021/
  'nl.1' => { pages: 'ned-eredivisie' },
  #   Championship play-offs
  #   Europa League play-offs (Group A + Group B / Finals )


  ## Belgium
  #   e.g. /bel-eerste-klasse-a-2020-2021/
  #        /bel-europa-league-playoffs-2018-2019-playoff/
  #           - semi-finals  (site label: Halbfinale)
  #           - final        (site label: Finale)
  'be.1' => {
    pages:  {
              'bel-eerste-klasse-a-{season}'                => 'Regular Season',                      # 1
              'bel-eerste-klasse-a-{season}-playoff-i'      => 'Playoffs - Championship',             # 2
              'bel-europa-league-playoffs-{season}'         => 'Playoffs - Europa League',            # 3  ## note: missing groups (A & B)
              'bel-europa-league-playoffs-{season}-playoff' => 'Playoffs - Europa League - Finals',   # 4
            },
    season: ->( s ) {
              case s
              when Season('2020/21')  then [1]          # just getting started
              when Season('2019/20')  then [1]          # covid-19 - no championship & europa
              when Season('2018/19')  then [1,2,3,4]
              end
            }
  },

})

end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
  ## table of all european leagues - starts out empty and
  ##   gets filled up by the regional tables (via merge!);
  ##   note: do NOT freeze - must stay open for changes
  LEAGUES_EUROPE = Hash.new
end
|
5
|
+
|
6
|
+
|
7
|
+
require_relative 'europe--western'
|
8
|
+
require_relative 'europe--british_isles'
|
9
|
+
require_relative 'europe--northern'
|
10
|
+
require_relative 'europe--central'
|
11
|
+
require_relative 'europe--eastern'
|
12
|
+
require_relative 'europe--southern'
|
13
|
+
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Worldfootball

##
#  North America - league key => page configuration
#
#    pages:   page slug => stage name mapping (hash)
#    season:  proc - returns the page numbers (starting with 1)
#             to use for the season passed in

LEAGUES_NORTH_AMERICA = {

  ## Canada
  #   todo/fix: adjust date/time by -6 or 7 hours!!!
  #   e.g. /can-canadian-championship-2020/
  #          - qualifying round 1  (site label: Qual. 1. Runde)
  #          - qualifying round 2  (site label: Qual. 2. Runde)
  #          - qualifying round 3  (site label: Qual. 3. Runde)
  #   todo/fix: check for leagues - premier league? championship? soccer league?
  # 'ca.1' => { slug: 'can-canadian-championship' },


  ## Mexico
  #   todo/fix: adjust date/time by -7 hours!!!
  #     e.g. 25.07.2020 02:30 => 24.07.2020 19.30
  #          11.01.2020 04:00 => 10.01.2020 21.00
  #
  #   e.g. /mex-primera-division-2020-2021-apertura/
  #        /mex-primera-division-2019-2020-clausura/
  #        /mex-primera-division-2019-2020-apertura-playoffs/
  #          - quarter-finals  (site label: Viertelfinale)
  #          - semi-finals     (site label: Halbfinale)
  #          - final           (site label: Finale)
  #        /mex-primera-division-2018-2019-clausura-playoffs/
  'mx.1' => {
    pages:  {
              'mex-primera-division-{season}-apertura'          => 'Apertura',              # 1
              'mex-primera-division-{season}-apertura-playoffs' => 'Apertura - Liguilla',   # 2
              'mex-primera-division-{season}-clausura'          => 'Clausura',              # 3
              'mex-primera-division-{season}-clausura-playoffs' => 'Clausura - Liguilla',   # 4
            },
    season: ->( s ) {
              case s
              when Season('2020/21')                     then [1]          # just getting started
              when Season('2019/20')                     then [1,2,3]      # covid-19 - no liguilla
              when Season('2010/11')..Season('2018/19')  then [1,2,3,4]
              end
            }
  },
}

end # module Worldfootball
|
44
|
+
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Worldfootball

##
#  Pacific - league key => page configuration
#
#    pages:   page slug => stage name mapping (hash)
#    season:  proc - returns the page numbers (starting with 1)
#             to use for the season passed in

LEAGUES_PACIFIC = {

  ## New Zealand
  #   e.g. /nzl-nz-football-championship-2019-2020/
  #        /nzl-nz-football-championship-2018-2019-playoffs/
  'nz.1' => {
    pages:  {
              'nzl-nz-football-championship-{season}'          => 'Regular Season',   # 1
              'nzl-nz-football-championship-{season}-playoffs' => 'Playoff Finals',   # 2
            },
    season: ->( s ) {
              case s
              when Season('2019/20')  then [1]     ## covid-19 - no playoffs/finals
              when Season('2018/19')  then [1,2]
              end
            }
  },
}

end # module Worldfootball
|
@@ -0,0 +1,200 @@
|
|
1
|
+
|
2
|
+
|
3
|
+
require_relative 'leagues/europe'
|
4
|
+
require_relative 'leagues/north_america'
|
5
|
+
require_relative 'leagues/south_america'
|
6
|
+
require_relative 'leagues/pacific'
|
7
|
+
require_relative 'leagues/asia'
|
8
|
+
|
9
|
+
|
10
|
+
module Worldfootball
|
11
|
+
|
12
|
+
## all regional league tables folded into one lookup table (by league key);
##   note: on duplicate keys the table listed later wins
LEAGUES = [
  LEAGUES_EUROPE,
  LEAGUES_NORTH_AMERICA,
  LEAGUES_SOUTH_AMERICA,
  LEAGUES_PACIFIC,
  LEAGUES_ASIA,
].each_with_object( {} ) { |region, all| all.update( region ) }
|
17
|
+
|
18
|
+
|
19
|
+
##
#  Convenience wrapper for a league configuration record.
#
#  The record (hash) may hold:
#    pages:   one page slug (string), a list of page slugs (array) or
#             a page slug => stage name mapping (hash)
#    season:  (optional) proc - gets passed the season and returns
#             the page number (integer) or page numbers (array of integers)
#             to use; page numbers start with 1 (NOT 0)
class League
  ## league key as passed in on creation
  attr_reader :key

  def initialize( key, data )
    @key         = key
    @pages       = data[:pages]
    @season_proc = data[:season] || ->(_season) { nil }
  end

  ##
  #  Resolve the page(s) for the season passed in.
  #
  #  Returns
  #    - a single record  { slug: }  for the "simple" format without stages
  #      (pages is a string or the season proc returns a single page number) or
  #    - ALWAYS an array of records  { slug:, stage: }  for the format with stages
  #      (the season proc returns an array - even if it has only one page number).
  #
  #  Prints an error and exits (with 1) if there is no configuration
  #  for the season or if a page number is out of range.
  def pages( season: )
    ## simple case - one page slug for all seasons; season proc not used
    return { slug: fill_slug( @pages, season: season ) }   if @pages.is_a?( String )

    numbers = @season_proc.call( season )
    if numbers.nil?
      puts "!! ERROR - no configuration found for season >#{season}< for league >#{@key}< found; sorry"
      exit 1
    end

    if numbers.is_a?( Integer )   ## single number - simple format without stage
      tmpl = page_slug( numbers, season: season )
      { slug: fill_slug( tmpl, season: season ) }
    else                          ## array of numbers - format with stages
      numbers.map do |number|
        tmpl = page_slug( number, season: season )
        ## note: include the page to stage name mapping
        ##   (no stage names available if pages is a plain list)
        { slug:  fill_slug( tmpl, season: season ),
          stage: @pages.is_a?( Hash ) ? @pages[tmpl] : nil }
      end
    end
  end

  ##
  #  Helper - fill in the season into the page slug (template).
  #
  #  Supported placeholders are {season} (e.g. 2010-2011),
  #  {start_year} (e.g. 2010) and {end_year} (e.g. 2011);
  #  if there is no placeholder the season gets appended (e.g. -2010-2011).
  #  Prints the result (for debugging) and returns it.
  def fill_slug( slug, season: )
    slug = if slug.include?( '{season}' )
             slug.sub( '{season}', season.to_path( :long ) )
           elsif slug.include?( '{start_year}' )
             ## note: the reverse lookup (by page) knows about {start_year} too;
             ##   assumes season responds to start_year (like end_year) - todo/check
             slug.sub( '{start_year}', season.start_year.to_s )
           elsif slug.include?( '{end_year}' )
             slug.sub( '{end_year}', season.end_year.to_s )
           else
             ## convenience fallback - append the season
             "#{slug}-#{season.to_path( :long )}"
           end

    puts " slug=>#{slug}<"

    slug
  end

  private

  ## Helper - look up the page slug (template) by page number (starting with 1);
  ##   note: 0 or a negative number would silently wrap around to the last pages
  ##         and a number too big would return nil - report an error instead
  def page_slug( number, season: )
    slugs = @pages.is_a?( Hash ) ? @pages.keys : @pages

    unless number.is_a?( Integer ) && number.between?( 1, slugs.size )
      puts "!! ERROR - page number >#{number}< out of range (1..#{slugs.size}) for season >#{season}< for league >#{@key}<; sorry"
      exit 1
    end

    slugs[number-1]
  end
end # class League
|
84
|
+
|
85
|
+
|
86
|
+
|
87
|
+
## League info lookup (by league key) - returns the configuration record
##   wrapped in a League (convenience wrapper for now);
##   prints an error and exits (with 1) if the key is unknown
def self.find_league( key )
  config = LEAGUES[ key ]
  return League.new( key, config )   unless config.nil?

  puts "!! ERROR - no league found for >#{key}<; add to leagues tables"
  exit 1
end
|
95
|
+
|
96
|
+
|
97
|
+
|
98
|
+
### "reverse" lookup by page - returns league AND season
|
99
|
+
## note: "blind" season template para - might be season or start_year etc.
|
100
|
+
## e.g. {season} or {start_year} becomes {}
|
101
|
+
|
102
|
+
## matches a (non-empty) template placeholder in curly braces, e.g. {season}
PAGE_VAR_RE = /\{[^}]+\}/
|
105
|
+
|
106
|
+
|
107
|
+
## Normalize a page slug (template) - slugs with a placeholder ({...})
##   pass through as is; otherwise (convenience fallback)
##   the {season} placeholder gets appended
def self.norm_slug( slug )
  return slug   if slug.include?( '{' )

  "#{slug}-{season}"
end
|
111
|
+
|
112
|
+
## "reverse" lookup table - by (normalized) page slug with
##   a "blind" {} placeholder for the season, e.g. {season} becomes {};
##   every record holds the league key and the page slug (template)
##   plus the stage name (if the pages are a page slug => stage name mapping)
PAGES ||= LEAGUES.each_with_object( {} ) do |(league_key, config), table|
  case config[:pages]
  when String
    tmpl = Worldfootball.norm_slug( config[:pages] )
    table[ tmpl.sub( PAGE_VAR_RE, '{}' ) ] = { league: league_key, slug: tmpl }
  when Array
    config[:pages].each do |page_slug|
      tmpl = Worldfootball.norm_slug( page_slug )
      table[ tmpl.sub( PAGE_VAR_RE, '{}' ) ] = { league: league_key, slug: tmpl }
    end
  ## todo/fix: missing pages (nil) not handled!!!
  else   ## assume hash
    ## add stage to pages too - why? why not?
    config[:pages].each do |page_slug, stage_name|
      tmpl = Worldfootball.norm_slug( page_slug )
      table[ tmpl.sub( PAGE_VAR_RE, '{}' ) ] = { league: league_key, slug: tmpl, stage: stage_name }
    end
  end
end
|
134
|
+
|
135
|
+
# e.g. 2000 or 2000-2001
|
136
|
+
## matches a season in a page slug - a four-digit year
##   optionally followed by a second year, e.g. 2000 or 2000-2001
SEASON_RE = /[0-9]{4}(?:-[0-9]{4})?/
|
141
|
+
|
142
|
+
|
143
|
+
## "Reverse" lookup by page slug - same as find_page but
##   prints an error (incl. the season and normalized slug for debugging)
##   and exits (with 1) if there is no mapping for the page
def self.find_page!( slug )
  page = find_page( slug )
  return page   unless page.nil?

  puts "!! ERROR: no mapping for page >#{slug}< found; sorry"

  season_str = slug[ SEASON_RE ]            ## season (first match) as found in slug
  norm       = slug.sub( SEASON_RE, '{}' )  ## slug with season replaced by {}

  puts " season: >#{season_str}<"
  puts " slug (norm): >#{norm}<"
  puts
  ## pp PAGES
  exit 1
end
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
## "Reverse" lookup by page slug - returns a record with
##   the league key and the season key or nil if there is no mapping;
##   prints an error and exits (with 1) if no season is found in the page slug
def self.find_page( slug )
  season_str = slug[ SEASON_RE ]    ## season (first match) as found in slug
  if season_str.nil?
    puts "!! ERROR: no season found in page slug >#{slug}<; sorry"
    exit 1
  end

  ## look up by slug with season replaced by the "blind" placeholder {}
  rec = PAGES[ slug.sub( SEASON_RE, '{}' ) ]
  return nil   if rec.nil?

  tmpl = rec[:slug]
  ## todo/check - for {start_year} and {end_year}
  ##   season_str must be a year (e.g. 2020 or such and NOT 2020-2021)
  year = season_str.to_i
  season = if tmpl.include?( '{start_year}' )
             Season( "#{year}-#{year+1}" )
           elsif tmpl.include?( '{end_year}' )
             Season( "#{year-1}-#{year}" )
           else   ## assume "regular" season - pass through as is
             Season( season_str )
           end

  { league: rec[:league],
    season: season.key }
end
|
198
|
+
|
199
|
+
|
200
|
+
end # module Worldfootball
|
@@ -0,0 +1,72 @@
|
|
1
|
+
#### todo/check: move MODS and SCORE_ERRORS out-of-lib
|
2
|
+
## and into config or such - why? why not?
|
3
|
+
|
4
|
+
|
5
|
+
module Worldfootball
|
6
|
+
|
7
|
+
|
8
|
+
######
|
9
|
+
# "global" helpers
|
10
|
+
## Clean up a team name - cut off the (first) (old) marker,
##   strip leading and trailing spaces and
##   asciify the typographic apostrophe (’ => '), e.g. Hawke’s Bay United FC
def self.norm_team( team )
  team.sub( '(old)', '' )
      .strip
      .tr( '’', "'" )
end
|
16
|
+
|
17
|
+
|
18
|
+
|
19
|
+
## team name mods (modifications) by country -
##   maps the team name as found on the page to the name to use
MODS = {
  ## Austria
  'at' => {
    ## AT 1
    'SC Magna Wiener Neustadt'  => 'SC Wiener Neustadt',      # in 2010/11
    'KSV Superfund'             => 'Kapfenberger SV',         # in 2010/11
    'Kapfenberger SV 1919'      => 'Kapfenberger SV',         # in 2011/12
    'FC Trenkwalder Admira'     => 'FC Admira Wacker',        # in 2011/12

    ## AT 2
    'Austria Wien (A)'          => 'Young Violets',           # in 2019/20
    'FC Wacker Innsbruck (A)'   => 'FC Wacker Innsbruck II',  # in 2018/19

    ## AT CUP
    'Rapid Wien (A)'            => 'Rapid Wien II',           # in 2011/12
    'Sturm Graz (A)'            => 'Sturm Graz II',
    'Kapfenberger SV 1919 (A)'  => 'Kapfenberger SV II',
    'SV Grödig (A)'             => 'SV Grödig II',
    'RB Salzburg (A)'           => 'RB Salzburg II',
    'SR WGFM Donaufeld'         => 'SR Donaufeld Wien',
    'FC Trenkwalder Admira (A)' => 'FC Admira Wacker II',

    ## AT 3.O (Regionalliga Ost)
    'FC Admira Wacker (A)'      => 'FC Admira Wacker II',     # in 2020/21
  },

  ## New Zealand
  'nz' => {
    ## NZ 1
    'Wellington Phoenix (R)'    => 'Wellington Phoenix Reserves',
  },
}
|
45
|
+
|
46
|
+
|
47
|
+
|
48
|
+
## fix/patch known score format errors in at/de cups
|
49
|
+
## new convention
|
50
|
+
## for a fix require league, date, and team1 & team2 for now!!!!
|
51
|
+
## - do NOT use some "generic" fix / patch!!!!
|
52
|
+
##
|
53
|
+
## old de/at patches/fixes:
|
54
|
+
## '0-1 (0-0, 0-0, 0-0) n.V.' => '0-1 (0-0, 0-0) n.V.', # too long
|
55
|
+
## '2-1 (1-1, 1-1, 1-0) n.V.' => '2-1 (1-1, 1-1) n.V.',
|
56
|
+
## '4-2 (0-0, 0-0) i.E.' => '4-2 (0-0, 0-0, 0-0) i.E.', # too short
|
57
|
+
|
58
|
+
|
59
|
+
## known score format errors by league and match date -
##   every record holds team 1, team 2 and
##   a pair of scores [score as found on the page, score fixed]
SCORE_ERRORS = {
  ## Romania
  'ro.1' => {
    ## 2013/14
    '2013-07-29' => [
      'FC Brașov',
      'Săgeata Năvodari',
      ['1-1 (0-0, 0-1)', '1-1 (0-0)'],
    ],
  },

  ## Greece
  'gr.1' => {
    ## 2010/11
    '2010-11-24' => [
      'Ergotelis',
      'Olympiakos Piräus',
      ['0-2 (0-0, 0-0, 0-0)', '0-2 (0-0)'],
    ],
    '2010-11-28' => [
      'Panserraikos',
      'Aris Saloniki',
      ['1-0 (1-0, 0-0, 0-0)', '1-0 (1-0)'],
    ],
  }
}
|
70
|
+
|
71
|
+
|
72
|
+
end # module Worldfootball
|
@@ -0,0 +1,106 @@
|
|
1
|
+
|
2
|
+
module Worldfootball
|
3
|
+
##
#  Base class for (downloaded) web pages - wraps the page's html text and
#  offers access to the title, keywords, url and generated timestamp
#  plus some helpers (squish, assert) for parsing.
class Page

  ## read in the page from a file (in utf-8)
  def self.from_file( path )
    html = File.read( path, encoding: 'utf-8' )
    new( html )
  end

  def initialize( html )
    @html = html
  end

  ## the page parsed (on first use) into a html document
  def doc
    ## note: if we use a fragment and NOT a document - no access to page head (and meta elements and such)
    @doc ||= Nokogiri::HTML( @html )
  end

  ## text of the (first) title element, e.g.
  ##   <title>Bundesliga 2010/2011 » Spielplan</title>
  def title
    @title ||= doc.css( 'title' ).first
    @title.text     ## get element's text content
  end

  ## content of the (first) keywords meta element, e.g.
  ##   <meta name="keywords"
  ##         content="Bundesliga, 2010/2011, Spielplan, KSV Superfund, SC Magna Wiener Neustadt, SV Ried, FC Wacker Innsbruck, Austria Wien, Sturm Graz, SV Mattersburg, LASK Linz, Rapid Wien, RB Salzburg" />
  def keywords
    ## note: or use doc.xpath( '//meta[@name="keywords"]' ).first
    @keywords ||= doc.css( 'meta[name="keywords"]' ).first
    @keywords[:content]    ## get content attribute
  end

  ## content of the (first) og:url meta element, e.g.
  ##   <meta property="og:url"
  ##         content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
  def url
    @url ||= doc.css( 'meta[property="og:url"]' ).first
    @url[:content]
  end


  ## matches the generated comment, e.g.
  ##   <!-- [generated 2020-06-30 22:30:19] -->
  GENERATED_RE = %r{
                       <!--
                          [ ]+
                         \[generated
                          [ ]+
                           (?<date>\d+-\d+-\d+)
                          [ ]+
                           (?<time>\d+:\d+:\d+)
                         \]
                          [ ]+
                        -->
                    }x

  ## timestamp (date & time) from the generated comment in the page or
  ##   nil (plus a warning printed) if there is no generated comment;
  ##   note: the result gets cached - incl. nil, that is,
  ##         the page gets scanned and the warning printed once only
  def generated
    return @generated   if defined?( @generated )

    m = GENERATED_RE.match( @html )
    @generated = if m
                   DateTime.strptime( "#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S')
                 else
                   puts "!! WARN - no generated timestamp found in page"
                   nil
                 end
  end

  ### convenience helper / formatter -
  ##    days between today and the generated timestamp, e.g. 3d or
  ##    ? if there is no generated timestamp
  def generated_in_days_ago
    timestamp = generated
    return '?'   unless timestamp

    diff_in_days = Date.today.jd - timestamp.jd
    "#{diff_in_days}d"
  end

  ######################
  ## helper methods

  ## fold whitespace (spaces, tabs, newlines and no-break spaces)
  ##   to one space max. and strip leading and trailing whitespace
  def squish( str )
    ## note: replace the no-break spaces first (NOT after strip);
    ##   strip does not know about no-break spaces, that is,
    ##   leading and trailing no-break spaces would end up as spaces
    str = str.gsub( "\u{00A0}", ' ' )  # Unicode Character 'NO-BREAK SPACE' (U+00A0)
    str = str.gsub( /[ \t\n]+/, ' ' )  ## fold whitespace to one max.
    str.strip
  end

  ## print the message and exit (with 1) if the condition does not hold
  def assert( cond, msg )
    return   if cond

    puts "!!! assert failed (in parse page) - #{msg}"
    exit 1
  end

end # class Page
|
106
|
+
end # module Worldfootball
|