worldfootball 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,86 @@
1
+
2
module Worldfootball

  ## League page configuration for southern europe
  ##   (italy, portugal, spain, turkey, greece, croatia).
  ##
  ## Every entry maps a league key to a config record:
  ##   pages:  - a single page slug (string), or
  ##           - a list of page slugs (array), or
  ##           - a table of page slug => stage name (hash)
  ##   season: - (optional) lambda returning the 1-based page
  ##             number(s) to use for the passed-in season
  ##
  ## The table gets merged into the shared LEAGUES_EUROPE registry
  ## (see the last statement in this module).
  southern = {

    ## italy
    ##   e.g. /ita-serie-a-2019-2020/
    ##        /ita-serie-b-2020-2021/
    'it.1' => { pages: 'ita-serie-a' },
    'it.2' => { pages: 'ita-serie-b' },

    ## portugal
    ##   e.g. /por-primeira-liga-2019-2020/
    ##     por-primeira-liga-2020-2021
    ##     por-primeira-liga-2019-2020
    ##     por-primeira-liga-2018-2019
    ##     por-primeira-liga-2017-2018
    ##     por-primeira-liga-2016-2017
    ##     por-primeira-liga-2015-2016
    ##     por-primeira-liga-2014-2015
    ##     por-primeira-liga-2013-2014
    ##     por-liga-zon-sagres-2012-2013
    ##     por-liga-zon-sagres-2011-2012
    ##     por-liga-sagres-2010-2011
    ##     ...
    ##   e.g. /por-segunda-liga-2019-2020/
    ##
    ##   note: sponsorship names for seasons
    ##     2002–2005: SuperLiga GalpEnergia
    ##     2005–2006: Liga betandwin.com
    ##     2006–2008: BWINLIGA
    ##     2008–2010: Liga Sagres
    ##     2010–2014: Liga ZON Sagres
    ##     2014–2020: Liga NOS
    'pt.1' => {
      pages:  %w[por-primeira-liga
                 por-liga-zon-sagres
                 por-liga-sagres],
      ## returns the (1-based) position in the pages list above
      season: lambda { |season|
        case season
        when Season('2013/14')..Season('2020/21') then 1
        when Season('2011/12')..Season('2012/13') then 2
        when Season('2010/11')                    then 3
        end
      },
    },
    'pt.2' => { pages: 'por-segunda-liga' },

    ## spain
    ##   e.g. /esp-primera-division-2019-2020/
    'es.1' => { pages: 'esp-primera-division' },
    'es.2' => { pages: 'esp-segunda-division' },

    ## turkey
    ##   e.g. /tur-sueperlig-2020-2021/
    'tr.1' => { pages: 'tur-sueperlig' },
    'tr.2' => { pages: 'tur-1-lig' },

    ## greece
    ##   e.g. /gre-super-league-2020-2021/
    'gr.1' => {
      pages: {
        ## note: page name changed from superleague to super-league !!!
        'gre-super-league'                            => 'Regular Season',            # 1
        'gre-super-league-{season}-meisterschaft'     => 'Playoffs - Championship',   # 2
        'gre-super-league-{season}-abstieg'           => 'Playoffs - Relegation',     # 3
        'gre-superleague'                             => 'Regular Season',            # 4
        'gre-superleague-{end_year}-playoffs'         => 'Playoffs',                  # 5
        'gre-superleague-{end_year}-spiel-um-platz-6' => 'Match 6th Place',           # 6
      },
      ## returns the (1-based) page number(s) from the pages table above
      season: lambda { |season|
        case season
        when Season('2020/21')                    then [1]       ## just getting started
        when Season('2019/20')                    then [4,2,3]
        when Season('2017/18')..Season('2018/19') then 4
        when Season('2013/14')..Season('2016/17') then [4,5]
        when Season('2012/13')                    then [4,5,6]
        when Season('2010/11')..Season('2011/12') then [4,5]
        end
      },
    },

    ## todo/check: add europe southeastern or balkans - why? why not?
    ## croatia
    ##   e.g. /cro-1-hnl-2020-2021/
    'hr.1' => { pages: 'cro-1-hnl' },
  }

  LEAGUES_EUROPE.merge!( southern )

end
@@ -0,0 +1,38 @@
1
+
2
module Worldfootball

  ## League page configuration for western europe
  ##   (france, luxembourg, netherlands, belgium).
  ##
  ## The table gets merged into the shared LEAGUES_EUROPE registry
  ## (see the last statement in this module).
  western = {

    ## france
    'fr.1' => { pages: 'fra-ligue-1' },
    'fr.2' => { pages: 'fra-ligue-2' },

    ## luxembourg
    ##   e.g. /lux-nationaldivision-2020-2021/
    'lu.1' => { pages: 'lux-nationaldivision' },

    ## netherlands
    ##   e.g. /ned-eredivisie-2020-2021/
    'nl.1' => { pages: 'ned-eredivisie' },
    ##   Championship play-offs
    ##   Europa League play-offs (Group A + Group B / Finals )

    ## belgium
    ##   e.g. /bel-eerste-klasse-a-2020-2021/
    ##        /bel-europa-league-playoffs-2018-2019-playoff/
    ##           - Halbfinale   (semi-final)
    ##           - Finale       (final)
    'be.1' => {
      pages: {
        'bel-eerste-klasse-a-{season}'                => 'Regular Season',                    # 1
        'bel-eerste-klasse-a-{season}-playoff-i'      => 'Playoffs - Championship',           # 2
        'bel-europa-league-playoffs-{season}'         => 'Playoffs - Europa League',          # 3 - note: missing groups (A & B)
        'bel-europa-league-playoffs-{season}-playoff' => 'Playoffs - Europa League - Finals', # 4
      },
      ## returns the (1-based) page numbers from the pages table above
      season: lambda { |season|
        case season
        when Season('2020/21') then [1]        # just getting started
        when Season('2019/20') then [1]        # covid-19 - no championship & europa
        when Season('2018/19') then [1,2,3,4]
        end
      }
    },
  }

  LEAGUES_EUROPE.merge!( western )

end
@@ -0,0 +1,13 @@
1
+
2
## Shared (empty) registry for all european leagues (league key => config record).
##   note: left mutable (not frozen) on purpose - the regional files required
##         right after this definition add their entries in-place via merge!
##         (presumably all six of them do - only southern and western were checked).
+ module Worldfootball
3
+ LEAGUES_EUROPE = {}
4
+ end
5
+
6
+
7
+ require_relative 'europe--western'
8
+ require_relative 'europe--british_isles'
9
+ require_relative 'europe--northern'
10
+ require_relative 'europe--central'
11
+ require_relative 'europe--eastern'
12
+ require_relative 'europe--southern'
13
+
@@ -0,0 +1,44 @@
1
module Worldfootball

  ## canada
  ##   todo/fix: adjust date/time by -6 or 7 hours!!!
  ##   /can-canadian-championship-2020/
  ##     - Qual. 1. Runde
  ##     - Qual. 2. Runde
  ##     - Qual. 3. Runde
  ##   todo/fix: check for leagues - premier league? championship? soccer league?
  ##   'ca.1' => { slug: 'can-canadian-championship' },

  ## mexico
  ##   todo/fix: adjust date/time by -7 hours!!!
  ##     e.g. 25.07.2020 02:30 => 24.07.2020 19.30
  ##          11.01.2020 04:00 => 10.01.2020 21.00
  ##
  ##   e.g. /mex-primera-division-2020-2021-apertura/
  ##        /mex-primera-division-2019-2020-clausura/
  ##        /mex-primera-division-2019-2020-apertura-playoffs/
  ##          - Viertelfinale   (quarter-final)
  ##          - Halbfinale      (semi-final)
  ##          - Finale          (final)
  ##        /mex-primera-division-2018-2019-clausura-playoffs/
  mexico = {
    ## page slug => stage name (numbered 1-4 in insertion order)
    pages: {
      'mex-primera-division-{season}-apertura'          => 'Apertura',             # 1
      'mex-primera-division-{season}-apertura-playoffs' => 'Apertura - Liguilla',  # 2
      'mex-primera-division-{season}-clausura'          => 'Clausura',             # 3
      'mex-primera-division-{season}-clausura-playoffs' => 'Clausura - Liguilla',  # 4
    },
    ## returns the (1-based) page numbers to use for the passed-in season
    ##   or nil if the season is not configured
    season: lambda { |season|
      case season
      when Season('2020/21')                    then [1]        # just getting started
      when Season('2019/20')                    then [1,2,3]    # covid-19 - no liguilla
      when Season('2010/11')..Season('2018/19') then [1,2,3,4]
      end
    }
  }

  ## league key => config record
  LEAGUES_NORTH_AMERICA = {
    'mx.1' => mexico,
  }

end # module Worldfootball
44
+
@@ -0,0 +1,21 @@
1
module Worldfootball

  ## new zealand
  ##   e.g. /nzl-nz-football-championship-2019-2020/
  ##        /nzl-nz-football-championship-2018-2019-playoffs/
  new_zealand = {
    ## page slug => stage name (numbered 1-2 in insertion order)
    pages: {
      'nzl-nz-football-championship-{season}'          => 'Regular Season',  # 1
      'nzl-nz-football-championship-{season}-playoffs' => 'Playoff Finals',  # 2
    },
    ## returns the (1-based) page numbers to use for the passed-in season
    ##   or nil if the season is not configured
    season: lambda { |season|
      case season
      when Season('2019/20') then [1]     ## covid-19 - no playoffs/finals
      when Season('2018/19') then [1,2]
      end
    }
  }

  ## league key => config record
  LEAGUES_PACIFIC = {
    'nz.1' => new_zealand,
  }

end # module Worldfootball
@@ -0,0 +1,11 @@
1
module Worldfootball

  ## brazil
  ##   todo/fix: adjust date/time by -6 or 7 hours!!!
  ##   e.g. /bra-serie-a-2020/
  brazil = { pages: 'bra-serie-a' }

  ## league key => config record
  LEAGUES_SOUTH_AMERICA = {
    'br.1' => brazil,
  }

end # module Worldfootball
11
+
@@ -0,0 +1,200 @@
1
+
2
+
3
+ require_relative 'leagues/europe'
4
+ require_relative 'leagues/north_america'
5
+ require_relative 'leagues/south_america'
6
+ require_relative 'leagues/pacific'
7
+ require_relative 'leagues/asia'
8
+
9
+
10
+ module Worldfootball
11
+
12
## All regional league tables folded into one lookup table (league key => config record).
##   note: the regional tables get merged in the order listed -
##         on a duplicate league key the later table wins.
LEAGUES = [LEAGUES_EUROPE,
           LEAGUES_NORTH_AMERICA,
           LEAGUES_SOUTH_AMERICA,
           LEAGUES_PACIFIC,
           LEAGUES_ASIA].each_with_object( {} ) { |regional, merged| merged.update( regional ) }
17
+
18
+
19
## Convenience wrapper around one league config record.
##
## The config record (data) is a hash with
##   pages:  - a single page slug (string), or
##           - a list of page slugs (array), or
##           - a table of page slug => stage name (hash)
##   season: - (optional) lambda; gets passed the season and returns
##             the 1-based page number (integer) or page numbers (array)
##             to use - or nil if the season is not configured
class League
  attr_reader :key

  def initialize( key, data )
    @key         = key
    @pages       = data[:pages]
    @season_proc = data[:season] || ->( _season ) { nil }
  end

  ## Resolve the page(s) to fetch for the passed-in season.
  ##
  ## returns
  ##   - a single record { slug: } for the no stages / simple case
  ##       (pages is a string or the season lambda returns a single number)
  ##   - ALWAYS an array of records [{ slug:, stage: }, ...] for the stages case
  ##       (even if it has only one page); stage is nil if pages is
  ##       a plain list (array) without stage names
  ##
  ## note: prints an error and exits (status 1) if the season is
  ##       not configured or a page number is out of range.
  def pages( season: )
    ## assume always "simple/regular" format w/o stages
    return { slug: fill_slug( @pages, season: season ) }   if @pages.is_a?( String )

    indices = @season_proc.call( season )
    if indices.nil?
      puts "!! ERROR - no configuration found for season >#{season}< for league >#{@key}< found; sorry"
      exit 1
    end

    if indices.is_a?( Integer )    ## single number - single/regular format w/o stage
      { slug: fill_slug( page_slug( indices ), season: season ) }
    else                           ## assume regular case - array of integers
      indices.map do |idx|
        slug = page_slug( idx )
        { slug:  fill_slug( slug, season: season ),
          stage: page_stage( slug ) }    ## note: include mapping for page to stage name!!
      end
    end
  end # pages


  ######
  # helper method
  #
  #  Fill in the season into the page slug (template).
  #    - {season}   gets replaced by season.to_path( :long )  e.g. 2010-2011
  #    - {end_year} gets replaced by season.end_year          e.g. 2011
  #    - no placeholder - the season gets appended (convenience fallback)
  #  note: only the first placeholder found gets replaced;
  #        the resulting slug gets printed (for debugging) and returned.
  def fill_slug( slug, season: )
    slug = if slug.index( '{season}' )
             slug.sub( '{season}', season.to_path( :long ) )
           elsif slug.index( '{end_year}' )
             slug.sub( '{end_year}', season.end_year.to_s )
           else
             "#{slug}-#{season.to_path( :long )}"
           end

    puts " slug=>#{slug}<"

    slug
  end


  private

  ## all page slugs in configuration order
  ##   (for a hash / stage table the keys are the page slugs)
  def page_slugs
    @pages.is_a?( Array ) ? @pages : @pages.keys
  end

  ## look up a page slug by its 1-based page number;
  ##   note: a page number out of range is a configuration error - report it
  ##         instead of silently wrapping around (0 => last page) or
  ##         crashing later on a nil slug
  def page_slug( idx )
    slugs = page_slugs
    slug  = slugs[idx-1]   if idx.is_a?( Integer ) && idx.between?( 1, slugs.size )

    if slug.nil?
      puts "!! ERROR - page number >#{idx}< out of range (1..#{slugs.size}) for league >#{@key}<; sorry"
      exit 1
    end
    slug
  end

  ## stage name for a page slug (nil if pages is a plain list without stage names)
  def page_stage( slug )
    @pages.is_a?( Array ) ? nil : @pages[slug]
  end
end # class League
84
+
85
+
86
+
87
## League info lookup - find the config record for the league key in
##   the LEAGUES table and wrap it in a League (convenience wrapper for now).
##   note: prints an error and exits (status 1) if no league is found.
def self.find_league( key )
  data = LEAGUES[ key ]
  return League.new( key, data )   unless data.nil?

  puts "!! ERROR - no league found for >#{key}<; add to leagues tables"
  exit 1
end
95
+
96
+
97
+
98
+ ### "reverse" lookup by page - returns league AND season
99
+ ## note: "blind" season template para - might be season or start_year etc.
100
+ ## e.g. {season} or {start_year} becomes {}
101
+
102
## Matches one page slug placeholder / variable, that is, an opening
##   curly bracket followed by one or more chars that are not a closing
##   curly bracket followed by a closing curly bracket, e.g. {season} or {end_year}.
##   Used with sub(..., '{}') when building the PAGES table to "blind" the placeholder name.
+ PAGE_VAR_RE = /{
103
+ [^}]+
104
+ }/x
105
+
106
+
107
## Normalize a page slug (template) - a slug with a placeholder,
##   that is, with an opening curly bracket, gets passed through as is;
##   otherwise the regular season placeholder gets appended (convenience fallback)
##   e.g. ita-serie-a => ita-serie-a-{season}
def self.norm_slug( slug )
  return slug   if slug.index( '{' )

  "#{slug}-{season}"
end
111
+
112
## "Reverse" lookup table - maps a "blinded" page slug template
##   (placeholder name removed e.g. ita-serie-a-{}) to a record with
##     league: - the league key
##     slug:   - the (normalized) page slug template e.g. ita-serie-a-{season}
##     stage:  - the stage name (only if pages is a page slug => stage name table)
##   note: on a duplicate "blinded" page slug the later entry wins.
PAGES ||=
  LEAGUES.each_with_object( {} ) do |(league_key, config), lookup|
    page_config = config[:pages]

    case page_config
    when String, Array     ## single page slug or list of page slugs (no stages)
      Array( page_config ).each do |page_slug|
        tmpl = Worldfootball.norm_slug( page_slug )
        lookup[ tmpl.sub( PAGE_VAR_RE, '{}') ] = { league: league_key, slug: tmpl }
      end
    ## when nil
    ##   todo/fix: missing pages!!!
    else                   ## assume hash (page slug => stage name)
      ## add stage to pages too - why? why not?
      page_config.each do |page_slug, stage|
        tmpl = Worldfootball.norm_slug( page_slug )
        lookup[ tmpl.sub( PAGE_VAR_RE, '{}') ] = { league: league_key, slug: tmpl, stage: stage }
      end
    end
  end
134
+
135
+ # e.g. 2000 or 2000-2001
136
## Matches a season in a page slug, that is, a year with four digits
##   optionally followed by a dash and a second year with four digits.
SEASON_RE = /[0-9]{4}
              (?:
                -[0-9]{4}
              )?
            /x


## (internal) helper - split a page slug into its "blinded" form and its season.
##   The first season found gets replaced with the var placeholder {}.
##
## returns a pair [norm, season_str]
##   e.g. ita-serie-a-2019-2020         => ['ita-serie-a-{}',              '2019-2020']
##        gre-superleague-2013-playoffs => ['gre-superleague-{}-playoffs', '2013']
##   note: season_str is nil (and norm the slug unchanged) if no season is found.
def self.split_page_slug( slug )
  season_str = slug[ SEASON_RE ]            ## keep reference to season str
  norm       = slug.sub( SEASON_RE, '{}' )  ## replace with {}
  [norm, season_str]
end


## Same as find_page but a missing page mapping is an error, that is,
##   prints an error (with the season and the normalized slug for debugging)
##   and exits (status 1) instead of returning nil.
def self.find_page!( slug )
  page = find_page( slug )
  if page.nil?
    norm, season_str = split_page_slug( slug )

    puts "!! ERROR: no mapping for page >#{slug}< found; sorry"
    puts " season: >#{season_str}<"
    puts " slug (norm): >#{norm}<"
    puts
    ## pp PAGES
    exit 1
  end
  page
end


## "Reverse" lookup by page (slug) - find the league AND season.
##
## returns
##   - a record { league:, season: } with the league key and the season key, or
##   - nil if no mapping is found in the PAGES table
##
## note: prints an error and exits (status 1) if no season is found in the page slug.
def self.find_page( slug )
  norm, season_str = split_page_slug( slug )

  if season_str.nil?
    puts "!! ERROR: no season found in page slug >#{slug}<; sorry"
    exit 1
  end

  rec = PAGES[ norm ]
  return nil if rec.nil?

  slug_tmpl = rec[:slug]
  season = if slug_tmpl.index( '{start_year}' )
             ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
             Season( "#{season_str.to_i}-#{season_str.to_i+1}" )
           elsif slug_tmpl.index( '{end_year}' )
             ## todo/check - season_str must be year (e.g. 2020 or such and NOT 2020-2021)
             Season( "#{season_str.to_i-1}-#{season_str.to_i}" )
           else  ## assume "regular" season - pass through as is
             Season( season_str )
           end

  ## return hash table / record
  { league: rec[:league],
    season: season.key }
end
198
+
199
+
200
+ end # module Worldfootball
@@ -0,0 +1,72 @@
1
+ #### todo/check: move MODS and SCORE_ERRORS out-of-lib
2
+ ## and into config or such - why? why not?
3
+
4
+
5
module Worldfootball


  ######
  # "global" helpers

  ## Clean up a team name, that is,
  ##   - remove the (first) (old) marker and leading and trailing spaces
  ##   - asciify the typographic apostrophe e.g. Hawke’s Bay United FC => Hawke's Bay United FC
  def self.norm_team( team )
    team.sub( '(old)', '' )
        .strip
        .gsub( '’', "'" )
  end



  ## team name mods (old / page name => new name) for austria
  austria = {
    ## AT 1
    'SC Magna Wiener Neustadt'  => 'SC Wiener Neustadt',      # in 2010/11
    'KSV Superfund'             => 'Kapfenberger SV',         # in 2010/11
    'Kapfenberger SV 1919'      => 'Kapfenberger SV',         # in 2011/12
    'FC Trenkwalder Admira'     => 'FC Admira Wacker',        # in 2011/12
    ## AT 2
    'Austria Wien (A)'          => 'Young Violets',           # in 2019/20
    'FC Wacker Innsbruck (A)'   => 'FC Wacker Innsbruck II',  # in 2018/19
    ## AT CUP
    'Rapid Wien (A)'            => 'Rapid Wien II',           # in 2011/12
    'Sturm Graz (A)'            => 'Sturm Graz II',
    'Kapfenberger SV 1919 (A)'  => 'Kapfenberger SV II',
    'SV Grödig (A)'             => 'SV Grödig II',
    'RB Salzburg (A)'           => 'RB Salzburg II',
    'SR WGFM Donaufeld'         => 'SR Donaufeld Wien',
    'FC Trenkwalder Admira (A)' => 'FC Admira Wacker II',
    ## AT 3.O (Regionalliga Ost)
    'FC Admira Wacker (A)'      => 'FC Admira Wacker II',     # in 2020/21
  }

  ## team name mods (old / page name => new name) for new zealand
  new_zealand = {
    ## NZ 1
    'Wellington Phoenix (R)' => 'Wellington Phoenix Reserves',
  }

  ## team name mods by country key
  MODS = {
    'at' => austria,
    'nz' => new_zealand,
  }



  ## fix/patch known score format errors in at/de cups
  ##   new convention
  ##     for a fix require league, date, and team1 & team2 for now!!!!
  ##     - do NOT use some "generic" fix / patch!!!!
  ##
  ##   old de/at patches/fixes:
  ##     '0-1 (0-0, 0-0, 0-0) n.V.' => '0-1 (0-0, 0-0) n.V.',       # too long
  ##     '2-1 (1-1, 1-1, 1-0) n.V.' => '2-1 (1-1, 1-1) n.V.',
  ##     '4-2 (0-0, 0-0) i.E.'      => '4-2 (0-0, 0-0, 0-0) i.E.',  # too short

  ## score fixes for romania
  ##   match date => [team1, team2, [score (as is), score (fixed)]]
  romania = {
    ## 2013/14
    '2013-07-29' => [ 'FC Brașov', 'Săgeata Năvodari', ['1-1 (0-0, 0-1)', '1-1 (0-0)']],
  }

  ## score fixes for greece
  ##   match date => [team1, team2, [score (as is), score (fixed)]]
  greece = {
    ## 2010/11
    '2010-11-24' => [ 'Ergotelis',    'Olympiakos Piräus', ['0-2 (0-0, 0-0, 0-0)', '0-2 (0-0)']],
    '2010-11-28' => [ 'Panserraikos', 'Aris Saloniki',     ['1-0 (1-0, 0-0, 0-0)', '1-0 (1-0)']],
  }

  ## score fixes by league key
  SCORE_ERRORS = {
    'ro.1' => romania,
    'gr.1' => greece,
  }


end # module Worldfootball
@@ -0,0 +1,106 @@
1
+
2
+ module Worldfootball
3
## Base class for a (downloaded) web page - wraps the html source
##   and offers access to the page metadata (title, keywords, url,
##   generated timestamp) plus some helpers for parsing.
class Page

  ## read the html from a file (in utf-8) and wrap it in a page
  def self.from_file( path )
    html = File.open( path, 'r:utf-8' ) {|f| f.read }
    new( html )
  end

  def initialize( html )
    @html = html
  end

  ## the parsed html document (parsed on first use)
  def doc
    ## note: if we use a fragment and NOT a document - no access to page head (and meta elements and such)
    @doc ||= Nokogiri::HTML( @html )
  end

  ## the text of the (first) title element
  ##   e.g. <title>Bundesliga 2010/2011 &raquo; Spielplan</title>
  ## returns nil if the page has no title element
  def title
    @title ||= doc.css( 'title' ).first
    @title && @title.text    ## get element's text content
  end

  ## the content of the (first) keywords meta element
  ##   e.g. <meta name="keywords"
  ##              content="Bundesliga, 2010/2011, Spielplan, KSV Superfund, SC Magna Wiener Neustadt, SV Ried, FC Wacker Innsbruck, Austria Wien, Sturm Graz, SV Mattersburg, LASK Linz, Rapid Wien, RB Salzburg" />
  ## returns nil if the page has no keywords meta element
  def keywords
    ## or doc.xpath( '//meta[@name="keywords"]' ).first
    @keywords ||= doc.css( 'meta[name="keywords"]' ).first
    @keywords && @keywords[:content]    ## get content attribute
  end

  ## the content of the (first) og:url meta element
  ##   e.g. <meta property="og:url"
  ##              content="//www.weltfussball.de/alle_spiele/aut-bundesliga-2010-2011/" />
  ## returns nil if the page has no og:url meta element
  def url
    @url ||= doc.css( 'meta[property="og:url"]' ).first
    @url && @url[:content]
  end



  ## e.g. <!-- [generated 2020-06-30 22:30:19] -->
  GENERATED_RE = %r{
                     <!--
                       [ ]+
                        \[generated
                           [ ]+
                            (?<date>\d+-\d+-\d+)
                           [ ]+
                            (?<time>\d+:\d+:\d+)
                        \]
                       [ ]+
                      -->
                   }x


  ## the generated timestamp (DateTime) found in the html source
  ##   or nil if no timestamp is found
  ## note: the html gets searched only once, that is, a missing
  ##       timestamp gets remembered too (and the warning printed only once)
  def generated
    return @generated   if defined?( @generated )

    m = GENERATED_RE.match( @html )
    @generated = if m
                   DateTime.strptime( "#{m[:date]} #{m[:time]}", '%Y-%m-%d %H:%M:%S')
                 else
                   puts "!! WARN - no generated timestamp found in page"
                   nil
                 end
  end

  ### convenience helper / formatter
  ##   returns the age of the page in (calendar) days e.g. 3d
  ##   or ? if the page has no generated timestamp
  def generated_in_days_ago
    stamp = generated
    return '?'   if stamp.nil?

    diff_in_days = Date.today.jd - stamp.jd
    "#{diff_in_days}d"
  end

  ######################
  ## helper methods

  ## Clean up text - convert no-break spaces to spaces,
  ##   fold whitespace to one space max. and
  ##   remove leading and trailing whitespace.
  def squish( str )
    ## note: convert no-break spaces first - strip does NOT remove
    ##       no-break spaces, that is, a leading or trailing
    ##       no-break space would end up as a leading or trailing space
    str = str.gsub( "\u{00A0}", ' ' )  # Unicode Character 'NO-BREAK SPACE' (U+00A0)
    str = str.gsub( /[ \t\n]+/, ' ' )  ## fold whitespace to one max.
    str.strip
  end

  ## print an error message and exit (status 1) if the condition is false (or nil)
  def assert( cond, msg )
    return if cond    ## do nothing

    puts "!!! assert failed (in parse page) - #{msg}"
    exit 1
  end

end # class Page
106
+ end # module Worldfootball