worldfootball 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 170277c7714f9b75e93176eb5fff6242fb6efb85051bc8977f7f635dbebf0513
4
+ data.tar.gz: 5b15d132765c3ee2df4cbdd847b43ba6c7e7bd617ed55bb5fe4dfabc0becb0e0
5
+ SHA512:
6
+ metadata.gz: c95b4b2becf545be2c208a207e8980d06369148d18b0657cddfb81470331c828ee8492649908ece372fb996b9d1a6dfc8eeaa45c54a3757eba7fb6d02e363bf0
7
+ data.tar.gz: ca127cb3f69c861dba48b049ed6da30b9ecde1f3935b30422d6184869b650b5cafc056be7fa37c7198bbd72ad65bdd262e56c82631c62629f4bbb4222992a9c9
data/CHANGELOG.md ADDED
@@ -0,0 +1,6 @@
1
+ ### 0.1.1
2
+
3
+ ### 0.0.1 / 2024-07-04
4
+
5
+ * Everything is new. First release.
6
+
data/Manifest.txt ADDED
@@ -0,0 +1,29 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ bin/wfb
6
+ lib/worldfootball.rb
7
+ lib/worldfootball/build.rb
8
+ lib/worldfootball/convert.rb
9
+ lib/worldfootball/convert_reports.rb
10
+ lib/worldfootball/download.rb
11
+ lib/worldfootball/generator.rb
12
+ lib/worldfootball/leagues.rb
13
+ lib/worldfootball/leagues/asia.rb
14
+ lib/worldfootball/leagues/europe--british_isles.rb
15
+ lib/worldfootball/leagues/europe--central.rb
16
+ lib/worldfootball/leagues/europe--eastern.rb
17
+ lib/worldfootball/leagues/europe--northern.rb
18
+ lib/worldfootball/leagues/europe--southern.rb
19
+ lib/worldfootball/leagues/europe--western.rb
20
+ lib/worldfootball/leagues/europe.rb
21
+ lib/worldfootball/leagues/north_america.rb
22
+ lib/worldfootball/leagues/pacific.rb
23
+ lib/worldfootball/leagues/south_america.rb
24
+ lib/worldfootball/mods.rb
25
+ lib/worldfootball/page.rb
26
+ lib/worldfootball/page_report.rb
27
+ lib/worldfootball/page_schedule.rb
28
+ lib/worldfootball/vacuum.rb
29
+ lib/worldfootball/version.rb
data/README.md ADDED
@@ -0,0 +1,27 @@
1
+ # worldfootball - get world football (leagues, cups & more) match data via the worldfootball.net/weltfussball.de pages
2
+
3
+
4
+ * home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
5
+ * bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
6
+ * gem :: [rubygems.org/gems/worldfootball](https://rubygems.org/gems/worldfootball)
7
+ * rdoc :: [rubydoc.info/gems/worldfootball](http://rubydoc.info/gems/worldfootball)
8
+
9
+
10
+
11
+ ## Usage
12
+
13
+
14
+ To be done
15
+
16
+
17
+
18
+ ## License
19
+
20
+ The `worldfootball` scripts are dedicated to the public domain.
21
+ Use it as you please with no restrictions whatsoever.
22
+
23
+
24
+ ## Questions? Comments?
25
+
26
+ Yes, you can. More than welcome.
27
+ See [Help & Support »](https://github.com/openfootball/help)
data/Rakefile ADDED
@@ -0,0 +1,34 @@
1
+ require 'hoe'
2
+ require './lib/worldfootball/version.rb'
3
+
4
+ Hoe.spec 'worldfootball' do
5
+
6
+ self.version = Worldfootball::VERSION
7
+
8
+ self.summary = "worldfootball - get world football (leagues, cups & more) match data via the worldfootball.net/weltfussball.de pages"
9
+ self.description = summary
10
+
11
+ self.urls = { home: 'https://github.com/sportdb/sport.db' }
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'gerald.bauer@gmail.com'
15
+
16
+ # switch extension to .markdown for github formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'CHANGELOG.md'
19
+
20
+ self.extra_deps = [
21
+ ## ['tzinfo'],
22
+ ['season-formats'],
23
+ ['webget'],
24
+ ['nokogiri'],
25
+ ['cocos'], ## later pull in with sportsdb-writers
26
+ ]
27
+
28
+ self.licenses = ['Public Domain']
29
+
30
+ self.spec_extras = {
31
+ required_ruby_version: '>= 2.2.2'
32
+ }
33
+
34
+ end
data/bin/wfb ADDED
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ## tip: to test run:
4
+ ## ruby -I ./lib bin/wfb
5
+
6
+ require 'worldfootball'
7
+
8
+
9
+ Webcache.root = if File.exist?( '/sports/cache' )
10
+ puts " setting web cache to >/sports/cache<"
11
+ '/sports/cache'
12
+ else
13
+ './cache'
14
+ end
15
+
16
+
17
+ require 'optparse'
18
+
19
+
20
+ module Worldfootball
21
## Command line entry point of the wfb tool.
##
## Usage:
##   wfb [options]                      - list all known league keys (same as: wfb leagues)
##   wfb [options] <league> [<season>]  - download (unless --cached) and parse
##                                        the schedule page(s) of a league season
##
## args - command line arguments (default: ARGV);
##        note: option flags get removed from args (in-place) by parse!
##
## Note: the league listing terminates the process via exit 0.
def self.main( args=ARGV )

  opts = {}
  ## note: block param is NOT named parser to avoid shadowing the outer local
  parser = OptionParser.new do |cmd|
    cmd.banner = "Usage: #{$PROGRAM_NAME} [options]"

    ##
    ## check if git has an offline option?? (use same)
    ## check for other tools - why? why not?

    cmd.on( "--cache", "--cached", "--offline",
            "use cached data in #{Webcache.root}" ) do |cached|
      opts[:cached] = cached
    end
  end
  parser.parse!( args )

  puts "OPTS:"
  p opts
  puts "ARGV:"
  p args


  ## no argument at all defaults to the league listing
  if ['league', 'leagues', 'ls'].include?( args[0] || 'leagues' )
    keys = LEAGUES.keys

    pp keys
    puts " #{keys.size} league(s)"

    # puts
    # puts " pages:"
    # pp Worldfootball::PAGES

    exit 0
  end


  league_code = args[0].downcase

  league = find_league( league_code )   ## league info lookup

  season = Season( args[1] || '2024/25' )

  pages = league.pages( season: season )

  pp pages

  ## wrap single page record into array
  ##   note: wrap BEFORE counting - otherwise the size of a single
  ##         page record (hash) is its number of keys
  pages = pages.is_a?( Array ) ? pages : [pages]

  puts " #{pages.size} page(s)"


  unless opts[:cached]   ## download to cache
    pages.each_with_index do |page_rec,i|
      slug = page_rec[:slug]
      puts "==> #{i+1}/#{pages.size} - #{league_code} @ #{slug}..."
      Metal.download_schedule( slug )
    end
  end


  ## parse all pages from the cache and report the match count
  pages.each_with_index do |page_rec,i|
    slug = page_rec[:slug]

    puts "==> #{i+1}/#{pages.size} - #{league_code} @ #{slug}..."
    page = Page::Schedule.from_cache( slug )
    matches = page.matches

    puts " #{matches.size} match(es)"
  end

end # def self.main
98
+ end # module Worldfootball
99
+
100
+
101
+ Worldfootball.main( ARGV )
102
+
103
+
104
+ puts "bye"
@@ -0,0 +1,245 @@
1
+
2
+ module Worldfootball
3
+
4
+
5
## Maps german (de) cup round names to the english (en) round names.
##   note: frozen - lookup table only (not to be changed at runtime)
ROUND_TO_EN = {
  '1. Runde'      => 'Round 1',
  '2. Runde'      => 'Round 2',
  '3. Runde'      => 'Round 3',
  '4. Runde'      => 'Round 4',
  'Achtelfinale'  => 'Round of 16',
  'Viertelfinale' => 'Quarterfinals',
  'Halbfinale'    => 'Semifinals',
  'Finale'        => 'Final',
}.freeze
15
+
16
+
17
+ ## todo/check: english league cup/trophy has NO ET - also support - make more flexible!!!
18
+
19
+ ## build "standard" match records from "raw" table rows
20
## Build "standard" match records from "raw" table rows.
##
## rows   - match rows; every row gets read via the keys
##            :round, :date, :time, :team1, :team2, :score
## season - season (gets cast via Season())
## league - league key as string (e.g. 'at.1'); do NOT pass in a league struct
## stage  - optional stage name (default: ''); copied into the first column
##
## Returns an array of records (arrays) with the columns
##   [stage, round, date, time, team1, ft, ht, team2, et, pen, comments]
##
## Raises ArgumentError if league is not a string.
##
## Note: prints a progress log to stdout and terminates the process (exit 1)
##   on an unknown round or on a round with no english (en) mapping.
def self.build( rows, season:, league:, stage: '' )   ## rename to fixup or such - why? why not?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  ## note: do NOT pass in league struct! pass in key (string)
  raise ArgumentError, "league key as string expected" unless league.is_a?(String)

  print " #{rows.size} rows - build #{league} #{season}"
  print " - #{stage}" unless stage.empty?
  print "\n"


  ## note: use only first part from key for lookup
  ##   e.g. at.1  => at
  ##        eng.1 => eng
  ##   and so on
  mods = MODS[ league.split('.')[0] ] || {}

  score_errors = SCORE_ERRORS[ league ] || {}

  ## do NOT translate rounds (to english) - keep in german / deutsch (de)
  ##   note: checked once up front (and not for every row)
  keep_round_in_de = ['at.cup', 'at.1',   ## at.1 - incl. europa league playoff
                      'de.cup'].include?( league )


  num  = 0     ## 1-based row number for the progress log
  recs = []
  rows.each do |row|
    num += 1

    round_str = row[:round]

    if round_str =~ /Spieltag/
      puts
      print '[%03d] ' % num
      print round_str

      m = round_str.match( /([0-9]+)\. Spieltag/ )
      if m
        ## todo/check: always use a string even if number (as a string eg. '1' etc.)
        round = m[1]    ## note: keep as string (NOT number)
        print " => #{round}"
      else
        puts "!! ERROR: cannot find matchday number"
        exit 1
      end
      print "\n"
    elsif round_str =~ /[1-9]\.[ ]Runde|
                         Achtelfinale|
                         Viertelfinale|
                         Halbfinale|
                         Finale
                        /x
      puts
      print '[%03d] ' % num
      print round_str

      if keep_round_in_de
        round = round_str
      else
        round = ROUND_TO_EN[ round_str ]
        if round.nil?
          puts "!! ERROR: no mapping for round to english (en) found >#{round_str}<:"
          pp row
          exit 1
        end
        print " => #{round}"
      end
      print "\n"
    else
      puts "!! ERROR: unknown round >#{round_str}< for league >#{league}<:"
      pp row
      exit 1
    end


    date_str  = row[:date]
    time_str  = row[:time]
    team1_str = row[:team1]
    team2_str = row[:team2]
    score_str = row[:score]

    ## convert date from string e.g. 2019-10-25
    date     = Date.strptime( date_str, '%Y-%m-%d' )
    date_key = date.strftime( '%Y-%m-%d' )


    ### check for score_error; first (step 1) lookup by date
    score_error = score_errors[ date_key ]
    if score_error &&
       team1_str == score_error[0] &&
       team2_str == score_error[1]
      ## team names match too; apply fix/patch!!
      if score_str != score_error[2][0]
        puts "!! WARN - score fix changed? - expected #{score_error[2][0]}, got #{score_str} - fixing to #{score_error[2][1]}"
        pp row
      end
      puts "FIX - applying score error fix - from #{score_error[2][0]} to => #{score_error[2][1]}"
      score_str = score_error[2][1]
    end


    print '[%03d] ' % num
    print "%-10s | " % date_str
    print "%-5s | " % time_str
    print "%-22s | " % team1_str
    print "%-22s | " % team2_str
    print score_str
    print "\n"


    ## check for 0:3 Wert. - change Wert. to awd. (awarded)
    score_str = score_str.sub( /Wert\./i, 'awd.' )

    ## clean team name (e.g. remove (old))
    ##   and asciify (e.g. ’ to ' )
    team1_str = norm_team( team1_str )
    team2_str = norm_team( team2_str )

    ## apply (optional) team name mods
    team1_str = mods[ team1_str ] if mods[ team1_str ]
    team2_str = mods[ team2_str ] if mods[ team2_str ]


    ht, ft, et, pen, comments = parse_score( score_str )


    recs << [stage,
             round,
             date_key,
             time_str,
             team1_str,
             ft,
             ht,
             team2_str,
             et,       # extra: incl. extra time
             pen,      # extra: incl. penalties
             comments]
  end # each row
  recs
end # build
158
+
159
+
160
+
161
## Split a (raw) score string into its parts.
##
## Supported formats:
##   ---                           - no score yet (match in the future)
##   n.gesp. / ausg. / annull.     - cancelled
##   abgebr.                       - abandoned
##   verl.                         - postponed
##   5-4 (0-0, 1-1, 2-2) i.E.      - pen (ht, ft, et) after penalties
##   2-1 (1-0, 1-1) n.V.           - et (ht, ft) after extra time
##   2-1 (1-0)                     - ft (ht)
##   0-3 awd.                      - ft plus a status suffix; ft gets marked with (*)
##                                   and the suffix is returned as comments
##   2-1                           - ft only
##
## Returns an array of five strings: [ht, ft, et, pen, comments]
##   (parts not present in the score are empty strings).
##
## Note: prints an error and terminates the process (exit 1)
##   on an unsupported score format.
def self.parse_score( score_str )
  ht  = ''
  ft  = ''
  et  = ''
  pen = ''
  comments = String.new( '' )   ## check - rename to/use status or such - why? why not?

  case score_str
  when '---'   ## in the future (no score yet) - was -:-
    ## nothing to do - all parts stay empty
  when 'n.gesp.',   ## cancelled (british) / canceled (us)
       'ausg.',     ## todo/check: change to some other status ????
       'annull.'    ## todo/check: change to some other status (see ie 2012) ????
    ft = '(*)'
    comments = 'cancelled'
  when 'abgebr.'    ## abandoned -- waiting for replay?
    ft = '(*)'
    comments = 'abandoned'
  when 'verl.'      ## postponed
    comments = 'postponed'
  # 5-4 (0-0, 1-1, 2-2) i.E.
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
          ([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
          ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
          [ ]*
          i\.E\.
        /x
    m   = Regexp.last_match
    pen = "#{m[1]}-#{m[2]}"
    ht  = "#{m[3]}-#{m[4]}"
    ft  = "#{m[5]}-#{m[6]}"
    et  = "#{m[7]}-#{m[8]}"
  # 2-1 (1-0, 1-1) n.V.
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
          ([0-9]+) [ ]*-[ ]* ([0-9]+)
        \)
          [ ]*
          n\.V\.
        /x
    m  = Regexp.last_match
    et = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
    ft = "#{m[5]}-#{m[6]}"
  # 2-1 (1-0)
  when /([0-9]+)
          [ ]*-[ ]*
        ([0-9]+)
          [ ]*
        \(([0-9]+)
          [ ]*-[ ]*
          ([0-9]+)
        \)
        /x
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
  # 0-3 awd.
  #   note: capture the complete status suffix incl. upper case letters
  #         (e.g. n.V. and NOT the cut-off n.)
  when /([0-9]+)
          [ ]*-[ ]*
        ([0-9]+)
          [ ]*
        ([a-zA-Z.]+)
        /x
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]} (*)"
    comments = m[3]
  when /^([0-9]+)-([0-9]+)$/
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"    ## e.g. see luxemburg and others
  else
    puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
    exit 1
  end

  [ht, ft, et, pen, comments]
end
244
+
245
+ end # module Worldfootball
@@ -0,0 +1,162 @@
1
+
2
+ module Worldfootball
3
+
4
+
5
+ #################
6
+ # todo/fix - use timezone instead of offset !!!
7
+ # e.g
8
+ =begin
9
+ TIMEZONES = {
10
+ 'eng.1' => 'Europe/London',
11
+ 'eng.2' => 'Europe/London',
12
+
13
+ 'es.1' => 'Europe/Madrid',
14
+
15
+ 'de.1' => 'Europe/Berlin',
16
+ 'fr.1' => 'Europe/Paris',
17
+ 'it.1' => 'Europe/Rome',
18
+ 'nl.1' => 'Europe/Amsterdam',
19
+
20
+ 'pt.1' => 'Europe/Lisbon',
21
+
22
+ ## todo/fix - pt.1
23
+ ## one team in madeira!!! check for different timezone??
24
+ ## CD Nacional da Madeira
25
+
26
+ 'br.1' => 'America/Sao_Paulo',
27
+ ## todo/fix - brazil has 4 timezones
28
+ ## really only two in use for clubs
29
+ ## west and east (amazonas et al)
30
+ ## for now use west for all - why? why not?
31
+ }
32
+ =end
33
+
34
+ ## todo - find "proper/classic" timezone ("winter time")
35
+
36
+ ## Brasilia - Distrito Federal, Brasil (GMT-3) -- summer time?
37
+ ## Ciudad de México, CDMX, México (GMT-5) -- summer time?
38
+ ## Londres, Reino Unido (GMT+1)
39
+ ## Madrid -- ?
40
+ ## Lisboa -- ?
41
+ ## Moscow -- ?
42
+ ##
43
+ ## todo/check - quick fix timezone offsets for leagues for now
44
+ ## - find something better - why? why not?
45
+ ## note: assume time is in GMT+1
46
## Time offsets in hours by league key (string)
##   note: frozen - lookup table only (not to be changed at runtime)
OFFSETS = {
  'eng.1' => -1,
  'eng.2' => -1,
  'eng.3' => -1,
  'eng.4' => -1,
  'eng.5' => -1,

  'es.1' => -1,
  'es.2' => -1,

  'pt.1' => -1,
  'pt.2' => -1,

  'br.1' => -5,
  'mx.1' => -7,
}.freeze
62
+
63
+
64
## Convert the cached schedule page(s) of a league season into a csv datafile.
##
## league - league key; gets resolved via find_league
## season - season (gets cast via Season())
##
## Steps:
##   1) parse the cached page(s) and build the match records
##   2) apply the (optional) time offset of the league (see OFFSETS)
##   3) sort the records by date and reformat the date (e.g. Sat Aug 7 1993)
##   4) remove unused columns (vacuum) and write the csv file to
##        <out_dir>/<season path>/<league key>.csv
def self.convert( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

  pages = league.pages( season: season )

  ## check: rename (optional) offset to time_offset or such?
  ## note: OFFSETS is keyed by the league key (string e.g. 'eng.1');
  ##       league is a league record by now - use its key for the lookup
  offset = OFFSETS[ league.key ]


  # note: assume stages if pages is an array (of hash table/records)
  #   (and NOT a single hash table/record)
  if pages.is_a?(Array)
    recs = []
    pages.each do |page_meta|
      slug       = page_meta[:slug]
      stage_name = page_meta[:stage]
      ## todo/fix: report error/check if stage.name is nil!!!

      print " parsing #{slug}..."

      # unless File.exist?( path )
      #   puts "!! WARN - missing stage >#{stage_name}< source - >#{path}<"
      #   next
      # end

      page = Page::Schedule.from_cache( slug )
      print " title=>#{page.title}<..."
      print "\n"

      rows = page.matches
      stage_recs = build( rows, season: season, league: league.key, stage: stage_name )

      pp stage_recs[0]   ## check first record
      recs += stage_recs
    end
  else
    page_meta = pages
    slug      = page_meta[:slug]

    print " parsing #{slug}..."

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    rows = page.matches
    recs = build( rows, season: season, league: league.key )

    pp recs[0]   ## check first record
  end

  recs = recs.map { |rec| fix_date( rec, offset ) } if offset

  ## note: sort matches by date before saving/writing!!!!
  ## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
  ## note: assume date is third column!!! (stage/round/date/...)
  recs = recs.sort { |l,r| l[2] <=> r[2] }
  ## reformat date / beautify e.g. Sat Aug 7 1993
  recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' ) }

  ## remove unused columns (e.g. stage, et, p, etc.)
  recs, headers = vacuum( recs )

  puts headers
  pp recs[0]   ## check first record

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

  puts "write #{out_path}..."
  write_csv( out_path, recs, headers: headers )
end
137
+
138
+
139
+
140
+ ## helper to fix dates to use local timezone (and not utc/london time)
141
## Shift the date and time of a match record by a number of hours.
##
## row    - match record; date expected in column three (row[2])
##            in the format 2002-08-17 and time in column four (row[3])
##            in the format 18:30
## offset - hours to add (may be negative or fractional e.g. -7 or 5.5)
##
## Returns the row (changed in-place) for possible pipelining;
##   a row without a time (nil or empty) is returned unchanged.
##
## Note: prints an error and terminates the process (exit 1)
##   if the date is not in the expected format.
def self.fix_date( row, offset )
  return row if row[3].nil? || row[3].empty?   ## note: time (column) required for fix

  date_fmt = '%Y-%m-%d'   # e.g. 2002-08-17
  col = row[2]
  unless col =~ /^\d{4}-\d{2}-\d{2}$/
    puts "!!! ERROR - wrong (unknown) date format >>#{col}<<; cannot continue; fix it; sorry"
    ## todo/fix: add to errors/warns list - why? why not?
    exit 1
  end

  date = DateTime.strptime( "#{col} #{row[3]}", "#{date_fmt} %H:%M" )
  ## note: add the hours as an exact fraction of a day (rational number);
  ##   plain integer division (e.g. -7/24) truncates to whole days
  date += offset.to_r / 24

  row[2] = date.strftime( date_fmt )   ## overwrite "old"
  row[3] = date.strftime( '%H:%M' )
  row   ## return row for possible pipelining - why? why not?
end
161
+
162
+ end # module Worldfootball