worldfootball 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 170277c7714f9b75e93176eb5fff6242fb6efb85051bc8977f7f635dbebf0513
4
+ data.tar.gz: 5b15d132765c3ee2df4cbdd847b43ba6c7e7bd617ed55bb5fe4dfabc0becb0e0
5
+ SHA512:
6
+ metadata.gz: c95b4b2becf545be2c208a207e8980d06369148d18b0657cddfb81470331c828ee8492649908ece372fb996b9d1a6dfc8eeaa45c54a3757eba7fb6d02e363bf0
7
+ data.tar.gz: ca127cb3f69c861dba48b049ed6da30b9ecde1f3935b30422d6184869b650b5cafc056be7fa37c7198bbd72ad65bdd262e56c82631c62629f4bbb4222992a9c9
data/CHANGELOG.md ADDED
@@ -0,0 +1,6 @@
1
+ ### 0.1.1
2
+
3
+ ### 0.0.1 / 2024-07-04
4
+
5
+ * Everything is new. First release.
6
+
data/Manifest.txt ADDED
@@ -0,0 +1,29 @@
1
+ CHANGELOG.md
2
+ Manifest.txt
3
+ README.md
4
+ Rakefile
5
+ bin/wfb
6
+ lib/worldfootball.rb
7
+ lib/worldfootball/build.rb
8
+ lib/worldfootball/convert.rb
9
+ lib/worldfootball/convert_reports.rb
10
+ lib/worldfootball/download.rb
11
+ lib/worldfootball/generator.rb
12
+ lib/worldfootball/leagues.rb
13
+ lib/worldfootball/leagues/asia.rb
14
+ lib/worldfootball/leagues/europe--british_isles.rb
15
+ lib/worldfootball/leagues/europe--central.rb
16
+ lib/worldfootball/leagues/europe--eastern.rb
17
+ lib/worldfootball/leagues/europe--northern.rb
18
+ lib/worldfootball/leagues/europe--southern.rb
19
+ lib/worldfootball/leagues/europe--western.rb
20
+ lib/worldfootball/leagues/europe.rb
21
+ lib/worldfootball/leagues/north_america.rb
22
+ lib/worldfootball/leagues/pacific.rb
23
+ lib/worldfootball/leagues/south_america.rb
24
+ lib/worldfootball/mods.rb
25
+ lib/worldfootball/page.rb
26
+ lib/worldfootball/page_report.rb
27
+ lib/worldfootball/page_schedule.rb
28
+ lib/worldfootball/vacuum.rb
29
+ lib/worldfootball/version.rb
data/README.md ADDED
@@ -0,0 +1,27 @@
1
+ # worldfootball - get world football (leagues, cups & more) match data via the worldfootball.net/weltfussball.de pages
2
+
3
+
4
+ * home :: [github.com/sportdb/sport.db](https://github.com/sportdb/sport.db)
5
+ * bugs :: [github.com/sportdb/sport.db/issues](https://github.com/sportdb/sport.db/issues)
6
+ * gem :: [rubygems.org/gems/worldfootball](https://rubygems.org/gems/worldfootball)
7
+ * rdoc :: [rubydoc.info/gems/worldfootball](http://rubydoc.info/gems/worldfootball)
8
+
9
+
10
+
11
+ ## Usage
12
+
13
+
14
+ To be done
15
+
16
+
17
+
18
+ ## License
19
+
20
+ The `worldfootball` scripts are dedicated to the public domain.
21
+ Use them as you please with no restrictions whatsoever.
22
+
23
+
24
+ ## Questions? Comments?
25
+
26
+ Yes, you can. More than welcome.
27
+ See [Help & Support »](https://github.com/openfootball/help)
data/Rakefile ADDED
@@ -0,0 +1,34 @@
1
+ require 'hoe'
2
+ require './lib/worldfootball/version.rb'
3
+
4
+ Hoe.spec 'worldfootball' do
5
+
6
+ self.version = Worldfootball::VERSION
7
+
8
+ self.summary = "worldfootball - get world football (leagues, cups & more) match data via the worldfootball.net/weltfussball.de pages"
9
+ self.description = summary
10
+
11
+ self.urls = { home: 'https://github.com/sportdb/sport.db' }
12
+
13
+ self.author = 'Gerald Bauer'
14
+ self.email = 'gerald.bauer@gmail.com'
15
+
16
+ # switch extension to .markdown for github formatting
17
+ self.readme_file = 'README.md'
18
+ self.history_file = 'CHANGELOG.md'
19
+
20
+ self.extra_deps = [
21
+ ## ['tzinfo'],
22
+ ['season-formats'],
23
+ ['webget'],
24
+ ['nokogiri'],
25
+ ['cocos'], ## later pull in with sportsdb-writers
26
+ ]
27
+
28
+ self.licenses = ['Public Domain']
29
+
30
+ self.spec_extras = {
31
+ required_ruby_version: '>= 2.2.2'
32
+ }
33
+
34
+ end
data/bin/wfb ADDED
@@ -0,0 +1,104 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ ## tip: to test run:
4
+ ## ruby -I ./lib bin/wfb
5
+
6
+ require 'worldfootball'
7
+
8
+
9
+ Webcache.root = if File.exist?( '/sports/cache' )
10
+ puts " setting web cache to >/sports/cache<"
11
+ '/sports/cache'
12
+ else
13
+ './cache'
14
+ end
15
+
16
+
17
+ require 'optparse'
18
+
19
+
20
module Worldfootball
  ## Command-line entry point for the wfb tool.
  ##
  ## args - command-line arguments (defaults to ARGV); option switches get
  ##        removed in place by OptionParser#parse!
  ##
  ## With no argument (or with league / leagues / ls) the known league keys
  ## are printed and the process exits with status 0. Otherwise args[0] is
  ## used as the league code and args[1] as the (optional) season,
  ## defaulting to 2024/25. Unless --cache / --cached / --offline is given
  ## every schedule page gets downloaded first; afterwards every page is
  ## read back from the cache and its match count is printed.
  def self.main( args=ARGV )
    opts = {}
    parser = OptionParser.new do |cli|
      cli.banner = "Usage: #{$PROGRAM_NAME} [options]"

      ##
      ## check if git has an offline option?? (use same)
      ##   check for other tools - why? why not?

      cli.on( "--cache", "--cached", "--offline",
              "use cached data in #{Webcache.root}" ) do |cached|
        opts[:cached] = cached
      end
    end
    parser.parse!( args )

    puts "OPTS:"
    p opts
    puts "ARGV:"
    p args

    ## no command given => list all league keys
    if ['league', 'leagues', 'ls'].include?( args[0] || 'leagues' )
      keys = LEAGUES.keys

      pp keys
      puts " #{keys.size} league(s)"

      exit 0
    end

    league_code = args[0].downcase
    league      = find_league( league_code )     ## league info lookup
    season      = Season( args[1] || '2024/25' )
    pages       = league.pages( season: season )

    pp pages

    ## wrap single page record into array
    ##   note: do this BEFORE counting - the size of a single (hash) record
    ##         is its number of keys and NOT the number of pages
    pages = [pages] unless pages.is_a?( Array )
    puts " #{pages.size} page(s)"

    ## download to cache (skipped if cached/offline requested)
    unless opts[:cached]
      pages.each_with_index do |page_rec, i|
        slug = page_rec[:slug]
        puts "==> #{i+1}/#{pages.size} - #{league_code} @ #{slug}..."
        Metal.download_schedule( slug )
      end
    end

    ## read pages back from cache and report match counts
    pages.each_with_index do |page_rec, i|
      slug = page_rec[:slug]

      puts "==> #{i+1}/#{pages.size} - #{league_code} @ #{slug}..."
      page    = Page::Schedule.from_cache( slug )
      matches = page.matches

      puts " #{matches.size} match(es)"
    end
  end # def self.main
end # module Worldfootball
99
+
100
+
101
+ Worldfootball.main( ARGV )
102
+
103
+
104
+ puts "bye"
@@ -0,0 +1,245 @@
1
+
2
+ module Worldfootball
3
+
4
+
5
## Maps german (de) cup round names to their english (en) names.
##   note: frozen - a lookup table that must not get changed at runtime
ROUND_TO_EN = {
  '1. Runde'      => 'Round 1',
  '2. Runde'      => 'Round 2',
  '3. Runde'      => 'Round 3',
  '4. Runde'      => 'Round 4',
  'Achtelfinale'  => 'Round of 16',
  'Viertelfinale' => 'Quarterfinals',
  'Halbfinale'    => 'Semifinals',
  'Finale'        => 'Final',
}.freeze
15
+
16
+
17
+ ## todo/check: english league cup/trophy has NO ET - also support - make more flexible!!!
18
+
19
+ ## build "standard" match records from "raw" table rows
20
## Build "standard" match records from "raw" schedule table rows.
##
## rows   - array of hashes read with the keys
##            :round, :date, :time, :team1, :team2, :score
## season - season; cast with Season()
## league - league key as a String (e.g. 'at.1') - NOT a league struct
## stage  - optional stage name; copied into the first column of every record
##
## Returns an array of records in the column order
##   [stage, round, date, time, team1, ft, ht, team2, et, pen, comments]
##
## Raises ArgumentError if league is not a String.
## Prints an error and exits (status 1) on an unknown round name.
def self.build( rows, season:, league:, stage: '' )   ## rename to fixup or such - why? why not?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  ## note: do NOT pass in league struct! pass in key (string)
  raise ArgumentError, "league key as string expected" unless league.is_a?(String)

  print " #{rows.size} rows - build #{league} #{season}"
  print " - #{stage}" unless stage.empty?
  print "\n"

  ## note: use only first part from key for lookup
  ##   e.g. at.1  => at
  ##        eng.1 => eng
  mods = MODS[ league.split('.')[0] ] || {}

  score_errors = SCORE_ERRORS[ league ] || {}

  ## do NOT translate rounds (to english) for these leagues
  ##   - keep in german / deutsch (de)
  keep_round_names = ['at.cup', 'at.1',    ## at.1 - incl. europa league playoff
                      'de.cup'].include?( league )

  recs = []
  rows.each_with_index do |row, idx|
    num = idx + 1    ## 1-based row number (for log output only)

    if row[:round] =~ /Spieltag/
      puts
      print '[%03d] ' % num
      print row[:round]

      if m = row[:round].match( /([0-9]+)\. Spieltag/ )
        round = m[1]     ## note: keep as string (NOT number)
        print " => #{round}"
      else
        puts "!! ERROR: cannot find matchday number"
        exit 1
      end
      print "\n"
    elsif row[:round] =~ /[1-9]\.[ ]Runde|
                          Achtelfinale|
                          Viertelfinale|
                          Halbfinale|
                          Finale
                         /x
      puts
      print '[%03d] ' % num
      print row[:round]

      if keep_round_names
        round = row[:round]
      else
        round = ROUND_TO_EN[ row[:round] ]
        if round.nil?
          puts "!! ERROR: no mapping for round to english (en) found >#{row[:round]}<:"
          pp row
          exit 1
        end
        print " => #{round}"
      end
      print "\n"
    else
      puts "!! ERROR: unknown round >#{row[:round]}< for league >#{league}<:"
      pp row
      exit 1
    end

    date_str  = row[:date]
    time_str  = row[:time]
    team1_str = row[:team1]
    team2_str = row[:team2]
    score_str = row[:score]

    ## convert date from string e.g. 2019-10-25
    date     = Date.strptime( date_str, '%Y-%m-%d' )
    date_iso = date.strftime( '%Y-%m-%d' )

    ### check for score_error; first (step 1) lookup by date
    ##    a score error is read here as [team1, team2, [from_score, to_score]]
    score_error = score_errors[ date_iso ]
    if score_error &&
       team1_str == score_error[0] &&
       team2_str == score_error[1]
      ## team names match too; apply fix/patch!!
      from_score, to_score = score_error[2][0], score_error[2][1]
      if score_str != from_score
        puts "!! WARN - score fix changed? - expected #{from_score}, got #{score_str} - fixing to #{to_score}"
        pp row
      end
      puts "FIX - applying score error fix - from #{from_score} to => #{to_score}"
      score_str = to_score
    end

    print '[%03d] ' % num
    print "%-10s | " % date_str
    print "%-5s | " % time_str
    print "%-22s | " % team1_str
    print "%-22s | " % team2_str
    print score_str
    print "\n"

    ## check for 0:3 Wert. - change Wert. to awd. (awarded)
    score_str = score_str.sub( /Wert\./i, 'awd.' )

    ## clean team name (e.g. remove (old))
    ##   and asciify (e.g. ’ to ' )
    team1_str = norm_team( team1_str )
    team2_str = norm_team( team2_str )

    ## apply (optional) per-country team name mods
    team1_str = mods[ team1_str ] if mods[ team1_str ]
    team2_str = mods[ team2_str ] if mods[ team2_str ]

    ht, ft, et, pen, comments = parse_score( score_str )

    recs << [stage,
             round,
             date_iso,
             time_str,
             team1_str,
             ft,
             ht,
             team2_str,
             et,        # extra: incl. extra time
             pen,       # extra: incl. penalties
             comments]
  end # each row
  recs
end # build
158
+
159
+
160
+
161
## Split a (german) score string into its parts.
##
## score_str - score as found in the schedule table, e.g.
##               ---                        (no score yet)
##               n.gesp. / ausg. / annull.  (cancelled)
##               abgebr.                    (abandoned)
##               verl.                      (postponed)
##               5-4 (0-0, 1-1, 2-2) i.E.   (penalties after extra time)
##               5-4 (0-0, 1-1) i.E.        (penalties, NO extra time)
##               2-1 (1-0, 1-1) n.V.        (after extra time)
##               2-1 (1-0)                  (full time with half time)
##               0-3 awd.                   (score with trailing remark)
##               2-1                        (full time only)
##
## Returns [ht, ft, et, pen, comments] - all strings; empty if not available.
## Prints an error and exits (status 1) on an unsupported format.
def self.parse_score( score_str )
  ht       = ''
  ft       = ''
  et       = ''
  pen      = ''
  comments = String.new( '' )  ## check - rename to/use status or such - why? why not?

  case score_str
  when '---'    ## in the future (no score yet) - was -:-
    ## nothing to do - keep all empty
  when 'n.gesp.',   ## cancelled (british) / canceled (us)
       'ausg.',     ## todo/check: change to some other status ????
       'annull.'    ## todo/check: change to some other status (see ie 2012) ????
    ft       = '(*)'
    comments = 'cancelled'
  when 'abgebr.'    ## abandoned -- waiting for replay?
    ft       = '(*)'
    comments = 'abandoned'
  when 'verl.'      ## postponed
    comments = 'postponed'
  # 5-4 (0-0, 1-1, 2-2) i.E.
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
         ([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
         ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
          [ ]*
         i\.E\.
       /x
    m   = Regexp.last_match
    pen = "#{m[1]}-#{m[2]}"
    ht  = "#{m[3]}-#{m[4]}"
    ft  = "#{m[5]}-#{m[6]}"
    et  = "#{m[7]}-#{m[8]}"
  # 5-4 (0-0, 1-1) i.E.  - penalties right after regular time (NO extra time)
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
         ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
          [ ]*
         i\.E\.
       /x
    m   = Regexp.last_match
    pen = "#{m[1]}-#{m[2]}"
    ht  = "#{m[3]}-#{m[4]}"
    ft  = "#{m[5]}-#{m[6]}"
  # 2-1 (1-0, 1-1) n.V
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
         ([0-9]+) [ ]*-[ ]* ([0-9]+)
        \)
          [ ]*
         n\.V\.
       /x
    m  = Regexp.last_match
    et = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
    ft = "#{m[5]}-#{m[6]}"
  # 2-1 (1-0)
  when /([0-9]+)
          [ ]*-[ ]*
        ([0-9]+)
          [ ]*
        \(([0-9]+)
          [ ]*-[ ]*
         ([0-9]+)
        \)
       /x
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
  # 0-3 awd.  - score with trailing (lowercase) remark
  when /([0-9]+)
          [ ]*-[ ]*
        ([0-9]+)
          [ ]*
        ([a-z.]+)
       /x
    m        = Regexp.last_match
    ft       = "#{m[1]}-#{m[2]} (*)"
    comments = m[3]
  when /^([0-9]+)-([0-9]+)$/
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"    ## e.g. see luxemburg and others
  else
    puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
    exit 1
  end

  [ht, ft, et, pen, comments]
end
244
+
245
+ end # module Worldfootball
@@ -0,0 +1,162 @@
1
+
2
+ module Worldfootball
3
+
4
+
5
+ #################
6
+ # todo/fix - use timezone instead of offset !!!
7
+ # e.g
8
+ =begin
9
+ TIMEZONES = {
10
+ 'eng.1' => 'Europe/London',
11
+ 'eng.2' => 'Europe/London',
12
+
13
+ 'es.1' => 'Europe/Madrid',
14
+
15
+ 'de.1' => 'Europe/Berlin',
16
+ 'fr.1' => 'Europe/Paris',
17
+ 'it.1' => 'Europe/Rome',
18
+ 'nl.1' => 'Europe/Amsterdam',
19
+
20
+ 'pt.1' => 'Europe/Lisbon',
21
+
22
+ ## todo/fix - pt.1
23
+ ## one team in madeira!!! check for different timezone??
24
+ ## CD Nacional da Madeira
25
+
26
+ 'br.1' => 'America/Sao_Paulo',
27
+ ## todo/fix - brazil has 4 timezones
28
+ ## really only two in use for clubs
29
+ ## west and east (amazonas et al)
30
+ ## for now use west for all - why? why not?
31
+ }
32
+ =end
33
+
34
+ ## todo - find "proper/classic" timezone ("winter time")
35
+
36
+ ## Brasilia - Distrito Federal, Brasil (GMT-3) -- summer time?
37
+ ## Ciudad de México, CDMX, México (GMT-5) -- summer time?
38
+ ## Londres, Reino Unido (GMT+1)
39
+ ## Madrid -- ?
40
+ ## Lisboa -- ?
41
+ ## Moscow -- ?
42
+ ##
43
+ ## todo/check - quick fix timezone offsets for leagues for now
44
+ ## - find something better - why? why not?
45
+ ## note: assume time is in GMT+1
46
## Quick fix time offsets (in hours) by league key
##   note: assume time is in GMT+1
##   note: frozen - a lookup table that must not get changed at runtime
OFFSETS = {
  'eng.1' => -1,
  'eng.2' => -1,
  'eng.3' => -1,
  'eng.4' => -1,
  'eng.5' => -1,

  'es.1'  => -1,
  'es.2'  => -1,

  'pt.1'  => -1,
  'pt.2'  => -1,

  'br.1'  => -5,
  'mx.1'  => -7,
}.freeze
62
+
63
+
64
## Convert the cached schedule page(s) of a league season into a .csv datafile.
##
## league - league key/code; resolved with find_league
## season - season; cast with Season()
##
## Reads every schedule page from the cache, builds the match records,
## applies the (optional) time offset of the league, sorts by date,
## removes unused columns and writes the records to
##   <config.convert.out_dir>/<season.path>/<league.key>.csv
def self.convert( league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

  pages = league.pages( season: season )

  ## check: rename (optional) offset to time_offset or such?
  ##   note: lookup by league key (string) - league is a league record by now
  offset = OFFSETS[ league.key ]

  # note: assume stages if pages is an array (of hash table/records)
  #         (and NOT a single hash table/record)
  staged    = pages.is_a?(Array)
  page_list = staged ? pages : [pages]

  recs = []
  page_list.each do |page_meta|
    slug = page_meta[:slug]

    print " parsing #{slug}..."

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    rows = page.matches

    build_opts = { season: season, league: league.key }
    ## todo/fix: report error/check if stage name is nil!!!
    build_opts[:stage] = page_meta[:stage] if staged

    page_recs = build( rows, **build_opts )

    pp page_recs[0]   ## check first record
    recs += page_recs
  end

  recs = recs.map { |rec| fix_date( rec, offset ) } if offset

  ## note: sort matches by date before saving/writing!!!!
  ## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
  ## note: assume date is third column!!! (stage/round/date/...)
  recs = recs.sort { |l,r| l[2] <=> r[2] }
  ## reformat date / beautify e.g. Sat Aug 7 1993
  recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' ) }

  ## remove unused columns (e.g. stage, et, p, etc.)
  recs, headers = vacuum( recs )

  puts headers
  pp recs[0]   ## check first record

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

  puts "write #{out_path}..."
  write_csv( out_path, recs, headers: headers )
end
137
+
138
+
139
+
140
+ ## helper to fix dates to use local timezone (and not utc/london time)
141
## Helper to shift the date & time of a match record by a number of hours.
##
## row    - match record; the date (YYYY-MM-DD) is read from the third
##          column (row[2]) and the time (HH:MM) from the fourth column (row[3])
## offset - offset in hours (e.g. -7); may be negative
##
## Returns the (same) row with date & time overwritten in place; the row is
## returned unchanged if the time is missing (nil or empty).
## Prints an error and exits (status 1) if the date is not in YYYY-MM-DD format.
def self.fix_date( row, offset )
  time_str = row[3]
  return row if time_str.nil? || time_str.empty?   ## note: time (column) required for fix

  date_str = row[2]
  unless date_str =~ /^\d{4}-\d{2}-\d{2}$/
    puts "!!! ERROR - wrong (unknown) date format >>#{date_str}<<; cannot continue; fix it; sorry"
    ## todo/fix: add to errors/warns list - why? why not?
    exit 1
  end
  date_fmt = '%Y-%m-%d'    # e.g. 2002-08-17

  date = DateTime.strptime( "#{date_str} #{time_str}", "#{date_fmt} %H:%M" )
  ## note: offset is in hours - add as an exact fraction of a day
  ##         (integer division e.g. -7/24 would NOT work;
  ##          a rational avoids any float rounding)
  date += Rational( offset, 24 )

  row[2] = date.strftime( date_fmt )   ## overwrite "old"
  row[3] = date.strftime( '%H:%M' )
  row   ## return row for possible pipelining - why? why not?
end
161
+
162
+ end # module Worldfootball