football-sources 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,59 +0,0 @@
1
module Footballdata

  # Collects running counters for match records given as hashes.
  #
  # Counters live in two kinds of buckets: one overall bucket stored under
  # :all and one bucket per stage, stored under the downcased stage name as
  # a symbol (e.g. :regular_season). Only the :all bucket carries the extra
  # :stage tally.
  class Stat    ## rename to match stat or something why? why not?
    def initialize
      @data = {}
    end

    # Returns the bucket stored under +key+ (:all or a stage symbol),
    # or nil if nothing has been recorded for it yet.
    def [](key)
      @data[ key ]
    end

    # Records one match in the :all bucket and in the bucket of its stage.
    #
    # +match+ is read with the string keys 'stage', 'group', 'status',
    # 'matchday' and 'score'; the score is read via 'duration' and
    # 'fullTime' => 'homeTeam' / 'awayTeam'.
    def update( match )
      score = match['score']

      ## keep track of some statistics (over all stages)
      overall = @data[:all] ||= new_bucket( with_stage: true )
      overall[:stage][ match['stage'] ] += 1
      tally( overall, match, score )

      ## ... and once more per stage
      stage_key = match['stage'].downcase.to_sym   # e.g. :regular_season
      per_stage = @data[ stage_key ] ||= new_bucket
      tally( per_stage, match, score )
    end

    private

    # Builds an empty counter bucket; the :stage tally is only added when
    # asked for (that is, for the :all bucket).
    def new_bucket( with_stage: false )
      bucket = {}
      bucket[:stage] = Hash.new( 0 )   if with_stage
      bucket.merge!( duration: Hash.new( 0 ),
                     status:   Hash.new( 0 ),
                     group:    Hash.new( 0 ),
                     matchday: Hash.new( 0 ),

                     matches:  0,
                     goals:    0 )
    end

    # Adds one match to the counters shared by all buckets.
    def tally( bucket, match, score )
      bucket[:group][ match['group'] ]       += 1
      bucket[:status][ match['status'] ]     += 1
      bucket[:matchday][ match['matchday'] ] += 1

      bucket[:duration][ score['duration'] ] += 1   ## track - assert always REGULAR

      bucket[:matches] += 1

      ## goals are only added when a full time value is present (not nil)
      full_time = score['fullTime']
      bucket[:goals] += full_time['homeTeam'].to_i   if full_time['homeTeam']
      bucket[:goals] += full_time['awayTeam'].to_i   if full_time['awayTeam']
    end
  end # class Stat
end # module Footballdata
57
-
58
-
59
-
@@ -1,10 +0,0 @@
1
-
2
-
3
- ###########################
4
- ## our own code
5
- require_relative 'apis/config'
6
- require_relative 'apis/mods'
7
- require_relative 'apis/stat'
8
- require_relative 'apis/convert'
9
- require_relative 'apis/convert_cl'
10
-
@@ -1,96 +0,0 @@
1
-
2
- module Fbref
3
-
4
## Turn "raw" schedule rows into match records (one array per row) in the
## column order
##   stage, round, date, time, team 1, ft, ht, team 2, et, pen,
##   venue, attendance, comments
##
## rows are read via the keys :stage, :matchweek, :round, :date, :time,
## :team1, :team2, :score, :comments, :venue and :attendance.
## league must be the league key as a string; anything else raises
## ArgumentError.
def self.build( rows, league:, season: )
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  ## note: do NOT pass in league struct! pass in key (string)
  raise ArgumentError, "league key as string expected"   unless league.is_a?(String)

  print " #{rows.size} rows - build #{league} #{season}"
  print "\n"

  rows.map do |row|
    ## todo/check: assert that only matchweek or round can be present NOT both!!
    ## the first non-empty one wins (matchweek before round)
    round = [row[:matchweek], row[:round]].find { |val| val && val.size > 0 } || ''

    ## date arrives as a string in year-month-day format; parsing it here
    ##   makes sure it is valid before it gets written back out
    day = Date.strptime( row[:date], '%Y-%m-%d' )

    ht, ft, et, pen, comments = parse_score( row[:score], row[:comments] )

    [row[:stage] || '',
     round,
     day.strftime( '%Y-%m-%d' ),
     row[:time],
     row[:team1],
     ft,
     ht,
     row[:team2],
     et,    # extra: incl. extra time
     pen,   # extra: incl. penalties
     row[:venue],
     row[:attendance],
     comments]
  end
end
61
-
62
-
63
## Split a score string into its parts.
##
## score_str - score text, e.g. "2–1" or "(4) 1–1 (3)" (penalties in
##             parentheses around the score); may be nil or empty when
##             there is no score
## comments  - free text notes for the match (may be nil); returned as is
##
## Returns [ht, ft, et, pen, comments]. ht is always '' here. For a
## shootout score ft is '?', et holds the score and pen the shootout
## result. If the comments mention "extra time" the score is reported as
## et (ft is '?'); otherwise it is reported as ft.
def self.parse_score( score_str, comments )
  ht  = ''
  ft  = ''
  et  = ''
  pen = ''

  ## no score (yet) - nothing to split
  return [ht, ft, et, pen, comments]   if score_str.nil? || score_str.empty?

  ## note: replace unicode "fancy" dash with ascii-dash
  #  check other columns too - possible in teams?
  score_str = score_str.tr( '–', '-' ).strip

  ## e.g. (4) 1-1 (3)
  ## note: every number may have more than one digit
  ##       (the away score used to be limited to a single character)
  if score_str =~ /^\(([0-9]+)\)
                     [ ]+ ([0-9]+) - ([0-9]+) [ ]+
                    \(([0-9]+)\)$/x
    ft  = '?'
    et  = "#{$2}-#{$3}"
    pen = "#{$1}-#{$4}"
  elsif comments.to_s =~ /extra time/i
    ## "regular" looking score e.g. 0-0 but notes say it includes extra time
    ft = '?'
    et = score_str
  else
    ## assume "regular" score e.g. 0-0
    ft = score_str
  end

  [ht, ft, et, pen, comments]
end
95
-
96
- end # module Fbref
@@ -1,16 +0,0 @@
1
module Fbref

  ### add some more config options / settings
  class Configuration
    #########
    ## settings for the convert step, kept in a nested class
    ##   - use - why? why not?
    class Convert
      ## only the writer is generated; the reader below supplies the fallback
      attr_writer :out_dir

      ## output directory; './o' unless another one was assigned
      def out_dir
        @out_dir || './o'
      end
    end

    ## Convert settings - created on first use and then reused
    def convert
      @convert ||= Convert.new
    end
  end # class Configuration


end # module Fbref
@@ -1,95 +0,0 @@
1
- module Fbref
2
-
3
## Convert the cached schedule page for a league and season into a
## match datafile (.csv) below the configured output directory.
def self.convert( league:, season: )
  schedule = Page::Schedule.from_cache( league: league,
                                        season: season )
  puts schedule.title

  matches = build( schedule.matches, league: league, season: season )
  ## pp rows

  ## reformat date / beautify e.g. Sat Aug 7 1993
  matches.each do |match|
    day = Date.strptime( match[2], '%Y-%m-%d' )
    match[2] = day.strftime( '%a %b %-d %Y' )
  end

  ## drop the columns that vacuum reports as unused
  matches, headers = vacuum( matches )
  pp matches[0..2]

  season_rec = Season.parse( season )
  path = "#{config.convert.out_dir}/#{league}_#{season_rec.to_path}.csv"
  puts "write #{path}..."
  Cache::CsvMatchWriter.write( path, matches, headers: headers )
end
24
-
25
-
26
-
27
-
28
- #####
29
- # vacuum helper stuff - todo/fix - (re)use - make more generic - why? why not?
30
-
31
## all known columns (in record order)
MAX_HEADERS = [
  'Stage',
  'Round',
  'Date',
  'Time',
  'Team 1',
  'FT',
  'HT',
  'Team 2',
  'ET',
  'P',
  'Venue',
  'Att',
  'Comments',    ## e.g. awarded, cancelled/canceled, etc.
]

MIN_HEADERS = [   ## always keep even if all empty
  'Date',
  'Team 1',
  'FT',
  'Team 2'
]

## Strip the columns that are empty (nil or '') in every row.
##
## rows          - array of records (arrays); column i belongs to headers[i]
## headers       - column names in record order (defaults to MAX_HEADERS)
## fixed_headers - column names to keep even if the column is empty
##                 in all rows (defaults to MIN_HEADERS)
##
## Returns [rows, headers] with the unused columns removed from both.
## The rows are passed through untouched if there is nothing to remove.
##
## note: the headers passed in are now used for counting and naming the
##       columns (before MAX_HEADERS was always used, whatever got passed in)
def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
  ## count the non-empty cells per column
  counter = Array.new( headers.size, 0 )
  rows.each do |row|
    row.each_with_index do |col, idx|
      counter[idx] += 1   unless col.nil? || col.empty?
    end
  end

  pp counter

  ## columns to keep - used at least once or marked as fixed
  keep = counter.each_index.select do |idx|
    counter[idx] > 0 || fixed_headers.include?( headers[idx] )
  end

  if keep.size < counter.size
    rows = rows.map do |row|
      row.select.with_index { |_col, idx| keep.include?( idx ) }
    end
  end

  [rows, keep.map { |idx| headers[idx] }]
end
95
- end # module Fbref
@@ -1,4 +0,0 @@
1
-
2
- require_relative 'fbref/config'
3
- require_relative 'fbref/build'
4
- require_relative 'fbref/convert'
@@ -1,245 +0,0 @@
1
-
2
- module Worldfootball
3
-
4
-
5
## german round names and their english counterparts
ROUND_TO_EN = {
  '1. Runde'      => 'Round 1',
  '2. Runde'      => 'Round 2',
  '3. Runde'      => 'Round 3',
  '4. Runde'      => 'Round 4',
  'Achtelfinale'  => 'Round of 16',
  'Viertelfinale' => 'Quarterfinals',
  'Halbfinale'    => 'Semifinals',
  'Finale'        => 'Final',
}


## todo/check: english league cup/trophy has NO ET - also support - make more flexible!!!

## Build "standard" match records from "raw" table rows.
##
## Every row is read via the keys :round, :date, :time, :team1, :team2 and
## :score and becomes one record (array) in the column order
##   stage, round, date, time, team 1, ft, ht, team 2, et, pen, comments
##
## league must be the league key as a string (ArgumentError otherwise).
## Progress is printed along the way; an unknown round (or a round without
## an english name) ends the program with exit status 1.
def self.build( rows, season:, league:, stage: '' )   ## rename to fixup or such - why? why not?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  ## note: do NOT pass in league struct! pass in key (string)
  raise ArgumentError, "league key as string expected"   unless league.is_a?(String)

  print " #{rows.size} rows - build #{league} #{season}"
  print " - #{stage}"   unless stage.empty?
  print "\n"

  ## note: use only first part from key for lookup
  ##   e.g. at.1  => at
  ##        eng.1 => eng
  ##        and so on
  mods         = MODS[ league.split('.').first ] || {}
  score_errors = SCORE_ERRORS[ league ] || {}

  matchday_round = /Spieltag/
  knockout_round = /[1-9]\.[ ]Runde|
                     Achtelfinale|
                     Viertelfinale|
                     Halbfinale|
                     Finale
                   /x

  ## do NOT translate rounds (to english) - keep in german / deutsch (de)
  keep_in_german = ['at.cup', 'at.1',   ## at.1 - incl. europa league playoff
                    'de.cup']

  rows.each_with_index.map do |row, idx|
    ## note: the printed number is the (1-based) row count plus one
    label     = '[%03d] ' % (idx+2)
    raw_round = row[:round]

    round = case raw_round
            when matchday_round
              puts
              print label
              print raw_round

              ## todo/check: always use a string even if number (as a string eg. '1' etc.)
              matchday = raw_round[ /([0-9]+)\. Spieltag/, 1 ]   ## note: keep as string (NOT number)
              if matchday.nil?
                puts "!! ERROR: cannot find matchday number"
                exit 1
              end
              print " => #{matchday}"
              print "\n"
              matchday
            when knockout_round
              puts
              print label
              print raw_round

              name = if keep_in_german.include?( league )
                       raw_round
                     else
                       english = ROUND_TO_EN[ raw_round ]
                       if english.nil?
                         puts "!! ERROR: no mapping for round to english (en) found >#{raw_round}<:"
                         pp row
                         exit 1
                       end
                       print " => #{english}"
                       english
                     end
              print "\n"
              name
            else
              puts "!! ERROR: unknown round >#{raw_round}< for league >#{league}<:"
              pp row
              exit 1
            end

    date_str  = row[:date]
    time_str  = row[:time]
    team1_str = row[:team1]
    team2_str = row[:team2]
    score_str = row[:score]

    ## date arrives as a string in year-month-day format
    iso_date = Date.strptime( date_str, '%Y-%m-%d' ).strftime( '%Y-%m-%d' )

    ### check for score error; lookup by date first,
    ##    then check if team names match too; if yes, apply fix/patch!!
    fix = score_errors[ iso_date ]
    if fix && team1_str == fix[0] && team2_str == fix[1]
      wrong = fix[2][0]
      right = fix[2][1]
      if score_str != wrong
        puts "!! WARN - score fix changed? - expected #{wrong}, got #{score_str} - fixing to #{right}"
        pp row
      end
      puts "FIX - applying score error fix - from #{wrong} to => #{right}"
      score_str = right
    end

    print label
    print "%-10s | " % date_str
    print "%-5s | " % time_str
    print "%-22s | " % team1_str
    print "%-22s | " % team2_str
    print score_str
    print "\n"

    ## check for 0:3 Wert. - change Wert. to awd. (awarded)
    score_str = score_str.sub( /Wert\./i, 'awd.' )

    ## pass team names through norm_team first,
    ##   then swap in the per-country replacement (if there is one)
    team1_str = norm_team( team1_str )
    team2_str = norm_team( team2_str )
    team1_str = mods[ team1_str ] || team1_str
    team2_str = mods[ team2_str ] || team2_str

    ht, ft, et, pen, comments = parse_score( score_str )

    [stage,
     round,
     iso_date,
     time_str,
     team1_str,
     ft,
     ht,
     team2_str,
     et,    # extra: incl. extra time
     pen,   # extra: incl. penalties
     comments]
  end   # each row
end # build
158
-
159
-
160
-
161
## Split a (german) score string into its parts.
##
## Returns [ht, ft, et, pen, comments]. Parts that do not apply are ''.
## A score that is not played or not regular gets a '(*)' marker in ft
## plus a note in comments. An unsupported format ends the program with
## exit status 1.
def self.parse_score( score_str )
  comments = String.new( '' )   ## check - rename to/use status or such - why? why not?

  ht  = ''
  ft  = ''
  et  = ''
  pen = ''

  case score_str
  when '---'        ## in the future (no score yet) - was -:-
    ## nothing to do - all parts stay empty
  when 'n.gesp.',   ## cancelled (british) / canceled (us)
       'ausg.',     ## todo/check: change to some other status ????
       'annull.'    ## todo/check: change to some other status (see ie 2012) ????
    ft       = '(*)'
    comments = 'cancelled'
  when 'abgebr.'    ## abandoned -- waiting for replay?
    ft       = '(*)'
    comments = 'abandoned'
  when 'verl.'      ## postponed
    comments = 'postponed'
  # 5-4 (0-0, 1-1, 2-2) i.E.
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
          ([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
          ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
          [ ]*
         i\.E\.
       /x
    m   = Regexp.last_match
    pen = "#{m[1]}-#{m[2]}"
    ht  = "#{m[3]}-#{m[4]}"
    ft  = "#{m[5]}-#{m[6]}"
    et  = "#{m[7]}-#{m[8]}"
  # 2-1 (1-0, 1-1) n.V
  when /([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*
        \(([0-9]+) [ ]*-[ ]* ([0-9]+)
          [ ]*,[ ]*
          ([0-9]+) [ ]*-[ ]* ([0-9]+)
        \)
          [ ]*
         n\.V\.
       /x
    m  = Regexp.last_match
    et = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
    ft = "#{m[5]}-#{m[6]}"
  # 2-1 (1-0)
  when /([0-9]+)
          [ ]*-[ ]*
        ([0-9]+)
          [ ]*
        \(([0-9]+)
          [ ]*-[ ]*
         ([0-9]+)
        \)
       /x
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"
    ht = "#{m[3]}-#{m[4]}"
  # score followed by a note made of lowercase letters and dots e.g. 0-3 awd.
  when /([0-9]+)
          [ ]*-[ ]*
        ([0-9]+)
          [ ]*
        ([a-z.]+)
       /x
    m        = Regexp.last_match
    ft       = "#{m[1]}-#{m[2]} (*)"
    comments = m[3]
  when /^([0-9]+)-([0-9]+)$/
    m  = Regexp.last_match
    ft = "#{m[1]}-#{m[2]}"   ## e.g. see luxemburg and others
  else
    puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
    exit 1
  end

  [ht, ft, et, pen, comments]
end
244
-
245
- end # module Worldfootball
@@ -1,16 +0,0 @@
1
module Worldfootball

  ### add some more config options / settings
  class Configuration
    #########
    ## settings for the convert step, kept in a nested class
    ##   - use - why? why not?
    class Convert
      ## only the writer is generated; the reader below supplies the fallback
      attr_writer :out_dir

      ## output directory; './o' unless another one was assigned
      def out_dir
        @out_dir || './o'
      end
    end

    ## Convert settings - created on first use and then reused
    def convert
      @convert ||= Convert.new
    end
  end # class Configuration


end # module Worldfootball
@@ -1,100 +0,0 @@
1
-
2
- module Worldfootball
3
-
4
-
5
-
6
## Convert the cached schedule page(s) of a league and season into one
## match datafile (.csv) below the configured output directory.
##
## offset - optional; if given, every record is passed through fix_date
##          with it before sorting
def self.convert( league:, season:, offset: nil )   ## check: rename (optional) offset to time_offset or such?
  season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

  league = find_league( league )

  pages = league.pages( season: season )

  ## read a schedule page from the cache and return its match rows
  fetch_rows = lambda do |slug|
    print " parsing #{slug}..."

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    page.matches
  end

  # note: assume stages if pages is an array (of hash table/records)
  #         (and NOT a single hash table/record)
  recs = if pages.is_a?(Array)
           pages.flat_map do |page_meta|
             slug       = page_meta[:slug]
             stage_name = page_meta[:stage]
             ## todo/fix: report error/check if stage.name is nil!!!

             stage_recs = build( fetch_rows.call( slug ),
                                 season: season,
                                 league: league.key,
                                 stage:  stage_name )

             pp stage_recs[0]   ## check first record
             stage_recs
           end
         else
           single_recs = build( fetch_rows.call( pages[:slug] ),
                                season: season,
                                league: league.key )

           pp single_recs[0]   ## check first record
           single_recs
         end

  recs = recs.map { |rec| fix_date( rec, offset ) }   if offset

  ## note: sort matches by date before saving/writing!!!!
  ## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
  ## note: assume date is third column!!! (stage/round/date/...)
  recs = recs.sort { |left, right| left[2] <=> right[2] }

  ## reformat date / beautify e.g. Sat Aug 7 1993
  recs.each do |rec|
    day    = Date.strptime( rec[2], '%Y-%m-%d' )
    rec[2] = day.strftime( '%a %b %-d %Y' )
  end

  ## remove unused columns (e.g. stage, et, p, etc.)
  recs, headers = vacuum( recs )

  puts headers
  pp recs[0]   ## check first record

  out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

  puts "write #{out_path}..."
  Cache::CsvMatchWriter.write( out_path, recs, headers: headers )
end
75
-
76
-
77
-
78
- ## helper to fix dates to use local timezone (and not utc/london time)
79
## Shift date and time of a record by offset hours.
##
## row    - record with the date (year-month-day) in the third column
##          and the time (hour:minute) in the fourth column
## offset - number of hours to add (negative to go back)
##
## Returns the row itself with both columns overwritten in place. A row
## without a time is returned unchanged. A date in any other format ends
## the program with exit status 1.
def self.fix_date( row, offset )
  kickoff = row[3]
  return row   if kickoff.nil? || kickoff.empty?   ## note: time (column) required for fix

  day = row[2]
  unless day =~ /^\d{4}-\d{2}-\d{2}$/
    puts "!!! ERROR - wrong (unknown) date format >>#{day}<<; cannot continue; fix it; sorry"
    ## todo/fix: add to errors/warns list - why? why not?
    exit 1
  end

  date_fmt = '%Y-%m-%d'   # e.g. 2002-08-17

  ## NOTE - MUST be -7/24.0!!!! or such to work
  ##          (hours as a fraction of a day)
  shifted = DateTime.strptime( "#{day} #{kickoff}", "#{date_fmt} %H:%M" ) + (offset/24.0)

  row[2] = shifted.strftime( date_fmt )   ## overwrite "old"
  row[3] = shifted.strftime( '%H:%M' )
  row   ## return row for possible pipelining - why? why not?
end
99
-
100
- end # module Worldfootball