football-sources 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +6 -4
- data/Manifest.txt +2 -20
- data/README.md +30 -169
- data/Rakefile +36 -31
- data/bin/fbgen +131 -0
- data/lib/football/sources.rb +6 -6
- data/lib/football-sources/process.rb +68 -0
- data/lib/football-sources/version.rb +19 -19
- data/lib/football-sources.rb +21 -57
- metadata +51 -40
- data/lib/football-sources/apis/config.rb +0 -17
- data/lib/football-sources/apis/convert.rb +0 -239
- data/lib/football-sources/apis/convert_cl.rb +0 -267
- data/lib/football-sources/apis/mods.rb +0 -20
- data/lib/football-sources/apis/stat.rb +0 -59
- data/lib/football-sources/apis.rb +0 -10
- data/lib/football-sources/fbref/build.rb +0 -96
- data/lib/football-sources/fbref/config.rb +0 -16
- data/lib/football-sources/fbref/convert.rb +0 -95
- data/lib/football-sources/fbref.rb +0 -4
- data/lib/football-sources/worldfootball/build.rb +0 -245
- data/lib/football-sources/worldfootball/config.rb +0 -16
- data/lib/football-sources/worldfootball/convert.rb +0 -100
- data/lib/football-sources/worldfootball/convert_reports.rb +0 -107
- data/lib/football-sources/worldfootball/jobs.rb +0 -76
- data/lib/football-sources/worldfootball/mods.rb +0 -72
- data/lib/football-sources/worldfootball/vacuum.rb +0 -66
- data/lib/football-sources/worldfootball.rb +0 -19
- data/test/helper.rb +0 -8
- data/test/test_version.rb +0 -16
@@ -1,59 +0,0 @@
|
|
1
|
-
module Footballdata

  ## Running tallies over match records (hashes with string keys).
  ##
  ## Counters are kept in two kinds of buckets:
  ##   - :all          - every match passed to #update
  ##   - <stage key>   - one bucket per stage, keyed by the downcased
  ##                     stage name as a symbol (e.g. :regular_season)
  class Stat   ## rename to match stat or something why? why not?
    def initialize
      @data = {}
    end

    ## Returns the bucket stored under +key+ (or nil if none yet).
    def [](key) @data[ key ]; end

    ## Adds one match to the :all bucket and to the bucket of its stage.
    ##
    ## Reads match['stage'], match['group'], match['status'],
    ## match['matchday'] and match['score'] (with 'duration' and
    ## 'fullTime' => { 'homeTeam', 'awayTeam' }).
    ##
    ## A missing 'score' or 'fullTime' entry is treated as "no goals"
    ## instead of raising.
    def update( match )
      ## keep track of some statistics
      score = match['score'] || {}

      tally( (@data[:all] ||= new_bucket( stage: true )), match, score )

      stage_key = match['stage'].downcase.to_sym   # e.g. :regular_season
      tally( (@data[ stage_key ] ||= new_bucket), match, score )
    end

    private

    ## Builds an empty counter bucket; only the :all bucket tracks stages.
    def new_bucket( stage: false )
      bucket = {}
      bucket[:stage] = Hash.new( 0 ) if stage
      bucket.merge!( duration: Hash.new( 0 ),
                     status:   Hash.new( 0 ),
                     group:    Hash.new( 0 ),
                     matchday: Hash.new( 0 ),
                     matches:  0,
                     goals:    0 )
    end

    ## Increments all counters of +stat+ for one match.
    def tally( stat, match, score )
      stat[:stage][ match['stage'] ] += 1  if stat.key?( :stage )
      stat[:group][ match['group'] ] += 1
      stat[:status][ match['status'] ] += 1
      stat[:matchday][ match['matchday'] ] += 1

      stat[:duration][ score['duration'] ] += 1   ## track - assert always REGULAR

      stat[:matches] += 1

      ## note: nil.to_i is 0, so a missing side simply adds nothing
      full_time = score['fullTime'] || {}
      stat[:goals] += %w[homeTeam awayTeam].sum { |side| full_time[ side ].to_i }
    end
  end # class Stat
end # module Footballdata
|
57
|
-
|
58
|
-
|
59
|
-
|
@@ -1,96 +0,0 @@
|
|
1
|
-
|
2
|
-
module Fbref

  ## Builds "standard" match records from "raw" schedule table rows.
  ##
  ## rows   - array of hashes read via the keys :stage, :matchweek, :round,
  ##          :date ('%Y-%m-%d'), :time, :team1, :team2, :score,
  ##          :comments, :venue, :attendance
  ## league - league key as a String (NOT a league struct)
  ## season - anything accepted by Season()
  ##
  ## Returns an array of records in the column order
  ##   stage, round, date, time, team1, ft, ht, team2, et, pen,
  ##   venue, attendance, comments
  ##
  ## Raises ArgumentError if league is not a String.
  def self.build( rows, league:, season: )
    season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

    raise ArgumentError, "league key as string expected"  unless league.is_a?(String)  ## note: do NOT pass in league struct! pass in key (string)

    print " #{rows.size} rows - build #{league} #{season}"
    print "\n"

    rows.map do |row|
      stage = row[:stage] || ''

      ## todo/check: assert that only matchweek or round can be present NOT both!!
      ## note: matchweek wins over round if both are present
      round = [row[:matchweek], row[:round]].find { |value| value && value.size > 0 } || ''

      ## convert date from string e.g. 2019-25-10
      date = Date.strptime( row[:date], '%Y-%m-%d' )

      ht, ft, et, pen, comments = parse_score( row[:score], row[:comments] )

      [stage,
       round,
       date.strftime( '%Y-%m-%d' ),
       row[:time],
       row[:team1],
       ft,
       ht,
       row[:team2],
       et,    # extra: incl. extra time
       pen,   # extra: incl. penalties
       row[:venue],
       row[:attendance],
       comments]
    end
  end


  ## Splits a score string into its parts.
  ##
  ## score_str - e.g. "2–1" or (with penalties) "(4) 1–1 (3)";
  ##             nil or empty means no score yet
  ## comments  - free-text notes; if they mention "extra time" a plain
  ##             score is reported as the extra-time score
  ##
  ## Returns [ht, ft, et, pen, comments]; parts that are unknown are ''
  ## (ht is never filled in here) and ft is '?' whenever only the
  ## extra-time result is known.
  def self.parse_score( score_str, comments )
    ht  = ''
    ft  = ''
    et  = ''
    pen = ''

    score = score_str.to_s    ## note: nil counts as "no score"
    unless score.empty?
      ## note: replace unicode "fancy" dash with ascii-dash
      #  check other columns too - possible in teams?
      score = score.tr( "\u2013", '-' ).strip

      ## e.g. (4) 1-1 (3)  - penalties in parentheses around the score
      with_penalties = score.match( /^\(([0-9]+)\)
                                       [ ]+ ([0-9]+) - ([0-9]+) [ ]+
                                      \(([0-9]+)\)$/x )
      if with_penalties
        ft  = '?'
        et  = "#{with_penalties[2]}-#{with_penalties[3]}"
        pen = "#{with_penalties[1]}-#{with_penalties[4]}"
      elsif comments.to_s.match?( /extra time/i )
        ## notes include extra time
        ft = '?'
        et = score
      else   ## assume "regular" score e.g. 0-0
        ft = score
      end
    end

    [ht, ft, et, pen, comments]
  end

end # module Fbref
|
@@ -1,16 +0,0 @@
|
|
1
|
-
module Fbref

  ### add some more config options / settings
  class Configuration
    #########
    ## nested configuration classes - use - why? why not?

    ## Settings for the convert step.
    class Convert
      ## Sets the output directory.
      attr_writer :out_dir

      ## Returns the output directory; './o' if none was set.
      def out_dir
        @out_dir || './o'
      end
    end

    ## Returns the (lazily created, memoized) convert settings.
    def convert
      @convert ||= Convert.new
    end
  end # class Configuration

end # module Fbref
|
@@ -1,95 +0,0 @@
|
|
1
|
-
module Fbref

  ## Converts the cached schedule page of a league season to a csv file.
  ##
  ## Reads the page via Page::Schedule.from_cache, builds the match
  ## records, reformats the date column, drops unused columns and writes
  ## the result to "<out_dir>/<league>_<season path>.csv".
  def self.convert( league:, season: )
    page = Page::Schedule.from_cache( league: league,
                                      season: season )

    puts page.title

    recs = build( page.matches, league: league, season: season )

    ## reformat date / beautify e.g. Sat Aug 7 1993
    ## note: date is the third column (stage/round/date/...)
    recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' ) }

    recs, headers = vacuum( recs )
    pp recs[0..2]

    season = Season.parse( season )
    path = "#{config.convert.out_dir}/#{league}_#{season.to_path}.csv"
    puts "write #{path}..."
    Cache::CsvMatchWriter.write( path, recs, headers: headers )
  end


  #####
  # vacuum helper stuff - todo/fix - (re)use - make more generic - why? why not?

  ## all columns a record may have (in record order)
  MAX_HEADERS = [
    'Stage',
    'Round',
    'Date',
    'Time',
    'Team 1',
    'FT',
    'HT',
    'Team 2',
    'ET',
    'P',
    'Venue',
    'Att',
    'Comments',    ## e.g. awarded, cancelled/canceled, etc.
  ].freeze

  MIN_HEADERS = [   ## always keep even if all empty
    'Date',
    'Team 1',
    'FT',
    'Team 2'
  ].freeze

  ## Removes columns that are empty (nil or '') in every row.
  ##
  ## rows          - array of records (arrays); column i belongs to headers[i]
  ## headers       - column names in record order
  ## fixed_headers - column names to keep even if all values are empty
  ##
  ## Returns [rows, headers] with the unused columns stripped. The rows
  ## array is returned as passed in if no column had to be removed.
  def self.vacuum( rows, headers: MAX_HEADERS, fixed_headers: MIN_HEADERS )
    ## check for unused columns and strip/remove
    counter = Array.new( headers.size, 0 )
    rows.each do |row|
      row.each_with_index do |col, idx|
        counter[idx] += 1  unless col.nil? || col.empty?
      end
    end

    pp counter

    ## split column indices into used (or fixed) and empty
    kept, dropped = counter.each_index.partition do |idx|
      counter[idx] > 0 || fixed_headers.include?( headers[idx] )
    end

    unless dropped.empty?
      rows = rows.map do |row|
        row.reject.with_index { |_col, idx| dropped.include?( idx ) }
      end
    end

    [rows, headers.values_at( *kept )]
  end
end # module Fbref
|
@@ -1,245 +0,0 @@
|
|
1
|
-
|
2
|
-
module Worldfootball


  ## german round names => english round names
  ROUND_TO_EN = {
    '1. Runde'      => 'Round 1',
    '2. Runde'      => 'Round 2',
    '3. Runde'      => 'Round 3',
    '4. Runde'      => 'Round 4',
    'Achtelfinale'  => 'Round of 16',
    'Viertelfinale' => 'Quarterfinals',
    'Halbfinale'    => 'Semifinals',
    'Finale'        => 'Final',
  }.freeze

  ## do NOT translate rounds (to english) - keep in german / deutsch (de)
  KEEP_ROUND_LEAGUES = ['at.cup', 'at.1',   ## at.1 - incl. europa league playoff
                        'de.cup'].freeze

  ## score patterns (extended regex - literal spaces must be written as [ ])

  ## e.g. 5-4 (0-0, 1-1, 2-2) i.E.
  SCORE_PEN_RE = /([0-9]+) [ ]*-[ ]* ([0-9]+)
                    [ ]*
                  \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                    [ ]*,[ ]*
                  ([0-9]+) [ ]*-[ ]* ([0-9]+)
                    [ ]*,[ ]*
                  ([0-9]+) [ ]*-[ ]* ([0-9]+)\)
                    [ ]*
                  i\.E\.
                 /x

  ## e.g. 2-1 (1-0, 1-1) n.V.
  SCORE_ET_RE = /([0-9]+) [ ]*-[ ]* ([0-9]+)
                   [ ]*
                 \(([0-9]+) [ ]*-[ ]* ([0-9]+)
                   [ ]*,[ ]*
                 ([0-9]+) [ ]*-[ ]* ([0-9]+)
                 \)
                   [ ]*
                 n\.V\.
                /x

  ## e.g. 2-1 (1-0)
  SCORE_FT_HT_RE = /([0-9]+)
                      [ ]*-[ ]*
                    ([0-9]+)
                      [ ]*
                    \(([0-9]+)
                      [ ]*-[ ]*
                    ([0-9]+)
                    \)
                   /x

  ## e.g. 0-3 awd.
  SCORE_NOTE_RE = /([0-9]+)
                     [ ]*-[ ]*
                   ([0-9]+)
                     [ ]*
                   ([a-z.]+)
                  /x

  ## e.g. 2-1
  SCORE_FT_RE = /^([0-9]+)-([0-9]+)$/


  ## todo/check: english league cup/trophy has NO ET - also support - make more flexible!!!

  ## Builds "standard" match records from "raw" table rows.
  ##
  ## rows   - array of hashes read via the keys :round, :date ('%Y-%m-%d'),
  ##          :time, :team1, :team2, :score
  ## season - anything accepted by Season()
  ## league - league key as a String (NOT a league struct)
  ## stage  - stage name copied into every record ('' for none)
  ##
  ## Team names are normalized via norm_team and the MODS table (looked up
  ## by the first part of the league key e.g. at.1 => at); scores listed in
  ## SCORE_ERRORS for the league are patched (matched by date and both teams).
  ##
  ## Returns an array of records in the column order
  ##   stage, round, date, time, team1, ft, ht, team2, et, pen, comments
  ##
  ## Raises ArgumentError if league is not a String.
  ## note: prints an error and calls exit 1 on an unknown round or score format.
  def self.build( rows, season:, league:, stage: '' )   ## rename to fixup or such - why? why not?
    season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

    raise ArgumentError, "league key as string expected"  unless league.is_a?(String)  ## note: do NOT pass in league struct! pass in key (string)

    print " #{rows.size} rows - build #{league} #{season}"
    print " - #{stage}" unless stage.empty?
    print "\n"


    ## note: use only first part from key for lookup
    ##   e.g. at.1  => at
    ##        eng.1 => eng
    ##   and so on
    mods = MODS[ league.split('.')[0] ] || {}

    score_errors = SCORE_ERRORS[ league ] || {}


    ## NOTE(review): i is incremented first and then printed as (i+1),
    ##   that is, the first row shows up as [002] - looks like an
    ##   off-by-one in the progress output; kept as is - confirm intent
    i = 0
    recs = []
    rows.each do |row|
      i += 1

      round_str = row[:round]

      if round_str =~ /Spieltag/
        puts
        print '[%03d] ' % (i+1)
        print round_str

        matchday = round_str.match( /([0-9]+)\. Spieltag/ )
        if matchday
          ## todo/check: always use a string even if number (as a string eg. '1' etc.)
          round = matchday[1]   ## note: keep as string (NOT number)
          print " => #{round}"
        else
          puts "!! ERROR: cannot find matchday number"
          exit 1
        end
        print "\n"
      elsif round_str =~ /[1-9]\.[ ]Runde|
                          Achtelfinale|
                          Viertelfinale|
                          Halbfinale|
                          Finale
                         /x
        puts
        print '[%03d] ' % (i+1)
        print round_str

        if KEEP_ROUND_LEAGUES.include?( league )
          round = round_str
        else
          round = ROUND_TO_EN[ round_str ]
          if round.nil?
            puts "!! ERROR: no mapping for round to english (en) found >#{round_str}<:"
            pp row
            exit 1
          end
          print " => #{round}"
        end
        print "\n"
      else
        puts "!! ERROR: unknown round >#{round_str}< for league >#{league}<:"
        pp row
        exit 1
      end


      date_str  = row[:date]
      time_str  = row[:time]
      team1_str = row[:team1]
      team2_str = row[:team2]
      score_str = row[:score]

      ## convert date from string e.g. 2019-25-10
      date = Date.strptime( date_str, '%Y-%m-%d' )


      ### check for score_error; first (step 1) lookup by date
      ##    score_error is [team1, team2, [expected score, fixed score]]
      score_error = score_errors[ date.strftime('%Y-%m-%d') ]
      if score_error &&
         team1_str == score_error[0] &&
         team2_str == score_error[1]
        ## team names match too; apply fix/patch!!
        expected, fixed = score_error[2][0], score_error[2][1]
        if score_str != expected
          puts "!! WARN - score fix changed? - expected #{expected}, got #{score_str} - fixing to #{fixed}"
          pp row
        end
        puts "FIX - applying score error fix - from #{expected} to => #{fixed}"
        score_str = fixed
      end


      print '[%03d] ' % (i+1)
      print "%-10s | " % date_str
      print "%-5s | " % time_str
      print "%-22s | " % team1_str
      print "%-22s | " % team2_str
      print score_str
      print "\n"


      ## check for 0:3 Wert. - change Wert. to awd. (awarded)
      score_str = score_str.sub( /Wert\./i, 'awd.' )

      ## clean team name (e.g. remove (old))
      ##   and asciify (e.g. ’ to ' )
      team1_str = norm_team( team1_str )
      team2_str = norm_team( team2_str )

      team1_str = mods[ team1_str ] || team1_str
      team2_str = mods[ team2_str ] || team2_str


      ht, ft, et, pen, comments = parse_score( score_str )


      recs << [stage,
               round,
               date.strftime( '%Y-%m-%d' ),
               time_str,
               team1_str,
               ft,
               ht,
               team2_str,
               et,    # extra: incl. extra time
               pen,   # extra: incl. penalties
               comments]
    end # each row
    recs
  end # build


  ## Splits a (german) score string into its parts.
  ##
  ## Handles '---' (no score yet), the status markers 'n.gesp.', 'ausg.',
  ## 'annull.' (cancelled), 'abgebr.' (abandoned), 'verl.' (postponed)
  ## and the score formats shown next to the SCORE_*_RE patterns.
  ##
  ## Returns [ht, ft, et, pen, comments]; parts that are unknown are ''.
  ## note: prints an error and calls exit 1 on an unsupported format.
  def self.parse_score( score_str )
    comments = String.new( '' )   ## check - rename to/use status or such - why? why not?

    ## split score
    ft  = ''
    ht  = ''
    et  = ''
    pen = ''

    case score_str
    when '---'   ## in the future (no score yet) - was -:-
      ## nothing to do - all parts stay empty
    when 'n.gesp.',   ## cancelled (british) / canceled (us)
         'ausg.',     ## todo/check: change to some other status ????
         'annull.'    ## todo/check: change to some other status (see ie 2012) ????
      ft       = '(*)'
      comments = 'cancelled'
    when 'abgebr.'    ## abandoned -- waiting for replay?
      ft       = '(*)'
      comments = 'abandoned'
    when 'verl.'      ## postponed
      comments = 'postponed'
    when SCORE_PEN_RE
      m   = Regexp.last_match
      pen = "#{m[1]}-#{m[2]}"
      ht  = "#{m[3]}-#{m[4]}"
      ft  = "#{m[5]}-#{m[6]}"
      et  = "#{m[7]}-#{m[8]}"
    when SCORE_ET_RE
      m  = Regexp.last_match
      et = "#{m[1]}-#{m[2]}"
      ht = "#{m[3]}-#{m[4]}"
      ft = "#{m[5]}-#{m[6]}"
    when SCORE_FT_HT_RE
      m  = Regexp.last_match
      ft = "#{m[1]}-#{m[2]}"
      ht = "#{m[3]}-#{m[4]}"
    when SCORE_NOTE_RE
      m        = Regexp.last_match
      ft       = "#{m[1]}-#{m[2]} (*)"
      comments = m[3]
    when SCORE_FT_RE
      m  = Regexp.last_match
      ft = "#{m[1]}-#{m[2]}"   ## e.g. see luxemburg and others
    else
      puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
      exit 1
    end

    [ht, ft, et, pen, comments]
  end

end # module Worldfootball
|
@@ -1,16 +0,0 @@
|
|
1
|
-
module Worldfootball

  ### add some more config options / settings
  class Configuration
    #########
    ## nested configuration classes - use - why? why not?

    ## Settings for the convert step.
    class Convert
      ## Sets the output directory.
      attr_writer :out_dir

      ## Returns the output directory; './o' if none was set.
      def out_dir
        @out_dir || './o'
      end
    end

    ## Returns the (lazily created, memoized) convert settings.
    def convert
      @convert ||= Convert.new
    end
  end # class Configuration

end # module Worldfootball
|
@@ -1,100 +0,0 @@
|
|
1
|
-
|
2
|
-
module Worldfootball


  ## Converts the cached schedule page(s) of a league season to a csv file.
  ##
  ## league - league key (looked up via find_league)
  ## season - anything accepted by Season()
  ## offset - optional time offset in hours applied to every record
  ##          that has a time (see fix_date)
  ##
  ## Writes to "<out_dir>/<season path>/<league key>.csv".
  def self.convert( league:, season:, offset: nil )   ## check: rename (optional) offset to time_offset or such?
    season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

    league = find_league( league )

    pages = league.pages( season: season )

    # note: assume stages if pages is an array (of hash table/records)
    #         (and NOT a single hash table/record)
    recs = if pages.is_a?(Array)
             pages.flat_map do |page_meta|
               ## todo/fix: report error/check if stage.name is nil!!!
               build_page( page_meta[:slug], season: season,
                                             league: league.key,
                                             stage:  page_meta[:stage] )
             end
           else
             build_page( pages[:slug], season: season, league: league.key )
           end

    recs = recs.map { |rec| fix_date( rec, offset ) }   if offset

    ##   note:  sort matches by date before saving/writing!!!!
    ##     note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
    ##   note: assume date is third column!!! (stage/round/date/...)
    ##   note: the index is used as tie-breaker to keep the sort stable,
    ##          that is, matches on the same date keep their page order
    recs = recs.each_with_index.sort_by { |rec, idx| [rec[2], idx] }.map( &:first )

    ## reformat date / beautify e.g. Sat Aug 7 1993
    recs.each { |rec| rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' ) }

    ## remove unused columns (e.g. stage, et, p, etc.)
    recs, headers = vacuum( recs )

    puts headers
    pp recs[0]   ## check first record

    out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"

    puts "write #{out_path}..."
    Cache::CsvMatchWriter.write( out_path, recs, headers: headers )
  end


  ## Parses one cached schedule page (by slug) and builds its match
  ## records; helper shared by the staged and the single-page case.
  def self.build_page( slug, season:, league:, stage: '' )
    print " parsing #{slug}..."

    page = Page::Schedule.from_cache( slug )
    print " title=>#{page.title}<..."
    print "\n"

    recs = build( page.matches, season: season, league: league, stage: stage )

    pp recs[0]   ## check first record
    recs
  end
  private_class_method :build_page


  ## helper to fix dates to use local timezone (and not utc/london time)
  ##
  ## row    - record with the date ('%Y-%m-%d') in column 3 (row[2])
  ##          and the time ('%H:%M') in column 4 (row[3])
  ## offset - number of hours to add (negative to subtract)
  ##
  ## Overwrites date and time in place and returns the row. Rows without
  ## a time are returned untouched.
  ## note: prints an error and calls exit 1 if the date format is unknown.
  def self.fix_date( row, offset )
    return row if row[3].nil? || row[3].empty?   ## note: time (column) required for fix

    date_fmt = '%Y-%m-%d'   # e.g. 2002-08-17
    unless row[2] =~ /^\d{4}-\d{2}-\d{2}$/
      puts "!!! ERROR - wrong (unknown) date format >>#{row[2]}<<; cannot continue; fix it; sorry"
      ## todo/fix: add to errors/warns list - why? why not?
      exit 1
    end

    date = DateTime.strptime( "#{row[2]} #{row[3]}", "#{date_fmt} %H:%M" )
    ## note: date arithmetic is in days - use an exact fraction of a day
    ##        (instead of a float e.g. -7/24.0) to avoid rounding
    date += Rational( offset, 24 )

    row[2] = date.strftime( date_fmt )   ## overwrite "old"
    row[3] = date.strftime( '%H:%M' )
    row   ## return row for possible pipelining - why? why not?
  end

end # module Worldfootball
|