worldfootball 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -2
- data/Manifest.txt +5 -0
- data/README.md +43 -1
- data/bin/wfb +14 -33
- data/bin/wfbconf +55 -0
- data/bin/wfbconv +100 -0
- data/bin/wfbdump +76 -0
- data/bin/wfbgen +102 -0
- data/bin/wfbup +101 -0
- data/config/leagues/america.csv +10 -3
- data/config/leagues/europe.csv +5 -2
- data/config/rounds.csv +6 -0
- data/config/stages.csv +16 -32
- data/lib/worldfootball/build-parse_score.rb +8 -3
- data/lib/worldfootball/convert.rb +52 -17
- data/lib/worldfootball/download.rb +6 -2
- data/lib/worldfootball/leagues.rb +16 -3
- data/lib/worldfootball/mods.rb +28 -3
- data/lib/worldfootball/page_schedule.rb +38 -15
- data/lib/worldfootball/version.rb +1 -1
- data/lib/worldfootball.rb +62 -0
- metadata +15 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53f1c44ded1d0dcec880df13b61a77265febcceacd88acb5ce883db39914cce9
|
4
|
+
data.tar.gz: b56eb75f1277026f77c29ee8f4d51788978955accbac5812659d1f3597e9747f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 501b137138a280f74d1cbd50c8f9ebc44ec8f1b7c7c9268005deb2d3a04b2d0046d05bf5a5ff7b3fb7c6f9f7b938423cdd83bb3b983d020bfb26057f2ed463c9
|
7
|
+
data.tar.gz: 84d716bcbffb32c11da5fd3a6b549a115ecbea0b4747182d84e3e7280cf3ff2de37ee57d37b20288d43f57374727fa907b256ed745b04d8b32b44ae17809d8ee
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -39,14 +39,56 @@ and so on.
|
|
39
39
|
|
40
40
|
|
41
41
|
|
42
|
-
### More
|
42
|
+
### More Command-Line Tools
|
43
43
|
|
44
|
+
<!--
|
44
45
|
Debugging tips & tricks. List all cached (offline local) match schedule pages:
|
45
46
|
|
46
47
|
```
|
47
48
|
$ wfb cache
|
48
49
|
```
|
50
|
+
-->
|
49
51
|
|
52
|
+
wfbup -
|
53
|
+
wfbconv -
|
54
|
+
wfbgen -
|
55
|
+
wfbconf -
|
56
|
+
wfbdump
|
57
|
+
|
58
|
+
|
59
|
+
#### wfbup - download leagues (if no league passed in, download all!)
|
60
|
+
|
61
|
+
```
|
62
|
+
$ wfbup # download ALL leagues and ALL seasons (uses all built-in configs)
|
63
|
+
$ wfbup at.1 # download ALL seasons for league
|
64
|
+
```
|
65
|
+
|
66
|
+
#### wfbconv - convert (to .csv) leagues (if no league passed in, converts all!)
|
67
|
+
|
68
|
+
```
|
69
|
+
$ wfbconv # convert ALL leagues and ALL seasons (uses all built-in configs)
|
70
|
+
$ wfbconv at.1 # convert ALL seasons for league
|
71
|
+
```
|
72
|
+
|
73
|
+
#### wfbgen - generate (.txt) leagues (if no league passed in, generate all!)
|
74
|
+
|
75
|
+
```
|
76
|
+
$ wfbgen # generate ALL leagues and ALL seasons (uses all built-in configs)
|
77
|
+
$ wfbgen at.1 # generate ALL seasons for league
|
78
|
+
```
|
79
|
+
|
80
|
+
|
81
|
+
#### wfbconf - check built-in config(uration) for league
|
82
|
+
|
83
|
+
```
|
84
|
+
$ wfbconf eng.1
|
85
|
+
```
|
86
|
+
|
87
|
+
#### wfbdump - dump (page) slug incl. matches, teams, rounds & more
|
88
|
+
|
89
|
+
```
|
90
|
+
$ wfbdump aut-bundesliga-2024-2025
|
91
|
+
```
|
50
92
|
|
51
93
|
|
52
94
|
|
data/bin/wfb
CHANGED
@@ -4,8 +4,12 @@
|
|
4
4
|
## ruby -I ./lib bin/wfb
|
5
5
|
## or
|
6
6
|
## ruby -I wfb/lib wfb/bin/wfb
|
7
|
+
## or
|
8
|
+
## ruby -I wfb/lib wfb/bin/wfb -f max.csv --cached
|
7
9
|
|
8
10
|
|
11
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
12
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
9
13
|
require 'worldfootball'
|
10
14
|
|
11
15
|
|
@@ -25,7 +29,7 @@ Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
|
25
29
|
end
|
26
30
|
|
27
31
|
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
28
|
-
puts " setting
|
32
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
29
33
|
'/sports/cache.wfb.txt'
|
30
34
|
else
|
31
35
|
'./tmp' ## use tmp in working dir
|
@@ -35,6 +39,7 @@ Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
|
35
39
|
require 'optparse'
|
36
40
|
|
37
41
|
|
42
|
+
|
38
43
|
module Worldfootball
|
39
44
|
def self.main( args=ARGV )
|
40
45
|
|
@@ -130,15 +135,14 @@ end
|
|
130
135
|
# assume leagues
|
131
136
|
|
132
137
|
datasets = if opts[:file]
|
133
|
-
|
138
|
+
read_leagueset( opts[:file] )
|
134
139
|
else
|
135
|
-
|
140
|
+
parse_leagueset_args( args )
|
136
141
|
end
|
137
142
|
|
138
143
|
|
139
144
|
## step 0 - validate and fill-up seasons etc.
|
140
|
-
datasets.each do |
|
141
|
-
league_key, seasons = dataset
|
145
|
+
datasets.each do |league_key, seasons|
|
142
146
|
|
143
147
|
league = find_league!( league_key ) ## league info lookup
|
144
148
|
|
@@ -150,8 +154,8 @@ datasets.each do |dataset|
|
|
150
154
|
## might be 2024/25 or 2024 or
|
151
155
|
# for world cup 2022 or such
|
152
156
|
if seasons.empty?
|
153
|
-
|
154
|
-
|
157
|
+
season = Season(league.seasons.keys[0])
|
158
|
+
seasons << season
|
155
159
|
end
|
156
160
|
end
|
157
161
|
|
@@ -206,36 +210,13 @@ if opts[:convert]
|
|
206
210
|
end
|
207
211
|
end
|
208
212
|
|
213
|
+
|
209
214
|
if opts[:generate]
|
210
215
|
## step 3 - generate
|
211
216
|
datasets.each do |league_key, seasons|
|
212
217
|
seasons.each do |season|
|
213
|
-
|
214
|
-
|
215
|
-
## get matches
|
216
|
-
puts " ---> reading matches in #{path} ..."
|
217
|
-
matches = SportDb::CsvMatchParser.read( path )
|
218
|
-
puts " #{matches.size} matches"
|
219
|
-
|
220
|
-
## build
|
221
|
-
txt = SportDb::TxtMatchWriter.build( matches )
|
222
|
-
puts txt
|
223
|
-
|
224
|
-
path = if season >= Season( '2000' )
|
225
|
-
"#{config.generate.out_dir}/#{season.to_path}/#{league_key}.txt"
|
226
|
-
else
|
227
|
-
decade = season.start_year - (season.start_year%10)
|
228
|
-
## use archive-style before 2000!!!
|
229
|
-
"#{config.generate.out_dir}/archive/#{decade}s/#{season.to_path}/#{league_key}.txt"
|
230
|
-
end
|
231
|
-
|
232
|
-
buf = String.new
|
233
|
-
## note - use league key for league name for now!!
|
234
|
-
buf << "= #{league_key.upcase.gsub('.', ' ')} #{season.key}\n\n"
|
235
|
-
buf << txt
|
236
|
-
|
237
|
-
puts " writing to >#{path}<..."
|
238
|
-
write_text( path, buf )
|
218
|
+
generate( league: league_key,
|
219
|
+
season: season )
|
239
220
|
end
|
240
221
|
end
|
241
222
|
## for debugging dump page (slug) titles
|
data/bin/wfbconf
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbconf
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbconf
|
7
|
+
|
8
|
+
####
|
9
|
+
# wfbconf - check built-in config(uration) for league
|
10
|
+
# e.g. wfbconf eng.1
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
13
|
+
require 'worldfootball'
|
14
|
+
|
15
|
+
|
16
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
17
|
+
puts " setting web cache to >/sports/cache<"
|
18
|
+
'/sports/cache'
|
19
|
+
else
|
20
|
+
'./cache'
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
require 'optparse'
|
26
|
+
|
27
|
+
##
|
28
|
+
# by default convert all with overwrite/force set to false
|
29
|
+
|
30
|
+
Webget.config.sleep = 2
|
31
|
+
|
32
|
+
args = ARGV
|
33
|
+
|
34
|
+
|
35
|
+
Worldfootball.debug = true
|
36
|
+
|
37
|
+
##
|
38
|
+
## download fresh copy?
|
39
|
+
## Worldfootball::Metal.download_schedule( slug )
|
40
|
+
|
41
|
+
key = args[0] || 'eng.1'
|
42
|
+
|
43
|
+
league = Worldfootball::LEAGUES[key]
|
44
|
+
seasons = league.seasons
|
45
|
+
pp seasons
|
46
|
+
|
47
|
+
puts " #{key} - #{seasons.size} season(s)"
|
48
|
+
|
49
|
+
puts
|
50
|
+
latest = seasons.keys[0]
|
51
|
+
puts " latest #{latest}: "
|
52
|
+
pp seasons[latest]
|
53
|
+
|
54
|
+
|
55
|
+
puts "bye"
|
data/bin/wfbconv
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbconv
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbconv
|
7
|
+
|
8
|
+
###
|
9
|
+
# wfbconv - convert (to .csv) all leagues (if no league passed in, converts all!)
|
10
|
+
#
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
14
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
15
|
+
require 'worldfootball'
|
16
|
+
|
17
|
+
|
18
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
19
|
+
puts " setting web cache to >/sports/cache<"
|
20
|
+
'/sports/cache'
|
21
|
+
else
|
22
|
+
'./cache'
|
23
|
+
end
|
24
|
+
|
25
|
+
## convert (default) output directory
|
26
|
+
Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
27
|
+
puts " setting convert out_dir to >/sports/cache.wfb<"
|
28
|
+
'/sports/cache.wfb'
|
29
|
+
else
|
30
|
+
'./tmp' ## use tmp in working dir
|
31
|
+
end
|
32
|
+
|
33
|
+
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
34
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
35
|
+
'/sports/cache.wfb.txt'
|
36
|
+
else
|
37
|
+
'./tmp' ## use tmp in working dir
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
require 'optparse'
|
42
|
+
|
43
|
+
##
|
44
|
+
# by default convert all with overwrite/force set to false
|
45
|
+
|
46
|
+
Webget.config.sleep = 2
|
47
|
+
|
48
|
+
args = ARGV
|
49
|
+
|
50
|
+
opts = {
|
51
|
+
force: false, # a.k.a. overwrite
|
52
|
+
}
|
53
|
+
|
54
|
+
|
55
|
+
parser = OptionParser.new do |parser|
|
56
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
57
|
+
|
58
|
+
|
59
|
+
parser.on( "--force",
|
60
|
+
"always overwrite (force) datafile - default is (#{opts[:force]})" ) do |force|
|
61
|
+
opts[:force] = true # true|false
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
parser.parse!( args )
|
67
|
+
|
68
|
+
puts "OPTS:"
|
69
|
+
p opts
|
70
|
+
puts "ARGV:"
|
71
|
+
p args
|
72
|
+
|
73
|
+
|
74
|
+
keys = if args.size == 0
|
75
|
+
Worldfootball::LEAGUES.keys
|
76
|
+
else
|
77
|
+
args
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
keys.each_with_index do |key, i|
|
83
|
+
league = Worldfootball::LEAGUES[key]
|
84
|
+
seasons = league.seasons
|
85
|
+
|
86
|
+
puts "==> #{i+1}/#{keys.size} #{key} - #{seasons.size} seasons(s)..."
|
87
|
+
|
88
|
+
seasons.each_with_index do |season_rec,j|
|
89
|
+
season = season_rec[0]
|
90
|
+
|
91
|
+
next if key == 'nl.cup' && season == '1959/60'
|
92
|
+
|
93
|
+
puts " #{j+1}/#{seasons.size} #{key} #{season}..."
|
94
|
+
Worldfootball.convert( league: key, season: season,
|
95
|
+
overwrite: opts[:force] )
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
|
100
|
+
puts "bye"
|
data/bin/wfbdump
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbdump
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbdump
|
7
|
+
|
8
|
+
|
9
|
+
#####
|
10
|
+
# wfbdump - dump matches, teams, rounds and more for (page) slug
|
11
|
+
# e.g.
|
12
|
+
# wfbdump caf-champions-league-2008
|
13
|
+
# wfbdump aut-bundesliga-2024-2025
|
14
|
+
|
15
|
+
|
16
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
17
|
+
require 'worldfootball'
|
18
|
+
|
19
|
+
|
20
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
21
|
+
puts " setting web cache to >/sports/cache<"
|
22
|
+
'/sports/cache'
|
23
|
+
else
|
24
|
+
'./cache'
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
require 'optparse'
|
30
|
+
|
31
|
+
##
|
32
|
+
# by default convert all with overwrite/force set to false
|
33
|
+
|
34
|
+
Webget.config.sleep = 2
|
35
|
+
|
36
|
+
args = ARGV
|
37
|
+
|
38
|
+
|
39
|
+
Worldfootball.debug = true
|
40
|
+
|
41
|
+
##
|
42
|
+
## download fresh copy?
|
43
|
+
## Worldfootball::Metal.download_schedule( slug )
|
44
|
+
|
45
|
+
slug = args[0] || 'caf-champions-league-2008'
|
46
|
+
|
47
|
+
page = Worldfootball::Page::Schedule.from_cache( slug )
|
48
|
+
|
49
|
+
matches = page.matches
|
50
|
+
teams = page.teams
|
51
|
+
rounds = page.rounds
|
52
|
+
|
53
|
+
puts " #{matches.size} match(es), #{teams.size} team(s), #{rounds.size} round(s)"
|
54
|
+
# pp matches
|
55
|
+
|
56
|
+
puts
|
57
|
+
puts " #{teams.size} team(s)"
|
58
|
+
pp teams
|
59
|
+
|
60
|
+
puts
|
61
|
+
puts " #{rounds.size} round(s)"
|
62
|
+
pp rounds
|
63
|
+
|
64
|
+
|
65
|
+
puts "==> page props"
|
66
|
+
print "title: "
|
67
|
+
puts page.title
|
68
|
+
puts
|
69
|
+
print "keywords: "
|
70
|
+
puts page.keywords
|
71
|
+
puts
|
72
|
+
print "url: "
|
73
|
+
puts page.url
|
74
|
+
|
75
|
+
|
76
|
+
puts "bye"
|
data/bin/wfbgen
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbgen
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbgen
|
7
|
+
|
8
|
+
###############
|
9
|
+
# wfbgen - generate (.txt) leagues (if no league passed in, generates all!)
|
10
|
+
#
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
14
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sportdb-writers/lib' )
|
15
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
16
|
+
|
17
|
+
require 'worldfootball'
|
18
|
+
|
19
|
+
|
20
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
21
|
+
puts " setting web cache to >/sports/cache<"
|
22
|
+
'/sports/cache'
|
23
|
+
else
|
24
|
+
'./cache'
|
25
|
+
end
|
26
|
+
|
27
|
+
## convert (default) output directory
|
28
|
+
Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
29
|
+
puts " setting convert out_dir to >/sports/cache.wfb<"
|
30
|
+
'/sports/cache.wfb'
|
31
|
+
else
|
32
|
+
'./tmp' ## use tmp in working dir
|
33
|
+
end
|
34
|
+
|
35
|
+
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
36
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
37
|
+
'/sports/cache.wfb.txt'
|
38
|
+
else
|
39
|
+
'./tmp' ## use tmp in working dir
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
require 'optparse'
|
44
|
+
|
45
|
+
##
|
46
|
+
# by default generate all with overwrite/force set to false
|
47
|
+
|
48
|
+
Webget.config.sleep = 2
|
49
|
+
|
50
|
+
args = ARGV
|
51
|
+
|
52
|
+
opts = {
|
53
|
+
force: false, # a.k.a. overwrite
|
54
|
+
}
|
55
|
+
|
56
|
+
|
57
|
+
parser = OptionParser.new do |parser|
|
58
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
59
|
+
|
60
|
+
|
61
|
+
parser.on( "--force",
|
62
|
+
"always overwrite (force) datafile - default is (#{opts[:force]})" ) do |force|
|
63
|
+
opts[:force] = true # true|false
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
|
68
|
+
parser.parse!( args )
|
69
|
+
|
70
|
+
puts "OPTS:"
|
71
|
+
p opts
|
72
|
+
puts "ARGV:"
|
73
|
+
p args
|
74
|
+
|
75
|
+
|
76
|
+
keys = if args.size == 0
|
77
|
+
Worldfootball::LEAGUES.keys
|
78
|
+
else
|
79
|
+
args
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
keys.each_with_index do |key, i|
|
85
|
+
league = Worldfootball::LEAGUES[key]
|
86
|
+
seasons = league.seasons
|
87
|
+
|
88
|
+
puts "==> #{i+1}/#{keys.size} #{key} - #{seasons.size} seasons(s)..."
|
89
|
+
|
90
|
+
seasons.each_with_index do |season_rec,j|
|
91
|
+
season = season_rec[0]
|
92
|
+
|
93
|
+
next if key == 'nl.cup' && season == '1959/60'
|
94
|
+
|
95
|
+
puts " #{j+1}/#{seasons.size} #{key} #{season}..."
|
96
|
+
Worldfootball.generate( league: key, season: season,
|
97
|
+
overwrite: opts[:force] )
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
|
102
|
+
puts "bye"
|
data/bin/wfbup
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbup
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbup
|
7
|
+
|
8
|
+
|
9
|
+
##
|
10
|
+
## add offset for restart!!!!
|
11
|
+
## e.g. si.1 - maybe add season later!!!
|
12
|
+
## or better add an expiry option e.g. 24h or such !!!!
|
13
|
+
## - stopping at si.1 2017/18...
|
14
|
+
|
15
|
+
|
16
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
17
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
18
|
+
require 'worldfootball'
|
19
|
+
|
20
|
+
|
21
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
22
|
+
puts " setting web cache to >/sports/cache<"
|
23
|
+
'/sports/cache'
|
24
|
+
else
|
25
|
+
'./cache'
|
26
|
+
end
|
27
|
+
|
28
|
+
## convert (default) output directory
|
29
|
+
Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
30
|
+
puts " setting convert out_dir to >/sports/cache.wfb<"
|
31
|
+
'/sports/cache.wfb'
|
32
|
+
else
|
33
|
+
'./tmp' ## use tmp in working dir
|
34
|
+
end
|
35
|
+
|
36
|
+
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
37
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
38
|
+
'/sports/cache.wfb.txt'
|
39
|
+
else
|
40
|
+
'./tmp' ## use tmp in working dir
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
require 'optparse'
|
45
|
+
|
46
|
+
##
|
47
|
+
# by default download all with overwrite/force set to false
|
48
|
+
|
49
|
+
Webget.config.sleep = 2
|
50
|
+
|
51
|
+
args = ARGV
|
52
|
+
|
53
|
+
opts = {
|
54
|
+
force: false, # a.k.a. overwrite
|
55
|
+
}
|
56
|
+
|
57
|
+
|
58
|
+
parser = OptionParser.new do |parser|
|
59
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
60
|
+
|
61
|
+
|
62
|
+
parser.on( "--force",
|
63
|
+
"always overwrite (force) datafile - default is (#{opts[:force]})" ) do |force|
|
64
|
+
opts[:force] = true # true|false
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
parser.parse!( args )
|
70
|
+
|
71
|
+
puts "OPTS:"
|
72
|
+
p opts
|
73
|
+
puts "ARGV:"
|
74
|
+
p args
|
75
|
+
|
76
|
+
|
77
|
+
keys = if args.size == 0
|
78
|
+
Worldfootball::LEAGUES.keys
|
79
|
+
else
|
80
|
+
args
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
keys.each_with_index do |key, i|
|
86
|
+
league = Worldfootball::LEAGUES[key]
|
87
|
+
seasons = league.seasons
|
88
|
+
|
89
|
+
puts "==> #{i+1}/#{keys.size} #{key} - #{seasons.size} seasons(s)..."
|
90
|
+
|
91
|
+
seasons.each_with_index do |season_rec,j|
|
92
|
+
season = season_rec[0]
|
93
|
+
|
94
|
+
puts " #{j+1}/#{seasons.size} #{key} #{season}..."
|
95
|
+
Worldfootball.schedule( league: key, season: season,
|
96
|
+
overwrite: opts[:force] )
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
|
101
|
+
puts "bye"
|
data/config/leagues/america.csv
CHANGED
@@ -23,12 +23,19 @@ ca.cup, can-canadian-championship-2024
|
|
23
23
|
# - Finale
|
24
24
|
# /mex-primera-division-2018-2019-clausura-playoffs/
|
25
25
|
mx.1, mex-primera-division-2024-2025-apertura_2
|
26
|
-
|
26
|
+
|
27
|
+
## merge expansion and ascenso into one (mx.2) - possible? why? why not?
|
28
|
+
|
29
|
+
## first expansion season -> 2020/21 Apertura
|
30
|
+
## last ascenso season -> 2019/20 Clausura
|
31
|
+
mx.2.expansion, mex-liga-de-expansion-2024-2025-apertura
|
32
|
+
mx.2.ascenso, mex-liga-de-ascenso-2019-2020-clausura
|
33
|
+
|
27
34
|
mx.cup, mex-copa-mx-2019-2020
|
28
35
|
|
29
36
|
|
30
|
-
|
31
|
-
mx.3,
|
37
|
+
mx.3.a, mex-lp-serie-a-2024-2025-apertura
|
38
|
+
mx.3.b, mex-lp-serie-b-2024-2025-apertura
|
32
39
|
|
33
40
|
|
34
41
|
|
data/config/leagues/europe.csv
CHANGED
@@ -33,7 +33,10 @@ hu.1, hun-nb-i-2024-2025
|
|
33
33
|
|
34
34
|
cz.1, cze-1-fotbalova-liga-2024-2025
|
35
35
|
cz.2, cze-2-fotbalova-liga-2024-2025
|
36
|
-
|
36
|
+
|
37
|
+
## commented out for now (three leagues in one - split !!)
|
38
|
+
## cz.3, cze-3-fotbalova-liga-2024-2025-cfl-a
|
39
|
+
|
37
40
|
|
38
41
|
sk.1, svk-super-liga-2024-2025
|
39
42
|
|
@@ -151,7 +154,7 @@ rs.1, srb-super-liga-2024-2025
|
|
151
154
|
si.1, svn-prvaliga-2024-2025
|
152
155
|
|
153
156
|
## todo/fix - change to kos.1 - why? why not?
|
154
|
-
xk.1, kos-superliga-2024-2025
|
157
|
+
## xk.1, kos-superliga-2024-2025
|
155
158
|
kos.1, kos-superliga-2024-2025
|
156
159
|
|
157
160
|
|
data/config/rounds.csv
CHANGED
data/config/stages.csv
CHANGED
@@ -1,51 +1,38 @@
|
|
1
1
|
key, name1, name2,
|
2
2
|
|
3
|
-
*, Meisterschaft,
|
4
|
-
*, Abstieg,
|
5
|
-
*, Relegation, Playoffs - Relegation
|
3
|
+
*, Meisterschaft, Championship
|
4
|
+
*, Abstieg, Relegation
|
6
5
|
|
6
|
+
## use Playoffs - Championship -- why? why not?
|
7
|
+
## use Playoffs - Relegation -- why? why not?
|
7
8
|
|
8
|
-
sco.1, Championship, Playoffs - Championship
|
9
|
-
sco.1, Relegation', Playoffs - Relegation
|
10
9
|
|
11
|
-
|
12
|
-
at.1,
|
13
|
-
at.1, Qualifikationsgruppe, Playoffs - Relegation
|
10
|
+
at.1, Meistergruppe, Championship
|
11
|
+
at.1, Qualifikationsgruppe, Relegation
|
14
12
|
at.1, Playoff, Europa League Finals
|
15
13
|
|
16
14
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
sk.1, Meisterschaft, Playoffs - Championship
|
22
|
-
sk.1, Abstieg, Playoffs - Relegation
|
15
|
+
sk.1, Meisterschaft, Championship
|
16
|
+
sk.1, Abstieg, Relegation
|
23
17
|
sk.1, Europa League, Europa League Finals
|
24
18
|
|
25
19
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
ru.1, Meisterschaft, Playoffs - Championship
|
30
|
-
ru.1, Relegation, Playoffs - Relegation
|
31
|
-
|
32
|
-
|
33
|
-
ua.1, Meisterschaft, Playoffs - Championship
|
34
|
-
ua.1, Abstieg, Playoffs - Relegation
|
20
|
+
ua.1, Meisterschaft, Championship
|
21
|
+
ua.1, Abstieg, Relegation
|
35
22
|
ua.1, Playoffs EL, Europa League Finals
|
36
23
|
|
37
24
|
|
38
|
-
fi.1, Meisterschaft,
|
39
|
-
fi.1, Abstieg,
|
25
|
+
fi.1, Meisterschaft, Championship
|
26
|
+
fi.1, Abstieg, Relegation
|
40
27
|
fi.1, Playoff EL, Europa League Finals
|
41
28
|
|
42
|
-
dk.1, Meisterschaft,
|
43
|
-
dk.1, Abstieg,
|
29
|
+
dk.1, Meisterschaft, Championship
|
30
|
+
dk.1, Abstieg, Relegation
|
44
31
|
dk.1, Europa League, Europa League Finals
|
45
32
|
|
46
33
|
|
47
|
-
gr.1, Meisterschaft,
|
48
|
-
gr.1, Abstieg,
|
34
|
+
gr.1, Meisterschaft, Championship
|
35
|
+
gr.1, Abstieg, Relegation
|
49
36
|
gr.1, Playoffs, Playoffs
|
50
37
|
gr.1, Spiel um Platz 6, Match 6th Place
|
51
38
|
|
@@ -55,9 +42,6 @@ mx.1, Apertura Playoffs, Apertura - Liguilla
|
|
55
42
|
mx.1, Clausura Playoffs, Clausura - Liguilla
|
56
43
|
|
57
44
|
|
58
|
-
kr.1, Meisterschaft, Playoffs - Championship
|
59
|
-
kr.1, Abstieg, Playoffs - Relegation
|
60
|
-
|
61
45
|
|
62
46
|
nz.1, Playoffs, Playoff Finals
|
63
47
|
|
@@ -31,13 +31,18 @@ def self.parse_score( score_str )
|
|
31
31
|
et = ''
|
32
32
|
pen = ''
|
33
33
|
|
34
|
+
##
|
35
|
+
## [085] 2021-10-21 | 22:00 | Metropolitanos FC | LALA FC | Aufg.
|
36
|
+
## !! ERROR - unsupported score format >Aufg.< - sorry; maybe add a score error fix/patch
|
37
|
+
## - handle with Aufg.
|
34
38
|
|
35
39
|
if score_str == '---' ## in the future (no score yet) - was -:-
|
36
40
|
ft = ''
|
37
41
|
ht = ''
|
38
42
|
elsif score_str == 'n.gesp.' || ## cancelled (british) / canceled (us)
|
39
43
|
score_str == 'ausg.' || ## todo/check: change to some other status ????
|
40
|
-
score_str == 'annull.'
|
44
|
+
score_str == 'annull.' || ## todo/check: change to some other status (see ie 2012) ????
|
45
|
+
score_str == 'Aufg.'
|
41
46
|
ft = '(*)'
|
42
47
|
ht = ''
|
43
48
|
comments = 'cancelled'
|
@@ -172,13 +177,13 @@ def self.parse_score( score_str )
|
|
172
177
|
puts "!! WARN - weird score n.V. only - >#{score_str}<"
|
173
178
|
elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
|
174
179
|
[ ]*
|
175
|
-
|
180
|
+
(?: i\.E\. | n\.P\. )
|
176
181
|
$/x
|
177
182
|
pen = "#{$1}-#{$2}"
|
178
183
|
et = ''
|
179
184
|
ht = ''
|
180
185
|
ft = ''
|
181
|
-
puts "!! WARN - weird score i.E. only - >#{score_str}<"
|
186
|
+
puts "!! WARN - weird score i.E. (n.P.) only - >#{score_str}<"
|
182
187
|
else
|
183
188
|
puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
|
184
189
|
exit 1
|
@@ -2,13 +2,23 @@
|
|
2
2
|
module Worldfootball
|
3
3
|
|
4
4
|
|
5
|
-
def self.convert( league:, season
|
5
|
+
def self.convert( league:, season:,
|
6
|
+
overwrite: true )
|
6
7
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
7
8
|
|
8
9
|
league = find_league!( league )
|
9
10
|
pages = league.pages!( season: season )
|
10
11
|
|
11
12
|
|
13
|
+
out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"
|
14
|
+
if !overwrite && File.exist?( out_path )
|
15
|
+
## skip generation
|
16
|
+
puts " OK #{league.key} #{season} (do NOT overwrite)"
|
17
|
+
return
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
|
12
22
|
## collect all teams
|
13
23
|
teams_by_ref = {}
|
14
24
|
|
@@ -48,17 +58,19 @@ def self.convert( league:, season: )
|
|
48
58
|
team_name = norm_team( h[:name] ) ## note: norm team name!!!
|
49
59
|
team_ref = h[:ref]
|
50
60
|
|
61
|
+
###
|
62
|
+
## quick fix for broken refs/links
|
63
|
+
## olympique-lyon => olympique-lyonnais
|
64
|
+
# team_ref = 'olympique-lyonnais' if team_ref == 'olympique-lyon'
|
65
|
+
|
51
66
|
## note: skip N.N. (place holder team)
|
52
67
|
## team_ref is nil etc.
|
53
68
|
next if team_name == 'N.N.'
|
54
69
|
|
55
70
|
team_stat = teams_by_ref[ team_ref ] ||= { count: 0,
|
56
|
-
|
71
|
+
names: [] }
|
57
72
|
team_stat[:count] += team_count
|
58
|
-
|
59
|
-
puts "!! ASSERT ERROR - team ref with differet names; expected #{team_stat[:name]} - got #{team_name}"
|
60
|
-
exit 1
|
61
|
-
end
|
73
|
+
team_stat[:names] << team_name unless team_stat[:names].include?( team_name )
|
62
74
|
end
|
63
75
|
|
64
76
|
|
@@ -73,9 +85,12 @@ def self.convert( league:, season: )
|
|
73
85
|
|
74
86
|
|
75
87
|
clubs_intl = ['uefa.cl', 'uefa.el', 'uefa.conf',
|
76
|
-
'
|
88
|
+
'uefa.cl.q', 'uefa.el.q', 'uefa.conf.q',
|
89
|
+
'copa.l',
|
90
|
+
'concacaf.cl',
|
77
91
|
'caf.cl',
|
78
|
-
'afl'
|
92
|
+
'afl',
|
93
|
+
].include?(league.key) ? true : false
|
79
94
|
|
80
95
|
####
|
81
96
|
# auto-add (fifa) country code if int'l club tournament
|
@@ -83,7 +98,6 @@ def self.convert( league:, season: )
|
|
83
98
|
##
|
84
99
|
## get country codes for team ref
|
85
100
|
teams_by_ref.each do |team_slug, h|
|
86
|
-
|
87
101
|
Metal.download_team( team_slug, cache: true )
|
88
102
|
team_page = Page::Team.from_cache( team_slug )
|
89
103
|
props = team_page.props
|
@@ -99,14 +113,20 @@ def self.convert( league:, season: )
|
|
99
113
|
|
100
114
|
## generate lookup by name
|
101
115
|
teams_by_name = teams_by_ref.reduce( {} ) do |h, (slug,rec)|
|
102
|
-
|
103
|
-
|
116
|
+
### todo/fix
|
117
|
+
## report warning if names size is > 1!!!!
|
118
|
+
##
|
119
|
+
rec[:names].each do |name|
|
120
|
+
h[ name ] = rec
|
121
|
+
end
|
122
|
+
h
|
104
123
|
end
|
105
124
|
|
125
|
+
|
106
126
|
#####
|
107
127
|
## dump team refs
|
108
128
|
puts " #{teams_by_ref.size} team(s) by ref:"
|
109
|
-
pp
|
129
|
+
pp teams_by_ref
|
110
130
|
|
111
131
|
## quick hack
|
112
132
|
## add country (fifa) codes to team names
|
@@ -129,10 +149,22 @@ def self.convert( league:, season: )
|
|
129
149
|
## note: sort matches by date before saving/writing!!!!
|
130
150
|
## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
|
131
151
|
## note: assume date is third column!!! (stage/round/date/...)
|
132
|
-
|
152
|
+
|
153
|
+
### note - do NOT sort for now
|
154
|
+
## keep "original" page order - why? why not?
|
155
|
+
## recs = recs.sort { |l,r| l[2] <=> r[2] }
|
156
|
+
|
157
|
+
|
133
158
|
## reformat date / beautify e.g. Sat Aug 7 1993
|
134
159
|
recs.each do |rec|
|
135
|
-
|
160
|
+
if rec[2]
|
161
|
+
if rec[2] =~ /^\d{4}-\d{1,2}-\d{1,2}$/
|
162
|
+
rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' )
|
163
|
+
else
|
164
|
+
## report unknown date format warning
|
165
|
+
puts "WARN - unsupported date format (cannot parse?) >#{rec[2]}<"
|
166
|
+
end
|
167
|
+
end
|
136
168
|
end
|
137
169
|
|
138
170
|
## remove unused columns (e.g. stage, et, p, etc.)
|
@@ -141,10 +173,13 @@ recs.each do |rec|
|
|
141
173
|
puts headers
|
142
174
|
pp recs[0] ## check first record
|
143
175
|
|
144
|
-
out_path
|
145
|
-
|
146
|
-
puts "write #{out_path}..."
|
176
|
+
puts " writing to >#{out_path}< - #{recs.size} record(s)..."
|
147
177
|
write_csv( out_path, recs, headers: headers )
|
178
|
+
|
179
|
+
## add to tmp too for debugging
|
180
|
+
out_path2 = "#{config.convert.out_dir}/tmp/#{league.key}/#{season.to_path}.csv"
|
181
|
+
puts " writing to >#{out_path2}< - #{recs.size} record(s)..."
|
182
|
+
write_csv( out_path2, recs, headers: headers )
|
148
183
|
end
|
149
184
|
end # module Worldfootball
|
150
185
|
|
@@ -4,12 +4,16 @@ module Worldfootball
|
|
4
4
|
|
5
5
|
#################
|
6
6
|
## porcelain "api"
|
7
|
-
def self.schedule( league:, season: )
|
7
|
+
def self.schedule( league:, season:, overwrite: true )
|
8
8
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
9
9
|
|
10
10
|
pages = find_league_pages!( league: league, season: season )
|
11
11
|
pages.each do |slug, _|
|
12
|
-
Metal.
|
12
|
+
if !overwrite && Webcache.cached?( Metal.schedule_url( slug ))
|
13
|
+
puts " OK #{league} #{season} - #{slug} (do NOT overwrite)"
|
14
|
+
else
|
15
|
+
Metal.download_schedule( slug )
|
16
|
+
end
|
13
17
|
end # each page
|
14
18
|
end
|
15
19
|
|
@@ -61,9 +61,12 @@ class LeagueItem # nested inside LeagueConfig
|
|
61
61
|
season, stage = text.split( ' ', 2 )
|
62
62
|
|
63
63
|
## todo/fix: add a warning here and auto log to logs.txt!!!!
|
64
|
-
|
65
|
-
|
66
|
-
|
64
|
+
if ['2019-2021',
|
65
|
+
'1958/1960',
|
66
|
+
'1955/1958' ].include?( season )
|
67
|
+
log( "!! WARN - seasons for league #{@key} incl. invalid season #{season} - slug #{slug}; skipping season" )
|
68
|
+
next ## note - skip invalid season entry
|
69
|
+
end
|
67
70
|
|
68
71
|
season = Season.parse( season )
|
69
72
|
|
@@ -106,6 +109,13 @@ class LeagueItem # nested inside LeagueConfig
|
|
106
109
|
recs = seasons[season.key]
|
107
110
|
recs ? recs.reverse : nil
|
108
111
|
end
|
112
|
+
|
113
|
+
def log( msg ) ### append to log
|
114
|
+
File.open( './logs.txt', 'a:utf-8' ) do |f|
|
115
|
+
f.write( msg )
|
116
|
+
f.write( "\n" )
|
117
|
+
end
|
118
|
+
end
|
109
119
|
end # class LeagueItem
|
110
120
|
|
111
121
|
|
@@ -122,6 +132,9 @@ def size() @table.size; end
|
|
122
132
|
end # class LeagueConfig
|
123
133
|
|
124
134
|
|
135
|
+
|
136
|
+
|
137
|
+
|
125
138
|
LEAGUES = LeagueConfig.new
|
126
139
|
['africa',
|
127
140
|
'america',
|
data/lib/worldfootball/mods.rb
CHANGED
@@ -10,11 +10,20 @@ module Worldfootball
|
|
10
10
|
def self.norm_team( team )
|
11
11
|
## clean team name and asciify (e.g. ’->' )
|
12
12
|
team = team.sub( '(old)', '' ).strip
|
13
|
-
team = team.gsub( '’', "'" ) ## e.g. Hawke’s Bay United FC
|
14
13
|
|
15
|
-
##
|
16
|
-
##
|
14
|
+
## e.g. Hawke’s Bay United FC or
|
15
|
+
## ASC Monts d`Or Chasselay or
|
16
|
+
## VV Heerlen ´16 / EMM ´15 / Wormer SV´30 / Swift ´36 / etc.
|
17
|
+
team = team.gsub( /[’´`]/, "'" )
|
18
|
+
|
19
|
+
|
20
|
+
## br
|
21
|
+
## Criciúma - SC => Criciúma SC
|
22
|
+
## Bahia - BA => Bahia BA
|
23
|
+
## cz
|
24
|
+
## Baník Most - Souš => Baník Most Souš
|
17
25
|
## remove inline dash ( - ) with single space
|
26
|
+
## to log
|
18
27
|
team = team.gsub( /[ ]+[-][ ]+/, ' ' )
|
19
28
|
|
20
29
|
|
@@ -24,6 +33,22 @@ def self.norm_team( team )
|
|
24
33
|
## others too? - move to mods instead of generic rule - why? why not?
|
25
34
|
team = team.sub( /[ ]+\(A\)/, ' II' )
|
26
35
|
|
36
|
+
##
|
37
|
+
## remove () - used/reserved for country code for now - why? why not?
|
38
|
+
## e.g. Lloyds FC (Sittingbourne) => Lloyds FC Sittingbourne
|
39
|
+
## August 1st (Army Team) => August 1st Army Team
|
40
|
+
##
|
41
|
+
## add warning - why? why not?
|
42
|
+
team = team.sub( /\(
|
43
|
+
([^)]+?) ## eat-up all non-greed to next )
|
44
|
+
\)/x, '\1' )
|
45
|
+
|
46
|
+
##
|
47
|
+
## strip special case
|
48
|
+
## MFK Frýdek-Místek, a.s. => MFK Frýdek-Místek
|
49
|
+
team = team.sub( ', a.s.', '' )
|
50
|
+
|
51
|
+
|
27
52
|
################
|
28
53
|
## quick hack - norm(alize) all N.N. to N.N.
|
29
54
|
## e.g.
|
@@ -12,6 +12,15 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
12
12
|
end
|
13
13
|
|
14
14
|
|
15
|
+
PLACEHOLDERS = [
|
16
|
+
'N.N.',
|
17
|
+
'Verlierer HF 1',
|
18
|
+
'Verlierer HF 2',
|
19
|
+
'Sieger HF 1',
|
20
|
+
'Sieger HF 2',
|
21
|
+
]
|
22
|
+
def placeholder?( str ) PLACEHOLDERS.include?( str ); end
|
23
|
+
|
15
24
|
|
16
25
|
def matches
|
17
26
|
@matches ||= begin
|
@@ -20,10 +29,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
20
29
|
# <table class="standard_tabelle" cellpadding="3" cellspacing="1">
|
21
30
|
|
22
31
|
## note: use > for "strict" sibling (child without any in-betweens)
|
23
|
-
|
32
|
+
tables = doc.css( 'div.data > table.standard_tabelle' ) ## get table
|
24
33
|
# puts table.class.name #=> Nokogiri::XML::Element
|
25
34
|
# puts table.text
|
26
35
|
|
36
|
+
assert( tables.size==1, "expected one table.standard_tabelle; got #{tables.size}" )
|
37
|
+
table = tables.first
|
27
38
|
assert( table, 'no table.standard_tabelle found in schedule page!!')
|
28
39
|
|
29
40
|
trs = table.css( 'tr' )
|
@@ -77,8 +88,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
77
88
|
ths = tr.css( 'th' )
|
78
89
|
tds = tr.css( 'td' )
|
79
90
|
|
80
|
-
|
81
|
-
|
91
|
+
tr_text = squish( tr.text )
|
92
|
+
|
93
|
+
if tr_text =~ /Spieltag/ ||
|
94
|
+
tr_text =~ /[1-9]\.[ ]Runde|
|
82
95
|
Qual\.[ ][1-9]\.[ ]Runde| # see EL or CL Quali
|
83
96
|
Qualifikation| # see CA Championship
|
84
97
|
Sechzehntelfinale| # see EL
|
@@ -110,25 +123,24 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
110
123
|
if debug?
|
111
124
|
puts
|
112
125
|
print '[%03d] ' % i
|
113
|
-
|
114
|
-
print "round >#{tr.text.strip}<"
|
126
|
+
print "round >#{tr_text}<"
|
115
127
|
print "\n"
|
116
128
|
end
|
117
129
|
|
118
|
-
last_round =
|
130
|
+
last_round = tr_text
|
119
131
|
elsif ths.count > 0 &&
|
120
132
|
tds.count == 0
|
121
133
|
## check for round NOT yet configured!!!
|
122
|
-
puts "!! WARN: found unregistered round line >#{
|
123
|
-
log( "!! WARN: found unregistered round line >#{
|
134
|
+
puts "!! WARN: found unregistered round line >#{tr_text}<"
|
135
|
+
log( "!! WARN: found unregistered round line >#{tr_text}< in page #{title}" )
|
124
136
|
|
125
|
-
last_round =
|
137
|
+
last_round = tr_text
|
126
138
|
else ## assume table row (tr) is match line
|
127
139
|
|
128
140
|
date_str = squish( tds[0].text )
|
129
141
|
time_str = squish( tds[1].text )
|
130
142
|
|
131
|
-
date_str = last_date_str if date_str.empty?
|
143
|
+
date_str = last_date_str if date_str.empty? && last_date_str
|
132
144
|
|
133
145
|
if debug?
|
134
146
|
## note: for debugging - print as we go along (parsing)
|
@@ -148,7 +160,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
148
160
|
else
|
149
161
|
team1_str = squish( tds[2].text )
|
150
162
|
team1_ref = nil
|
151
|
-
puts "!! WARN: no team1_ref for >#{team1_str}< found"
|
163
|
+
puts "!! WARN: no team1_ref for >#{team1_str}< found" unless placeholder?( team1_str )
|
152
164
|
end
|
153
165
|
|
154
166
|
if debug?
|
@@ -170,9 +182,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
170
182
|
else
|
171
183
|
team2_str = squish( tds[4].text )
|
172
184
|
team2_ref = nil
|
173
|
-
puts "!! WARN: no team2_ref for >#{team2_str}< found"
|
185
|
+
puts "!! WARN: no team2_ref for >#{team2_str}< found" unless placeholder?( team2_str )
|
174
186
|
end
|
175
187
|
|
188
|
+
|
176
189
|
if debug?
|
177
190
|
## note: for debugging - print as we go along (parsing)
|
178
191
|
print "%-22s | " % team2_str
|
@@ -198,8 +211,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
198
211
|
## <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
|
199
212
|
## </td>
|
200
213
|
img = tds[6].css( 'img' )[0]
|
214
|
+
|
215
|
+
|
216
|
+
|
201
217
|
if img && img[:src].index( '/live/')
|
202
|
-
puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
|
218
|
+
## puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
|
219
|
+
print " LIVE BADGE "
|
203
220
|
score_str = '-:-' # note: -:- gets replaced to ---
|
204
221
|
end
|
205
222
|
|
@@ -217,8 +234,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
217
234
|
## special case for '00.00.0000'
|
218
235
|
## CANNOT parse
|
219
236
|
## use empty date - why? why not?
|
237
|
+
## if start with 00.00. e.g. 00.00.1939
|
238
|
+
|
220
239
|
|
221
|
-
date = if date_str == '00.00.0000'
|
240
|
+
date = if date_str == '00.00.0000' ||
|
241
|
+
date_str.start_with?( '00.00.' ) ||
|
242
|
+
date_str.empty?
|
222
243
|
nil
|
223
244
|
else
|
224
245
|
Date.strptime( date_str, '%d.%m.%Y' )
|
@@ -237,7 +258,9 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
237
258
|
report_ref: score_ref
|
238
259
|
}
|
239
260
|
|
240
|
-
|
261
|
+
## note - only update last date if date present
|
262
|
+
## might be empty (not available) in the beginning
|
263
|
+
last_date_str = date_str if !date_str.empty?
|
241
264
|
end
|
242
265
|
end # each tr (table row)
|
243
266
|
|
data/lib/worldfootball.rb
CHANGED
@@ -41,6 +41,14 @@ module Worldfootball
|
|
41
41
|
def self.debug=(value) @debug = value; end
|
42
42
|
def self.debug?() @debug ||= false; end ## note: default is FALSE
|
43
43
|
|
44
|
+
def self.log( msg ) ### append to log
|
45
|
+
File.open( './logs.txt', 'a:utf-8' ) do |f|
|
46
|
+
f.write( msg )
|
47
|
+
f.write( "\n" )
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
|
44
52
|
|
45
53
|
|
46
54
|
class Configuration
|
@@ -71,6 +79,60 @@ end # module Worldfootball
|
|
71
79
|
|
72
80
|
|
73
81
|
|
82
|
+
###
|
83
|
+
# todo - move generate to generate file!!!
|
84
|
+
module Worldfootball
|
85
|
+
def self.generate( league:, season:,
|
86
|
+
overwrite: true )
|
87
|
+
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
88
|
+
|
89
|
+
league = find_league!( league )
|
90
|
+
pages = league.pages!( season: season )
|
91
|
+
|
92
|
+
|
93
|
+
out_path = if season >= Season( '2000' )
|
94
|
+
"#{config.generate.out_dir}/#{season.to_path}/#{league.key}.txt"
|
95
|
+
else
|
96
|
+
decade = season.start_year - (season.start_year%10)
|
97
|
+
## use archive-style before 2000!!!
|
98
|
+
"#{config.generate.out_dir}/archive/#{decade}s/#{season.to_path}/#{league.key}.txt"
|
99
|
+
end
|
100
|
+
|
101
|
+
## check if output exists already
|
102
|
+
if !overwrite && File.exist?( out_path )
|
103
|
+
## skip generation
|
104
|
+
puts " OK #{league.key} #{season} (do NOT overwrite)"
|
105
|
+
return
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
## get matches
|
110
|
+
path = "#{config.convert.out_dir}/#{season.to_path}/#{league.key}.csv"
|
111
|
+
puts " ---> reading matches in #{path} ..."
|
112
|
+
matches = SportDb::CsvMatchParser.read( path )
|
113
|
+
puts " #{matches.size} matches"
|
114
|
+
|
115
|
+
## build
|
116
|
+
txt = SportDb::TxtMatchWriter.build( matches )
|
117
|
+
puts txt
|
118
|
+
|
119
|
+
|
120
|
+
buf = String.new
|
121
|
+
## note - use league key for league name for now!!
|
122
|
+
buf << "= #{league.key.upcase.gsub('.', ' ')} #{season.key}\n\n"
|
123
|
+
buf << txt
|
124
|
+
|
125
|
+
puts " writing to >#{out_path}<..."
|
126
|
+
write_text( out_path, buf )
|
127
|
+
|
128
|
+
## add to tmp too for debugging
|
129
|
+
out_path2 = "#{config.generate.out_dir}/tmp/#{league.key}/#{season.to_path}.txt"
|
130
|
+
puts " writing to >#{out_path2}<..."
|
131
|
+
write_text( out_path2, buf )
|
132
|
+
end
|
133
|
+
end # module Worldfootball
|
134
|
+
|
135
|
+
|
74
136
|
|
75
137
|
|
76
138
|
puts Worldfootball.banner ## say hello
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: worldfootball
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: football-timezones
|
@@ -106,19 +106,24 @@ dependencies:
|
|
106
106
|
requirements:
|
107
107
|
- - "~>"
|
108
108
|
- !ruby/object:Gem::Version
|
109
|
-
version: '4.
|
109
|
+
version: '4.2'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
112
|
version_requirements: !ruby/object:Gem::Requirement
|
113
113
|
requirements:
|
114
114
|
- - "~>"
|
115
115
|
- !ruby/object:Gem::Version
|
116
|
-
version: '4.
|
116
|
+
version: '4.2'
|
117
117
|
description: worldfootball - get world football (leagues, cups & more) match data
|
118
118
|
via the worldfootball.net/weltfussball.de pages
|
119
119
|
email: gerald.bauer@gmail.com
|
120
120
|
executables:
|
121
121
|
- wfb
|
122
|
+
- wfbconf
|
123
|
+
- wfbconv
|
124
|
+
- wfbdump
|
125
|
+
- wfbgen
|
126
|
+
- wfbup
|
122
127
|
extensions: []
|
123
128
|
extra_rdoc_files:
|
124
129
|
- CHANGELOG.md
|
@@ -130,6 +135,11 @@ files:
|
|
130
135
|
- README.md
|
131
136
|
- Rakefile
|
132
137
|
- bin/wfb
|
138
|
+
- bin/wfbconf
|
139
|
+
- bin/wfbconv
|
140
|
+
- bin/wfbdump
|
141
|
+
- bin/wfbgen
|
142
|
+
- bin/wfbup
|
133
143
|
- config/leagues/africa.csv
|
134
144
|
- config/leagues/america.csv
|
135
145
|
- config/leagues/asia.csv
|
@@ -175,7 +185,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
185
|
- !ruby/object:Gem::Version
|
176
186
|
version: '0'
|
177
187
|
requirements: []
|
178
|
-
rubygems_version: 3.
|
188
|
+
rubygems_version: 3.5.22
|
179
189
|
signing_key:
|
180
190
|
specification_version: 4
|
181
191
|
summary: worldfootball - get world football (leagues, cups & more) match data via
|