worldfootball 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -2
- data/Manifest.txt +5 -0
- data/README.md +43 -1
- data/bin/wfb +14 -33
- data/bin/wfbconf +55 -0
- data/bin/wfbconv +100 -0
- data/bin/wfbdump +76 -0
- data/bin/wfbgen +102 -0
- data/bin/wfbup +101 -0
- data/config/leagues/america.csv +10 -3
- data/config/leagues/europe.csv +5 -2
- data/config/rounds.csv +6 -0
- data/config/stages.csv +16 -32
- data/lib/worldfootball/build-parse_score.rb +8 -3
- data/lib/worldfootball/convert.rb +52 -17
- data/lib/worldfootball/download.rb +6 -2
- data/lib/worldfootball/leagues.rb +16 -3
- data/lib/worldfootball/mods.rb +28 -3
- data/lib/worldfootball/page_schedule.rb +38 -15
- data/lib/worldfootball/version.rb +1 -1
- data/lib/worldfootball.rb +62 -0
- metadata +15 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53f1c44ded1d0dcec880df13b61a77265febcceacd88acb5ce883db39914cce9
|
4
|
+
data.tar.gz: b56eb75f1277026f77c29ee8f4d51788978955accbac5812659d1f3597e9747f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 501b137138a280f74d1cbd50c8f9ebc44ec8f1b7c7c9268005deb2d3a04b2d0046d05bf5a5ff7b3fb7c6f9f7b938423cdd83bb3b983d020bfb26057f2ed463c9
|
7
|
+
data.tar.gz: 84d716bcbffb32c11da5fd3a6b549a115ecbea0b4747182d84e3e7280cf3ff2de37ee57d37b20288d43f57374727fa907b256ed745b04d8b32b44ae17809d8ee
|
data/CHANGELOG.md
CHANGED
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -39,14 +39,56 @@ and so on.
|
|
39
39
|
|
40
40
|
|
41
41
|
|
42
|
-
### More
|
42
|
+
### More Command-Line Tools
|
43
43
|
|
44
|
+
<!--
|
44
45
|
Debugging tips & tricks. List all cached (offline local) match schedule pages:
|
45
46
|
|
46
47
|
```
|
47
48
|
$ wfb cache
|
48
49
|
```
|
50
|
+
-->
|
49
51
|
|
52
|
+
wfbup -
|
53
|
+
wfbconv -
|
54
|
+
wfbgen -
|
55
|
+
wfbconf -
|
56
|
+
wfbdump
|
57
|
+
|
58
|
+
|
59
|
+
#### wfbup - download leagues (if no league passed in, download all!)
|
60
|
+
|
61
|
+
```
|
62
|
+
$ wfbup # download ALL leagues and ALL seasons (uses all built-in configs)
|
63
|
+
$ wfbup at.1 # download ALL seasons for league
|
64
|
+
```
|
65
|
+
|
66
|
+
#### wfbconv - convert (to .csv) leagues (if no league passed in, converts all!)
|
67
|
+
|
68
|
+
```
|
69
|
+
$ wfbconv # convert ALL leagues and ALL seasons (uses all built-in configs)
|
70
|
+
$ wfbconv at.1 # convert ALL seasons for league
|
71
|
+
```
|
72
|
+
|
73
|
+
#### wfbgen - generate (.txt) leagues (if no league passed in, generate all!)
|
74
|
+
|
75
|
+
```
|
76
|
+
$ wfbgen # generate ALL leagues and ALL seasons (uses all built-in configs)
|
77
|
+
$ wfbgen at.1 # generate ALL seasons for league
|
78
|
+
```
|
79
|
+
|
80
|
+
|
81
|
+
#### wfbconf - check built-in config(uration) for league
|
82
|
+
|
83
|
+
```
|
84
|
+
$ wfbconf eng.1
|
85
|
+
```
|
86
|
+
|
87
|
+
#### wfbdump - dump (page) slug incl. matches, teams, rounds & more
|
88
|
+
|
89
|
+
```
|
90
|
+
$ wfbdump aut-bundesliga-2024-2025
|
91
|
+
```
|
50
92
|
|
51
93
|
|
52
94
|
|
data/bin/wfb
CHANGED
@@ -4,8 +4,12 @@
|
|
4
4
|
## ruby -I ./lib bin/wfb
|
5
5
|
## or
|
6
6
|
## ruby -I wfb/lib wfb/bin/wfb
|
7
|
+
## or
|
8
|
+
## ruby -I wfb/lib wfb/bin/wfb -f max.csv --cached
|
7
9
|
|
8
10
|
|
11
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
12
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
9
13
|
require 'worldfootball'
|
10
14
|
|
11
15
|
|
@@ -25,7 +29,7 @@ Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
|
25
29
|
end
|
26
30
|
|
27
31
|
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
28
|
-
puts " setting
|
32
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
29
33
|
'/sports/cache.wfb.txt'
|
30
34
|
else
|
31
35
|
'./tmp' ## use tmp in working dir
|
@@ -35,6 +39,7 @@ Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
|
35
39
|
require 'optparse'
|
36
40
|
|
37
41
|
|
42
|
+
|
38
43
|
module Worldfootball
|
39
44
|
def self.main( args=ARGV )
|
40
45
|
|
@@ -130,15 +135,14 @@ end
|
|
130
135
|
# assume leagues
|
131
136
|
|
132
137
|
datasets = if opts[:file]
|
133
|
-
|
138
|
+
read_leagueset( opts[:file] )
|
134
139
|
else
|
135
|
-
|
140
|
+
parse_leagueset_args( args )
|
136
141
|
end
|
137
142
|
|
138
143
|
|
139
144
|
## step 0 - validate and fill-up seasons etc.
|
140
|
-
datasets.each do |
|
141
|
-
league_key, seasons = dataset
|
145
|
+
datasets.each do |league_key, seasons|
|
142
146
|
|
143
147
|
league = find_league!( league_key ) ## league info lookup
|
144
148
|
|
@@ -150,8 +154,8 @@ datasets.each do |dataset|
|
|
150
154
|
## might be 2024/25 or 2024 or
|
151
155
|
# for world cup 2022 or such
|
152
156
|
if seasons.empty?
|
153
|
-
|
154
|
-
|
157
|
+
season = Season(league.seasons.keys[0])
|
158
|
+
seasons << season
|
155
159
|
end
|
156
160
|
end
|
157
161
|
|
@@ -206,36 +210,13 @@ if opts[:convert]
|
|
206
210
|
end
|
207
211
|
end
|
208
212
|
|
213
|
+
|
209
214
|
if opts[:generate]
|
210
215
|
## step 3 - generate
|
211
216
|
datasets.each do |league_key, seasons|
|
212
217
|
seasons.each do |season|
|
213
|
-
|
214
|
-
|
215
|
-
## get matches
|
216
|
-
puts " ---> reading matches in #{path} ..."
|
217
|
-
matches = SportDb::CsvMatchParser.read( path )
|
218
|
-
puts " #{matches.size} matches"
|
219
|
-
|
220
|
-
## build
|
221
|
-
txt = SportDb::TxtMatchWriter.build( matches )
|
222
|
-
puts txt
|
223
|
-
|
224
|
-
path = if season >= Season( '2000' )
|
225
|
-
"#{config.generate.out_dir}/#{season.to_path}/#{league_key}.txt"
|
226
|
-
else
|
227
|
-
decade = season.start_year - (season.start_year%10)
|
228
|
-
## use archive-style before 2000!!!
|
229
|
-
"#{config.generate.out_dir}/archive/#{decade}s/#{season.to_path}/#{league_key}.txt"
|
230
|
-
end
|
231
|
-
|
232
|
-
buf = String.new
|
233
|
-
## note - use league key for league name for now!!
|
234
|
-
buf << "= #{league_key.upcase.gsub('.', ' ')} #{season.key}\n\n"
|
235
|
-
buf << txt
|
236
|
-
|
237
|
-
puts " writing to >#{path}<..."
|
238
|
-
write_text( path, buf )
|
218
|
+
generate( league: league_key,
|
219
|
+
season: season )
|
239
220
|
end
|
240
221
|
end
|
241
222
|
## for debugging dump page (slug) titles
|
data/bin/wfbconf
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbconf
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbconf
|
7
|
+
|
8
|
+
####
|
9
|
+
# wfbconf - check built-in config(uration) for league
|
10
|
+
# e.g. wfbconf eng.1
|
11
|
+
|
12
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
13
|
+
require 'worldfootball'
|
14
|
+
|
15
|
+
|
16
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
17
|
+
puts " setting web cache to >/sports/cache<"
|
18
|
+
'/sports/cache'
|
19
|
+
else
|
20
|
+
'./cache'
|
21
|
+
end
|
22
|
+
|
23
|
+
|
24
|
+
|
25
|
+
require 'optparse'
|
26
|
+
|
27
|
+
##
|
28
|
+
# by default convert all with overwrite/force set to false
|
29
|
+
|
30
|
+
Webget.config.sleep = 2
|
31
|
+
|
32
|
+
args = ARGV
|
33
|
+
|
34
|
+
|
35
|
+
Worldfootball.debug = true
|
36
|
+
|
37
|
+
##
|
38
|
+
## download fresh copy?
|
39
|
+
## Worldfootball::Metal.download_schedule( slug )
|
40
|
+
|
41
|
+
key = args[0] || 'eng.1'
|
42
|
+
|
43
|
+
league = Worldfootball::LEAGUES[key]
|
44
|
+
seasons = league.seasons
|
45
|
+
pp seasons
|
46
|
+
|
47
|
+
puts " #{key} - #{seasons.size} season(s)"
|
48
|
+
|
49
|
+
puts
|
50
|
+
latest = seasons.keys[0]
|
51
|
+
puts " latest #{latest}: "
|
52
|
+
pp seasons[latest]
|
53
|
+
|
54
|
+
|
55
|
+
puts "bye"
|
data/bin/wfbconv
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbconv
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbconv
|
7
|
+
|
8
|
+
###
|
9
|
+
# wfbconv - convert (to .csv) all leagues (if no league passed in, converts all!)
|
10
|
+
#
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
14
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
15
|
+
require 'worldfootball'
|
16
|
+
|
17
|
+
|
18
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
19
|
+
puts " setting web cache to >/sports/cache<"
|
20
|
+
'/sports/cache'
|
21
|
+
else
|
22
|
+
'./cache'
|
23
|
+
end
|
24
|
+
|
25
|
+
## convert (default) output directory
|
26
|
+
Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
27
|
+
puts " setting convert out_dir to >/sports/cache.wfb<"
|
28
|
+
'/sports/cache.wfb'
|
29
|
+
else
|
30
|
+
'./tmp' ## use tmp in working dir
|
31
|
+
end
|
32
|
+
|
33
|
+
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
34
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
35
|
+
'/sports/cache.wfb.txt'
|
36
|
+
else
|
37
|
+
'./tmp' ## use tmp in working dir
|
38
|
+
end
|
39
|
+
|
40
|
+
|
41
|
+
require 'optparse'
|
42
|
+
|
43
|
+
##
|
44
|
+
# by default convert all with overwrite/force set to false
|
45
|
+
|
46
|
+
Webget.config.sleep = 2
|
47
|
+
|
48
|
+
args = ARGV
|
49
|
+
|
50
|
+
opts = {
|
51
|
+
force: false, # a.k.a. overwrite
|
52
|
+
}
|
53
|
+
|
54
|
+
|
55
|
+
parser = OptionParser.new do |parser|
|
56
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
57
|
+
|
58
|
+
|
59
|
+
parser.on( "--force",
|
60
|
+
"always overwrite (force) datafile - default is (#{opts[:force]})" ) do |force|
|
61
|
+
opts[:force] = true # true|false
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
parser.parse!( args )
|
67
|
+
|
68
|
+
puts "OPTS:"
|
69
|
+
p opts
|
70
|
+
puts "ARGV:"
|
71
|
+
p args
|
72
|
+
|
73
|
+
|
74
|
+
keys = if args.size == 0
|
75
|
+
Worldfootball::LEAGUES.keys
|
76
|
+
else
|
77
|
+
args
|
78
|
+
end
|
79
|
+
|
80
|
+
|
81
|
+
|
82
|
+
keys.each_with_index do |key, i|
|
83
|
+
league = Worldfootball::LEAGUES[key]
|
84
|
+
seasons = league.seasons
|
85
|
+
|
86
|
+
puts "==> #{i+1}/#{keys.size} #{key} - #{seasons.size} seasons(s)..."
|
87
|
+
|
88
|
+
seasons.each_with_index do |season_rec,j|
|
89
|
+
season = season_rec[0]
|
90
|
+
|
91
|
+
next if key == 'nl.cup' && season == '1959/60'
|
92
|
+
|
93
|
+
puts " #{j+1}/#{seasons.size} #{key} #{season}..."
|
94
|
+
Worldfootball.convert( league: key, season: season,
|
95
|
+
overwrite: opts[:force] )
|
96
|
+
end
|
97
|
+
end
|
98
|
+
|
99
|
+
|
100
|
+
puts "bye"
|
data/bin/wfbdump
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbdump
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbdump
|
7
|
+
|
8
|
+
|
9
|
+
#####
|
10
|
+
# wfbdump - dump matches, teams, rounds and more for (page) slug
|
11
|
+
# e.g.
|
12
|
+
# wfbdump caf-champions-league-2008
|
13
|
+
# wfbdump aut-bundesliga-2024-2025
|
14
|
+
|
15
|
+
|
16
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
17
|
+
require 'worldfootball'
|
18
|
+
|
19
|
+
|
20
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
21
|
+
puts " setting web cache to >/sports/cache<"
|
22
|
+
'/sports/cache'
|
23
|
+
else
|
24
|
+
'./cache'
|
25
|
+
end
|
26
|
+
|
27
|
+
|
28
|
+
|
29
|
+
require 'optparse'
|
30
|
+
|
31
|
+
##
|
32
|
+
# by default convert all with overwrite/force set to false
|
33
|
+
|
34
|
+
Webget.config.sleep = 2
|
35
|
+
|
36
|
+
args = ARGV
|
37
|
+
|
38
|
+
|
39
|
+
Worldfootball.debug = true
|
40
|
+
|
41
|
+
##
|
42
|
+
## download fresh copy?
|
43
|
+
## Worldfootball::Metal.download_schedule( slug )
|
44
|
+
|
45
|
+
slug = args[0] || 'caf-champions-league-2008'
|
46
|
+
|
47
|
+
page = Worldfootball::Page::Schedule.from_cache( slug )
|
48
|
+
|
49
|
+
matches = page.matches
|
50
|
+
teams = page.teams
|
51
|
+
rounds = page.rounds
|
52
|
+
|
53
|
+
puts " #{matches.size} match(es), #{teams.size} team(s), #{rounds.size} round(s)"
|
54
|
+
# pp matches
|
55
|
+
|
56
|
+
puts
|
57
|
+
puts " #{teams.size} team(s)"
|
58
|
+
pp teams
|
59
|
+
|
60
|
+
puts
|
61
|
+
puts " #{rounds.size} round(s)"
|
62
|
+
pp rounds
|
63
|
+
|
64
|
+
|
65
|
+
puts "==> page props"
|
66
|
+
print "title: "
|
67
|
+
puts page.title
|
68
|
+
puts
|
69
|
+
print "keywords: "
|
70
|
+
puts page.keywords
|
71
|
+
puts
|
72
|
+
print "url: "
|
73
|
+
puts page.url
|
74
|
+
|
75
|
+
|
76
|
+
puts "bye"
|
data/bin/wfbgen
ADDED
@@ -0,0 +1,102 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbgen
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbgen
|
7
|
+
|
8
|
+
###############
|
9
|
+
# wfbgen - generate (.txt) leagues (if no league passed in, generates all!)
|
10
|
+
#
|
11
|
+
|
12
|
+
|
13
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
14
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sportdb-writers/lib' )
|
15
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
16
|
+
|
17
|
+
require 'worldfootball'
|
18
|
+
|
19
|
+
|
20
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
21
|
+
puts " setting web cache to >/sports/cache<"
|
22
|
+
'/sports/cache'
|
23
|
+
else
|
24
|
+
'./cache'
|
25
|
+
end
|
26
|
+
|
27
|
+
## convert (default) output directory
|
28
|
+
Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
29
|
+
puts " setting convert out_dir to >/sports/cache.wfb<"
|
30
|
+
'/sports/cache.wfb'
|
31
|
+
else
|
32
|
+
'./tmp' ## use tmp in working dir
|
33
|
+
end
|
34
|
+
|
35
|
+
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
36
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
37
|
+
'/sports/cache.wfb.txt'
|
38
|
+
else
|
39
|
+
'./tmp' ## use tmp in working dir
|
40
|
+
end
|
41
|
+
|
42
|
+
|
43
|
+
require 'optparse'
|
44
|
+
|
45
|
+
##
|
46
|
+
# by default generate all with overwrite/force set to false
|
47
|
+
|
48
|
+
Webget.config.sleep = 2
|
49
|
+
|
50
|
+
args = ARGV
|
51
|
+
|
52
|
+
opts = {
|
53
|
+
force: false, # a.k.a. overwrite
|
54
|
+
}
|
55
|
+
|
56
|
+
|
57
|
+
parser = OptionParser.new do |parser|
|
58
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
59
|
+
|
60
|
+
|
61
|
+
parser.on( "--force",
|
62
|
+
"always overwrite (force) datafile - default is (#{opts[:force]})" ) do |force|
|
63
|
+
opts[:force] = true # true|false
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
|
68
|
+
parser.parse!( args )
|
69
|
+
|
70
|
+
puts "OPTS:"
|
71
|
+
p opts
|
72
|
+
puts "ARGV:"
|
73
|
+
p args
|
74
|
+
|
75
|
+
|
76
|
+
keys = if args.size == 0
|
77
|
+
Worldfootball::LEAGUES.keys
|
78
|
+
else
|
79
|
+
args
|
80
|
+
end
|
81
|
+
|
82
|
+
|
83
|
+
|
84
|
+
keys.each_with_index do |key, i|
|
85
|
+
league = Worldfootball::LEAGUES[key]
|
86
|
+
seasons = league.seasons
|
87
|
+
|
88
|
+
puts "==> #{i+1}/#{keys.size} #{key} - #{seasons.size} seasons(s)..."
|
89
|
+
|
90
|
+
seasons.each_with_index do |season_rec,j|
|
91
|
+
season = season_rec[0]
|
92
|
+
|
93
|
+
next if key == 'nl.cup' && season == '1959/60'
|
94
|
+
|
95
|
+
puts " #{j+1}/#{seasons.size} #{key} #{season}..."
|
96
|
+
Worldfootball.generate( league: key, season: season,
|
97
|
+
overwrite: opts[:force] )
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
|
102
|
+
puts "bye"
|
data/bin/wfbup
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
## tip: to test run:
|
4
|
+
## ruby -I ./lib bin/wfbup
|
5
|
+
## or
|
6
|
+
## ruby -I wfb/lib wfb/bin/wfbup
|
7
|
+
|
8
|
+
|
9
|
+
##
|
10
|
+
## add offset for restart!!!!
|
11
|
+
## e.g. si.1 - maybe add season later!!!
|
12
|
+
## or better add a expired option e.g. 24h or such !!!!
|
13
|
+
## - stopping at si.1 2017/18...
|
14
|
+
|
15
|
+
|
16
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/timezones/lib' )
|
17
|
+
$LOAD_PATH.unshift( '/sports/sportdb/sport.db/fifa/lib' )
|
18
|
+
require 'worldfootball'
|
19
|
+
|
20
|
+
|
21
|
+
Webcache.root = if File.exist?( '/sports/cache' )
|
22
|
+
puts " setting web cache to >/sports/cache<"
|
23
|
+
'/sports/cache'
|
24
|
+
else
|
25
|
+
'./cache'
|
26
|
+
end
|
27
|
+
|
28
|
+
## convert (default) output directory
|
29
|
+
Worldfootball.config.convert.out_dir = if File.exist?( '/sports/cache.wfb')
|
30
|
+
puts " setting convert out_dir to >/sports/cache.wfb<"
|
31
|
+
'/sports/cache.wfb'
|
32
|
+
else
|
33
|
+
'./tmp' ## use tmp in working dir
|
34
|
+
end
|
35
|
+
|
36
|
+
Worldfootball.config.generate.out_dir = if File.exist?( '/sports/cache.wfb.txt')
|
37
|
+
puts " setting generate out_dir to >/sports/cache.wfb.txt<"
|
38
|
+
'/sports/cache.wfb.txt'
|
39
|
+
else
|
40
|
+
'./tmp' ## use tmp in working dir
|
41
|
+
end
|
42
|
+
|
43
|
+
|
44
|
+
require 'optparse'
|
45
|
+
|
46
|
+
##
|
47
|
+
# by default download all with overwrite/force set to false
|
48
|
+
|
49
|
+
Webget.config.sleep = 2
|
50
|
+
|
51
|
+
args = ARGV
|
52
|
+
|
53
|
+
opts = {
|
54
|
+
force: false, # a.k.a. overwrite
|
55
|
+
}
|
56
|
+
|
57
|
+
|
58
|
+
parser = OptionParser.new do |parser|
|
59
|
+
parser.banner = "Usage: #{$PROGRAM_NAME} [options]"
|
60
|
+
|
61
|
+
|
62
|
+
parser.on( "--force",
|
63
|
+
"always overwrite (force) datafile - default is (#{opts[:force]})" ) do |force|
|
64
|
+
opts[:force] = true # true|false
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
|
69
|
+
parser.parse!( args )
|
70
|
+
|
71
|
+
puts "OPTS:"
|
72
|
+
p opts
|
73
|
+
puts "ARGV:"
|
74
|
+
p args
|
75
|
+
|
76
|
+
|
77
|
+
keys = if args.size == 0
|
78
|
+
Worldfootball::LEAGUES.keys
|
79
|
+
else
|
80
|
+
args
|
81
|
+
end
|
82
|
+
|
83
|
+
|
84
|
+
|
85
|
+
keys.each_with_index do |key, i|
|
86
|
+
league = Worldfootball::LEAGUES[key]
|
87
|
+
seasons = league.seasons
|
88
|
+
|
89
|
+
puts "==> #{i+1}/#{keys.size} #{key} - #{seasons.size} seasons(s)..."
|
90
|
+
|
91
|
+
seasons.each_with_index do |season_rec,j|
|
92
|
+
season = season_rec[0]
|
93
|
+
|
94
|
+
puts " #{j+1}/#{seasons.size} #{key} #{season}..."
|
95
|
+
Worldfootball.schedule( league: key, season: season,
|
96
|
+
overwrite: opts[:force] )
|
97
|
+
end
|
98
|
+
end
|
99
|
+
|
100
|
+
|
101
|
+
puts "bye"
|
data/config/leagues/america.csv
CHANGED
@@ -23,12 +23,19 @@ ca.cup, can-canadian-championship-2024
|
|
23
23
|
# - Finale
|
24
24
|
# /mex-primera-division-2018-2019-clausura-playoffs/
|
25
25
|
mx.1, mex-primera-division-2024-2025-apertura_2
|
26
|
-
|
26
|
+
|
27
|
+
## merge expansion and ascenso into one (mx.2) - possible? why? why not?
|
28
|
+
|
29
|
+
## first expansion season -> 2020/21 Apertura
|
30
|
+
## last ascenso season -> 2019/20 Clausura
|
31
|
+
mx.2.expansion, mex-liga-de-expansion-2024-2025-apertura
|
32
|
+
mx.2.ascenso, mex-liga-de-ascenso-2019-2020-clausura
|
33
|
+
|
27
34
|
mx.cup, mex-copa-mx-2019-2020
|
28
35
|
|
29
36
|
|
30
|
-
|
31
|
-
mx.3,
|
37
|
+
mx.3.a, mex-lp-serie-a-2024-2025-apertura
|
38
|
+
mx.3.b, mex-lp-serie-b-2024-2025-apertura
|
32
39
|
|
33
40
|
|
34
41
|
|
data/config/leagues/europe.csv
CHANGED
@@ -33,7 +33,10 @@ hu.1, hun-nb-i-2024-2025
|
|
33
33
|
|
34
34
|
cz.1, cze-1-fotbalova-liga-2024-2025
|
35
35
|
cz.2, cze-2-fotbalova-liga-2024-2025
|
36
|
-
|
36
|
+
|
37
|
+
## commented out for now (three leagues in one - split !!)
|
38
|
+
## cz.3, cze-3-fotbalova-liga-2024-2025-cfl-a
|
39
|
+
|
37
40
|
|
38
41
|
sk.1, svk-super-liga-2024-2025
|
39
42
|
|
@@ -151,7 +154,7 @@ rs.1, srb-super-liga-2024-2025
|
|
151
154
|
si.1, svn-prvaliga-2024-2025
|
152
155
|
|
153
156
|
## todo/fix - change to kos.1 - why? why not?
|
154
|
-
xk.1, kos-superliga-2024-2025
|
157
|
+
## xk.1, kos-superliga-2024-2025
|
155
158
|
kos.1, kos-superliga-2024-2025
|
156
159
|
|
157
160
|
|
data/config/rounds.csv
CHANGED
data/config/stages.csv
CHANGED
@@ -1,51 +1,38 @@
|
|
1
1
|
key, name1, name2,
|
2
2
|
|
3
|
-
*, Meisterschaft,
|
4
|
-
*, Abstieg,
|
5
|
-
*, Relegation, Playoffs - Relegation
|
3
|
+
*, Meisterschaft, Championship
|
4
|
+
*, Abstieg, Relegation
|
6
5
|
|
6
|
+
## use Playoffs - Championship -- why? why not?
|
7
|
+
## use Playoffs - Relegation -- why? why not?
|
7
8
|
|
8
|
-
sco.1, Championship, Playoffs - Championship
|
9
|
-
sco.1, Relegation', Playoffs - Relegation
|
10
9
|
|
11
|
-
|
12
|
-
at.1,
|
13
|
-
at.1, Qualifikationsgruppe, Playoffs - Relegation
|
10
|
+
at.1, Meistergruppe, Championship
|
11
|
+
at.1, Qualifikationsgruppe, Relegation
|
14
12
|
at.1, Playoff, Europa League Finals
|
15
13
|
|
16
14
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
sk.1, Meisterschaft, Playoffs - Championship
|
22
|
-
sk.1, Abstieg, Playoffs - Relegation
|
15
|
+
sk.1, Meisterschaft, Championship
|
16
|
+
sk.1, Abstieg, Relegation
|
23
17
|
sk.1, Europa League, Europa League Finals
|
24
18
|
|
25
19
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
ru.1, Meisterschaft, Playoffs - Championship
|
30
|
-
ru.1, Relegation, Playoffs - Relegation
|
31
|
-
|
32
|
-
|
33
|
-
ua.1, Meisterschaft, Playoffs - Championship
|
34
|
-
ua.1, Abstieg, Playoffs - Relegation
|
20
|
+
ua.1, Meisterschaft, Championship
|
21
|
+
ua.1, Abstieg, Relegation
|
35
22
|
ua.1, Playoffs EL, Europa League Finals
|
36
23
|
|
37
24
|
|
38
|
-
fi.1, Meisterschaft,
|
39
|
-
fi.1, Abstieg,
|
25
|
+
fi.1, Meisterschaft, Championship
|
26
|
+
fi.1, Abstieg, Relegation
|
40
27
|
fi.1, Playoff EL, Europa League Finals
|
41
28
|
|
42
|
-
dk.1, Meisterschaft,
|
43
|
-
dk.1, Abstieg,
|
29
|
+
dk.1, Meisterschaft, Championship
|
30
|
+
dk.1, Abstieg, Relegation
|
44
31
|
dk.1, Europa League, Europa League Finals
|
45
32
|
|
46
33
|
|
47
|
-
gr.1, Meisterschaft,
|
48
|
-
gr.1, Abstieg,
|
34
|
+
gr.1, Meisterschaft, Championship
|
35
|
+
gr.1, Abstieg, Relegation
|
49
36
|
gr.1, Playoffs, Playoffs
|
50
37
|
gr.1, Spiel um Platz 6, Match 6th Place
|
51
38
|
|
@@ -55,9 +42,6 @@ mx.1, Apertura Playoffs, Apertura - Liguilla
|
|
55
42
|
mx.1, Clausura Playoffs, Clausura - Liguilla
|
56
43
|
|
57
44
|
|
58
|
-
kr.1, Meisterschaft, Playoffs - Championship
|
59
|
-
kr.1, Abstieg, Playoffs - Relegation
|
60
|
-
|
61
45
|
|
62
46
|
nz.1, Playoffs, Playoff Finals
|
63
47
|
|
data/lib/worldfootball/build-parse_score.rb
CHANGED
@@ -31,13 +31,18 @@ def self.parse_score( score_str )
|
|
31
31
|
et = ''
|
32
32
|
pen = ''
|
33
33
|
|
34
|
+
##
|
35
|
+
## [085] 2021-10-21 | 22:00 | Metropolitanos FC | LALA FC | Aufg.
|
36
|
+
## !! ERROR - unsupported score format >Aufg.< - sorry; maybe add a score error fix/patch
|
37
|
+
## - handle with Aufg.
|
34
38
|
|
35
39
|
if score_str == '---' ## in the future (no score yet) - was -:-
|
36
40
|
ft = ''
|
37
41
|
ht = ''
|
38
42
|
elsif score_str == 'n.gesp.' || ## cancelled (british) / canceled (us)
|
39
43
|
score_str == 'ausg.' || ## todo/check: change to some other status ????
|
40
|
-
score_str == 'annull.'
|
44
|
+
score_str == 'annull.' || ## todo/check: change to some other status (see ie 2012) ????
|
45
|
+
score_str == 'Aufg.'
|
41
46
|
ft = '(*)'
|
42
47
|
ht = ''
|
43
48
|
comments = 'cancelled'
|
@@ -172,13 +177,13 @@ def self.parse_score( score_str )
|
|
172
177
|
puts "!! WARN - weird score n.V. only - >#{score_str}<"
|
173
178
|
elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
|
174
179
|
[ ]*
|
175
|
-
|
180
|
+
(?: i\.E\. | n\.P\. )
|
176
181
|
$/x
|
177
182
|
pen = "#{$1}-#{$2}"
|
178
183
|
et = ''
|
179
184
|
ht = ''
|
180
185
|
ft = ''
|
181
|
-
puts "!! WARN - weird score i.E. only - >#{score_str}<"
|
186
|
+
puts "!! WARN - weird score i.E. (n.P.) only - >#{score_str}<"
|
182
187
|
else
|
183
188
|
puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
|
184
189
|
exit 1
|
data/lib/worldfootball/convert.rb
CHANGED
@@ -2,13 +2,23 @@
|
|
2
2
|
module Worldfootball
|
3
3
|
|
4
4
|
|
5
|
-
def self.convert( league:, season
|
5
|
+
def self.convert( league:, season:,
|
6
|
+
overwrite: true )
|
6
7
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
7
8
|
|
8
9
|
league = find_league!( league )
|
9
10
|
pages = league.pages!( season: season )
|
10
11
|
|
11
12
|
|
13
|
+
out_path = "#{config.convert.out_dir}/#{season.path}/#{league.key}.csv"
|
14
|
+
if !overwrite && File.exist?( out_path )
|
15
|
+
## skip generation
|
16
|
+
puts " OK #{league.key} #{season} (do NOT overwrite)"
|
17
|
+
return
|
18
|
+
end
|
19
|
+
|
20
|
+
|
21
|
+
|
12
22
|
## collect all teams
|
13
23
|
teams_by_ref = {}
|
14
24
|
|
@@ -48,17 +58,19 @@ def self.convert( league:, season: )
|
|
48
58
|
team_name = norm_team( h[:name] ) ## note: norm team name!!!
|
49
59
|
team_ref = h[:ref]
|
50
60
|
|
61
|
+
###
|
62
|
+
## quick fix for broken refs/links
|
63
|
+
## olympique-lyon => olympique-lyonnais
|
64
|
+
# team_ref = 'olympique-lyonnais' if team_ref == 'olympique-lyon'
|
65
|
+
|
51
66
|
## note: skip N.N. (place holder team)
|
52
67
|
## team_ref is nil etc.
|
53
68
|
next if team_name == 'N.N.'
|
54
69
|
|
55
70
|
team_stat = teams_by_ref[ team_ref ] ||= { count: 0,
|
56
|
-
|
71
|
+
names: [] }
|
57
72
|
team_stat[:count] += team_count
|
58
|
-
|
59
|
-
puts "!! ASSERT ERROR - team ref with differet names; expected #{team_stat[:name]} - got #{team_name}"
|
60
|
-
exit 1
|
61
|
-
end
|
73
|
+
team_stat[:names] << team_name unless team_stat[:names].include?( team_name )
|
62
74
|
end
|
63
75
|
|
64
76
|
|
@@ -73,9 +85,12 @@ def self.convert( league:, season: )
|
|
73
85
|
|
74
86
|
|
75
87
|
clubs_intl = ['uefa.cl', 'uefa.el', 'uefa.conf',
|
76
|
-
'
|
88
|
+
'uefa.cl.q', 'uefa.el.q', 'uefa.conf.q',
|
89
|
+
'copa.l',
|
90
|
+
'concacaf.cl',
|
77
91
|
'caf.cl',
|
78
|
-
'afl'
|
92
|
+
'afl',
|
93
|
+
].include?(league.key) ? true : false
|
79
94
|
|
80
95
|
####
|
81
96
|
# auto-add (fifa) country code if int'l club tournament
|
@@ -83,7 +98,6 @@ def self.convert( league:, season: )
|
|
83
98
|
##
|
84
99
|
## get country codes for team ref
|
85
100
|
teams_by_ref.each do |team_slug, h|
|
86
|
-
|
87
101
|
Metal.download_team( team_slug, cache: true )
|
88
102
|
team_page = Page::Team.from_cache( team_slug )
|
89
103
|
props = team_page.props
|
@@ -99,14 +113,20 @@ def self.convert( league:, season: )
|
|
99
113
|
|
100
114
|
## generate lookup by name
|
101
115
|
teams_by_name = teams_by_ref.reduce( {} ) do |h, (slug,rec)|
|
102
|
-
|
103
|
-
|
116
|
+
### todo/fix
|
117
|
+
## report warning if names size is > 1!!!!
|
118
|
+
##
|
119
|
+
rec[:names].each do |name|
|
120
|
+
h[ name ] = rec
|
121
|
+
end
|
122
|
+
h
|
104
123
|
end
|
105
124
|
|
125
|
+
|
106
126
|
#####
|
107
127
|
## dump team refs
|
108
128
|
puts " #{teams_by_ref.size} team(s) by ref:"
|
109
|
-
pp
|
129
|
+
pp teams_by_ref
|
110
130
|
|
111
131
|
## quick hack
|
112
132
|
## add country (fifa) codes to team names
|
@@ -129,10 +149,22 @@ def self.convert( league:, season: )
|
|
129
149
|
## note: sort matches by date before saving/writing!!!!
|
130
150
|
## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
|
131
151
|
## note: assume date is third column!!! (stage/round/date/...)
|
132
|
-
|
152
|
+
|
153
|
+
### note - do NOT sort for now
|
154
|
+
## keep "original" page order - why? why not?
|
155
|
+
## recs = recs.sort { |l,r| l[2] <=> r[2] }
|
156
|
+
|
157
|
+
|
133
158
|
## reformat date / beautify e.g. Sat Aug 7 1993
|
134
159
|
recs.each do |rec|
|
135
|
-
|
160
|
+
if rec[2]
|
161
|
+
if rec[2] =~ /^\d{4}-\d{1,2}-\d{1,2}$/
|
162
|
+
rec[2] = Date.strptime( rec[2], '%Y-%m-%d' ).strftime( '%a %b %-d %Y' )
|
163
|
+
else
|
164
|
+
## report unknown date format warning
|
165
|
+
puts "WARN - unsupported date format (cannot parse?) >#{rec[2]}<"
|
166
|
+
end
|
167
|
+
end
|
136
168
|
end
|
137
169
|
|
138
170
|
## remove unused columns (e.g. stage, et, p, etc.)
|
@@ -141,10 +173,13 @@ recs.each do |rec|
|
|
141
173
|
puts headers
|
142
174
|
pp recs[0] ## check first record
|
143
175
|
|
144
|
-
out_path
|
145
|
-
|
146
|
-
puts "write #{out_path}..."
|
176
|
+
puts " writing to >#{out_path}< - #{recs.size} record(s)..."
|
147
177
|
write_csv( out_path, recs, headers: headers )
|
178
|
+
|
179
|
+
## add to tmp too for debugging
|
180
|
+
out_path2 = "#{config.convert.out_dir}/tmp/#{league.key}/#{season.to_path}.csv"
|
181
|
+
puts " writing to >#{out_path2}< - #{recs.size} record(s)..."
|
182
|
+
write_csv( out_path2, recs, headers: headers )
|
148
183
|
end
|
149
184
|
end # module Worldfootball
|
150
185
|
|
data/lib/worldfootball/download.rb
CHANGED
@@ -4,12 +4,16 @@ module Worldfootball
|
|
4
4
|
|
5
5
|
#################
|
6
6
|
## porcelain "api"
|
7
|
-
def self.schedule( league:, season: )
|
7
|
+
def self.schedule( league:, season:, overwrite: true )
|
8
8
|
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
9
9
|
|
10
10
|
pages = find_league_pages!( league: league, season: season )
|
11
11
|
pages.each do |slug, _|
|
12
|
-
Metal.
|
12
|
+
if !overwrite && Webcache.cached?( Metal.schedule_url( slug ))
|
13
|
+
puts " OK #{league} #{season} - #{slug} (do NOT overwrite)"
|
14
|
+
else
|
15
|
+
Metal.download_schedule( slug )
|
16
|
+
end
|
13
17
|
end # each page
|
14
18
|
end
|
15
19
|
|
data/lib/worldfootball/leagues.rb
CHANGED
@@ -61,9 +61,12 @@ class LeagueItem # nested inside LeagueConfig
|
|
61
61
|
season, stage = text.split( ' ', 2 )
|
62
62
|
|
63
63
|
## todo/fix: add a warning here and auto log to logs.txt!!!!
|
64
|
-
|
65
|
-
|
66
|
-
|
64
|
+
if ['2019-2021',
|
65
|
+
'1958/1960',
|
66
|
+
'1955/1958' ].include?( season )
|
67
|
+
log( "!! WARN - seasons for league #{@key} incl. invalid season #{season} - slug #{slug}; skipping season" )
|
68
|
+
next ## note - skip invalid season entry
|
69
|
+
end
|
67
70
|
|
68
71
|
season = Season.parse( season )
|
69
72
|
|
@@ -106,6 +109,13 @@ class LeagueItem # nested inside LeagueConfig
|
|
106
109
|
recs = seasons[season.key]
|
107
110
|
recs ? recs.reverse : nil
|
108
111
|
end
|
112
|
+
|
113
|
+
def log( msg ) ### append to log
|
114
|
+
File.open( './logs.txt', 'a:utf-8' ) do |f|
|
115
|
+
f.write( msg )
|
116
|
+
f.write( "\n" )
|
117
|
+
end
|
118
|
+
end
|
109
119
|
end # class LeagueItem
|
110
120
|
|
111
121
|
|
@@ -122,6 +132,9 @@ def size() @table.size; end
|
|
122
132
|
end # class LeagueConfig
|
123
133
|
|
124
134
|
|
135
|
+
|
136
|
+
|
137
|
+
|
125
138
|
LEAGUES = LeagueConfig.new
|
126
139
|
['africa',
|
127
140
|
'america',
|
data/lib/worldfootball/mods.rb
CHANGED
@@ -10,11 +10,20 @@ module Worldfootball
|
|
10
10
|
def self.norm_team( team )
|
11
11
|
## clean team name and asciify (e.g. ’->' )
|
12
12
|
team = team.sub( '(old)', '' ).strip
|
13
|
-
team = team.gsub( '’', "'" ) ## e.g. Hawke’s Bay United FC
|
14
13
|
|
15
|
-
##
|
16
|
-
##
|
14
|
+
## e.g. Hawke’s Bay United FC or
|
15
|
+
## ASC Monts d`Or Chasselay or
|
16
|
+
## VV Heerlen ´16 / EMM ´15 / Wormer SV´30 / Swift ´36 / etc.
|
17
|
+
team = team.gsub( /[’´`]/, "'" )
|
18
|
+
|
19
|
+
|
20
|
+
## br
|
21
|
+
## Criciúma - SC => Criciúma SC
|
22
|
+
## Bahia - BA => Bahia BA
|
23
|
+
## cz
|
24
|
+
## Baník Most - Souš => Baník Most Souš
|
17
25
|
## remove inline dash ( - ) with single space
|
26
|
+
## to log
|
18
27
|
team = team.gsub( /[ ]+[-][ ]+/, ' ' )
|
19
28
|
|
20
29
|
|
@@ -24,6 +33,22 @@ def self.norm_team( team )
|
|
24
33
|
## others too? - move to mods instead of generic rule - why? why not?
|
25
34
|
team = team.sub( /[ ]+\(A\)/, ' II' )
|
26
35
|
|
36
|
+
##
|
37
|
+
## remove () - used/reserved for country code for now - why? why not?
|
38
|
+
## e.g. Lloyds FC (Sittingbourne) => Lloyds FC Sittingbourne
|
39
|
+
## August 1st (Army Team) => August 1st Army Team
|
40
|
+
##
|
41
|
+
## add warning - why? why not?
|
42
|
+
team = team.sub( /\(
|
43
|
+
([^)]+?) ## eat-up all non-greed to next )
|
44
|
+
\)/x, '\1' )
|
45
|
+
|
46
|
+
##
|
47
|
+
## strip special case
|
48
|
+
## MFK Frýdek-Místek, a.s. => MFK Frýdek-Místek
|
49
|
+
team = team.sub( ', a.s.', '' )
|
50
|
+
|
51
|
+
|
27
52
|
################
|
28
53
|
## quick hack - norm(alize) all N.N. to N.N.
|
29
54
|
## e.g.
|
@@ -12,6 +12,15 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
12
12
|
end
|
13
13
|
|
14
14
|
|
15
|
+
PLACEHOLDERS = [
|
16
|
+
'N.N.',
|
17
|
+
'Verlierer HF 1',
|
18
|
+
'Verlierer HF 2',
|
19
|
+
'Sieger HF 1',
|
20
|
+
'Sieger HF 2',
|
21
|
+
]
|
22
|
+
def placeholder?( str ) PLACEHOLDERS.include?( str ); end
|
23
|
+
|
15
24
|
|
16
25
|
def matches
|
17
26
|
@matches ||= begin
|
@@ -20,10 +29,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
20
29
|
# <table class="standard_tabelle" cellpadding="3" cellspacing="1">
|
21
30
|
|
22
31
|
## note: use > for "strict" sibling (child without any in-betweens)
|
23
|
-
|
32
|
+
tables = doc.css( 'div.data > table.standard_tabelle' ) ## get table
|
24
33
|
# puts table.class.name #=> Nokogiri::XML::Element
|
25
34
|
# puts table.text
|
26
35
|
|
36
|
+
assert( tables.size==1, "expected one table.standard_tabelle; got #{tables.size}" )
|
37
|
+
table = tables.first
|
27
38
|
assert( table, 'no table.standard_tabelle found in schedule page!!')
|
28
39
|
|
29
40
|
trs = table.css( 'tr' )
|
@@ -77,8 +88,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
77
88
|
ths = tr.css( 'th' )
|
78
89
|
tds = tr.css( 'td' )
|
79
90
|
|
80
|
-
|
81
|
-
|
91
|
+
tr_text = squish( tr.text )
|
92
|
+
|
93
|
+
if tr_text =~ /Spieltag/ ||
|
94
|
+
tr_text =~ /[1-9]\.[ ]Runde|
|
82
95
|
Qual\.[ ][1-9]\.[ ]Runde| # see EL or CL Quali
|
83
96
|
Qualifikation| # see CA Championship
|
84
97
|
Sechzehntelfinale| # see EL
|
@@ -110,25 +123,24 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
110
123
|
if debug?
|
111
124
|
puts
|
112
125
|
print '[%03d] ' % i
|
113
|
-
|
114
|
-
print "round >#{tr.text.strip}<"
|
126
|
+
print "round >#{tr_text}<"
|
115
127
|
print "\n"
|
116
128
|
end
|
117
129
|
|
118
|
-
last_round = tr.text.strip
|
130
|
+
last_round = tr_text
|
119
131
|
elsif ths.count > 0 &&
|
120
132
|
tds.count == 0
|
121
133
|
## check for round NOT yet configured!!!
|
122
|
-
puts "!! WARN: found unregistered round line >#{tr.text.strip}<"
|
123
|
-
log( "!! WARN: found unregistered round line >#{tr.text.strip}< in page #{title}" )
|
134
|
+
puts "!! WARN: found unregistered round line >#{tr_text}<"
|
135
|
+
log( "!! WARN: found unregistered round line >#{tr_text}< in page #{title}" )
|
124
136
|
|
125
|
-
last_round = tr.text.strip
|
137
|
+
last_round = tr_text
|
126
138
|
else ## assume table row (tr) is match line
|
127
139
|
|
128
140
|
date_str = squish( tds[0].text )
|
129
141
|
time_str = squish( tds[1].text )
|
130
142
|
|
131
|
-
date_str = last_date_str if date_str.empty?
|
143
|
+
date_str = last_date_str if date_str.empty? && last_date_str
|
132
144
|
|
133
145
|
if debug?
|
134
146
|
## note: for debugging - print as we go along (parsing)
|
@@ -148,7 +160,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
148
160
|
else
|
149
161
|
team1_str = squish( tds[2].text )
|
150
162
|
team1_ref = nil
|
151
|
-
puts "!! WARN: no team1_ref for >#{team1_str}< found"
|
163
|
+
puts "!! WARN: no team1_ref for >#{team1_str}< found" unless placeholder?( team1_str )
|
152
164
|
end
|
153
165
|
|
154
166
|
if debug?
|
@@ -170,9 +182,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
170
182
|
else
|
171
183
|
team2_str = squish( tds[4].text )
|
172
184
|
team2_ref = nil
|
173
|
-
puts "!! WARN: no team2_ref for >#{team2_str}< found"
|
185
|
+
puts "!! WARN: no team2_ref for >#{team2_str}< found" unless placeholder?( team2_str )
|
174
186
|
end
|
175
187
|
|
188
|
+
|
176
189
|
if debug?
|
177
190
|
## note: for debugging - print as we go along (parsing)
|
178
191
|
print "%-22s | " % team2_str
|
@@ -198,8 +211,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
198
211
|
## <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
|
199
212
|
## </td>
|
200
213
|
img = tds[6].css( 'img' )[0]
|
214
|
+
|
215
|
+
|
216
|
+
|
201
217
|
if img && img[:src].index( '/live/')
|
202
|
-
puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
|
218
|
+
## puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
|
219
|
+
print " LIVE BADGE "
|
203
220
|
score_str = '-:-' # note: -:- gets replaced to ---
|
204
221
|
end
|
205
222
|
|
@@ -217,8 +234,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
217
234
|
## special case for '00.00.0000'
|
218
235
|
## CANNOT parse
|
219
236
|
## use empty date - why? why not?
|
237
|
+
## if start with 00.00. e.g. 00.00.1939
|
238
|
+
|
220
239
|
|
221
|
-
date = if date_str == '00.00.0000'
|
240
|
+
date = if date_str == '00.00.0000' ||
|
241
|
+
date_str.start_with?( '00.00.' ) ||
|
242
|
+
date_str.empty?
|
222
243
|
nil
|
223
244
|
else
|
224
245
|
Date.strptime( date_str, '%d.%m.%Y' )
|
@@ -237,7 +258,9 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
237
258
|
report_ref: score_ref
|
238
259
|
}
|
239
260
|
|
240
|
-
|
261
|
+
## note - only update last date if date present
|
262
|
+
## might be empty (not available) in the beginning
|
263
|
+
last_date_str = date_str if !date_str.empty?
|
241
264
|
end
|
242
265
|
end # each tr (table row)
|
243
266
|
|
data/lib/worldfootball.rb
CHANGED
@@ -41,6 +41,14 @@ module Worldfootball
|
|
41
41
|
def self.debug=(value) @debug = value; end
|
42
42
|
def self.debug?() @debug ||= false; end ## note: default is FALSE
|
43
43
|
|
44
|
+
def self.log( msg ) ### append to log
|
45
|
+
File.open( './logs.txt', 'a:utf-8' ) do |f|
|
46
|
+
f.write( msg )
|
47
|
+
f.write( "\n" )
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
|
44
52
|
|
45
53
|
|
46
54
|
class Configuration
|
@@ -71,6 +79,60 @@ end # module Worldfootball
|
|
71
79
|
|
72
80
|
|
73
81
|
|
82
|
+
###
|
83
|
+
# todo - move generate to generate file!!!
|
84
|
+
module Worldfootball
|
85
|
+
def self.generate( league:, season:,
|
86
|
+
overwrite: true )
|
87
|
+
season = Season( season ) ## cast (ensure) season class (NOT string, integer, etc.)
|
88
|
+
|
89
|
+
league = find_league!( league )
|
90
|
+
pages = league.pages!( season: season )
|
91
|
+
|
92
|
+
|
93
|
+
out_path = if season >= Season( '2000' )
|
94
|
+
"#{config.generate.out_dir}/#{season.to_path}/#{league.key}.txt"
|
95
|
+
else
|
96
|
+
decade = season.start_year - (season.start_year%10)
|
97
|
+
## use archive-style before 2000!!!
|
98
|
+
"#{config.generate.out_dir}/archive/#{decade}s/#{season.to_path}/#{league.key}.txt"
|
99
|
+
end
|
100
|
+
|
101
|
+
## check if output exists already
|
102
|
+
if !overwrite && File.exist?( out_path )
|
103
|
+
## skip generation
|
104
|
+
puts " OK #{league.key} #{season} (do NOT overwrite)"
|
105
|
+
return
|
106
|
+
end
|
107
|
+
|
108
|
+
|
109
|
+
## get matches
|
110
|
+
path = "#{config.convert.out_dir}/#{season.to_path}/#{league.key}.csv"
|
111
|
+
puts " ---> reading matches in #{path} ..."
|
112
|
+
matches = SportDb::CsvMatchParser.read( path )
|
113
|
+
puts " #{matches.size} matches"
|
114
|
+
|
115
|
+
## build
|
116
|
+
txt = SportDb::TxtMatchWriter.build( matches )
|
117
|
+
puts txt
|
118
|
+
|
119
|
+
|
120
|
+
buf = String.new
|
121
|
+
## note - use league key for league name for now!!
|
122
|
+
buf << "= #{league.key.upcase.gsub('.', ' ')} #{season.key}\n\n"
|
123
|
+
buf << txt
|
124
|
+
|
125
|
+
puts " writing to >#{out_path}<..."
|
126
|
+
write_text( out_path, buf )
|
127
|
+
|
128
|
+
## add to tmp too for debugging
|
129
|
+
out_path2 = "#{config.generate.out_dir}/tmp/#{league.key}/#{season.to_path}.txt"
|
130
|
+
puts " writing to >#{out_path2}<..."
|
131
|
+
write_text( out_path2, buf )
|
132
|
+
end
|
133
|
+
end # module Worldfootball
|
134
|
+
|
135
|
+
|
74
136
|
|
75
137
|
|
76
138
|
puts Worldfootball.banner ## say hello
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: worldfootball
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: football-timezones
|
@@ -106,19 +106,24 @@ dependencies:
|
|
106
106
|
requirements:
|
107
107
|
- - "~>"
|
108
108
|
- !ruby/object:Gem::Version
|
109
|
-
version: '4.
|
109
|
+
version: '4.2'
|
110
110
|
type: :development
|
111
111
|
prerelease: false
|
112
112
|
version_requirements: !ruby/object:Gem::Requirement
|
113
113
|
requirements:
|
114
114
|
- - "~>"
|
115
115
|
- !ruby/object:Gem::Version
|
116
|
-
version: '4.
|
116
|
+
version: '4.2'
|
117
117
|
description: worldfootball - get world football (leagues, cups & more) match data
|
118
118
|
via the worldfootball.net/weltfussball.de pages
|
119
119
|
email: gerald.bauer@gmail.com
|
120
120
|
executables:
|
121
121
|
- wfb
|
122
|
+
- wfbconf
|
123
|
+
- wfbconv
|
124
|
+
- wfbdump
|
125
|
+
- wfbgen
|
126
|
+
- wfbup
|
122
127
|
extensions: []
|
123
128
|
extra_rdoc_files:
|
124
129
|
- CHANGELOG.md
|
@@ -130,6 +135,11 @@ files:
|
|
130
135
|
- README.md
|
131
136
|
- Rakefile
|
132
137
|
- bin/wfb
|
138
|
+
- bin/wfbconf
|
139
|
+
- bin/wfbconv
|
140
|
+
- bin/wfbdump
|
141
|
+
- bin/wfbgen
|
142
|
+
- bin/wfbup
|
133
143
|
- config/leagues/africa.csv
|
134
144
|
- config/leagues/america.csv
|
135
145
|
- config/leagues/asia.csv
|
@@ -175,7 +185,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
175
185
|
- !ruby/object:Gem::Version
|
176
186
|
version: '0'
|
177
187
|
requirements: []
|
178
|
-
rubygems_version: 3.
|
188
|
+
rubygems_version: 3.5.22
|
179
189
|
signing_key:
|
180
190
|
specification_version: 4
|
181
191
|
summary: worldfootball - get world football (leagues, cups & more) match data via
|