worldfootball 0.2.6 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +1 -2
- data/Manifest.txt +11 -6
- data/README.md +45 -3
- data/Rakefile +1 -0
- data/bin/wfb +70 -14
- data/bin/wfbconf +55 -0
- data/bin/wfbconv +100 -0
- data/bin/wfbdump +76 -0
- data/bin/wfbgen +102 -0
- data/bin/wfbup +101 -0
- data/config/{leagues_america.csv → leagues/america.csv} +31 -2
- data/config/{leagues_europe.csv → leagues/europe.csv} +39 -5
- data/config/rounds.csv +6 -0
- data/config/stages.csv +16 -32
- data/lib/worldfootball/build-parse_score.rb +8 -3
- data/lib/worldfootball/convert.rb +52 -17
- data/lib/worldfootball/download.rb +6 -2
- data/lib/worldfootball/leagues.rb +23 -10
- data/lib/worldfootball/mods.rb +28 -3
- data/lib/worldfootball/page_schedule.rb +38 -15
- data/lib/worldfootball/version.rb +2 -2
- data/lib/worldfootball.rb +72 -1
- metadata +35 -11
- /data/config/{leagues_africa.csv → leagues/africa.csv} +0 -0
- /data/config/{leagues_asia.csv → leagues/asia.csv} +0 -0
- /data/config/{leagues_middle_east.csv → leagues/middle_east.csv} +0 -0
- /data/config/{leagues_pacific.csv → leagues/pacific.csv} +0 -0
@@ -12,6 +12,15 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
12
12
|
end
|
13
13
|
|
14
14
|
|
15
|
+
## Team-name strings treated as placeholders (that is, NOT real teams).
##   note: the "no team ref found" warning gets skipped for these names.
##   'HF' is presumably short for Halbfinale (semi-final), that is,
##   loser (Verlierer) / winner (Sieger) of semi-final 1 / 2
##     - todo/check: confirm
PLACEHOLDERS = [
  'N.N.',
  'Verlierer HF 1',
  'Verlierer HF 2',
  'Sieger HF 1',
  'Sieger HF 2',
].freeze     ## note: frozen - shared list must NOT get changed by accident

## Returns true if str is exactly one of the PLACEHOLDERS.
##   note: exact match only (no strip/squish done here) -
##         pass in the cleaned-up team name
def placeholder?( str ) PLACEHOLDERS.include?( str ); end
|
23
|
+
|
15
24
|
|
16
25
|
def matches
|
17
26
|
@matches ||= begin
|
@@ -20,10 +29,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
20
29
|
# <table class="standard_tabelle" cellpadding="3" cellspacing="1">
|
21
30
|
|
22
31
|
## note: use > for "strict" sibling (child without any in-betweens)
|
23
|
-
|
32
|
+
tables = doc.css( 'div.data > table.standard_tabelle' ) ## get table
|
24
33
|
# puts table.class.name #=> Nokogiri::XML::Element
|
25
34
|
# puts table.text
|
26
35
|
|
36
|
+
assert( tables.size==1, "expected one table.standard_tabelle; got #{tables.size}" )
|
37
|
+
table = tables.first
|
27
38
|
assert( table, 'no table.standard_tabelle found in schedule page!!')
|
28
39
|
|
29
40
|
trs = table.css( 'tr' )
|
@@ -77,8 +88,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
77
88
|
ths = tr.css( 'th' )
|
78
89
|
tds = tr.css( 'td' )
|
79
90
|
|
80
|
-
|
81
|
-
|
91
|
+
tr_text = squish( tr.text )
|
92
|
+
|
93
|
+
if tr_text =~ /Spieltag/ ||
|
94
|
+
tr_text =~ /[1-9]\.[ ]Runde|
|
82
95
|
Qual\.[ ][1-9]\.[ ]Runde| # see EL or CL Quali
|
83
96
|
Qualifikation| # see CA Championship
|
84
97
|
Sechzehntelfinale| # see EL
|
@@ -110,25 +123,24 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
110
123
|
if debug?
|
111
124
|
puts
|
112
125
|
print '[%03d] ' % i
|
113
|
-
|
114
|
-
print "round >#{tr.text.strip}<"
|
126
|
+
print "round >#{tr_text}<"
|
115
127
|
print "\n"
|
116
128
|
end
|
117
129
|
|
118
|
-
last_round =
|
130
|
+
last_round = tr_text
|
119
131
|
elsif ths.count > 0 &&
|
120
132
|
tds.count == 0
|
121
133
|
## check for round NOT yet configured!!!
|
122
|
-
puts "!! WARN: found unregistered round line >#{
|
123
|
-
log( "!! WARN: found unregistered round line >#{
|
134
|
+
puts "!! WARN: found unregistered round line >#{tr_text}<"
|
135
|
+
log( "!! WARN: found unregistered round line >#{tr_text}< in page #{title}" )
|
124
136
|
|
125
|
-
last_round =
|
137
|
+
last_round = tr_text
|
126
138
|
else ## assume table row (tr) is match line
|
127
139
|
|
128
140
|
date_str = squish( tds[0].text )
|
129
141
|
time_str = squish( tds[1].text )
|
130
142
|
|
131
|
-
date_str = last_date_str if date_str.empty?
|
143
|
+
date_str = last_date_str if date_str.empty? && last_date_str
|
132
144
|
|
133
145
|
if debug?
|
134
146
|
## note: for debugging - print as we go along (parsing)
|
@@ -148,7 +160,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
148
160
|
else
|
149
161
|
team1_str = squish( tds[2].text )
|
150
162
|
team1_ref = nil
|
151
|
-
puts "!! WARN: no team1_ref for >#{team1_str}< found"
|
163
|
+
puts "!! WARN: no team1_ref for >#{team1_str}< found" unless placeholder?( team1_str )
|
152
164
|
end
|
153
165
|
|
154
166
|
if debug?
|
@@ -170,9 +182,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
170
182
|
else
|
171
183
|
team2_str = squish( tds[4].text )
|
172
184
|
team2_ref = nil
|
173
|
-
puts "!! WARN: no team2_ref for >#{team2_str}< found"
|
185
|
+
puts "!! WARN: no team2_ref for >#{team2_str}< found" unless placeholder?( team2_str )
|
174
186
|
end
|
175
187
|
|
188
|
+
|
176
189
|
if debug?
|
177
190
|
## note: for debugging - print as we go along (parsing)
|
178
191
|
print "%-22s | " % team2_str
|
@@ -198,8 +211,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
198
211
|
## <img src="https://s.hs-data.com/bilder/shared/live/2.png" /></a>
|
199
212
|
## </td>
|
200
213
|
img = tds[6].css( 'img' )[0]
|
214
|
+
|
215
|
+
|
216
|
+
|
201
217
|
if img && img[:src].index( '/live/')
|
202
|
-
puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
|
218
|
+
## puts "!! WARN: live match badge, resetting score from #{score_str} to -:-"
|
219
|
+
print " LIVE BADGE "
|
203
220
|
score_str = '-:-' # note: -:- gets replaced to ---
|
204
221
|
end
|
205
222
|
|
@@ -217,8 +234,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
217
234
|
## special case for '00.00.0000'
|
218
235
|
## CANNOT parse
|
219
236
|
## use empty date - why? why not?
|
237
|
+
## if start with 00.00. e.g. 00.00.1939
|
238
|
+
|
220
239
|
|
221
|
-
date = if date_str == '00.00.0000'
|
240
|
+
date = if date_str == '00.00.0000' ||
|
241
|
+
date_str.start_with?( '00.00.' ) ||
|
242
|
+
date_str.empty?
|
222
243
|
nil
|
223
244
|
else
|
224
245
|
Date.strptime( date_str, '%d.%m.%Y' )
|
@@ -237,7 +258,9 @@ class Schedule < Page ## note: use nested class for now - why? why not?
|
|
237
258
|
report_ref: score_ref
|
238
259
|
}
|
239
260
|
|
240
|
-
|
261
|
+
## note - only update last date if date present
|
262
|
+
## might be empty (not available) in the beginning
|
263
|
+
last_date_str = date_str if !date_str.empty?
|
241
264
|
end
|
242
265
|
end # each tr (table row)
|
243
266
|
|
data/lib/worldfootball.rb
CHANGED
@@ -6,6 +6,9 @@ require 'webget' ## incl. webget, webcache, webclient, etc.
|
|
6
6
|
require 'nokogiri'
|
7
7
|
|
8
8
|
|
9
|
+
require 'sportdb/writers'
|
10
|
+
|
11
|
+
|
9
12
|
###
|
10
13
|
# our own code
|
11
14
|
require_relative 'worldfootball/version'
|
@@ -38,6 +41,14 @@ module Worldfootball
|
|
38
41
|
## Switch debug mode on/off (value gets stored as-is in @debug).
def self.debug=(value)
  @debug = value
end
|
39
42
|
## Is debug mode switched on?
##   note: default is FALSE (and gets stored in @debug on first ask)
def self.debug?
  @debug = false unless @debug
  @debug
end
|
40
43
|
|
44
|
+
## Append msg plus a newline to a (utf-8) log file.
##   msg  - message to log; one line gets added per call
##   path - log file to append to; defaults to './logs.txt'
##            (relative to the current working directory)
##   note: the file gets opened & closed on every call, that is,
##          nothing is kept open in-between.
def self.log( msg, path: './logs.txt' )   ### append to log
  File.open( path, 'a:utf-8' ) do |f|
    f.write( msg )
    f.write( "\n" )
  end
end
|
50
|
+
|
51
|
+
|
41
52
|
|
42
53
|
|
43
54
|
class Configuration
|
@@ -47,10 +58,16 @@ class Configuration
|
|
47
58
|
## Output directory; falls back to './o' if none was set.
def out_dir
  return @out_dir if @out_dir
  './o'
end
|
48
59
|
## Set the output directory (value gets stored as-is in @out_dir).
def out_dir=(value)
  @out_dir = value
end
|
49
60
|
end
|
61
|
+
## Settings for the generate step; read via config.generate.
##   out_dir - output directory; './o' if not set
class Generate
|
62
|
+
def out_dir() @out_dir || './o'; end
|
63
|
+
def out_dir=(value) @out_dir = value; end
|
64
|
+
end
|
50
65
|
|
51
|
-
def convert() @convert
|
66
|
+
## Settings for the convert step (built on first use, then reused).
def convert
  @convert = Convert.new unless @convert
  @convert
end
|
67
|
+
## Settings for the generate step (built on first use, then reused).
def generate
  @generate = Generate.new unless @generate
  @generate
end
|
52
68
|
end # class Configuration
|
53
69
|
|
70
|
+
|
54
71
|
## lets you use
|
55
72
|
## Worldfootball.configure do |config|
|
56
73
|
## config.convert.out_dir = './o'
|
@@ -62,6 +79,60 @@ end # module Worldfootball
|
|
62
79
|
|
63
80
|
|
64
81
|
|
82
|
+
###
|
83
|
+
# todo - move generate to generate file!!!
|
84
|
+
module Worldfootball
  ## Generate the match schedule in text (.txt) format for a league & season
  ##  from the (already converted) matches in .csv.
  ##
  ##   league:    - league (key); looked up via find_league!
  ##   season:    - season; cast via Season(), that is,
  ##                 string, integer, etc. may get passed in
  ##   overwrite: - if false, skip if the output file exists already
  ##                 (default: true)
  ##
  ##  reads   <config.convert.out_dir>/<season>/<league>.csv
  ##  writes  <config.generate.out_dir>/<season>/<league>.txt  or
  ##          <config.generate.out_dir>/archive/<decade>s/<season>/<league>.txt
  ##             for seasons before 2000
  ##  plus a copy in <config.generate.out_dir>/tmp/<league>/<season>.txt
  def self.generate( league:, season:,
                     overwrite: true )
    season = Season( season )   ## cast (ensure) season class (NOT string, integer, etc.)

    league = find_league!( league )
    ## note: result NOT used; call kept as before -
    ##   presumably raises if no pages configured for season - todo/check: confirm
    league.pages!( season: season )


    out_path = if season >= Season( '2000' )
                 "#{config.generate.out_dir}/#{season.to_path}/#{league.key}.txt"
               else
                 decade = season.start_year - (season.start_year%10)
                 ## use archive-style before 2000!!!
                 "#{config.generate.out_dir}/archive/#{decade}s/#{season.to_path}/#{league.key}.txt"
               end

    ## check if output exists already
    if !overwrite && File.exist?( out_path )
      ## skip generation
      puts " OK #{league.key} #{season} (do NOT overwrite)"
      return
    end


    ## get matches
    path = "#{config.convert.out_dir}/#{season.to_path}/#{league.key}.csv"
    puts " ---> reading matches in #{path} ..."
    matches = SportDb::CsvMatchParser.read( path )
    puts " #{matches.size} matches"

    ## build
    txt = SportDb::TxtMatchWriter.build( matches )
    puts txt


    buf = String.new
    ## note - use league key for league name for now!!
    buf << "= #{league.key.upcase.gsub('.', ' ')} #{season.key}\n\n"
    buf << txt

    puts " writing to >#{out_path}<..."
    write_text( out_path, buf )

    ## add to tmp too for debugging
    out_path2 = "#{config.generate.out_dir}/tmp/#{league.key}/#{season.to_path}.txt"
    puts " writing to >#{out_path2}<..."
    write_text( out_path2, buf )
  end
end # module Worldfootball
|
134
|
+
|
135
|
+
|
65
136
|
|
66
137
|
|
67
138
|
puts Worldfootball.banner ## say hello
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: worldfootball
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.6
|
4
|
+
version: 0.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: football-timezones
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: sportdb-writers
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: rdoc
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -92,19 +106,24 @@ dependencies:
|
|
92
106
|
requirements:
|
93
107
|
- - "~>"
|
94
108
|
- !ruby/object:Gem::Version
|
95
|
-
version: '4.
|
109
|
+
version: '4.2'
|
96
110
|
type: :development
|
97
111
|
prerelease: false
|
98
112
|
version_requirements: !ruby/object:Gem::Requirement
|
99
113
|
requirements:
|
100
114
|
- - "~>"
|
101
115
|
- !ruby/object:Gem::Version
|
102
|
-
version: '4.
|
116
|
+
version: '4.2'
|
103
117
|
description: worldfootball - get world football (leagues, cups & more) match data
|
104
118
|
via the worldfootball.net/weltfussball.de pages
|
105
119
|
email: gerald.bauer@gmail.com
|
106
120
|
executables:
|
107
121
|
- wfb
|
122
|
+
- wfbconf
|
123
|
+
- wfbconv
|
124
|
+
- wfbdump
|
125
|
+
- wfbgen
|
126
|
+
- wfbup
|
108
127
|
extensions: []
|
109
128
|
extra_rdoc_files:
|
110
129
|
- CHANGELOG.md
|
@@ -116,12 +135,17 @@ files:
|
|
116
135
|
- README.md
|
117
136
|
- Rakefile
|
118
137
|
- bin/wfb
|
119
|
-
-
|
120
|
-
-
|
121
|
-
-
|
122
|
-
-
|
123
|
-
-
|
124
|
-
- config/
|
138
|
+
- bin/wfbconf
|
139
|
+
- bin/wfbconv
|
140
|
+
- bin/wfbdump
|
141
|
+
- bin/wfbgen
|
142
|
+
- bin/wfbup
|
143
|
+
- config/leagues/africa.csv
|
144
|
+
- config/leagues/america.csv
|
145
|
+
- config/leagues/asia.csv
|
146
|
+
- config/leagues/europe.csv
|
147
|
+
- config/leagues/middle_east.csv
|
148
|
+
- config/leagues/pacific.csv
|
125
149
|
- config/rounds.csv
|
126
150
|
- config/stages.csv
|
127
151
|
- lib/worldfootball.rb
|
@@ -161,7 +185,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
161
185
|
- !ruby/object:Gem::Version
|
162
186
|
version: '0'
|
163
187
|
requirements: []
|
164
|
-
rubygems_version: 3.
|
188
|
+
rubygems_version: 3.5.22
|
165
189
|
signing_key:
|
166
190
|
specification_version: 4
|
167
191
|
summary: worldfootball - get world football (leagues, cups & more) match data via
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|