worldfootball 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6bc6759b77de7fe44eccf59f09fd7b6cc3ddc4576612f4fb208c38a59a02536e
4
- data.tar.gz: 912fe7d04165b50bc3e50f5e5a1ea554d7fd788d4d019c2b719c8625dd222a3e
3
+ metadata.gz: 9d824bdedb279bf91779c99445fcf5bd5a6df8acc342232624032256a32fbe18
4
+ data.tar.gz: 14eb224174efe18645412a88eaa332e01901247dd28ffa795cba8b902d3a1dd6
5
5
  SHA512:
6
- metadata.gz: 74bf6c7e7a8aeaeb18d9bd1b74764328301b3aa3c309ca814ea494be704958a4f086e7bf94b1b1ea1ad9f974b7be31eb6e0d2a0c611b6c35b85caca06d8f6e71
7
- data.tar.gz: 7cadf69fe3acafc2f976e3f5c980c8411e8758f5426559d56860c791b473debdb4e1067ad071de6704e7da3ccc434c3af14454bed6ba32ac825c2b491467db08
6
+ metadata.gz: b0e4885e80c6ff3fb13a00a654524ea4514fcac730438096e412cd76a5ba14c17fa563996afd81176f7538bffb6ac8bdc3c75de7af7ace2124269b7c72c0265b
7
+ data.tar.gz: feb02079e9c481c3b2e53d5c72030e5e9e687f94bf053c29134da613933c2f5f6b569339fb8980ab3c048e248c171c93ea23a4b4a350c5dfede8563990b5a298
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.5
1
+ ### 0.2.6
2
2
 
3
3
  ### 0.0.1 / 2024-07-04
4
4
 
data/Rakefile CHANGED
@@ -19,6 +19,7 @@ Hoe.spec 'worldfootball' do
19
19
 
20
20
  self.extra_deps = [
21
21
  ['football-timezones'],
22
+ ['fifa'],
22
23
  ['webget'],
23
24
  ['nokogiri'],
24
25
  ]
data/config/rounds.csv CHANGED
@@ -56,3 +56,30 @@ caf.cl, Gruppe B, Group
56
56
  caf.cl, Gruppe C, Group
57
57
  caf.cl, Gruppe D, Group
58
58
 
59
+
60
+ uefa.cl, Gruppe A, Group
61
+ uefa.cl, Gruppe B, Group
62
+ uefa.cl, Gruppe C, Group
63
+ uefa.cl, Gruppe D, Group
64
+ uefa.cl, Gruppe E, Group
65
+ uefa.cl, Gruppe F, Group
66
+ uefa.cl, Gruppe G, Group
67
+ uefa.cl, Gruppe H, Group
68
+
69
+ uefa.el, Gruppe A, Group
70
+ uefa.el, Gruppe B, Group
71
+ uefa.el, Gruppe C, Group
72
+ uefa.el, Gruppe D, Group
73
+ uefa.el, Gruppe E, Group
74
+ uefa.el, Gruppe F, Group
75
+ uefa.el, Gruppe G, Group
76
+ uefa.el, Gruppe H, Group
77
+
78
+ uefa.conf, Gruppe A, Group
79
+ uefa.conf, Gruppe B, Group
80
+ uefa.conf, Gruppe C, Group
81
+ uefa.conf, Gruppe D, Group
82
+ uefa.conf, Gruppe E, Group
83
+ uefa.conf, Gruppe F, Group
84
+ uefa.conf, Gruppe G, Group
85
+ uefa.conf, Gruppe H, Group
@@ -9,6 +9,9 @@ def self.convert( league:, season: )
9
9
  pages = league.pages!( season: season )
10
10
 
11
11
 
12
+ ## collect all teams
13
+ teams_by_ref = {}
14
+
12
15
  recs = []
13
16
  pages.each do |slug, stage|
14
17
  ## note: stage might be nil
@@ -35,6 +38,30 @@ def self.convert( league:, season: )
35
38
  print "\n"
36
39
 
37
40
  rows = page.matches
41
+
42
+ teams = page.teams
43
+ ## e.g. {:count=>2, :name=>"AS Arta", :ref=>"as-arta"},
44
+ ## {:count=>4, :name=>"Dekedaha FC", :ref=>"dekedaha-fc"},
45
+ ## ...
46
+ teams.each do |h|
47
+ team_count = h[:count]
48
+ team_name = norm_team( h[:name] ) ## note: norm team name!!!
49
+ team_ref = h[:ref]
50
+
51
+ ## note: skip N.N. (placeholder team)
52
+ ## team_ref is nil etc.
53
+ next if team_name == 'N.N.'
54
+
55
+ team_stat = teams_by_ref[ team_ref ] ||= { count: 0,
56
+ name: team_name }
57
+ team_stat[:count] += team_count
58
+ if team_name != team_stat[:name]
59
+ puts "!! ASSERT ERROR - team ref with differet names; expected #{team_stat[:name]} - got #{team_name}"
60
+ exit 1
61
+ end
62
+ end
63
+
64
+
38
65
  stage_recs = build( rows,
39
66
  season: season,
40
67
  league: league.key,
@@ -45,6 +72,60 @@ def self.convert( league:, season: )
45
72
  end
46
73
 
47
74
 
75
+ clubs_intl = ['uefa.cl', 'uefa.el', 'uefa.conf',
76
+ 'copa.l',
77
+ 'caf.cl',
78
+ 'afl'].include?(league.key) ? true : false
79
+
80
+ ####
81
+ # auto-add (fifa) country code if int'l club tournament
82
+ if clubs_intl
83
+ ##
84
+ ## get country codes for team ref
85
+ teams_by_ref.each do |team_slug, h|
86
+
87
+ Metal.download_team( team_slug, cache: true )
88
+ team_page = Page::Team.from_cache( team_slug )
89
+ props = team_page.props
90
+ pp props
91
+ country_name = props[:country]
92
+ cty = Fifa.world.find_by_name( country_name )
93
+ if cty.nil?
94
+ puts "!! ERROR - no country found for #{country_name}"
95
+ exit 1
96
+ end
97
+ h[:code] = cty.code
98
+ end
99
+
100
+ ## generate lookup by name
101
+ teams_by_name = teams_by_ref.reduce( {} ) do |h, (slug,rec)|
102
+ h[ rec[:name]] = rec
103
+ h
104
+ end
105
+
106
+ #####
107
+ ## dump team refs
108
+ puts " #{teams_by_ref.size} team(s) by ref:"
109
+ pp teams_by_name
110
+
111
+ ## quick hack
112
+ ## add country (fifa) codes to team names
113
+ recs.each do |rec|
114
+ team1_org = rec[5]
115
+ if team1_org != 'N.N.' ## note - skip place holder; keep as-is
116
+ country_code = teams_by_name[team1_org][:code]
117
+ rec[5] = "#{team1_org} (#{country_code})"
118
+ end
119
+
120
+ team2_org = rec[8]
121
+ if team2_org != 'N.N.' ## note - skip place holder; keep as-is
122
+ country_code = teams_by_name[team2_org][:code]
123
+ rec[8] = "#{team2_org} (#{country_code})"
124
+ end
125
+ end
126
+ end
127
+
128
+
48
129
  ## note: sort matches by date before saving/writing!!!!
49
130
  ## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
50
131
  ## note: assume date is third column!!! (stage/round/date/...)
@@ -24,7 +24,10 @@ class LeagueItem # nested inside LeagueConfig
24
24
  ### todo/fix:
25
25
  ## use from cache if not older than 1 (or 5/10?) hour(s) or such
26
26
  ## why? why not?
27
- Worldfootball::Metal.download_schedule( @slug )
27
+ schedule_url = Worldfootball::Metal.schedule_url( @slug )
28
+ if Webcache.expired_in_1d?( schedule_url )
29
+ Worldfootball::Metal.download_schedule( @slug )
30
+ end
28
31
  page = Worldfootball::Page::Schedule.from_cache( @slug )
29
32
 
30
33
  ## pp page.seasons
@@ -24,6 +24,12 @@ def self.norm_team( team )
24
24
  ## others too? - move to mods instead of generic rule - why? why not?
25
25
  team = team.sub( /[ ]+\(A\)/, ' II' )
26
26
 
27
+ ################
28
+ ## quick hack - norm(alize) all placeholder team names to N.N.
29
+ ## e.g. 'Sieger HF 1' or 'Sieger HF 2' => 'N.N.'
30
+ team = 'N.N.' if ['Sieger HF 1',
31
+ 'Sieger HF 2'].include?( team )
32
+
27
33
  team
28
34
  end
29
35
 
@@ -85,9 +85,13 @@ GENERATED_RE = %r{
85
85
  end
86
86
  end
87
87
 
88
+
88
89
  ######################
89
90
  ## helper methods
90
91
 
92
+ def debug?() Worldfootball.debug?; end
93
+
94
+
91
95
  def squish( str )
92
96
  str = str.strip
93
97
  str = str.gsub( "\u{00A0}", ' ' ) # Unicode Character 'NO-BREAK SPACE' (U+00A0)
@@ -51,7 +51,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
51
51
  ## <td colspan="2"></td>
52
52
  ## </tr>
53
53
  ##
54
- # <tr class="e2-parent" data-liga_id="530" data-gs_match_id="10259222"
54
+ # <tr class="e2-parent" data-liga_id="530" data-gs_match_id="10259222"
55
55
  # style="display:none;">
56
56
  ## <td colspan="2"></td>
57
57
  ## <td colspan="3">
@@ -68,7 +68,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
68
68
 
69
69
  i += 1
70
70
 
71
- ## puts "[debug] row #{i} >#{tr.text.strip}<"
71
+ ## puts "[debug] row #{i} >#{tr.text.strip}<"
72
72
 
73
73
  ### note - assume for now match lines use tds
74
74
  ## and round lines use ths (NOT tds)!!
@@ -76,7 +76,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
76
76
 
77
77
  ths = tr.css( 'th' )
78
78
  tds = tr.css( 'td' )
79
-
79
+
80
80
  if tr.text.strip =~ /Spieltag/ ||
81
81
  tr.text.strip =~ /[1-9]\.[ ]Runde|
82
82
  Qual\.[ ][1-9]\.[ ]Runde| # see EL or CL Quali
@@ -93,28 +93,30 @@ class Schedule < Page ## note: use nested class for now - why? why not?
93
93
  Spiele| # see Serie A 1960-61 Relegation
94
94
  3\.[ ]Platz| # see bra-serie-a-2000-yellow-module-playoffs
95
95
  Spiel[ ]um[ ]Platz[ ]3| # see campeonato-2009-cuadrangulares-deportivo-cuenca-cs-emelec
96
- Relegation| # see egy-premiership-2013-2014-abstiegsplayoff
97
- Copa[ ]Libertadores| # see ecu-campeonato-2012-segunda-etapa-playoffs
96
+ Relegation| # see egy-premiership-2013-2014-abstiegsplayoff
97
+ Copa[ ]Libertadores| # see ecu-campeonato-2012-segunda-etapa-playoffs
98
98
  Copa[ ]Sudamericana| # see campeonato-2012-liguilla-final-playoffs-cs-emelec-ldu-quito
99
- Repechaje| # see nca-liga-primera-2023-2024-clausura-playoffs
100
- Final[ ]de[ ]Grupos| # see hon-liga-nacional-2020-2021-clausura-playoffs
99
+ Repechaje| # see nca-liga-primera-2023-2024-clausura-playoffs
100
+ Final[ ]de[ ]Grupos| # see hon-liga-nacional-2020-2021-clausura-playoffs
101
101
  Gran[ ]Final| # see liga-nacional-2020-2021-apertura-playoffs-finale-olimpia-motagua
102
102
  Finalrunde| # see hon-liga-nacional-2019-2020-apertura-pentagonal
103
- Zona[ ]A| # see gua-liga-nacional-2020-2021-clausura
104
- Zona[ ]B| # see liga-nacional-2020-2021-clausura-zona-a-comunicaciones-deportivo-malacateco
103
+ Zona[ ]A| # see gua-liga-nacional-2020-2021-clausura
104
+ Zona[ ]B| # see liga-nacional-2020-2021-clausura-zona-a-comunicaciones-deportivo-malacateco
105
105
  Interzone| # see liga-nacional-2020-2021-clausura-zona-b-achuapa-sanarate
106
106
  Final[ ]Segunda[ ]Ronda| # see crc-primera-division-2018-2019-apertura-playoffs
107
107
  Quadrangular # see crc-primera-division-2016-2017-verano-playoffs
108
108
  /x
109
-
110
- puts
111
- print '[%03d] ' % i
112
- ## print squish( tr.text )
113
- print "round >#{tr.text.strip}<"
114
- print "\n"
109
+
110
+ if debug?
111
+ puts
112
+ print '[%03d] ' % i
113
+ ## print squish( tr.text )
114
+ print "round >#{tr.text.strip}<"
115
+ print "\n"
116
+ end
115
117
 
116
118
  last_round = tr.text.strip
117
- elsif ths.count > 0 &&
119
+ elsif ths.count > 0 &&
118
120
  tds.count == 0
119
121
  ## check for round NOT yet configured!!!
120
122
  puts "!! WARN: found unregistered round line >#{tr.text.strip}<"
@@ -128,11 +130,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
128
130
 
129
131
  date_str = last_date_str if date_str.empty?
130
132
 
131
- ## note: for debugging - print as we go along (parsing)
132
- print '[%03d] ' % i
133
- print "%-10s | " % date_str
134
- print "%-5s | " % time_str
135
-
133
+ if debug?
134
+ ## note: for debugging - print as we go along (parsing)
135
+ print '[%03d] ' % i
136
+ print "%-10s | " % date_str
137
+ print "%-5s | " % time_str
138
+ end
136
139
 
137
140
  # was: team1_str = squish( tds[2].text )
138
141
 
@@ -148,8 +151,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
148
151
  puts "!! WARN: no team1_ref for >#{team1_str}< found"
149
152
  end
150
153
 
151
- ## note: for debugging - print as we go along (parsing)
152
- print "%-22s | " % team1_str
154
+ if debug?
155
+ ## note: for debugging - print as we go along (parsing)
156
+ print "%-22s | " % team1_str
157
+ end
153
158
 
154
159
  ## <td> - </td>
155
160
  ## e.g. -
@@ -168,9 +173,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
168
173
  puts "!! WARN: no team2_ref for >#{team2_str}< found"
169
174
  end
170
175
 
171
- ## note: for debugging - print as we go along (parsing)
172
- print "%-22s | " % team2_str
173
-
176
+ if debug?
177
+ ## note: for debugging - print as we go along (parsing)
178
+ print "%-22s | " % team2_str
179
+ end
174
180
 
175
181
 
176
182
  ### was: score_str = squish( tds[5].text )
@@ -197,11 +203,11 @@ class Schedule < Page ## note: use nested class for now - why? why not?
197
203
  score_str = '-:-' # note: -:- gets replaced to ---
198
204
  end
199
205
 
200
-
201
- print "%-10s | " % score_str
202
- print (score_ref ? score_ref : 'n/a')
203
- print "\n"
204
-
206
+ if debug?
207
+ print "%-10s | " % score_str
208
+ print (score_ref ? score_ref : 'n/a')
209
+ print "\n"
210
+ end
205
211
 
206
212
  ## change 2:1 (1:1) to 2-1 (1-1)
207
213
  score_str = score_str.gsub( ':', '-' )
@@ -214,7 +220,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
214
220
 
215
221
  date = if date_str == '00.00.0000'
216
222
  nil
217
- else
223
+ else
218
224
  Date.strptime( date_str, '%d.%m.%Y' )
219
225
  end
220
226
 
@@ -10,7 +10,100 @@ class Team < Page ## note: use nested class for now - why? why not?
10
10
  new( html )
11
11
  end
12
12
 
13
+ ## rename to properties or attributes or ??
14
+ ## or infobox or data or ?? why? why not?
15
+ def props
16
+ sidebar = doc.css( 'div.sidebar' ).first
17
+ assert( sidebar, 'no div.sidebar found in team page!!' )
13
18
 
19
+ h2 = sidebar.css( 'h2' ).first
20
+ assert( h2, 'no h2 inside div.sidebar found in team page!!')
21
+ name = h2.text
22
+
23
+ table = sidebar.css( 'table' ).first
24
+ assert( table, 'no table inside div.sidebar found in team page!!')
25
+
26
+ long_name = nil
27
+ country = nil
28
+ ground = nil
29
+ founded = nil
30
+
31
+ trs = table.css( 'tr' )
32
+ trs.each_with_index do |tr,i|
33
+ text = squish(tr.text)
34
+ ## puts "[#{i+1}] >#{text}<"
35
+
36
+ ## assume long name is in the first row if NOT starting w/ Land:
37
+ if i==0 && !text.start_with?( 'Land' )
38
+ long_name = text
39
+ end
40
+
41
+ if text.start_with?( 'Land:' )
42
+ country = text.sub( 'Land:', '' ).strip
43
+ end
44
+
45
+ if text.start_with?( 'gegründet:' )
46
+ founded = text.sub( 'gegründet:', '' ).strip
47
+ end
48
+
49
+ if text.start_with?( 'Stadion:' )
50
+ ground = text.sub( 'Stadion:', '' ).strip
51
+ end
52
+ end
53
+
54
+ data = { name: name }
55
+ data[ :long_name] = long_name if long_name
56
+ data[ :country ] = country if country
57
+ data[ :founded ] = founded if founded && founded != '0000'
58
+ data[ :ground] = ground if ground && ground != '----------'
59
+
60
+ data
61
+ end
62
+
63
+ =begin
64
+ <div class="sidebar">
65
+
66
+ <div class="box emblemwrapper">
67
+ <div class="head">
68
+ <h2>Al Ahly SC</h2>
69
+ </div>
70
+ <div class="data " align="center">
71
+ <div class="emblem"><a href="/teams/al-ahly-sc/"><img src="https://s.hs-data.com/bilder/wappen/mittel/1480.gif?fallback=png" border="0" width="100" hspace="5" vspace="5" alt="Al Ahly SC" title="Al Ahly SC" /></a></div>
72
+ <div class="emblem_background"><a href="/teams/al-ahly-sc/"><img src="https://s.hs-data.com/bilder/wappen/mittel/1480.gif?fallback=png" border="0" width="100" hspace="5" vspace="5" alt="Al Ahly SC" title="Al Ahly SC" /></a></div>
73
+ </div>
74
+ <div class="data">
75
+ <table class="standard_tabelle yellow" cellpadding="3" cellspacing="0">
76
+ <tr>
77
+ <td colspan="2" align="center">Al Ahly Sporting Club</td>
78
+ </tr>
79
+ <tr>
80
+ <td colspan="2">&nbsp;</td>
81
+ </tr>
82
+ <tr>
83
+ <td align="right"><b>Land:</b></td>
84
+ <td>
85
+ <img src="https://s.hs-data.com/bilder/flaggen_neu/68.gif" width="18" height="12" hspace="5" title="Ägypten" align="absmiddle" />
86
+ Ägypten </td>
87
+ </tr>
88
+ <tr>
89
+ <td align="right"><b>gegründet:</b></td>
90
+ <td>24.04.1907</td>
91
+ </tr>
92
+ <tr>
93
+ <td align="right"><b>Stadion:</b></td>
94
+ <td><a href="/spielorte/international-stadium-cairo/" title="International Stadium">International Stadium</a></td>
95
+ </tr>
96
+ <tr>
97
+ <td align="right"><b>Homepage:</b></td>
98
+ <td><a href="http://alahlyegypt.com/" target="_blank">alahlyegypt.com/</a></td>
99
+ </tr>
100
+ <tr>
101
+ <td colspan="2" align="right"><b><a href="/teams/al-ahly-sc/1/" title="Weitere Infos zu Al Ahly SC">zum Profil &raquo;</a></b></td>
102
+ </tr>
103
+ </table>
104
+ </div>
105
+ </div>
106
+ =end
14
107
 
15
108
 
16
109
  ######
@@ -2,15 +2,15 @@ module Worldfootball
2
2
 
3
3
 
4
4
  MAX_HEADERS = [
5
- 'Stage',
6
- 'Round',
7
- 'Date',
8
- 'Time',
9
- 'Timezone',
10
- 'Team 1',
11
- 'FT',
12
- 'HT',
13
- 'Team 2',
5
+ 'Stage', # 0
6
+ 'Round', # 1
7
+ 'Date', # 2
8
+ 'Time', # 3
9
+ 'Timezone', #4
10
+ 'Team 1', #5
11
+ 'FT', #6
12
+ 'HT', #7
13
+ 'Team 2', #8
14
14
  'ET',
15
15
  'P',
16
16
  'Comments', ## e.g. awarded, cancelled/canceled, etc.
@@ -2,7 +2,7 @@
2
2
  module Worldfootball
3
3
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
4
4
  MINOR = 2
5
- PATCH = 5
5
+ PATCH = 6
6
6
  VERSION = [MAJOR,MINOR,PATCH].join('.')
7
7
 
8
8
  def self.version
data/lib/worldfootball.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  ## 3rd party (our own)
2
2
  require 'football/timezones' ## note - pulls in season/formats, cocos & tzinfo
3
+ require 'fifa'
4
+
3
5
  require 'webget' ## incl. webget, webcache, webclient, etc.
4
6
  require 'nokogiri'
5
7
 
@@ -31,6 +33,13 @@ require_relative 'worldfootball/convert_reports'
31
33
 
32
34
  module Worldfootball
33
35
 
36
+ #########
37
+ ## add a global debug flag
38
+ def self.debug=(value) @debug = value; end
39
+ def self.debug?() @debug ||= false; end ## note: default is FALSE
40
+
41
+
42
+
34
43
  class Configuration
35
44
  #########
36
45
  ## nested configuration classes - use - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: worldfootball
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-10-03 00:00:00.000000000 Z
11
+ date: 2024-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: football-timezones
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: fifa
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: webget
29
43
  requirement: !ruby/object:Gem::Requirement