worldfootball 0.2.4 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0dfb778159b88ccab0c79abd75512a5751e148c96f5f2c1ecec6a1ea95b4eaa7
4
- data.tar.gz: 907c8f61a222068bc591260c6928f08c5ab76fc3fa0f826b8a83e4aba673f6a3
3
+ metadata.gz: 9d824bdedb279bf91779c99445fcf5bd5a6df8acc342232624032256a32fbe18
4
+ data.tar.gz: 14eb224174efe18645412a88eaa332e01901247dd28ffa795cba8b902d3a1dd6
5
5
  SHA512:
6
- metadata.gz: 3d008e4768814d2d290d71a73a82f7d8b83812e75dffc6a34fa3a32778634b7160b1642d1741ba84546e89f127965f9822db7c77c47ba2b3c01adb679523256a
7
- data.tar.gz: '09605deda7a4431e396747d2b8cca60f1aaae662249999e0b585427d53721328cf2d74beb063f4423034c0bee3e6caf1c3037e87a5ad948836a6f8371c0bfaf6'
6
+ metadata.gz: b0e4885e80c6ff3fb13a00a654524ea4514fcac730438096e412cd76a5ba14c17fa563996afd81176f7538bffb6ac8bdc3c75de7af7ace2124269b7c72c0265b
7
+ data.tar.gz: feb02079e9c481c3b2e53d5c72030e5e9e687f94bf053c29134da613933c2f5f6b569339fb8980ab3c048e248c171c93ea23a4b4a350c5dfede8563990b5a298
data/CHANGELOG.md CHANGED
@@ -1,4 +1,4 @@
1
- ### 0.2.4
1
+ ### 0.2.6
2
2
 
3
3
  ### 0.0.1 / 2024-07-04
4
4
 
data/Manifest.txt CHANGED
@@ -3,6 +3,7 @@ Manifest.txt
3
3
  README.md
4
4
  Rakefile
5
5
  bin/wfb
6
+ config/leagues_africa.csv
6
7
  config/leagues_america.csv
7
8
  config/leagues_asia.csv
8
9
  config/leagues_europe.csv
@@ -22,6 +23,7 @@ lib/worldfootball/mods.rb
22
23
  lib/worldfootball/page.rb
23
24
  lib/worldfootball/page_report.rb
24
25
  lib/worldfootball/page_schedule.rb
26
+ lib/worldfootball/page_team.rb
25
27
  lib/worldfootball/stages.rb
26
28
  lib/worldfootball/vacuum.rb
27
29
  lib/worldfootball/version.rb
data/Rakefile CHANGED
@@ -19,6 +19,7 @@ Hoe.spec 'worldfootball' do
19
19
 
20
20
  self.extra_deps = [
21
21
  ['football-timezones'],
22
+ ['fifa'],
22
23
  ['webget'],
23
24
  ['nokogiri'],
24
25
  ]
data/bin/wfb CHANGED
@@ -45,17 +45,17 @@ parser = OptionParser.new do |parser|
45
45
 
46
46
  parser.on( "--cache", "--cached", "--offline",
47
47
  "use cached data in #{Webcache.root}" ) do |cached|
48
- opts[:cached] = cached
48
+ opts[:cached] = true
49
49
  end
50
50
 
51
51
  parser.on( "--[no-]convert",
52
52
  "turn on/off conversion to .csv in #{Worldfootball.config.convert.out_dir} - default is (#{opts[:convert]})" ) do |convert|
53
- opts[:convert] = convert
53
+ opts[:convert] = convert # true|false
54
54
  end
55
55
 
56
56
  parser.on( "--print", "--pp",
57
57
  "pretty print cached data in #{Webcache.root}; no download & conversion") do |print|
58
- opts[:cached] = true
58
+ opts[:cached] = true
59
59
  opts[:convert] = false
60
60
  end
61
61
 
@@ -0,0 +1,8 @@
1
+ key, slug
2
+
3
+ afl, african-football-league-2023
4
+ caf.cl, caf-champions-league-2024-2025
5
+
6
+ eg.1, egy-premiership-2023-2024
7
+ ma.1, mar-botola-pro-1-2024-2025
8
+ dz.1, alg-ligue-1-2024-2025
@@ -37,8 +37,19 @@ us.cup, usa-u-s-open-cup-2024
37
37
 
38
38
  br.1, bra-serie-a-2024
39
39
  br.2, bra-serie-b-2024
40
+ br.3, bra-serie-c-2024-playoffs
41
+ br.4, bra-serie-d-2024
42
+
40
43
  br.cup, bra-copa-do-brasil-2024
41
44
 
45
+ br.carioca, bra-campeonato-carioca-2024-taca-guanabara-finals
46
+ br.gauchao, bra-campeonato-gaucho-2024-playoffs
47
+ br.mineiro, bra-campeonato-mineiro-2024-fase-final
48
+ br.paranaense, bra-campeonato-paranaense-2024-fase-final
49
+ br.paulistao, bra-campeonato-paulista-2024-playoffs
50
+
51
+
52
+
42
53
 
43
54
  co.1, col-primera-a-2024-clausura
44
55
  co.2, col-primera-b-2024-clausura
@@ -8,3 +8,4 @@ cn.1, chn-super-league-2024
8
8
 
9
9
  kr.1, kor-k-league-1-2024
10
10
 
11
+ kz.1, kaz-premier-liga-2024
@@ -13,6 +13,7 @@ de.cup, dfb-pokal-2024-2025
13
13
  at.1, aut-bundesliga-2024-2025
14
14
  at.2, aut-2-liga-2024-2025
15
15
  at.3.o, aut-regionalliga-ost-2024-2025
16
+ at.3.m, aut-regionalliga-mitte-2024-2025
16
17
  at.cup, aut-oefb-cup-2024-2025
17
18
 
18
19
 
@@ -20,6 +21,9 @@ ch.1, sui-super-league-2024-2025
20
21
  ch.2, sui-challenge-league-2024-2025
21
22
  ch.cup, sui-cup-2024-2025
22
23
 
24
+ # note - li (liechtenstein) has no league of its own, only a cup
25
+ # most (all?) clubs play in switzerland
26
+ li.cup, lie-fl-1-cup-2024-2025
23
27
 
24
28
  hu.1, hun-nb-i-2024-2025
25
29
 
@@ -44,7 +48,8 @@ eng.cup.l, eng-league-cup-2024-2025
44
48
 
45
49
 
46
50
  sco.1, sco-premiership-2024-2025
47
-
51
+ wal.1, wal-premier-league-2024-2025
52
+ nir.1, nir-premier-league-2024-2025
48
53
  ie.1, irl-premier-division-2024
49
54
 
50
55
 
@@ -64,10 +69,16 @@ be.2, bel-eerste-klasse-b-2024-2025
64
69
  be.cup, bel-beker-van-belgie-2024-2025
65
70
 
66
71
 
67
- it.1, ita-serie-a-2024-2025
68
- it.2, ita-serie-b-2024-2025
72
+ it.1, ita-serie-a-2024-2025
73
+ it.2, ita-serie-b-2024-2025
74
+ it.3.a, ita-serie-c-girone-a-2024-2025
75
+ it.3.b, ita-serie-c-girone-b-2024-2025
76
+ it.3.c, ita-serie-c-girone-c-2024-2025
69
77
  it.cup, ita-coppa-italia-2024-2025
70
78
 
79
+ sm.1, smr-campionato-2024-2025
80
+
81
+
71
82
  pt.1, por-primeira-liga-2024-2025
72
83
  pt.2, por-segunda-liga-2024-2025
73
84
 
@@ -75,13 +86,21 @@ es.1, esp-primera-division-2024-2025
75
86
  es.2, esp-segunda-division-2024-2025
76
87
  es.cup, esp-copa-del-rey-2023-2024 ## update to 2024-2025 later!!!
77
88
 
89
+ ad.1, and-1a-divisio-2024-2025
90
+
91
+ gi.1, gib-premier-divison-2024-2025
92
+
78
93
 
79
94
  ro.1, rou-liga-1-2024-2025
80
95
  ro.cup, rou-cupa-romaniei-2024-2025
81
96
 
97
+ bg.1, bul-parva-liga-2024-2025
98
+
82
99
  ru.1, rus-premier-liga-2024-2025
83
100
  ru.2, rus-premier-liga-2024-2025
84
101
 
102
+ by.1, blr-cempionat-2024
103
+
85
104
  ua.1, ukr-premyer-liga-2024-2025
86
105
 
87
106
 
@@ -91,13 +110,32 @@ tr.2, tur-1-lig-2024-2025
91
110
 
92
111
  gr.1, gre-super-league-2024-2025
93
112
 
113
+ cy.1, cyp-first-division-2024-2025
114
+ mt.1, mlt-premier-league-2024-2025-opening
115
+
94
116
  hr.1, cro-1-hnl-2024-2025
95
117
  ba.1, bih-premier-liga-2024-2025
96
118
 
97
119
  al.1, alb-kategoria-superiore-2024-2025
98
120
 
121
+ mk.1, mkd-1-mfl-2024-2025
122
+ md.1, mda-divizia-nationala-2024-2025-phase-i
123
+ me.1, mne-1-cfl-2024-2025
124
+ rs.1, srb-super-liga-2024-2025
125
+ si.1, svn-prvaliga-2024-2025
126
+
127
+ ## todo/fix - change to kos.1 - why? why not?
128
+ xk.1, kos-superliga-2024-2025
129
+ kos.1, kos-superliga-2024-2025
130
+
131
+
132
+ am.1, arm-premier-league-2024-2025
133
+ az.1, aze-premyer-liqasi-2024-2025
134
+ ge.1, geo-erovnuli-liga-2024
135
+
99
136
 
100
137
  is.1, isl-urvalsdeild-2024
138
+ fo.1, fro-effodeildin-2024
101
139
 
102
140
  se.1, swe-allsvenskan-2024
103
141
  se.2, swe-superettan-2024
@@ -106,7 +144,9 @@ no.1, nor-eliteserien-2024
106
144
  fi.1, fin-veikkausliiga-2024-championship
107
145
  dk.1, den-superliga-2024-2025
108
146
 
109
-
147
+ ee.1, est-meistriliiga-2024
148
+ lt.1, ltu-a-lyga-2024
149
+ lv.1, lat-virsliga-2024
110
150
 
111
151
 
112
152
  uefa.cl, champions-league-2024-2025
data/config/rounds.csv CHANGED
@@ -1,5 +1,6 @@
1
1
  key, name1, name2
2
2
 
3
+ ## de to en
3
4
  *, 1. Runde, Round 1
4
5
  *, 2. Runde, Round 2
5
6
  *, 3. Runde, Round 3
@@ -14,3 +15,71 @@ key, name1, name2
14
15
  *, Halbfinale, Semifinals
15
16
  *, Finale, Final
16
17
 
18
+ *, Spiel um Platz 3, Match for 3rd place
19
+
20
+ *, Vorrunde, Preliminary round
21
+
22
+ *, Qual. 1. Runde, Qual. Round 1
23
+ *, Qual. 2. Runde, Qual. Round 2
24
+
25
+
26
+
27
+ ## es to en
28
+ *, Recalificación, Reclassification
29
+
30
+
31
+
32
+
33
+ ## misc - make more generic - why? why not?
34
+ mx.1, Qual. 1. Runde, Play-in round 1
35
+ mx.1, Qual. 2. Runde, Play-in round 2
36
+ mx.2, Qual. 1. Runde, Play-in round 1
37
+ mx.2, Qual. 2. Runde, Play-in round 2
38
+
39
+
40
+ ###
41
+ ## quick fix - move groups to new groups column!!!!
42
+ ar.1, Gruppe A, Group
43
+ ar.1, Gruppe B, Group
44
+ ar.1, Gruppe C, Group
45
+ ar.1, Gruppe D, Group
46
+ ar.1, Gruppe E, Group
47
+ ar.1, Gruppe F, Group
48
+ ar.1, Gruppe 1, Group
49
+ ar.1, Gruppe 2, Group
50
+
51
+ co.1, Gruppe A, Group
52
+ co.1, Gruppe B, Group
53
+
54
+ caf.cl, Gruppe A, Group
55
+ caf.cl, Gruppe B, Group
56
+ caf.cl, Gruppe C, Group
57
+ caf.cl, Gruppe D, Group
58
+
59
+
60
+ uefa.cl, Gruppe A, Group
61
+ uefa.cl, Gruppe B, Group
62
+ uefa.cl, Gruppe C, Group
63
+ uefa.cl, Gruppe D, Group
64
+ uefa.cl, Gruppe E, Group
65
+ uefa.cl, Gruppe F, Group
66
+ uefa.cl, Gruppe G, Group
67
+ uefa.cl, Gruppe H, Group
68
+
69
+ uefa.el, Gruppe A, Group
70
+ uefa.el, Gruppe B, Group
71
+ uefa.el, Gruppe C, Group
72
+ uefa.el, Gruppe D, Group
73
+ uefa.el, Gruppe E, Group
74
+ uefa.el, Gruppe F, Group
75
+ uefa.el, Gruppe G, Group
76
+ uefa.el, Gruppe H, Group
77
+
78
+ uefa.conf, Gruppe A, Group
79
+ uefa.conf, Gruppe B, Group
80
+ uefa.conf, Gruppe C, Group
81
+ uefa.conf, Gruppe D, Group
82
+ uefa.conf, Gruppe E, Group
83
+ uefa.conf, Gruppe F, Group
84
+ uefa.conf, Gruppe G, Group
85
+ uefa.conf, Gruppe H, Group
@@ -1,6 +1,17 @@
1
1
  module Worldfootball
2
2
 
3
3
 
4
+ ## add WO
5
+ ## br.mineiro 2024 - Descenso
6
+ ##
7
+ ## W.O. or w/o (originally two words: "walk over"),
8
+ ##
9
+ ## [004] 3. Spieltag => 3
10
+ ## [004] 2024-03-22 | 00:00 | Atlético Patrocinense - MG | Ipatinga - MG | WO
11
+ ## !! ERROR - unsupported score format >WO< - sorry; maybe add a score error fix/patch
12
+
13
+
14
+
4
15
 
5
16
  def self.parse_score( score_str )
6
17
  ## add support for
@@ -8,7 +19,7 @@ def self.parse_score( score_str )
8
19
  ## 3-0 (0-0, 0-0) awd.
9
20
 
10
21
  ## check for 0:3 Wert. - change Wert. to awd. (awarded)
11
- ## todo/fix - use "hardcoded" Wert\. in regex - why? why not?
22
+ ## todo/fix - use "hardcoded" Wert\. in regex - why? why not?
12
23
  ## score_str = score_str.sub( /Wert\./i, 'awd.' )
13
24
 
14
25
 
@@ -38,6 +49,11 @@ def self.parse_score( score_str )
38
49
  ft = ''
39
50
  ht = ''
40
51
  comments = 'postponed'
52
+ elsif score_str == 'WO' # walk over
53
+ ## W.O. or w/o (originally two words: "walk over"),
54
+ ft = '(*)'
55
+ ht = ''
56
+ comments = 'w/o' ## use walkover - why? why not?
41
57
  # 5-4 (0-0, 1-1, 2-2) i.E.
42
58
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
43
59
  [ ]*
@@ -54,7 +70,7 @@ def self.parse_score( score_str )
54
70
  ft = "#{$5}-#{$6}"
55
71
  et = "#{$7}-#{$8}"
56
72
  # 3-2 (0-0, 1-1) i.E. - note: no extra time!!! only ht,ft!!!
57
- # "popular" in southamerica & mexico
73
+ # "popular" in southamerica & mexico
58
74
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
59
75
  [ ]*
60
76
  \(([0-9]+) [ ]*-[ ]* ([0-9]+)
@@ -80,14 +96,14 @@ def self.parse_score( score_str )
80
96
  et = "#{$1}-#{$2}"
81
97
  ht = "#{$3}-#{$4}"
82
98
  ft = "#{$5}-#{$6}"
83
- ### auto-patch fix drop last score
99
+ ### auto-patch fix drop last score
84
100
  ## 1-3 (0-1, 1-1, 0-2) n.V. => 1-3 (0-1, 1-1) n.V.
85
101
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
86
102
  [ ]*
87
103
  \(([0-9]+) [ ]*-[ ]* ([0-9]+)
88
104
  [ ]*,[ ]*
89
- ([0-9]+) [ ]*-[ ]* ([0-9]+)
90
- [ ]*,[ ]*
105
+ ([0-9]+) [ ]*-[ ]* ([0-9]+)
106
+ [ ]*,[ ]*
91
107
  ([0-9]+) [ ]*-[ ]* ([0-9]+)
92
108
  \)
93
109
  [ ]*
@@ -97,7 +113,7 @@ def self.parse_score( score_str )
97
113
  ht = "#{$3}-#{$4}"
98
114
  ft = "#{$5}-#{$6}"
99
115
 
100
- puts "!! WARN - auto-fix/patch score - >#{score_str}<"
116
+ puts "!! WARN - auto-fix/patch score - >#{score_str}<"
101
117
  ### todo/fix - log auto-patch/fix - for double checking!!!!!
102
118
  elsif score_str =~ /([0-9]+) [ ]*-[ ]* ([0-9]+)
103
119
  [ ]*
@@ -120,9 +136,9 @@ def self.parse_score( score_str )
120
136
  [ ]*
121
137
  \(([0-9]+) [ ]*-[ ]* ([0-9]+)
122
138
  [ ]*,[ ]*
123
- ([0-9]+) [ ]*-[ ]* ([0-9]+)
124
- \)
125
- [ ]*
139
+ ([0-9]+) [ ]*-[ ]* ([0-9]+)
140
+ \)
141
+ [ ]*
126
142
  Wert\. # ([a-z.]+)
127
143
  /x ### assume awd. (awarded) always - why? why not?
128
144
  ft = "#{$1}-#{$2} (*)"
@@ -133,19 +149,36 @@ def self.parse_score( score_str )
133
149
  ft = "#{$1}-#{$2}" ## e.g. see luxemburg and others
134
150
  ht = ''
135
151
  ## auto-fix/patch
136
- # 3-3 (0-3, 3-3) => 3-3 (0-3) - drop last score
152
+ # 3-3 (0-3, 3-3) => 3-3 (0-3) - drop last score
137
153
  elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
138
154
  [ ]*
139
155
  \(([0-9]+) [ ]*-[ ]* ([0-9]+)
140
156
  [ ]*,[ ]*
141
- ([0-9]+) [ ]*-[ ]* ([0-9]+)
157
+ ([0-9]+) [ ]*-[ ]* ([0-9]+)
142
158
  \)$
143
159
  /x
144
160
  ft = "#{$1}-#{$2}"
145
161
  ht = "#{$3}-#{$4}"
146
162
 
147
- puts "!! WARN - auto-fix/patch score - >#{score_str}<"
163
+ puts "!! WARN - auto-fix/patch score - >#{score_str}<"
148
164
  ### todo/fix - log auto-patch/fix - for double checking!!!!!
165
+ elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
166
+ [ ]*
167
+ n\.V\.
168
+ $/x
169
+ et = "#{$1}-#{$2}"
170
+ ht = ''
171
+ ft = ''
172
+ puts "!! WARN - weird score n.V. only - >#{score_str}<"
173
+ elsif score_str =~ /^([0-9]+) [ ]*-[ ]* ([0-9]+)
174
+ [ ]*
175
+ i\.E\.
176
+ $/x
177
+ pen = "#{$1}-#{$2}"
178
+ et = ''
179
+ ht = ''
180
+ ft = ''
181
+ puts "!! WARN - weird score i.E. only - >#{score_str}<"
149
182
  else
150
183
  puts "!! ERROR - unsupported score format >#{score_str}< - sorry; maybe add a score error fix/patch"
151
184
  exit 1
@@ -9,6 +9,9 @@ def self.convert( league:, season: )
9
9
  pages = league.pages!( season: season )
10
10
 
11
11
 
12
+ ## collect all teams
13
+ teams_by_ref = {}
14
+
12
15
  recs = []
13
16
  pages.each do |slug, stage|
14
17
  ## note: stage might be nil
@@ -35,6 +38,30 @@ def self.convert( league:, season: )
35
38
  print "\n"
36
39
 
37
40
  rows = page.matches
41
+
42
+ teams = page.teams
43
+ ## e.g. {:count=>2, :name=>"AS Arta", :ref=>"as-arta"},
44
+ ## {:count=>4, :name=>"Dekedaha FC", :ref=>"dekedaha-fc"},
45
+ ## ...
46
+ teams.each do |h|
47
+ team_count = h[:count]
48
+ team_name = norm_team( h[:name] ) ## note: norm team name!!!
49
+ team_ref = h[:ref]
50
+
51
+ ## note: skip N.N. (place holder team)
52
+ ## team_ref is nil etc.
53
+ next if team_name == 'N.N.'
54
+
55
+ team_stat = teams_by_ref[ team_ref ] ||= { count: 0,
56
+ name: team_name }
57
+ team_stat[:count] += team_count
58
+ if team_name != team_stat[:name]
59
+ puts "!! ASSERT ERROR - team ref with differet names; expected #{team_stat[:name]} - got #{team_name}"
60
+ exit 1
61
+ end
62
+ end
63
+
64
+
38
65
  stage_recs = build( rows,
39
66
  season: season,
40
67
  league: league.key,
@@ -45,6 +72,60 @@ def self.convert( league:, season: )
45
72
  end
46
73
 
47
74
 
75
+ clubs_intl = ['uefa.cl', 'uefa.el', 'uefa.conf',
76
+ 'copa.l',
77
+ 'caf.cl',
78
+ 'afl'].include?(league.key) ? true : false
79
+
80
+ ####
81
+ # auto-add (fifa) country code if int'l club tournament
82
+ if clubs_intl
83
+ ##
84
+ ## get country codes for team ref
85
+ teams_by_ref.each do |team_slug, h|
86
+
87
+ Metal.download_team( team_slug, cache: true )
88
+ team_page = Page::Team.from_cache( team_slug )
89
+ props = team_page.props
90
+ pp props
91
+ country_name = props[:country]
92
+ cty = Fifa.world.find_by_name( country_name )
93
+ if cty.nil?
94
+ puts "!! ERROR - no country found for #{country_name}"
95
+ exit 1
96
+ end
97
+ h[:code] = cty.code
98
+ end
99
+
100
+ ## generate lookup by name
101
+ teams_by_name = teams_by_ref.reduce( {} ) do |h, (slug,rec)|
102
+ h[ rec[:name]] = rec
103
+ h
104
+ end
105
+
106
+ #####
107
+ ## dump team refs
108
+ puts " #{teams_by_ref.size} team(s) by ref:"
109
+ pp teams_by_name
110
+
111
+ ## quick hack
112
+ ## add country (fifa) codes to team names
113
+ recs.each do |rec|
114
+ team1_org = rec[5]
115
+ if team1_org != 'N.N.' ## note - skip place holder; keep as-is
116
+ country_code = teams_by_name[team1_org][:code]
117
+ rec[5] = "#{team1_org} (#{country_code})"
118
+ end
119
+
120
+ team2_org = rec[8]
121
+ if team2_org != 'N.N.' ## note - skip place holder; keep as-is
122
+ country_code = teams_by_name[team2_org][:code]
123
+ rec[8] = "#{team2_org} (#{country_code})"
124
+ end
125
+ end
126
+ end
127
+
128
+
48
129
  ## note: sort matches by date before saving/writing!!!!
49
130
  ## note: for now assume date in string in 1999-11-30 format (allows sort by "simple" a-z)
50
131
  ## note: assume date is third column!!! (stage/round/date/...)
@@ -38,7 +38,7 @@ class Metal
38
38
 
39
39
  def self.schedule_url( slug ) "#{BASE_URL}/alle_spiele/#{slug}/"; end
40
40
  def self.report_url( slug ) "#{BASE_URL}/spielbericht/#{slug}/"; end
41
-
41
+ def self.team_url( slug ) "#{BASE_URL}/teams/#{slug}/"; end
42
42
 
43
43
  ##
44
44
  ## note:
@@ -49,6 +49,19 @@ class Metal
49
49
  ## 301 Moved Permanently location=https://www.weltfussball.de/
50
50
 
51
51
 
52
+ def self.download_team( slug, cache: true )
53
+ url = team_url( slug )
54
+
55
+ ## check cache first
56
+ if cache && Webcache.cached?( url )
57
+ puts " reuse local (cached) copy >#{Webcache.url_to_id( url )}<"
58
+ else
59
+ download_page( url )
60
+ end
61
+ end
62
+
63
+
64
+
52
65
  # url = "https://www.weltfussball.de/alle_spiele/eng-league-one-#{season}/"
53
66
  # url = "https://www.weltfussball.de/alle_spiele/eng-league-two-#{season}/"
54
67
  # https://www.weltfussball.de/alle_spiele/eng-national-league-2019-2020/
@@ -62,7 +75,6 @@ class Metal
62
75
  # https://www.weltfussball.de/alle_spiele/tur-sueperlig-2019-2020/
63
76
  # https://www.weltfussball.de/alle_spiele/tur-1-lig-2019-2020/
64
77
 
65
-
66
78
  def self.download_schedule( slug )
67
79
  url = schedule_url( slug )
68
80
  download_page( url )
@@ -24,7 +24,10 @@ class LeagueItem # nested inside LeagueConfig
24
24
  ### todo/fix:
25
25
  ## use from cache if not older than 1 (or 5/10?) hour(s) or such
26
26
  ## why? why not?
27
- Worldfootball::Metal.download_schedule( @slug )
27
+ schedule_url = Worldfootball::Metal.schedule_url( @slug )
28
+ if Webcache.expired_in_1d?( schedule_url )
29
+ Worldfootball::Metal.download_schedule( @slug )
30
+ end
28
31
  page = Worldfootball::Page::Schedule.from_cache( @slug )
29
32
 
30
33
  ## pp page.seasons
@@ -120,7 +123,8 @@ end # class LeagueConfig
120
123
 
121
124
 
122
125
  LEAGUES = LeagueConfig.new
123
- ['leagues_america',
126
+ ['leagues_africa',
127
+ 'leagues_america',
124
128
  'leagues_asia',
125
129
  'leagues_europe',
126
130
  'leagues_middle_east',
@@ -24,6 +24,12 @@ def self.norm_team( team )
24
24
  ## others too? - move to mods instead of generic rule - why? why not?
25
25
  team = team.sub( /[ ]+\(A\)/, ' II' )
26
26
 
27
+ ################
28
+ ## quick hack - norm(alize) all place holder team names to N.N.
29
+ ## e.g.
30
+ team = 'N.N.' if ['Sieger HF 1',
31
+ 'Sieger HF 2'].include?( team )
32
+
27
33
  team
28
34
  end
29
35
 
@@ -8,7 +8,9 @@ module Worldfootball
8
8
  end
9
9
 
10
10
  def initialize( html )
11
- @html = html
11
+ ## todo/fix - fix upstream in wget!!!! why? why not?
12
+ ## normalize unicode (to nfc - ruby's default norm form)
13
+ @html = html.unicode_normalize
12
14
  end
13
15
 
14
16
  def doc
@@ -83,9 +85,13 @@ GENERATED_RE = %r{
83
85
  end
84
86
  end
85
87
 
88
+
86
89
  ######################
87
90
  ## helper methods
88
91
 
92
+ def debug?() Worldfootball.debug?; end
93
+
94
+
89
95
  def squish( str )
90
96
  str = str.strip
91
97
  str = str.gsub( "\u{00A0}", ' ' ) # Unicode Character 'NO-BREAK SPACE' (U+00A0)
@@ -51,7 +51,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
51
51
  ## <td colspan="2"></td>
52
52
  ## </tr>
53
53
  ##
54
- # <tr class="e2-parent" data-liga_id="530" data-gs_match_id="10259222"
54
+ # <tr class="e2-parent" data-liga_id="530" data-gs_match_id="10259222"
55
55
  # style="display:none;">
56
56
  ## <td colspan="2"></td>
57
57
  ## <td colspan="3">
@@ -68,7 +68,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
68
68
 
69
69
  i += 1
70
70
 
71
- ## puts "[debug] row #{i} >#{tr.text.strip}<"
71
+ ## puts "[debug] row #{i} >#{tr.text.strip}<"
72
72
 
73
73
  ### note - assume for now match lines use tds
74
74
  ## and round lines use ths (NOT tds)!!
@@ -76,7 +76,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
76
76
 
77
77
  ths = tr.css( 'th' )
78
78
  tds = tr.css( 'td' )
79
-
79
+
80
80
  if tr.text.strip =~ /Spieltag/ ||
81
81
  tr.text.strip =~ /[1-9]\.[ ]Runde|
82
82
  Qual\.[ ][1-9]\.[ ]Runde| # see EL or CL Quali
@@ -93,28 +93,30 @@ class Schedule < Page ## note: use nested class for now - why? why not?
93
93
  Spiele| # see Serie A 1960-61 Relegation
94
94
  3\.[ ]Platz| # see bra-serie-a-2000-yellow-module-playoffs
95
95
  Spiel[ ]um[ ]Platz[ ]3| # see campeonato-2009-cuadrangulares-deportivo-cuenca-cs-emelec
96
- Relegation| # see egy-premiership-2013-2014-abstiegsplayoff
97
- Copa[ ]Libertadores| # see ecu-campeonato-2012-segunda-etapa-playoffs
96
+ Relegation| # see egy-premiership-2013-2014-abstiegsplayoff
97
+ Copa[ ]Libertadores| # see ecu-campeonato-2012-segunda-etapa-playoffs
98
98
  Copa[ ]Sudamericana| # see campeonato-2012-liguilla-final-playoffs-cs-emelec-ldu-quito
99
- Repechaje| # see nca-liga-primera-2023-2024-clausura-playoffs
100
- Final[ ]de[ ]Grupos| # see hon-liga-nacional-2020-2021-clausura-playoffs
99
+ Repechaje| # see nca-liga-primera-2023-2024-clausura-playoffs
100
+ Final[ ]de[ ]Grupos| # see hon-liga-nacional-2020-2021-clausura-playoffs
101
101
  Gran[ ]Final| # see liga-nacional-2020-2021-apertura-playoffs-finale-olimpia-motagua
102
102
  Finalrunde| # see hon-liga-nacional-2019-2020-apertura-pentagonal
103
- Zona[ ]A| # see gua-liga-nacional-2020-2021-clausura
104
- Zona[ ]B| # see liga-nacional-2020-2021-clausura-zona-a-comunicaciones-deportivo-malacateco
103
+ Zona[ ]A| # see gua-liga-nacional-2020-2021-clausura
104
+ Zona[ ]B| # see liga-nacional-2020-2021-clausura-zona-a-comunicaciones-deportivo-malacateco
105
105
  Interzone| # see liga-nacional-2020-2021-clausura-zona-b-achuapa-sanarate
106
106
  Final[ ]Segunda[ ]Ronda| # see crc-primera-division-2018-2019-apertura-playoffs
107
107
  Quadrangular # see crc-primera-division-2016-2017-verano-playoffs
108
108
  /x
109
-
110
- puts
111
- print '[%03d] ' % i
112
- ## print squish( tr.text )
113
- print "round >#{tr.text.strip}<"
114
- print "\n"
109
+
110
+ if debug?
111
+ puts
112
+ print '[%03d] ' % i
113
+ ## print squish( tr.text )
114
+ print "round >#{tr.text.strip}<"
115
+ print "\n"
116
+ end
115
117
 
116
118
  last_round = tr.text.strip
117
- elsif ths.count > 0 &&
119
+ elsif ths.count > 0 &&
118
120
  tds.count == 0
119
121
  ## check for round NOT yet configured!!!
120
122
  puts "!! WARN: found unregistered round line >#{tr.text.strip}<"
@@ -128,11 +130,12 @@ class Schedule < Page ## note: use nested class for now - why? why not?
128
130
 
129
131
  date_str = last_date_str if date_str.empty?
130
132
 
131
- ## note: for debugging - print as we go along (parsing)
132
- print '[%03d] ' % i
133
- print "%-10s | " % date_str
134
- print "%-5s | " % time_str
135
-
133
+ if debug?
134
+ ## note: for debugging - print as we go along (parsing)
135
+ print '[%03d] ' % i
136
+ print "%-10s | " % date_str
137
+ print "%-5s | " % time_str
138
+ end
136
139
 
137
140
  # was: team1_str = squish( tds[2].text )
138
141
 
@@ -148,8 +151,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
148
151
  puts "!! WARN: no team1_ref for >#{team1_str}< found"
149
152
  end
150
153
 
151
- ## note: for debugging - print as we go along (parsing)
152
- print "%-22s | " % team1_str
154
+ if debug?
155
+ ## note: for debugging - print as we go along (parsing)
156
+ print "%-22s | " % team1_str
157
+ end
153
158
 
154
159
  ## <td> - </td>
155
160
  ## e.g. -
@@ -168,9 +173,10 @@ class Schedule < Page ## note: use nested class for now - why? why not?
168
173
  puts "!! WARN: no team2_ref for >#{team2_str}< found"
169
174
  end
170
175
 
171
- ## note: for debugging - print as we go along (parsing)
172
- print "%-22s | " % team2_str
173
-
176
+ if debug?
177
+ ## note: for debugging - print as we go along (parsing)
178
+ print "%-22s | " % team2_str
179
+ end
174
180
 
175
181
 
176
182
  ### was: score_str = squish( tds[5].text )
@@ -197,11 +203,11 @@ class Schedule < Page ## note: use nested class for now - why? why not?
197
203
  score_str = '-:-' # note: -:- gets replaced to ---
198
204
  end
199
205
 
200
-
201
- print "%-10s | " % score_str
202
- print (score_ref ? score_ref : 'n/a')
203
- print "\n"
204
-
206
+ if debug?
207
+ print "%-10s | " % score_str
208
+ print (score_ref ? score_ref : 'n/a')
209
+ print "\n"
210
+ end
205
211
 
206
212
  ## change 2:1 (1:1) to 2-1 (1-1)
207
213
  score_str = score_str.gsub( ':', '-' )
@@ -214,7 +220,7 @@ class Schedule < Page ## note: use nested class for now - why? why not?
214
220
 
215
221
  date = if date_str == '00.00.0000'
216
222
  nil
217
- else
223
+ else
218
224
  Date.strptime( date_str, '%d.%m.%Y' )
219
225
  end
220
226
 
@@ -0,0 +1,115 @@
1
+
2
+ module Worldfootball
3
+ class Page
4
+
5
+ class Team < Page ## note: use nested class for now - why? why not?
6
+
7
+ def self.from_cache( slug )
8
+ url = Metal.team_url( slug )
9
+ html = Webcache.read( url )
10
+ new( html )
11
+ end
12
+
13
+ ## rename to properties or attributes or ??
14
+ ## or infobox or data or ?? why? why not?
15
+ def props
16
+ sidebar = doc.css( 'div.sidebar' ).first
17
+ assert( sidebar, 'no div.sidebar found in team page!!' )
18
+
19
+ h2 = sidebar.css( 'h2' ).first
20
+ assert( h2, 'no h2 inside div.sidebar found in team page!!')
21
+ name = h2.text
22
+
23
+ table = sidebar.css( 'table' ).first
24
+ assert( table, 'no table inside div.sidebar found in team page!!')
25
+
26
+ long_name = nil
27
+ country = nil
28
+ ground = nil
29
+ founded = nil
30
+
31
+ trs = table.css( 'tr' )
32
+ trs.each_with_index do |tr,i|
33
+ text = squish(tr.text)
34
+ ## puts "[#{i+1}] >#{text}<"
35
+
36
+ ## assume long name is in the first row if NOT starting w/ Land:
37
+ if i==0 && !text.start_with?( 'Land' )
38
+ long_name = text
39
+ end
40
+
41
+ if text.start_with?( 'Land:' )
42
+ country = text.sub( 'Land:', '' ).strip
43
+ end
44
+
45
+ if text.start_with?( 'gegründet:' )
46
+ founded = text.sub( 'gegründet:', '' ).strip
47
+ end
48
+
49
+ if text.start_with?( 'Stadion:' )
50
+ ground = text.sub( 'Stadion:', '' ).strip
51
+ end
52
+ end
53
+
54
+ data = { name: name }
55
+ data[ :long_name] = long_name if long_name
56
+ data[ :country ] = country if country
57
+ data[ :founded ] = founded if founded && founded != '0000'
58
+ data[ :ground] = ground if ground && ground != '----------'
59
+
60
+ data
61
+ end
62
+
63
+ =begin
64
+ <div class="sidebar">
65
+
66
+ <div class="box emblemwrapper">
67
+ <div class="head">
68
+ <h2>Al Ahly SC</h2>
69
+ </div>
70
+ <div class="data " align="center">
71
+ <div class="emblem"><a href="/teams/al-ahly-sc/"><img src="https://s.hs-data.com/bilder/wappen/mittel/1480.gif?fallback=png" border="0" width="100" hspace="5" vspace="5" alt="Al Ahly SC" title="Al Ahly SC" /></a></div>
72
+ <div class="emblem_background"><a href="/teams/al-ahly-sc/"><img src="https://s.hs-data.com/bilder/wappen/mittel/1480.gif?fallback=png" border="0" width="100" hspace="5" vspace="5" alt="Al Ahly SC" title="Al Ahly SC" /></a></div>
73
+ </div>
74
+ <div class="data">
75
+ <table class="standard_tabelle yellow" cellpadding="3" cellspacing="0">
76
+ <tr>
77
+ <td colspan="2" align="center">Al Ahly Sporting Club</td>
78
+ </tr>
79
+ <tr>
80
+ <td colspan="2">&nbsp;</td>
81
+ </tr>
82
+ <tr>
83
+ <td align="right"><b>Land:</b></td>
84
+ <td>
85
+ <img src="https://s.hs-data.com/bilder/flaggen_neu/68.gif" width="18" height="12" hspace="5" title="Ägypten" align="absmiddle" />
86
+ Ägypten </td>
87
+ </tr>
88
+ <tr>
89
+ <td align="right"><b>gegründet:</b></td>
90
+ <td>24.04.1907</td>
91
+ </tr>
92
+ <tr>
93
+ <td align="right"><b>Stadion:</b></td>
94
+ <td><a href="/spielorte/international-stadium-cairo/" title="International Stadium">International Stadium</a></td>
95
+ </tr>
96
+ <tr>
97
+ <td align="right"><b>Homepage:</b></td>
98
+ <td><a href="http://alahlyegypt.com/" target="_blank">alahlyegypt.com/</a></td>
99
+ </tr>
100
+ <tr>
101
+ <td colspan="2" align="right"><b><a href="/teams/al-ahly-sc/1/" title="Weitere Infos zu Al Ahly SC">zum Profil &raquo;</a></b></td>
102
+ </tr>
103
+ </table>
104
+ </div>
105
+ </div>
106
+ =end
107
+
108
+
109
+ ######
110
+ ## helpers
111
+
112
+ end # class Team
113
+
114
+ end # class Page
115
+ end # module Worldfootball
@@ -2,15 +2,15 @@ module Worldfootball
2
2
 
3
3
 
4
4
  MAX_HEADERS = [
5
- 'Stage',
6
- 'Round',
7
- 'Date',
8
- 'Time',
9
- 'Timezone',
10
- 'Team 1',
11
- 'FT',
12
- 'HT',
13
- 'Team 2',
5
+ 'Stage', # 0
6
+ 'Round', # 1
7
+ 'Date', # 2
8
+ 'Time', # 3
9
+ 'Timezone', #4
10
+ 'Team 1', #5
11
+ 'FT', #6
12
+ 'HT', #7
13
+ 'Team 2', #8
14
14
  'ET',
15
15
  'P',
16
16
  'Comments', ## e.g. awarded, cancelled/canceled, etc.
@@ -2,7 +2,7 @@
2
2
  module Worldfootball
3
3
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
4
4
  MINOR = 2
5
- PATCH = 4
5
+ PATCH = 6
6
6
  VERSION = [MAJOR,MINOR,PATCH].join('.')
7
7
 
8
8
  def self.version
data/lib/worldfootball.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  ## 3rd party (our own)
2
2
  require 'football/timezones' ## note - pulls in season/formats, cocos & tzinfo
3
+ require 'fifa'
4
+
3
5
  require 'webget' ## incl. webget, webcache, webclient, etc.
4
6
  require 'nokogiri'
5
7
 
@@ -16,6 +18,7 @@ require_relative 'worldfootball/download'
16
18
  require_relative 'worldfootball/page'
17
19
  require_relative 'worldfootball/page_schedule'
18
20
  require_relative 'worldfootball/page_report'
21
+ require_relative 'worldfootball/page_team'
19
22
  require_relative 'worldfootball/cache'
20
23
 
21
24
 
@@ -30,6 +33,13 @@ require_relative 'worldfootball/convert_reports'
30
33
 
31
34
  module Worldfootball
32
35
 
36
+ #########
37
+ ## add a global debug flag
38
+ def self.debug=(value) @debug = value; end
39
+ def self.debug?() @debug ||= false; end ## note: default is FALSE
40
+
41
+
42
+
33
43
  class Configuration
34
44
  #########
35
45
  ## nested configuration classes - use - why? why not?
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: worldfootball
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-15 00:00:00.000000000 Z
11
+ date: 2024-10-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: football-timezones
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: fifa
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: webget
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -102,6 +116,7 @@ files:
102
116
  - README.md
103
117
  - Rakefile
104
118
  - bin/wfb
119
+ - config/leagues_africa.csv
105
120
  - config/leagues_america.csv
106
121
  - config/leagues_asia.csv
107
122
  - config/leagues_europe.csv
@@ -121,6 +136,7 @@ files:
121
136
  - lib/worldfootball/page.rb
122
137
  - lib/worldfootball/page_report.rb
123
138
  - lib/worldfootball/page_schedule.rb
139
+ - lib/worldfootball/page_team.rb
124
140
  - lib/worldfootball/stages.rb
125
141
  - lib/worldfootball/vacuum.rb
126
142
  - lib/worldfootball/version.rb