openmatchkit 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openmatchkit/__init__.py +41 -0
- openmatchkit/cli.py +182 -0
- openmatchkit/client.py +405 -0
- openmatchkit/exceptions.py +14 -0
- openmatchkit/export.py +94 -0
- openmatchkit/http.py +181 -0
- openmatchkit/models.py +198 -0
- openmatchkit/prediction/__init__.py +7 -0
- openmatchkit/prediction/elo.py +43 -0
- openmatchkit/prediction/poisson.py +99 -0
- openmatchkit/sources/__init__.py +11 -0
- openmatchkit/sources/base.py +29 -0
- openmatchkit/sources/football_data_uk.py +121 -0
- openmatchkit/sources/json_file.py +189 -0
- openmatchkit/sources/openfootball.py +235 -0
- openmatchkit/sources/public_html.py +41 -0
- openmatchkit-0.2.1.dist-info/METADATA +192 -0
- openmatchkit-0.2.1.dist-info/RECORD +20 -0
- openmatchkit-0.2.1.dist-info/WHEEL +4 -0
- openmatchkit-0.2.1.dist-info/entry_points.txt +2 -0
openmatchkit/export.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import csv
|
|
4
|
+
import json
|
|
5
|
+
from io import StringIO
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel
|
|
10
|
+
|
|
11
|
+
from openmatchkit.models import Match
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def matches_to_rows(matches: list[Match]) -> list[dict[str, object]]:
    """Flatten matches into plain dictionaries, one row per match.

    Nested values are reduced to scalars: team objects become their ``name``,
    the score is split into ``home_score``/``away_score``, the status becomes
    its ``value``, and ``kickoff``/``fetched_at`` become ISO-8601 strings
    (or ``None`` when unset).

    Args:
        matches: Matches to flatten.

    Returns:
        One dictionary per input match, in input order.
    """

    def _iso_or_none(moment):
        # Unset timestamps stay None instead of being formatted.
        return moment.isoformat() if moment else None

    return [
        {
            "match_id": item.match_id,
            "competition": item.competition,
            "season": item.season,
            "round": item.round,
            "group": item.group,
            "kickoff": _iso_or_none(item.kickoff),
            "home": item.home.name,
            "away": item.away.name,
            "home_score": item.score.home,
            "away_score": item.score.away,
            "status": item.status.value,
            "venue": item.venue,
            "source": item.source,
            "source_url": item.source_url,
            "fetched_at": _iso_or_none(item.fetched_at),
        }
        for item in matches
    ]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def to_json_string(matches: list[Match]) -> str:
    """Serialise matches to a JSON array string indented by two spaces.

    Each match is converted with ``model_dump(mode="json")`` before encoding.

    Args:
        matches: Matches to serialise.

    Returns:
        The JSON document as a string.
    """
    serialised = []
    for item in matches:
        serialised.append(item.model_dump(mode="json"))
    return json.dumps(serialised, indent=2)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _to_jsonable(value: Any) -> Any:
    """Recursively replace pydantic models inside ``value`` with JSON-ready data."""
    if isinstance(value, BaseModel):
        return value.model_dump(mode="json")
    if isinstance(value, dict):
        return {key: _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    return value


def to_model_json_string(value: BaseModel | list[BaseModel] | dict[str, Any] | list[Any]) -> str:
    """Serialise a model, or a container holding models, to indented JSON.

    Models are converted with ``model_dump(mode="json")``. Conversion is
    recursive, so models nested inside dictionaries or inner lists are handled
    as well; previously only a top-level model or the direct items of a
    top-level list were converted, and a model stored as a dictionary value
    made ``json.dumps`` raise ``TypeError``.

    Args:
        value: A model, a list of models/plain values, or a dictionary.

    Returns:
        The JSON document as a string, indented by two spaces.

    Raises:
        TypeError: If a non-model value that ``json`` cannot encode remains.
    """
    return json.dumps(_to_jsonable(value), indent=2)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def to_csv_string(matches: list[Match]) -> str:
    """Render matches as CSV text with a header row.

    Rows come from ``matches_to_rows``; the column order is fixed by the list
    below and matches the keys that function produces.

    Args:
        matches: Matches to export.

    Returns:
        The CSV document, header included, as a single string.
    """
    columns = [
        "match_id",
        "competition",
        "season",
        "round",
        "group",
        "kickoff",
        "home",
        "away",
        "home_score",
        "away_score",
        "status",
        "venue",
        "source",
        "source_url",
        "fetched_at",
    ]
    records = matches_to_rows(matches)

    buffer = StringIO()
    csv_writer = csv.DictWriter(buffer, fieldnames=columns)
    csv_writer.writeheader()
    for record in records:
        csv_writer.writerow(record)
    return buffer.getvalue()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def to_json(matches: list[Match], path: str | Path) -> Path:
    """Write matches as a UTF-8 JSON file.

    Args:
        matches: Matches to export.
        path: Destination file path.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(path)
    document = to_json_string(matches)
    destination.write_text(document, encoding="utf-8")
    return destination
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def to_csv(matches: list[Match], path: str | Path) -> Path:
    """Write matches as a UTF-8 CSV file.

    Args:
        matches: Matches to export.
        path: Destination file path.

    Returns:
        The destination as a ``Path``.
    """
    destination = Path(path)
    document = to_csv_string(matches)
    # newline="" keeps the csv module's own line terminators untouched.
    with destination.open("w", encoding="utf-8", newline="") as handle:
        handle.write(document)
    return destination
|
openmatchkit/http.py
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import time
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from datetime import datetime, timedelta, timezone
|
|
7
|
+
from urllib.parse import urlparse
|
|
8
|
+
from urllib.robotparser import RobotFileParser
|
|
9
|
+
|
|
10
|
+
import httpx
|
|
11
|
+
|
|
12
|
+
from openmatchkit.exceptions import SourceFetchError, SourceNotAllowedError
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass
class _RobotsEntry:
    """A robots.txt parser together with the time it was stored."""

    # Parser holding the rules (or the allow-all / block-all stand-in).
    parser: RobotFileParser
    # When the entry was created; compared against robots_ttl_seconds.
    fetched_at: datetime
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class _CacheEntry:
    """A cached response body together with the time it was stored."""

    # Response text as returned by the HTTP client.
    body: str
    # When the entry was created; compared against cache_ttl_seconds.
    fetched_at: datetime
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class SafeHttpClient:
    """HTTP client with conservative scraping defaults.

    It respects robots.txt, keeps an in-memory cache, applies per-origin delays,
    and never attempts to bypass login, CAPTCHA, paywalls, or other protections.

    The instance can be used as a context manager; leaving the ``with`` block
    closes the underlying ``httpx.Client``.
    """

    # Sent as the User-Agent header and used when matching robots.txt rules.
    user_agent: str = "openmatchkit/0.2.1 (+https://github.com/patilprashan246/openmatchkit)"
    # Minimum spacing, in seconds, between page requests to the same origin.
    min_delay_seconds: float = 2.0
    # Timeout handed to httpx.Client.
    timeout_seconds: float = 20.0
    # When False, robots.txt is neither fetched nor consulted.
    respect_robots: bool = True
    # Lifetime of cached response bodies; a value <= 0 disables the cache.
    cache_ttl_seconds: float = 300.0
    # Lifetime of a cached robots.txt decision per origin.
    robots_ttl_seconds: float = 24 * 60 * 60
    # Optional transport passed straight to httpx.Client (presumably for tests).
    transport: httpx.BaseTransport | None = field(default=None, repr=False)

    _client: httpx.Client = field(init=False, repr=False)
    _last_request_at: dict[str, float] = field(default_factory=dict, init=False, repr=False)
    _robots_cache: dict[str, _RobotsEntry] = field(default_factory=dict, init=False, repr=False)
    _response_cache: dict[str, _CacheEntry] = field(default_factory=dict, init=False, repr=False)

    def __post_init__(self) -> None:
        """Create the underlying httpx client from the configured settings."""
        self._client = httpx.Client(
            timeout=self.timeout_seconds,
            headers={"User-Agent": self.user_agent},
            follow_redirects=True,
            transport=self.transport,
        )

    def close(self) -> None:
        """Close the underlying httpx client."""
        self._client.close()

    def __enter__(self) -> SafeHttpClient:
        return self

    def __exit__(self, *_args: object) -> None:
        self.close()

    def _origin(self, url: str) -> str:
        """Return ``scheme://netloc`` of ``url``; used as the per-site key."""
        parsed = urlparse(url)
        return f"{parsed.scheme}://{parsed.netloc}"

    def _robots_url(self, url: str) -> str:
        """Return the robots.txt URL for the origin of ``url``."""
        return f"{self._origin(url)}/robots.txt"

    def _allow_all_robots(self, robots_url: str) -> RobotFileParser:
        """Return a parser with no rules, which permits every URL."""
        parser = RobotFileParser(robots_url)
        # Parsing an empty file marks the parser as loaded with zero rules.
        parser.parse([])
        return parser

    def _block_all_robots(self, robots_url: str) -> RobotFileParser:
        """Return a parser that disallows every URL for every agent."""
        parser = RobotFileParser(robots_url)
        parser.parse(["User-agent: *", "Disallow: /"])
        return parser

    def _robots_from_response(self, robots_url: str, response: httpx.Response) -> RobotFileParser:
        """Translate a robots.txt HTTP response into a parser.

        * 401, 403, 429 and 5xx: the site refused or could not answer, so
          everything is treated as disallowed. 429 is included because a
          rate-limited robots.txt says nothing about what is permitted, and
          crawling unrestricted in response to throttling is the opposite of
          conservative.
        * Any other 4xx (404 included): no robots.txt, everything allowed.
        * Everything else: the body is parsed as robots.txt rules.
        """
        status = response.status_code
        if status in {401, 403, 429} or status >= 500:
            return self._block_all_robots(robots_url)
        if status >= 400:
            return self._allow_all_robots(robots_url)

        parser = RobotFileParser(robots_url)
        parser.parse(response.text.splitlines())
        return parser

    def _get_robots(self, url: str) -> RobotFileParser:
        """Return the robots.txt parser for the origin of ``url``.

        A cached parser is reused while it is younger than
        ``robots_ttl_seconds``. Otherwise robots.txt is fetched; a transport
        level failure is treated as "block everything".
        """
        origin = self._origin(url)
        now = datetime.now(timezone.utc)
        cached = self._robots_cache.get(origin)

        if cached and now - cached.fetched_at < timedelta(seconds=self.robots_ttl_seconds):
            return cached.parser

        robots_url = self._robots_url(url)

        try:
            response = self._client.get(robots_url)
        except httpx.HTTPError:
            parser = self._block_all_robots(robots_url)
        else:
            parser = self._robots_from_response(robots_url, response)

        # NOTE: failure verdicts are cached for the full TTL as well.
        self._robots_cache[origin] = _RobotsEntry(parser=parser, fetched_at=now)
        return parser

    def _ensure_allowed(self, url: str) -> None:
        """Raise ``SourceNotAllowedError`` if robots.txt forbids ``url``.

        Does nothing when ``respect_robots`` is False.
        """
        if not self.respect_robots:
            return

        robots = self._get_robots(url)
        if not robots.can_fetch(self.user_agent, url):
            raise SourceNotAllowedError(f"Blocked by robots.txt: {url}")

    def _rate_limit(self, url: str) -> None:
        """Sleep until ``min_delay_seconds`` has passed since the last request.

        The delay is tracked per origin using the monotonic clock; the sleep
        blocks the calling thread.
        """
        origin = self._origin(url)
        now = time.monotonic()
        last = self._last_request_at.get(origin)

        if last is not None:
            elapsed = now - last
            wait_time = self.min_delay_seconds - elapsed
            if wait_time > 0:
                time.sleep(wait_time)

        # Re-read the clock so the recorded time is after any sleep.
        self._last_request_at[origin] = time.monotonic()

    def _cache_key(self, url: str) -> str:
        """Return the response-cache key for a GET of ``url``."""
        return f"GET {url}"

    def _cache_get(self, url: str) -> str | None:
        """Return the cached body for ``url``, or ``None`` if absent or expired.

        Expired entries are removed when they are looked up.
        """
        if self.cache_ttl_seconds <= 0:
            return None

        entry = self._response_cache.get(self._cache_key(url))
        if entry is None:
            return None

        now = datetime.now(timezone.utc)
        if now - entry.fetched_at >= timedelta(seconds=self.cache_ttl_seconds):
            self._response_cache.pop(self._cache_key(url), None)
            return None

        return entry.body

    def _cache_set(self, url: str, body: str) -> None:
        """Store ``body`` for ``url`` unless caching is disabled."""
        if self.cache_ttl_seconds <= 0:
            return

        self._response_cache[self._cache_key(url)] = _CacheEntry(
            body=body,
            fetched_at=datetime.now(timezone.utc),
        )

    def get_text(self, url: str) -> str:
        """Fetch ``url`` and return the response body as text.

        A fresh cached body is returned immediately, without consulting
        robots.txt or the rate limiter. Otherwise the robots check and the
        per-origin delay run before the request.

        Raises:
            SourceNotAllowedError: If robots.txt forbids the URL.
            SourceFetchError: On transport errors or non-success statuses.
        """
        cached = self._cache_get(url)
        if cached is not None:
            return cached

        self._ensure_allowed(url)
        self._rate_limit(url)

        try:
            response = self._client.get(url)
            response.raise_for_status()
        except httpx.HTTPError as exc:
            raise SourceFetchError(f"Failed to fetch {url}: {exc}") from exc

        self._cache_set(url, response.text)
        return response.text

    def get_json(self, url: str) -> object:
        """Fetch ``url`` via ``get_text`` and decode the body as JSON.

        Raises:
            SourceNotAllowedError: If robots.txt forbids the URL.
            SourceFetchError: If fetching fails or the body is not valid JSON.
        """
        text = self.get_text(url)

        try:
            return json.loads(text)
        except json.JSONDecodeError as exc:
            raise SourceFetchError(f"Failed to parse JSON from {url}: {exc}") from exc
|
openmatchkit/models.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class MatchStatus(str, Enum):
    """State of a match, as string-valued enum members.

    Because the class also derives from ``str``, each member compares equal to
    its string value.
    """

    SCHEDULED = "scheduled"
    LIVE = "live"
    HALF_TIME = "half_time"
    FULL_TIME = "full_time"
    POSTPONED = "postponed"
    CANCELLED = "cancelled"
    # Default used by the models in this module when no status is supplied.
    UNKNOWN = "unknown"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class Team(BaseModel):
    """A team identified by a required, non-empty name.

    ``str_strip_whitespace=True`` asks pydantic to strip leading and trailing
    whitespace from string fields.
    """

    model_config = ConfigDict(str_strip_whitespace=True)

    # Required; must contain at least one character.
    name: str = Field(min_length=1)
    # Optional short code; its format is not constrained here.
    code: str | None = None
    # Optional country; its format is not constrained here.
    country: str | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TeamInfo(BaseModel):
    """Team summary record with an appearance count and a list of sources.

    Unlike ``Team``, this model configures no whitespace stripping and no
    minimum name length.
    """

    name: str
    code: str | None = None
    country: str | None = None
    # Defaults to 0; presumably the number of matches the team was seen in
    # (confirm against the code that builds this model).
    appearances: int = 0
    # Defaults to a fresh empty list per instance; presumably source names.
    sources: list[str] = Field(default_factory=list)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Player(BaseModel):
    """A player identified by a required, non-empty name.

    ``str_strip_whitespace=True`` asks pydantic to strip leading and trailing
    whitespace from string fields. Every field other than ``name`` is optional.
    """

    model_config = ConfigDict(str_strip_whitespace=True)

    # Required; must contain at least one character.
    name: str = Field(min_length=1)
    # Optional identifier; its origin and format are not defined here.
    player_id: str | None = None
    # Optional team, stored as plain text rather than a Team object.
    team: str | None = None
    position: str | None = None
    shirt_number: int | None = None
    country: str | None = None
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class Score(BaseModel):
    """Goals for the home and away side.

    Both values default to ``None``. The Elo and Poisson fitting code in this
    package skips matches where either value is ``None``.
    """

    home: int | None = None
    away: int | None = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class Match(BaseModel):
    """A single match between a home and an away team.

    Required fields are ``match_id``, ``competition``, ``home``, ``away`` and
    ``source``; everything else has a default.
    """

    match_id: str
    competition: str
    season: str | None = None
    # NOTE: the field name shadows the builtin ``round`` inside this class body.
    round: str | None = None
    group: str | None = None
    # Timezone handling is not enforced here (naive or aware are both accepted).
    kickoff: datetime | None = None
    home: Team
    away: Team
    # Defaults to a Score with both sides set to None.
    score: Score = Field(default_factory=Score)
    status: MatchStatus = MatchStatus.UNKNOWN
    venue: str | None = None
    # Required; presumably the name of the data source (confirm with callers).
    source: str
    source_url: str | None = None
    # Presumably when the data was retrieved; not set automatically here.
    fetched_at: datetime | None = None
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class StandingRow(BaseModel):
    """One row of a standings table for a team.

    All counters default to 0. ``goal_difference`` and ``points`` are stored
    values; this model does not derive them from the other fields.
    """

    # Team as plain text rather than a Team object.
    team: str
    group: str | None = None
    played: int = 0
    won: int = 0
    drawn: int = 0
    lost: int = 0
    goals_for: int = 0
    goals_against: int = 0
    goal_difference: int = 0
    points: int = 0
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class MatchClock(BaseModel):
    """Clock information for a match; every field is optional."""

    minute: int | None = None
    added_time: int | None = None
    # Free-form text; the set of period names is not defined here.
    period: str | None = None
    # Presumably a ready-to-show clock string (confirm with the producers).
    display: str | None = None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class MatchEvent(BaseModel):
    """A single event within a match.

    Only ``event_type`` is required; it is free-form text, not an enum.
    """

    event_type: str
    # Team and people are stored as plain text, not as Team/Player objects.
    team: str | None = None
    player: str | None = None
    assist: str | None = None
    minute: int | None = None
    added_time: int | None = None
    detail: str | None = None
    # Presumably the score after the event (confirm with the producers).
    home_score: int | None = None
    away_score: int | None = None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class TeamMatchStats(BaseModel):
    """Per-team statistics for one match.

    Only ``team`` is required; every statistic defaults to ``None``.
    """

    # Team as plain text rather than a Team object.
    team: str
    # Unit (fraction vs. percentage) is not defined here; check the producer.
    possession: float | None = None
    shots: int | None = None
    shots_on_target: int | None = None
    corners: int | None = None
    fouls: int | None = None
    offsides: int | None = None
    yellow_cards: int | None = None
    red_cards: int | None = None
    saves: int | None = None
    passes: int | None = None
    # Unit (fraction vs. percentage) is not defined here; check the producer.
    pass_accuracy: float | None = None
    expected_goals: float | None = None
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class PlayerMatchStats(BaseModel):
    """Per-player statistics for one match.

    Goals, assists and cards default to 0; the remaining statistics default to
    ``None``.
    """

    player: Player
    started: bool | None = None
    minutes_played: int | None = None
    goals: int = 0
    assists: int = 0
    yellow_cards: int = 0
    red_cards: int = 0
    shots: int | None = None
    shots_on_target: int | None = None
    saves: int | None = None
    passes: int | None = None
    tackles: int | None = None
    # Scale of the rating is not defined here.
    rating: float | None = None
    source: str | None = None
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
class TeamLineup(BaseModel):
    """Lineup of one team: starters and substitutes plus optional details."""

    team: Team
    # Free-form text; no format is enforced.
    formation: str | None = None
    coach: str | None = None
    # Both lists default to a fresh empty list per instance.
    starters: list[Player] = Field(default_factory=list)
    substitutes: list[Player] = Field(default_factory=list)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class Scoreboard(BaseModel):
    """A match bundled with its clock, statistics, lineups and events.

    Only ``match`` is required; the clock defaults to an empty ``MatchClock``
    and every list defaults to a fresh empty list.
    """

    match: Match
    clock: MatchClock = Field(default_factory=MatchClock)
    team_stats: list[TeamMatchStats] = Field(default_factory=list)
    lineups: list[TeamLineup] = Field(default_factory=list)
    events: list[MatchEvent] = Field(default_factory=list)
    player_stats: list[PlayerMatchStats] = Field(default_factory=list)
    # Free-form notes; presumably about data provenance (confirm with producers).
    source_notes: list[str] = Field(default_factory=list)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class PlayerTotals(BaseModel):
    """Aggregate counters for a player; every counter defaults to 0."""

    appearances: int = 0
    starts: int = 0
    minutes_played: int = 0
    goals: int = 0
    assists: int = 0
    yellow_cards: int = 0
    red_cards: int = 0
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
class PlayerMatchAppearance(BaseModel):
    """A player's record in one match.

    Required fields are ``match_id``, ``competition``, ``team`` and ``source``.
    """

    match_id: str
    competition: str
    season: str | None = None
    kickoff: datetime | None = None
    # Team and opponent are plain text, not Team objects.
    team: str
    opponent: str | None = None
    # Restricted to these four literal values; defaults to "unknown".
    home_away: Literal["home", "away", "neutral", "unknown"] = "unknown"
    status: MatchStatus = MatchStatus.UNKNOWN
    minutes_played: int | None = None
    started: bool | None = None
    goals: int = 0
    assists: int = 0
    yellow_cards: int = 0
    red_cards: int = 0
    source: str
    source_url: str | None = None
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class PlayerHistory(BaseModel):
    """A player with aggregate totals and a list of per-match appearances.

    Only ``player`` is required. ``totals`` is a stored value; this model does
    not compute it from ``appearances``.
    """

    player: Player
    totals: PlayerTotals = Field(default_factory=PlayerTotals)
    appearances: list[PlayerMatchAppearance] = Field(default_factory=list)
    # Free-form notes; presumably about data provenance (confirm with producers).
    source_notes: list[str] = Field(default_factory=list)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
class Prediction(BaseModel):
    """Outcome probabilities and expected goals for a home/away pairing.

    The probability fields are plain floats: this model declares no range
    constraints and does not check that they sum to 1.
    """

    # Team names as plain text.
    home: str
    away: str
    home_win_probability: float
    draw_probability: float
    away_win_probability: float
    expected_home_goals: float
    expected_away_goals: float
    # Name of the model that produced the prediction (e.g. "simple_poisson").
    model: str
    # Number of matches the model was fitted on; defaults to 0.
    training_matches: int = 0
    # Disclaimer attached to every prediction unless overridden.
    note: str = "Educational baseline only. Not betting, financial, or professional advice."
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from openmatchkit.models import Match
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class EloRatings:
    """Small Elo baseline for team strength summaries.

    Ratings are kept per team name in ``ratings``; a team without an entry is
    treated as having ``default_rating``.
    """

    def __init__(self, default_rating: float = 1500.0, k_factor: float = 24.0) -> None:
        """Store the starting rating and the update step size."""
        self.default_rating = default_rating
        self.k_factor = k_factor
        self.ratings: dict[str, float] = {}

    def rating(self, team: str) -> float:
        """Return the current rating of ``team`` (the default if unseen)."""
        return self.ratings.get(team, self.default_rating)

    def _expected(self, team: str, opponent: str) -> float:
        """Return the expected score of ``team`` against ``opponent``.

        Logistic Elo curve with a scale of 400 rating points.
        """
        gap = self.rating(opponent) - self.rating(team)
        return 1 / (1 + 10 ** (gap / 400))

    def _actual_scores(self, match: Match) -> tuple[float, float] | None:
        """Return ``(home, away)`` results: 1 win, 0.5 draw, 0 loss.

        Returns ``None`` when either side's score is missing.
        """
        home_goals = match.score.home
        away_goals = match.score.away
        if home_goals is None or away_goals is None:
            return None

        if home_goals == away_goals:
            return 0.5, 0.5
        return (1.0, 0.0) if home_goals > away_goals else (0.0, 1.0)

    def fit(self, matches: list[Match]) -> None:
        """Update ratings from ``matches``, processed in the given order.

        Matches without a complete score are skipped. Calling ``fit`` again
        continues from the current ratings.
        """
        for game in matches:
            outcome = self._actual_scores(game)
            if outcome is None:
                continue

            home_name = game.home.name
            away_name = game.away.name
            home_result, away_result = outcome
            # Both expectations are taken before either rating is changed.
            home_forecast = self._expected(home_name, away_name)
            away_forecast = self._expected(away_name, home_name)

            home_delta = self.k_factor * (home_result - home_forecast)
            self.ratings[home_name] = self.rating(home_name) + home_delta
            away_delta = self.k_factor * (away_result - away_forecast)
            self.ratings[away_name] = self.rating(away_name) + away_delta
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from collections import defaultdict
|
|
5
|
+
|
|
6
|
+
from openmatchkit.models import Match, Prediction
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class SimplePoissonPredictor:
    """Educational Poisson baseline for win/draw/loss probabilities.

    Home and away goals are modelled as independent Poisson variables whose
    means come from simple per-team scoring and conceding averages.
    """

    # League-wide averages used until ``fit`` has seen at least one scored match.
    _DEFAULT_HOME_GOALS = 1.35
    _DEFAULT_AWAY_GOALS = 1.10

    def __init__(self, max_goals: int = 8) -> None:
        """Create an unfitted predictor.

        Args:
            max_goals: Highest goal count per side included in the scoreline
                grid summed by ``predict``.

        Raises:
            ValueError: If ``max_goals`` is negative; the grid would be empty
                and ``predict`` would divide by zero.
        """
        if max_goals < 0:
            raise ValueError("max_goals must be non-negative")

        self.max_goals = max_goals
        self._reset()

    def _reset(self) -> None:
        """Return all fitted state to the unfitted defaults."""
        self.team_for_goals: dict[str, float] = {}
        self.team_against_goals: dict[str, float] = {}
        self.global_home_goals = self._DEFAULT_HOME_GOALS
        self.global_away_goals = self._DEFAULT_AWAY_GOALS
        self.training_matches = 0

    def fit(self, matches: list[Match]) -> None:
        """Estimate scoring averages from matches that have a full score.

        Each call replaces the previous fit. If no match has a full score the
        predictor returns to its defaults, so ``training_matches`` never
        disagrees with the averages actually used by ``predict``.
        """
        self._reset()

        scored = [m for m in matches if m.score.home is not None and m.score.away is not None]
        self.training_matches = len(scored)

        if not scored:
            return

        self.global_home_goals = sum(m.score.home for m in scored) / len(scored)
        self.global_away_goals = sum(m.score.away for m in scored) / len(scored)

        goals_for: dict[str, list[int]] = defaultdict(list)
        goals_against: dict[str, list[int]] = defaultdict(list)

        for match in scored:
            home = match.home.name
            away = match.away.name
            hg = match.score.home
            ag = match.score.away

            goals_for[home].append(hg)
            goals_against[home].append(ag)
            goals_for[away].append(ag)
            goals_against[away].append(hg)

        self.team_for_goals = {
            team: sum(values) / len(values) for team, values in goals_for.items()
        }
        self.team_against_goals = {
            team: sum(values) / len(values) for team, values in goals_against.items()
        }

    def _poisson(self, goals: int, expected: float) -> float:
        """Return the Poisson probability of exactly ``goals`` given mean ``expected``."""
        return (expected**goals) * math.exp(-expected) / math.factorial(goals)

    def _expected_goals(self, home: str, away: str) -> tuple[float, float]:
        """Return ``(expected_home, expected_away)`` goals for the pairing.

        Each side's expectation is the mean of its own scoring average and the
        opponent's conceding average, with a floor of 0.2. Teams not seen in
        ``fit`` fall back to the global home/away averages.
        """
        home_attack = self.team_for_goals.get(home, self.global_home_goals)
        # An unseen away side is assumed to concede the average home tally.
        away_defense = self.team_against_goals.get(away, self.global_home_goals)

        away_attack = self.team_for_goals.get(away, self.global_away_goals)
        home_defense = self.team_against_goals.get(home, self.global_away_goals)

        expected_home = max(0.2, (home_attack + away_defense) / 2)
        expected_away = max(0.2, (away_attack + home_defense) / 2)

        return expected_home, expected_away

    def predict(self, home: str, away: str) -> Prediction:
        """Return outcome probabilities for ``home`` against ``away``.

        Probabilities are summed over every scoreline from 0-0 up to
        ``max_goals``-``max_goals`` and then normalised, because the truncated
        grid does not add up to exactly 1.
        """
        expected_home, expected_away = self._expected_goals(home, away)

        home_win = 0.0
        draw = 0.0
        away_win = 0.0

        for hg in range(self.max_goals + 1):
            for ag in range(self.max_goals + 1):
                probability = self._poisson(hg, expected_home) * self._poisson(ag, expected_away)

                if hg > ag:
                    home_win += probability
                elif hg == ag:
                    draw += probability
                else:
                    away_win += probability

        total = home_win + draw + away_win

        return Prediction(
            home=home,
            away=away,
            home_win_probability=home_win / total,
            draw_probability=draw / total,
            away_win_probability=away_win / total,
            expected_home_goals=expected_home,
            expected_away_goals=expected_away,
            model="simple_poisson",
            training_matches=self.training_matches,
        )
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from openmatchkit.sources.football_data_uk import FootballDataUkSource
|
|
2
|
+
from openmatchkit.sources.json_file import JsonFileSource
|
|
3
|
+
from openmatchkit.sources.openfootball import OpenFootballSource
|
|
4
|
+
from openmatchkit.sources.public_html import PublicHtmlSource
|
|
5
|
+
|
|
6
|
+
# Public names of this package: the source classes imported above.
__all__ = [
    "FootballDataUkSource",
    "JsonFileSource",
    "OpenFootballSource",
    "PublicHtmlSource",
]
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Protocol
|
|
4
|
+
|
|
5
|
+
from openmatchkit.models import Match, PlayerHistory, Scoreboard
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MatchSource(Protocol):
    """Structural interface for match data sources.

    Any object that has a ``name`` attribute and these methods satisfies the
    protocol; inheriting from it is not required. The bodies are stubs.
    """

    # Identifier of the source; presumably the value stored in Match.source
    # (confirm against the concrete sources).
    name: str

    # Matches for a competition; ``season`` is optional (presumably a filter).
    def fixtures(self, competition: str, season: str | None = None) -> list[Match]: ...

    # Matches currently reported by the source as live (semantics are up to
    # each implementation).
    def live_scores(self) -> list[Match]: ...

    # Scoreboard for one match id; the return type allows None, presumably for
    # "not found" (confirm against the concrete sources).
    def scoreboard(
        self,
        match_id: str,
        competition: str | None = None,
        season: str | None = None,
    ) -> Scoreboard | None: ...

    # Scoreboard counterpart of live_scores.
    def live_scoreboards(self) -> list[Scoreboard]: ...

    # History for a player given as plain text; always returns a PlayerHistory
    # (the return type does not allow None).
    def player_history(
        self,
        player: str,
        competition: str | None = None,
        season: str | None = None,
    ) -> PlayerHistory: ...
|