datafc 1.5.0__tar.gz → 2.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datafc-2.1.0/PKG-INFO +951 -0
- datafc-2.1.0/README.md +925 -0
- datafc-2.1.0/datafc/__init__.py +79 -0
- datafc-2.1.0/datafc/exceptions.py +46 -0
- datafc-2.1.0/datafc/sofascore/__init__.py +82 -0
- datafc-2.1.0/datafc/sofascore/_parsers.py +560 -0
- datafc-2.1.0/datafc/sofascore/aio.py +2394 -0
- datafc-2.1.0/datafc/sofascore/fetch_average_positions_data.py +91 -0
- datafc-2.1.0/datafc/sofascore/fetch_coordinates_data.py +102 -0
- datafc-2.1.0/datafc/sofascore/fetch_goal_networks_data.py +148 -0
- datafc-2.1.0/datafc/sofascore/fetch_incidents_data.py +87 -0
- datafc-2.1.0/datafc/sofascore/fetch_league_player_stats_data.py +165 -0
- datafc-2.1.0/datafc/sofascore/fetch_lineups_data.py +80 -0
- datafc-2.1.0/datafc/sofascore/fetch_match_data.py +84 -0
- datafc-2.1.0/datafc/sofascore/fetch_match_details_data.py +92 -0
- datafc-2.1.0/datafc/sofascore/fetch_match_h2h_data.py +82 -0
- datafc-2.1.0/datafc/sofascore/fetch_match_odds_data.py +73 -0
- datafc-2.1.0/datafc/sofascore/fetch_match_stats_data.py +80 -0
- datafc-2.1.0/datafc/sofascore/fetch_momentum_data.py +80 -0
- datafc-2.1.0/datafc/sofascore/fetch_past_matches_data.py +140 -0
- datafc-2.1.0/datafc/sofascore/fetch_player_career_stats_data.py +124 -0
- datafc-2.1.0/datafc/sofascore/fetch_player_data.py +132 -0
- datafc-2.1.0/datafc/sofascore/fetch_player_match_log_data.py +126 -0
- datafc-2.1.0/datafc/sofascore/fetch_player_national_team_data.py +108 -0
- datafc-2.1.0/datafc/sofascore/fetch_player_stats_data.py +126 -0
- datafc-2.1.0/datafc/sofascore/fetch_player_transfers_data.py +122 -0
- datafc-2.1.0/datafc/sofascore/fetch_pregame_form_data.py +109 -0
- datafc-2.1.0/datafc/sofascore/fetch_referee_stats_data.py +104 -0
- datafc-2.1.0/datafc/sofascore/fetch_search_data.py +93 -0
- datafc-2.1.0/datafc/sofascore/fetch_season_rounds_data.py +99 -0
- datafc-2.1.0/datafc/sofascore/fetch_seasons_data.py +86 -0
- datafc-2.1.0/datafc/sofascore/fetch_shots_data.py +80 -0
- datafc-2.1.0/datafc/sofascore/fetch_squad_data.py +124 -0
- datafc-2.1.0/datafc/sofascore/fetch_standings_data.py +80 -0
- datafc-2.1.0/datafc/sofascore/fetch_substitutions_data.py +80 -0
- datafc-2.1.0/datafc/sofascore/fetch_team_data.py +132 -0
- datafc-2.1.0/datafc/sofascore/fetch_team_match_history_data.py +108 -0
- datafc-2.1.0/datafc/sofascore/fetch_team_stats_data.py +123 -0
- datafc-2.1.0/datafc/sofascore/fetch_team_transfers_data.py +128 -0
- datafc-2.1.0/datafc/sofascore/fetch_upcoming_matches_data.py +105 -0
- datafc-2.1.0/datafc/utils/__init__.py +35 -0
- datafc-2.1.0/datafc/utils/_async_client.py +164 -0
- datafc-2.1.0/datafc/utils/_cache.py +142 -0
- datafc-2.1.0/datafc/utils/_client.py +136 -0
- datafc-2.1.0/datafc/utils/_config.py +93 -0
- datafc-2.1.0/datafc/utils/_helpers.py +26 -0
- datafc-2.1.0/datafc/utils/_save_files.py +142 -0
- datafc-2.1.0/datafc/utils/_tournament_info.py +66 -0
- datafc-2.1.0/datafc/utils/_validate.py +98 -0
- datafc-2.1.0/datafc.egg-info/PKG-INFO +951 -0
- datafc-2.1.0/datafc.egg-info/SOURCES.txt +55 -0
- datafc-2.1.0/datafc.egg-info/requires.txt +13 -0
- datafc-2.1.0/pyproject.toml +55 -0
- datafc-1.5.0/PKG-INFO +0 -1086
- datafc-1.5.0/README.md +0 -1070
- datafc-1.5.0/datafc/__init__.py +0 -3
- datafc-1.5.0/datafc/sofascore/__init__.py +0 -31
- datafc-1.5.0/datafc/sofascore/fetch_coordinates_data.py +0 -118
- datafc-1.5.0/datafc/sofascore/fetch_goal_networks_data.py +0 -153
- datafc-1.5.0/datafc/sofascore/fetch_lineups_data.py +0 -148
- datafc-1.5.0/datafc/sofascore/fetch_match_data.py +0 -156
- datafc-1.5.0/datafc/sofascore/fetch_match_odds_data.py +0 -113
- datafc-1.5.0/datafc/sofascore/fetch_match_stats_data.py +0 -111
- datafc-1.5.0/datafc/sofascore/fetch_momentum_data.py +0 -106
- datafc-1.5.0/datafc/sofascore/fetch_past_matches_data.py +0 -180
- datafc-1.5.0/datafc/sofascore/fetch_player_stats_data.py +0 -132
- datafc-1.5.0/datafc/sofascore/fetch_shots_data.py +0 -142
- datafc-1.5.0/datafc/sofascore/fetch_squad_data.py +0 -123
- datafc-1.5.0/datafc/sofascore/fetch_standings_data.py +0 -110
- datafc-1.5.0/datafc/sofascore/fetch_substitutions_data.py +0 -110
- datafc-1.5.0/datafc/sofascore/fetch_team_stats_data.py +0 -114
- datafc-1.5.0/datafc/utils/__init__.py +0 -0
- datafc-1.5.0/datafc/utils/_config.py +0 -25
- datafc-1.5.0/datafc/utils/_save_files.py +0 -50
- datafc-1.5.0/datafc/utils/_setup_webdriver.py +0 -49
- datafc-1.5.0/datafc.egg-info/PKG-INFO +0 -1086
- datafc-1.5.0/datafc.egg-info/SOURCES.txt +0 -28
- datafc-1.5.0/datafc.egg-info/requires.txt +0 -4
- datafc-1.5.0/setup.py +0 -30
- {datafc-1.5.0 → datafc-2.1.0}/LICENSE +0 -0
- {datafc-1.5.0 → datafc-2.1.0}/datafc.egg-info/dependency_links.txt +0 -0
- {datafc-1.5.0 → datafc-2.1.0}/datafc.egg-info/top_level.txt +0 -0
- {datafc-1.5.0 → datafc-2.1.0}/setup.cfg +0 -0
datafc-2.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,951 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: datafc
|
|
3
|
+
Version: 2.1.0
|
|
4
|
+
Summary: Fetch, process, and export structured football data.
|
|
5
|
+
Author-email: Uraz Akgül <urazdev@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/urazakgul/datafc
|
|
8
|
+
Project-URL: Repository, https://github.com/urazakgul/datafc
|
|
9
|
+
Project-URL: Issues, https://github.com/urazakgul/datafc/issues
|
|
10
|
+
Keywords: football,soccer,data,analytics,sofascore
|
|
11
|
+
Requires-Python: >=3.8
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
License-File: LICENSE
|
|
14
|
+
Requires-Dist: pandas>=1.5
|
|
15
|
+
Requires-Dist: curl_cffi>=0.7
|
|
16
|
+
Requires-Dist: openpyxl>=3.0
|
|
17
|
+
Provides-Extra: parquet
|
|
18
|
+
Requires-Dist: pyarrow>=12.0; extra == "parquet"
|
|
19
|
+
Provides-Extra: dev
|
|
20
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
21
|
+
Requires-Dist: mypy>=1.0; extra == "dev"
|
|
22
|
+
Requires-Dist: pandas-stubs>=2.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
24
|
+
Requires-Dist: pytest-mock>=3.12; extra == "dev"
|
|
25
|
+
Dynamic: license-file
|
|
26
|
+
|
|
27
|
+
# datafc v2.1.0
|
|
28
|
+
|
|
29
|
+
## Overview
|
|
30
|
+
|
|
31
|
+
`datafc` fetches, processes, and exports structured football data. It provides **33 functions** covering tournament metadata, standings, squad rosters, match fixtures, shots, lineups, player heatmaps, odds, and more — all returning clean `pandas` DataFrames ready for analysis. Sofascore is currently the only supported data source.
|
|
32
|
+
|
|
33
|
+
> **Finding IDs:** `tournament_id` and `season_id` can be discovered two ways:
|
|
34
|
+
> - **From the URL:** open a league page on Sofascore (e.g. `sofascore.com/.../trendyol-super-lig/52#id:63814`). The number after the league slug is the `tournament_id` (`52`), and the number after `#id:` is the `season_id` (`63814`).
|
|
35
|
+
> - **Programmatically:** use `search_data("super lig", entity_type="tournament")` to find the tournament ID, then `seasons_data(tournament_id)` to list all available seasons.
|
|
36
|
+
|
|
37
|
+
## What Data Can You Access?
|
|
38
|
+
|
|
39
|
+
### Discovery
|
|
40
|
+
|
|
41
|
+
| Function | What it returns |
|
|
42
|
+
|---|---|
|
|
43
|
+
| `search_data` | Search for players, teams, tournaments, or managers by name |
|
|
44
|
+
| `seasons_data` | All seasons and their IDs for a given tournament |
|
|
45
|
+
|
|
46
|
+
### Tournament / Season Metadata
|
|
47
|
+
|
|
48
|
+
| Function | What it returns |
|
|
49
|
+
|---|---|
|
|
50
|
+
| `season_rounds_data` | All rounds/matchweeks in a season |
|
|
51
|
+
|
|
52
|
+
### League / Season
|
|
53
|
+
|
|
54
|
+
| Function | What it returns |
|
|
55
|
+
|---|---|
|
|
56
|
+
| `standings_data` | League table (Total, Home, Away) with W/D/L, goals, points |
|
|
57
|
+
| `team_data` | Team profiles: stadium, kit colors, manager, venue capacity |
|
|
58
|
+
| `team_stats_data` | 100+ season-level stats per team |
|
|
59
|
+
| `team_transfers_data` | All incoming and outgoing transfers per team |
|
|
60
|
+
| `player_stats_data` | Top player stats per team (goals, assists, key passes, …) |
|
|
61
|
+
| `squad_data` | Squad roster with age, height, market value, contract expiry |
|
|
62
|
+
| `upcoming_matches_data` | Upcoming fixtures for all teams in the standings |
|
|
63
|
+
| `team_match_history_data` | Full match history for a team across all competitions |
|
|
64
|
+
| `league_player_stats_data` | Wide-format player rankings, sortable by any metric |
|
|
65
|
+
|
|
66
|
+
### Matchweek
|
|
67
|
+
|
|
68
|
+
| Function | What it returns |
|
|
69
|
+
|---|---|
|
|
70
|
+
| `match_data` | Fixtures for a matchweek: score, status, home/away teams |
|
|
71
|
+
| `match_details_data` | Referee info (name, cards, games) and venue details per match |
|
|
72
|
+
| `match_stats_data` | Aggregate team stats per match (possession, shots, fouls, …) |
|
|
73
|
+
| `match_odds_data` | Pre-match and live betting odds per market (e.g. 1/X/2) |
|
|
74
|
+
| `match_h2h_data` | All-time H2H record between the two teams |
|
|
75
|
+
| `momentum_data` | Minute-by-minute momentum score throughout the match |
|
|
76
|
+
| `pregame_form_data` | Last 5 results, avg rating, league position, and squad value before each match |
|
|
77
|
+
| `shots_data` | Every shot: coordinates, xG, xGOT, outcome, body part |
|
|
78
|
+
| `lineups_data` | Starting XI and substitutes with per-match player stats |
|
|
79
|
+
| `substitutions_data` | Substitution events: minute, player in, player out |
|
|
80
|
+
| `incidents_data` | Goals, cards, and VAR decisions per match |
|
|
81
|
+
| `average_positions_data` | Average pitch position (x/y) per player |
|
|
82
|
+
| `coordinates_data` | Heatmap touch coordinates per player (requires `lineups_data` output) |
|
|
83
|
+
| `goal_networks_data` | Goal-sequence coordinates (passes, shots, goalkeeper position) |
|
|
84
|
+
| `past_matches_data` | Historical H2H results for team pairs in a given matchweek |
|
|
85
|
+
|
|
86
|
+
### Player
|
|
87
|
+
|
|
88
|
+
| Function | What it returns |
|
|
89
|
+
|---|---|
|
|
90
|
+
| `player_data` | Player profile: age, nationality, height, market value |
|
|
91
|
+
| `player_transfers_data` | Transfer history per player |
|
|
92
|
+
| `player_career_stats_data` | Season-by-season career stats across all competitions (long format) |
|
|
93
|
+
| `player_national_team_data` | National team appearances, goals, and debut date |
|
|
94
|
+
| `player_match_log_data` | Match-by-match in-game statistics across all recorded matches |
|
|
95
|
+
|
|
96
|
+
### Referee
|
|
97
|
+
|
|
98
|
+
| Function | What it returns |
|
|
99
|
+
|---|---|
|
|
100
|
+
| `referee_stats_data` | Career stats for a referee: games, cards, and per-game averages |
|
|
101
|
+
|
|
102
|
+
> **Coverage:** Any league and season available on Sofascore. For Turkey Super Lig, every season from 1980/81 to the present is accessible.
|
|
103
|
+
|
|
104
|
+
## Installation
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
pip install datafc
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
To install the latest development version:
|
|
111
|
+
|
|
112
|
+
```bash
|
|
113
|
+
pip install git+https://github.com/urazakgul/datafc.git
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
To upgrade an existing installation:
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
pip install --upgrade datafc
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Quick Start
|
|
123
|
+
|
|
124
|
+
```python
|
|
125
|
+
from datafc import (
|
|
126
|
+
standings_data,
|
|
127
|
+
match_data,
|
|
128
|
+
shots_data,
|
|
129
|
+
league_player_stats_data,
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
standings_df = standings_data(tournament_id=52, season_id=77805)
|
|
133
|
+
|
|
134
|
+
match_df = match_data(tournament_id=52, season_id=77805, week_number=1)
|
|
135
|
+
|
|
136
|
+
shots_df = shots_data(match_df=match_df)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
top_scorers = league_player_stats_data(
|
|
140
|
+
tournament_id=52,
|
|
141
|
+
season_id=77805,
|
|
142
|
+
order="-goals",
|
|
143
|
+
fields=["goals", "assists", "rating"],
|
|
144
|
+
max_players=20,
|
|
145
|
+
)
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Async API
|
|
149
|
+
|
|
150
|
+
All functions are also available in async form via `datafc.aio`, designed for fetching multiple weeks or matches in parallel with `asyncio.gather()`.
|
|
151
|
+
|
|
152
|
+
```python
|
|
153
|
+
import asyncio
|
|
154
|
+
import pandas as pd
|
|
155
|
+
from datafc import aio
|
|
156
|
+
|
|
157
|
+
async def fetch_full_season(tournament_id, season_id, total_weeks):
|
|
158
|
+
tasks = [
|
|
159
|
+
aio.match_data(tournament_id, season_id, week_number=w)
|
|
160
|
+
for w in range(1, total_weeks + 1)
|
|
161
|
+
]
|
|
162
|
+
frames = await asyncio.gather(*tasks)
|
|
163
|
+
return pd.concat(frames, ignore_index=True)
|
|
164
|
+
|
|
165
|
+
df = asyncio.run(fetch_full_season(52, 63814, total_weeks=38))
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
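Pass `return_exceptions=True` when gathering independent coroutines so that a failure in one is returned as an exception object in `results` instead of being raised immediately and discarding the results of the others: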
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
results = await asyncio.gather(
|
|
172
|
+
aio.match_data(52, 77805, week_number=1),
|
|
173
|
+
aio.standings_data(52, 77805),
|
|
174
|
+
return_exceptions=True,
|
|
175
|
+
)
|
|
176
|
+
for label, result in zip(["match", "standings"], results):
|
|
177
|
+
if isinstance(result, Exception):
|
|
178
|
+
print(f"{label} failed: {result}")
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
Async functions accept the same parameters as their sync counterparts, including `cache`, `enable_json_export`, `enable_excel_export`, and `output_dir` (see [Caching](#caching) and [Common Parameters](#common-parameters)).
|
|
182
|
+
|
|
183
|
+
## Common Parameters
|
|
184
|
+
|
|
185
|
+
Every function accepts the following shared parameters:
|
|
186
|
+
|
|
187
|
+
| Parameter | Type | Default | Description |
|
|
188
|
+
|---|---|---|---|
|
|
189
|
+
| `data_source` | `str` | `"sofascore"` | Data source: `"sofascore"` or `"sofavpn"` (use `sofavpn` if Sofascore is blocked in your region) |
|
|
190
|
+
| `rate_limit` | `float` | `2.0` | Maximum requests per second. The limit is **global across all instances** in the same process (sync) or event loop (async) — creating multiple clients does not multiply throughput. |
|
|
191
|
+
| `cache` | `DiskCache` | `None` | Optional `DiskCache` instance for persistent response caching (see [Caching](#caching)). |
|
|
192
|
+
| `enable_json_export` | `bool` | `False` | Save output as a JSON file |
|
|
193
|
+
| `enable_excel_export` | `bool` | `False` | Save output as an Excel file |
|
|
194
|
+
| `output_dir` | `str` | `"."` | Directory for exported files |
|
|
195
|
+
|
|
196
|
+
## Caching
|
|
197
|
+
|
|
198
|
+
Responses can be cached to disk to avoid redundant API calls across sessions:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
import asyncio
|
|
202
|
+
from datafc import DiskCache
|
|
203
|
+
from datafc import aio
|
|
204
|
+
|
|
205
|
+
cache = DiskCache(cache_dir=".datafc_cache", ttl_hours=24)
|
|
206
|
+
|
|
207
|
+
async def main():
|
|
208
|
+
# First call hits the API; subsequent calls read from disk
|
|
209
|
+
df = await aio.match_data(52, 63814, week_number=1, cache=cache)
|
|
210
|
+
|
|
211
|
+
asyncio.run(main())
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
`DiskCache` stores responses as JSON files keyed by URL. Cache entries expire after `ttl_hours` (set to `0` to disable expiry). Call `cache.clear()` to invalidate all entries.
|
|
215
|
+
|
|
216
|
+
## Parquet Export
|
|
217
|
+
|
|
218
|
+
For large datasets (`player_career_stats_data`, `coordinates_data`, `lineups_data`), Parquet is significantly faster to read and write than JSON. Use `save_parquet` directly on any DataFrame returned by a fetch function:
|
|
219
|
+
|
|
220
|
+
```python
|
|
221
|
+
from datafc import player_career_stats_data, standings_data, squad_data, save_parquet
|
|
222
|
+
|
|
223
|
+
standings_df = standings_data(52, 63814)
|
|
224
|
+
squad_df = squad_data(standings_df=standings_df)
|
|
225
|
+
df = player_career_stats_data(squad_df=squad_df)
|
|
226
|
+
|
|
227
|
+
save_parquet(
|
|
228
|
+
data=df,
|
|
229
|
+
fn_name="player_career_stats_data",
|
|
230
|
+
data_source="sofascore",
|
|
231
|
+
country="Turkey",
|
|
232
|
+
tournament="Trendyol Super Lig",
|
|
233
|
+
season="25/26",
|
|
234
|
+
output_dir="data/processed",
|
|
235
|
+
)
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
Parquet export requires `pyarrow`. Install it with:
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
pip install "datafc[parquet]"
|
|
242
|
+
# or
|
|
243
|
+
pip install pyarrow
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## Exception Hierarchy
|
|
247
|
+
|
|
248
|
+
```
|
|
249
|
+
DataFCError
|
|
250
|
+
├── InvalidParameterError (bad input: unknown data_source, invalid category, etc.)
|
|
251
|
+
├── DataNotAvailableError (valid request but no data returned)
|
|
252
|
+
└── APIError (HTTP-level error from the Sofascore API)
|
|
253
|
+
├── RateLimitError (HTTP 429)
|
|
254
|
+
└── ServerError (HTTP 5xx)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
```python
|
|
258
|
+
from datafc import match_data, DataNotAvailableError, RateLimitError
|
|
259
|
+
|
|
260
|
+
try:
|
|
261
|
+
df = match_data(52, 63814, week_number=99)
|
|
262
|
+
except DataNotAvailableError:
|
|
263
|
+
print("No data for that week.")
|
|
264
|
+
except RateLimitError:
|
|
265
|
+
print("Rate limited. Lower your rate_limit or add delays.")
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
## Function Reference
|
|
269
|
+
|
|
270
|
+
### Discovery
|
|
271
|
+
|
|
272
|
+
#### `search_data`
|
|
273
|
+
|
|
274
|
+
Search for teams, players, tournaments, or managers by name. Useful for finding IDs without visiting the website.
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
from datafc import search_data
|
|
278
|
+
|
|
279
|
+
df = search_data("galatasaray", entity_type="team")
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
Parameters:
|
|
283
|
+
|
|
284
|
+
- `query` (str): Search term.
|
|
285
|
+
- `entity_type` (str, optional): Filter by type: `"team"`, `"player"`, `"tournament"`, or `"manager"`. `None` returns all types.
|
|
286
|
+
|
|
287
|
+
Columns: `entity_id`, `entity_name`, `entity_type`, `score`, `country`, `position`.
|
|
288
|
+
|
|
289
|
+
---
|
|
290
|
+
|
|
291
|
+
#### `seasons_data`
|
|
292
|
+
|
|
293
|
+
List all available seasons for a tournament. Use this to discover valid `season_id` values before calling other functions.
|
|
294
|
+
|
|
295
|
+
```python
|
|
296
|
+
from datafc import seasons_data
|
|
297
|
+
|
|
298
|
+
df = seasons_data(tournament_id=52)
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
Columns: `tournament_id`, `season_id`, `season_name`, `season_year`.
|
|
302
|
+
|
|
303
|
+
---
|
|
304
|
+
|
|
305
|
+
### Tournament / Season Metadata
|
|
306
|
+
|
|
307
|
+
#### `season_rounds_data`
|
|
308
|
+
|
|
309
|
+
Fetch all rounds (matchweeks) defined for a season. Useful for iterating over all weeks programmatically.
|
|
310
|
+
|
|
311
|
+
```python
|
|
312
|
+
from datafc import season_rounds_data
|
|
313
|
+
|
|
314
|
+
df = season_rounds_data(tournament_id=52, season_id=77805)
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
Columns: `tournament_id`, `season_id`, `round_number`, `slug`, `name`, `prefix`, `is_latest`.
|
|
318
|
+
|
|
319
|
+
---
|
|
320
|
+
|
|
321
|
+
### League / Season
|
|
322
|
+
|
|
323
|
+
#### `standings_data`
|
|
324
|
+
|
|
325
|
+
Fetch league standings for Total, Home, and Away categories.
|
|
326
|
+
|
|
327
|
+
```python
|
|
328
|
+
from datafc import standings_data
|
|
329
|
+
|
|
330
|
+
df = standings_data(tournament_id=52, season_id=77805)
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
Columns: `country`, `tournament`, `tournament_id`, `season_id`, `team_name`, `team_id`, `position`, `matches`, `wins`, `draws`, `losses`, `scores_for`, `scores_against`, `points`, `category` (`Total` / `Home` / `Away`).
|
|
334
|
+
|
|
335
|
+
---
|
|
336
|
+
|
|
337
|
+
#### `team_data`
|
|
338
|
+
|
|
339
|
+
Fetch profile and infrastructure data for every team in the standings: stadium name and capacity, kit colors, and current manager.
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
from datafc import standings_data, team_data
|
|
343
|
+
|
|
344
|
+
standings_df = standings_data(52, 63814)
|
|
345
|
+
df = team_data(standings_df=standings_df)
|
|
346
|
+
```
|
|
347
|
+
|
|
348
|
+
Columns: `country`, `tournament`, `team_id`, `team_name`, `short_name`, `slug`, `national`, `country_name`, `country_id`, `primary_color`, `secondary_color`, `text_color`, `venue_id`, `venue_name`, `venue_capacity`, `venue_city`, `manager_id`, `manager_name`, `manager_country`.
|
|
349
|
+
|
|
350
|
+
Dependencies: `standings_data`
|
|
351
|
+
|
|
352
|
+
---
|
|
353
|
+
|
|
354
|
+
#### `team_stats_data`
|
|
355
|
+
|
|
356
|
+
Fetch season-level team statistics (long format) for every team in the standings.
|
|
357
|
+
|
|
358
|
+
```python
|
|
359
|
+
from datafc import standings_data, team_stats_data
|
|
360
|
+
|
|
361
|
+
standings_df = standings_data(52, 63814)
|
|
362
|
+
df = team_stats_data(standings_df=standings_df, tournament_id=52, season_id=63814)
|
|
363
|
+
```
|
|
364
|
+
|
|
365
|
+
Parameters:
|
|
366
|
+
|
|
367
|
+
- `standings_df` (DataFrame): Output of `standings_data`.
|
|
368
|
+
- `tournament_id` (int)
|
|
369
|
+
- `season_id` (int)
|
|
370
|
+
- `season` (str, optional): Human-readable season label (e.g. `"24/25"`) used only in the export filename.
|
|
371
|
+
|
|
372
|
+
Columns: `country`, `tournament`, `team_name`, `team_id`, `stat`, `value`.
|
|
373
|
+
|
|
374
|
+
Dependencies: `standings_data`
|
|
375
|
+
|
|
376
|
+
---
|
|
377
|
+
|
|
378
|
+
#### `team_transfers_data`
|
|
379
|
+
|
|
380
|
+
Fetch all incoming and outgoing transfer records for every team in the standings.
|
|
381
|
+
|
|
382
|
+
```python
|
|
383
|
+
from datafc import standings_data, team_transfers_data
|
|
384
|
+
|
|
385
|
+
standings_df = standings_data(52, 63814)
|
|
386
|
+
df = team_transfers_data(standings_df=standings_df)
|
|
387
|
+
```
|
|
388
|
+
|
|
389
|
+
Columns: `country`, `tournament`, `team_name`, `team_id`, `direction` (`in` / `out`), `player_id`, `player_name`, `transfer_date`, `from_team_id`, `from_team_name`, `to_team_id`, `to_team_name`, `transfer_type` (`loan` / `permanent` / `free` / `end_of_contract`), `fee`, `fee_currency`.
|
|
390
|
+
|
|
391
|
+
Dependencies: `standings_data`
|
|
392
|
+
|
|
393
|
+
---
|
|
394
|
+
|
|
395
|
+
#### `player_stats_data`
|
|
396
|
+
|
|
397
|
+
Fetch top player statistics per team (long format). Covers goals, assists, key passes, duels, and more.
|
|
398
|
+
|
|
399
|
+
```python
|
|
400
|
+
from datafc import standings_data, player_stats_data
|
|
401
|
+
|
|
402
|
+
standings_df = standings_data(52, 63814)
|
|
403
|
+
df = player_stats_data(standings_df=standings_df, tournament_id=52, season_id=63814)
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
Columns: `country`, `tournament`, `team_name`, `team_id`, `player_name`, `player_id`, `position`, `stat_name`, `stat_value`.
|
|
407
|
+
|
|
408
|
+
Dependencies: `standings_data`
|
|
409
|
+
|
|
410
|
+
---
|
|
411
|
+
|
|
412
|
+
#### `squad_data`
|
|
413
|
+
|
|
414
|
+
Fetch full squad roster for every team: age, height, nationality, position, preferred foot, contract expiry, and market value.
|
|
415
|
+
|
|
416
|
+
```python
|
|
417
|
+
from datafc import standings_data, squad_data
|
|
418
|
+
|
|
419
|
+
standings_df = standings_data(52, 63814)
|
|
420
|
+
df = squad_data(standings_df=standings_df)
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
Columns: `country`, `tournament`, `tournament_id`, `season_id`, `team_name`, `team_id`, `player_name`, `player_id`, `age`, `height`, `player_country`, `position`, `preferred_foot`, `contract_until`, `market_value`, `market_currency`.
|
|
424
|
+
|
|
425
|
+
Dependencies: `standings_data`
|
|
426
|
+
|
|
427
|
+
---
|
|
428
|
+
|
|
429
|
+
#### `team_match_history_data`
|
|
430
|
+
|
|
431
|
+
Fetch the complete match history for a single team across all competitions.
|
|
432
|
+
|
|
433
|
+
```python
|
|
434
|
+
from datafc import team_match_history_data
|
|
435
|
+
|
|
436
|
+
df = team_match_history_data(team_id=4748) # Brazil
|
|
437
|
+
```
|
|
438
|
+
|
|
439
|
+
The `team_id` can be obtained from `standings_data()`, `squad_data()`, or `search_data()`.
|
|
440
|
+
|
|
441
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `home_score_period1`, `home_score_period2`, `home_score_normaltime`, `home_score_display`, `home_score_current`, `away_score_period1`, `away_score_period2`, `away_score_normaltime`, `away_score_display`, `away_score_current`, `start_timestamp`, `status`.
|
|
442
|
+
|
|
443
|
+
> **Note:** Results span all competitions in Sofascore's database (league, cup, international). Filter by the `tournament` column to narrow down to a specific competition.
|
|
444
|
+
|
|
445
|
+
Dependencies: none
|
|
446
|
+
|
|
447
|
+
---
|
|
448
|
+
|
|
449
|
+
#### `upcoming_matches_data`
|
|
450
|
+
|
|
451
|
+
Fetch upcoming fixtures for all teams currently in the standings.
|
|
452
|
+
|
|
453
|
+
```python
|
|
454
|
+
from datafc import standings_data, upcoming_matches_data
|
|
455
|
+
|
|
456
|
+
standings_df = standings_data(52, 63814)
|
|
457
|
+
df = upcoming_matches_data(standings_df=standings_df)
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `start_timestamp`, `status`.
|
|
461
|
+
|
|
462
|
+
> **Note:** Results may include fixtures from cup competitions (e.g. Türkiye Kupası) if a team's next scheduled match is outside the league. Filter by the `tournament` column to restrict to league fixtures only.
|
|
463
|
+
|
|
464
|
+
Dependencies: `standings_data`
|
|
465
|
+
|
|
466
|
+
---
|
|
467
|
+
|
|
468
|
+
#### `league_player_stats_data`
|
|
469
|
+
|
|
470
|
+
Fetch ranked player statistics across the entire league in wide format (one row per player). Supports pagination, position filtering, and multiple accumulation methods.
|
|
471
|
+
|
|
472
|
+
```python
|
|
473
|
+
from datafc import league_player_stats_data
|
|
474
|
+
|
|
475
|
+
# Top 50 goalscorers
|
|
476
|
+
df = league_player_stats_data(
|
|
477
|
+
tournament_id=52,
|
|
478
|
+
season_id=77805,
|
|
479
|
+
order="-goals",
|
|
480
|
+
accumulation="total",
|
|
481
|
+
fields=["goals", "assists", "rating", "expectedGoals"],
|
|
482
|
+
max_players=50,
|
|
483
|
+
)
|
|
484
|
+
|
|
485
|
+
# Top midfielders by rating per 90
|
|
486
|
+
df = league_player_stats_data(
|
|
487
|
+
tournament_id=52,
|
|
488
|
+
season_id=77805,
|
|
489
|
+
order="-rating",
|
|
490
|
+
accumulation="per90",
|
|
491
|
+
position="M",
|
|
492
|
+
max_players=20,
|
|
493
|
+
)
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
Parameters:
|
|
497
|
+
|
|
498
|
+
- `order` (str): Field to sort by; prefix it with `-` for descending order. Default `"-rating"`.
|
|
499
|
+
- `accumulation` (str): `"total"`, `"per90"`, or `"perMatch"`. Default `"total"`.
|
|
500
|
+
- `fields` (list, optional): Stats columns to include. `None` returns 14 default fields.
|
|
501
|
+
- `position` (str, optional): `"G"`, `"D"`, `"M"`, or `"F"`. `None` includes all positions.
|
|
502
|
+
- `max_players` (int): Maximum players to return (fetches multiple pages if needed). Default `100`.
|
|
503
|
+
|
|
504
|
+
Available fields: `goals`, `assists`, `rating`, `expectedGoals`, `expectedAssists`, `goalsAssistsSum`, `penaltyGoals`, `freeKickGoal`, `scoringFrequency`, `totalShots`, `shotsOnTarget`, `bigChancesCreated`, `bigChancesMissed`, `accuratePasses`, `accuratePassesPercentage`, `keyPasses`, `accurateLongBalls`, `accurateLongBallsPercentage`, `successfulDribbles`, `successfulDribblesPercentage`, `tackles`, `interceptions`, `clearances`, `possessionLost`, `yellowCards`, `redCards`, `saves`, `goalsPrevented`, `minutesPlayed`, `appearances`.
|
|
505
|
+
|
|
506
|
+
Columns: `tournament_id`, `season_id`, `player_name`, `player_id`, `team_name`, `team_id`, + one column per requested field.
|
|
507
|
+
|
|
508
|
+
---
|
|
509
|
+
|
|
510
|
+
### Matchweek
|
|
511
|
+
|
|
512
|
+
#### `match_data`
|
|
513
|
+
|
|
514
|
+
Fetch match fixtures and scores for a given matchweek.
|
|
515
|
+
|
|
516
|
+
```python
|
|
517
|
+
from datafc import match_data
|
|
518
|
+
|
|
519
|
+
match_df = match_data(
|
|
520
|
+
tournament_id=52,
|
|
521
|
+
season_id=77805,
|
|
522
|
+
week_number=21,
|
|
523
|
+
)
|
|
524
|
+
|
|
525
|
+
# UEFA tournaments require additional parameters:
|
|
526
|
+
ucl_df = match_data(
|
|
527
|
+
tournament_id=7,
|
|
528
|
+
season_id=61644,
|
|
529
|
+
week_number=5,
|
|
530
|
+
tournament_type="uefa",
|
|
531
|
+
tournament_stage="round_of_16",
|
|
532
|
+
)
|
|
533
|
+
```
|
|
534
|
+
|
|
535
|
+
Parameters:
|
|
536
|
+
|
|
537
|
+
- `tournament_id` (int)
|
|
538
|
+
- `season_id` (int)
|
|
539
|
+
- `week_number` (int)
|
|
540
|
+
- `tournament_type` (str, optional): `"uefa"` for UEFA competitions. `None` assumes a domestic league.
|
|
541
|
+
- `tournament_stage` (str, optional): Required when `tournament_type="uefa"`. Options: `preliminary_semifinals`, `preliminary_final`, `qualification_round`, `qualification_playoff`, `group_stage_week`, `playoff_round`, `round_of_16`, `quarterfinals`, `semifinals`, `match_for_3rd_place`, `final`.
|
|
542
|
+
|
|
543
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `injury_time_1`, `injury_time_2`, `start_timestamp`, `status`, `home_score_current`, `home_score_display`, `home_score_period1`, `home_score_period2`, `home_score_normaltime`, `away_score_current`, `away_score_display`, `away_score_period1`, `away_score_period2`, `away_score_normaltime`.
|
|
544
|
+
|
|
545
|
+
---
|
|
546
|
+
|
|
547
|
+
#### `match_details_data`
|
|
548
|
+
|
|
549
|
+
Fetch referee and venue details for each match.
|
|
550
|
+
|
|
551
|
+
```python
|
|
552
|
+
from datafc import match_details_data
|
|
553
|
+
|
|
554
|
+
df = match_details_data(match_df=match_df)
|
|
555
|
+
```
|
|
556
|
+
|
|
557
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `referee_id`, `referee_name`, `referee_country`, `referee_yellow_cards`, `referee_red_cards`, `referee_games`, `venue_id`, `venue_name`, `venue_city`, `venue_country`, `venue_capacity`.
|
|
558
|
+
|
|
559
|
+
Dependencies: `match_data`
|
|
560
|
+
|
|
561
|
+
---
|
|
562
|
+
|
|
563
|
+
#### `match_stats_data`
|
|
564
|
+
|
|
565
|
+
Fetch aggregate team statistics (possession, shots, passes, etc.) for each match.
|
|
566
|
+
|
|
567
|
+
```python
|
|
568
|
+
from datafc import match_stats_data
|
|
569
|
+
|
|
570
|
+
df = match_stats_data(match_df=match_df)
|
|
571
|
+
```
|
|
572
|
+
|
|
573
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `period` (`ALL` / `1ST` / `2ND`), `group_name`, `stat_name`, `home_team_stat`, `away_team_stat`.
|
|
574
|
+
|
|
575
|
+
Dependencies: `match_data`
|
|
576
|
+
|
|
577
|
+
---
|
|
578
|
+
|
|
579
|
+
#### `match_odds_data`
|
|
580
|
+
|
|
581
|
+
Fetch pre-match and live betting odds.
|
|
582
|
+
|
|
583
|
+
```python
|
|
584
|
+
from datafc import match_odds_data
|
|
585
|
+
|
|
586
|
+
df = match_odds_data(match_df=match_df)
|
|
587
|
+
```
|
|
588
|
+
|
|
589
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `market_name`, `market_id`, `is_live`, `choice_name`, `initial_fractional_value`, `current_fractional_value`, `winning`, `change`.
|
|
590
|
+
|
|
591
|
+
Dependencies: `match_data`
|
|
592
|
+
|
|
593
|
+
---
|
|
594
|
+
|
|
595
|
+
#### `match_h2h_data`
|
|
596
|
+
|
|
597
|
+
Fetch all-time head-to-head win/draw/loss record between the two teams in each match.
|
|
598
|
+
|
|
599
|
+
```python
|
|
600
|
+
from datafc import match_h2h_data
|
|
601
|
+
|
|
602
|
+
df = match_h2h_data(match_df=match_df)
|
|
603
|
+
```
|
|
604
|
+
|
|
605
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `away_team`, `home_wins`, `away_wins`, `draws`.
|
|
606
|
+
|
|
607
|
+
Dependencies: `match_data`
|
|
608
|
+
|
|
609
|
+
---
|
|
610
|
+
|
|
611
|
+
#### `momentum_data`
|
|
612
|
+
|
|
613
|
+
Fetch minute-by-minute match momentum values (positive = home advantage, negative = away advantage).
|
|
614
|
+
|
|
615
|
+
```python
|
|
616
|
+
from datafc import momentum_data
|
|
617
|
+
|
|
618
|
+
df = momentum_data(match_df=match_df)
|
|
619
|
+
```
|
|
620
|
+
|
|
621
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `minute`, `value`.
|
|
622
|
+
|
|
623
|
+
Dependencies: `match_data`
|
|
624
|
+
|
|
625
|
+
---
|
|
626
|
+
|
|
627
|
+
#### `pregame_form_data`
|
|
628
|
+
|
|
629
|
+
Fetch pre-game form context for each match: last 5 results, average rating, league position, and squad market value for both the home and away team.
|
|
630
|
+
|
|
631
|
+
```python
|
|
632
|
+
from datafc import pregame_form_data
|
|
633
|
+
|
|
634
|
+
df = pregame_form_data(match_df=match_df)
|
|
635
|
+
```
|
|
636
|
+
|
|
637
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team` (`home` / `away`), `avg_rating`, `position`, `value`, `form_1`, `form_2`, `form_3`, `form_4`, `form_5` (most recent result last).
|
|
638
|
+
|
|
639
|
+
Dependencies: `match_data`
|
|
640
|
+
|
|
641
|
+
---
|
|
642
|
+
|
|
643
|
+
#### `shots_data`
|
|
644
|
+
|
|
645
|
+
Fetch all shot events with coordinates, xG, xGOT, body part, situation, and goal mouth location.
|
|
646
|
+
|
|
647
|
+
```python
|
|
648
|
+
from datafc import shots_data
|
|
649
|
+
|
|
650
|
+
df = shots_data(match_df=match_df)
|
|
651
|
+
```
|
|
652
|
+
|
|
653
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `player_name`, `player_id`, `player_position`, `is_home`, `incident_type`, `shot_type`, `body_part`, `goal_type`, `situation`, `goal_mouth_location`, `xg`, `xgot`, `player_coordinates_x`, `player_coordinates_y`, `player_coordinates_z`, `goal_mouth_coordinates_x`, `goal_mouth_coordinates_y`, `goal_mouth_coordinates_z`, `draw_start_x`, `draw_start_y`, `draw_end_x`, `draw_end_y`, `draw_goal_x`, `draw_goal_y`, `block_coordinates_x`, `block_coordinates_y`, `block_coordinates_z`, `time`, `time_seconds`, `added_time`.
|
|
654
|
+
|
|
655
|
+
Dependencies: `match_data`
|
|
656
|
+
|
|
657
|
+
---
|
|
658
|
+
|
|
659
|
+
#### `lineups_data`
|
|
660
|
+
|
|
661
|
+
Fetch player lineup details and per-match player statistics (long format: one row per player per stat).
|
|
662
|
+
|
|
663
|
+
```python
|
|
664
|
+
from datafc import lineups_data
|
|
665
|
+
|
|
666
|
+
df = lineups_data(match_df=match_df)
|
|
667
|
+
```
|
|
668
|
+
|
|
669
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team`, `player_name`, `player_id`, `stat_name`, `stat_value`.
|
|
670
|
+
|
|
671
|
+
Dependencies: `match_data`
|
|
672
|
+
|
|
673
|
+
---
|
|
674
|
+
|
|
675
|
+
#### `substitutions_data`
|
|
676
|
+
|
|
677
|
+
Fetch substitution events with player names and minute.
|
|
678
|
+
|
|
679
|
+
```python
|
|
680
|
+
from datafc import substitutions_data
|
|
681
|
+
|
|
682
|
+
df = substitutions_data(match_df=match_df)
|
|
683
|
+
```
|
|
684
|
+
|
|
685
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `time`, `player_in`, `player_in_id`, `player_out`, `player_out_id`.
|
|
686
|
+
|
|
687
|
+
Dependencies: `match_data`
|
|
688
|
+
|
|
689
|
+
---
|
|
690
|
+
|
|
691
|
+
#### `incidents_data`
|
|
692
|
+
|
|
693
|
+
Fetch goal, card, and VAR decision events for each match.
|
|
694
|
+
|
|
695
|
+
```python
|
|
696
|
+
from datafc import incidents_data
|
|
697
|
+
|
|
698
|
+
df = incidents_data(match_df=match_df)
|
|
699
|
+
```
|
|
700
|
+
|
|
701
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `incident_type`, `incident_class`, `time`, `added_time`, `is_home`, `player_id`, `player_name`, `home_score`, `away_score`, `goal_from`, `card_reason`, `rescinded`, `var_confirmed`.
|
|
702
|
+
|
|
703
|
+
> **Note on `var_confirmed`:** `True` = VAR reviewed and upheld the on-field decision. `False` = VAR reviewed and overturned the decision. `None` = no VAR review occurred for that incident.
|
|
704
|
+
|
|
705
|
+
Dependencies: `match_data`
|
|
706
|
+
|
|
707
|
+
---
|
|
708
|
+
|
|
709
|
+
#### `average_positions_data`
|
|
710
|
+
|
|
711
|
+
Fetch each player's average X/Y position on the pitch during a match. Coordinates are on a 0–100 scale. Useful for formation and tactical analysis.
|
|
712
|
+
|
|
713
|
+
```python
|
|
714
|
+
from datafc import average_positions_data
|
|
715
|
+
|
|
716
|
+
df = average_positions_data(match_df=match_df)
|
|
717
|
+
```
|
|
718
|
+
|
|
719
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `away_team`, `side` (`home` / `away`), `player_name`, `player_id`, `position`, `jersey_number`, `average_x`, `average_y`, `points_count`.
|
|
720
|
+
|
|
721
|
+
Dependencies: `match_data`
|
|
722
|
+
|
|
723
|
+
---
|
|
724
|
+
|
|
725
|
+
#### `coordinates_data`
|
|
726
|
+
|
|
727
|
+
Fetch heatmap touch coordinates (X/Y) for each player. Requires the output of `lineups_data` as input.
|
|
728
|
+
|
|
729
|
+
```python
|
|
730
|
+
from datafc import lineups_data, coordinates_data
|
|
731
|
+
|
|
732
|
+
lineups_df = lineups_data(match_df=match_df)
|
|
733
|
+
df = coordinates_data(lineups_df=lineups_df)
|
|
734
|
+
```
|
|
735
|
+
|
|
736
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team`, `player_id`, `player_name`, `x`, `y`.
|
|
737
|
+
|
|
738
|
+
> **Note:** Players with no heatmap data (short substitute appearances, 404 or 403 responses) are silently skipped. The function raises `DataNotAvailableError` only if **no** player yields any coordinates.
|
|
739
|
+
|
|
740
|
+
Dependencies: `lineups_data`
|
|
741
|
+
|
|
742
|
+
---
|
|
743
|
+
|
|
744
|
+
#### `goal_networks_data`
|
|
745
|
+
|
|
746
|
+
Fetch coordinate data for each action in a goal-scoring sequence (passes, shots, goalkeeper position).
|
|
747
|
+
|
|
748
|
+
```python
|
|
749
|
+
from datafc import goal_networks_data
|
|
750
|
+
|
|
751
|
+
df = goal_networks_data(match_df=match_df)
|
|
752
|
+
```
|
|
753
|
+
|
|
754
|
+
Columns: `country`, `tournament`, `season`, `week`, `game_id`, `player_name`, `player_id`, `event_type`, `player_x`, `player_y`, `pass_end_x`, `pass_end_y`, `is_assist`, `id`, `goalkeeper_x`, `goalkeeper_y`, `goal_shot_x`, `goal_shot_y`, `goal_mouth_x`, `goal_mouth_y`, `goalkeeper_name`, `goalkeeper_id`.
|
|
755
|
+
|
|
756
|
+
Dependencies: `match_data`
|
|
757
|
+
|
|
758
|
+
---
|
|
759
|
+
|
|
760
|
+
#### `past_matches_data`
|
|
761
|
+
|
|
762
|
+
Fetch the complete head-to-head match history for each team pair playing in a given matchweek.
|
|
763
|
+
|
|
764
|
+
```python
|
|
765
|
+
from datafc import past_matches_data
|
|
766
|
+
|
|
767
|
+
df = past_matches_data(
|
|
768
|
+
tournament_id=52,
|
|
769
|
+
season_id=77805,
|
|
770
|
+
week_number=21,
|
|
771
|
+
)
|
|
772
|
+
```
|
|
773
|
+
|
|
774
|
+
Parameters:
|
|
775
|
+
|
|
776
|
+
- `tournament_id` (int)
|
|
777
|
+
- `season_id` (int)
|
|
778
|
+
- `week_number` (int)
|
|
779
|
+
- `tournament_type` (str, optional): `"uefa"` for UEFA competitions.
|
|
780
|
+
- `tournament_stage` (str, optional): Required when `tournament_type="uefa"`. Same options as `match_data`.
|
|
781
|
+
|
|
782
|
+
Columns: same as `match_data`.
|
|
783
|
+
|
|
784
|
+
---
|
|
785
|
+
|
|
786
|
+
### Player
|
|
787
|
+
|
|
788
|
+
#### `player_data`
|
|
789
|
+
|
|
790
|
+
Fetch profile data for each player in a squad: nationality, date of birth, height, weight, preferred foot, and market value.
|
|
791
|
+
|
|
792
|
+
```python
|
|
793
|
+
from datafc import standings_data, squad_data, player_data
|
|
794
|
+
|
|
795
|
+
standings_df = standings_data(52, 63814)
|
|
796
|
+
squad_df = squad_data(standings_df=standings_df)
|
|
797
|
+
df = player_data(squad_df=squad_df)
|
|
798
|
+
```
|
|
799
|
+
|
|
800
|
+
Columns: `player_id`, `player_name`, `date_of_birth`, `age`, `nationality`, `nationality_id`, `height`, `weight`, `preferred_foot`, `jersey_number`, `position`, `market_value`, `market_currency`, `team_id`, `team_name`.
|
|
801
|
+
|
|
802
|
+
Dependencies: `squad_data`
|
|
803
|
+
|
|
804
|
+
---
|
|
805
|
+
|
|
806
|
+
#### `player_transfers_data`
|
|
807
|
+
|
|
808
|
+
Fetch transfer history for each player in a squad.
|
|
809
|
+
|
|
810
|
+
```python
|
|
811
|
+
from datafc import standings_data, squad_data, player_transfers_data
|
|
812
|
+
|
|
813
|
+
standings_df = standings_data(52, 63814)
|
|
814
|
+
squad_df = squad_data(standings_df=standings_df)
|
|
815
|
+
df = player_transfers_data(squad_df=squad_df)
|
|
816
|
+
```
|
|
817
|
+
|
|
818
|
+
Columns: `player_id`, `player_name`, `transfer_date`, `from_team_id`, `from_team_name`, `to_team_id`, `to_team_name`, `transfer_type`, `fee`, `fee_currency`.
|
|
819
|
+
|
|
820
|
+
Dependencies: `squad_data`
|
|
821
|
+
|
|
822
|
+
---
|
|
823
|
+
|
|
824
|
+
#### `player_career_stats_data`
|
|
825
|
+
|
|
826
|
+
Fetch season-by-season career statistics across all competitions for each player in a squad (long format: one row per player-season-stat combination). Only `overall` entries are included; home/away splits are excluded.
|
|
827
|
+
|
|
828
|
+
```python
|
|
829
|
+
from datafc import standings_data, squad_data, player_career_stats_data
|
|
830
|
+
|
|
831
|
+
standings_df = standings_data(52, 63814)
|
|
832
|
+
squad_df = squad_data(standings_df=standings_df)
|
|
833
|
+
df = player_career_stats_data(squad_df=squad_df)
|
|
834
|
+
```
|
|
835
|
+
|
|
836
|
+
Columns: `player_id`, `player_name`, `tournament_id`, `tournament_name`, `season_id`, `season_name`, `team_id`, `team_name`, `stat`, `value`.
|
|
837
|
+
|
|
838
|
+
Dependencies: `squad_data`
|
|
839
|
+
|
|
840
|
+
---
|
|
841
|
+
|
|
842
|
+
#### `player_national_team_data`
|
|
843
|
+
|
|
844
|
+
Fetch national team career statistics (appearances, goals, debut) for each player in a squad.
|
|
845
|
+
|
|
846
|
+
```python
|
|
847
|
+
from datafc import standings_data, squad_data, player_national_team_data
|
|
848
|
+
|
|
849
|
+
standings_df = standings_data(52, 63814)
|
|
850
|
+
squad_df = squad_data(standings_df=standings_df)
|
|
851
|
+
df = player_national_team_data(squad_df=squad_df)
|
|
852
|
+
```
|
|
853
|
+
|
|
854
|
+
Columns: `player_id`, `player_name`, `team_id`, `team_name`, `team_code`, `appearances`, `goals`, `debut_timestamp`.
|
|
855
|
+
|
|
856
|
+
Dependencies: `squad_data`
|
|
857
|
+
|
|
858
|
+
---
|
|
859
|
+
|
|
860
|
+
#### `player_match_log_data`
|
|
861
|
+
|
|
862
|
+
Fetch match-by-match in-game statistics for each player in a squad across all recorded matches (wide format: one row per player per match).
|
|
863
|
+
|
|
864
|
+
```python
|
|
865
|
+
from datafc import standings_data, squad_data, player_match_log_data
|
|
866
|
+
|
|
867
|
+
standings_df = standings_data(52, 63814)
|
|
868
|
+
squad_df = squad_data(standings_df=standings_df)
|
|
869
|
+
df = player_match_log_data(squad_df=squad_df)
|
|
870
|
+
```
|
|
871
|
+
|
|
872
|
+
Columns: `player_id`, `player_name`, `game_id`, `start_timestamp`, `tournament`, `season`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `home_score`, `away_score`, `status`, plus all available in-match stat columns (e.g. `goals`, `assists`, `rating`, `minutesPlayed`, …).
|
|
873
|
+
|
|
874
|
+
Dependencies: `squad_data`
|
|
875
|
+
|
|
876
|
+
---
|
|
877
|
+
|
|
878
|
+
### Referee
|
|
879
|
+
|
|
880
|
+
#### `referee_stats_data`
|
|
881
|
+
|
|
882
|
+
Fetch career statistics for a referee. The `referee_id` can be obtained from the `referee_id` column in `match_details_data()` output.
|
|
883
|
+
|
|
884
|
+
```python
|
|
885
|
+
from datafc import referee_stats_data
|
|
886
|
+
|
|
887
|
+
df = referee_stats_data(referee_id=12345)
|
|
888
|
+
```
|
|
889
|
+
|
|
890
|
+
Parameters:
|
|
891
|
+
|
|
892
|
+
- `referee_id` (int): The unique Sofascore identifier for the referee.
|
|
893
|
+
|
|
894
|
+
Columns: `referee_id`, `referee_name`, `tournament_id`, `tournament_name`, `stat`, `value`. One row per stat per tournament. Covers appearances, yellow cards, red cards, second yellow cards, and penalties.
|
|
895
|
+
|
|
896
|
+
---
|
|
897
|
+
|
|
898
|
+
## Changelog
|
|
899
|
+
|
|
900
|
+
### v2.1.0
|
|
901
|
+
|
|
902
|
+
- Added `team_match_history_data`: fetches the complete match history for a single team across all competitions using `team_id` directly (no standings dependency).
|
|
903
|
+
|
|
904
|
+
---
|
|
905
|
+
|
|
906
|
+
### v2.0.0
|
|
907
|
+
|
|
908
|
+
- **Chrome / Selenium removed — no browser required.** datafc now makes direct HTTP requests. Installation is simpler, and fetches are significantly faster than before.
|
|
909
|
+
- **18 new functions.** `seasons_data`, `season_rounds_data`, `team_data`, `team_transfers_data`, `upcoming_matches_data`, `league_player_stats_data`, `match_details_data`, `match_h2h_data`, `pregame_form_data`, `incidents_data`, `average_positions_data`, `player_data`, `player_transfers_data`, `player_career_stats_data`, `player_national_team_data`, `player_match_log_data`, `referee_stats_data`, `search_data`.
|
|
910
|
+
- **Async API.** All functions are available in `datafc.aio` for parallel fetching with `asyncio.gather()`, letting you download an entire matchweek's worth of data concurrently.
|
|
911
|
+
- **Disk caching.** Pass a `DiskCache` instance to any function to avoid re-fetching data you've already downloaded. Cached responses are returned instantly on subsequent calls.
|
|
912
|
+
- **Automatic rate limiting and retries.** All functions accept a `rate_limit` parameter. Temporary failures (rate limits, server errors) are retried automatically without any extra code on your side.
|
|
913
|
+
- **New Parquet export.** Use `save_parquet()` on any DataFrame returned by a fetch function to save output as `.parquet`. Requires `pyarrow` (`pip install datafc[parquet]`).
|
|
914
|
+
- **Heatmap fetch no longer crashes on partial access errors.** `coordinates_data` now skips players that the API refuses to serve and returns data for everyone else. The function only raises an error if no player yields any coordinates at all.
|
|
915
|
+
- **Exported filenames are human-readable.** JSON, Excel, and Parquet files now use the league name (e.g. `trendyol_superlig_shots_data.json`) instead of raw numeric IDs. Turkish and other non-ASCII characters are transliterated correctly — `Şampiyonlar` becomes `sampiyonlar`, not `ampiyonlar`.
|
|
916
|
+
- **Valid JSON output.** Exported `.json` files no longer contain invalid `NaN` literals; they use `null` instead, making them compatible with every JSON parser and spreadsheet tool.
|
|
917
|
+
- **Cleaner numeric columns.** Score fields, ratings, and market values that were previously returned as strings or empty strings are now proper numeric types (`null` when missing, not `""`).
|
|
918
|
+
- **Clearer errors.** When something goes wrong, the exception type tells you what happened: data not available, invalid parameter, API access error, rate limit hit, or server error.
|
|
919
|
+
|
|
920
|
+
### v1.5.0
|
|
921
|
+
|
|
922
|
+
- Added `team_stats_data`, `player_stats_data`, and `squad_data`.
|
|
923
|
+
|
|
924
|
+
### v1.4.0
|
|
925
|
+
|
|
926
|
+
- Added `tournament_type` and `tournament_stage` parameters to `match_data` and `past_matches_data` for UEFA competitions (UCL, UEL, UECL, UNL).
|
|
927
|
+
|
|
928
|
+
### v1.3.0
|
|
929
|
+
|
|
930
|
+
- Added `past_matches_data`.
|
|
931
|
+
|
|
932
|
+
### v1.2.0
|
|
933
|
+
|
|
934
|
+
- Added match score columns to `match_data`.
|
|
935
|
+
|
|
936
|
+
### v1.1.0
|
|
937
|
+
|
|
938
|
+
- Added 4 new columns to `match_data`.
|
|
939
|
+
- Added `data_source` parameter to export functions.
|
|
940
|
+
|
|
941
|
+
### v1.0.0
|
|
942
|
+
|
|
943
|
+
- Initial release. Selenium-based Sofascore scraper with JSON/Excel export.
|
|
944
|
+
|
|
945
|
+
## License
|
|
946
|
+
|
|
947
|
+
MIT License
|
|
948
|
+
|
|
949
|
+
## Contributing
|
|
950
|
+
|
|
951
|
+
Bug reports and feature requests are welcome on the [issue tracker](https://github.com/urazakgul/datafc/issues); pull requests are welcome at [github.com/urazakgul/datafc](https://github.com/urazakgul/datafc).
|