datafc 1.5.0__tar.gz → 2.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. datafc-2.1.0/PKG-INFO +951 -0
  2. datafc-2.1.0/README.md +925 -0
  3. datafc-2.1.0/datafc/__init__.py +79 -0
  4. datafc-2.1.0/datafc/exceptions.py +46 -0
  5. datafc-2.1.0/datafc/sofascore/__init__.py +82 -0
  6. datafc-2.1.0/datafc/sofascore/_parsers.py +560 -0
  7. datafc-2.1.0/datafc/sofascore/aio.py +2394 -0
  8. datafc-2.1.0/datafc/sofascore/fetch_average_positions_data.py +91 -0
  9. datafc-2.1.0/datafc/sofascore/fetch_coordinates_data.py +102 -0
  10. datafc-2.1.0/datafc/sofascore/fetch_goal_networks_data.py +148 -0
  11. datafc-2.1.0/datafc/sofascore/fetch_incidents_data.py +87 -0
  12. datafc-2.1.0/datafc/sofascore/fetch_league_player_stats_data.py +165 -0
  13. datafc-2.1.0/datafc/sofascore/fetch_lineups_data.py +80 -0
  14. datafc-2.1.0/datafc/sofascore/fetch_match_data.py +84 -0
  15. datafc-2.1.0/datafc/sofascore/fetch_match_details_data.py +92 -0
  16. datafc-2.1.0/datafc/sofascore/fetch_match_h2h_data.py +82 -0
  17. datafc-2.1.0/datafc/sofascore/fetch_match_odds_data.py +73 -0
  18. datafc-2.1.0/datafc/sofascore/fetch_match_stats_data.py +80 -0
  19. datafc-2.1.0/datafc/sofascore/fetch_momentum_data.py +80 -0
  20. datafc-2.1.0/datafc/sofascore/fetch_past_matches_data.py +140 -0
  21. datafc-2.1.0/datafc/sofascore/fetch_player_career_stats_data.py +124 -0
  22. datafc-2.1.0/datafc/sofascore/fetch_player_data.py +132 -0
  23. datafc-2.1.0/datafc/sofascore/fetch_player_match_log_data.py +126 -0
  24. datafc-2.1.0/datafc/sofascore/fetch_player_national_team_data.py +108 -0
  25. datafc-2.1.0/datafc/sofascore/fetch_player_stats_data.py +126 -0
  26. datafc-2.1.0/datafc/sofascore/fetch_player_transfers_data.py +122 -0
  27. datafc-2.1.0/datafc/sofascore/fetch_pregame_form_data.py +109 -0
  28. datafc-2.1.0/datafc/sofascore/fetch_referee_stats_data.py +104 -0
  29. datafc-2.1.0/datafc/sofascore/fetch_search_data.py +93 -0
  30. datafc-2.1.0/datafc/sofascore/fetch_season_rounds_data.py +99 -0
  31. datafc-2.1.0/datafc/sofascore/fetch_seasons_data.py +86 -0
  32. datafc-2.1.0/datafc/sofascore/fetch_shots_data.py +80 -0
  33. datafc-2.1.0/datafc/sofascore/fetch_squad_data.py +124 -0
  34. datafc-2.1.0/datafc/sofascore/fetch_standings_data.py +80 -0
  35. datafc-2.1.0/datafc/sofascore/fetch_substitutions_data.py +80 -0
  36. datafc-2.1.0/datafc/sofascore/fetch_team_data.py +132 -0
  37. datafc-2.1.0/datafc/sofascore/fetch_team_match_history_data.py +108 -0
  38. datafc-2.1.0/datafc/sofascore/fetch_team_stats_data.py +123 -0
  39. datafc-2.1.0/datafc/sofascore/fetch_team_transfers_data.py +128 -0
  40. datafc-2.1.0/datafc/sofascore/fetch_upcoming_matches_data.py +105 -0
  41. datafc-2.1.0/datafc/utils/__init__.py +35 -0
  42. datafc-2.1.0/datafc/utils/_async_client.py +164 -0
  43. datafc-2.1.0/datafc/utils/_cache.py +142 -0
  44. datafc-2.1.0/datafc/utils/_client.py +136 -0
  45. datafc-2.1.0/datafc/utils/_config.py +93 -0
  46. datafc-2.1.0/datafc/utils/_helpers.py +26 -0
  47. datafc-2.1.0/datafc/utils/_save_files.py +142 -0
  48. datafc-2.1.0/datafc/utils/_tournament_info.py +66 -0
  49. datafc-2.1.0/datafc/utils/_validate.py +98 -0
  50. datafc-2.1.0/datafc.egg-info/PKG-INFO +951 -0
  51. datafc-2.1.0/datafc.egg-info/SOURCES.txt +55 -0
  52. datafc-2.1.0/datafc.egg-info/requires.txt +13 -0
  53. datafc-2.1.0/pyproject.toml +55 -0
  54. datafc-1.5.0/PKG-INFO +0 -1086
  55. datafc-1.5.0/README.md +0 -1070
  56. datafc-1.5.0/datafc/__init__.py +0 -3
  57. datafc-1.5.0/datafc/sofascore/__init__.py +0 -31
  58. datafc-1.5.0/datafc/sofascore/fetch_coordinates_data.py +0 -118
  59. datafc-1.5.0/datafc/sofascore/fetch_goal_networks_data.py +0 -153
  60. datafc-1.5.0/datafc/sofascore/fetch_lineups_data.py +0 -148
  61. datafc-1.5.0/datafc/sofascore/fetch_match_data.py +0 -156
  62. datafc-1.5.0/datafc/sofascore/fetch_match_odds_data.py +0 -113
  63. datafc-1.5.0/datafc/sofascore/fetch_match_stats_data.py +0 -111
  64. datafc-1.5.0/datafc/sofascore/fetch_momentum_data.py +0 -106
  65. datafc-1.5.0/datafc/sofascore/fetch_past_matches_data.py +0 -180
  66. datafc-1.5.0/datafc/sofascore/fetch_player_stats_data.py +0 -132
  67. datafc-1.5.0/datafc/sofascore/fetch_shots_data.py +0 -142
  68. datafc-1.5.0/datafc/sofascore/fetch_squad_data.py +0 -123
  69. datafc-1.5.0/datafc/sofascore/fetch_standings_data.py +0 -110
  70. datafc-1.5.0/datafc/sofascore/fetch_substitutions_data.py +0 -110
  71. datafc-1.5.0/datafc/sofascore/fetch_team_stats_data.py +0 -114
  72. datafc-1.5.0/datafc/utils/__init__.py +0 -0
  73. datafc-1.5.0/datafc/utils/_config.py +0 -25
  74. datafc-1.5.0/datafc/utils/_save_files.py +0 -50
  75. datafc-1.5.0/datafc/utils/_setup_webdriver.py +0 -49
  76. datafc-1.5.0/datafc.egg-info/PKG-INFO +0 -1086
  77. datafc-1.5.0/datafc.egg-info/SOURCES.txt +0 -28
  78. datafc-1.5.0/datafc.egg-info/requires.txt +0 -4
  79. datafc-1.5.0/setup.py +0 -30
  80. {datafc-1.5.0 → datafc-2.1.0}/LICENSE +0 -0
  81. {datafc-1.5.0 → datafc-2.1.0}/datafc.egg-info/dependency_links.txt +0 -0
  82. {datafc-1.5.0 → datafc-2.1.0}/datafc.egg-info/top_level.txt +0 -0
  83. {datafc-1.5.0 → datafc-2.1.0}/setup.cfg +0 -0
datafc-2.1.0/PKG-INFO ADDED
@@ -0,0 +1,951 @@
1
+ Metadata-Version: 2.4
2
+ Name: datafc
3
+ Version: 2.1.0
4
+ Summary: Fetch, process, and export structured football data.
5
+ Author-email: Uraz Akgül <urazdev@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/urazakgul/datafc
8
+ Project-URL: Repository, https://github.com/urazakgul/datafc
9
+ Project-URL: Issues, https://github.com/urazakgul/datafc/issues
10
+ Keywords: football,soccer,data,analytics,sofascore
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: pandas>=1.5
15
+ Requires-Dist: curl_cffi>=0.7
16
+ Requires-Dist: openpyxl>=3.0
17
+ Provides-Extra: parquet
18
+ Requires-Dist: pyarrow>=12.0; extra == "parquet"
19
+ Provides-Extra: dev
20
+ Requires-Dist: ruff>=0.4; extra == "dev"
21
+ Requires-Dist: mypy>=1.0; extra == "dev"
22
+ Requires-Dist: pandas-stubs>=2.0; extra == "dev"
23
+ Requires-Dist: pytest>=8.0; extra == "dev"
24
+ Requires-Dist: pytest-mock>=3.12; extra == "dev"
25
+ Dynamic: license-file
26
+
27
+ # datafc v2.1.0
28
+
29
+ ## Overview
30
+
31
+ `datafc` fetches, processes, and exports structured football data. It provides **33 functions** covering tournament metadata, standings, squad rosters, match fixtures, shots, lineups, player heatmaps, odds, and more — all returning clean `pandas` DataFrames ready for analysis. Sofascore is currently the only supported data source.
32
+
33
+ > **Finding IDs:** `tournament_id` and `season_id` can be discovered in two ways:
34
+ > - **From the URL:** navigating to a league page on Sofascore (e.g. `sofascore.com/.../trendyol-super-lig/52#id:63814`) shows `tournament_id=52` and `season_id=63814`.
35
+ > - **Programmatically:** use `search_data("super lig", entity_type="tournament")` to find the tournament ID, then `seasons_data(tournament_id)` to list all available seasons.
36
+
37
+ ## What Data Can You Access?
38
+
39
+ ### Discovery
40
+
41
+ | Function | What it returns |
42
+ |---|---|
43
+ | `search_data` | Search for players, teams, tournaments, or managers by name |
44
+ | `seasons_data` | All seasons and their IDs for a given tournament |
45
+
46
+ ### Tournament / Season Metadata
47
+
48
+ | Function | What it returns |
49
+ |---|---|
50
+ | `season_rounds_data` | All rounds/matchweeks in a season |
51
+
52
+ ### League / Season
53
+
54
+ | Function | What it returns |
55
+ |---|---|
56
+ | `standings_data` | League table (Total, Home, Away) with W/D/L, goals, points |
57
+ | `team_data` | Team profiles: stadium, kit colors, manager, venue capacity |
58
+ | `team_stats_data` | 100+ season-level stats per team |
59
+ | `team_transfers_data` | All incoming and outgoing transfers per team |
60
+ | `player_stats_data` | Top player stats per team (goals, assists, key passes, …) |
61
+ | `squad_data` | Squad roster with age, height, market value, contract expiry |
62
+ | `upcoming_matches_data` | Upcoming fixtures for all teams in the standings |
63
+ | `team_match_history_data` | Full match history for a team across all competitions |
64
+ | `league_player_stats_data` | Wide-format player rankings, sortable by any metric |
65
+
66
+ ### Matchweek
67
+
68
+ | Function | What it returns |
69
+ |---|---|
70
+ | `match_data` | Fixtures for a matchweek: score, status, home/away teams |
71
+ | `match_details_data` | Referee info (name, cards, games) and venue details per match |
72
+ | `match_stats_data` | Aggregate team stats per match (possession, shots, fouls, …) |
73
+ | `match_odds_data` | Pre-match and live betting odds per market (e.g. 1/X/2) |
74
+ | `match_h2h_data` | All-time H2H record between the two teams |
75
+ | `momentum_data` | Minute-by-minute momentum score throughout the match |
76
+ | `pregame_form_data` | Last 5 results, avg rating, league position, and squad value before each match |
77
+ | `shots_data` | Every shot: coordinates, xG, xGOT, outcome, body part |
78
+ | `lineups_data` | Starting XI and substitutes with per-match player stats |
79
+ | `substitutions_data` | Substitution events: minute, player in, player out |
80
+ | `incidents_data` | Goals, cards, and VAR decisions per match |
81
+ | `average_positions_data` | Average pitch position (x/y) per player |
82
+ | `coordinates_data` | Heatmap touch coordinates per player (requires `lineups_data` output) |
83
+ | `goal_networks_data` | Goal-sequence coordinates (passes, shots, goalkeeper position) |
84
+ | `past_matches_data` | Historical H2H results for team pairs in a given matchweek |
85
+
86
+ ### Player
87
+
88
+ | Function | What it returns |
89
+ |---|---|
90
+ | `player_data` | Player profile: age, nationality, height, market value |
91
+ | `player_transfers_data` | Transfer history per player |
92
+ | `player_career_stats_data` | Season-by-season career stats across all competitions (long format) |
93
+ | `player_national_team_data` | National team appearances, goals, and debut date |
94
+ | `player_match_log_data` | Match-by-match in-game statistics across all recorded matches |
95
+
96
+ ### Referee
97
+
98
+ | Function | What it returns |
99
+ |---|---|
100
+ | `referee_stats_data` | Career stats for a referee: games, cards, and per-game averages |
101
+
102
+ > **Coverage:** Any league and season available on Sofascore. For Turkey Super Lig, every season from 1980/81 to the present is accessible.
103
+
104
+ ## Installation
105
+
106
+ ```bash
107
+ pip install datafc
108
+ ```
109
+
110
+ To install the latest development version:
111
+
112
+ ```bash
113
+ pip install git+https://github.com/urazakgul/datafc.git
114
+ ```
115
+
116
+ To upgrade an existing installation:
117
+
118
+ ```bash
119
+ pip install --upgrade datafc
120
+ ```
121
+
122
+ ## Quick Start
123
+
124
+ ```python
125
+ from datafc import (
126
+ standings_data,
127
+ match_data,
128
+ shots_data,
129
+ league_player_stats_data,
130
+ )
131
+
132
+ standings_df = standings_data(tournament_id=52, season_id=77805)
133
+
134
+ match_df = match_data(tournament_id=52, season_id=77805, week_number=1)
135
+
136
+ shots_df = shots_data(match_df=match_df)
137
+
138
+
139
+ top_scorers = league_player_stats_data(
140
+ tournament_id=52,
141
+ season_id=77805,
142
+ order="-goals",
143
+ fields=["goals", "assists", "rating"],
144
+ max_players=20,
145
+ )
146
+ ```
147
+
148
+ ## Async API
149
+
150
+ All functions are also available in async form via `datafc.aio`, designed for fetching multiple weeks or matches in parallel with `asyncio.gather()`.
151
+
152
+ ```python
153
+ import asyncio
154
+ import pandas as pd
155
+ from datafc import aio
156
+
157
+ async def fetch_full_season(tournament_id, season_id, total_weeks):
158
+     tasks = [
159
+         aio.match_data(tournament_id, season_id, week_number=w)
160
+         for w in range(1, total_weeks + 1)
161
+     ]
162
+     frames = await asyncio.gather(*tasks)
163
+     return pd.concat(frames, ignore_index=True)
164
+
165
+ df = asyncio.run(fetch_full_season(52, 63814, total_weeks=38))
166
+ ```
167
+
168
+ Use `return_exceptions=True` when mixing independent coroutines so that one failure does not discard the results of the rest (exceptions are returned in the results list instead of being raised):
169
+
170
+ ```python
171
+ results = await asyncio.gather(
172
+ aio.match_data(52, 77805, week_number=1),
173
+ aio.standings_data(52, 77805),
174
+ return_exceptions=True,
175
+ )
176
+ for label, result in zip(["match", "standings"], results):
177
+     if isinstance(result, Exception):
178
+         print(f"{label} failed: {result}")
179
+ ```
180
+
181
+ Async functions accept the same parameters as their sync counterparts, including `cache`, `enable_json_export`, `enable_excel_export`, and `output_dir` (see [Caching](#caching) and [Common Parameters](#common-parameters)).
182
+
183
+ ## Common Parameters
184
+
185
+ Every function accepts the following shared parameters:
186
+
187
+ | Parameter | Type | Default | Description |
188
+ |---|---|---|---|
189
+ | `data_source` | `str` | `"sofascore"` | Data source: `"sofascore"` or `"sofavpn"` (use `sofavpn` if Sofascore is blocked in your region) |
190
+ | `rate_limit` | `float` | `2.0` | Maximum requests per second. The limit is **global across all instances** in the same process (sync) or event loop (async) — creating multiple clients does not multiply throughput. |
191
+ | `cache` | `DiskCache` | `None` | Optional `DiskCache` instance for persistent response caching (see [Caching](#caching)). |
192
+ | `enable_json_export` | `bool` | `False` | Save output as a JSON file |
193
+ | `enable_excel_export` | `bool` | `False` | Save output as an Excel file |
194
+ | `output_dir` | `str` | `"."` | Directory for exported files |
195
+
196
+ ## Caching
197
+
198
+ Responses can be cached to disk to avoid redundant API calls across sessions:
199
+
200
+ ```python
201
+ import asyncio
202
+ from datafc import DiskCache
203
+ from datafc import aio
204
+
205
+ cache = DiskCache(cache_dir=".datafc_cache", ttl_hours=24)
206
+
207
+ async def main():
208
+     # First call hits the API; subsequent calls read from disk
209
+     df = await aio.match_data(52, 63814, week_number=1, cache=cache)
210
+
211
+ asyncio.run(main())
212
+ ```
213
+
214
+ `DiskCache` stores responses as JSON files keyed by URL. Cache entries expire after `ttl_hours` (set to `0` to disable expiry). Call `cache.clear()` to invalidate all entries.
215
+
216
+ ## Parquet Export
217
+
218
+ For large datasets (`player_career_stats_data`, `coordinates_data`, `lineups_data`), Parquet is significantly faster to read and write than JSON. Use `save_parquet` directly on any DataFrame returned by a fetch function:
219
+
220
+ ```python
221
+ from datafc import player_career_stats_data, standings_data, squad_data, save_parquet
222
+
223
+ standings_df = standings_data(52, 63814)
224
+ squad_df = squad_data(standings_df=standings_df)
225
+ df = player_career_stats_data(squad_df=squad_df)
226
+
227
+ save_parquet(
228
+ data=df,
229
+ fn_name="player_career_stats_data",
230
+ data_source="sofascore",
231
+ country="Turkey",
232
+ tournament="Trendyol Super Lig",
233
+ season="25/26",
234
+ output_dir="data/processed",
235
+ )
236
+ ```
237
+
238
+ Parquet export requires `pyarrow`. Install it with:
239
+
240
+ ```bash
241
+ pip install "datafc[parquet]"
242
+ # or
243
+ pip install pyarrow
244
+ ```
245
+
246
+ ## Exception Hierarchy
247
+
248
+ ```
249
+ DataFCError
250
+ ├── InvalidParameterError (bad input: unknown data_source, invalid category, etc.)
251
+ ├── DataNotAvailableError (valid request but no data returned)
252
+ └── APIError (HTTP-level error from the Sofascore API)
253
+     ├── RateLimitError (HTTP 429)
254
+     └── ServerError (HTTP 5xx)
255
+ ```
256
+
257
+ ```python
258
+ from datafc import match_data, DataNotAvailableError, RateLimitError
259
+
260
+ try:
261
+     df = match_data(52, 63814, week_number=99)
261
+ except DataNotAvailableError:
262
+     print("No data for that week.")
263
+ except RateLimitError:
264
+     print("Rate limited. Lower your rate_limit or add delays.")
266
+ ```
267
+
268
+ ## Function Reference
269
+
270
+ ### Discovery
271
+
272
+ #### `search_data`
273
+
274
+ Search for teams, players, tournaments, or managers by name. Useful for finding IDs without visiting the website.
275
+
276
+ ```python
277
+ from datafc import search_data
278
+
279
+ df = search_data("galatasaray", entity_type="team")
280
+ ```
281
+
282
+ Parameters:
283
+
284
+ - `query` (str): Search term.
285
+ - `entity_type` (str, optional): Filter by type: `"team"`, `"player"`, `"tournament"`, or `"manager"`. `None` returns all types.
286
+
287
+ Columns: `entity_id`, `entity_name`, `entity_type`, `score`, `country`, `position`.
288
+
289
+ ---
290
+
291
+ #### `seasons_data`
292
+
293
+ List all available seasons for a tournament. Use this to discover valid `season_id` values before calling other functions.
294
+
295
+ ```python
296
+ from datafc import seasons_data
297
+
298
+ df = seasons_data(tournament_id=52)
299
+ ```
300
+
301
+ Columns: `tournament_id`, `season_id`, `season_name`, `season_year`.
302
+
303
+ ---
304
+
305
+ ### Tournament / Season Metadata
306
+
307
+ #### `season_rounds_data`
308
+
309
+ Fetch all rounds (matchweeks) defined for a season. Useful for iterating over all weeks programmatically.
310
+
311
+ ```python
312
+ from datafc import season_rounds_data
313
+
314
+ df = season_rounds_data(tournament_id=52, season_id=77805)
315
+ ```
316
+
317
+ Columns: `tournament_id`, `season_id`, `round_number`, `slug`, `name`, `prefix`, `is_latest`.
318
+
319
+ ---
320
+
321
+ ### League / Season
322
+
323
+ #### `standings_data`
324
+
325
+ Fetch league standings for Total, Home, and Away categories.
326
+
327
+ ```python
328
+ from datafc import standings_data
329
+
330
+ df = standings_data(tournament_id=52, season_id=77805)
331
+ ```
332
+
333
+ Columns: `country`, `tournament`, `tournament_id`, `season_id`, `team_name`, `team_id`, `position`, `matches`, `wins`, `draws`, `losses`, `scores_for`, `scores_against`, `points`, `category` (`Total` / `Home` / `Away`).
334
+
335
+ ---
336
+
337
+ #### `team_data`
338
+
339
+ Fetch profile and infrastructure data for every team in the standings: stadium name and capacity, kit colors, and current manager.
340
+
341
+ ```python
342
+ from datafc import standings_data, team_data
343
+
344
+ standings_df = standings_data(52, 63814)
345
+ df = team_data(standings_df=standings_df)
346
+ ```
347
+
348
+ Columns: `country`, `tournament`, `team_id`, `team_name`, `short_name`, `slug`, `national`, `country_name`, `country_id`, `primary_color`, `secondary_color`, `text_color`, `venue_id`, `venue_name`, `venue_capacity`, `venue_city`, `manager_id`, `manager_name`, `manager_country`.
349
+
350
+ Dependencies: `standings_data`
351
+
352
+ ---
353
+
354
+ #### `team_stats_data`
355
+
356
+ Fetch season-level team statistics (long format) for every team in the standings.
357
+
358
+ ```python
359
+ from datafc import standings_data, team_stats_data
360
+
361
+ standings_df = standings_data(52, 63814)
362
+ df = team_stats_data(standings_df=standings_df, tournament_id=52, season_id=63814)
363
+ ```
364
+
365
+ Parameters:
366
+
367
+ - `standings_df` (DataFrame): Output of `standings_data`.
368
+ - `tournament_id` (int)
369
+ - `season_id` (int)
370
+ - `season` (str, optional): Human-readable season label (e.g. `"24/25"`) used only in the export filename.
371
+
372
+ Columns: `country`, `tournament`, `team_name`, `team_id`, `stat`, `value`.
373
+
374
+ Dependencies: `standings_data`
375
+
376
+ ---
377
+
378
+ #### `team_transfers_data`
379
+
380
+ Fetch all incoming and outgoing transfer records for every team in the standings.
381
+
382
+ ```python
383
+ from datafc import standings_data, team_transfers_data
384
+
385
+ standings_df = standings_data(52, 63814)
386
+ df = team_transfers_data(standings_df=standings_df)
387
+ ```
388
+
389
+ Columns: `country`, `tournament`, `team_name`, `team_id`, `direction` (`in` / `out`), `player_id`, `player_name`, `transfer_date`, `from_team_id`, `from_team_name`, `to_team_id`, `to_team_name`, `transfer_type` (`loan` / `permanent` / `free` / `end_of_contract`), `fee`, `fee_currency`.
390
+
391
+ Dependencies: `standings_data`
392
+
393
+ ---
394
+
395
+ #### `player_stats_data`
396
+
397
+ Fetch top player statistics per team (long format). Covers goals, assists, key passes, duels, and more.
398
+
399
+ ```python
400
+ from datafc import standings_data, player_stats_data
401
+
402
+ standings_df = standings_data(52, 63814)
403
+ df = player_stats_data(standings_df=standings_df, tournament_id=52, season_id=63814)
404
+ ```
405
+
406
+ Columns: `country`, `tournament`, `team_name`, `team_id`, `player_name`, `player_id`, `position`, `stat_name`, `stat_value`.
407
+
408
+ Dependencies: `standings_data`
409
+
410
+ ---
411
+
412
+ #### `squad_data`
413
+
414
+ Fetch full squad roster for every team: age, height, nationality, position, preferred foot, contract expiry, and market value.
415
+
416
+ ```python
417
+ from datafc import standings_data, squad_data
418
+
419
+ standings_df = standings_data(52, 63814)
420
+ df = squad_data(standings_df=standings_df)
421
+ ```
422
+
423
+ Columns: `country`, `tournament`, `tournament_id`, `season_id`, `team_name`, `team_id`, `player_name`, `player_id`, `age`, `height`, `player_country`, `position`, `preferred_foot`, `contract_until`, `market_value`, `market_currency`.
424
+
425
+ Dependencies: `standings_data`
426
+
427
+ ---
428
+
429
+ #### `team_match_history_data`
430
+
431
+ Fetch the complete match history for a single team across all competitions.
432
+
433
+ ```python
434
+ from datafc import team_match_history_data
435
+
436
+ df = team_match_history_data(team_id=4748) # Brazil
437
+ ```
438
+
439
+ The `team_id` can be obtained from `standings_data()`, `squad_data()`, or `search_data()`.
440
+
441
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `home_score_period1`, `home_score_period2`, `home_score_normaltime`, `home_score_display`, `home_score_current`, `away_score_period1`, `away_score_period2`, `away_score_normaltime`, `away_score_display`, `away_score_current`, `start_timestamp`, `status`.
442
+
443
+ > **Note:** Results span all competitions in Sofascore's database (league, cup, international). Filter by the `tournament` column to narrow down to a specific competition.
444
+
445
+ Dependencies: none
446
+
447
+ ---
448
+
449
+ #### `upcoming_matches_data`
450
+
451
+ Fetch upcoming fixtures for all teams currently in the standings.
452
+
453
+ ```python
454
+ from datafc import standings_data, upcoming_matches_data
455
+
456
+ standings_df = standings_data(52, 63814)
457
+ df = upcoming_matches_data(standings_df=standings_df)
458
+ ```
459
+
460
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `start_timestamp`, `status`.
461
+
462
+ > **Note:** Results may include fixtures from cup competitions (e.g. Türkiye Kupası) if a team's next scheduled match is outside the league. Filter by the `tournament` column to restrict to league fixtures only.
463
+
464
+ Dependencies: `standings_data`
465
+
466
+ ---
467
+
468
+ #### `league_player_stats_data`
469
+
470
+ Fetch ranked player statistics across the entire league in wide format (one row per player). Supports pagination, position filtering, and multiple accumulation methods.
471
+
472
+ ```python
473
+ from datafc import league_player_stats_data
474
+
475
+ # Top 50 goalscorers
476
+ df = league_player_stats_data(
477
+ tournament_id=52,
478
+ season_id=77805,
479
+ order="-goals",
480
+ accumulation="total",
481
+ fields=["goals", "assists", "rating", "expectedGoals"],
482
+ max_players=50,
483
+ )
484
+
485
+ # Top midfielders by rating per 90
486
+ df = league_player_stats_data(
487
+ tournament_id=52,
488
+ season_id=77805,
489
+ order="-rating",
490
+ accumulation="per90",
491
+ position="M",
492
+ max_players=20,
493
+ )
494
+ ```
495
+
496
+ Parameters:
497
+
498
+ - `order` (str): Field to sort by; prefix with `-` for descending order. Default `"-rating"`.
499
+ - `accumulation` (str): `"total"`, `"per90"`, or `"perMatch"`. Default `"total"`.
500
+ - `fields` (list, optional): Stats columns to include. `None` returns 14 default fields.
501
+ - `position` (str, optional): `"G"`, `"D"`, `"M"`, or `"F"`. `None` includes all positions.
502
+ - `max_players` (int): Maximum players to return (fetches multiple pages if needed). Default `100`.
503
+
504
+ Available fields: `goals`, `assists`, `rating`, `expectedGoals`, `expectedAssists`, `goalsAssistsSum`, `penaltyGoals`, `freeKickGoal`, `scoringFrequency`, `totalShots`, `shotsOnTarget`, `bigChancesCreated`, `bigChancesMissed`, `accuratePasses`, `accuratePassesPercentage`, `keyPasses`, `accurateLongBalls`, `accurateLongBallsPercentage`, `successfulDribbles`, `successfulDribblesPercentage`, `tackles`, `interceptions`, `clearances`, `possessionLost`, `yellowCards`, `redCards`, `saves`, `goalsPrevented`, `minutesPlayed`, `appearances`.
505
+
506
+ Columns: `tournament_id`, `season_id`, `player_name`, `player_id`, `team_name`, `team_id`, + one column per requested field.
507
+
508
+ ---
509
+
510
+ ### Matchweek
511
+
512
+ #### `match_data`
513
+
514
+ Fetch match fixtures and scores for a given matchweek.
515
+
516
+ ```python
517
+ from datafc import match_data
518
+
519
+ match_df = match_data(
520
+ tournament_id=52,
521
+ season_id=77805,
522
+ week_number=21,
523
+ )
524
+
525
+ # UEFA tournaments require additional parameters:
526
+ ucl_df = match_data(
527
+ tournament_id=7,
528
+ season_id=61644,
529
+ week_number=5,
530
+ tournament_type="uefa",
531
+ tournament_stage="round_of_16",
532
+ )
533
+ ```
534
+
535
+ Parameters:
536
+
537
+ - `tournament_id` (int)
538
+ - `season_id` (int)
539
+ - `week_number` (int)
540
+ - `tournament_type` (str, optional): `"uefa"` for UEFA competitions. `None` assumes a domestic league.
541
+ - `tournament_stage` (str, optional): Required when `tournament_type="uefa"`. Options: `preliminary_semifinals`, `preliminary_final`, `qualification_round`, `qualification_playoff`, `group_stage_week`, `playoff_round`, `round_of_16`, `quarterfinals`, `semifinals`, `match_for_3rd_place`, `final`.
542
+
543
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `injury_time_1`, `injury_time_2`, `start_timestamp`, `status`, `home_score_current`, `home_score_display`, `home_score_period1`, `home_score_period2`, `home_score_normaltime`, `away_score_current`, `away_score_display`, `away_score_period1`, `away_score_period2`, `away_score_normaltime`.
544
+
545
+ ---
546
+
547
+ #### `match_details_data`
548
+
549
+ Fetch referee and venue details for each match.
550
+
551
+ ```python
552
+ from datafc import match_details_data
553
+
554
+ df = match_details_data(match_df=match_df)
555
+ ```
556
+
557
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `referee_id`, `referee_name`, `referee_country`, `referee_yellow_cards`, `referee_red_cards`, `referee_games`, `venue_id`, `venue_name`, `venue_city`, `venue_country`, `venue_capacity`.
558
+
559
+ Dependencies: `match_data`
560
+
561
+ ---
562
+
563
+ #### `match_stats_data`
564
+
565
+ Fetch aggregate team statistics (possession, shots, passes, etc.) for each match.
566
+
567
+ ```python
568
+ from datafc import match_stats_data
569
+
570
+ df = match_stats_data(match_df=match_df)
571
+ ```
572
+
573
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `period` (`ALL` / `1ST` / `2ND`), `group_name`, `stat_name`, `home_team_stat`, `away_team_stat`.
574
+
575
+ Dependencies: `match_data`
576
+
577
+ ---
578
+
579
+ #### `match_odds_data`
580
+
581
+ Fetch pre-match and live betting odds.
582
+
583
+ ```python
584
+ from datafc import match_odds_data
585
+
586
+ df = match_odds_data(match_df=match_df)
587
+ ```
588
+
589
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `market_name`, `market_id`, `is_live`, `choice_name`, `initial_fractional_value`, `current_fractional_value`, `winning`, `change`.
590
+
591
+ Dependencies: `match_data`
592
+
593
+ ---
594
+
595
+ #### `match_h2h_data`
596
+
597
+ Fetch all-time head-to-head win/draw/loss record between the two teams in each match.
598
+
599
+ ```python
600
+ from datafc import match_h2h_data
601
+
602
+ df = match_h2h_data(match_df=match_df)
603
+ ```
604
+
605
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `away_team`, `home_wins`, `away_wins`, `draws`.
606
+
607
+ Dependencies: `match_data`
608
+
609
+ ---
610
+
611
+ #### `momentum_data`
612
+
613
+ Fetch minute-by-minute match momentum values (positive = home advantage, negative = away).
614
+
615
+ ```python
616
+ from datafc import momentum_data
617
+
618
+ df = momentum_data(match_df=match_df)
619
+ ```
620
+
621
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `minute`, `value`.
622
+
623
+ Dependencies: `match_data`
624
+
625
+ ---
626
+
627
+ #### `pregame_form_data`
628
+
629
+ Fetch pre-game form context for each match: last 5 results, average rating, league position, and squad market value for both the home and away team.
630
+
631
+ ```python
632
+ from datafc import pregame_form_data
633
+
634
+ df = pregame_form_data(match_df=match_df)
635
+ ```
636
+
637
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team` (`home` / `away`), `avg_rating`, `position`, `value`, `form_1`, `form_2`, `form_3`, `form_4`, `form_5` (most recent result last).
638
+
639
+ Dependencies: `match_data`
640
+
641
+ ---
642
+
643
+ #### `shots_data`
644
+
645
+ Fetch all shot events with coordinates, xG, xGOT, body part, situation, and goal mouth location.
646
+
647
+ ```python
648
+ from datafc import shots_data
649
+
650
+ df = shots_data(match_df=match_df)
651
+ ```
652
+
653
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `player_name`, `player_id`, `player_position`, `is_home`, `incident_type`, `shot_type`, `body_part`, `goal_type`, `situation`, `goal_mouth_location`, `xg`, `xgot`, `player_coordinates_x`, `player_coordinates_y`, `player_coordinates_z`, `goal_mouth_coordinates_x`, `goal_mouth_coordinates_y`, `goal_mouth_coordinates_z`, `draw_start_x`, `draw_start_y`, `draw_end_x`, `draw_end_y`, `draw_goal_x`, `draw_goal_y`, `block_coordinates_x`, `block_coordinates_y`, `block_coordinates_z`, `time`, `time_seconds`, `added_time`.
654
+
655
+ Dependencies: `match_data`
656
+
657
+ ---
658
+
659
+ #### `lineups_data`
660
+
661
+ Fetch player lineup details and per-match player statistics (long format: one row per player per stat).
662
+
663
+ ```python
664
+ from datafc import lineups_data
665
+
666
+ df = lineups_data(match_df=match_df)
667
+ ```
668
+
669
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team`, `player_name`, `player_id`, `stat_name`, `stat_value`.
670
+
671
+ Dependencies: `match_data`
672
+
673
+ ---
674
+
675
+ #### `substitutions_data`
676
+
677
+ Fetch substitution events with player names and minute.
678
+
679
+ ```python
680
+ from datafc import substitutions_data
681
+
682
+ df = substitutions_data(match_df=match_df)
683
+ ```
684
+
685
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `time`, `player_in`, `player_in_id`, `player_out`, `player_out_id`.
686
+
687
+ Dependencies: `match_data`
688
+
689
+ ---
690
+
691
+ #### `incidents_data`
692
+
693
+ Fetch goal, card, and VAR decision events for each match.
694
+
695
+ ```python
696
+ from datafc import incidents_data
697
+
698
+ df = incidents_data(match_df=match_df)
699
+ ```
700
+
701
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `incident_type`, `incident_class`, `time`, `added_time`, `is_home`, `player_id`, `player_name`, `home_score`, `away_score`, `goal_from`, `card_reason`, `rescinded`, `var_confirmed`.
702
+
703
+ > **Note on `var_confirmed`:** `True` = VAR reviewed and upheld the on-field decision. `False` = VAR reviewed and overturned the decision. `None` = no VAR review occurred for that incident.
704
+
705
+ Dependencies: `match_data`
706
+
707
+ ---
708
+
709
+ #### `average_positions_data`
710
+
711
+ Fetch each player's average X/Y position on the pitch during a match. Coordinates are on a 0–100 scale. Useful for formation and tactical analysis.
712
+
713
+ ```python
714
+ from datafc import average_positions_data
715
+
716
+ df = average_positions_data(match_df=match_df)
717
+ ```
718
+
719
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `away_team`, `side` (`home` / `away`), `player_name`, `player_id`, `position`, `jersey_number`, `average_x`, `average_y`, `points_count`.
720
+
721
+ Dependencies: `match_data`
722
+
723
+ ---
724
+
725
+ #### `coordinates_data`
726
+
727
+ Fetch heatmap touch coordinates (X/Y) for each player. Takes the output of `lineups_data` as input.
728
+
729
+ ```python
730
+ from datafc import lineups_data, coordinates_data
731
+
732
+ lineups_df = lineups_data(match_df=match_df)
733
+ df = coordinates_data(lineups_df=lineups_df)
734
+ ```
735
+
736
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team`, `player_id`, `player_name`, `x`, `y`.
737
+
738
+ > **Note:** Players with no heatmap data (short substitute appearances, 404 or 403 responses) are silently skipped. The function raises `DataNotAvailableError` only if **no** player yields any coordinates.
739
+
740
+ Dependencies: `lineups_data`
741
+
742
+ ---
743
+
744
+ #### `goal_networks_data`
745
+
746
+ Fetch coordinate data for each action in a goal-scoring sequence (passes, shots, goalkeeper position).
747
+
748
+ ```python
749
+ from datafc import goal_networks_data
750
+
751
+ df = goal_networks_data(match_df=match_df)
752
+ ```
753
+
754
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `player_name`, `player_id`, `event_type`, `player_x`, `player_y`, `pass_end_x`, `pass_end_y`, `is_assist`, `id`, `goalkeeper_x`, `goalkeeper_y`, `goal_shot_x`, `goal_shot_y`, `goal_mouth_x`, `goal_mouth_y`, `goalkeeper_name`, `goalkeeper_id`.
755
+
756
+ Dependencies: `match_data`
757
+
758
+ ---
759
+
760
+ #### `past_matches_data`
761
+
762
+ Fetch the complete head-to-head match history for each team pair playing in a given matchweek.
763
+
764
+ ```python
765
+ from datafc import past_matches_data
766
+
767
+ df = past_matches_data(
768
+ tournament_id=52,
769
+ season_id=77805,
770
+ week_number=21,
771
+ )
772
+ ```
773
+
774
+ Parameters:
775
+
776
+ - `tournament_id` (int)
777
+ - `season_id` (int)
778
+ - `week_number` (int)
779
+ - `tournament_type` (str, optional): `"uefa"` for UEFA competitions.
780
+ - `tournament_stage` (str, optional): Required when `tournament_type="uefa"`. Same options as `match_data`.
781
+
782
+ Columns: same as `match_data`.
783
+
784
+ ---
785
+
786
+ ### Player
787
+
788
+ #### `player_data`
789
+
790
+ Fetch profile data for each player in a squad: nationality, date of birth, height, weight, preferred foot, and market value.
791
+
792
+ ```python
793
+ from datafc import standings_data, squad_data, player_data
794
+
795
+ standings_df = standings_data(52, 63814)
796
+ squad_df = squad_data(standings_df=standings_df)
797
+ df = player_data(squad_df=squad_df)
798
+ ```
799
+
800
+ Columns: `player_id`, `player_name`, `date_of_birth`, `age`, `nationality`, `nationality_id`, `height`, `weight`, `preferred_foot`, `jersey_number`, `position`, `market_value`, `market_currency`, `team_id`, `team_name`.
801
+
802
+ Dependencies: `squad_data`
803
+
804
+ ---
805
+
806
+ #### `player_transfers_data`
807
+
808
+ Fetch transfer history for each player in a squad.
809
+
810
+ ```python
811
+ from datafc import standings_data, squad_data, player_transfers_data
812
+
813
+ standings_df = standings_data(52, 63814)
814
+ squad_df = squad_data(standings_df=standings_df)
815
+ df = player_transfers_data(squad_df=squad_df)
816
+ ```
817
+
818
+ Columns: `player_id`, `player_name`, `transfer_date`, `from_team_id`, `from_team_name`, `to_team_id`, `to_team_name`, `transfer_type`, `fee`, `fee_currency`.
819
+
820
+ Dependencies: `squad_data`
821
+
822
+ ---
823
+
824
+ #### `player_career_stats_data`
825
+
826
+ Fetch season-by-season career statistics across all competitions for each player in a squad (long format: one row per player-season-stat combination). Only `overall` entries are included; home/away splits are excluded.
827
+
828
+ ```python
829
+ from datafc import standings_data, squad_data, player_career_stats_data
830
+
831
+ standings_df = standings_data(52, 63814)
832
+ squad_df = squad_data(standings_df=standings_df)
833
+ df = player_career_stats_data(squad_df=squad_df)
834
+ ```
835
+
836
+ Columns: `player_id`, `player_name`, `tournament_id`, `tournament_name`, `season_id`, `season_name`, `team_id`, `team_name`, `stat`, `value`.
837
+
838
+ Dependencies: `squad_data`
839
+
840
+ ---
841
+
842
+ #### `player_national_team_data`
843
+
844
+ Fetch national team career statistics (appearances, goals, debut) for each player in a squad.
845
+
846
+ ```python
847
+ from datafc import standings_data, squad_data, player_national_team_data
848
+
849
+ standings_df = standings_data(52, 63814)
850
+ squad_df = squad_data(standings_df=standings_df)
851
+ df = player_national_team_data(squad_df=squad_df)
852
+ ```
853
+
854
+ Columns: `player_id`, `player_name`, `team_id`, `team_name`, `team_code`, `appearances`, `goals`, `debut_timestamp`.
855
+
856
+ Dependencies: `squad_data`
857
+
858
+ ---
859
+
860
+ #### `player_match_log_data`
861
+
862
+ Fetch match-by-match in-game statistics for each player in a squad across all recorded matches (wide format: one row per player per match).
863
+
864
+ ```python
865
+ from datafc import standings_data, squad_data, player_match_log_data
866
+
867
+ standings_df = standings_data(52, 63814)
868
+ squad_df = squad_data(standings_df=standings_df)
869
+ df = player_match_log_data(squad_df=squad_df)
870
+ ```
871
+
872
+ Columns: `player_id`, `player_name`, `game_id`, `start_timestamp`, `tournament`, `season`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `home_score`, `away_score`, `status`, plus all available in-match stat columns (e.g. `goals`, `assists`, `rating`, `minutesPlayed`, …).
873
+
874
+ Dependencies: `squad_data`
875
+
876
+ ---
877
+
878
+ ### Referee
879
+
880
+ #### `referee_stats_data`
881
+
882
+ Fetch career statistics for a referee. The `referee_id` can be obtained from the `referee_id` column in `match_details_data()` output.
883
+
884
+ ```python
885
+ from datafc import referee_stats_data
886
+
887
+ df = referee_stats_data(referee_id=12345)
888
+ ```
889
+
890
+ Parameters:
891
+
892
+ - `referee_id` (int): The unique Sofascore identifier for the referee.
893
+
894
+ Columns: `referee_id`, `referee_name`, `tournament_id`, `tournament_name`, `stat`, `value`. One row per stat per tournament. Covers appearances, yellow cards, red cards, second yellow cards, and penalties.
895
+
896
+ ---
897
+
898
+ ## Changelog
899
+
900
+ ### v2.1.0
901
+
902
+ - Added `team_match_history_data`: fetches the complete match history for a single team across all competitions using `team_id` directly (no standings dependency).
903
+
904
+ ---
905
+
906
+ ### v2.0.0
907
+
908
+ - **Chrome / Selenium removed — no browser required.** datafc now makes direct HTTP requests. Installation is simpler, and fetches are significantly faster than before.
909
+ - **18 new functions.** `seasons_data`, `season_rounds_data`, `team_data`, `team_transfers_data`, `upcoming_matches_data`, `league_player_stats_data`, `match_details_data`, `match_h2h_data`, `pregame_form_data`, `incidents_data`, `average_positions_data`, `player_data`, `player_transfers_data`, `player_career_stats_data`, `player_national_team_data`, `player_match_log_data`, `referee_stats_data`, `search_data`.
910
+ - **Async API.** All functions are available in `datafc.aio` for parallel fetching with `asyncio.gather()`, letting you download an entire matchweek's worth of data concurrently.
911
+ - **Disk caching.** Pass a `DiskCache` instance to any function to avoid re-fetching data you've already downloaded. Cached responses are returned instantly on subsequent calls.
912
+ - **Automatic rate limiting and retries.** All functions accept a `rate_limit` parameter. Temporary failures (rate limits, server errors) are retried automatically without any extra code on your side.
913
+ - **New Parquet export.** Use `save_parquet()` on any DataFrame returned by a fetch function to save output as `.parquet`. Requires `pyarrow` (`pip install datafc[parquet]`).
914
+ - **Heatmap fetch no longer crashes on partial access errors.** `coordinates_data` now skips players that the API refuses to serve and returns data for everyone else. The function only raises an error if no player yields any coordinates at all.
915
+ - **Exported filenames are human-readable.** JSON, Excel, and Parquet files now use the league name (e.g. `trendyol_superlig_shots_data.json`) instead of raw numeric IDs. Turkish and other non-ASCII characters are transliterated correctly — `Şampiyonlar` becomes `sampiyonlar`, not `ampiyonlar`.
916
+ - **Valid JSON output.** Exported `.json` files no longer contain invalid `NaN` literals; they use `null` instead, making them compatible with every JSON parser and spreadsheet tool.
917
+ - **Cleaner numeric columns.** Score fields, ratings, and market values that were previously returned as strings or empty strings are now proper numeric types (`null` when missing, not `""`).
918
+ - **Clearer errors.** When something goes wrong, the exception type tells you what happened: data not available, invalid parameter, API access error, rate limit hit, or server error.
919
+
920
+ ### v1.5.0
921
+
922
+ - Added `team_stats_data`, `player_stats_data`, and `squad_data`.
923
+
924
+ ### v1.4.0
925
+
926
+ - Added `tournament_type` and `tournament_stage` parameters to `match_data` and `past_matches_data` for UEFA competitions (UCL, UEL, UECL, UNL).
927
+
928
+ ### v1.3.0
929
+
930
+ - Added `past_matches_data`.
931
+
932
+ ### v1.2.0
933
+
934
+ - Added match score columns to `match_data`.
935
+
936
+ ### v1.1.0
937
+
938
+ - Added 4 new columns to `match_data`.
939
+ - Added `data_source` parameter to export functions.
940
+
941
+ ### v1.0.0
942
+
943
+ - Initial release. Selenium-based Sofascore scraper with JSON/Excel export.
944
+
945
+ ## License
946
+
947
+ MIT License
948
+
949
+ ## Contributing
950
+
951
+ Bug reports and feature requests are welcome on the [issue tracker](https://github.com/urazakgul/datafc/issues); pull requests are welcome at [github.com/urazakgul/datafc](https://github.com/urazakgul/datafc).