datafc 1.5.0__tar.gz → 2.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. datafc-2.0.0/PKG-INFO +934 -0
  2. datafc-2.0.0/README.md +908 -0
  3. datafc-2.0.0/datafc/__init__.py +78 -0
  4. datafc-2.0.0/datafc/exceptions.py +46 -0
  5. datafc-2.0.0/datafc/sofascore/__init__.py +80 -0
  6. datafc-2.0.0/datafc/sofascore/_parsers.py +524 -0
  7. datafc-2.0.0/datafc/sofascore/aio.py +2327 -0
  8. datafc-2.0.0/datafc/sofascore/fetch_average_positions_data.py +91 -0
  9. datafc-2.0.0/datafc/sofascore/fetch_coordinates_data.py +102 -0
  10. datafc-2.0.0/datafc/sofascore/fetch_goal_networks_data.py +148 -0
  11. datafc-2.0.0/datafc/sofascore/fetch_incidents_data.py +87 -0
  12. datafc-2.0.0/datafc/sofascore/fetch_league_player_stats_data.py +165 -0
  13. datafc-2.0.0/datafc/sofascore/fetch_lineups_data.py +80 -0
  14. datafc-2.0.0/datafc/sofascore/fetch_match_data.py +84 -0
  15. datafc-2.0.0/datafc/sofascore/fetch_match_details_data.py +92 -0
  16. datafc-2.0.0/datafc/sofascore/fetch_match_h2h_data.py +82 -0
  17. datafc-2.0.0/datafc/sofascore/fetch_match_odds_data.py +73 -0
  18. datafc-2.0.0/datafc/sofascore/fetch_match_stats_data.py +80 -0
  19. datafc-2.0.0/datafc/sofascore/fetch_momentum_data.py +80 -0
  20. datafc-2.0.0/datafc/sofascore/fetch_past_matches_data.py +140 -0
  21. datafc-2.0.0/datafc/sofascore/fetch_player_career_stats_data.py +124 -0
  22. datafc-2.0.0/datafc/sofascore/fetch_player_data.py +132 -0
  23. datafc-2.0.0/datafc/sofascore/fetch_player_match_log_data.py +126 -0
  24. datafc-2.0.0/datafc/sofascore/fetch_player_national_team_data.py +108 -0
  25. datafc-2.0.0/datafc/sofascore/fetch_player_stats_data.py +126 -0
  26. datafc-2.0.0/datafc/sofascore/fetch_player_transfers_data.py +122 -0
  27. datafc-2.0.0/datafc/sofascore/fetch_pregame_form_data.py +109 -0
  28. datafc-2.0.0/datafc/sofascore/fetch_referee_stats_data.py +104 -0
  29. datafc-2.0.0/datafc/sofascore/fetch_search_data.py +93 -0
  30. datafc-2.0.0/datafc/sofascore/fetch_season_rounds_data.py +99 -0
  31. datafc-2.0.0/datafc/sofascore/fetch_seasons_data.py +86 -0
  32. datafc-2.0.0/datafc/sofascore/fetch_shots_data.py +80 -0
  33. datafc-2.0.0/datafc/sofascore/fetch_squad_data.py +124 -0
  34. datafc-2.0.0/datafc/sofascore/fetch_standings_data.py +80 -0
  35. datafc-2.0.0/datafc/sofascore/fetch_substitutions_data.py +80 -0
  36. datafc-2.0.0/datafc/sofascore/fetch_team_data.py +132 -0
  37. datafc-2.0.0/datafc/sofascore/fetch_team_stats_data.py +123 -0
  38. datafc-2.0.0/datafc/sofascore/fetch_team_transfers_data.py +128 -0
  39. datafc-2.0.0/datafc/sofascore/fetch_upcoming_matches_data.py +105 -0
  40. datafc-2.0.0/datafc/utils/__init__.py +35 -0
  41. datafc-2.0.0/datafc/utils/_async_client.py +164 -0
  42. datafc-2.0.0/datafc/utils/_cache.py +142 -0
  43. datafc-2.0.0/datafc/utils/_client.py +136 -0
  44. datafc-2.0.0/datafc/utils/_config.py +93 -0
  45. datafc-2.0.0/datafc/utils/_helpers.py +26 -0
  46. datafc-2.0.0/datafc/utils/_save_files.py +142 -0
  47. datafc-2.0.0/datafc/utils/_tournament_info.py +66 -0
  48. datafc-2.0.0/datafc/utils/_validate.py +98 -0
  49. datafc-2.0.0/datafc.egg-info/PKG-INFO +934 -0
  50. datafc-2.0.0/datafc.egg-info/SOURCES.txt +54 -0
  51. datafc-2.0.0/datafc.egg-info/requires.txt +13 -0
  52. datafc-2.0.0/pyproject.toml +55 -0
  53. datafc-1.5.0/PKG-INFO +0 -1086
  54. datafc-1.5.0/README.md +0 -1070
  55. datafc-1.5.0/datafc/__init__.py +0 -3
  56. datafc-1.5.0/datafc/sofascore/__init__.py +0 -31
  57. datafc-1.5.0/datafc/sofascore/fetch_coordinates_data.py +0 -118
  58. datafc-1.5.0/datafc/sofascore/fetch_goal_networks_data.py +0 -153
  59. datafc-1.5.0/datafc/sofascore/fetch_lineups_data.py +0 -148
  60. datafc-1.5.0/datafc/sofascore/fetch_match_data.py +0 -156
  61. datafc-1.5.0/datafc/sofascore/fetch_match_odds_data.py +0 -113
  62. datafc-1.5.0/datafc/sofascore/fetch_match_stats_data.py +0 -111
  63. datafc-1.5.0/datafc/sofascore/fetch_momentum_data.py +0 -106
  64. datafc-1.5.0/datafc/sofascore/fetch_past_matches_data.py +0 -180
  65. datafc-1.5.0/datafc/sofascore/fetch_player_stats_data.py +0 -132
  66. datafc-1.5.0/datafc/sofascore/fetch_shots_data.py +0 -142
  67. datafc-1.5.0/datafc/sofascore/fetch_squad_data.py +0 -123
  68. datafc-1.5.0/datafc/sofascore/fetch_standings_data.py +0 -110
  69. datafc-1.5.0/datafc/sofascore/fetch_substitutions_data.py +0 -110
  70. datafc-1.5.0/datafc/sofascore/fetch_team_stats_data.py +0 -114
  71. datafc-1.5.0/datafc/utils/__init__.py +0 -0
  72. datafc-1.5.0/datafc/utils/_config.py +0 -25
  73. datafc-1.5.0/datafc/utils/_save_files.py +0 -50
  74. datafc-1.5.0/datafc/utils/_setup_webdriver.py +0 -49
  75. datafc-1.5.0/datafc.egg-info/PKG-INFO +0 -1086
  76. datafc-1.5.0/datafc.egg-info/SOURCES.txt +0 -28
  77. datafc-1.5.0/datafc.egg-info/requires.txt +0 -4
  78. datafc-1.5.0/setup.py +0 -30
  79. {datafc-1.5.0 → datafc-2.0.0}/LICENSE +0 -0
  80. {datafc-1.5.0 → datafc-2.0.0}/datafc.egg-info/dependency_links.txt +0 -0
  81. {datafc-1.5.0 → datafc-2.0.0}/datafc.egg-info/top_level.txt +0 -0
  82. {datafc-1.5.0 → datafc-2.0.0}/setup.cfg +0 -0
datafc-2.0.0/PKG-INFO ADDED
@@ -0,0 +1,934 @@
1
+ Metadata-Version: 2.4
2
+ Name: datafc
3
+ Version: 2.0.0
4
+ Summary: Fetch, process, and export structured football data.
5
+ Author-email: Uraz Akgül <urazdev@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/urazakgul/datafc
8
+ Project-URL: Repository, https://github.com/urazakgul/datafc
9
+ Project-URL: Issues, https://github.com/urazakgul/datafc/issues
10
+ Keywords: football,soccer,data,analytics,sofascore
11
+ Requires-Python: >=3.8
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: pandas>=1.5
15
+ Requires-Dist: curl_cffi>=0.7
16
+ Requires-Dist: openpyxl>=3.0
17
+ Provides-Extra: parquet
18
+ Requires-Dist: pyarrow>=12.0; extra == "parquet"
19
+ Provides-Extra: dev
20
+ Requires-Dist: ruff>=0.4; extra == "dev"
21
+ Requires-Dist: mypy>=1.0; extra == "dev"
22
+ Requires-Dist: pandas-stubs>=2.0; extra == "dev"
23
+ Requires-Dist: pytest>=8.0; extra == "dev"
24
+ Requires-Dist: pytest-mock>=3.12; extra == "dev"
25
+ Dynamic: license-file
26
+
27
+ # datafc v2.0.0
28
+
29
+ ## Overview
30
+
31
+ `datafc` fetches, processes, and exports structured football data. It provides **32 functions** covering tournament metadata, standings, squad rosters, match fixtures, shots, lineups, player heatmaps, odds, and more — all returning clean `pandas` DataFrames ready for analysis. Sofascore is currently the only supported data source.
32
+
33
+ > **Finding IDs:** `tournament_id` and `season_id` can be discovered two ways:
34
+ > - **From the URL:** navigating to a league page on Sofascore (e.g. `sofascore.com/.../trendyol-super-lig/52#id:63814`) shows `tournament_id=52` and `season_id=63814`.
35
+ > - **Programmatically:** use `search_data("super lig", entity_type="tournament")` to find the tournament ID, then `seasons_data(tournament_id)` to list all available seasons.
36
+
37
+ ## What Data Can You Access?
38
+
39
+ ### Discovery
40
+
41
+ | Function | What it returns |
42
+ |---|---|
43
+ | `search_data` | Search for players, teams, tournaments, or managers by name |
44
+ | `seasons_data` | All seasons and their IDs for a given tournament |
45
+
46
+ ### Tournament / Season Metadata
47
+
48
+ | Function | What it returns |
49
+ |---|---|
50
+ | `season_rounds_data` | All rounds/matchweeks in a season |
51
+
52
+ ### League / Season
53
+
54
+ | Function | What it returns |
55
+ |---|---|
56
+ | `standings_data` | League table (Total, Home, Away) with W/D/L, goals, points |
57
+ | `team_data` | Team profiles: stadium, kit colors, manager, venue capacity |
58
+ | `team_stats_data` | 100+ season-level stats per team |
59
+ | `team_transfers_data` | All incoming and outgoing transfers per team |
60
+ | `player_stats_data` | Top player stats per team (goals, assists, key passes, …) |
61
+ | `squad_data` | Squad roster with age, height, market value, contract expiry |
62
+ | `upcoming_matches_data` | Upcoming fixtures for all teams in the standings |
63
+ | `league_player_stats_data` | Wide-format player rankings, sortable by any metric |
64
+
65
+ ### Matchweek
66
+
67
+ | Function | What it returns |
68
+ |---|---|
69
+ | `match_data` | Fixtures for a matchweek: score, status, home/away teams |
70
+ | `match_details_data` | Referee info (name, cards, games) and venue details per match |
71
+ | `match_stats_data` | Aggregate team stats per match (possession, shots, fouls, …) |
72
+ | `match_odds_data` | Pre-match and live betting odds per market (e.g. 1/X/2) |
73
+ | `match_h2h_data` | All-time H2H record between the two teams |
74
+ | `momentum_data` | Minute-by-minute momentum score throughout the match |
75
+ | `pregame_form_data` | Last 5 results, avg rating, league position, and squad value before each match |
76
+ | `shots_data` | Every shot: coordinates, xG, xGOT, outcome, body part |
77
+ | `lineups_data` | Starting XI and substitutes with per-match player stats |
78
+ | `substitutions_data` | Substitution events: minute, player in, player out |
79
+ | `incidents_data` | Goals, cards, and VAR decisions per match |
80
+ | `average_positions_data` | Average pitch position (x/y) per player |
81
+ | `coordinates_data` | Heatmap touch coordinates per player (requires `lineups_data` output) |
82
+ | `goal_networks_data` | Goal-sequence coordinates (passes, shots, goalkeeper position) |
83
+ | `past_matches_data` | Historical H2H results for team pairs in a given matchweek |
84
+
85
+ ### Player
86
+
87
+ | Function | What it returns |
88
+ |---|---|
89
+ | `player_data` | Player profile: age, nationality, height, market value |
90
+ | `player_transfers_data` | Transfer history per player |
91
+ | `player_career_stats_data` | Season-by-season career stats across all competitions (long format) |
92
+ | `player_national_team_data` | National team appearances, goals, and debut date |
93
+ | `player_match_log_data` | Match-by-match in-game statistics across all recorded matches |
94
+
95
+ ### Referee
96
+
97
+ | Function | What it returns |
98
+ |---|---|
99
+ | `referee_stats_data` | Career stats for a referee: games, cards, and per-game averages |
100
+
101
+ > **Coverage:** Any league and season available on Sofascore. For Turkey Super Lig, every season from 1980/81 to the present is accessible.
102
+
103
+ ## Installation
104
+
105
+ ```bash
106
+ pip install datafc
107
+ ```
108
+
109
+ To install the latest development version:
110
+
111
+ ```bash
112
+ pip install git+https://github.com/urazakgul/datafc.git
113
+ ```
114
+
115
+ To upgrade an existing installation:
116
+
117
+ ```bash
118
+ pip install --upgrade datafc
119
+ ```
120
+
121
+ ## Quick Start
122
+
123
+ ```python
124
+ from datafc import (
125
+ standings_data,
126
+ match_data,
127
+ shots_data,
128
+ league_player_stats_data,
129
+ )
130
+
131
+ standings_df = standings_data(tournament_id=52, season_id=77805)
132
+
133
+ match_df = match_data(tournament_id=52, season_id=77805, week_number=1)
134
+
135
+ shots_df = shots_data(match_df=match_df)
136
+
137
+
138
+ top_scorers = league_player_stats_data(
139
+ tournament_id=52,
140
+ season_id=77805,
141
+ order="-goals",
142
+ fields=["goals", "assists", "rating"],
143
+ max_players=20,
144
+ )
145
+ ```
146
+
147
+ ## Async API
148
+
149
+ All functions are also available in async form via `datafc.aio`, designed for fetching multiple weeks or matches in parallel with `asyncio.gather()`.
150
+
151
+ ```python
152
+ import asyncio
153
+ import pandas as pd
154
+ from datafc import aio
155
+
156
+ async def fetch_full_season(tournament_id, season_id, total_weeks):
157
+     tasks = [
158
+         aio.match_data(tournament_id, season_id, week_number=w)
159
+         for w in range(1, total_weeks + 1)
160
+     ]
161
+     frames = await asyncio.gather(*tasks)
162
+     return pd.concat(frames, ignore_index=True)
163
+
164
+ df = asyncio.run(fetch_full_season(52, 63814, total_weeks=38))
165
+ ```
166
+
167
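+ Use `return_exceptions=True` when gathering independent coroutines so that one failure does not discard the results of the others — exceptions are returned in the result list instead of being raised from `gather()`: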
168
+
169
+ ```python
170
+ results = await asyncio.gather(
171
+ aio.match_data(52, 77805, week_number=1),
172
+ aio.standings_data(52, 77805),
173
+ return_exceptions=True,
174
+ )
175
+ for label, result in zip(["match", "standings"], results):
176
+     if isinstance(result, Exception):
177
+         print(f"{label} failed: {result}")
178
+ ```
179
+
180
+ Async functions accept the same parameters as their sync counterparts, including `cache`, `enable_json_export`, `enable_excel_export`, and `output_dir` (see [Caching](#caching) and [Common Parameters](#common-parameters)).
181
+
182
+ ## Common Parameters
183
+
184
+ Every function accepts the following shared parameters:
185
+
186
+ | Parameter | Type | Default | Description |
187
+ |---|---|---|---|
188
+ | `data_source` | `str` | `"sofascore"` | Data source: `"sofascore"` or `"sofavpn"` (use `sofavpn` if Sofascore is blocked in your region) |
189
+ | `rate_limit` | `float` | `2.0` | Maximum requests per second. The limit is **global across all instances** in the same process (sync) or event loop (async) — creating multiple clients does not multiply throughput. |
190
+ | `cache` | `DiskCache` | `None` | Optional `DiskCache` instance for persistent response caching (see [Caching](#caching)). |
191
+ | `enable_json_export` | `bool` | `False` | Save output as a JSON file |
192
+ | `enable_excel_export` | `bool` | `False` | Save output as an Excel file |
193
+ | `output_dir` | `str` | `"."` | Directory for exported files |
194
+
195
+ ## Caching
196
+
197
+ Responses can be cached to disk to avoid redundant API calls across sessions:
198
+
199
+ ```python
200
+ import asyncio
201
+ from datafc import DiskCache
202
+ from datafc import aio
203
+
204
+ cache = DiskCache(cache_dir=".datafc_cache", ttl_hours=24)
205
+
206
+ async def main():
207
+     # First call hits the API; subsequent calls read from disk
208
+     df = await aio.match_data(52, 63814, week_number=1, cache=cache)
209
+
210
+ asyncio.run(main())
211
+ ```
212
+
213
+ `DiskCache` stores responses as JSON files keyed by URL. Cache entries expire after `ttl_hours` (set to `0` to disable expiry). Call `cache.clear()` to invalidate all entries.
214
+
215
+ ## Parquet Export
216
+
217
+ For large datasets (`player_career_stats_data`, `coordinates_data`, `lineups_data`), Parquet is significantly faster to read and write than JSON. Use `save_parquet` directly on any DataFrame returned by a fetch function:
218
+
219
+ ```python
220
+ from datafc import player_career_stats_data, standings_data, squad_data, save_parquet
221
+
222
+ standings_df = standings_data(52, 63814)
223
+ squad_df = squad_data(standings_df=standings_df)
224
+ df = player_career_stats_data(squad_df=squad_df)
225
+
226
+ save_parquet(
227
+ data=df,
228
+ fn_name="player_career_stats_data",
229
+ data_source="sofascore",
230
+ country="Turkey",
231
+ tournament="Trendyol Super Lig",
232
+ season="25/26",
233
+ output_dir="data/processed",
234
+ )
235
+ ```
236
+
237
+ Parquet export requires `pyarrow`. Install it with:
238
+
239
+ ```bash
240
+ pip install datafc[parquet]
241
+ # or
242
+ pip install pyarrow
243
+ ```
244
+
245
+ ## Exception Hierarchy
246
+
247
+ ```
248
+ DataFCError
249
+ ├── InvalidParameterError (bad input: unknown data_source, invalid category, etc.)
250
+ ├── DataNotAvailableError (valid request but no data returned)
251
+ └── APIError (HTTP-level error from the Sofascore API)
252
+     ├── RateLimitError (HTTP 429)
253
+     └── ServerError (HTTP 5xx)
254
+ ```
255
+
256
+ ```python
257
+ from datafc import match_data, DataNotAvailableError, RateLimitError
258
+
259
+ try:
260
+     df = match_data(52, 63814, week_number=99)
261
+ except DataNotAvailableError:
262
+     print("No data for that week.")
263
+ except RateLimitError:
264
+     print("Rate limited. Lower your rate_limit or add delays.")
265
+ ```
266
+
267
+ ## Function Reference
268
+
269
+ ### Discovery
270
+
271
+ #### `search_data`
272
+
273
+ Search for teams, players, tournaments, or managers by name. Useful for finding IDs without visiting the website.
274
+
275
+ ```python
276
+ from datafc import search_data
277
+
278
+ df = search_data("galatasaray", entity_type="team")
279
+ ```
280
+
281
+ Parameters:
282
+
283
+ - `query` (str): Search term.
284
+ - `entity_type` (str, optional): Filter by type: `"team"`, `"player"`, `"tournament"`, or `"manager"`. `None` returns all types.
285
+
286
+ Columns: `entity_id`, `entity_name`, `entity_type`, `score`, `country`, `position`.
287
+
288
+ ---
289
+
290
+ #### `seasons_data`
291
+
292
+ List all available seasons for a tournament. Use this to discover valid `season_id` values before calling other functions.
293
+
294
+ ```python
295
+ from datafc import seasons_data
296
+
297
+ df = seasons_data(tournament_id=52)
298
+ ```
299
+
300
+ Columns: `tournament_id`, `season_id`, `season_name`, `season_year`.
301
+
302
+ ---
303
+
304
+ ### Tournament / Season Metadata
305
+
306
+ #### `season_rounds_data`
307
+
308
+ Fetch all rounds (matchweeks) defined for a season. Useful for iterating over all weeks programmatically.
309
+
310
+ ```python
311
+ from datafc import season_rounds_data
312
+
313
+ df = season_rounds_data(tournament_id=52, season_id=77805)
314
+ ```
315
+
316
+ Columns: `tournament_id`, `season_id`, `round_number`, `slug`, `name`, `prefix`, `is_latest`.
317
+
318
+ ---
319
+
320
+ ### League / Season
321
+
322
+ #### `standings_data`
323
+
324
+ Fetch league standings for Total, Home, and Away categories.
325
+
326
+ ```python
327
+ from datafc import standings_data
328
+
329
+ df = standings_data(tournament_id=52, season_id=77805)
330
+ ```
331
+
332
+ Columns: `country`, `tournament`, `tournament_id`, `season_id`, `team_name`, `team_id`, `position`, `matches`, `wins`, `draws`, `losses`, `scores_for`, `scores_against`, `points`, `category` (`Total` / `Home` / `Away`).
333
+
334
+ ---
335
+
336
+ #### `team_data`
337
+
338
+ Fetch profile and infrastructure data for every team in the standings: stadium name and capacity, kit colors, and current manager.
339
+
340
+ ```python
341
+ from datafc import standings_data, team_data
342
+
343
+ standings_df = standings_data(52, 63814)
344
+ df = team_data(standings_df=standings_df)
345
+ ```
346
+
347
+ Columns: `country`, `tournament`, `team_id`, `team_name`, `short_name`, `slug`, `national`, `country_name`, `country_id`, `primary_color`, `secondary_color`, `text_color`, `venue_id`, `venue_name`, `venue_capacity`, `venue_city`, `manager_id`, `manager_name`, `manager_country`.
348
+
349
+ Dependencies: `standings_data`
350
+
351
+ ---
352
+
353
+ #### `team_stats_data`
354
+
355
+ Fetch season-level team statistics (long format) for every team in the standings.
356
+
357
+ ```python
358
+ from datafc import standings_data, team_stats_data
359
+
360
+ standings_df = standings_data(52, 77805)
361
+ df = team_stats_data(standings_df=standings_df, tournament_id=52, season_id=77805)
362
+ ```
363
+
364
+ Parameters:
365
+
366
+ - `standings_df` (DataFrame): Output of `standings_data`.
367
+ - `tournament_id` (int)
368
+ - `season_id` (int)
369
+ - `season` (str, optional): Human-readable season label (e.g. `"24/25"`) used only in the export filename.
370
+
371
+ Columns: `country`, `tournament`, `team_name`, `team_id`, `stat`, `value`.
372
+
373
+ Dependencies: `standings_data`
374
+
375
+ ---
376
+
377
+ #### `team_transfers_data`
378
+
379
+ Fetch all incoming and outgoing transfer records for every team in the standings.
380
+
381
+ ```python
382
+ from datafc import standings_data, team_transfers_data
383
+
384
+ standings_df = standings_data(52, 63814)
385
+ df = team_transfers_data(standings_df=standings_df)
386
+ ```
387
+
388
+ Columns: `country`, `tournament`, `team_name`, `team_id`, `direction` (`in` / `out`), `player_id`, `player_name`, `transfer_date`, `from_team_id`, `from_team_name`, `to_team_id`, `to_team_name`, `transfer_type` (`loan` / `permanent` / `free` / `end_of_contract`), `fee`, `fee_currency`.
389
+
390
+ Dependencies: `standings_data`
391
+
392
+ ---
393
+
394
+ #### `player_stats_data`
395
+
396
+ Fetch top player statistics per team (long format). Covers goals, assists, key passes, duels, and more.
397
+
398
+ ```python
399
+ from datafc import standings_data, player_stats_data
400
+
401
+ standings_df = standings_data(52, 77805)
402
+ df = player_stats_data(standings_df=standings_df, tournament_id=52, season_id=77805)
403
+ ```
404
+
405
+ Columns: `country`, `tournament`, `team_name`, `team_id`, `player_name`, `player_id`, `position`, `stat_name`, `stat_value`.
406
+
407
+ Dependencies: `standings_data`
408
+
409
+ ---
410
+
411
+ #### `squad_data`
412
+
413
+ Fetch full squad roster for every team: age, height, nationality, position, preferred foot, contract expiry, and market value.
414
+
415
+ ```python
416
+ from datafc import standings_data, squad_data
417
+
418
+ standings_df = standings_data(52, 63814)
419
+ df = squad_data(standings_df=standings_df)
420
+ ```
421
+
422
+ Columns: `country`, `tournament`, `tournament_id`, `season_id`, `team_name`, `team_id`, `player_name`, `player_id`, `age`, `height`, `player_country`, `position`, `preferred_foot`, `contract_until`, `market_value`, `market_currency`.
423
+
424
+ Dependencies: `standings_data`
425
+
426
+ ---
427
+
428
+ #### `upcoming_matches_data`
429
+
430
+ Fetch upcoming fixtures for all teams currently in the standings.
431
+
432
+ ```python
433
+ from datafc import standings_data, upcoming_matches_data
434
+
435
+ standings_df = standings_data(52, 63814)
436
+ df = upcoming_matches_data(standings_df=standings_df)
437
+ ```
438
+
439
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `start_timestamp`, `status`.
440
+
441
+ > **Note:** Results may include fixtures from cup competitions (e.g. Türkiye Kupası) if a team's next scheduled match is outside the league. Filter by the `tournament` column to restrict to league fixtures only.
442
+
443
+ Dependencies: `standings_data`
444
+
445
+ ---
446
+
447
+ #### `league_player_stats_data`
448
+
449
+ Fetch ranked player statistics across the entire league in wide format (one row per player). Supports pagination, position filtering, and multiple accumulation methods.
450
+
451
+ ```python
452
+ from datafc import league_player_stats_data
453
+
454
+ # Top 50 goalscorers
455
+ df = league_player_stats_data(
456
+ tournament_id=52,
457
+ season_id=77805,
458
+ order="-goals",
459
+ accumulation="total",
460
+ fields=["goals", "assists", "rating", "expectedGoals"],
461
+ max_players=50,
462
+ )
463
+
464
+ # Top midfielders by rating per 90
465
+ df = league_player_stats_data(
466
+ tournament_id=52,
467
+ season_id=77805,
468
+ order="-rating",
469
+ accumulation="per90",
470
+ position="M",
471
+ max_players=20,
472
+ )
473
+ ```
474
+
475
+ Parameters:
476
+
477
+ - `order` (str): Field to sort by, prefix with `-` for descending. Default `"-rating"`.
478
+ - `accumulation` (str): `"total"`, `"per90"`, or `"perMatch"`. Default `"total"`.
479
+ - `fields` (list, optional): Stats columns to include. `None` returns 14 default fields.
480
+ - `position` (str, optional): `"G"`, `"D"`, `"M"`, or `"F"`. `None` includes all positions.
481
+ - `max_players` (int): Maximum players to return (fetches multiple pages if needed). Default `100`.
482
+
483
+ Available fields: `goals`, `assists`, `rating`, `expectedGoals`, `expectedAssists`, `goalsAssistsSum`, `penaltyGoals`, `freeKickGoal`, `scoringFrequency`, `totalShots`, `shotsOnTarget`, `bigChancesCreated`, `bigChancesMissed`, `accuratePasses`, `accuratePassesPercentage`, `keyPasses`, `accurateLongBalls`, `accurateLongBallsPercentage`, `successfulDribbles`, `successfulDribblesPercentage`, `tackles`, `interceptions`, `clearances`, `possessionLost`, `yellowCards`, `redCards`, `saves`, `goalsPrevented`, `minutesPlayed`, `appearances`.
484
+
485
+ Columns: `tournament_id`, `season_id`, `player_name`, `player_id`, `team_name`, `team_id`, + one column per requested field.
486
+
487
+ ---
488
+
489
+ ### Matchweek
490
+
491
+ #### `match_data`
492
+
493
+ Fetch match fixtures and scores for a given matchweek.
494
+
495
+ ```python
496
+ from datafc import match_data
497
+
498
+ match_df = match_data(
499
+ tournament_id=52,
500
+ season_id=77805,
501
+ week_number=21,
502
+ )
503
+
504
+ # UEFA tournaments require additional parameters:
505
+ ucl_df = match_data(
506
+ tournament_id=7,
507
+ season_id=61644,
508
+ week_number=5,
509
+ tournament_type="uefa",
510
+ tournament_stage="round_of_16",
511
+ )
512
+ ```
513
+
514
+ Parameters:
515
+
516
+ - `tournament_id` (int)
517
+ - `season_id` (int)
518
+ - `week_number` (int)
519
+ - `tournament_type` (str, optional): `"uefa"` for UEFA competitions. `None` assumes a domestic league.
520
+ - `tournament_stage` (str, optional): Required when `tournament_type="uefa"`. Options: `preliminary_semifinals`, `preliminary_final`, `qualification_round`, `qualification_playoff`, `group_stage_week`, `playoff_round`, `round_of_16`, `quarterfinals`, `semifinals`, `match_for_3rd_place`, `final`.
521
+
522
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `injury_time_1`, `injury_time_2`, `start_timestamp`, `status`, `home_score_current`, `home_score_display`, `home_score_period1`, `home_score_period2`, `home_score_normaltime`, `away_score_current`, `away_score_display`, `away_score_period1`, `away_score_period2`, `away_score_normaltime`.
523
+
524
+ ---
525
+
526
+ #### `match_details_data`
527
+
528
+ Fetch referee and venue details for each match.
529
+
530
+ ```python
531
+ from datafc import match_details_data
532
+
533
+ df = match_details_data(match_df=match_df)
534
+ ```
535
+
536
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `referee_id`, `referee_name`, `referee_country`, `referee_yellow_cards`, `referee_red_cards`, `referee_games`, `venue_id`, `venue_name`, `venue_city`, `venue_country`, `venue_capacity`.
537
+
538
+ Dependencies: `match_data`
539
+
540
+ ---
541
+
542
+ #### `match_stats_data`
543
+
544
+ Fetch aggregate team statistics (possession, shots, passes, etc.) for each match.
545
+
546
+ ```python
547
+ from datafc import match_stats_data
548
+
549
+ df = match_stats_data(match_df=match_df)
550
+ ```
551
+
552
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `period` (`ALL` / `1ST` / `2ND`), `group_name`, `stat_name`, `home_team_stat`, `away_team_stat`.
553
+
554
+ Dependencies: `match_data`
555
+
556
+ ---
557
+
558
+ #### `match_odds_data`
559
+
560
+ Fetch pre-match and live betting odds.
561
+
562
+ ```python
563
+ from datafc import match_odds_data
564
+
565
+ df = match_odds_data(match_df=match_df)
566
+ ```
567
+
568
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `market_name`, `market_id`, `is_live`, `choice_name`, `initial_fractional_value`, `current_fractional_value`, `winning`, `change`.
569
+
570
+ Dependencies: `match_data`
571
+
572
+ ---
573
+
574
+ #### `match_h2h_data`
575
+
576
+ Fetch all-time head-to-head win/draw/loss record between the two teams in each match.
577
+
578
+ ```python
579
+ from datafc import match_h2h_data
580
+
581
+ df = match_h2h_data(match_df=match_df)
582
+ ```
583
+
584
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `away_team`, `home_wins`, `away_wins`, `draws`.
585
+
586
+ Dependencies: `match_data`
587
+
588
+ ---
589
+
590
+ #### `momentum_data`
591
+
592
+ Fetch minute-by-minute match momentum values (positive = home advantage, negative = away).
593
+
594
+ ```python
595
+ from datafc import momentum_data
596
+
597
+ df = momentum_data(match_df=match_df)
598
+ ```
599
+
600
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `minute`, `value`.
601
+
602
+ Dependencies: `match_data`
603
+
604
+ ---
605
+
606
+ #### `pregame_form_data`
607
+
608
+ Fetch pre-game form context for each match: last 5 results, average rating, league position, and squad market value for both the home and away team.
609
+
610
+ ```python
611
+ from datafc import pregame_form_data
612
+
613
+ df = pregame_form_data(match_df=match_df)
614
+ ```
615
+
616
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team` (`home` / `away`), `avg_rating`, `position`, `value`, `form_1`, `form_2`, `form_3`, `form_4`, `form_5` (most recent result last).
617
+
618
+ Dependencies: `match_data`
619
+
620
+ ---
621
+
622
+ #### `shots_data`
623
+
624
+ Fetch all shot events with coordinates, xG, xGOT, body part, situation, and goal mouth location.
625
+
626
+ ```python
627
+ from datafc import shots_data
628
+
629
+ df = shots_data(match_df=match_df)
630
+ ```
631
+
632
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `player_name`, `player_id`, `player_position`, `is_home`, `incident_type`, `shot_type`, `body_part`, `goal_type`, `situation`, `goal_mouth_location`, `xg`, `xgot`, `player_coordinates_x`, `player_coordinates_y`, `player_coordinates_z`, `goal_mouth_coordinates_x`, `goal_mouth_coordinates_y`, `goal_mouth_coordinates_z`, `draw_start_x`, `draw_start_y`, `draw_end_x`, `draw_end_y`, `draw_goal_x`, `draw_goal_y`, `block_coordinates_x`, `block_coordinates_y`, `block_coordinates_z`, `time`, `time_seconds`, `added_time`.
633
+
634
+ Dependencies: `match_data`
635
+
636
+ ---
637
+
638
+ #### `lineups_data`
639
+
640
+ Fetch player lineup details and per-match player statistics (long format: one row per player per stat).
641
+
642
+ ```python
643
+ from datafc import lineups_data
644
+
645
+ df = lineups_data(match_df=match_df)
646
+ ```
647
+
648
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team`, `player_name`, `player_id`, `stat_name`, `stat_value`.
649
+
650
+ Dependencies: `match_data`
651
+
652
+ ---
653
+
654
+ #### `substitutions_data`
655
+
656
+ Fetch substitution events with player names and minute.
657
+
658
+ ```python
659
+ from datafc import substitutions_data
660
+
661
+ df = substitutions_data(match_df=match_df)
662
+ ```
663
+
664
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `time`, `player_in`, `player_in_id`, `player_out`, `player_out_id`.
665
+
666
+ Dependencies: `match_data`
667
+
668
+ ---
669
+
670
+ #### `incidents_data`
671
+
672
+ Fetch goal, card, and VAR decision events for each match.
673
+
674
+ ```python
675
+ from datafc import incidents_data
676
+
677
+ df = incidents_data(match_df=match_df)
678
+ ```
679
+
680
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `incident_type`, `incident_class`, `time`, `added_time`, `is_home`, `player_id`, `player_name`, `home_score`, `away_score`, `goal_from`, `card_reason`, `rescinded`, `var_confirmed`.
681
+
682
+ > **Note on `var_confirmed`:** `True` = VAR reviewed and upheld the on-field decision. `False` = VAR reviewed and overturned the decision. `None` = no VAR review occurred for that incident.
683
+
684
+ Dependencies: `match_data`
685
+
686
+ ---
687
+
688
+ #### `average_positions_data`
689
+
690
+ Fetch each player's average X/Y position on the pitch during a match. Coordinates are on a 0–100 scale. Useful for formation and tactical analysis.
691
+
692
+ ```python
693
+ from datafc import average_positions_data
694
+
695
+ df = average_positions_data(match_df=match_df)
696
+ ```
697
+
698
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `home_team`, `away_team`, `side` (`home` / `away`), `player_name`, `player_id`, `position`, `jersey_number`, `average_x`, `average_y`, `points_count`.
699
+
700
+ Dependencies: `match_data`
701
+
702
+ ---
703
+
704
+ #### `coordinates_data`
705
+
706
+ Fetch heatmap touch coordinates (X/Y) for each player. Requires `lineups_data` output as input.
707
+
708
+ ```python
709
+ from datafc import lineups_data, coordinates_data
710
+
711
+ lineups_df = lineups_data(match_df=match_df)
712
+ df = coordinates_data(lineups_df=lineups_df)
713
+ ```
714
+
715
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `team`, `player_id`, `player_name`, `x`, `y`.
716
+
717
+ > **Note:** Players with no heatmap data (short substitute appearances, 404 or 403 responses) are silently skipped. The function raises `DataNotAvailableError` only if **no** player yields any coordinates.
718
+
719
+ Dependencies: `lineups_data`
720
+
721
+ ---
722
+
723
+ #### `goal_networks_data`
724
+
725
+ Fetch coordinate data for each action in a goal-scoring sequence (passes, shots, goalkeeper position).
726
+
727
+ ```python
728
+ from datafc import goal_networks_data
729
+
730
+ df = goal_networks_data(match_df=match_df)
731
+ ```
732
+
733
+ Columns: `country`, `tournament`, `season`, `week`, `game_id`, `player_name`, `player_id`, `event_type`, `player_x`, `player_y`, `pass_end_x`, `pass_end_y`, `is_assist`, `id`, `goalkeeper_x`, `goalkeeper_y`, `goal_shot_x`, `goal_shot_y`, `goal_mouth_x`, `goal_mouth_y`, `goalkeeper_name`, `goalkeeper_id`.
734
+
735
+ Dependencies: `match_data`
736
+
737
+ ---
738
+
739
+ #### `past_matches_data`
740
+
741
+ Fetch the complete head-to-head match history for each team pair playing in a given matchweek.
742
+
743
+ ```python
744
+ from datafc import past_matches_data
745
+
746
+ df = past_matches_data(
747
+ tournament_id=52,
748
+ season_id=77805,
749
+ week_number=21,
750
+ )
751
+ ```
752
+
753
+ Parameters:
754
+
755
+ - `tournament_id` (int)
756
+ - `season_id` (int)
757
+ - `week_number` (int)
758
+ - `tournament_type` (str, optional): `"uefa"` for UEFA competitions.
759
+ - `tournament_stage` (str, optional): Required when `tournament_type="uefa"`. Same options as `match_data`.
760
+
761
+ Same columns as `match_data`.
762
+
763
+ ---
764
+
765
+ ### Player
766
+
767
+ #### `player_data`
768
+
769
+ Fetch profile data for each player in a squad: nationality, date of birth, height, weight, preferred foot, and market value.
770
+
771
+ ```python
772
+ from datafc import standings_data, squad_data, player_data
773
+
774
+ standings_df = standings_data(52, 63814)
775
+ squad_df = squad_data(standings_df=standings_df)
776
+ df = player_data(squad_df=squad_df)
777
+ ```
778
+
779
+ Columns: `player_id`, `player_name`, `date_of_birth`, `age`, `nationality`, `nationality_id`, `height`, `weight`, `preferred_foot`, `jersey_number`, `position`, `market_value`, `market_currency`, `team_id`, `team_name`.
780
+
781
+ Dependencies: `squad_data`
782
+
783
+ ---
784
+
785
+ #### `player_transfers_data`
786
+
787
+ Fetch transfer history for each player in a squad.
788
+
789
+ ```python
790
+ from datafc import standings_data, squad_data, player_transfers_data
791
+
792
+ standings_df = standings_data(52, 63814)
793
+ squad_df = squad_data(standings_df=standings_df)
794
+ df = player_transfers_data(squad_df=squad_df)
795
+ ```
796
+
797
+ Columns: `player_id`, `player_name`, `transfer_date`, `from_team_id`, `from_team_name`, `to_team_id`, `to_team_name`, `transfer_type`, `fee`, `fee_currency`.
798
+
799
+ Dependencies: `squad_data`
800
+
801
+ ---
802
+
803
+ #### `player_career_stats_data`
804
+
805
+ Fetch season-by-season career statistics across all competitions for each player in a squad (long format: one row per player-season-stat combination). Only `overall` entries are included; home/away splits are excluded.
806
+
807
+ ```python
808
+ from datafc import standings_data, squad_data, player_career_stats_data
809
+
810
+ standings_df = standings_data(52, 63814)
811
+ squad_df = squad_data(standings_df=standings_df)
812
+ df = player_career_stats_data(squad_df=squad_df)
813
+ ```
814
+
815
+ Columns: `player_id`, `player_name`, `tournament_id`, `tournament_name`, `season_id`, `season_name`, `team_id`, `team_name`, `stat`, `value`.
816
+
817
+ Dependencies: `squad_data`
818
+
819
+ ---
820
+
821
+ #### `player_national_team_data`
822
+
823
+ Fetch national team career statistics (appearances, goals, debut) for each player in a squad.
824
+
825
+ ```python
826
+ from datafc import standings_data, squad_data, player_national_team_data
827
+
828
+ standings_df = standings_data(52, 63814)
829
+ squad_df = squad_data(standings_df=standings_df)
830
+ df = player_national_team_data(squad_df=squad_df)
831
+ ```
832
+
833
+ Columns: `player_id`, `player_name`, `team_id`, `team_name`, `team_code`, `appearances`, `goals`, `debut_timestamp`.
834
+
835
+ Dependencies: `squad_data`
836
+
837
+ ---
838
+
839
+ #### `player_match_log_data`
840
+
841
+ Fetch match-by-match in-game statistics for each player in a squad across all recorded matches (wide format: one row per player per match).
842
+
843
+ ```python
844
+ from datafc import standings_data, squad_data, player_match_log_data
845
+
846
+ standings_df = standings_data(52, 63814)
847
+ squad_df = squad_data(standings_df=standings_df)
848
+ df = player_match_log_data(squad_df=squad_df)
849
+ ```
850
+
851
+ Columns: `player_id`, `player_name`, `game_id`, `start_timestamp`, `tournament`, `season`, `home_team`, `home_team_id`, `away_team`, `away_team_id`, `home_score`, `away_score`, `status`, plus all available in-match stat columns (e.g. `goals`, `assists`, `rating`, `minutesPlayed`, …).
852
+
853
+ Dependencies: `squad_data`
854
+
855
+ ---
856
+
857
+ ### Referee
858
+
859
+ #### `referee_stats_data`
860
+
861
+ Fetch career statistics for a referee. The `referee_id` can be obtained from the `referee_id` column in `match_details_data()` output.
862
+
863
+ ```python
864
+ from datafc import referee_stats_data
865
+
866
+ df = referee_stats_data(referee_id=12345)
867
+ ```
868
+
869
+ Parameters:
870
+
871
+ - `referee_id` (int): The unique Sofascore identifier for the referee.
872
+
873
+ Columns: `referee_id`, `referee_name`, `tournament_id`, `tournament_name`, `stat`, `value`. One row per stat per tournament. Covers appearances, yellow cards, red cards, second yellow cards, and penalties.
874
+
875
+ ---
876
+
877
+ ## Changelog
878
+
879
+ ### v2.0.0
880
+
881
+ **Chrome / Selenium removed — no browser required.** datafc now makes direct HTTP requests. Installation is simpler, and fetches are significantly faster than before.
882
+
883
+ **18 new functions.** `seasons_data`, `season_rounds_data`, `team_data`, `team_transfers_data`, `upcoming_matches_data`, `league_player_stats_data`, `match_details_data`, `match_h2h_data`, `pregame_form_data`, `incidents_data`, `average_positions_data`, `player_data`, `player_transfers_data`, `player_career_stats_data`, `player_national_team_data`, `player_match_log_data`, `referee_stats_data`, `search_data`.
884
+
885
+ **Async API.** All 32 functions are available in `datafc.aio` for parallel fetching with `asyncio.gather()`, letting you download an entire matchweek's worth of data concurrently.
886
+
887
+ **Disk caching.** Pass a `DiskCache` instance to any function to avoid re-fetching data you've already downloaded. Cached responses are returned instantly on subsequent calls.
888
+
889
+ **Automatic rate limiting and retries.** All functions accept a `rate_limit` parameter. Temporary failures (rate limits, server errors) are retried automatically without any extra code on your side.
890
+
891
+ **New Parquet export.** Use `save_parquet()` on any DataFrame returned by a fetch function to save output as `.parquet`. Requires `pyarrow` (`pip install datafc[parquet]`).
892
+
893
+ **Heatmap fetch no longer crashes on partial access errors.** `coordinates_data` now skips players that the API refuses to serve and returns data for everyone else. The function only raises an error if no player yields any coordinates at all.
894
+
895
+ **Exported filenames are human-readable.** JSON, Excel, and Parquet files now use the league name (e.g. `trendyol_superlig_shots_data.json`) instead of raw numeric IDs. Turkish and other non-ASCII characters are transliterated correctly — `Şampiyonlar` becomes `sampiyonlar`, not `ampiyonlar`.
896
+
897
+ **Valid JSON output.** Exported `.json` files no longer contain invalid `NaN` literals; they use `null` instead, making them compatible with every JSON parser and spreadsheet tool.
898
+
899
+ **Cleaner numeric columns.** Score fields, ratings, and market values that were previously returned as strings or empty strings are now proper numeric types (`null` when missing, not `""`).
900
+
901
+ **Clearer errors.** When something goes wrong, the exception type tells you what happened: data not available, invalid parameter, API access error, rate limit hit, or server error.
902
+
903
+ ### v1.5.0
904
+
905
+ - Added `team_stats_data`, `player_stats_data`, and `squad_data`.
906
+
907
+ ### v1.4.0
908
+
909
+ - Added `tournament_type` and `tournament_stage` parameters to `match_data` and `past_matches_data` for UEFA competitions (UCL, UEL, UECL, UNL).
910
+
911
+ ### v1.3.0
912
+
913
+ - Added `past_matches_data`.
914
+
915
+ ### v1.2.0
916
+
917
+ - Added match score columns to `match_data`.
918
+
919
+ ### v1.1.0
920
+
921
+ - Added 4 new columns to `match_data`.
922
+ - Added `data_source` parameter to export functions.
923
+
924
+ ### v1.0.0
925
+
926
+ - Initial release. Selenium-based Sofascore scraper with JSON/Excel export.
927
+
928
+ ## License
929
+
930
+ MIT License
931
+
932
+ ## Contributing
933
+
934
+ Bug reports and feature requests are welcome on the [issue tracker](https://github.com/urazakgul/datafc/issues); pull requests are welcome at [github.com/urazakgul/datafc](https://github.com/urazakgul/datafc).