datafc 1.1.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datafc-1.1.0 → datafc-1.3.0}/PKG-INFO +95 -25
- {datafc-1.1.0 → datafc-1.3.0}/README.md +94 -24
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/__init__.py +3 -1
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_match_data.py +11 -1
- datafc-1.3.0/datafc/sofascore/fetch_past_matches_data.py +142 -0
- datafc-1.3.0/datafc/utils/_config.py +8 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc.egg-info/PKG-INFO +95 -25
- {datafc-1.1.0 → datafc-1.3.0}/datafc.egg-info/SOURCES.txt +1 -0
- {datafc-1.1.0 → datafc-1.3.0}/setup.py +1 -1
- datafc-1.1.0/datafc/utils/_config.py +0 -6
- {datafc-1.1.0 → datafc-1.3.0}/LICENSE +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/__init__.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_coordinates_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_goal_networks_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_lineups_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_match_odds_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_match_stats_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_momentum_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_shots_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_standings_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/sofascore/fetch_substitutions_data.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/utils/__init__.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/utils/_save_files.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc/utils/_setup_webdriver.py +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc.egg-info/dependency_links.txt +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc.egg-info/requires.txt +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/datafc.egg-info/top_level.txt +0 -0
- {datafc-1.1.0 → datafc-1.3.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datafc
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: A scalable Python library for fetching, processing, and exporting structured football match data.
|
|
5
5
|
Home-page: https://github.com/urazakgul/datafc
|
|
6
6
|
Author: Uraz Akgül
|
|
@@ -14,7 +14,7 @@ Requires-Python: >=3.8
|
|
|
14
14
|
Description-Content-Type: text/markdown
|
|
15
15
|
License-File: LICENSE
|
|
16
16
|
|
|
17
|
-
# datafc v1.1.0
|
|
17
|
+
# datafc v1.3.0
|
|
18
18
|
|
|
19
19
|
## Overview
|
|
20
20
|
|
|
@@ -53,7 +53,7 @@ pip install git+https://github.com/urazakgul/datafc.git
|
|
|
53
53
|
To install a specific version of `datafc`, use:
|
|
54
54
|
|
|
55
55
|
```bash
|
|
56
|
-
pip install datafc==1.1.0
|
|
56
|
+
pip install datafc==1.3.0
|
|
57
57
|
```
|
|
58
58
|
|
|
59
59
|
If you already have `datafc` installed and want to upgrade to the latest version, run:
|
|
@@ -105,6 +105,7 @@ from datafc.sofascore import (
|
|
|
105
105
|
match_odds_data,
|
|
106
106
|
match_stats_data,
|
|
107
107
|
momentum_data,
|
|
108
|
+
past_matches_data,
|
|
108
109
|
lineups_data,
|
|
109
110
|
coordinates_data,
|
|
110
111
|
substitutions_data,
|
|
@@ -140,20 +141,19 @@ The `lineups_data` function fetches player lineup details for each match and is
|
|
|
140
141
|
|
|
141
142
|
Without `lineups_data`, these dependent functions will not work as expected.
|
|
142
143
|
|
|
143
|
-
Exception: `standings_data`
|
|
144
|
+
Exception: `standings_data` and `past_matches_data`
|
|
144
145
|
|
|
145
|
-
Unlike
|
|
146
|
+
Unlike other functions, `standings_data` and `past_matches_data` do not require `match_data` or `lineups_data`. They can be executed independently using only `tournament_id` and `season_id`. Additionally, `past_matches_data` includes an extra field: `week_number`.
|
|
146
147
|
|
|
147
148
|
### Match Data
|
|
148
149
|
|
|
149
150
|
#### `match_data`
|
|
150
151
|
|
|
151
|
-
The `match_data` function fetches match data for a specified tournament, season, and matchweek.
|
|
152
|
+
The `match_data` function fetches match data for a specified tournament, season, and matchweek.
|
|
152
153
|
|
|
153
154
|
Example Usage:
|
|
154
155
|
|
|
155
156
|
```python
|
|
156
|
-
# Fetch match data for a specific tournament, season, and week
|
|
157
157
|
match_df = match_data(
|
|
158
158
|
tournament_id=52,
|
|
159
159
|
season_id=63814,
|
|
@@ -193,6 +193,16 @@ The returned DataFrame includes the following columns:
|
|
|
193
193
|
* `injury_time_2`: Added injury time in the second half.
|
|
194
194
|
* `start_timestamp`: The start time of the match.
|
|
195
195
|
* `status`: The current status of the match.
|
|
196
|
+
* `home_score_current`: The latest recorded score for the home team.
|
|
197
|
+
* `home_score_display`: The displayed score of the home team.
|
|
198
|
+
* `home_score_period1`: The home team's score at the end of the first half.
|
|
199
|
+
* `home_score_period2`: The home team's goals scored in the second half.
|
|
200
|
+
* `home_score_normaltime`: The home team's final score at the end of normal time (90 minutes).
|
|
201
|
+
* `away_score_current`: The latest recorded score for the away team.
|
|
202
|
+
* `away_score_display`: The displayed score of the away team.
|
|
203
|
+
* `away_score_period1`: The away team's score at the end of the first half.
|
|
204
|
+
* `away_score_period2`: The away team's goals scored in the second half.
|
|
205
|
+
* `away_score_normaltime`: The away team's final score at the end of normal time (90 minutes).
|
|
196
206
|
|
|
197
207
|
Dependencies:
|
|
198
208
|
|
|
@@ -200,12 +210,11 @@ Dependencies:
|
|
|
200
210
|
|
|
201
211
|
#### `match_odds_data`
|
|
202
212
|
|
|
203
|
-
The `match_odds_data` function fetches betting odds data for each match in the provided match dataset.
|
|
213
|
+
The `match_odds_data` function fetches betting odds data for each match in the provided match dataset.
|
|
204
214
|
|
|
205
215
|
Example Usage:
|
|
206
216
|
|
|
207
217
|
```python
|
|
208
|
-
# Fetch match odds data
|
|
209
218
|
match_odds_df = match_odds_data(
|
|
210
219
|
match_df=match_df,
|
|
211
220
|
data_source="sofascore",
|
|
@@ -248,12 +257,11 @@ Dependencies:
|
|
|
248
257
|
|
|
249
258
|
#### `match_stats_data`
|
|
250
259
|
|
|
251
|
-
The `match_stats_data` function fetches statistical data for each match in the provided match dataset.
|
|
260
|
+
The `match_stats_data` function fetches statistical data for each match in the provided match dataset.
|
|
252
261
|
|
|
253
262
|
Example Usage:
|
|
254
263
|
|
|
255
264
|
```python
|
|
256
|
-
# Fetch match statistics data
|
|
257
265
|
match_stats_df = match_stats_data(
|
|
258
266
|
match_df=match_df,
|
|
259
267
|
data_source="sofascore",
|
|
@@ -293,12 +301,11 @@ Dependencies:
|
|
|
293
301
|
|
|
294
302
|
#### `momentum_data`
|
|
295
303
|
|
|
296
|
-
The `momentum_data` function fetches momentum data for each match in the provided match dataset.
|
|
304
|
+
The `momentum_data` function fetches momentum data for each match in the provided match dataset.
|
|
297
305
|
|
|
298
306
|
Example Usage:
|
|
299
307
|
|
|
300
308
|
```python
|
|
301
|
-
# Fetch momentum data
|
|
302
309
|
momentum_df = momentum_data(
|
|
303
310
|
match_df=match_df,
|
|
304
311
|
data_source="sofascore",
|
|
@@ -333,16 +340,76 @@ Dependencies:
|
|
|
333
340
|
|
|
334
341
|
* Requires `match_data` output as `match_df`.
|
|
335
342
|
|
|
343
|
+
#### `past_matches_data`
|
|
344
|
+
|
|
345
|
+
The `past_matches_data` function fetches past match data for a specified tournament, season, and week number.
|
|
346
|
+
|
|
347
|
+
Example Usage:
|
|
348
|
+
|
|
349
|
+
```python
|
|
350
|
+
past_matches_df = past_matches_data(
|
|
351
|
+
tournament_id=52,
|
|
352
|
+
season_id=63814,
|
|
353
|
+
week_number=21,
|
|
354
|
+
data_source="sofascore",
|
|
355
|
+
enable_json_export=True,
|
|
356
|
+
enable_excel_export=True
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
print(past_matches_df)
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
Parameters:
|
|
363
|
+
|
|
364
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
365
|
+
* `season_id` (int): The unique identifier for the season.
|
|
366
|
+
* `week_number` (int): The matchweek number within the season.
|
|
367
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `sofascore`.
|
|
368
|
+
* `element_load_timeout` (int): The maximum time (in seconds) to wait for the API response. Defaults to 10.
|
|
369
|
+
* `enable_json_export` (bool): If `True`, exports the fetched data as a JSON file. Defaults to `False`.
|
|
370
|
+
* `enable_excel_export` (bool): If `True`, exports the fetched data as an Excel file. Defaults to `False`.
|
|
371
|
+
|
|
372
|
+
Data Structure:
|
|
373
|
+
|
|
374
|
+
The returned DataFrame includes the following columns:
|
|
375
|
+
|
|
376
|
+
* `country`: The country where the tournament is held.
|
|
377
|
+
* `tournament`: The name of the tournament.
|
|
378
|
+
* `season`: The season year.
|
|
379
|
+
* `week`: The matchweek number.
|
|
380
|
+
* `game_id`: The unique identifier for the match.
|
|
381
|
+
* `home_team`: The name of the home team.
|
|
382
|
+
* `home_team_id`: The unique identifier for the home team.
|
|
383
|
+
* `away_team`: The name of the away team.
|
|
384
|
+
* `away_team_id`: The unique identifier for the away team.
|
|
385
|
+
* `injury_time_1`: Added injury time in the first half.
|
|
386
|
+
* `injury_time_2`: Added injury time in the second half.
|
|
387
|
+
* `start_timestamp`: The start time of the match.
|
|
388
|
+
* `status`: The current status of the match.
|
|
389
|
+
* `home_score_current`: The latest recorded score for the home team.
|
|
390
|
+
* `home_score_display`: The displayed score of the home team.
|
|
391
|
+
* `home_score_period1`: The home team's score at the end of the first half.
|
|
392
|
+
* `home_score_period2`: The home team's goals scored in the second half.
|
|
393
|
+
* `home_score_normaltime`: The home team's final score at the end of normal time (90 minutes).
|
|
394
|
+
* `away_score_current`: The latest recorded score for the away team.
|
|
395
|
+
* `away_score_display`: The displayed score of the away team.
|
|
396
|
+
* `away_score_period1`: The away team's score at the end of the first half.
|
|
397
|
+
* `away_score_period2`: The away team's goals scored in the second half.
|
|
398
|
+
* `away_score_normaltime`: The away team's final score at the end of normal time (90 minutes).
|
|
399
|
+
|
|
400
|
+
Dependencies:
|
|
401
|
+
|
|
402
|
+
* No prior function dependency required.
|
|
403
|
+
|
|
336
404
|
### Player Data
|
|
337
405
|
|
|
338
406
|
#### `lineups_data`
|
|
339
407
|
|
|
340
|
-
The `lineups_data` function fetches lineup data for each match in the provided match dataset.
|
|
408
|
+
The `lineups_data` function fetches lineup data for each match in the provided match dataset.
|
|
341
409
|
|
|
342
410
|
Example Usage:
|
|
343
411
|
|
|
344
412
|
```python
|
|
345
|
-
# Fetch lineups data based on match data
|
|
346
413
|
lineups_df = lineups_data(
|
|
347
414
|
match_df=match_df,
|
|
348
415
|
data_source="sofascore",
|
|
@@ -382,12 +449,11 @@ Dependencies:
|
|
|
382
449
|
|
|
383
450
|
#### `coordinates_data`
|
|
384
451
|
|
|
385
|
-
The `coordinates_data` function fetches coordinate data for each player in the provided lineup dataset.
|
|
452
|
+
The `coordinates_data` function fetches coordinate data for each player in the provided lineup dataset.
|
|
386
453
|
|
|
387
454
|
Example Usage:
|
|
388
455
|
|
|
389
456
|
```python
|
|
390
|
-
# Fetch coordinates data
|
|
391
457
|
coordinates_df = coordinates_data(
|
|
392
458
|
lineups_df=lineups_df,
|
|
393
459
|
data_source="sofascore",
|
|
@@ -427,12 +493,11 @@ Dependencies:
|
|
|
427
493
|
|
|
428
494
|
#### `substitutions_data`
|
|
429
495
|
|
|
430
|
-
The `substitutions_data` function fetches substitution data for each match in the provided match dataset.
|
|
496
|
+
The `substitutions_data` function fetches substitution data for each match in the provided match dataset.
|
|
431
497
|
|
|
432
498
|
Example Usage:
|
|
433
499
|
|
|
434
500
|
```python
|
|
435
|
-
# Fetch substitution data
|
|
436
501
|
substitutions_df = substitutions_data(
|
|
437
502
|
match_df=match_df,
|
|
438
503
|
data_source="sofascore",
|
|
@@ -474,12 +539,11 @@ Dependencies:
|
|
|
474
539
|
|
|
475
540
|
#### `goal_networks_data`
|
|
476
541
|
|
|
477
|
-
The `goal_networks_data` function fetches goal network data for each match in the provided match dataset.
|
|
542
|
+
The `goal_networks_data` function fetches goal network data for each match in the provided match dataset.
|
|
478
543
|
|
|
479
544
|
Example Usage:
|
|
480
545
|
|
|
481
546
|
```python
|
|
482
|
-
# Fetch goal networks data
|
|
483
547
|
goal_networks_df = goal_networks_data(
|
|
484
548
|
match_df=match_df,
|
|
485
549
|
data_source="sofascore",
|
|
@@ -531,12 +595,11 @@ Dependencies:
|
|
|
531
595
|
|
|
532
596
|
#### `shots_data`
|
|
533
597
|
|
|
534
|
-
The `shots_data` function fetches shot data for each match in the provided match dataset.
|
|
598
|
+
The `shots_data` function fetches shot data for each match in the provided match dataset.
|
|
535
599
|
|
|
536
600
|
Example Usage:
|
|
537
601
|
|
|
538
602
|
```python
|
|
539
|
-
# Fetch shot data
|
|
540
603
|
shots_df = shots_data(
|
|
541
604
|
match_df=match_df,
|
|
542
605
|
data_source="sofascore",
|
|
@@ -598,12 +661,11 @@ Dependencies:
|
|
|
598
661
|
|
|
599
662
|
#### `standings_data`
|
|
600
663
|
|
|
601
|
-
The `standings_data` function fetches league standings for a specific tournament and season.
|
|
664
|
+
The `standings_data` function fetches league standings for a specific tournament and season.
|
|
602
665
|
|
|
603
666
|
Example Usage:
|
|
604
667
|
|
|
605
668
|
```python
|
|
606
|
-
# Fetch league standings
|
|
607
669
|
standings_df = standings_data(
|
|
608
670
|
tournament_id=52,
|
|
609
671
|
season_id=63814,
|
|
@@ -648,10 +710,18 @@ Dependencies:
|
|
|
648
710
|
|
|
649
711
|
## Changelog
|
|
650
712
|
|
|
713
|
+
* v1.3.0
|
|
714
|
+
* Added `past_matches_data` function to fetch historical match data.
|
|
715
|
+
|
|
716
|
+
* v1.2.0
|
|
717
|
+
* Added match score columns to `match_data`
|
|
718
|
+
|
|
651
719
|
* v1.1.0
|
|
652
720
|
* Added 4 new columns to `match_data`
|
|
653
721
|
* Added `data_source` parameter to `save_json` and `save_excel` for including the source in file names
|
|
654
722
|
|
|
723
|
+
* v1.0.1 (Cancelled, not used)
|
|
724
|
+
|
|
655
725
|
* v1.0.0
|
|
656
726
|
* Initial release of `datafc`
|
|
657
727
|
* Fetching match data using Selenium WebDriver
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# datafc v1.1.0
|
|
1
|
+
# datafc v1.3.0
|
|
2
2
|
|
|
3
3
|
## Overview
|
|
4
4
|
|
|
@@ -37,7 +37,7 @@ pip install git+https://github.com/urazakgul/datafc.git
|
|
|
37
37
|
To install a specific version of `datafc`, use:
|
|
38
38
|
|
|
39
39
|
```bash
|
|
40
|
-
pip install datafc==1.1.0
|
|
40
|
+
pip install datafc==1.3.0
|
|
41
41
|
```
|
|
42
42
|
|
|
43
43
|
If you already have `datafc` installed and want to upgrade to the latest version, run:
|
|
@@ -89,6 +89,7 @@ from datafc.sofascore import (
|
|
|
89
89
|
match_odds_data,
|
|
90
90
|
match_stats_data,
|
|
91
91
|
momentum_data,
|
|
92
|
+
past_matches_data,
|
|
92
93
|
lineups_data,
|
|
93
94
|
coordinates_data,
|
|
94
95
|
substitutions_data,
|
|
@@ -124,20 +125,19 @@ The `lineups_data` function fetches player lineup details for each match and is
|
|
|
124
125
|
|
|
125
126
|
Without `lineups_data`, these dependent functions will not work as expected.
|
|
126
127
|
|
|
127
|
-
Exception: `standings_data`
|
|
128
|
+
Exception: `standings_data` and `past_matches_data`
|
|
128
129
|
|
|
129
|
-
Unlike
|
|
130
|
+
Unlike other functions, `standings_data` and `past_matches_data` do not require `match_data` or `lineups_data`. They can be executed independently using only `tournament_id` and `season_id`. Additionally, `past_matches_data` includes an extra field: `week_number`.
|
|
130
131
|
|
|
131
132
|
### Match Data
|
|
132
133
|
|
|
133
134
|
#### `match_data`
|
|
134
135
|
|
|
135
|
-
The `match_data` function fetches match data for a specified tournament, season, and matchweek.
|
|
136
|
+
The `match_data` function fetches match data for a specified tournament, season, and matchweek.
|
|
136
137
|
|
|
137
138
|
Example Usage:
|
|
138
139
|
|
|
139
140
|
```python
|
|
140
|
-
# Fetch match data for a specific tournament, season, and week
|
|
141
141
|
match_df = match_data(
|
|
142
142
|
tournament_id=52,
|
|
143
143
|
season_id=63814,
|
|
@@ -177,6 +177,16 @@ The returned DataFrame includes the following columns:
|
|
|
177
177
|
* `injury_time_2`: Added injury time in the second half.
|
|
178
178
|
* `start_timestamp`: The start time of the match.
|
|
179
179
|
* `status`: The current status of the match.
|
|
180
|
+
* `home_score_current`: The latest recorded score for the home team.
|
|
181
|
+
* `home_score_display`: The displayed score of the home team.
|
|
182
|
+
* `home_score_period1`: The home team's score at the end of the first half.
|
|
183
|
+
* `home_score_period2`: The home team's goals scored in the second half.
|
|
184
|
+
* `home_score_normaltime`: The home team's final score at the end of normal time (90 minutes).
|
|
185
|
+
* `away_score_current`: The latest recorded score for the away team.
|
|
186
|
+
* `away_score_display`: The displayed score of the away team.
|
|
187
|
+
* `away_score_period1`: The away team's score at the end of the first half.
|
|
188
|
+
* `away_score_period2`: The away team's goals scored in the second half.
|
|
189
|
+
* `away_score_normaltime`: The away team's final score at the end of normal time (90 minutes).
|
|
180
190
|
|
|
181
191
|
Dependencies:
|
|
182
192
|
|
|
@@ -184,12 +194,11 @@ Dependencies:
|
|
|
184
194
|
|
|
185
195
|
#### `match_odds_data`
|
|
186
196
|
|
|
187
|
-
The `match_odds_data` function fetches betting odds data for each match in the provided match dataset.
|
|
197
|
+
The `match_odds_data` function fetches betting odds data for each match in the provided match dataset.
|
|
188
198
|
|
|
189
199
|
Example Usage:
|
|
190
200
|
|
|
191
201
|
```python
|
|
192
|
-
# Fetch match odds data
|
|
193
202
|
match_odds_df = match_odds_data(
|
|
194
203
|
match_df=match_df,
|
|
195
204
|
data_source="sofascore",
|
|
@@ -232,12 +241,11 @@ Dependencies:
|
|
|
232
241
|
|
|
233
242
|
#### `match_stats_data`
|
|
234
243
|
|
|
235
|
-
The `match_stats_data` function fetches statistical data for each match in the provided match dataset.
|
|
244
|
+
The `match_stats_data` function fetches statistical data for each match in the provided match dataset.
|
|
236
245
|
|
|
237
246
|
Example Usage:
|
|
238
247
|
|
|
239
248
|
```python
|
|
240
|
-
# Fetch match statistics data
|
|
241
249
|
match_stats_df = match_stats_data(
|
|
242
250
|
match_df=match_df,
|
|
243
251
|
data_source="sofascore",
|
|
@@ -277,12 +285,11 @@ Dependencies:
|
|
|
277
285
|
|
|
278
286
|
#### `momentum_data`
|
|
279
287
|
|
|
280
|
-
The `momentum_data` function fetches momentum data for each match in the provided match dataset.
|
|
288
|
+
The `momentum_data` function fetches momentum data for each match in the provided match dataset.
|
|
281
289
|
|
|
282
290
|
Example Usage:
|
|
283
291
|
|
|
284
292
|
```python
|
|
285
|
-
# Fetch momentum data
|
|
286
293
|
momentum_df = momentum_data(
|
|
287
294
|
match_df=match_df,
|
|
288
295
|
data_source="sofascore",
|
|
@@ -317,16 +324,76 @@ Dependencies:
|
|
|
317
324
|
|
|
318
325
|
* Requires `match_data` output as `match_df`.
|
|
319
326
|
|
|
327
|
+
#### `past_matches_data`
|
|
328
|
+
|
|
329
|
+
The `past_matches_data` function fetches past match data for a specified tournament, season, and week number.
|
|
330
|
+
|
|
331
|
+
Example Usage:
|
|
332
|
+
|
|
333
|
+
```python
|
|
334
|
+
past_matches_df = past_matches_data(
|
|
335
|
+
tournament_id=52,
|
|
336
|
+
season_id=63814,
|
|
337
|
+
week_number=21,
|
|
338
|
+
data_source="sofascore",
|
|
339
|
+
enable_json_export=True,
|
|
340
|
+
enable_excel_export=True
|
|
341
|
+
)
|
|
342
|
+
|
|
343
|
+
print(past_matches_df)
|
|
344
|
+
```
|
|
345
|
+
|
|
346
|
+
Parameters:
|
|
347
|
+
|
|
348
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
349
|
+
* `season_id` (int): The unique identifier for the season.
|
|
350
|
+
* `week_number` (int): The matchweek number within the season.
|
|
351
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `sofascore`.
|
|
352
|
+
* `element_load_timeout` (int): The maximum time (in seconds) to wait for the API response. Defaults to 10.
|
|
353
|
+
* `enable_json_export` (bool): If `True`, exports the fetched data as a JSON file. Defaults to `False`.
|
|
354
|
+
* `enable_excel_export` (bool): If `True`, exports the fetched data as an Excel file. Defaults to `False`.
|
|
355
|
+
|
|
356
|
+
Data Structure:
|
|
357
|
+
|
|
358
|
+
The returned DataFrame includes the following columns:
|
|
359
|
+
|
|
360
|
+
* `country`: The country where the tournament is held.
|
|
361
|
+
* `tournament`: The name of the tournament.
|
|
362
|
+
* `season`: The season year.
|
|
363
|
+
* `week`: The matchweek number.
|
|
364
|
+
* `game_id`: The unique identifier for the match.
|
|
365
|
+
* `home_team`: The name of the home team.
|
|
366
|
+
* `home_team_id`: The unique identifier for the home team.
|
|
367
|
+
* `away_team`: The name of the away team.
|
|
368
|
+
* `away_team_id`: The unique identifier for the away team.
|
|
369
|
+
* `injury_time_1`: Added injury time in the first half.
|
|
370
|
+
* `injury_time_2`: Added injury time in the second half.
|
|
371
|
+
* `start_timestamp`: The start time of the match.
|
|
372
|
+
* `status`: The current status of the match.
|
|
373
|
+
* `home_score_current`: The latest recorded score for the home team.
|
|
374
|
+
* `home_score_display`: The displayed score of the home team.
|
|
375
|
+
* `home_score_period1`: The home team's score at the end of the first half.
|
|
376
|
+
* `home_score_period2`: The home team's goals scored in the second half.
|
|
377
|
+
* `home_score_normaltime`: The home team's final score at the end of normal time (90 minutes).
|
|
378
|
+
* `away_score_current`: The latest recorded score for the away team.
|
|
379
|
+
* `away_score_display`: The displayed score of the away team.
|
|
380
|
+
* `away_score_period1`: The away team's score at the end of the first half.
|
|
381
|
+
* `away_score_period2`: The away team's goals scored in the second half.
|
|
382
|
+
* `away_score_normaltime`: The away team's final score at the end of normal time (90 minutes).
|
|
383
|
+
|
|
384
|
+
Dependencies:
|
|
385
|
+
|
|
386
|
+
* No prior function dependency required.
|
|
387
|
+
|
|
320
388
|
### Player Data
|
|
321
389
|
|
|
322
390
|
#### `lineups_data`
|
|
323
391
|
|
|
324
|
-
The `lineups_data` function fetches lineup data for each match in the provided match dataset.
|
|
392
|
+
The `lineups_data` function fetches lineup data for each match in the provided match dataset.
|
|
325
393
|
|
|
326
394
|
Example Usage:
|
|
327
395
|
|
|
328
396
|
```python
|
|
329
|
-
# Fetch lineups data based on match data
|
|
330
397
|
lineups_df = lineups_data(
|
|
331
398
|
match_df=match_df,
|
|
332
399
|
data_source="sofascore",
|
|
@@ -366,12 +433,11 @@ Dependencies:
|
|
|
366
433
|
|
|
367
434
|
#### `coordinates_data`
|
|
368
435
|
|
|
369
|
-
The `coordinates_data` function fetches coordinate data for each player in the provided lineup dataset.
|
|
436
|
+
The `coordinates_data` function fetches coordinate data for each player in the provided lineup dataset.
|
|
370
437
|
|
|
371
438
|
Example Usage:
|
|
372
439
|
|
|
373
440
|
```python
|
|
374
|
-
# Fetch coordinates data
|
|
375
441
|
coordinates_df = coordinates_data(
|
|
376
442
|
lineups_df=lineups_df,
|
|
377
443
|
data_source="sofascore",
|
|
@@ -411,12 +477,11 @@ Dependencies:
|
|
|
411
477
|
|
|
412
478
|
#### `substitutions_data`
|
|
413
479
|
|
|
414
|
-
The `substitutions_data` function fetches substitution data for each match in the provided match dataset.
|
|
480
|
+
The `substitutions_data` function fetches substitution data for each match in the provided match dataset.
|
|
415
481
|
|
|
416
482
|
Example Usage:
|
|
417
483
|
|
|
418
484
|
```python
|
|
419
|
-
# Fetch substitution data
|
|
420
485
|
substitutions_df = substitutions_data(
|
|
421
486
|
match_df=match_df,
|
|
422
487
|
data_source="sofascore",
|
|
@@ -458,12 +523,11 @@ Dependencies:
|
|
|
458
523
|
|
|
459
524
|
#### `goal_networks_data`
|
|
460
525
|
|
|
461
|
-
The `goal_networks_data` function fetches goal network data for each match in the provided match dataset.
|
|
526
|
+
The `goal_networks_data` function fetches goal network data for each match in the provided match dataset.
|
|
462
527
|
|
|
463
528
|
Example Usage:
|
|
464
529
|
|
|
465
530
|
```python
|
|
466
|
-
# Fetch goal networks data
|
|
467
531
|
goal_networks_df = goal_networks_data(
|
|
468
532
|
match_df=match_df,
|
|
469
533
|
data_source="sofascore",
|
|
@@ -515,12 +579,11 @@ Dependencies:
|
|
|
515
579
|
|
|
516
580
|
#### `shots_data`
|
|
517
581
|
|
|
518
|
-
The `shots_data` function fetches shot data for each match in the provided match dataset.
|
|
582
|
+
The `shots_data` function fetches shot data for each match in the provided match dataset.
|
|
519
583
|
|
|
520
584
|
Example Usage:
|
|
521
585
|
|
|
522
586
|
```python
|
|
523
|
-
# Fetch shot data
|
|
524
587
|
shots_df = shots_data(
|
|
525
588
|
match_df=match_df,
|
|
526
589
|
data_source="sofascore",
|
|
@@ -582,12 +645,11 @@ Dependencies:
|
|
|
582
645
|
|
|
583
646
|
#### `standings_data`
|
|
584
647
|
|
|
585
|
-
The `standings_data` function fetches league standings for a specific tournament and season.
|
|
648
|
+
The `standings_data` function fetches league standings for a specific tournament and season.
|
|
586
649
|
|
|
587
650
|
Example Usage:
|
|
588
651
|
|
|
589
652
|
```python
|
|
590
|
-
# Fetch league standings
|
|
591
653
|
standings_df = standings_data(
|
|
592
654
|
tournament_id=52,
|
|
593
655
|
season_id=63814,
|
|
@@ -632,10 +694,18 @@ Dependencies:
|
|
|
632
694
|
|
|
633
695
|
## Changelog
|
|
634
696
|
|
|
697
|
+
* v1.3.0
|
|
698
|
+
* Added `past_matches_data` function to fetch historical match data.
|
|
699
|
+
|
|
700
|
+
* v1.2.0
|
|
701
|
+
* Added match score columns to `match_data`
|
|
702
|
+
|
|
635
703
|
* v1.1.0
|
|
636
704
|
* Added 4 new columns to `match_data`
|
|
637
705
|
* Added `data_source` parameter to `save_json` and `save_excel` for including the source in file names
|
|
638
706
|
|
|
707
|
+
* v1.0.1 (Cancelled, not used)
|
|
708
|
+
|
|
639
709
|
* v1.0.0
|
|
640
710
|
* Initial release of `datafc`
|
|
641
711
|
* Fetching match data using Selenium WebDriver
|
|
@@ -8,6 +8,7 @@ from .fetch_coordinates_data import coordinates_data
|
|
|
8
8
|
from .fetch_substitutions_data import substitutions_data
|
|
9
9
|
from .fetch_match_odds_data import match_odds_data
|
|
10
10
|
from .fetch_momentum_data import momentum_data
|
|
11
|
+
from .fetch_past_matches_data import past_matches_data
|
|
11
12
|
|
|
12
13
|
__all__ = [
|
|
13
14
|
"match_data",
|
|
@@ -19,5 +20,6 @@ __all__ = [
|
|
|
19
20
|
"coordinates_data",
|
|
20
21
|
"substitutions_data",
|
|
21
22
|
"match_odds_data",
|
|
22
|
-
"momentum_data"
|
|
23
|
+
"momentum_data",
|
|
24
|
+
"past_matches_data"
|
|
23
25
|
]
|
|
@@ -66,7 +66,17 @@ def match_data(
|
|
|
66
66
|
"injury_time_1": events_df["time"].apply(lambda x: x.get("injuryTime1", "")),
|
|
67
67
|
"injury_time_2": events_df["time"].apply(lambda x: x.get("injuryTime2", "")),
|
|
68
68
|
"start_timestamp": events_df["startTimestamp"],
|
|
69
|
-
"status": events_df["status"].apply(lambda x: x.get("description", ""))
|
|
69
|
+
"status": events_df["status"].apply(lambda x: x.get("description", "")),
|
|
70
|
+
"home_score_current": events_df["homeScore"].apply(lambda x: x.get("current", "")),
|
|
71
|
+
"home_score_display": events_df["homeScore"].apply(lambda x: x.get("display", "")),
|
|
72
|
+
"home_score_period1": events_df["homeScore"].apply(lambda x: x.get("period1", "")),
|
|
73
|
+
"home_score_period2": events_df["homeScore"].apply(lambda x: x.get("period2", "")),
|
|
74
|
+
"home_score_normaltime": events_df["homeScore"].apply(lambda x: x.get("normaltime", "")),
|
|
75
|
+
"away_score_current": events_df["awayScore"].apply(lambda x: x.get("current", "")),
|
|
76
|
+
"away_score_display": events_df["awayScore"].apply(lambda x: x.get("display", "")),
|
|
77
|
+
"away_score_period1": events_df["awayScore"].apply(lambda x: x.get("period1", "")),
|
|
78
|
+
"away_score_period2": events_df["awayScore"].apply(lambda x: x.get("period2", "")),
|
|
79
|
+
"away_score_normaltime": events_df["awayScore"].apply(lambda x: x.get("normaltime", ""))
|
|
70
80
|
})
|
|
71
81
|
|
|
72
82
|
if enable_json_export or enable_excel_export:
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from selenium.webdriver.common.by import By
|
|
4
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
|
5
|
+
from selenium.webdriver.support import expected_conditions as EC
|
|
6
|
+
from selenium.common.exceptions import TimeoutException, WebDriverException
|
|
7
|
+
from datafc.utils._setup_webdriver import setup_webdriver
|
|
8
|
+
from datafc.utils._save_files import save_json, save_excel
|
|
9
|
+
from datafc.utils._config import ALLOWED_SOURCES, API_BASE_URLS
|
|
10
|
+
|
|
11
|
+
def _fetch_response_text(webdriver_instance, url, element_load_timeout):
    """Open `url` in the browser and return the stripped text of its <pre> element.

    Selenium exceptions (TimeoutException, WebDriverException) are not handled
    here; they propagate to the caller, which translates them.
    """
    webdriver_instance.get(url)
    response_element = WebDriverWait(webdriver_instance, element_load_timeout).until(
        EC.visibility_of_element_located((By.TAG_NAME, "pre"))
    )
    return response_element.text.strip()


def _build_match_info(event):
    """Flatten one h2h event dict into a single row of the output table.

    Every nested object is read with `.get(...) or {}` so that a missing or
    null sub-object yields empty-string fields instead of aborting the fetch.
    """
    tournament = event.get("tournament") or {}
    season = event.get("season") or {}
    round_info = event.get("roundInfo") or {}
    home_team = event.get("homeTeam") or {}
    away_team = event.get("awayTeam") or {}
    match_time = event.get("time") or {}
    status = event.get("status") or {}

    match_info = {
        "country": (tournament.get("category") or {}).get("name", ""),
        "tournament": tournament.get("name", ""),
        "season": season.get("year", ""),
        "week": round_info.get("round", ""),
        "game_id": event.get("id", ""),
        "home_team": home_team.get("name", ""),
        "home_team_id": home_team.get("id", ""),
        "away_team": away_team.get("name", ""),
        "away_team_id": away_team.get("id", ""),
        "injury_time_1": match_time.get("injuryTime1", ""),
        "injury_time_2": match_time.get("injuryTime2", ""),
        "start_timestamp": event.get("startTimestamp", ""),
        "status": status.get("description", ""),
    }

    # Column order matters for the exported files: all home_score_* columns
    # first, then all away_score_* columns, each in the order listed below.
    score_parts = ("current", "display", "period1", "period2", "normaltime")
    for side, score_key in (("home", "homeScore"), ("away", "awayScore")):
        score = event.get(score_key) or {}
        for part in score_parts:
            match_info[f"{side}_score_{part}"] = score.get(part, "")

    return match_info


def past_matches_data(
    tournament_id: int,
    season_id: int,
    week_number: int,
    data_source: str = "sofascore",
    element_load_timeout: int = 10,
    enable_json_export: bool = False,
    enable_excel_export: bool = False
) -> pd.DataFrame:
    """
    Fetches past (head-to-head) match data for every fixture of a given matchweek.

    The round's events are requested first; then, for each event's `customId`,
    the `/h2h/events` endpoint is requested and every event it returns becomes
    one row of the result.

    Args:
        tournament_id (int): The unique identifier for the tournament.
        season_id (int): The unique identifier for the season.
        week_number (int): The matchweek number within the season.
        data_source (str): The data source ('sofavpn' or 'sofascore'). Defaults to 'sofascore'.
        element_load_timeout (int): The maximum time (in seconds) to wait for the API response. Defaults to 10.
        enable_json_export (bool): If `True`, exports the fetched data as a JSON file. Defaults to `False`.
        enable_excel_export (bool): If `True`, exports the fetched data as an Excel file. Defaults to `False`.

    Returns:
        pd.DataFrame: One row per h2h event. Empty if no h2h request returned
        a usable `events` list.

    Raises:
        ValueError: If `data_source` is not in `ALLOWED_SOURCES`.
        RuntimeError: For every failure after that check: timeouts, WebDriver
            errors, undecodable JSON, an empty or malformed round response
            (the errors raised for those inside the `try` are re-raised as
            `RuntimeError` by the generic handler), or any other exception.
    """
    if data_source not in ALLOWED_SOURCES:
        raise ValueError(f"Invalid data source: {data_source}. Must be one of {ALLOWED_SOURCES}")

    api_request_url = f"{API_BASE_URLS[data_source]}/api/v1/unique-tournament/{tournament_id}/season/{season_id}/events/round/{week_number}"

    # Bind the name before the try block: if setup_webdriver() itself fails,
    # the finally clause must still be able to test it without raising
    # UnboundLocalError and hiding the real error.
    webdriver_instance = None

    try:
        webdriver_instance = setup_webdriver()

        response_text = _fetch_response_text(webdriver_instance, api_request_url, element_load_timeout)
        if not response_text:
            raise RuntimeError("API response is empty.")

        api_response_data = json.loads(response_text)
        if "events" not in api_response_data or not isinstance(api_response_data["events"], list):
            raise ValueError("Invalid API response format: 'events' key is missing or not a list.")

        events_df = pd.DataFrame(api_response_data["events"])
        if events_df.empty:
            raise ValueError("No match data found for the specified parameters.")

        # Metadata passed to the export helpers is taken from the first event
        # of the requested round, not from the h2h events.
        first_event = events_df.iloc[0]
        fn_country = first_event["tournament"].get("category", {}).get("name", "")
        fn_tournament = first_event["tournament"].get("name", "")
        fn_season = first_event["season"].get("year", "")
        fn_week = week_number

        # The h2h endpoint uses the base URL stored under the "<source>2" key.
        h2h_base_url = API_BASE_URLS[data_source + "2"]

        all_matches_data = []
        for custom_id in events_df["customId"].tolist():
            h2h_url = f"{h2h_base_url}/api/v1/event/{custom_id}/h2h/events"
            h2h_response_text = _fetch_response_text(webdriver_instance, h2h_url, element_load_timeout)
            if not h2h_response_text:
                # Best effort: a fixture without h2h data is skipped.
                continue

            h2h_data = json.loads(h2h_response_text)
            if "events" not in h2h_data or not isinstance(h2h_data["events"], list):
                continue

            all_matches_data.extend(_build_match_info(event) for event in h2h_data["events"])

        detailed_matches_df = pd.DataFrame(all_matches_data)

        if enable_json_export:
            save_json(
                data=detailed_matches_df,
                data_source=data_source,
                country=fn_country,
                tournament=fn_tournament,
                season=fn_season,
                week_number=fn_week
            )

        if enable_excel_export:
            save_excel(
                data=detailed_matches_df,
                data_source=data_source,
                country=fn_country,
                tournament=fn_tournament,
                season=fn_season,
                week_number=fn_week
            )

        return detailed_matches_df

    except TimeoutException as e:
        raise RuntimeError("Timeout occurred while waiting for the page or API response.") from e
    except WebDriverException as e:
        raise RuntimeError(f"Selenium WebDriver error: {str(e)}") from e
    except json.JSONDecodeError as e:
        raise RuntimeError("Failed to decode API response as JSON.") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error while fetching past matches data: {e.__class__.__name__} - {e}") from e

    finally:
        if webdriver_instance is not None:
            webdriver_instance.quit()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datafc
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.3.0
|
|
4
4
|
Summary: A scalable Python library for fetching, processing, and exporting structured football match data.
|
|
5
5
|
Home-page: https://github.com/urazakgul/datafc
|
|
6
6
|
Author: Uraz Akgül
|
|
@@ -14,7 +14,7 @@ Requires-Python: >=3.8
|
|
|
14
14
|
Description-Content-Type: text/markdown
|
|
15
15
|
License-File: LICENSE
|
|
16
16
|
|
|
17
|
-
# datafc v1.1.0
|
|
17
|
+
# datafc v1.3.0
|
|
18
18
|
|
|
19
19
|
## Overview
|
|
20
20
|
|
|
@@ -53,7 +53,7 @@ pip install git+https://github.com/urazakgul/datafc.git
|
|
|
53
53
|
To install a specific version of `datafc`, use:
|
|
54
54
|
|
|
55
55
|
```bash
|
|
56
|
-
pip install datafc==1.1.0
|
|
56
|
+
pip install datafc==1.3.0
|
|
57
57
|
```
|
|
58
58
|
|
|
59
59
|
If you already have `datafc` installed and want to upgrade to the latest version, run:
|
|
@@ -105,6 +105,7 @@ from datafc.sofascore import (
|
|
|
105
105
|
match_odds_data,
|
|
106
106
|
match_stats_data,
|
|
107
107
|
momentum_data,
|
|
108
|
+
past_matches_data,
|
|
108
109
|
lineups_data,
|
|
109
110
|
coordinates_data,
|
|
110
111
|
substitutions_data,
|
|
@@ -140,20 +141,19 @@ The `lineups_data` function fetches player lineup details for each match and is
|
|
|
140
141
|
|
|
141
142
|
Without `lineups_data`, these dependent functions will not work as expected.
|
|
142
143
|
|
|
143
|
-
Exception: `standings_data`
|
|
144
|
+
Exception: `standings_data` and `past_matches_data`
|
|
144
145
|
|
|
145
|
-
Unlike
|
|
146
|
+
Unlike other functions, `standings_data` and `past_matches_data` do not require `match_data` or `lineups_data`. `standings_data` can be executed independently using only `tournament_id` and `season_id`; `past_matches_data` additionally requires `week_number`.
|
|
146
147
|
|
|
147
148
|
### Match Data
|
|
148
149
|
|
|
149
150
|
#### `match_data`
|
|
150
151
|
|
|
151
|
-
The `match_data` function fetches match data for a specified tournament, season, and matchweek.
|
|
152
|
+
The `match_data` function fetches match data for a specified tournament, season, and matchweek.
|
|
152
153
|
|
|
153
154
|
Example Usage:
|
|
154
155
|
|
|
155
156
|
```python
|
|
156
|
-
# Fetch match data for a specific tournament, season, and week
|
|
157
157
|
match_df = match_data(
|
|
158
158
|
tournament_id=52,
|
|
159
159
|
season_id=63814,
|
|
@@ -193,6 +193,16 @@ The returned DataFrame includes the following columns:
|
|
|
193
193
|
* `injury_time_2`: Added injury time in the second half.
|
|
194
194
|
* `start_timestamp`: The start time of the match.
|
|
195
195
|
* `status`: The current status of the match.
|
|
196
|
+
* `home_score_current`: The latest recorded score for the home team.
|
|
197
|
+
* `home_score_display`: The displayed score of the home team.
|
|
198
|
+
* `home_score_period1`: The home team's score at the end of the first half.
|
|
199
|
+
* `home_score_period2`: The home team's goals scored in the second half.
|
|
200
|
+
* `home_score_normaltime`: The home team's final score at the end of normal time (90 minutes).
|
|
201
|
+
* `away_score_current`: The latest recorded score for the away team.
|
|
202
|
+
* `away_score_display`: The displayed score of the away team.
|
|
203
|
+
* `away_score_period1`: The away team's score at the end of the first half.
|
|
204
|
+
* `away_score_period2`: The away team's goals scored in the second half.
|
|
205
|
+
* `away_score_normaltime`: The away team's final score at the end of normal time (90 minutes).
|
|
196
206
|
|
|
197
207
|
Dependencies:
|
|
198
208
|
|
|
@@ -200,12 +210,11 @@ Dependencies:
|
|
|
200
210
|
|
|
201
211
|
#### `match_odds_data`
|
|
202
212
|
|
|
203
|
-
The `match_odds_data` function fetches betting odds data for each match in the provided match dataset.
|
|
213
|
+
The `match_odds_data` function fetches betting odds data for each match in the provided match dataset.
|
|
204
214
|
|
|
205
215
|
Example Usage:
|
|
206
216
|
|
|
207
217
|
```python
|
|
208
|
-
# Fetch match odds data
|
|
209
218
|
match_odds_df = match_odds_data(
|
|
210
219
|
match_df=match_df,
|
|
211
220
|
data_source="sofascore",
|
|
@@ -248,12 +257,11 @@ Dependencies:
|
|
|
248
257
|
|
|
249
258
|
#### `match_stats_data`
|
|
250
259
|
|
|
251
|
-
The `match_stats_data` function fetches statistical data for each match in the provided match dataset.
|
|
260
|
+
The `match_stats_data` function fetches statistical data for each match in the provided match dataset.
|
|
252
261
|
|
|
253
262
|
Example Usage:
|
|
254
263
|
|
|
255
264
|
```python
|
|
256
|
-
# Fetch match statistics data
|
|
257
265
|
match_stats_df = match_stats_data(
|
|
258
266
|
match_df=match_df,
|
|
259
267
|
data_source="sofascore",
|
|
@@ -293,12 +301,11 @@ Dependencies:
|
|
|
293
301
|
|
|
294
302
|
#### `momentum_data`
|
|
295
303
|
|
|
296
|
-
The `momentum_data` function fetches momentum data for each match in the provided match dataset.
|
|
304
|
+
The `momentum_data` function fetches momentum data for each match in the provided match dataset.
|
|
297
305
|
|
|
298
306
|
Example Usage:
|
|
299
307
|
|
|
300
308
|
```python
|
|
301
|
-
# Fetch momentum data
|
|
302
309
|
momentum_df = momentum_data(
|
|
303
310
|
match_df=match_df,
|
|
304
311
|
data_source="sofascore",
|
|
@@ -333,16 +340,76 @@ Dependencies:
|
|
|
333
340
|
|
|
334
341
|
* Requires `match_data` output as `match_df`.
|
|
335
342
|
|
|
343
|
+
#### `past_matches_data`
|
|
344
|
+
|
|
345
|
+
The `past_matches_data` function fetches the past (head-to-head) matches of every fixture in the specified tournament, season, and matchweek.
|
|
346
|
+
|
|
347
|
+
Example Usage:
|
|
348
|
+
|
|
349
|
+
```python
|
|
350
|
+
past_matches_df = past_matches_data(
|
|
351
|
+
tournament_id=52,
|
|
352
|
+
season_id=63814,
|
|
353
|
+
week_number=21,
|
|
354
|
+
data_source="sofascore",
|
|
355
|
+
enable_json_export=True,
|
|
356
|
+
enable_excel_export=True
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
print(past_matches_df)
|
|
360
|
+
```
|
|
361
|
+
|
|
362
|
+
Parameters:
|
|
363
|
+
|
|
364
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
365
|
+
* `season_id` (int): The unique identifier for the season.
|
|
366
|
+
* `week_number` (int): The matchweek number within the season.
|
|
367
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `sofascore`.
|
|
368
|
+
* `element_load_timeout` (int): The maximum time (in seconds) to wait for the API response. Defaults to 10.
|
|
369
|
+
* `enable_json_export` (bool): If `True`, exports the fetched data as a JSON file. Defaults to `False`.
|
|
370
|
+
* `enable_excel_export` (bool): If `True`, exports the fetched data as an Excel file. Defaults to `False`.
|
|
371
|
+
|
|
372
|
+
Data Structure:
|
|
373
|
+
|
|
374
|
+
The returned DataFrame includes the following columns:
|
|
375
|
+
|
|
376
|
+
* `country`: The country where the tournament is held.
|
|
377
|
+
* `tournament`: The name of the tournament.
|
|
378
|
+
* `season`: The season year.
|
|
379
|
+
* `week`: The matchweek number.
|
|
380
|
+
* `game_id`: The unique identifier for the match.
|
|
381
|
+
* `home_team`: The name of the home team.
|
|
382
|
+
* `home_team_id`: The unique identifier for the home team.
|
|
383
|
+
* `away_team`: The name of the away team.
|
|
384
|
+
* `away_team_id`: The unique identifier for the away team.
|
|
385
|
+
* `injury_time_1`: Added injury time in the first half.
|
|
386
|
+
* `injury_time_2`: Added injury time in the second half.
|
|
387
|
+
* `start_timestamp`: The start time of the match.
|
|
388
|
+
* `status`: The current status of the match.
|
|
389
|
+
* `home_score_current`: The latest recorded score for the home team.
|
|
390
|
+
* `home_score_display`: The displayed score of the home team.
|
|
391
|
+
* `home_score_period1`: The home team's score at the end of the first half.
|
|
392
|
+
* `home_score_period2`: The home team's goals scored in the second half.
|
|
393
|
+
* `home_score_normaltime`: The home team's final score at the end of normal time (90 minutes).
|
|
394
|
+
* `away_score_current`: The latest recorded score for the away team.
|
|
395
|
+
* `away_score_display`: The displayed score of the away team.
|
|
396
|
+
* `away_score_period1`: The away team's score at the end of the first half.
|
|
397
|
+
* `away_score_period2`: The away team's goals scored in the second half.
|
|
398
|
+
* `away_score_normaltime`: The away team's final score at the end of normal time (90 minutes).
|
|
399
|
+
|
|
400
|
+
Dependencies:
|
|
401
|
+
|
|
402
|
+
* No prior function dependency required.
|
|
403
|
+
|
|
336
404
|
### Player Data
|
|
337
405
|
|
|
338
406
|
#### `lineups_data`
|
|
339
407
|
|
|
340
|
-
The `lineups_data` function fetches lineup data for each match in the provided match dataset.
|
|
408
|
+
The `lineups_data` function fetches lineup data for each match in the provided match dataset.
|
|
341
409
|
|
|
342
410
|
Example Usage:
|
|
343
411
|
|
|
344
412
|
```python
|
|
345
|
-
# Fetch lineups data based on match data
|
|
346
413
|
lineups_df = lineups_data(
|
|
347
414
|
match_df=match_df,
|
|
348
415
|
data_source="sofascore",
|
|
@@ -382,12 +449,11 @@ Dependencies:
|
|
|
382
449
|
|
|
383
450
|
#### `coordinates_data`
|
|
384
451
|
|
|
385
|
-
The `coordinates_data` function fetches coordinate data for each player in the provided lineup dataset.
|
|
452
|
+
The `coordinates_data` function fetches coordinate data for each player in the provided lineup dataset.
|
|
386
453
|
|
|
387
454
|
Example Usage:
|
|
388
455
|
|
|
389
456
|
```python
|
|
390
|
-
# Fetch coordinates data
|
|
391
457
|
coordinates_df = coordinates_data(
|
|
392
458
|
lineups_df=lineups_df,
|
|
393
459
|
data_source="sofascore",
|
|
@@ -427,12 +493,11 @@ Dependencies:
|
|
|
427
493
|
|
|
428
494
|
#### `substitutions_data`
|
|
429
495
|
|
|
430
|
-
The `substitutions_data` function fetches substitution data for each match in the provided match dataset.
|
|
496
|
+
The `substitutions_data` function fetches substitution data for each match in the provided match dataset.
|
|
431
497
|
|
|
432
498
|
Example Usage:
|
|
433
499
|
|
|
434
500
|
```python
|
|
435
|
-
# Fetch substitution data
|
|
436
501
|
substitutions_df = substitutions_data(
|
|
437
502
|
match_df=match_df,
|
|
438
503
|
data_source="sofascore",
|
|
@@ -474,12 +539,11 @@ Dependencies:
|
|
|
474
539
|
|
|
475
540
|
#### `goal_networks_data`
|
|
476
541
|
|
|
477
|
-
The `goal_networks_data` function fetches goal network data for each match in the provided match dataset.
|
|
542
|
+
The `goal_networks_data` function fetches goal network data for each match in the provided match dataset.
|
|
478
543
|
|
|
479
544
|
Example Usage:
|
|
480
545
|
|
|
481
546
|
```python
|
|
482
|
-
# Fetch goal networks data
|
|
483
547
|
goal_networks_df = goal_networks_data(
|
|
484
548
|
match_df=match_df,
|
|
485
549
|
data_source="sofascore",
|
|
@@ -531,12 +595,11 @@ Dependencies:
|
|
|
531
595
|
|
|
532
596
|
#### `shots_data`
|
|
533
597
|
|
|
534
|
-
The `shots_data` function fetches shot data for each match in the provided match dataset.
|
|
598
|
+
The `shots_data` function fetches shot data for each match in the provided match dataset.
|
|
535
599
|
|
|
536
600
|
Example Usage:
|
|
537
601
|
|
|
538
602
|
```python
|
|
539
|
-
# Fetch shot data
|
|
540
603
|
shots_df = shots_data(
|
|
541
604
|
match_df=match_df,
|
|
542
605
|
data_source="sofascore",
|
|
@@ -598,12 +661,11 @@ Dependencies:
|
|
|
598
661
|
|
|
599
662
|
#### `standings_data`
|
|
600
663
|
|
|
601
|
-
The `standings_data` function fetches league standings for a specific tournament and season.
|
|
664
|
+
The `standings_data` function fetches league standings for a specific tournament and season.
|
|
602
665
|
|
|
603
666
|
Example Usage:
|
|
604
667
|
|
|
605
668
|
```python
|
|
606
|
-
# Fetch league standings
|
|
607
669
|
standings_df = standings_data(
|
|
608
670
|
tournament_id=52,
|
|
609
671
|
season_id=63814,
|
|
@@ -648,10 +710,18 @@ Dependencies:
|
|
|
648
710
|
|
|
649
711
|
## Changelog
|
|
650
712
|
|
|
713
|
+
* v1.3.0
|
|
714
|
+
* Added `past_matches_data` function to fetch historical match data.
|
|
715
|
+
|
|
716
|
+
* v1.2.0
|
|
717
|
+
* Added match score columns to `match_data`
|
|
718
|
+
|
|
651
719
|
* v1.1.0
|
|
652
720
|
* Added 4 new columns to `match_data`
|
|
653
721
|
* Added `data_source` parameter to `save_json` and `save_excel` for including the source in file names
|
|
654
722
|
|
|
723
|
+
* v1.0.1 (Cancelled, not used)
|
|
724
|
+
|
|
655
725
|
* v1.0.0
|
|
656
726
|
* Initial release of `datafc`
|
|
657
727
|
* Fetching match data using Selenium WebDriver
|
|
@@ -15,6 +15,7 @@ datafc/sofascore/fetch_match_data.py
|
|
|
15
15
|
datafc/sofascore/fetch_match_odds_data.py
|
|
16
16
|
datafc/sofascore/fetch_match_stats_data.py
|
|
17
17
|
datafc/sofascore/fetch_momentum_data.py
|
|
18
|
+
datafc/sofascore/fetch_past_matches_data.py
|
|
18
19
|
datafc/sofascore/fetch_shots_data.py
|
|
19
20
|
datafc/sofascore/fetch_standings_data.py
|
|
20
21
|
datafc/sofascore/fetch_substitutions_data.py
|
|
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
|
5
5
|
|
|
6
6
|
setup(
|
|
7
7
|
name="datafc",
|
|
8
|
-
version="1.1.0",
|
|
8
|
+
version="1.3.0",
|
|
9
9
|
author="Uraz Akgül",
|
|
10
10
|
author_email="urazdev@gmail.com",
|
|
11
11
|
description="A scalable Python library for fetching, processing, and exporting structured football match data.",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|