datafc 1.4.0__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {datafc-1.4.0 → datafc-1.5.0}/PKG-INFO +193 -4
- {datafc-1.4.0 → datafc-1.5.0}/README.md +192 -3
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/__init__.py +7 -1
- datafc-1.5.0/datafc/sofascore/fetch_player_stats_data.py +132 -0
- datafc-1.5.0/datafc/sofascore/fetch_squad_data.py +123 -0
- datafc-1.5.0/datafc/sofascore/fetch_team_stats_data.py +114 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc.egg-info/PKG-INFO +193 -4
- {datafc-1.4.0 → datafc-1.5.0}/datafc.egg-info/SOURCES.txt +3 -0
- {datafc-1.4.0 → datafc-1.5.0}/setup.py +1 -1
- {datafc-1.4.0 → datafc-1.5.0}/LICENSE +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/__init__.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_coordinates_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_goal_networks_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_lineups_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_match_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_match_odds_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_match_stats_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_momentum_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_past_matches_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_shots_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_standings_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/sofascore/fetch_substitutions_data.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/utils/__init__.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/utils/_config.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/utils/_save_files.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc/utils/_setup_webdriver.py +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc.egg-info/dependency_links.txt +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc.egg-info/requires.txt +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/datafc.egg-info/top_level.txt +0 -0
- {datafc-1.4.0 → datafc-1.5.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datafc
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.5.0
|
|
4
4
|
Summary: A scalable Python library for fetching, processing, and exporting structured football match data.
|
|
5
5
|
Home-page: https://github.com/urazakgul/datafc
|
|
6
6
|
Author: Uraz Akgül
|
|
@@ -14,7 +14,7 @@ Requires-Python: >=3.8
|
|
|
14
14
|
Description-Content-Type: text/markdown
|
|
15
15
|
License-File: LICENSE
|
|
16
16
|
|
|
17
|
-
# datafc v1.4.0
|
|
17
|
+
# datafc v1.5.0
|
|
18
18
|
|
|
19
19
|
## Overview
|
|
20
20
|
|
|
@@ -53,7 +53,7 @@ pip install git+https://github.com/urazakgul/datafc.git
|
|
|
53
53
|
To install a specific version of `datafc`, use:
|
|
54
54
|
|
|
55
55
|
```bash
|
|
56
|
-
pip install datafc==1.4.0
|
|
56
|
+
pip install datafc==1.5.0
|
|
57
57
|
```
|
|
58
58
|
|
|
59
59
|
If you already have `datafc` installed and want to upgrade to the latest version, run:
|
|
@@ -111,7 +111,10 @@ from datafc.sofascore import (
|
|
|
111
111
|
substitutions_data,
|
|
112
112
|
goal_networks_data,
|
|
113
113
|
shots_data,
|
|
114
|
-
standings_data
|
|
114
|
+
standings_data,
|
|
115
|
+
team_stats_data,
|
|
116
|
+
player_stats_data,
|
|
117
|
+
squad_data
|
|
115
118
|
)
|
|
116
119
|
```
|
|
117
120
|
|
|
@@ -141,10 +144,22 @@ The `lineups_data` function fetches player lineup details for each match and is
|
|
|
141
144
|
|
|
142
145
|
Without `lineups_data`, these dependent functions will not work as expected.
|
|
143
146
|
|
|
147
|
+
### `standings_data`: A Foundation for Team and Player-Level Functions
|
|
148
|
+
|
|
144
149
|
Exception: `standings_data` and `past_matches_data`
|
|
145
150
|
|
|
146
151
|
Unlike other functions, `standings_data` and `past_matches_data` do not require `match_data` or `lineups_data`. They can be executed independently using only `tournament_id` and `season_id`. Additionally, `past_matches_data` includes an extra field: `week_number`.
|
|
147
152
|
|
|
153
|
+
However, `standings_data` serves as a critical dependency for the following functions:
|
|
154
|
+
|
|
155
|
+
* `team_stats_data`
|
|
156
|
+
* `player_stats_data`
|
|
157
|
+
* `squad_data`
|
|
158
|
+
|
|
159
|
+
These functions rely on team-level metadata (such as `team_id`) provided by `standings_data` to fetch more granular data. Ensure that `standings_data` is successfully executed and includes teams with `category == 'Total'` before calling any of the above functions.
|
|
160
|
+
|
|
161
|
+
`past_matches_data` also works independently and includes an extra field: `week_number`.
|
|
162
|
+
|
|
148
163
|
### Match Data
|
|
149
164
|
|
|
150
165
|
#### `match_data`
|
|
@@ -864,8 +879,182 @@ Dependencies:
|
|
|
864
879
|
|
|
865
880
|
* No prior function dependency required.
|
|
866
881
|
|
|
882
|
+
### Team Statistics Data
|
|
883
|
+
|
|
884
|
+
#### `team_stats_data`
|
|
885
|
+
|
|
886
|
+
The `team_stats_data` function fetches detailed statistical data for each team in a given tournament and season, based on the team list provided by `standings_data`.
|
|
887
|
+
|
|
888
|
+
Note: This function requires the output of `standings_data` and only processes rows where `category == 'Total'`.
|
|
889
|
+
|
|
890
|
+
Example Usage:
|
|
891
|
+
|
|
892
|
+
```python
|
|
893
|
+
standings_df = standings_data(
|
|
894
|
+
tournament_id=52,
|
|
895
|
+
season_id=63814,
|
|
896
|
+
data_source="sofascore"
|
|
897
|
+
)
|
|
898
|
+
|
|
899
|
+
team_stats_df = team_stats_data(
|
|
900
|
+
standings_df=standings_df,
|
|
901
|
+
tournament_id=52,
|
|
902
|
+
season_id=63814,
|
|
903
|
+
data_source="sofascore",
|
|
904
|
+
enable_json_export=True,
|
|
905
|
+
enable_excel_export=True
|
|
906
|
+
)
|
|
907
|
+
|
|
908
|
+
print(team_stats_df)
|
|
909
|
+
```
|
|
910
|
+
|
|
911
|
+
Parameters:
|
|
912
|
+
|
|
913
|
+
* `standings_df` (pd.DataFrame): A DataFrame with metadata on each team, typically returned by standings_data.
|
|
914
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
915
|
+
* `season_id` (int): The unique identifier for the season.
|
|
916
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
917
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
918
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
919
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
920
|
+
|
|
921
|
+
Data Structure:
|
|
922
|
+
|
|
923
|
+
The returned DataFrame includes the following columns:
|
|
924
|
+
|
|
925
|
+
* `country`: The country where the tournament is held.
|
|
926
|
+
* `tournament`: The name of the tournament.
|
|
927
|
+
* `team_name`: The name of the team.
|
|
928
|
+
* `team_id`: The unique identifier of the team.
|
|
929
|
+
* `stat`: The name of the statistic.
|
|
930
|
+
* `value`: The value of the statistic.
|
|
931
|
+
|
|
932
|
+
Dependencies:
|
|
933
|
+
|
|
934
|
+
* Requires `standings_data` output as `standings_df`.
|
|
935
|
+
|
|
936
|
+
### Player Statistics Data
|
|
937
|
+
|
|
938
|
+
#### `player_stats_data`
|
|
939
|
+
|
|
940
|
+
The `player_stats_data` function fetches top player statistics for each team in the given standings dataset. It processes player-level metrics like goals, assists, duels won, and more.
|
|
941
|
+
|
|
942
|
+
Note: This function requires the output of `standings_data`, and filters for rows where `category == 'Total'`.
|
|
943
|
+
|
|
944
|
+
Example Usage:
|
|
945
|
+
|
|
946
|
+
```python
|
|
947
|
+
standings_df = standings_data(
|
|
948
|
+
tournament_id=52,
|
|
949
|
+
season_id=63814,
|
|
950
|
+
data_source="sofascore"
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
player_stats_df = player_stats_data(
|
|
954
|
+
standings_df=standings_df,
|
|
955
|
+
tournament_id=52,
|
|
956
|
+
season_id=63814,
|
|
957
|
+
data_source="sofascore",
|
|
958
|
+
enable_json_export=True,
|
|
959
|
+
enable_excel_export=True
|
|
960
|
+
)
|
|
961
|
+
|
|
962
|
+
print(player_stats_df)
|
|
963
|
+
```
|
|
964
|
+
|
|
965
|
+
Parameters:
|
|
966
|
+
|
|
967
|
+
* `standings_df` (pd.DataFrame): A DataFrame with metadata on teams, returned by standings_data.
|
|
968
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
969
|
+
* `season_id` (int): The unique identifier for the season.
|
|
970
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
971
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
972
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
973
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
974
|
+
|
|
975
|
+
Data Structure:
|
|
976
|
+
|
|
977
|
+
The returned DataFrame includes the following columns:
|
|
978
|
+
|
|
979
|
+
* `country`: The country where the tournament is held.
|
|
980
|
+
* `tournament`: The name of the tournament.
|
|
981
|
+
* `team_name`: The name of the team.
|
|
982
|
+
* `team_id`: The unique identifier of the team.
|
|
983
|
+
* `player_name`: The name of the player.
|
|
984
|
+
* `player_id`: The unique identifier of the player.
|
|
985
|
+
* `position`: The player’s position.
|
|
986
|
+
* `stat_name`: The name of the statistic.
|
|
987
|
+
* `stat_value`: The value of the statistic.
|
|
988
|
+
|
|
989
|
+
Dependencies:
|
|
990
|
+
|
|
991
|
+
* Requires `standings_data` output as `standings_df`.
|
|
992
|
+
|
|
993
|
+
### Squad Data
|
|
994
|
+
|
|
995
|
+
#### `squad_data`
|
|
996
|
+
|
|
997
|
+
The `squad_data` function fetches detailed squad (roster) information for each team listed in the provided standings dataset. It includes player bio data such as age, height, position, market value, and contract info.
|
|
998
|
+
|
|
999
|
+
Note: This function requires the output of `standings_data`, and only processes rows where `category == 'Total'`.
|
|
1000
|
+
|
|
1001
|
+
Example Usage:
|
|
1002
|
+
|
|
1003
|
+
```python
|
|
1004
|
+
standings_df = standings_data(
|
|
1005
|
+
tournament_id=52,
|
|
1006
|
+
season_id=63814,
|
|
1007
|
+
data_source="sofascore"
|
|
1008
|
+
)
|
|
1009
|
+
|
|
1010
|
+
squad_df = squad_data(
|
|
1011
|
+
standings_df=standings_df,
|
|
1012
|
+
data_source="sofascore",
|
|
1013
|
+
enable_json_export=True,
|
|
1014
|
+
enable_excel_export=True
|
|
1015
|
+
)
|
|
1016
|
+
|
|
1017
|
+
print(squad_df)
|
|
1018
|
+
```
|
|
1019
|
+
|
|
1020
|
+
Parameters:
|
|
1021
|
+
|
|
1022
|
+
* `standings_df` (pd.DataFrame): A DataFrame with team metadata, returned by standings_data.
|
|
1023
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
1024
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
1025
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
1026
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
1027
|
+
|
|
1028
|
+
Data Structure:
|
|
1029
|
+
|
|
1030
|
+
The returned DataFrame includes the following columns:
|
|
1031
|
+
|
|
1032
|
+
* `country`: The country where the tournament is held.
|
|
1033
|
+
* `tournament`: The name of the tournament.
|
|
1034
|
+
* `team_name`: The name of the team.
|
|
1035
|
+
* `team_id`: The unique identifier of the team.
|
|
1036
|
+
* `player_name`: The name of the player.
|
|
1037
|
+
* `player_id`: The unique identifier of the player.
|
|
1038
|
+
* `age`: The date of birth timestamp (UNIX format).
|
|
1039
|
+
* `height`: The height of the player.
|
|
1040
|
+
* `player_country`: The nationality of the player.
|
|
1041
|
+
* `position`: The position of the player.
|
|
1042
|
+
* `preferred_foot`: The preferred foot of the player.
|
|
1043
|
+
* `contract_until`: The contract end date (UNIX timestamp).
|
|
1044
|
+
* `market_value`: The market value of the player.
|
|
1045
|
+
* `market_currency`: The currency used for the market value.
|
|
1046
|
+
|
|
1047
|
+
Dependencies:
|
|
1048
|
+
|
|
1049
|
+
* Requires `standings_data` output as `standings_df`.
|
|
1050
|
+
|
|
867
1051
|
## Changelog
|
|
868
1052
|
|
|
1053
|
+
* v1.5.0
|
|
1054
|
+
* Added `team_stats_data` function to retrieve detailed per-team statistics using `standings_data`.
|
|
1055
|
+
* Added `player_stats_data` function to retrieve player-level top stats per team.
|
|
1056
|
+
* Added `squad_data` function to fetch full squad information including bio and market value.
|
|
1057
|
+
|
|
869
1058
|
* v1.4.0
|
|
870
1059
|
* Added `tournament_type` and `tournament_stage` parameters to `match_data` and `past_matches_data` functions.
|
|
871
1060
|
* Extended support for UEFA tournaments, including UEFA Champions League (UCL), UEFA Europa League (UEL), UEFA Europa Conference League (UECL), and UEFA Nations League (UNL), allowing seamless data fetching across multiple competitions.
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# datafc v1.4.0
|
|
1
|
+
# datafc v1.5.0
|
|
2
2
|
|
|
3
3
|
## Overview
|
|
4
4
|
|
|
@@ -37,7 +37,7 @@ pip install git+https://github.com/urazakgul/datafc.git
|
|
|
37
37
|
To install a specific version of `datafc`, use:
|
|
38
38
|
|
|
39
39
|
```bash
|
|
40
|
-
pip install datafc==1.4.0
|
|
40
|
+
pip install datafc==1.5.0
|
|
41
41
|
```
|
|
42
42
|
|
|
43
43
|
If you already have `datafc` installed and want to upgrade to the latest version, run:
|
|
@@ -95,7 +95,10 @@ from datafc.sofascore import (
|
|
|
95
95
|
substitutions_data,
|
|
96
96
|
goal_networks_data,
|
|
97
97
|
shots_data,
|
|
98
|
-
standings_data
|
|
98
|
+
standings_data,
|
|
99
|
+
team_stats_data,
|
|
100
|
+
player_stats_data,
|
|
101
|
+
squad_data
|
|
99
102
|
)
|
|
100
103
|
```
|
|
101
104
|
|
|
@@ -125,10 +128,22 @@ The `lineups_data` function fetches player lineup details for each match and is
|
|
|
125
128
|
|
|
126
129
|
Without `lineups_data`, these dependent functions will not work as expected.
|
|
127
130
|
|
|
131
|
+
### `standings_data`: A Foundation for Team and Player-Level Functions
|
|
132
|
+
|
|
128
133
|
Exception: `standings_data` and `past_matches_data`
|
|
129
134
|
|
|
130
135
|
Unlike other functions, `standings_data` and `past_matches_data` do not require `match_data` or `lineups_data`. They can be executed independently using only `tournament_id` and `season_id`. Additionally, `past_matches_data` includes an extra field: `week_number`.
|
|
131
136
|
|
|
137
|
+
However, `standings_data` serves as a critical dependency for the following functions:
|
|
138
|
+
|
|
139
|
+
* `team_stats_data`
|
|
140
|
+
* `player_stats_data`
|
|
141
|
+
* `squad_data`
|
|
142
|
+
|
|
143
|
+
These functions rely on team-level metadata (such as `team_id`) provided by `standings_data` to fetch more granular data. Ensure that `standings_data` is successfully executed and includes teams with `category == 'Total'` before calling any of the above functions.
|
|
144
|
+
|
|
145
|
+
`past_matches_data` also works independently and includes an extra field: `week_number`.
|
|
146
|
+
|
|
132
147
|
### Match Data
|
|
133
148
|
|
|
134
149
|
#### `match_data`
|
|
@@ -848,8 +863,182 @@ Dependencies:
|
|
|
848
863
|
|
|
849
864
|
* No prior function dependency required.
|
|
850
865
|
|
|
866
|
+
### Team Statistics Data
|
|
867
|
+
|
|
868
|
+
#### `team_stats_data`
|
|
869
|
+
|
|
870
|
+
The `team_stats_data` function fetches detailed statistical data for each team in a given tournament and season, based on the team list provided by `standings_data`.
|
|
871
|
+
|
|
872
|
+
Note: This function requires the output of `standings_data` and only processes rows where `category == 'Total'`.
|
|
873
|
+
|
|
874
|
+
Example Usage:
|
|
875
|
+
|
|
876
|
+
```python
|
|
877
|
+
standings_df = standings_data(
|
|
878
|
+
tournament_id=52,
|
|
879
|
+
season_id=63814,
|
|
880
|
+
data_source="sofascore"
|
|
881
|
+
)
|
|
882
|
+
|
|
883
|
+
team_stats_df = team_stats_data(
|
|
884
|
+
standings_df=standings_df,
|
|
885
|
+
tournament_id=52,
|
|
886
|
+
season_id=63814,
|
|
887
|
+
data_source="sofascore",
|
|
888
|
+
enable_json_export=True,
|
|
889
|
+
enable_excel_export=True
|
|
890
|
+
)
|
|
891
|
+
|
|
892
|
+
print(team_stats_df)
|
|
893
|
+
```
|
|
894
|
+
|
|
895
|
+
Parameters:
|
|
896
|
+
|
|
897
|
+
* `standings_df` (pd.DataFrame): A DataFrame with metadata on each team, typically returned by standings_data.
|
|
898
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
899
|
+
* `season_id` (int): The unique identifier for the season.
|
|
900
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
901
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
902
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
903
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
904
|
+
|
|
905
|
+
Data Structure:
|
|
906
|
+
|
|
907
|
+
The returned DataFrame includes the following columns:
|
|
908
|
+
|
|
909
|
+
* `country`: The country where the tournament is held.
|
|
910
|
+
* `tournament`: The name of the tournament.
|
|
911
|
+
* `team_name`: The name of the team.
|
|
912
|
+
* `team_id`: The unique identifier of the team.
|
|
913
|
+
* `stat`: The name of the statistic.
|
|
914
|
+
* `value`: The value of the statistic.
|
|
915
|
+
|
|
916
|
+
Dependencies:
|
|
917
|
+
|
|
918
|
+
* Requires `standings_data` output as `standings_df`.
|
|
919
|
+
|
|
920
|
+
### Player Statistics Data
|
|
921
|
+
|
|
922
|
+
#### `player_stats_data`
|
|
923
|
+
|
|
924
|
+
The `player_stats_data` function fetches top player statistics for each team in the given standings dataset. It processes player-level metrics like goals, assists, duels won, and more.
|
|
925
|
+
|
|
926
|
+
Note: This function requires the output of `standings_data`, and filters for rows where `category == 'Total'`.
|
|
927
|
+
|
|
928
|
+
Example Usage:
|
|
929
|
+
|
|
930
|
+
```python
|
|
931
|
+
standings_df = standings_data(
|
|
932
|
+
tournament_id=52,
|
|
933
|
+
season_id=63814,
|
|
934
|
+
data_source="sofascore"
|
|
935
|
+
)
|
|
936
|
+
|
|
937
|
+
player_stats_df = player_stats_data(
|
|
938
|
+
standings_df=standings_df,
|
|
939
|
+
tournament_id=52,
|
|
940
|
+
season_id=63814,
|
|
941
|
+
data_source="sofascore",
|
|
942
|
+
enable_json_export=True,
|
|
943
|
+
enable_excel_export=True
|
|
944
|
+
)
|
|
945
|
+
|
|
946
|
+
print(player_stats_df)
|
|
947
|
+
```
|
|
948
|
+
|
|
949
|
+
Parameters:
|
|
950
|
+
|
|
951
|
+
* `standings_df` (pd.DataFrame): A DataFrame with metadata on teams, returned by standings_data.
|
|
952
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
953
|
+
* `season_id` (int): The unique identifier for the season.
|
|
954
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
955
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
956
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
957
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
958
|
+
|
|
959
|
+
Data Structure:
|
|
960
|
+
|
|
961
|
+
The returned DataFrame includes the following columns:
|
|
962
|
+
|
|
963
|
+
* `country`: The country where the tournament is held.
|
|
964
|
+
* `tournament`: The name of the tournament.
|
|
965
|
+
* `team_name`: The name of the team.
|
|
966
|
+
* `team_id`: The unique identifier of the team.
|
|
967
|
+
* `player_name`: The name of the player.
|
|
968
|
+
* `player_id`: The unique identifier of the player.
|
|
969
|
+
* `position`: The player’s position.
|
|
970
|
+
* `stat_name`: The name of the statistic.
|
|
971
|
+
* `stat_value`: The value of the statistic.
|
|
972
|
+
|
|
973
|
+
Dependencies:
|
|
974
|
+
|
|
975
|
+
* Requires `standings_data` output as `standings_df`.
|
|
976
|
+
|
|
977
|
+
### Squad Data
|
|
978
|
+
|
|
979
|
+
#### `squad_data`
|
|
980
|
+
|
|
981
|
+
The `squad_data` function fetches detailed squad (roster) information for each team listed in the provided standings dataset. It includes player bio data such as age, height, position, market value, and contract info.
|
|
982
|
+
|
|
983
|
+
Note: This function requires the output of `standings_data`, and only processes rows where `category == 'Total'`.
|
|
984
|
+
|
|
985
|
+
Example Usage:
|
|
986
|
+
|
|
987
|
+
```python
|
|
988
|
+
standings_df = standings_data(
|
|
989
|
+
tournament_id=52,
|
|
990
|
+
season_id=63814,
|
|
991
|
+
data_source="sofascore"
|
|
992
|
+
)
|
|
993
|
+
|
|
994
|
+
squad_df = squad_data(
|
|
995
|
+
standings_df=standings_df,
|
|
996
|
+
data_source="sofascore",
|
|
997
|
+
enable_json_export=True,
|
|
998
|
+
enable_excel_export=True
|
|
999
|
+
)
|
|
1000
|
+
|
|
1001
|
+
print(squad_df)
|
|
1002
|
+
```
|
|
1003
|
+
|
|
1004
|
+
Parameters:
|
|
1005
|
+
|
|
1006
|
+
* `standings_df` (pd.DataFrame): A DataFrame with team metadata, returned by standings_data.
|
|
1007
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
1008
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
1009
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
1010
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
1011
|
+
|
|
1012
|
+
Data Structure:
|
|
1013
|
+
|
|
1014
|
+
The returned DataFrame includes the following columns:
|
|
1015
|
+
|
|
1016
|
+
* `country`: The country where the tournament is held.
|
|
1017
|
+
* `tournament`: The name of the tournament.
|
|
1018
|
+
* `team_name`: The name of the team.
|
|
1019
|
+
* `team_id`: The unique identifier of the team.
|
|
1020
|
+
* `player_name`: The name of the player.
|
|
1021
|
+
* `player_id`: The unique identifier of the player.
|
|
1022
|
+
* `age`: The date of birth timestamp (UNIX format).
|
|
1023
|
+
* `height`: The height of the player.
|
|
1024
|
+
* `player_country`: The nationality of the player.
|
|
1025
|
+
* `position`: The position of the player.
|
|
1026
|
+
* `preferred_foot`: The preferred foot of the player.
|
|
1027
|
+
* `contract_until`: The contract end date (UNIX timestamp).
|
|
1028
|
+
* `market_value`: The market value of the player.
|
|
1029
|
+
* `market_currency`: The currency used for the market value.
|
|
1030
|
+
|
|
1031
|
+
Dependencies:
|
|
1032
|
+
|
|
1033
|
+
* Requires `standings_data` output as `standings_df`.
|
|
1034
|
+
|
|
851
1035
|
## Changelog
|
|
852
1036
|
|
|
1037
|
+
* v1.5.0
|
|
1038
|
+
* Added `team_stats_data` function to retrieve detailed per-team statistics using `standings_data`.
|
|
1039
|
+
* Added `player_stats_data` function to retrieve player-level top stats per team.
|
|
1040
|
+
* Added `squad_data` function to fetch full squad information including bio and market value.
|
|
1041
|
+
|
|
853
1042
|
* v1.4.0
|
|
854
1043
|
* Added `tournament_type` and `tournament_stage` parameters to `match_data` and `past_matches_data` functions.
|
|
855
1044
|
* Extended support for UEFA tournaments, including UEFA Champions League (UCL), UEFA Europa League (UEL), UEFA Europa Conference League (UECL), and UEFA Nations League (UNL), allowing seamless data fetching across multiple competitions.
|
|
@@ -9,6 +9,9 @@ from .fetch_substitutions_data import substitutions_data
|
|
|
9
9
|
from .fetch_match_odds_data import match_odds_data
|
|
10
10
|
from .fetch_momentum_data import momentum_data
|
|
11
11
|
from .fetch_past_matches_data import past_matches_data
|
|
12
|
+
from .fetch_team_stats_data import team_stats_data
|
|
13
|
+
from .fetch_player_stats_data import player_stats_data
|
|
14
|
+
from .fetch_squad_data import squad_data
|
|
12
15
|
|
|
13
16
|
__all__ = [
|
|
14
17
|
"match_data",
|
|
@@ -21,5 +24,8 @@ __all__ = [
|
|
|
21
24
|
"substitutions_data",
|
|
22
25
|
"match_odds_data",
|
|
23
26
|
"momentum_data",
|
|
24
|
-
"past_matches_data"
|
|
27
|
+
"past_matches_data",
|
|
28
|
+
"team_stats_data",
|
|
29
|
+
"player_stats_data",
|
|
30
|
+
"squad_data"
|
|
25
31
|
]
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from selenium.webdriver.common.by import By
|
|
4
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
|
5
|
+
from selenium.webdriver.support import expected_conditions as EC
|
|
6
|
+
from selenium.common.exceptions import TimeoutException, WebDriverException
|
|
7
|
+
from datafc.utils._setup_webdriver import setup_webdriver
|
|
8
|
+
from datafc.utils._save_files import save_json, save_excel
|
|
9
|
+
from datafc.utils._config import ALLOWED_SOURCES, API_BASE_URLS
|
|
10
|
+
|
|
11
|
+
def player_stats_data(
|
|
12
|
+
standings_df: pd.DataFrame,
|
|
13
|
+
tournament_id: int,
|
|
14
|
+
season_id: int,
|
|
15
|
+
data_source: str = "sofascore",
|
|
16
|
+
element_load_timeout: int = 10,
|
|
17
|
+
enable_json_export: bool = False,
|
|
18
|
+
enable_excel_export: bool = False
|
|
19
|
+
) -> pd.DataFrame:
|
|
20
|
+
"""
|
|
21
|
+
Fetches player statistics data for each team in the provided standings dataset.
|
|
22
|
+
|
|
23
|
+
Args:
|
|
24
|
+
standings_df (pd.DataFrame): A DataFrame containing team metadata, including team_id.
|
|
25
|
+
tournament_id (int): The unique identifier for the tournament.
|
|
26
|
+
season_id (int): The unique identifier for the season.
|
|
27
|
+
data_source (str): The data source ('sofascore'). Defaults to 'sofascore'.
|
|
28
|
+
element_load_timeout (int): The maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
29
|
+
enable_json_export (bool): If `True`, saves the fetched data as a JSON file. Defaults to `False`.
|
|
30
|
+
enable_excel_export (bool): If `True`, saves the fetched data as an Excel file. Defaults to `False`.
|
|
31
|
+
|
|
32
|
+
Returns:
|
|
33
|
+
pd.DataFrame: A DataFrame containing statistical information for each player.
|
|
34
|
+
"""
|
|
35
|
+
if data_source not in ALLOWED_SOURCES:
|
|
36
|
+
raise ValueError(f"Invalid data source: {data_source}. Must be one of {ALLOWED_SOURCES}")
|
|
37
|
+
|
|
38
|
+
if standings_df is None or standings_df.empty:
|
|
39
|
+
raise ValueError("Standings dataframe must be provided and cannot be empty.")
|
|
40
|
+
|
|
41
|
+
webdriver_instance = None
|
|
42
|
+
try:
|
|
43
|
+
webdriver_instance = setup_webdriver()
|
|
44
|
+
stats_data_list = []
|
|
45
|
+
|
|
46
|
+
standings_df = standings_df[standings_df["category"] == "Total"]
|
|
47
|
+
for _, row in standings_df.iterrows():
|
|
48
|
+
country = row["country"]
|
|
49
|
+
tournament = row["tournament"]
|
|
50
|
+
team_name = row["team_name"]
|
|
51
|
+
team_id = row["team_id"]
|
|
52
|
+
|
|
53
|
+
url = f"{API_BASE_URLS[data_source + '2']}/api/v1/team/{team_id}/unique-tournament/{tournament_id}/season/{season_id}/top-players/overall"
|
|
54
|
+
webdriver_instance.get(url)
|
|
55
|
+
|
|
56
|
+
try:
|
|
57
|
+
pre_tag = WebDriverWait(webdriver_instance, element_load_timeout).until(
|
|
58
|
+
EC.visibility_of_element_located((By.TAG_NAME, "pre"))
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
stats_json = json.loads(pre_tag.text)
|
|
62
|
+
top_players = stats_json.get("topPlayers", {})
|
|
63
|
+
|
|
64
|
+
for category, players in top_players.items():
|
|
65
|
+
for player_data in players:
|
|
66
|
+
player = player_data.get("player", {})
|
|
67
|
+
player_id = player.get("id")
|
|
68
|
+
statistics = player_data.get("statistics", {})
|
|
69
|
+
|
|
70
|
+
for stat, value in statistics.items():
|
|
71
|
+
if stat in ["id", "type"]:
|
|
72
|
+
continue
|
|
73
|
+
|
|
74
|
+
log_entry = {
|
|
75
|
+
"country": country,
|
|
76
|
+
"tournament": tournament,
|
|
77
|
+
"team_name": team_name,
|
|
78
|
+
"team_id": team_id,
|
|
79
|
+
"player_name": player.get("name"),
|
|
80
|
+
"player_id": player_id,
|
|
81
|
+
"position": player.get("position"),
|
|
82
|
+
"stat_name": stat,
|
|
83
|
+
"stat_value": value
|
|
84
|
+
}
|
|
85
|
+
if log_entry not in stats_data_list:
|
|
86
|
+
stats_data_list.append(log_entry)
|
|
87
|
+
except TimeoutException:
|
|
88
|
+
print(f"Timeout while fetching player stats data for team_id {team_id}.")
|
|
89
|
+
except json.JSONDecodeError:
|
|
90
|
+
print(f"Failed to decode player stats data for team_id {team_id}.")
|
|
91
|
+
except WebDriverException as e:
|
|
92
|
+
print(f"Selenium WebDriver error while fetching player stats data for team_id {team_id}: {str(e)}")
|
|
93
|
+
except Exception as e:
|
|
94
|
+
print(f"Unexpected error while fetching player stats data for team_id {team_id}: {e.__class__.__name__} - {e}")
|
|
95
|
+
|
|
96
|
+
stats_data_df = pd.DataFrame(stats_data_list).drop_duplicates()
|
|
97
|
+
|
|
98
|
+
if stats_data_df.empty:
|
|
99
|
+
raise ValueError("No player statistics data found for the specified teams.")
|
|
100
|
+
|
|
101
|
+
if enable_json_export or enable_excel_export:
|
|
102
|
+
first_row = stats_data_df.iloc[0]
|
|
103
|
+
|
|
104
|
+
if enable_json_export:
|
|
105
|
+
save_json(
|
|
106
|
+
data=stats_data_df,
|
|
107
|
+
data_source=data_source,
|
|
108
|
+
country=first_row["country"],
|
|
109
|
+
tournament=first_row["tournament"],
|
|
110
|
+
season=None,
|
|
111
|
+
week_number=None
|
|
112
|
+
)
|
|
113
|
+
|
|
114
|
+
if enable_excel_export:
|
|
115
|
+
save_excel(
|
|
116
|
+
data=stats_data_df,
|
|
117
|
+
data_source=data_source,
|
|
118
|
+
country=first_row["country"],
|
|
119
|
+
tournament=first_row["tournament"],
|
|
120
|
+
season=None,
|
|
121
|
+
week_number=None
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
return stats_data_df
|
|
125
|
+
|
|
126
|
+
except WebDriverException as e:
|
|
127
|
+
raise RuntimeError(f"Selenium WebDriver error: {str(e)}")
|
|
128
|
+
except Exception as e:
|
|
129
|
+
raise RuntimeError(f"Unexpected error while fetching player stats data: {e.__class__.__name__} - {e}")
|
|
130
|
+
finally:
|
|
131
|
+
if webdriver_instance:
|
|
132
|
+
webdriver_instance.quit()
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from selenium.webdriver.common.by import By
|
|
4
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
|
5
|
+
from selenium.webdriver.support import expected_conditions as EC
|
|
6
|
+
from selenium.common.exceptions import TimeoutException, WebDriverException
|
|
7
|
+
from datafc.utils._setup_webdriver import setup_webdriver
|
|
8
|
+
from datafc.utils._save_files import save_json, save_excel
|
|
9
|
+
from datafc.utils._config import ALLOWED_SOURCES, API_BASE_URLS
|
|
10
|
+
|
|
11
|
+
def squad_data(
    standings_df: pd.DataFrame,
    data_source: str = "sofascore",
    element_load_timeout: int = 10,
    enable_json_export: bool = False,
    enable_excel_export: bool = False
) -> pd.DataFrame:
    """
    Fetches squad data for each team in the provided standings dataset.

    Only standings rows whose ``category`` column equals ``"Total"`` are
    processed. For each of those rows one request is made to
    ``/api/v1/team/{team_id}/players`` and one output row is produced per
    player found under the ``"players"`` key of the response.

    Failures that occur while waiting for or parsing a single team's response
    (timeout, invalid JSON, WebDriver errors) are printed and that team is
    skipped; the remaining teams are still processed.

    Args:
        standings_df (pd.DataFrame): A DataFrame containing team metadata. The
            columns ``category``, ``country``, ``tournament``, ``team_name``
            and ``team_id`` are read.
        data_source (str): The data source; must be one of ``ALLOWED_SOURCES``
            ('sofavpn' or 'sofascore'). Defaults to 'sofascore'.
        element_load_timeout (int): The maximum time (in seconds) to wait for
            the ``<pre>`` element holding the API response. Defaults to `10`.
        enable_json_export (bool): If `True`, saves the fetched data as a JSON file. Defaults to `False`.
        enable_excel_export (bool): If `True`, saves the fetched data as an Excel file. Defaults to `False`.

    Returns:
        pd.DataFrame: One row per player with the columns ``country``,
        ``tournament``, ``team_name``, ``team_id``, ``player_name``,
        ``player_id``, ``age``, ``height``, ``player_country``, ``position``,
        ``preferred_foot``, ``contract_until``, ``market_value`` and
        ``market_currency``. Note that ``age`` holds the raw
        ``dateOfBirthTimestamp`` value from the response, not an age in years.

    Raises:
        ValueError: If ``data_source`` is not allowed, or ``standings_df`` is
            ``None`` or empty.
        RuntimeError: For any error raised after the WebDriver setup starts.
            This includes the case where no squad rows were collected: the
            internal ``ValueError`` is raised inside the ``try`` block and is
            therefore wrapped as well.
    """
    if data_source not in ALLOWED_SOURCES:
        raise ValueError(f"Invalid data source: {data_source}. Must be one of {ALLOWED_SOURCES}")

    if standings_df is None or standings_df.empty:
        raise ValueError("Standings dataframe must be provided and cannot be empty.")

    webdriver_instance = None
    try:
        webdriver_instance = setup_webdriver()
        squads_data_list = []

        standings_df = standings_df[standings_df["category"] == "Total"]
        for _, row in standings_df.iterrows():
            country = row["country"]
            tournament = row["tournament"]
            team_name = row["team_name"]
            team_id = row["team_id"]

            url = f"{API_BASE_URLS[data_source]}/api/v1/team/{team_id}/players"
            webdriver_instance.get(url)

            try:
                # The JSON payload is read from the text of the page's <pre> element.
                pre_tag = WebDriverWait(webdriver_instance, element_load_timeout).until(
                    EC.visibility_of_element_located((By.TAG_NAME, "pre"))
                )

                squad_json = json.loads(pre_tag.text)

                for player_info in squad_json.get("players", []):
                    player = player_info.get("player")
                    if not player:
                        # A malformed entry must not discard the rest of the squad.
                        continue

                    # `or {}` also covers keys that are present but null, which
                    # `.get(key, {})` would hand back as None and crash on.
                    player_country = player.get("country") or {}
                    market_value = player.get("proposedMarketValueRaw") or {}

                    squads_data_list.append({
                        "country": country,
                        "tournament": tournament,
                        "team_name": team_name,
                        "team_id": team_id,
                        "player_name": player.get("name"),
                        "player_id": player.get("id"),
                        "age": player.get("dateOfBirthTimestamp"),
                        "height": player.get("height"),
                        "player_country": player_country.get("name"),
                        "position": player.get("position"),
                        "preferred_foot": player.get("preferredFoot"),
                        "contract_until": player.get("contractUntilTimestamp"),
                        "market_value": market_value.get("value"),
                        "market_currency": market_value.get("currency")
                    })
            # TimeoutException must stay ahead of WebDriverException, its base class.
            except TimeoutException:
                print(f"Timeout while fetching squad data for team_id {team_id}.")
            except json.JSONDecodeError:
                print(f"Failed to decode squad data for team_id {team_id}.")
            except WebDriverException as e:
                print(f"Selenium WebDriver error while fetching squad data for team_id {team_id}: {str(e)}")
            except Exception as e:
                print(f"Unexpected error while fetching squad data for team_id {team_id}: {e.__class__.__name__} - {e}")

        squads_data_df = pd.DataFrame(squads_data_list)

        if squads_data_df.empty:
            raise ValueError("No squad data found for the specified teams.")

        if enable_json_export or enable_excel_export:
            # Export metadata is taken from the first collected row.
            first_row = squads_data_df.iloc[0]
            export_kwargs = {
                "data": squads_data_df,
                "data_source": data_source,
                "country": first_row["country"],
                "tournament": first_row["tournament"],
                "season": None,
                "week_number": None,
            }

            if enable_json_export:
                save_json(**export_kwargs)

            if enable_excel_export:
                save_excel(**export_kwargs)

        return squads_data_df

    except WebDriverException as e:
        raise RuntimeError(f"Selenium WebDriver error: {str(e)}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error while fetching squad data: {e.__class__.__name__} - {e}") from e

    finally:
        if webdriver_instance:
            webdriver_instance.quit()
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import pandas as pd
|
|
3
|
+
from selenium.webdriver.common.by import By
|
|
4
|
+
from selenium.webdriver.support.ui import WebDriverWait
|
|
5
|
+
from selenium.webdriver.support import expected_conditions as EC
|
|
6
|
+
from selenium.common.exceptions import TimeoutException, WebDriverException
|
|
7
|
+
from datafc.utils._setup_webdriver import setup_webdriver
|
|
8
|
+
from datafc.utils._save_files import save_json, save_excel
|
|
9
|
+
from datafc.utils._config import ALLOWED_SOURCES, API_BASE_URLS
|
|
10
|
+
|
|
11
|
+
def team_stats_data(
    standings_df: pd.DataFrame,
    tournament_id: int,
    season_id: int,
    data_source: str = "sofascore",
    element_load_timeout: int = 10,
    enable_json_export: bool = False,
    enable_excel_export: bool = False
) -> pd.DataFrame:
    """
    Fetches team statistics data for each team in the provided standings dataset.

    Only standings rows whose ``category`` column equals ``"Total"`` are
    processed. For each of those rows one request is made to
    ``/api/v1/team/{team_id}/unique-tournament/{tournament_id}/season/{season_id}/statistics/overall``
    and every entry under the ``"statistics"`` key of the response becomes one
    output row (long format: one row per team and statistic).

    Failures that occur while waiting for or parsing a single team's response
    (timeout, invalid JSON, WebDriver errors) are printed and that team is
    skipped; the remaining teams are still processed.

    Args:
        standings_df (pd.DataFrame): A DataFrame containing team metadata. The
            columns ``category``, ``country``, ``tournament``, ``team_name``
            and ``team_id`` are read.
        tournament_id (int): The unique identifier for the tournament.
        season_id (int): The unique identifier for the season.
        data_source (str): The data source; must be one of ``ALLOWED_SOURCES``.
            The base URL is looked up under the key ``data_source + '2'`` in
            ``API_BASE_URLS``. Defaults to 'sofascore'.
        element_load_timeout (int): The maximum time (in seconds) to wait for
            the ``<pre>`` element holding the API response. Defaults to `10`.
        enable_json_export (bool): If `True`, saves the fetched data as a JSON file. Defaults to `False`.
        enable_excel_export (bool): If `True`, saves the fetched data as an Excel file. Defaults to `False`.

    Returns:
        pd.DataFrame: One row per team and statistic with the columns
        ``country``, ``tournament``, ``team_name``, ``team_id``, ``stat`` and
        ``value``.

    Raises:
        ValueError: If ``data_source`` is not allowed, or ``standings_df`` is
            ``None`` or empty.
        RuntimeError: For any error raised after the WebDriver setup starts.
            This includes the case where no statistics rows were collected:
            the internal ``ValueError`` is raised inside the ``try`` block and
            is therefore wrapped as well.
    """
    if data_source not in ALLOWED_SOURCES:
        raise ValueError(f"Invalid data source: {data_source}. Must be one of {ALLOWED_SOURCES}")

    if standings_df is None or standings_df.empty:
        raise ValueError("Standings dataframe must be provided and cannot be empty.")

    # Statistic names equal to a metadata column name are dropped from the output.
    metadata_columns = {"country", "tournament", "team_name", "team_id"}

    webdriver_instance = None
    try:
        webdriver_instance = setup_webdriver()
        stats_data_list = []

        standings_df = standings_df[standings_df["category"] == "Total"]
        for _, row in standings_df.iterrows():
            country = row["country"]
            tournament = row["tournament"]
            team_name = row["team_name"]
            team_id = row["team_id"]

            url = f"{API_BASE_URLS[data_source + '2']}/api/v1/team/{team_id}/unique-tournament/{tournament_id}/season/{season_id}/statistics/overall"
            webdriver_instance.get(url)

            try:
                # The JSON payload is read from the text of the page's <pre> element.
                pre_tag = WebDriverWait(webdriver_instance, element_load_timeout).until(
                    EC.visibility_of_element_located((By.TAG_NAME, "pre"))
                )

                stats_json = json.loads(pre_tag.text)
                statistics = stats_json.get("statistics", {})
                team_rows = [
                    {"country": country, "tournament": tournament, "team_name": team_name, "team_id": team_id, "stat": stat, "value": value}
                    for stat, value in statistics.items()
                    if stat not in metadata_columns
                ]
                stats_data_list.extend(team_rows)
            # TimeoutException must stay ahead of WebDriverException, its base class.
            except TimeoutException:
                print(f"Timeout while fetching team stats data for team_id {team_id}.")
            except json.JSONDecodeError:
                print(f"Failed to decode team stats data for team_id {team_id}.")
            except WebDriverException as e:
                print(f"Selenium WebDriver error while fetching team stats data for team_id {team_id}: {str(e)}")
            except Exception as e:
                print(f"Unexpected error while fetching team stats data for team_id {team_id}: {e.__class__.__name__} - {e}")

        stats_data_df = pd.DataFrame(stats_data_list)

        if stats_data_df.empty:
            raise ValueError("No team statistics data found for the specified teams.")

        if enable_json_export or enable_excel_export:
            # Export metadata is taken from the first collected row.
            first_row = stats_data_df.iloc[0]
            export_kwargs = {
                "data": stats_data_df,
                "data_source": data_source,
                "country": first_row["country"],
                "tournament": first_row["tournament"],
                "season": None,
                "week_number": None,
            }

            if enable_json_export:
                save_json(**export_kwargs)

            if enable_excel_export:
                save_excel(**export_kwargs)

        return stats_data_df

    except WebDriverException as e:
        raise RuntimeError(f"Selenium WebDriver error: {str(e)}") from e
    except Exception as e:
        raise RuntimeError(f"Unexpected error while fetching team stats data: {e.__class__.__name__} - {e}") from e
    finally:
        if webdriver_instance:
            webdriver_instance.quit()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: datafc
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.5.0
|
|
4
4
|
Summary: A scalable Python library for fetching, processing, and exporting structured football match data.
|
|
5
5
|
Home-page: https://github.com/urazakgul/datafc
|
|
6
6
|
Author: Uraz Akgül
|
|
@@ -14,7 +14,7 @@ Requires-Python: >=3.8
|
|
|
14
14
|
Description-Content-Type: text/markdown
|
|
15
15
|
License-File: LICENSE
|
|
16
16
|
|
|
17
|
-
# datafc v1.4.0
|
|
17
|
+
# datafc v1.5.0
|
|
18
18
|
|
|
19
19
|
## Overview
|
|
20
20
|
|
|
@@ -53,7 +53,7 @@ pip install git+https://github.com/urazakgul/datafc.git
|
|
|
53
53
|
To install a specific version of `datafc`, use:
|
|
54
54
|
|
|
55
55
|
```bash
|
|
56
|
-
pip install datafc==1.4.0
|
|
56
|
+
pip install datafc==1.5.0
|
|
57
57
|
```
|
|
58
58
|
|
|
59
59
|
If you already have `datafc` installed and want to upgrade to the latest version, run:
|
|
@@ -111,7 +111,10 @@ from datafc.sofascore import (
|
|
|
111
111
|
substitutions_data,
|
|
112
112
|
goal_networks_data,
|
|
113
113
|
shots_data,
|
|
114
|
-
standings_data
|
|
114
|
+
standings_data,
|
|
115
|
+
team_stats_data,
|
|
116
|
+
player_stats_data,
|
|
117
|
+
squad_data
|
|
115
118
|
)
|
|
116
119
|
```
|
|
117
120
|
|
|
@@ -141,10 +144,22 @@ The `lineups_data` function fetches player lineup details for each match and is
|
|
|
141
144
|
|
|
142
145
|
Without `lineups_data`, these dependent functions will not work as expected.
|
|
143
146
|
|
|
147
|
+
### `standings_data`: A Foundation for Team and Player-Level Functions
|
|
148
|
+
|
|
144
149
|
Exception: `standings_data` and `past_matches_data`
|
|
145
150
|
|
|
146
151
|
Unlike other functions, `standings_data` and `past_matches_data` do not require `match_data` or `lineups_data`. They can be executed independently using only `tournament_id` and `season_id`. Additionally, `past_matches_data` includes an extra field: `week_number`.
|
|
147
152
|
|
|
153
|
+
However, `standings_data` serves as a critical dependency for the following functions:
|
|
154
|
+
|
|
155
|
+
* `team_stats_data`
|
|
156
|
+
* `player_stats_data`
|
|
157
|
+
* `squad_data`
|
|
158
|
+
|
|
159
|
+
These functions rely on team-level metadata (such as `team_id`) provided by `standings_data` to fetch more granular data. Ensure that `standings_data` is successfully executed and includes teams with `category == 'Total'` before calling any of the above functions.
|
|
160
|
+
|
|
161
|
+
`past_matches_data` also works independently and includes an extra field: `week_number`.
|
|
162
|
+
|
|
148
163
|
### Match Data
|
|
149
164
|
|
|
150
165
|
#### `match_data`
|
|
@@ -864,8 +879,182 @@ Dependencies:
|
|
|
864
879
|
|
|
865
880
|
* No prior function dependency required.
|
|
866
881
|
|
|
882
|
+
### Team Statistics Data
|
|
883
|
+
|
|
884
|
+
#### `team_stats_data`
|
|
885
|
+
|
|
886
|
+
The `team_stats_data` function fetches detailed statistical data for each team in a given tournament and season, based on the team list provided by `standings_data`.
|
|
887
|
+
|
|
888
|
+
Note: This function requires the output of `standings_data` and only processes rows where `category == 'Total'`.
|
|
889
|
+
|
|
890
|
+
Example Usage:
|
|
891
|
+
|
|
892
|
+
```python
|
|
893
|
+
standings_df = standings_data(
|
|
894
|
+
tournament_id=52,
|
|
895
|
+
season_id=63814,
|
|
896
|
+
data_source="sofascore"
|
|
897
|
+
)
|
|
898
|
+
|
|
899
|
+
team_stats_df = team_stats_data(
|
|
900
|
+
standings_df=standings_df,
|
|
901
|
+
tournament_id=52,
|
|
902
|
+
season_id=63814,
|
|
903
|
+
data_source="sofascore",
|
|
904
|
+
enable_json_export=True,
|
|
905
|
+
enable_excel_export=True
|
|
906
|
+
)
|
|
907
|
+
|
|
908
|
+
print(team_stats_df)
|
|
909
|
+
```
|
|
910
|
+
|
|
911
|
+
Parameters:
|
|
912
|
+
|
|
913
|
+
* `standings_df` (pd.DataFrame): A DataFrame with metadata on each team, typically returned by `standings_data`.
|
|
914
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
915
|
+
* `season_id` (int): The unique identifier for the season.
|
|
916
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
917
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
918
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
919
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
920
|
+
|
|
921
|
+
Data Structure:
|
|
922
|
+
|
|
923
|
+
The returned DataFrame includes the following columns:
|
|
924
|
+
|
|
925
|
+
* `country`: The country where the tournament is held.
|
|
926
|
+
* `tournament`: The name of the tournament.
|
|
927
|
+
* `team_name`: The name of the team.
|
|
928
|
+
* `team_id`: The unique identifier of the team.
|
|
929
|
+
* `stat`: The name of the statistic.
|
|
930
|
+
* `value`: The value of the statistic.
|
|
931
|
+
|
|
932
|
+
Dependencies:
|
|
933
|
+
|
|
934
|
+
* Requires `standings_data` output as `standings_df`.
|
|
935
|
+
|
|
936
|
+
### Player Statistics Data
|
|
937
|
+
|
|
938
|
+
#### `player_stats_data`
|
|
939
|
+
|
|
940
|
+
The `player_stats_data` function fetches top player statistics for each team in the given standings dataset. It processes player-level metrics like goals, assists, duels won, and more.
|
|
941
|
+
|
|
942
|
+
Note: This function requires the output of `standings_data`, and filters for rows where `category == 'Total'`.
|
|
943
|
+
|
|
944
|
+
Example Usage:
|
|
945
|
+
|
|
946
|
+
```python
|
|
947
|
+
standings_df = standings_data(
|
|
948
|
+
tournament_id=52,
|
|
949
|
+
season_id=63814,
|
|
950
|
+
data_source="sofascore"
|
|
951
|
+
)
|
|
952
|
+
|
|
953
|
+
player_stats_df = player_stats_data(
|
|
954
|
+
standings_df=standings_df,
|
|
955
|
+
tournament_id=52,
|
|
956
|
+
season_id=63814,
|
|
957
|
+
data_source="sofascore",
|
|
958
|
+
enable_json_export=True,
|
|
959
|
+
enable_excel_export=True
|
|
960
|
+
)
|
|
961
|
+
|
|
962
|
+
print(player_stats_df)
|
|
963
|
+
```
|
|
964
|
+
|
|
965
|
+
Parameters:
|
|
966
|
+
|
|
967
|
+
* `standings_df` (pd.DataFrame): A DataFrame with metadata on teams, returned by `standings_data`.
|
|
968
|
+
* `tournament_id` (int): The unique identifier for the tournament.
|
|
969
|
+
* `season_id` (int): The unique identifier for the season.
|
|
970
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
971
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
972
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
973
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
974
|
+
|
|
975
|
+
Data Structure:
|
|
976
|
+
|
|
977
|
+
The returned DataFrame includes the following columns:
|
|
978
|
+
|
|
979
|
+
* `country`: The country where the tournament is held.
|
|
980
|
+
* `tournament`: The name of the tournament.
|
|
981
|
+
* `team_name`: The name of the team.
|
|
982
|
+
* `team_id`: The unique identifier of the team.
|
|
983
|
+
* `player_name`: The name of the player.
|
|
984
|
+
* `player_id`: The unique identifier of the player.
|
|
985
|
+
* `position`: The player’s position.
|
|
986
|
+
* `stat_name`: The name of the statistic.
|
|
987
|
+
* `stat_value`: The value of the statistic.
|
|
988
|
+
|
|
989
|
+
Dependencies:
|
|
990
|
+
|
|
991
|
+
* Requires `standings_data` output as `standings_df`.
|
|
992
|
+
|
|
993
|
+
### Squad Data
|
|
994
|
+
|
|
995
|
+
#### `squad_data`
|
|
996
|
+
|
|
997
|
+
The `squad_data` function fetches detailed squad (roster) information for each team listed in the provided standings dataset. It includes player bio data such as age, height, position, market value, and contract info.
|
|
998
|
+
|
|
999
|
+
Note: This function requires the output of `standings_data`, and only processes rows where `category == 'Total'`.
|
|
1000
|
+
|
|
1001
|
+
Example Usage:
|
|
1002
|
+
|
|
1003
|
+
```python
|
|
1004
|
+
standings_df = standings_data(
|
|
1005
|
+
tournament_id=52,
|
|
1006
|
+
season_id=63814,
|
|
1007
|
+
data_source="sofascore"
|
|
1008
|
+
)
|
|
1009
|
+
|
|
1010
|
+
squad_df = squad_data(
|
|
1011
|
+
standings_df=standings_df,
|
|
1012
|
+
data_source="sofascore",
|
|
1013
|
+
enable_json_export=True,
|
|
1014
|
+
enable_excel_export=True
|
|
1015
|
+
)
|
|
1016
|
+
|
|
1017
|
+
print(squad_df)
|
|
1018
|
+
```
|
|
1019
|
+
|
|
1020
|
+
Parameters:
|
|
1021
|
+
|
|
1022
|
+
* `standings_df` (pd.DataFrame): A DataFrame with team metadata, returned by `standings_data`.
|
|
1023
|
+
* `data_source` (str): The data source (`sofavpn` or `sofascore`). Defaults to `"sofascore"`.
|
|
1024
|
+
* `element_load_timeout` (int): Maximum time (in seconds) to wait for the API response. Defaults to `10`.
|
|
1025
|
+
* `enable_json_export` (bool): If `True`, exports the data as a JSON file. Defaults to `False`.
|
|
1026
|
+
* `enable_excel_export` (bool): If `True`, exports the data as an Excel file. Defaults to `False`.
|
|
1027
|
+
|
|
1028
|
+
Data Structure:
|
|
1029
|
+
|
|
1030
|
+
The returned DataFrame includes the following columns:
|
|
1031
|
+
|
|
1032
|
+
* `country`: The country where the tournament is held.
|
|
1033
|
+
* `tournament`: The name of the tournament.
|
|
1034
|
+
* `team_name`: The name of the team.
|
|
1035
|
+
* `team_id`: The unique identifier of the team.
|
|
1036
|
+
* `player_name`: The name of the player.
|
|
1037
|
+
* `player_id`: The unique identifier of the player.
|
|
1038
|
+
* `age`: The player's date of birth as a UNIX timestamp (not the age in years).
|
|
1039
|
+
* `height`: The height of the player.
|
|
1040
|
+
* `player_country`: The nationality of the player.
|
|
1041
|
+
* `position`: The position of the player.
|
|
1042
|
+
* `preferred_foot`: The preferred foot of the player.
|
|
1043
|
+
* `contract_until`: The contract end date (UNIX timestamp).
|
|
1044
|
+
* `market_value`: The market value of the player.
|
|
1045
|
+
* `market_currency`: The currency used for the market value.
|
|
1046
|
+
|
|
1047
|
+
Dependencies:
|
|
1048
|
+
|
|
1049
|
+
* Requires `standings_data` output as `standings_df`.
|
|
1050
|
+
|
|
867
1051
|
## Changelog
|
|
868
1052
|
|
|
1053
|
+
* v1.5.0
|
|
1054
|
+
* Added `team_stats_data` function to retrieve detailed per-team statistics using `standings_data`.
|
|
1055
|
+
* Added `player_stats_data` function to retrieve player-level top stats per team.
|
|
1056
|
+
* Added `squad_data` function to fetch full squad information including bio and market value.
|
|
1057
|
+
|
|
869
1058
|
* v1.4.0
|
|
870
1059
|
* Added `tournament_type` and `tournament_stage` parameters to `match_data` and `past_matches_data` functions.
|
|
871
1060
|
* Extended support for UEFA tournaments, including UEFA Champions League (UCL), UEFA Europa League (UEL), UEFA Europa Conference League (UECL), and UEFA Nations League (UNL), allowing seamless data fetching across multiple competitions.
|
|
@@ -16,9 +16,12 @@ datafc/sofascore/fetch_match_odds_data.py
|
|
|
16
16
|
datafc/sofascore/fetch_match_stats_data.py
|
|
17
17
|
datafc/sofascore/fetch_momentum_data.py
|
|
18
18
|
datafc/sofascore/fetch_past_matches_data.py
|
|
19
|
+
datafc/sofascore/fetch_player_stats_data.py
|
|
19
20
|
datafc/sofascore/fetch_shots_data.py
|
|
21
|
+
datafc/sofascore/fetch_squad_data.py
|
|
20
22
|
datafc/sofascore/fetch_standings_data.py
|
|
21
23
|
datafc/sofascore/fetch_substitutions_data.py
|
|
24
|
+
datafc/sofascore/fetch_team_stats_data.py
|
|
22
25
|
datafc/utils/__init__.py
|
|
23
26
|
datafc/utils/_config.py
|
|
24
27
|
datafc/utils/_save_files.py
|
|
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
|
5
5
|
|
|
6
6
|
setup(
|
|
7
7
|
name="datafc",
|
|
8
|
-
version="1.4.0",
|
|
8
|
+
version="1.5.0",
|
|
9
9
|
author="Uraz Akgül",
|
|
10
10
|
author_email="urazdev@gmail.com",
|
|
11
11
|
description="A scalable Python library for fetching, processing, and exporting structured football match data.",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|