rustat-python-api 0.3.0__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rustat-python-api
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: A Python wrapper for RuStat API
5
5
  Home-page: https://github.com/dailydaniel/rustat-python-api
6
6
  Author: Daniel Zholkovsky
@@ -38,7 +38,7 @@ schedule = parser.get_schedule(team_id, season_id)
38
38
  keys = list(schedule.keys())
39
39
  match_id = keys[-1]
40
40
 
41
- events = parser.get_events(match_id)
41
+ events = parser.get_events(match_id, process=True)
42
42
 
43
43
  stats = parser.get_match_stats(match_id)
44
44
 
@@ -23,7 +23,7 @@ schedule = parser.get_schedule(team_id, season_id)
23
23
  keys = list(schedule.keys())
24
24
  match_id = keys[-1]
25
25
 
26
- events = parser.get_events(match_id)
26
+ events = parser.get_events(match_id, process=True)
27
27
 
28
28
  stats = parser.get_match_stats(match_id)
29
29
 
@@ -0,0 +1,37 @@
1
# Columns kept, in this order, in the processed events frame
# (processing.filter_data selects exactly these).
columns = [
    "player_name", "team_name", "half", "second",
    "action_id", "action_name",
    "position_name", "possession_number",
    "pos_x", "pos_y", "pos_dest_x", "pos_dest_y",
    "player_id", "number", "team_id",
    "standart_name", "possession_time",
    "opponent_id", "opponent_name",
    "opponent_team_id", "opponent_team_name",
    "opponent_position_name",
    "zone_name", "zone_dest_name", "len",
    "possession_team_id", "possession_team_name", "possession_name",
    "attack_status_name", "attack_type_name", "attack_flang_name",
    "attack_team_id", "attack_team_name", "attack_number",
    "body_name", "gate_x", "gate_y",
    "assistant_id", "assistant_name",
    "shot_type", "touches", "xg",
    "shot_handling", "match_id",
    "receiver_id", "receiver_name",
]

# Columns the parser converts with pd.to_numeric (errors='coerce') whenever
# they are present in the raw events frame.
numeric_columns = [
    "id", "number", "player_id", "team_id",
    "half", "second",
    "pos_x", "pos_y", "pos_dest_x", "pos_dest_y",
    "len", "possession_id", "possession_team_id",
    "opponent_id", "opponent_team_id",
    "zone_id", "zone_dest_id",
    "possession_number", "attack_status_id", "attack_team_id",
    "assistant_id", "touches", "xg",
]

# Tag name per action group; processing.tagging looks these up with
# ``action_id // 1000``.
id2type = {
    1: "pass",
    2: "duel",
    3: "foul",
    4: "shot",
    5: "free kick",
    6: "interception",
    7: "rebound",
    8: "goal",
    9: "clearance",
    10: "bad ball control",
    11: "control",
    12: "attack",
    13: "keeper",
    14: "substitution",
    15: "formation",
    16: "player position",
    17: "ball off",
    18: "match status",
    19: "mistake",
    20: "translation problem",
    21: "carry",
    22: "receive",
    23: "goal attack involvement",
    24: "rating",
    25: "average position",
    26: "cross",
    27: "ball out",
    28: "other",
    29: "video",
    30: "bad mistake",
    31: "bad keeper mistake",
    32: "goal moment",
    33: "team pressing",
    34: "line up",
    35: "sync",
    36: "referee",
    37: "insurance",
    38: "injury",
    128: "staff",
    161: "sub player",
}
@@ -5,6 +5,7 @@ from tqdm import tqdm
5
5
  import time
6
6
 
7
7
  from .urls import URLs
8
+ from .config import numeric_columns
8
9
  from .processing import processing
9
10
 
10
11
 
@@ -16,13 +17,6 @@ class RuStatParser:
16
17
  urls: dict = URLs,
17
18
  sleep: int = -1
18
19
  ):
19
- self.numeric_columns = [
20
- 'id', 'number', 'player_id', 'team_id', 'half', 'second',
21
- 'pos_x', 'pos_y', 'pos_dest_x', 'pos_dest_y', 'len', 'possession_id', 'possession_team_id',
22
- 'opponent_id', 'opponent_team_id', 'zone_id', 'zone_dest_id',
23
- 'possession_number', 'attack_status_id', 'attack_team_id', 'assistant_id', 'touches', 'xg'
24
- ]
25
-
26
20
  self.user = user
27
21
  self.password = password
28
22
  self.urls = urls
@@ -110,8 +104,8 @@ class RuStatParser:
110
104
 
111
105
  df = pd.json_normalize(data["data"]["row"])
112
106
 
113
- numeric_columns = [column for column in self.numeric_columns if column in df.columns]
114
- df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
107
+ current_numeric_columns = [column for column in numeric_columns if column in df.columns]
108
+ df[current_numeric_columns] = df[current_numeric_columns].apply(pd.to_numeric, errors='coerce')
115
109
 
116
110
  if process:
117
111
  df['match_id'] = match_id
@@ -0,0 +1,82 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+ from .config import columns, id2type
5
+
6
+
7
def process_list(x: pd.Series):
    """Collapse the non-null values of one group column into a single cell.

    Args:
        x: The values of one column inside one group.

    Returns:
        ``np.nan`` when no non-null value exists, the value itself when there
        is exactly one distinct non-null value, otherwise the list of distinct
        non-null values as returned by ``Series.unique``.
    """
    distinct = x.dropna().unique().tolist()
    if not distinct:
        return np.nan
    if len(distinct) == 1:
        return distinct[0]
    return distinct
15
+
16
+
17
def gluing(df: pd.DataFrame) -> pd.DataFrame:
    """Merge rows that share player, half, second and position into one row.

    All remaining columns are aggregated with ``process_list``. If the merged
    ``possession_number`` ends up as a list, its largest element is kept. The
    result is ordered by half, second and possession number and re-indexed
    from 0.
    """
    group_keys = ['player_id', 'half', 'second', 'pos_x', 'pos_y']
    order_keys = ['half', 'second', 'possession_number']

    def largest(value):
        # Lists appear when a glued group spanned several possession numbers.
        if isinstance(value, list):
            return max(value)
        return value

    glued = df.groupby(group_keys).agg(process_list).reset_index()
    glued['possession_number'] = glued['possession_number'].apply(largest)
    glued = glued.sort_values(by=order_keys)
    return glued.reset_index(drop=True)
26
+
27
+
28
def add_reciever(glued_df: pd.DataFrame) -> pd.DataFrame:
    """Attach the receiving player to every action followed by its reception.

    A row is treated as a reception when its ``action_name`` equals
    'Ball receiving', it starts at the destination of the previous row, and it
    has the same team and possession number but a different player than the
    previous row. The row directly before a reception gets ``receiver_id`` and
    ``receiver_name`` set to the receiving player; all other rows get NaN.
    'Ball receiving' rows are removed from the result.

    Args:
        glued_df: Event rows; consecutive rows are compared with each other,
            so the row order matters. The frame is not modified.

    Returns:
        A new frame without 'Ball receiving' rows, re-indexed from 0.
    """
    df = glued_df.copy()

    is_reception = (
        (df['action_name'] == 'Ball receiving')
        & (df['pos_x'] == df['pos_dest_x'].shift(1))
        & (df['pos_y'] == df['pos_dest_y'].shift(1))
        & (df['team_id'] == df['team_id'].shift(1))
        & (df['player_id'] != df['player_id'].shift(1))
        & (df['possession_number'] == df['possession_number'].shift(1))
    )

    # Row k has a receiver when row k + 1 is its reception. Working with a
    # positional boolean mask (instead of index arithmetic) keeps this correct
    # for any index, not only 0..n-1.
    has_receiver = is_reception.shift(-1, fill_value=False).to_numpy()

    # The receiver is the player of the *next* row, hence shift(-1); taking the
    # previous row would credit the pass to whoever acted before the passer.
    df['receiver_id'] = df['player_id'].shift(-1).where(has_receiver)
    df['receiver_name'] = df['player_name'].shift(-1).where(has_receiver)

    return df[df['action_name'] != 'Ball receiving'].reset_index(drop=True)
51
+
52
+
53
def filter_data(df: pd.DataFrame) -> pd.DataFrame:
    """Project events onto the configured ``columns`` and drop unusable rows.

    Entries of ``columns`` that are missing from ``df`` are added filled with
    NaN. A row is kept when its ``possession_number`` is not NaN or its
    ``second`` is not 0.

    Args:
        df: Event rows. The frame is not modified.

    Returns:
        A new frame with exactly ``columns``, in that order, re-indexed from 0.
    """
    # reindex adds every missing column as NaN in one step and returns a new
    # frame, so the caller's DataFrame is not mutated column by column.
    selected = df.reindex(columns=columns)
    keep = selected['possession_number'].notna() | (selected['second'] != 0)
    return selected[keep].reset_index(drop=True)
59
+
60
+
61
def tagging(df: pd.DataFrame) -> pd.DataFrame:
    """Turn action names and ids into ``sub_tags`` and ``tags`` list columns.

    ``action_name`` is renamed to ``sub_tags`` and every cell becomes a list.
    ``action_id`` is replaced by ``tags``: each id is integer-divided by 1000,
    duplicates are removed, and the remaining values are looked up in
    ``id2type`` in ascending order. A missing (NaN) action id yields an empty
    list instead of failing on ``int(nan)``.

    Args:
        df: Event rows with ``action_name`` and ``action_id`` columns.

    Returns:
        A new frame with ``sub_tags`` and ``tags`` in place of the two inputs.

    Raises:
        KeyError: If ``action_id // 1000`` is not a key of ``id2type``.
    """
    def as_list(value):
        return value if isinstance(value, list) else [value]

    def to_tags(action_ids):
        if not isinstance(action_ids, list):
            # filter_data fills an absent action_id column with NaN.
            if pd.isna(action_ids):
                return []
            action_ids = [action_ids]
        # sorted() makes the tag order deterministic; plain set iteration
        # order is an implementation detail.
        groups = sorted({int(action_id) // 1000 for action_id in action_ids})
        return [id2type[group] for group in groups]

    df = df.rename(columns={'action_name': 'sub_tags', 'action_id': 'tags'})
    df['sub_tags'] = df['sub_tags'].apply(as_list)
    df['tags'] = df['tags'].apply(to_tags)

    return df
74
+
75
+
76
def processing(df: pd.DataFrame) -> pd.DataFrame:
    """Run the event pipeline: glue rows, add receivers, filter, then tag."""
    for step in (gluing, add_reciever, filter_data, tagging):
        df = step(df)
    return df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rustat-python-api
3
- Version: 0.3.0
3
+ Version: 0.3.1
4
4
  Summary: A Python wrapper for RuStat API
5
5
  Home-page: https://github.com/dailydaniel/rustat-python-api
6
6
  Author: Daniel Zholkovsky
@@ -38,7 +38,7 @@ schedule = parser.get_schedule(team_id, season_id)
38
38
  keys = list(schedule.keys())
39
39
  match_id = keys[-1]
40
40
 
41
- events = parser.get_events(match_id)
41
+ events = parser.get_events(match_id, process=True)
42
42
 
43
43
  stats = parser.get_match_stats(match_id)
44
44
 
@@ -3,6 +3,7 @@ README.md
3
3
  pyproject.toml
4
4
  setup.py
5
5
  rustat_python_api/__init__.py
6
+ rustat_python_api/config.py
6
7
  rustat_python_api/parser.py
7
8
  rustat_python_api/processing.py
8
9
  rustat_python_api/urls.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='rustat-python-api',
5
- version='0.3.0',
5
+ version='0.3.1',
6
6
  description='A Python wrapper for RuStat API',
7
7
  long_description=open('README.md').read(),
8
8
  long_description_content_type='text/markdown',
@@ -1,161 +0,0 @@
1
- import pandas as pd
2
- import numpy as np
3
-
4
-
5
- def process_list(x: pd.Series):
6
- lst = x.dropna().unique().tolist()
7
- # return str(lst)
8
- if len(lst) == 1:
9
- return lst[0]
10
- elif len(lst) == 0:
11
- return np.nan
12
- else:
13
- return lst
14
-
15
-
16
- def gluing(df: pd.DataFrame) -> pd.DataFrame:
17
- cols = ['player_id', 'half', 'second', 'pos_x', 'pos_y']
18
-
19
- df_gb = df.groupby(cols).agg(process_list).reset_index()
20
- df_gb['possession_number'] = df_gb['possession_number'].apply(
21
- lambda x: max(x) if isinstance(x, list) else x
22
- )
23
- df_gb = df_gb.sort_values(by=['half', 'second', 'possession_number']).reset_index(drop=True)
24
- return df_gb
25
-
26
-
27
- def add_reciever(glued_df: pd.DataFrame) -> pd.DataFrame:
28
- df = glued_df.copy()
29
- df['receiver_id'] = df['player_id'].shift(1)
30
- df['receiver_name'] = df['player_name'].shift(1)
31
-
32
- mask = (
33
- (df['action_name'] == 'Ball receiving')
34
- & (df['pos_x'] == df['pos_dest_x'].shift(1))
35
- & (df['pos_y'] == df['pos_dest_y'].shift(1))
36
- & (df['team_id'] == df['team_id'].shift(1))
37
- & (df['player_id'] != df['player_id'].shift(1))
38
- & (df['possession_number'] == df['possession_number'].shift(1))
39
- )
40
-
41
- idx = df[mask].index
42
- remaining_idx = df.drop(idx-1).index
43
-
44
- df.loc[remaining_idx, 'receiver_id'] = np.nan
45
- df.loc[remaining_idx, 'receiver_name'] = np.nan
46
-
47
- df = df[df['action_name'] != 'Ball receiving'].reset_index(drop=True)
48
-
49
- return df
50
-
51
-
52
- def filter_data(df: pd.DataFrame) -> pd.DataFrame:
53
- columns = [
54
- 'player_name', 'team_name', 'half', 'second', 'action_name',
55
- 'position_name', 'possession_number', 'pos_x', 'pos_y', 'pos_dest_x', 'pos_dest_y',
56
- 'player_id', 'number', 'team_id', 'standart_name', 'possession_time',
57
- 'opponent_id', 'opponent_name', 'opponent_team_id', 'opponent_team_name',
58
- 'opponent_position_name', 'zone_name', 'zone_dest_name', 'len',
59
- 'possession_team_id', 'possession_team_name', 'possession_name',
60
- 'attack_status_name', 'attack_type_name', 'attack_flang_name',
61
- 'attack_team_id', 'attack_team_name', 'attack_number',
62
- 'body_name', 'gate_x', 'gate_y', 'assistant_id',
63
- 'assistant_name', 'shot_type', 'touches', 'xg',
64
- 'shot_handling', 'match_id', 'receiver_id', 'receiver_name'
65
- ]
66
-
67
- for column in columns:
68
- if column not in df.columns:
69
- df[column] = np.nan
70
-
71
- return df[(~df['possession_number'].isna()) | (df['second'] != 0)][columns].reset_index(drop=True)
72
-
73
-
74
- def tag2type(tags: list[str]) -> str:
75
- tags = [tag.lower() for tag in tags]
76
- tags_str = ', '.join(tags)
77
-
78
- if 'pass' in tags_str or 'assist' in tags_str:
79
- pass_tags = [tag for tag in tags if 'pass' in tag and tag != 'pass interception']
80
- assist_tags = [tag for tag in tags if 'assist' in tag]
81
- cross_tags = [tag for tag in tags if 'cross' in tag and tag != 'cross interception']
82
-
83
- if len(pass_tags) > 0 or (len(assist_tags) > 0 and len(cross_tags) == 0):
84
- return 'pass'
85
-
86
- if 'cross' in tags_str:
87
- cross_tags = [tag for tag in tags if 'cross' in tag and tag != 'cross interception']
88
- pass_tags = [tag for tag in tags if 'pass' in tag and tag != 'pass interception']
89
- assist_tags = [tag for tag in tags if 'assist' in tag]
90
-
91
- if len(cross_tags) > 0 or (len(assist_tags) > 0 and len(pass_tags) == 0):
92
- return 'cross'
93
-
94
- if 'shot' in tags_str:
95
- shot_tags = [
96
- tag for tag in tags
97
- if 'shot' in tag and tag != 'shot interception' and 'with a shot' not in tag
98
- ]
99
-
100
- if len(shot_tags) > 0:
101
- return 'shot'
102
-
103
- if 'dribbl' in tags_str:
104
- return 'dribble'
105
-
106
- if 'interception' in tags_str:
107
- return 'interception'
108
-
109
- if 'tackle' in tags_str:
110
- return 'tackle'
111
-
112
- if 'clearance' in tags_str:
113
- return 'clearance'
114
-
115
- if 'lost ball' in tags_str or 'bad ball control' in tags_str or 'mistake' in tags_str:
116
- return 'lost ball'
117
-
118
- if 'recovery' in tags_str:
119
- return 'recovery'
120
-
121
- if 'rebound' in tags_str:
122
- return 'rebound'
123
-
124
- if 'foul' in tags_str or 'yc, ' in tags_str or 'rc, ' in tags_str or 'rc for 2 yc' in tags_str or 'yc' == tags_str or 'rc' == tags_str:
125
- return 'foul'
126
-
127
- if 'challenge' in tags_str:
128
- return 'challenge'
129
-
130
- if 'own goal' in tags_str:
131
- return 'own goal'
132
-
133
- if 'save' in tags_str:
134
- return 'save'
135
-
136
- if 'chance created' in tags_str or 'goal' in tags_str or 'goal-scoring moment' in tags_str:
137
- goal_tags = [tag for tag in tags if 'goal' == tag or 'goal-scoring moment' in tag or 'chance created' in tag]
138
- if len(goal_tags) > 0:
139
- return 'chance'
140
-
141
- if 'opening' in tags_str:
142
- return 'opening'
143
-
144
- return 'other'
145
-
146
-
147
- def tagging(df: pd.DataFrame) -> pd.DataFrame:
148
- df = df.rename(columns={'action_name': 'tags'})
149
- df['tags'] = df['tags'].apply(lambda x: x if isinstance(x, list) else [x])
150
- df['action_type'] = df['tags'].apply(tag2type)
151
-
152
- return df
153
-
154
-
155
- def processing(df: pd.DataFrame) -> pd.DataFrame:
156
- df = gluing(df)
157
- df = add_reciever(df)
158
- df = filter_data(df)
159
- df = tagging(df)
160
-
161
- return df