rustat-python-api 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rustat-python-api
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A Python wrapper for RuStat API
5
5
  Home-page: https://github.com/dailydaniel/rustat-python-api
6
6
  Author: Daniel Zholkovsky
@@ -5,6 +5,7 @@ from tqdm import tqdm
5
5
  import time
6
6
 
7
7
  from .urls import URLs
8
+ from .processing import processing
8
9
 
9
10
 
10
11
  class RuStatParser:
@@ -95,7 +96,7 @@ class RuStatParser:
95
96
  for row in data["data"]["row"]
96
97
  }
97
98
 
98
- def get_events(self, match_id: int) -> pd.DataFrame | None:
99
+ def get_events(self, match_id: int, process: bool = True) -> pd.DataFrame | None:
99
100
  data = self.resp2data(
100
101
  self.urls["events"].format(
101
102
  user=self.user,
@@ -112,6 +113,10 @@ class RuStatParser:
112
113
  numeric_columns = [column for column in self.numeric_columns if column in df.columns]
113
114
  df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
114
115
 
116
+ if process:
117
+ df['match_id'] = match_id
118
+ df = processing(df)
119
+
115
120
  return df
116
121
 
117
122
  def get_tracking(self, match_id: int) -> pd.DataFrame | None:
@@ -0,0 +1,161 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+
4
+
5
def process_list(x: pd.Series):
    """Collapse a Series into its distinct non-null values.

    Returns:
        ``np.nan`` when the Series holds no non-null value, the value itself
        when there is exactly one distinct value, otherwise the list of
        distinct values as produced by ``Series.unique()``.
    """
    distinct = x.dropna().unique().tolist()

    if not distinct:
        return np.nan
    if len(distinct) > 1:
        return distinct
    return distinct[0]
14
+
15
+
16
def gluing(df: pd.DataFrame) -> pd.DataFrame:
    """Merge rows that share player, half, second and position into one row.

    Every other column is aggregated with ``process_list``, so a merged cell
    holds NaN, a single value, or a list of distinct values. When the merged
    ``possession_number`` is a list, its largest element is kept. The result
    is ordered by half, second and possession number with a fresh 0..n-1 index.
    """
    key_columns = ['player_id', 'half', 'second', 'pos_x', 'pos_y']
    sort_columns = ['half', 'second', 'possession_number']

    def largest_if_list(value):
        # Several possession numbers glued together: keep the largest one.
        if isinstance(value, list):
            return max(value)
        return value

    glued = df.groupby(key_columns).agg(process_list).reset_index()
    glued['possession_number'] = glued['possession_number'].apply(largest_if_list)

    ordered = glued.sort_values(by=sort_columns)
    return ordered.reset_index(drop=True)
25
+
26
+
27
def add_reciever(glued_df: pd.DataFrame) -> pd.DataFrame:
    """Attach the receiving player to each received action and drop receptions.

    A row counts as the reception of the row directly before it when it is a
    'Ball receiving' event located exactly at that row's destination
    (``pos_dest_x``/``pos_dest_y``), made by a different player of the same
    team within the same possession.

    Args:
        glued_df: Events already in playing order (row i+1 follows row i).
            The frame is not modified.

    Returns:
        A new DataFrame without the 'Ball receiving' rows and with two extra
        columns, ``receiver_id`` and ``receiver_name``, holding the player of
        the matching reception, or NaN when the next row is not one.
    """
    # Work on a fresh 0..n-1 index so the row-to-row pairing below is purely
    # positional and does not depend on the caller's index labels.
    df = glued_df.reset_index(drop=True)

    is_reception = (
        (df['action_name'] == 'Ball receiving')
        & (df['pos_x'] == df['pos_dest_x'].shift(1))
        & (df['pos_y'] == df['pos_dest_y'].shift(1))
        & (df['team_id'] == df['team_id'].shift(1))
        & (df['player_id'] != df['player_id'].shift(1))
        & (df['possession_number'] == df['possession_number'].shift(1))
    )

    # The receiver lives on the row AFTER the action, so both the flag and the
    # player columns are pulled up one row (shift(-1)). Shifting down instead
    # would label the action with whoever acted before it.
    has_reception_next = is_reception.shift(-1, fill_value=False)
    df['receiver_id'] = df['player_id'].shift(-1).where(has_reception_next)
    df['receiver_name'] = df['player_name'].shift(-1).where(has_reception_next)

    return df[df['action_name'] != 'Ball receiving'].reset_index(drop=True)
50
+
51
+
52
def filter_data(df: pd.DataFrame) -> pd.DataFrame:
    """Project events onto the fixed output schema and drop placeholder rows.

    Columns of the schema that are absent from ``df`` are created and filled
    with NaN; columns outside the schema are discarded. A row is dropped only
    when its ``possession_number`` is missing AND its ``second`` equals 0.

    Args:
        df: Event table. It is left untouched; missing columns are added to a
            new frame, not to the caller's object.

    Returns:
        A new DataFrame with exactly the schema columns, in schema order, and
        a fresh 0..n-1 index.
    """
    columns = [
        'player_name', 'team_name', 'half', 'second', 'action_name',
        'position_name', 'possession_number', 'pos_x', 'pos_y', 'pos_dest_x', 'pos_dest_y',
        'player_id', 'number', 'team_id', 'standart_name', 'possession_time',
        'opponent_id', 'opponent_name', 'opponent_team_id', 'opponent_team_name',
        'opponent_position_name', 'zone_name', 'zone_dest_name', 'len',
        'possession_team_id', 'possession_team_name', 'possession_name',
        'attack_status_name', 'attack_type_name', 'attack_flang_name',
        'attack_team_id', 'attack_team_name', 'attack_number',
        'body_name', 'gate_x', 'gate_y', 'assistant_id',
        'assistant_name', 'shot_type', 'touches', 'xg',
        'shot_handling', 'match_id', 'receiver_id', 'receiver_name',
    ]

    # assign() returns a new frame, so the caller's DataFrame never grows
    # extra columns as a side effect of calling this function.
    missing = {column: np.nan for column in columns if column not in df.columns}
    completed = df.assign(**missing)

    keep = completed['possession_number'].notna() | (completed['second'] != 0)
    return completed.loc[keep, columns].reset_index(drop=True)
72
+
73
+
74
def tag2type(tags: list[str]) -> str:
    """Map the tags of one event to a single coarse action type.

    Matching is case-insensitive and mostly substring based. The checks run in
    a fixed priority order and the first hit wins: pass, cross, shot, dribble,
    interception, tackle, clearance, lost ball, recovery, rebound, foul,
    challenge, own goal, save, chance, opening.

    Args:
        tags: Tag strings of the event. Entries that are not strings (for
            example NaN for a missing action name) are ignored.

    Returns:
        The action type, or ``'other'`` when nothing matches (including an
        empty tag list).
    """
    # Skip non-string entries so a missing action name (NaN) yields 'other'
    # instead of raising on ``.lower()``.
    tags = [tag.lower() for tag in tags if isinstance(tag, str)]
    tags_str = ', '.join(tags)

    # '<x> interception' tags describe the defender's action, not a pass/cross.
    has_pass = any('pass' in tag and tag != 'pass interception' for tag in tags)
    has_cross = any('cross' in tag and tag != 'cross interception' for tag in tags)
    has_assist = any('assist' in tag for tag in tags)

    # An assist counts as a pass unless the same event is tagged as a cross.
    if has_pass or (has_assist and not has_cross):
        return 'pass'

    if has_cross:
        return 'cross'

    # 'with a shot' qualifies another action and is not itself a shot tag.
    if any(
        'shot' in tag and tag != 'shot interception' and 'with a shot' not in tag
        for tag in tags
    ):
        return 'shot'

    if 'dribbl' in tags_str:
        return 'dribble'

    if 'interception' in tags_str:
        return 'interception'

    if 'tackle' in tags_str:
        return 'tackle'

    if 'clearance' in tags_str:
        return 'clearance'

    if 'lost ball' in tags_str or 'bad ball control' in tags_str or 'mistake' in tags_str:
        return 'lost ball'

    if 'recovery' in tags_str:
        return 'recovery'

    if 'rebound' in tags_str:
        return 'rebound'

    # Cards: besides the substring forms, test exact membership so a 'yc'/'rc'
    # tag is recognised wherever it sits in the list, including last.
    if (
        'foul' in tags_str
        or 'rc for 2 yc' in tags_str
        or 'yc, ' in tags_str
        or 'rc, ' in tags_str
        or 'yc' in tags
        or 'rc' in tags
    ):
        return 'foul'

    if 'challenge' in tags_str:
        return 'challenge'

    if 'own goal' in tags_str:
        return 'own goal'

    if 'save' in tags_str:
        return 'save'

    # 'goal' must be the whole tag; a tag that merely contains the word is
    # not a chance.
    if any(
        tag == 'goal' or 'goal-scoring moment' in tag or 'chance created' in tag
        for tag in tags
    ):
        return 'chance'

    if 'opening' in tags_str:
        return 'opening'

    return 'other'
145
+
146
+
147
def tagging(df: pd.DataFrame) -> pd.DataFrame:
    """Rename ``action_name`` to ``tags`` and derive ``action_type`` from it.

    Every ``tags`` cell is normalised to a list: an existing list is kept, a
    single value is wrapped, and a missing value (NaN/None) becomes an empty
    list so that classification does not fail on it. ``action_type`` is the
    result of ``tag2type`` for that list.

    Args:
        df: Event table with an ``action_name`` column. Not modified.

    Returns:
        A new DataFrame with ``tags`` and ``action_type`` columns.
    """
    def as_tag_list(value):
        if isinstance(value, list):
            return value
        # A missing action name carries no tags at all.
        return [] if pd.isna(value) else [value]

    df = df.rename(columns={'action_name': 'tags'})
    df['tags'] = df['tags'].apply(as_tag_list)
    df['action_type'] = df['tags'].apply(tag2type)

    return df
153
+
154
+
155
def processing(df: pd.DataFrame) -> pd.DataFrame:
    """Run the event post-processing pipeline and return its result.

    The stages are applied in this order, each receiving the previous stage's
    output: ``gluing``, ``add_reciever``, ``filter_data``, ``tagging``.
    """
    stages = (gluing, add_reciever, filter_data, tagging)

    for stage in stages:
        df = stage(df)

    return df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rustat-python-api
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Summary: A Python wrapper for RuStat API
5
5
  Home-page: https://github.com/dailydaniel/rustat-python-api
6
6
  Author: Daniel Zholkovsky
@@ -4,6 +4,7 @@ pyproject.toml
4
4
  setup.py
5
5
  rustat_python_api/__init__.py
6
6
  rustat_python_api/parser.py
7
+ rustat_python_api/processing.py
7
8
  rustat_python_api/urls.py
8
9
  rustat_python_api.egg-info/PKG-INFO
9
10
  rustat_python_api.egg-info/SOURCES.txt
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='rustat-python-api',
5
- version='0.2.0',
5
+ version='0.3.0',
6
6
  description='A Python wrapper for RuStat API',
7
7
  long_description=open('README.md').read(),
8
8
  long_description_content_type='text/markdown',