rustat-python-api 0.9.0__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,29 @@
1
1
  from .dataloader import MatchInferLoader
2
2
  from .pc_adder import PitchControlAdder
3
3
  from .tr_adder import TrackingFeaturesAdder
4
+ from .shot_loader import ShotInferLoader
5
+ from .shot_features import (
6
+ build_shot_features,
7
+ identify_goalkeepers,
8
+ filter_shots,
9
+ compute_event_features,
10
+ compute_tracking_features,
11
+ compute_pitch_control,
12
+ SHOT_NUMERIC_COLS,
13
+ SHOT_CATEGORICAL_COLS,
14
+ )
4
15
 
5
16
  __all__ = [
6
17
  "PitchControlAdder",
7
18
  "TrackingFeaturesAdder",
8
- "MatchInferLoader"
19
+ "MatchInferLoader",
20
+ "ShotInferLoader",
21
+ "build_shot_features",
22
+ "identify_goalkeepers",
23
+ "filter_shots",
24
+ "compute_event_features",
25
+ "compute_tracking_features",
26
+ "compute_pitch_control",
27
+ "SHOT_NUMERIC_COLS",
28
+ "SHOT_CATEGORICAL_COLS",
9
29
  ]
@@ -0,0 +1,324 @@
1
+ """Shot-level feature engineering for xG.
2
+
3
+ Pure functions that take events + tracking + ball and return a DataFrame of
4
+ shot rows with event, goalkeeper, opponent, shooter and pitch-control features.
5
+
6
+ Shared between:
7
+ - training pipelines (models repo)
8
+ - xG inference server (models repo, src/api)
9
+ - clients that call DynamoLab (e.g. pdf-report-creator/plots/plot_config.py)
10
+
11
+ The contract: shot ordering and column names are stable, so a model trained
12
+ on cache built by `build_shot_features` can be served against features built
13
+ by the same function on inference data.
14
+ """
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ from ast import literal_eval
19
+
20
+
21
+ GOAL_CENTER = np.array([105.0, 34.0])
22
+ POST_LEFT = np.array([105.0, 34.0 - 3.66])
23
+ POST_RIGHT = np.array([105.0, 34.0 + 3.66])
24
+
25
+
26
+ SHOT_NUMERIC_COLS = [
27
+ 'pos_x', 'pos_y', 'distance_to_goal', 'goal_angle',
28
+ 'gk_projection', 'gk_shift',
29
+ 'opp_in_cone', 'opp_between',
30
+ 'opp_radius_3m', 'opp_radius_5m', 'opp_radius_10m',
31
+ 'nearest_opp_dist', 'shooter_speed',
32
+ 'pc_r3', 'pc_r5', 'pc_r10',
33
+ ]
34
+
35
+ SHOT_CATEGORICAL_COLS = ['body_name', 'shot_type', 'attack_type_name', 'standart_name']
36
+
37
+
38
def _parse_tag_list(value):
    """Convert one raw ``tags`` / ``sub_tags`` cell into a Python container.

    Cells may arrive as an already-parsed container, as a numpy array, as the
    string repr of a list (e.g. ``"['shot', 'goal']"``), or as a missing
    value. Missing and blank cells become an empty list so that membership
    tests downstream simply return ``False`` instead of raising.
    """
    # Containers are handled first: round-tripping them through ``str`` is
    # wasteful and, for numpy arrays, wrong ("['a' 'b']" parses as ['ab']).
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, (list, tuple, set, frozenset)):
        return list(value)

    # ``pd.isna`` returns a plain boolean for scalars (None, NaN, pd.NA, ...).
    missing = pd.isna(value)
    if isinstance(missing, (bool, np.bool_)) and missing:
        return []

    text = str(value).strip()
    if not text:
        return []
    return literal_eval(text)


def filter_shots(events: pd.DataFrame) -> pd.DataFrame:
    """Return the rows of `events` tagged as a shot or a goal.

    Args:
        events: Event table with a ``tags`` column. A ``sub_tags`` column is
            parsed as well when present. Cells may be containers or their
            string repr; missing cells are treated as "no tags".

    Returns:
        A copy of the matching rows with a fresh 0..n-1 index, parsed
        ``tags`` / ``sub_tags`` and an integer ``is_goal`` column (1 when
        ``'goal'`` is among the tags, else 0). The input frame is not
        modified.
    """
    events = events.copy()
    events['tags'] = events['tags'].apply(_parse_tag_list)
    if 'sub_tags' in events.columns:
        events['sub_tags'] = events['sub_tags'].apply(_parse_tag_list)

    # astype(bool) keeps the mask usable for boolean indexing even when the
    # frame is empty and ``apply`` cannot infer a boolean dtype.
    is_shot = events['tags'].apply(lambda t: 'shot' in t or 'goal' in t).astype(bool)
    shots = events[is_shot].copy()

    shots['is_goal'] = shots['tags'].apply(lambda t: 'goal' in t).astype(int)

    return shots.reset_index(drop=True)
49
+
50
+
51
def compute_event_features(shots: pd.DataFrame) -> pd.DataFrame:
    """Add `distance_to_goal` and `goal_angle` to a copy of `shots`.

    `distance_to_goal` is the Euclidean distance from (`pos_x`, `pos_y`) to
    `GOAL_CENTER`. `goal_angle` is the angle, in radians, between the two
    vectors that run from the shot location to `POST_LEFT` and `POST_RIGHT`.
    The input frame is left untouched.
    """
    result = shots.copy()

    # Distance from the shot location to the goal centre.
    offset_x = GOAL_CENTER[0] - result['pos_x'].values
    offset_y = GOAL_CENTER[1] - result['pos_y'].values
    result['distance_to_goal'] = np.sqrt(offset_x ** 2 + offset_y ** 2)

    # Vectors from every shot location to each post, one row per shot.
    origin = result[['pos_x', 'pos_y']].values
    to_left_post = POST_LEFT - origin
    to_right_post = POST_RIGHT - origin

    numerator = np.sum(to_left_post * to_right_post, axis=1)
    # The epsilon avoids a division by zero when a shot sits on a post.
    denominator = (
        np.linalg.norm(to_left_post, axis=1) * np.linalg.norm(to_right_post, axis=1)
        + 1e-9
    )

    # Clip before arccos so rounding error cannot leave the [-1, 1] domain.
    result['goal_angle'] = np.arccos(np.clip(numerator / denominator, -1, 1))

    return result
70
+
71
+
72
def identify_goalkeepers(events: pd.DataFrame) -> dict[int, int]:
    """Map team_id -> goalkeeper player_id from `position_name`.

    A row counts as a goalkeeper row when its `position_name` contains
    "Goalkeeper" (case-insensitive). Rows whose `team_id` or `player_id` is
    missing are ignored. When a team has several goalkeeper players, the one
    that appears last in `events` wins.

    Args:
        events: Event table with `position_name`, `team_id` and `player_id`.

    Returns:
        Dictionary with plain-int keys and values.
    """
    is_keeper = events['position_name'].astype(str).str.contains(
        'Goalkeeper', case=False, na=False
    )
    keepers = (
        events.loc[is_keeper, ['team_id', 'player_id']]
        .dropna()  # int(NaN) would raise below
        .drop_duplicates()
    )

    return {
        int(team): int(player)
        for team, player in zip(keepers['team_id'], keepers['player_id'])
    }
84
+
85
+
86
+ def _find_goalkeeper(tracking_at_shot: pd.DataFrame, team_id: int,
87
+ gk_ids: dict[int, int]) -> pd.Series | None:
88
+ opp_team = tracking_at_shot[tracking_at_shot['team_id'] != team_id]
89
+ opp_team_id = opp_team['team_id'].iloc[0] if len(opp_team) > 0 else None
90
+
91
+ if opp_team_id is not None and opp_team_id in gk_ids:
92
+ gk_row = opp_team[opp_team['player_id'] == gk_ids[opp_team_id]]
93
+ if len(gk_row) > 0:
94
+ return gk_row.iloc[0]
95
+
96
+ if len(opp_team) > 0:
97
+ return opp_team.loc[opp_team['pos_x'].idxmax()]
98
+
99
+ return None
100
+
101
+
102
def _get_tracking_at_time(tracking: pd.DataFrame, half: int, second: float) -> pd.DataFrame:
    """Return the tracking rows of `half` whose `second` is closest to `second`.

    All rows sharing the closest timestamp are returned. An empty frame is
    returned when `half` has no rows, or when no timestamp can be compared
    (every `second` value is NaN, or the requested `second` is NaN).
    """
    half_tracking = tracking[tracking['half'] == half]
    if half_tracking.empty:
        return half_tracking

    seconds = half_tracking['second'].to_numpy(dtype=float)
    gaps = np.abs(seconds - second)
    if np.isnan(gaps).all():
        return half_tracking.iloc[0:0]

    # nanargmin skips NaN timestamps; a plain argmin would select the first
    # NaN, and the equality filter below would then match nothing.
    closest_second = seconds[np.nanargmin(gaps)]

    return half_tracking[half_tracking['second'] == closest_second]
112
+
113
+
114
def _build_side_by_half(tracking: pd.DataFrame) -> dict[int, dict[int, str]]:
    """Build a ``{half: {team_id: side}}`` lookup for halves 1 and 2.

    The first-half side of each team is read from the `side_1h` value of that
    team's first row in `tracking`. For the second half the side is swapped:
    'left' becomes 'right' and any other value becomes 'left'. Rows without a
    `team_id` are ignored.
    """
    # One pass over the table: keep the first row of every team. Dropping
    # NaN ids first matters because NaN never compares equal to itself, so a
    # per-team equality filter would find no rows for it.
    first_rows = (
        tracking.dropna(subset=['team_id'])
        .drop_duplicates(subset='team_id')
    )
    side_1h = dict(zip(first_rows['team_id'], first_rows['side_1h']))

    return {
        1: dict(side_1h),
        2: {
            tid: 'right' if side == 'left' else 'left'
            for tid, side in side_1h.items()
        },
    }
125
+
126
+
127
def _normalize_snapshot(snapshot: pd.DataFrame, shooting_team_id: int,
                        side_by_half: dict, half: int) -> pd.DataFrame:
    """Mirror a tracking snapshot when the shooting team's side is 'right'.

    The side is looked up as ``side_by_half[half][shooting_team_id]``. When
    it is 'right', a copy is returned with `pos_x` replaced by
    ``105.0 - pos_x`` and `pos_y` by ``68.0 - pos_y``. In every other case,
    including a half or team missing from `side_by_half`, the input object
    is returned unchanged.
    """
    # NOTE(review): halves absent from the lookup (anything other than 1 and
    # 2 with the current builder) are left un-mirrored instead of raising
    # KeyError; confirm this is the intended orientation for extra time.
    team_side = side_by_half.get(half, {}).get(shooting_team_id)
    if team_side != 'right':
        return snapshot

    flipped = snapshot.copy()
    flipped['pos_x'] = 105.0 - flipped['pos_x']
    flipped['pos_y'] = 68.0 - flipped['pos_y']
    return flipped
137
+
138
+
139
# Output columns of `compute_tracking_features`, in the order they are added.
_TRACKING_FEATURE_COLS = (
    'gk_projection', 'gk_shift',
    'opp_in_cone', 'opp_between',
    'opp_radius_3m', 'opp_radius_5m', 'opp_radius_10m',
    'nearest_opp_dist', 'shooter_speed',
)


def _goalkeeper_features(pos: np.ndarray, gk: pd.Series | None) -> tuple[float, float]:
    """Return ``(gk_projection, gk_shift)`` for one shot; NaNs when `gk` is None."""
    if gk is None:
        return np.nan, np.nan

    gk_pos = np.array([float(gk['pos_x']), float(gk['pos_y'])])

    # Fraction of the goal-centre -> shot segment covered by the projection
    # of the goalkeeper onto it, clipped to [0, 1].
    vec_gb = pos - GOAL_CENTER
    vec_gk = gk_pos - GOAL_CENTER
    projection = np.dot(vec_gk, vec_gb) / (np.dot(vec_gb, vec_gb) + 1e-9)
    projection = np.clip(projection, 0, 1)

    # Goalkeeper's y offset from the goal centre, signed so that positive
    # means "towards the side the shot is taken from".
    ball_side = np.sign(pos[1] - GOAL_CENTER[1])
    if ball_side == 0:
        ball_side = 1.0
    gk_shift = (gk_pos[1] - GOAL_CENTER[1]) * ball_side

    return projection, gk_shift


def _opponent_features(pos: np.ndarray, opp_pos: np.ndarray) -> dict:
    """Return the opponent-count and distance features for one shot."""
    if len(opp_pos) == 0:
        return {
            'opp_in_cone': 0,
            'opp_between': 0,
            'opp_radius_3m': 0,
            'opp_radius_5m': 0,
            'opp_radius_10m': 0,
            'nearest_opp_dist': np.nan,
        }

    rel = opp_pos - pos
    dists = np.sqrt(np.sum(rel ** 2, axis=1))

    # Opponents with a larger x than the shooter.
    ahead = opp_pos[:, 0] > pos[0]

    # An opponent is inside the shooting cone when the 2D cross products of
    # (shooter -> post) with (shooter -> opponent) have opposite signs for
    # the two posts, or one of them is zero.
    to_left = POST_LEFT - pos
    to_right = POST_RIGHT - pos
    cross_left = to_left[0] * rel[:, 1] - to_left[1] * rel[:, 0]
    cross_right = to_right[0] * rel[:, 1] - to_right[1] * rel[:, 0]
    in_cone = ahead & (cross_left * cross_right <= 0)

    return {
        'opp_in_cone': int(np.sum(in_cone)),
        'opp_between': int(np.sum(ahead)),
        'opp_radius_3m': int(np.sum(dists < 3)),
        'opp_radius_5m': int(np.sum(dists < 5)),
        'opp_radius_10m': int(np.sum(dists < 10)),
        'nearest_opp_dist': float(np.min(dists)),
    }


def _shooter_speed(shot: pd.Series, snapshot: pd.DataFrame) -> float:
    """Return the shooter's `speed` from the snapshot, or NaN when unavailable."""
    shooter_id = shot.get('player_id')
    if not pd.notna(shooter_id):
        return np.nan

    shooter_track = snapshot[snapshot['player_id'] == int(shooter_id)]
    if len(shooter_track) > 0 and 'speed' in shooter_track.columns:
        return float(shooter_track.iloc[0]['speed'])
    return np.nan


def _tracking_features_for_shot(shot: pd.Series, tracking: pd.DataFrame,
                                gk_ids: dict[int, int], side_by_half: dict) -> dict:
    """Compute every tracking feature for one shot; all NaN without a snapshot."""
    row = dict.fromkeys(_TRACKING_FEATURE_COLS, np.nan)

    half = shot['half']
    team_id = shot['team_id']
    pos = np.array([shot['pos_x'], shot['pos_y']])

    snapshot = _get_tracking_at_time(tracking, half, shot['second'])
    if snapshot.empty:
        return row

    snapshot = _normalize_snapshot(snapshot, int(team_id), side_by_half, half)

    opponents = snapshot[snapshot['team_id'] != team_id]
    opp_pos = opponents[['pos_x', 'pos_y']].values.astype(float)

    gk = _find_goalkeeper(snapshot, team_id, gk_ids)
    row['gk_projection'], row['gk_shift'] = _goalkeeper_features(pos, gk)
    row.update(_opponent_features(pos, opp_pos))
    row['shooter_speed'] = _shooter_speed(shot, snapshot)

    return row


def compute_tracking_features(
    shots: pd.DataFrame,
    tracking: pd.DataFrame,
    gk_ids: dict[int, int],
) -> pd.DataFrame:
    """Add goalkeeper, opponent and shooter features taken from tracking data.

    For every shot, the tracking rows of the same half with the closest
    `second` form a snapshot, which is mirrored when the shooting team's
    side for that half is 'right'. From that snapshot the function derives:

    - `gk_projection`, `gk_shift`: position of the opposing goalkeeper
      relative to the goal-centre -> shot line (NaN when none is found);
    - `opp_between`: opponents with a larger `pos_x` than the shot;
    - `opp_in_cone`: those of them inside the shooter -> posts cone;
    - `opp_radius_3m` / `_5m` / `_10m`: opponents closer than 3 / 5 / 10;
    - `nearest_opp_dist`: distance to the closest opponent (NaN if none);
    - `shooter_speed`: the shooter's `speed` value in the snapshot.

    Shots without any tracking snapshot get NaN in all nine columns.

    Args:
        shots: Shot rows with `half`, `second`, `team_id`, `pos_x`, `pos_y`
            and optionally `player_id`.
        tracking: Tracking rows with `half`, `second`, `team_id`,
            `player_id`, `pos_x`, `pos_y`, `side_1h` and optionally `speed`.
        gk_ids: Mapping team_id -> goalkeeper player_id.

    Returns:
        A copy of `shots` with the nine feature columns appended.
    """
    shots = shots.copy()
    side_by_half = _build_side_by_half(tracking)

    # One list per output column keeps the values aligned with the shot rows.
    columns = {name: [] for name in _TRACKING_FEATURE_COLS}
    for _, shot in shots.iterrows():
        row = _tracking_features_for_shot(shot, tracking, gk_ids, side_by_half)
        for name, values in columns.items():
            values.append(row[name])

    for name, values in columns.items():
        shots[name] = values

    return shots
260
+
261
+
262
def compute_pitch_control(
    shots: pd.DataFrame,
    tracking: pd.DataFrame,
    ball: pd.DataFrame,
    device: str = "cpu",
    backend: str = "pt",
) -> pd.DataFrame:
    """Add the pitch-control columns `pc_r3`, `pc_r5` and `pc_r10` to the shots.

    Runs `PitchControlAdder` in 'mean' mode for radii 3, 5 and 10, renames
    its ``pc_mean_src_r<R>`` output columns to ``pc_r<R>`` and drops every
    column containing ``pc_mean_dest``. The computation is best-effort: if
    anything inside it raises, a warning is logged and the three columns are
    filled with NaN. The three columns are always present in the result.

    Args:
        shots: Shot rows; `team_id` is cast to int before the computation.
        tracking: Tracking rows; a copy with `team_id` cast to str is used.
        ball: Ball data, passed through to `PitchControlAdder`.
        device: Passed through to `PitchControlAdder`.
        backend: Passed through to `PitchControlAdder`.

    Returns:
        A shot-level DataFrame with the pitch-control columns appended.
    """
    import logging

    from .pc_adder import PitchControlAdder

    src_prefix = 'pc_mean_src_r'
    pc_cols = ('pc_r3', 'pc_r5', 'pc_r10')

    shots = shots.copy()

    try:
        tracking = tracking.copy()
        tracking['team_id'] = tracking['team_id'].astype(str)
        shots['team_id'] = shots['team_id'].astype(int)

        pc_adder = PitchControlAdder(shots, tracking, ball, device=device, backend=backend)
        pc_adder.run(modes=['mean'] * 3, rads=[3, 5, 10])
        shots = pc_adder.events

        shots = shots.rename(columns={
            col: f'pc_r{col.removeprefix(src_prefix)}'
            for col in shots.columns
            if col.startswith(src_prefix)
        })

        dest_cols = [c for c in shots.columns if 'pc_mean_dest' in c]
        shots = shots.drop(columns=dest_cols, errors='ignore')
    except Exception as e:
        # Deliberate best-effort boundary: pitch control is optional, so the
        # other shot features are kept and the failure is reported via the
        # logging system rather than printed to stdout.
        logging.getLogger(__name__).warning(
            "Pitch control failed: %s", e, exc_info=True
        )
        for col in pc_cols:
            shots[col] = np.nan

    # Keep the output schema stable even if the adder produced no column
    # with the expected name for one of the radii.
    for col in pc_cols:
        if col not in shots.columns:
            shots[col] = np.nan

    return shots
298
+
299
+
300
def build_shot_features(
    events: pd.DataFrame,
    tracking: pd.DataFrame,
    ball: pd.DataFrame,
    gk_ids: dict[int, int] | None = None,
    device: str = "cpu",
    backend: str = "pt",
) -> pd.DataFrame:
    """Full pipeline: filter shots -> event features -> tracking features -> pitch control.

    If `gk_ids` is None, goalkeepers are identified from `events['position_name']`.

    Args:
        events: Full event table of the match.
        tracking: Player tracking rows.
        ball: Ball data used by the pitch-control step.
        gk_ids: Optional mapping team_id -> goalkeeper player_id.
        device: Passed through to the pitch-control step.
        backend: Passed through to the pitch-control step.

    Returns:
        One row per shot with all feature columns. When `events` contains no
        shots the result is empty but still carries every column listed in
        `SHOT_NUMERIC_COLS`, so callers can select them unconditionally.
    """
    if gk_ids is None:
        gk_ids = identify_goalkeepers(events)

    shots = filter_shots(events)

    if shots.empty:
        # Keep the column contract for matches without shots: add the
        # feature columns that the skipped steps would have created.
        for col in SHOT_NUMERIC_COLS:
            if col not in shots.columns:
                shots[col] = pd.Series(index=shots.index, dtype=float)
        return shots

    shots = compute_event_features(shots)
    shots = compute_tracking_features(shots, tracking, gk_ids)
    shots = compute_pitch_control(shots, tracking, ball, device=device, backend=backend)

    return shots
@@ -0,0 +1,49 @@
1
+ """Client-side shot loader for xG inference.
2
+
3
+ Mirrors `MatchInferLoader` but produces a shot-level DataFrame (`shots_tr`)
4
+ ready to be sent to `DynamoLab.run_model('xG (tracking)', ...)`. Preserves
5
+ the original event index in `orig_index`, so the caller can scatter the
6
+ returned xG column back onto the full events DataFrame:
7
+
8
+ events.loc[shots_tr['orig_index'], 'xG'] = shots_tr['xG'].to_numpy()
9
+ """
10
+
11
+ import pandas as pd
12
+
13
+ from .shot_features import build_shot_features, identify_goalkeepers
14
+
15
+
16
+ class ShotInferLoader:
17
+ def __init__(
18
+ self,
19
+ events: pd.DataFrame, tracking: pd.DataFrame, ball: pd.DataFrame,
20
+ gk_ids: dict[int, int] | None = None,
21
+ device: str = "cpu", backend: str = "pt",
22
+ ):
23
+ self.events = events
24
+ self.tracking = tracking
25
+ self.ball = ball
26
+ self.gk_ids = gk_ids
27
+ self.device = device
28
+ self.backend = backend
29
+
30
+ self.shots: pd.DataFrame | None = None
31
+
32
+ def _save_index(self) -> None:
33
+ self.events = self.events.copy()
34
+ self.events['orig_index'] = self.events.index
35
+
36
+ def _build_shots(self) -> None:
37
+ gk_ids = self.gk_ids if self.gk_ids is not None else identify_goalkeepers(self.events)
38
+ self.shots = build_shot_features(
39
+ self.events, self.tracking, self.ball, gk_ids,
40
+ device=self.device, backend=self.backend,
41
+ )
42
+
43
+ def fit(self, inplace: bool = False) -> pd.DataFrame | None:
44
+ self._save_index()
45
+ self._build_shots()
46
+
47
+ if not inplace:
48
+ return self.shots
49
+ return None
@@ -461,14 +461,6 @@ class PitchControl:
461
461
  else:
462
462
  Sigma_inv = torch.linalg.inv(Sigma + eye)
463
463
 
464
- # Mask out invalid players by zeroing their Sigma_inv
465
- # This ensures their influence contribution is exactly 0
466
- Sigma_inv = torch.where(
467
- valid_mask.unsqueeze(-1).unsqueeze(-1), # (F,P,1,1)
468
- Sigma_inv,
469
- torch.zeros_like(Sigma_inv),
470
- )
471
-
472
464
  mu = pos_t_clean + 0.5 * sxy # (F,P,2)
473
465
 
474
466
  diff = locs.view(1, 1, -1, 2) # (1,1,N,2)
@@ -483,6 +475,13 @@ class PitchControl:
483
475
  maha = torch.nan_to_num(maha, nan=1e9, posinf=1e9, neginf=1e9)
484
476
  out = torch.exp(-0.5 * maha) # (F,P,N)
485
477
 
478
+ # Zero out invalid players (exp(0)=1 would inflate influence)
479
+ out = torch.where(
480
+ valid_mask.unsqueeze(-1), # (F,P,1)
481
+ out,
482
+ torch.zeros_like(out),
483
+ )
484
+
486
485
  return out.sum(dim=1) # sum over players
487
486
 
488
487
  @staticmethod
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: rustat-python-api
3
- Version: 0.9.0
3
+ Version: 0.10.0
4
4
  Summary: A Python wrapper for RuStat API
5
5
  Home-page: https://github.com/dailydaniel/rustat-python-api
6
6
  Author: Daniel Zholkovsky
@@ -2,17 +2,19 @@ rustat_python_api/__init__.py,sha256=Ij-PAm2y5ss_XAZhKTZus35cRPLzvXFyIswDa_Iq3rs
2
2
  rustat_python_api/config.py,sha256=eMvi1p8Cfvnbp6Cd4bBOwgehVN7thKnaQV5uzWyGZXM,1844
3
3
  rustat_python_api/models_api.py,sha256=oHXEqeCupvZwjVEdoxf7W9LP7ELFKA8-9DuRXpQHLno,1701
4
4
  rustat_python_api/parser.py,sha256=hMPZER6CwPk_dm8GkLulq6G0_jwOyRiZjbbDw3eO1Rc,10863
5
- rustat_python_api/pitch_control.py,sha256=XSakx2KxDAvv17qFoBtsBBIEYGn3OqI_3q3Xfq3_OoA,31688
5
+ rustat_python_api/pitch_control.py,sha256=SCef50coXfkZUmfQJbgHurmwmh3_GWwvpFas1X5QwP8,31621
6
6
  rustat_python_api/processing.py,sha256=sjZdjSs2BKyM_f7T_Utu1CJSF1l-V8UsWrAj7dhqyNg,2930
7
7
  rustat_python_api/urls.py,sha256=iJTD31T6OyXPAhmhViwFXVehrzwsOjBDONA1SIVc_40,1068
8
8
  rustat_python_api/kernels/__init__.py,sha256=eFJ-BMY8VcNZSjf3XjOnZf_nfOQ5t-7Lp57DPCHYOo0,124
9
9
  rustat_python_api/kernels/maha.py,sha256=k2PqY6VghgER2j9QH8xGYq61JLfPaHjirLXb4aLnjQw,2591
10
- rustat_python_api/matching/__init__.py,sha256=QIqkft_NVban8p0rxJsim3x4defZN3eQBqZfWcm0Acs,217
10
+ rustat_python_api/matching/__init__.py,sha256=K6MYaVQuPXD9ybgJBap_PFsLVmJa0-yTBdDEAO8_vBo,739
11
11
  rustat_python_api/matching/dataloader.py,sha256=FZ95ZC5Z-6FrZEVy-d5S1rXt5nIBwYMqTT0hvFlc4mI,2753
12
12
  rustat_python_api/matching/pc_adder.py,sha256=oP84iplIqmqF1jFDN9ernkjKw7wgsmWbuOVPEsoTNWM,7582
13
+ rustat_python_api/matching/shot_features.py,sha256=sSMm1fM1Ob2kuWD1Ltc679TuSCAtyHwtqRpMNF4okk4,10890
14
+ rustat_python_api/matching/shot_loader.py,sha256=2cvJ1Q63QMdYWsYA9-PVYY9DZx6tbbRpW_LXB_WDBt0,1581
13
15
  rustat_python_api/matching/tr_adder.py,sha256=0sAjYfoLitScDhg2yjh2PIKWTseoaSeUa5chPnVau-Q,16025
14
- rustat_python_api-0.9.0.dist-info/LICENSE,sha256=4Cohqg5p6Mq1xyrzdEX8AvFSA62GSVvapEOr2xK_tgY,57
15
- rustat_python_api-0.9.0.dist-info/METADATA,sha256=9_I5OFJ6m9tWDX4g5elAfjNgXpAIbLyJ6kdrVHEm9aE,1920
16
- rustat_python_api-0.9.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
17
- rustat_python_api-0.9.0.dist-info/top_level.txt,sha256=VK0hmkKZE9YThxolUcoE6JtGI67NFeKJMBLuet8kI4w,18
18
- rustat_python_api-0.9.0.dist-info/RECORD,,
16
+ rustat_python_api-0.10.0.dist-info/LICENSE,sha256=4Cohqg5p6Mq1xyrzdEX8AvFSA62GSVvapEOr2xK_tgY,57
17
+ rustat_python_api-0.10.0.dist-info/METADATA,sha256=y5uyWPoC1Rq6OK0dWA7jt-yWJQAzZx5XBhoaxZNGtQc,1921
18
+ rustat_python_api-0.10.0.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
19
+ rustat_python_api-0.10.0.dist-info/top_level.txt,sha256=VK0hmkKZE9YThxolUcoE6JtGI67NFeKJMBLuet8kI4w,18
20
+ rustat_python_api-0.10.0.dist-info/RECORD,,