impectPy 2.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
impectPy/xml.py ADDED
@@ -0,0 +1,990 @@
1
+ import numpy as np
2
+ import pandas as pd
3
+ import sys
4
+ from xml.etree import ElementTree as ET
5
+
6
+ ######
7
+ #
8
+ # This function returns an XML ElementTree built from a given match event dataframe
9
+ #
10
+ ######
11
+
12
# Labels that may be attached to an XML instance. Each entry pairs the label
# name with an "order" prefix; generateXML sorts by that prefix and prepends
# it to the label group name when label sorting is enabled.
allowed_labels = [
    {"order": f"{prefix} | ", "name": label_name}
    for prefix, label_name in (
        ("00", "eventId"),
        ("01", "matchId"),
        ("02", "periodId"),
        ("03", "phase"),
        ("04", "gameState"),
        ("05", "playerPosition"),
        ("06", "action"),
        ("07", "actionType"),
        ("08a", "bodyPart"),
        ("08b", "bodyPartExtended"),
        ("08c", "previousPassHeight"),
        ("09", "actionTypeResult"),
        ("10", "startPackingZone"),
        ("11", "startPackingZoneGroup"),
        ("12", "startPitchPosition"),
        ("13", "startLane"),
        ("14", "endPackingZone"),
        ("15", "endPackingZoneGroup"),
        ("16", "endPitchPosition"),
        ("17", "endLane"),
        ("18", "opponents"),
        ("19", "pressure"),
        ("20", "pxTTeam"),
        ("21", "pressingPlayerName"),
        ("22", "duelType"),
        ("23", "duelPlayerName"),
        ("24", "fouledPlayerName"),
        ("25", "passDistance"),
        ("26", "passReceiverPlayerName"),
        ("27", "leadsToShot"),
        ("28", "leadsToGoal"),
        ("29", "squadName"),
        ("30", "playerName"),
        ("31", "pxTTeamStart"),
        ("32", "pxTTeamEnd"),
    )
]
50
+
51
# KPIs that may be attached to an XML instance. All of them share the same
# "order" prefix, so sorting by "order" keeps them in the sequence listed here.
allowed_kpis = [
    {"order": "KPI: ", "name": kpi_name}
    for kpi_name in (
        "PXT_DELTA",
        "BYPASSED_OPPONENTS",
        "BYPASSED_DEFENDERS",
        "BYPASSED_OPPONENTS_RECEIVING",
        "BYPASSED_DEFENDERS_RECEIVING",
        "BALL_LOSS_ADDED_OPPONENTS",
        "BALL_LOSS_REMOVED_TEAMMATES",
        "BALL_WIN_ADDED_TEAMMATES",
        "BALL_WIN_REMOVED_OPPONENTS",
        "REVERSE_PLAY_ADDED_OPPONENTS",
        "REVERSE_PLAY_ADDED_OPPONENTS_DEFENDERS",
        "BYPASSED_OPPONENTS_RAW",
        "BYPASSED_OPPONENTS_DEFENDERS_RAW",
        "SHOT_XG",
        "POSTSHOT_XG",
        "PACKING_XG",
    )
]
69
+
70
# Column names that generateXML accepts as its codeTag argument.
allowed_codes = "playerName squadName actionType action".split()
76
+
77
# Allowed label/code combinations: for every label name, a mapping from code
# key to a flag telling whether the label may be used together with that code.
# Each row below lists the flags in the order
# (playerName, team, action, actionType).
combinations = {
    label_name: dict(zip(("playerName", "team", "action", "actionType"), flags))
    for label_name, flags in (
        ("eventId", (True, False, True, True)),
        ("matchId", (True, True, True, True)),
        ("periodId", (True, True, True, True)),
        ("phase", (True, False, True, True)),
        ("gameState", (True, True, True, True)),
        ("playerPosition", (True, False, True, True)),
        ("action", (True, False, False, True)),
        ("actionType", (True, False, True, False)),
        ("bodyPart", (True, False, True, True)),
        ("bodyPartExtended", (True, False, True, True)),
        ("previousPassHeight", (True, False, True, True)),
        ("actionTypeResult", (True, False, True, True)),
        ("startPackingZone", (True, False, True, True)),
        ("startPackingZoneGroup", (True, False, True, True)),
        ("startPitchPosition", (True, False, True, True)),
        ("startLane", (True, False, True, True)),
        ("endPackingZone", (True, False, True, True)),
        ("endPackingZoneGroup", (True, False, True, True)),
        ("endPitchPosition", (True, False, True, True)),
        ("endLane", (True, False, True, True)),
        ("opponents", (True, False, True, True)),
        ("pressure", (True, False, True, True)),
        ("pxTTeam", (True, False, True, True)),
        ("pressingPlayerName", (True, False, True, True)),
        ("duelType", (True, False, True, True)),
        ("duelPlayerName", (True, False, True, True)),
        ("fouledPlayerName", (True, False, True, True)),
        ("passDistance", (True, False, True, True)),
        ("passReceiverPlayerName", (True, False, True, True)),
        ("leadsToShot", (True, True, True, True)),
        ("leadsToGoal", (True, True, True, True)),
        ("squadName", (True, False, True, True)),
        ("playerName", (False, True, True, True)),
        ("pxTTeamStart", (False, True, False, False)),
        ("pxTTeamEnd", (False, True, False, False)),
    )
}
115
+
116
+
117
# helpers used by generateXML

def _code_allows_label(label_name, code_tag):
    """Return whether the label ``label_name`` may be combined with ``code_tag``."""
    rule = combinations.get(label_name) or {}
    if code_tag in rule:
        return bool(rule[code_tag])
    # The combination table stores the team-level flag under the key "team",
    # whereas the only team-level code accepted by generateXML is "squadName".
    # Without this fallback no label could ever be combined with "squadName".
    if code_tag == "squadName":
        return bool(rule.get("team"))
    return False


def _bucket_label(bucket, value, zero_value, error_value):
    """Return the label of the first bucket entry whose [min, max[ range holds ``value``."""
    # zero is handled separately because 0 values for kpis should be handled
    # differently from attributes
    if value == 0:
        return zero_value
    for entry in bucket:
        if entry["min"] <= value < entry["max"]:
            return entry["label"]
    # no bucket matched (e.g. NaN or a value outside every range)
    return error_value


def _append_label(instance, label, value, label_sorting):
    """Append one label element (group + text) to an XML instance element."""
    wrapper = ET.SubElement(instance, "label")
    group = ET.SubElement(wrapper, "group")
    group.text = label["order"] + label["name"] if label_sorting else label["name"]
    text = ET.SubElement(wrapper, "text")
    text.text = value


# define function to generate xml
def generateXML(
        events: pd.DataFrame,
        lead: int,
        lag: int,
        p1Start: int,
        p2Start: int,
        p3Start: int,
        p4Start: int,
        p5Start: int,
        codeTag: str,
        labels=None,
        kpis=None,
        labelSorting: bool = True,
        sequencing: bool = True,
        buckets: bool = True
) -> ET.ElementTree:
    """Build an XML tree of coded video instances from a match event dataframe.

    Args:
        events: Event data. The frame is modified in place: the columns
            PXT_PLAYER_DELTA, PXT_TEAM_DELTA, startPackingZoneGroup,
            endPackingZoneGroup, start and end are added to it.
        lead: Amount subtracted from every instance start (same unit as the
            gameTimeInSec column).
        lag: Amount added to every instance end (same unit as gameTimeInSec).
        p1Start: Offset added to the period-relative game time of period 1.
        p2Start: Same for period 2.
        p3Start: Same for period 3.
        p4Start: Same for period 4.
        p5Start: Same for period 5.
        codeTag: Column used as the code of each instance; must be one of
            ``allowed_codes``. With "squadName" the instances are built from
            aggregated team phases instead of single player events.
        labels: Names of the labels to attach. ``None`` selects every label
            that is allowed for ``codeTag``.
        kpis: Names of the KPIs to attach. ``None`` selects all allowed KPIs.
        labelSorting: If true, labels are sorted by their "order" prefix and
            the prefix is prepended to the label group name.
        sequencing: If true, consecutive events of the same player are merged
            into one instance (ignored for ``codeTag == "squadName"``).
        buckets: If true, numeric KPI and attribute values are replaced by
            bucket labels.

    Returns:
        The ElementTree whose root element is "file".

    Raises:
        ValueError: If ``kpis``, ``labels`` or ``codeTag`` contain values that
            are not allowed, or a label is not allowed for the selected code.
    """
    # handle kpis and labels defaults
    if labels is None:
        labels = [
            label["name"] for label in allowed_labels
            if _code_allows_label(label["name"], codeTag)
        ]
    if kpis is None:
        kpis = [kpi["name"] for kpi in allowed_kpis]

    # check for invalid kpis
    allowed_kpi_names = [kpi["name"] for kpi in allowed_kpis]
    invalid_kpis = [kpi for kpi in kpis if kpi not in allowed_kpi_names]
    if invalid_kpis:
        raise ValueError(f"Invalid KPIs: {invalid_kpis}")

    # check for invalid labels
    allowed_label_names = [label["name"] for label in allowed_labels]
    invalid_labels = [lbl for lbl in labels if lbl not in allowed_label_names]
    if invalid_labels:
        raise ValueError(f"Invalid Labels: {invalid_labels}")

    # check for invalid code tag
    if codeTag not in allowed_codes:
        raise ValueError(f"Invalid Code: {codeTag}")

    # keep only the selected labels that are allowed for the selected code
    labels_and_kpis = []
    invalid_labels = []
    for label in allowed_labels:
        name = label["name"]
        # ensure the code attribute is not repeated as a label
        if name not in labels or name == codeTag:
            continue
        if _code_allows_label(name, codeTag):
            labels_and_kpis.append(label)
        else:
            invalid_labels.append(name)

    if invalid_labels:
        raise ValueError(
            f"With the selected code ('{codeTag}') following labels are invalid:\n{', '.join(invalid_labels)}"
        )

    # keep only the selected kpis (in the order of allowed_kpis)
    labels_and_kpis.extend(kpi for kpi in allowed_kpis if kpi["name"] in kpis)

    if labelSorting:
        labels_and_kpis = sorted(labels_and_kpis, key=lambda x: x["order"])

    # compile periods start times into dict
    offsets = {
        "p1": p1Start,
        "p2": p2Start,
        "p3": p3Start,
        "p4": p4Start,
        "p5": p5Start
    }

    # define bucket limits for kpis (an entry matches when min <= value < max)
    buckets_packing = [
        {"label": "[0,1[", "min": 0, "max": 1},
        {"label": "[1,3[", "min": 1, "max": 3},
        {"label": "[3,5[", "min": 3, "max": 5},
        {"label": "[5,∞]", "min": 5, "max": 50}
    ]

    # NOTE(review): the labels say 0.02 but the limits use 0.03 - confirm which
    # one is intended (kept as in the original).
    bucket_shotxg = [
        {"label": "[0,0.02[", "min": 0, "max": 0.03},
        {"label": "[0.02,0.05[", "min": 0.03, "max": 0.05},
        {"label": "[0.05,0.1[", "min": 0.05, "max": 0.1},
        {"label": "[0.1,0.15[", "min": 0.1, "max": 0.15},
        {"label": "[0.15,1]", "min": 0.15, "max": 1.01}
    ]

    # NOTE(review): "[0.4,0.5]" is written as a closed interval although the
    # upper limit is exclusive like in the other entries (kept as in the original).
    bucket_postshotxg = [
        {"label": "[0,0.1[", "min": 0, "max": 0.1},
        {"label": "[0.1,0.2[", "min": 0.1, "max": 0.2},
        {"label": "[0.2,0.3[", "min": 0.2, "max": 0.3},
        {"label": "[0.3,0.4[", "min": 0.3, "max": 0.4},
        {"label": "[0.4,0.5]", "min": 0.4, "max": 0.5},
        {"label": "[0.5,0.6[", "min": 0.5, "max": 0.6},
        {"label": "[0.6,0.7[", "min": 0.6, "max": 0.7},
        {"label": "[0.7,0.8[", "min": 0.7, "max": 0.8},
        {"label": "[0.8,0.9[", "min": 0.8, "max": 0.9},
        {"label": "[0.9,1]", "min": 0.9, "max": 1.01}
    ]

    # NOTE(review): same label/limit mismatch (0.02 vs 0.03) as for SHOT_XG.
    bucket_packingxg = [
        {"label": "[0,0.02[", "min": 0, "max": 0.03},
        {"label": "[0.02,0.05[", "min": 0.03, "max": 0.05},
        {"label": "[0.05,0.1[", "min": 0.05, "max": 0.1},
        {"label": "[0.1,0.15[", "min": 0.1, "max": 0.15},
        {"label": "[0.15,1]", "min": 0.15, "max": 1.1}
    ]

    # define delta pxt bucket
    bucket_pxt = [
        {"label": "[0%,1%[", "min": 0, "max": 0.01},
        {"label": "[1%,2%[", "min": 0.01, "max": 0.02},
        {"label": "[2%,5%[", "min": 0.02, "max": 0.05},
        {"label": "[5%,10%[", "min": 0.05, "max": 0.1},
        {"label": "[10%,100%]", "min": 0.1, "max": 1.01},
        {"label": "[-1%,0%[", "min": -0.01, "max": 0},
        {"label": "[-2%,-1%[", "min": -0.02, "max": -0.01},
        {"label": "[-5%,-2%[", "min": -0.05, "max": -0.02},
        {"label": "[-10%,-5%[", "min": -0.1, "max": -0.05},
        {"label": "[-100%,-10%[", "min": -1., "max": -0.1}
    ]

    # assign a bucket definition to every selected kpi; kpis without a
    # dedicated definition use the packing buckets
    dedicated_buckets = {
        "SHOT_XG": bucket_shotxg,
        "POSTSHOT_XG": bucket_postshotxg,
        "PACKING_XG": bucket_packingxg,
        "PXT_DELTA": bucket_pxt,
    }
    kpi_buckets = {kpi: dedicated_buckets.get(kpi, buckets_packing) for kpi in kpis}

    # define pressure buckets
    pressure_buckets = [
        {"label": "[0,30[", "min": -1, "max": 30},
        {"label": "[30,70[", "min": 30, "max": 70},
        {"label": "[70,100]", "min": 70, "max": 101}
    ]

    # define opponent buckets
    opponent_buckets = [
        {"label": "[0,5[", "min": -1, "max": 5},
        {"label": "[5,9[", "min": 5, "max": 9},
        {"label": "[9,11]", "min": 9, "max": 12}
    ]

    # define pass length buckets
    pass_buckets = [
        {"label": "[0,5[", "min": 0, "max": 5},
        {"label": "[5,15[", "min": 5, "max": 15},
        {"label": "[15,25[", "min": 15, "max": 25},
        {"label": "[25,∞]", "min": 25, "max": 200}
    ]

    # define color schemes
    home_colors = {"r": "62929", "g": "9225", "b": "105"}
    away_colors = {"r": "13171", "g": "20724", "b": "40300"}
    neutral_colors = {"r": "13001", "g": "13001", "b": "13001"}

    # combine pxT kpis into single score for players (incl. PXT_REC) and team (excl. PXT_REC)
    events["PXT_PLAYER_DELTA"] = events[
        ["PXT_BLOCK", "PXT_DRIBBLE", "PXT_FOUL", "PXT_BALL_WIN", "PXT_PASS", "PXT_REC", "PXT_SHOT", "PXT_SETPIECE"]
    ].sum(axis=1)

    events["PXT_TEAM_DELTA"] = events[
        ["PXT_BLOCK", "PXT_DRIBBLE", "PXT_FOUL", "PXT_BALL_WIN", "PXT_PASS", "PXT_SHOT", "PXT_SETPIECE"]
    ].sum(axis=1)

    # add grouping for packing zones
    base_zone_groups = {
        'AM': ['AMC', 'AML', 'AMR'],
        'CB': ['CBC', 'CBL', 'CBR'],
        'CM': ['CMC', 'CML', 'CMR'],
        'DM': ['DMC', 'DML', 'DMR'],
        'FBL': ['FBL'],
        'FBR': ['FBR'],
        'GK': ['GKC', 'GKR', 'GKL'],
        'IBC': ['IBC', 'IBR', 'IBL'],
        'IBWL': ['IBWL'],
        'IBWR': ['IBWR'],
        'WL': ['WL'],
        'WR': ['WR'],
    }

    # build mapping dictionary zone -> group (own and opponent variants)
    zone_groups = {}
    for group_name, zones in base_zone_groups.items():
        for zone in zones:
            zone_groups[zone] = group_name
            zone_groups[f'OPP_{zone}'] = f'OPP_{group_name}'

    # add new columns; zones without a group keep their own name
    events["startPackingZoneGroup"] = events["startPackingZone"].map(zone_groups).fillna(events["startPackingZone"])
    events["endPackingZoneGroup"] = events["endPackingZone"].map(zone_groups).fillna(events["endPackingZone"])

    # determine video timestamps

    # look up the start offset of each event's period
    period_ids = events["periodId"]
    offsets_series = period_ids.map(lambda period_id: offsets[f"p{period_id}"])

    # gameTimeInSec is shifted by 10000 per period, so remove that shift before
    # adding the period offset
    period_time = events["gameTimeInSec"] - (period_ids - 1) * 10000 + offsets_series

    # compute start and end time
    events["start"] = (period_time - lead).clip(lower=0)
    events["end"] = period_time + events["duration"] + lag

    # fix end time for final whistles
    # (The duration of first half final whistles is always extremely high, as it is computed using the
    # gameTimeInSec of the FINAL_WHISTLE event (e.g. 2730) and the gameTimeInSec of the next KICKOFF event
    # (e.g. 10000).)
    events["end"] = np.where(
        events["action"] != "FINAL_WHISTLE",
        events["end"],
        events["start"] + lead + lag
    )

    # Group sequential plays by same player

    # as the event data often has multiple consecutive events of the same player (e.g. reception + dribble + pass),
    # those would be 3 separate video sequences. Because of lead and lag times, those consecutive events would overlap
    # significantly. Therefore, these events are combined into one clip.

    # keep only rows where playerId is given (explicit copy so that the
    # column assignments below do not operate on a view of events)
    players = events[events.playerId.notnull()].copy()

    if sequencing:
        # create lag column for player
        players["playerId_lag"] = players.playerId.shift(1, fill_value=0)

        # detect changes in playerId compared to previous event using lag column
        players["player_change_flag"] = np.where(
            players["playerId"] == players["playerId_lag"], 0, 1
        )

        # apply cumulative sum function to player_change_flag to create ID column
        players["sequence_id"] = players.player_change_flag.cumsum()

        # create separate df to aggregate sequence timing
        sequence_timing = players.groupby("sequence_id").agg(
            {"start": "min",
             "end": "max"}
        ).reset_index()

    # calculate game state

    # detect goals scored
    is_goal_event = players["action"] == "GOAL"
    is_own_goal_event = players["action"] == "OWN_GOAL"
    by_home = players["squadId"] == players["homeSquadId"]
    by_away = players["squadId"] == players["awaySquadId"]

    players["goal_home"] = np.where((is_goal_event & by_home) | (is_own_goal_event & by_away), 1, 0)
    players["goal_away"] = np.where((is_goal_event & by_away) | (is_own_goal_event & by_home), 1, 0)

    # create lag column for goals because the game state should change after the goal is scored not on the
    # goal event itself
    players["goal_home_lag"] = players.goal_home.shift(1, fill_value=0)
    players["goal_away_lag"] = players.goal_away.shift(1, fill_value=0)

    # apply cumulative sum function to goal_home_lag and goal_away_lag
    players["goal_home_sum"] = players.goal_home_lag.cumsum()
    players["goal_away_sum"] = players.goal_away_lag.cumsum()

    # calculate teamGoals and opponentGoals
    players["teamGoals"] = np.where(
        by_home,
        players["goal_home_sum"],
        np.where(by_away, players["goal_away_sum"], np.nan)
    )

    players["opponentGoals"] = np.where(
        by_away,
        players["goal_home_sum"],
        np.where(by_home, players["goal_away_sum"], np.nan)
    )

    # calculate game state (np.nan instead of the alias np.NaN, which was
    # removed in NumPy 2.0)
    players["gameState"] = np.where(
        players["teamGoals"] == players["opponentGoals"], "tied",
        np.where(
            players["teamGoals"] > players["opponentGoals"], "leading",
            np.where(
                players["teamGoals"] < players["opponentGoals"], "trailing",
                np.nan
            )
        )
    )

    # group possession phases

    # create groups on team level for consecutive events that have the same attacking squad in order to determine
    # whether an attacking possession phase leads to a shot or a goal

    # create lag column for attackingSquad
    players["attackingSquadId_lag"] = players.attackingSquadId.shift(1)

    # detect changes in attackingSquadId compared to previous event using lag column
    players["possession_change_flag"] = np.where(
        players["attackingSquadId"] == players["attackingSquadId_lag"], 0, 1
    )

    # apply cumulative sum function to possession_change_flag to create ID column
    players["possession_id"] = players.possession_change_flag.cumsum()

    # create columns to detect shots and goal
    players["is_shot"] = np.where(players["actionType"] == "SHOT", 1, 0)
    players["is_goal"] = np.where(
        (players["actionType"] == "SHOT") & (players["result"] == "SUCCESS"), 1, 0
    )

    # create separate df to aggregate possession results
    possession_results = players.groupby("possession_id").agg(
        {"is_shot": "sum",
         "is_goal": "sum"}
    )

    # convert sum of goals/shots to boolean type
    possession_results["leadsToShot"] = possession_results["is_shot"] > 0
    possession_results["leadsToGoal"] = possession_results["is_goal"] > 0

    # add possession result to players df
    players = pd.merge(
        players,
        possession_results,
        how="left",
        left_on=["possession_id"],
        right_on=["possession_id"]
    )

    # group phases on team level

    # create groups on team level for consecutive events that have the same phase and squadId in order to
    # create team video clips

    # create copy of data to evaluate phases
    phases = players[players.phase.notnull()].copy()

    # create lag columns for phase and squadId
    phases["phase_lag"] = phases.phase.shift(1)
    phases["squadId_lag"] = phases.squadId.shift(1)

    # detect changes in either phase or squadId compared to previous event using lag columns
    phases["phase_change_flag"] = np.where(
        (phases["phase"] == phases["phase_lag"]) & (phases["squadId"] == phases["squadId_lag"]),
        0,
        1
    )

    # apply cumulative sum function to phase_change_flag to create ID column
    phases["phase_id"] = phases.phase_change_flag.cumsum()

    # create copies of pxTTeam
    phases["pxTTeamStart"] = phases.pxTTeam
    phases["pxTTeamEnd"] = phases.pxTTeam

    # create columns to detect shots and goal
    phases["is_shot"] = np.where(phases["actionType"] == "SHOT", 1, 0)
    phases["is_goal"] = np.where(
        (phases["actionType"] == "SHOT") & (phases["result"] == "SUCCESS"), 1, 0
    )

    # group by and aggregate
    phases = phases.groupby(["phase_id", "phase", "squadId", "squadName"]).agg(
        {"matchId": "min",
         "periodId": "min",
         "gameState": "first",
         "BYPASSED_OPPONENTS": "sum",
         "BYPASSED_DEFENDERS": "sum",
         "BYPASSED_OPPONENTS_RECEIVING": "sum",
         "BYPASSED_DEFENDERS_RECEIVING": "sum",
         "BALL_LOSS_ADDED_OPPONENTS": "sum",
         "BALL_LOSS_REMOVED_TEAMMATES": "sum",
         "BALL_WIN_ADDED_TEAMMATES": "sum",
         "BALL_WIN_REMOVED_OPPONENTS": "sum",
         "REVERSE_PLAY_ADDED_OPPONENTS": "sum",
         "REVERSE_PLAY_ADDED_OPPONENTS_DEFENDERS": "sum",
         "BYPASSED_OPPONENTS_RAW": "sum",
         "BYPASSED_OPPONENTS_DEFENDERS_RAW": "sum",
         "SHOT_XG": "sum",
         "POSTSHOT_XG": "sum",
         "PACKING_XG": "sum",
         "PXT_TEAM_DELTA": "sum",
         "pxTTeamStart": "first",
         "pxTTeamEnd": "last",
         "start": "min",
         "end": "max",
         "is_shot": "sum",
         "is_goal": "sum",
         "playerName": lambda names: set(names)}
    )

    # convert sum of goals/shots to boolean type
    phases["leadsToShot"] = phases["is_shot"] > 0
    phases["leadsToGoal"] = phases["is_goal"] > 0

    # reset index
    phases.reset_index(inplace=True)

    # merge phase and squadName into one column to later pass into code tag
    phases["teamPhase"] = phases["squadName"] + " - " + phases["phase"].str.replace("_", " ")

    # get period starts

    # filter for kick off events of each period
    kickoffs = events[
        (events.actionType == "KICK_OFF") & ((events.gameTimeInSec - (events.periodId - 1) * 10000) < 10)
    ].reset_index()

    # check for penalty shootout
    penalty_shootout = events[events.periodId == 5]

    # add row for start of penalty shootout
    if len(penalty_shootout) > 0:
        kickoffs = pd.concat([kickoffs, penalty_shootout.iloc[[0]]])

    # rename PXT_DELTA
    players = players.rename(columns={"PXT_PLAYER_DELTA": "PXT_DELTA"})
    phases = phases.rename(columns={"PXT_TEAM_DELTA": "PXT_DELTA"})

    # apply bucket logic
    if buckets:
        # apply on player level
        for kpi in kpis:
            kpi_bucket = kpi_buckets[kpi]
            players[kpi] = players[kpi].apply(lambda x: _bucket_label(kpi_bucket, x, None, None))

        # NOTE(review): the zero labels "[0%,10%[" (pressure) and "<15" (pass
        # distance) do not match any label of their bucket lists - confirm
        # the intended values (kept as in the original).

        # apply pressure bucket
        players.pressure = players.pressure.apply(lambda x: _bucket_label(pressure_buckets, x, "[0%,10%[", None))

        # apply opponents bucket
        players.opponents = players.opponents.apply(lambda x: _bucket_label(opponent_buckets, x, "[0,5[", None))

        # apply pass length bucket
        players.passDistance = players.passDistance.apply(lambda x: _bucket_label(pass_buckets, x, "<15", None))

        # apply pxT Team bucket
        players.pxTTeam = players.pxTTeam.apply(lambda x: _bucket_label(bucket_pxt, x, "[0%,1%[", None))

        # apply on team level
        # apply pxt bucket to PXT_DELTA
        phases.PXT_DELTA = phases.PXT_DELTA.apply(lambda x: _bucket_label(bucket_pxt, x, "[0%,1%[", None))

        # apply pxT Team bucket
        phases.pxTTeamStart = phases.pxTTeamStart.apply(lambda x: _bucket_label(bucket_pxt, x, "[0%,1%[", None))
        phases.pxTTeamEnd = phases.pxTTeamEnd.apply(lambda x: _bucket_label(bucket_pxt, x, "[0%,1%[", None))

        # iterate over kpis and apply buckets (PXT_DELTA was handled above)
        for kpi in kpis:
            if kpi == "PXT_DELTA":
                continue
            kpi_bucket = kpi_buckets[kpi]
            phases[kpi] = phases[kpi].apply(lambda x: _bucket_label(kpi_bucket, x, None, None))

    # convert to xml

    # build a tree structure
    root = ET.Element("file")
    sort_info = ET.SubElement(root, "SORT_INFO")
    sort_info.text = ""
    sort_type = ET.SubElement(sort_info, "sort_type")
    sort_type.text = "color"
    instances = ET.SubElement(root, "ALL_INSTANCES")
    rows = ET.SubElement(root, "ROWS")

    # add kickoff events to start each period

    # code shown for the first event of each period; other period ids get no code text
    kickoff_codes = {
        1: "Kickoff",
        2: "2nd Half Kickoff",
        3: "ET Kickoff",
        4: "ET 2nd Half Kickoff",
        5: "Penalty Shootout",
    }

    # add to xml structure
    for _, event in kickoffs.iterrows():
        instance = ET.SubElement(instances, "instance")
        # the kickoff instances use the ids 0..n-1
        ET.SubElement(instance, "ID").text = str(event["periodId"] - 1)
        ET.SubElement(instance, "start").text = str(round(event["start"], 2))
        ET.SubElement(instance, "end").text = str(round(event["end"], 2))
        ET.SubElement(instance, "code").text = kickoff_codes.get(event["periodId"])
        # add period label
        wrapper = ET.SubElement(instance, "label")
        ET.SubElement(wrapper, "group").text = "02 | periodId"
        ET.SubElement(wrapper, "text").text = str(event["periodId"])

    # add player data to XML structure

    # get max id from kickoffs to ensure continuous numbering; without any
    # kickoff row the numbering starts without an offset instead of failing
    # on max() of an empty list
    kickoff_periods = kickoffs.periodId.tolist()
    max_id = max(kickoff_periods) - 1 if kickoff_periods else 0

    # concatenate actionType and result into one column if result exists
    players["actionTypeResult"] = np.where(
        players["result"].notna(),
        players["actionType"] + "_" + players["result"],
        np.nan
    )

    # values that stringify to one of these are treated as missing
    missing_values = ["None", "nan"]

    if codeTag == "squadName":
        # team level: one instance per team phase from the `phases` DataFrame
        # (identical with and without sequencing)
        for _, phase in phases.iterrows():
            instance = ET.SubElement(instances, "instance")

            # set unique ID using phase_id offset by max_id
            ET.SubElement(instance, "ID").text = str(phase["phase_id"] + max_id)

            # define the time range of the instance
            ET.SubElement(instance, "start").text = str(round(phase["start"], 2))
            ET.SubElement(instance, "end").text = str(round(phase["end"], 2))

            # set the code to the team phase
            ET.SubElement(instance, "code").text = phase["teamPhase"]

            # add labels to the instance
            for label in labels_and_kpis:
                if label["name"] not in phase:
                    continue
                value = str(phase[label["name"]])
                if value not in missing_values:
                    _append_label(instance, label, value, labelSorting)

    elif sequencing:
        # player level with sequencing: still iterate over each event separately but choose between
        # creating a new instance and appending to the existing instance
        seq_id_current = None

        for index, event in players.iterrows():
            # skip entries without valid player name
            if not pd.notnull(event["playerName"]):
                continue

            # set first sequence_id
            if index == 0:
                seq_id_current = 0

            seq_id_new = event["sequence_id"]
            action_text = event["action"].lower().replace('_', ' ')

            # start new clip if new sequence or first event
            if seq_id_new != seq_id_current or index == 0:
                instance = ET.SubElement(instances, "instance")

                ET.SubElement(instance, "ID").text = str(event["sequence_id"] + max_id)

                # sequence ids start at 1 while sequence_timing is indexed from 0
                ET.SubElement(instance, "start").text = str(
                    round(sequence_timing.at[seq_id_new - 1, "start"], 2))
                ET.SubElement(instance, "end").text = str(
                    round(sequence_timing.at[seq_id_new - 1, "end"], 2))

                # use selected attribute (e.g., playerName, action) as the main code
                ET.SubElement(instance, "code").text = str(event[codeTag])

                # free-text description showing action sequence
                free_text = ET.SubElement(instance, "free_text")
                free_text.text = f"({event['gameTime']}) {event['playerName']}: {action_text}"
            else:
                # append to existing free-text if still same sequence
                free_text.text += f" | {action_text}"

            # add labels to the instance
            for label in labels_and_kpis:
                if label["name"] not in event:
                    continue
                value = str(event[label["name"]])
                if value in missing_values:
                    continue
                try:
                    prev_value = str(players.at[index - 1, label["name"]])
                except KeyError:
                    # no previous row: treat the value as unchanged
                    prev_value = value
                # only add label if it changed or is the first event of the sequence
                if seq_id_new != seq_id_current or value != prev_value:
                    _append_label(instance, label, value, labelSorting)

            # update current sequence ID
            seq_id_current = seq_id_new

    else:
        # player level without sequencing (i.e., one clip per row)
        for _, event in players.iterrows():
            if not pd.notnull(event["playerName"]):
                continue

            instance = ET.SubElement(instances, "instance")
            ET.SubElement(instance, "ID").text = str(event["eventNumber"] + max_id)
            ET.SubElement(instance, "start").text = str(round(event["start"], 2))
            ET.SubElement(instance, "end").text = str(round(event["end"], 2))
            ET.SubElement(instance, "code").text = str(event[codeTag])
            action_text = event["action"].lower().replace('_', ' ')
            ET.SubElement(instance, "free_text").text = (
                f"({event['gameTime']}) {event['playerName']}: {action_text}"
            )

            for label in labels_and_kpis:
                if label["name"] not in event:
                    continue
                value = str(event[label["name"]])
                if value not in missing_values:
                    _append_label(instance, label, value, labelSorting)

    # create row order

    # get home and away team
    home_team = players.homeSquadName.unique().tolist()[0]
    away_team = players.awaySquadName.unique().tolist()[0]

    # get home and away team players
    home_players = sorted(
        players[players.squadName == home_team].playerName.unique(), reverse=True)
    away_players = sorted(
        players[players.squadName == away_team].playerName.unique(), reverse=True)

    # get home and away team phases
    home_phases = sorted(
        phases[phases.squadName == home_team].teamPhase.unique(), reverse=True)
    away_phases = sorted(
        phases[phases.squadName == away_team].teamPhase.unique(), reverse=True)

    def add_row(value, colors):
        """Append a row entry (code plus R/G/B color values) to the ROWS element."""
        row_element = ET.SubElement(rows, "row")
        ET.SubElement(row_element, "code").text = value
        ET.SubElement(row_element, "R").text = colors["r"]
        ET.SubElement(row_element, "G").text = colors["g"]
        ET.SubElement(row_element, "B").text = colors["b"]

    # add entry for kickoffs
    # NOTE(review): the kickoff instances above use the codes "Kickoff",
    # "2nd Half Kickoff", ... and never "Start" - confirm the intended row code.
    add_row("Start", neutral_colors)

    if codeTag == "playerName":
        # away team players first, then home team players
        for player in away_players:
            add_row(player, away_colors)
        for player in home_players:
            add_row(player, home_colors)

    elif codeTag == "squadName":
        # away team phases first, then home team phases
        for team_phase in away_phases:
            add_row(team_phase, away_colors)
        for team_phase in home_phases:
            add_row(team_phase, home_colors)

    # wrap into ElementTree
    tree = ET.ElementTree(root)

    # only apply indent if Python version >= 3.9 (ET.indent does not exist before)
    if sys.version_info >= (3, 9):
        ET.indent(tree, space=" ")

    # return xml tree
    return tree