analyzerl-parser 0.1.2__cp39-abi3-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2 @@
1
+ from .parse import parse_replay
2
+ from .stats import calculate_stats
Binary file
@@ -0,0 +1,82 @@
1
+ from pathlib import Path
2
+ from ._boxcars import parse_frames as _parse_frames
3
+
4
# Base directory that relative paths are resolved against. It starts out as
# the working directory captured when this module is imported and can be
# replaced through set_data_dir().
_DATA_DIR = Path.cwd()
5
+
6
+
7
def set_data_dir(path):
    """Point the module-wide data directory at ``path``.

    ``~`` is expanded and the result is made absolute before it is stored.

    Args:
        path: Directory given as a string or path-like object.

    Returns:
        The resolved ``Path`` that is now the data directory.
    """
    global _DATA_DIR
    resolved = Path(path).expanduser().resolve()
    _DATA_DIR = resolved
    return resolved
11
+
12
+
13
def get_data_dir():
    """Return the directory currently used to resolve relative paths."""
    current = _DATA_DIR
    return current
15
+
16
+
17
+ def _path(value):
18
+ value = Path(value).expanduser()
19
+ return value if value.is_absolute() else (_DATA_DIR / value).resolve()
20
+
21
+
22
def _replay_paths(replay_path):
    """Expand a file or directory into the list of replay files to parse.

    Args:
        replay_path: A single ``.replay`` file, or a directory that is
            searched recursively. Relative values are resolved by ``_path``.

    Returns:
        A list of paths: the file itself, or the sorted replay files found
        anywhere below the directory.

    Raises:
        ValueError: ``replay_path`` is a file whose suffix is not ``.replay``.
        FileNotFoundError: The path does not exist, or the directory holds no
            replay files.
    """
    replay_path = _path(replay_path)

    if replay_path.is_file():
        if replay_path.suffix.lower() != ".replay":
            raise ValueError(f"Not a .replay file: {replay_path}")
        return [replay_path]

    if replay_path.is_dir():
        # Match the extension case-insensitively, like the single-file branch
        # above, so "GAME.REPLAY" is also found on case-sensitive filesystems.
        # is_file() skips directories whose name merely ends in ".replay".
        paths = sorted(
            candidate
            for candidate in replay_path.rglob("*")
            if candidate.name.lower().endswith(".replay") and candidate.is_file()
        )
        if not paths:
            raise FileNotFoundError(f"No .replay files found in: {replay_path}")
        return paths

    raise FileNotFoundError(f"Replay path does not exist: {replay_path}")
37
+
38
+
39
def parse_replay(replay_path="data/replays", workers=4, return_type="list", export=None):
    """Parse one replay file, or every replay under a directory, into frame rows.

    Args:
        replay_path: A ``.replay`` file or a directory searched recursively.
            Relative paths are resolved against the configured data directory.
        workers: Worker count forwarded to the native parser. Falsy values and
            values below 1 are clamped to 1.
        return_type: ``"list"`` returns the parser's rows unchanged,
            ``"pandas"`` / ``"polars"`` wrap them in the matching DataFrame.
        export: Optional directory. When given, every parsed replay is also
            written to ``<game_id>_frames.parquet`` inside it.

    Returns:
        A list with one item (rows or DataFrame) per parsed replay.

    Raises:
        ValueError: ``return_type`` is not supported, or ``replay_path`` is a
            file without the ``.replay`` suffix.
        FileNotFoundError: ``replay_path`` does not exist or contains no
            replay files.
    """
    # Reject a bad return_type before any replay is parsed; parsing is the
    # expensive step and its result would be thrown away.
    if return_type not in ("list", "pandas", "polars"):
        raise ValueError("return_type must be one of: 'list', 'pandas', 'polars'")

    replay_paths = _replay_paths(replay_path)

    # Loop invariants: resolve these once instead of once per parsed replay.
    worker_count = max(int(workers or 1), 1)
    export_dir = _path(export) if export is not None else None

    frame_cls = None
    if return_type == "pandas":
        import pandas as pd

        frame_cls = pd.DataFrame
    elif return_type == "polars":
        import polars as pl

        frame_cls = pl.DataFrame

    output = []

    for replay_file in replay_paths:
        parsed_replays = _parse_frames(
            replay_path=str(replay_file),
            workers=worker_count,
        )

        for parsed in parsed_replays:
            game_id = parsed["game_id"]
            rows = parsed["rows"]
            item = rows if frame_cls is None else frame_cls(rows)

            if export_dir is not None:
                # Created lazily so a failed parse leaves no empty directory.
                export_dir.mkdir(parents=True, exist_ok=True)
                out_file = export_dir / f"{game_id}_frames.parquet"

                if return_type == "polars":
                    item.write_parquet(out_file)
                elif return_type == "pandas":
                    item.to_parquet(out_file, index=False)
                else:
                    # Plain row lists are exported through pandas.
                    import pandas as pd

                    pd.DataFrame(item).to_parquet(out_file, index=False)

            output.append(item)

    return output
@@ -0,0 +1,494 @@
1
+ import polars as pl
2
+
3
+ from pathlib import Path
4
+
5
# Column-name prefixes of the six per-player slots (three blue, three orange).
# Each slot contributes columns named "<slot>_<field>", e.g. "blue_player_1_id".
PLAYER_SLOTS = [
    "blue_player_1",
    "blue_player_2",
    "blue_player_3",
    "orange_player_1",
    "orange_player_2",
    "orange_player_3",
]
13
+
14
def requested_columns(has_xg):
    """List every input column the stats pipeline reads, without duplicates.

    Args:
        has_xg: When true, the ``"xG"`` column is included right after the
            event-level columns.

    Returns:
        Column names in a stable order: event columns, optionally ``"xG"``,
        then ``"<slot>_<field>"`` for every player slot.
    """
    event_columns = (
        "game_id",
        "replay_id",
        "event_type",
        "event_team",
        "event_player_1_id",
        "event_player_1_name",
        "event_player_1_team",
        "event_player_2_id",
        "event_player_2_name",
        "event_player_2_team",
        "seconds_elapsed",
        "official_assist",
        "boost_pickup_amount",
        "boost_pickup_type",
        "off_demo",
        "off_kickoff",
        "off_challenge_win",
        "off_bump",
        "off_air_dribble",
        "off_ground_dribble",
        "off_flick",
        "pass_in_play",
        "aerialing",
        "air_dribble",
        "ground_dribble",
        "flick_shot",
        "rebound",
        "off_flip_reset",
        "off_wall",
        "off_ceiling",
        "blue_team_name",
        "orange_team_name",
    )
    slot_fields = (
        "id",
        "network_id",
        "name",
        "platform",
        "rank",
        "rank_tier",
        "pro_player",
        "mmr",
        "car_id",
        "car_name",
        "is_bot",
        "time_in_game",
    )

    names = list(event_columns)
    if has_xg:
        names.append("xG")
    names.extend(
        f"{slot}_{field}" for slot in PLAYER_SLOTS for field in slot_fields
    )

    # dict keys keep insertion order, so this drops repeats but keeps order.
    return list(dict.fromkeys(names))
71
+
72
+
73
def _to_lazy_frames(frames):
    """Normalise any supported frame input into a single Polars ``LazyFrame``.

    Args:
        frames: A Polars ``LazyFrame`` or ``DataFrame``, a pandas
            ``DataFrame``, a ``list[dict]`` of rows, a ``list[list[dict]]``
            with one row list per replay, or a list of Polars / pandas frames.

    Returns:
        One ``LazyFrame``. List inputs are concatenated vertically with
        relaxed dtype matching.

    Raises:
        TypeError: ``frames`` is an empty list or of an unsupported type.
    """
    if isinstance(frames, pl.LazyFrame):
        return frames

    if isinstance(frames, pl.DataFrame):
        return frames.lazy()

    # pandas is optional. Guard only the import itself: an ImportError raised
    # while converting (e.g. a missing optional conversion dependency) must
    # surface as-is rather than be reported as an unsupported input type.
    try:
        import pandas as pd
    except ImportError:
        pd = None

    if pd is not None and isinstance(frames, pd.DataFrame):
        return pl.from_pandas(frames).lazy()

    message = (
        "frames must be a Polars DataFrame, pandas DataFrame, "
        "list[dict], list[list[dict]], or list of DataFrames"
    )

    if not isinstance(frames, list) or not frames:
        raise TypeError(message)

    # The element type is decided from the first item only.
    first = frames[0]

    if isinstance(first, dict):
        return pl.DataFrame(frames).lazy()

    if isinstance(first, list):
        return pl.concat(
            [pl.DataFrame(replay_rows) for replay_rows in frames],
            how="vertical_relaxed",
        ).lazy()

    if isinstance(first, pl.DataFrame):
        return pl.concat(frames, how="vertical_relaxed").lazy()

    # A single LazyFrame is accepted above, so accept a list of them as well.
    if isinstance(first, pl.LazyFrame):
        return pl.concat(frames, how="vertical_relaxed")

    if pd is not None and isinstance(first, pd.DataFrame):
        return pl.concat(
            [pl.from_pandas(frame) for frame in frames],
            how="vertical_relaxed",
        ).lazy()

    raise TypeError(message)
123
+
124
+
125
def _ensure_columns(rows, columns):
    """Return ``rows`` restricted to ``columns``, creating any that are absent.

    Columns missing from the frame's schema are added as all-null literals;
    the result contains exactly ``columns``, in that order.
    """
    present = set(rows.collect_schema().names())
    placeholders = [
        pl.lit(None).alias(name) for name in columns if name not in present
    ]
    padded = rows.with_columns(placeholders)
    return padded.select(columns)
135
+
136
+
137
def string_col(column):
    """Expression for ``column`` as text, with nulls replaced by ``""``.

    The cast is non-strict, so values that cannot be converted become null
    and are then filled with the empty string as well.
    """
    as_text = pl.col(column).cast(pl.Utf8, strict=False)
    return as_text.fill_null("")
139
+
140
+
141
def number_col(column):
    """Expression for ``column`` as ``Float64``, with nulls replaced by ``0.0``.

    The cast is non-strict, so unconvertible values become null and are then
    filled with zero too.
    """
    as_float = pl.col(column).cast(pl.Float64, strict=False)
    return as_float.fill_null(0.0)
143
+
144
+
145
def flag_col(column):
    """Boolean expression: is ``column`` one of the accepted "true" spellings?

    The value is read as text and lower-cased; ``"true"``, ``"1"`` and
    ``"yes"`` count as set, everything else (including missing) does not.
    """
    lowered = string_col(column).str.to_lowercase()
    return lowered.is_in(["true", "1", "yes"])
147
+
148
+
149
def count_if(condition, name):
    """Aggregation named ``name`` that counts rows where ``condition`` holds."""
    as_int = condition.cast(pl.Int64)
    return as_int.sum().alias(name)
151
+
152
+
153
def sum_if(condition, value, name):
    """Aggregation named ``name`` that sums ``value`` over matching rows.

    Rows where ``condition`` does not hold contribute ``0.0``.
    """
    contribution = pl.when(condition).then(value).otherwise(0.0)
    return contribution.sum().alias(name)
155
+
156
+
157
def player_slot_frame(events, slot):
    """Project one player slot's columns into a slot-independent layout.

    Args:
        events: Frame holding the ``"<slot>_*"`` columns, ``replay_id``,
            ``seconds_elapsed`` and the two team-name columns.
        slot: Slot prefix such as ``"blue_player_1"``; a prefix starting with
            ``"orange"`` puts the player on the orange team, anything else on
            blue.

    Returns:
        The frame with generic column names (``player_id``, ``player_name``,
        ``team`` ...), limited to rows whose ``player_id`` is not empty.
    """
    team = "orange" if slot.startswith("orange") else "blue"
    team_name_col = f"{team}_team_name"

    def text(field, alias):
        return string_col(f"{slot}_{field}").alias(alias)

    def flag(field):
        return flag_col(f"{slot}_{field}").alias(field)

    def number(field):
        return number_col(f"{slot}_{field}").alias(field)

    projection = [
        pl.col("replay_id"),
        text("id", "player_id"),
        text("network_id", "network_id"),
        text("name", "player_name"),
        pl.lit(team).alias("team"),
        string_col(team_name_col).alias("team_name"),
        text("platform", "platform"),
        text("rank", "rank"),
        text("rank_tier", "rank_tier"),
        flag("pro_player"),
        number("mmr"),
        text("car_id", "car_id"),
        text("car_name", "car_name"),
        flag("is_bot"),
        number("time_in_game"),
        number_col("seconds_elapsed").alias("seconds_elapsed"),
    ]

    # Rows where the slot is unoccupied carry an empty id after string_col.
    occupied = pl.col("player_id") != ""
    return events.select(projection).filter(occupied)
181
+
182
+
183
def player_frame(rows):
    """Build one identity row per (replay, player) across all player slots.

    Args:
        rows: Frame containing the per-slot columns read by
            ``player_slot_frame``.

    Returns:
        A frame keyed by ``replay_id`` and ``player_id`` with the player's
        descriptive fields, ``time_in_game`` (falling back to the largest
        ``seconds_elapsed`` seen for the player when no positive value was
        recorded) and ``time_on_field`` (``time_in_game`` divided by 60).
    """
    players = pl.concat(
        [player_slot_frame(rows, slot) for slot in PLAYER_SLOTS],
        how="vertical_relaxed",
    )

    def first_non_blank(column):
        # player_slot_frame builds these columns with string_col, which has
        # already replaced nulls by "". Dropping nulls would therefore never
        # skip a missing value, and a blank from an early row could win over
        # a real value seen later. Skip blanks instead; "" stays the result
        # when the player never has a value.
        return (
            pl.col(column)
            .filter(pl.col(column) != "")
            .first()
            .fill_null("")
            .alias(column)
        )

    players = players.group_by(["replay_id", "player_id"]).agg(
        [
            first_non_blank("network_id"),
            first_non_blank("player_name"),
            # "team" is a literal per slot and is never blank.
            pl.col("team").drop_nulls().first(),
            first_non_blank("team_name"),
            first_non_blank("platform"),
            first_non_blank("rank"),
            first_non_blank("rank_tier"),
            pl.col("pro_player").max(),
            pl.col("mmr").max(),
            first_non_blank("car_id"),
            first_non_blank("car_name"),
            pl.col("is_bot").max(),
            pl.col("time_in_game").max(),
            pl.col("seconds_elapsed").max().alias("_max_seconds_elapsed"),
        ]
    )

    return (
        players.with_columns(
            [
                pl.when(pl.col("time_in_game") > 0)
                .then(pl.col("time_in_game"))
                .otherwise(pl.col("_max_seconds_elapsed"))
                .alias("time_in_game")
            ]
        )
        .with_columns([(pl.col("time_in_game") / 60.0).alias("time_on_field")])
        .drop("_max_seconds_elapsed")
    )
220
+
221
+
222
def primary_event_stats(events, has_xg):
    """Aggregate per-player totals for events where the player is player 1.

    Args:
        events: Event rows; rows with an empty ``event_player_1_id`` are
            ignored.
        has_xg: When true, the three expected-goals sums are added, read from
            the ``"xG"`` column.

    Returns:
        One row per (``replay_id``, ``player_id``) with an overall event
        count, per-event-type counts, boost totals and per-flag shot counts.
        A "shot" here is an event of type ``"shot"`` or ``"goal"``.
    """
    event_type = pl.col("event_type")
    is_shot = event_type.is_in(["shot", "goal"])
    is_goal = event_type == "goal"
    is_boost_pickup = event_type == "boost-pickup"

    # (event_type value, output column) - emitted in this order.
    typed_counts = (
        ("save", "saves"),
        ("touch", "touches"),
        ("pass", "passes"),
        ("turnover", "turnovers"),
        ("challenge", "challenge_wins"),
        ("kickoff", "kickoff_wins"),
        ("demo", "demos"),
        ("bump", "bumps"),
        ("entry", "entries"),
        ("exit", "exits"),
        ("retrieval", "retrievals"),
        ("air_dribble", "air_dribbles"),
        ("ground_dribble", "ground_dribbles"),
        ("flick", "flicks"),
        ("flip_reset", "flip_resets"),
        ("boost-pickup", "boost_pickups"),
    )
    # (boost_pickup_type value, output column)
    pickup_kinds = (
        ("small", "small_boost_pickups"),
        ("big", "big_boost_pickups"),
        ("reset", "boost_resets"),
    )
    # (flag column, output column) - counted on shots only.
    shot_flags = (
        ("off_demo", "off_demo_shots"),
        ("off_kickoff", "off_kickoff_shots"),
        ("off_challenge_win", "off_challenge_win_shots"),
        ("off_bump", "off_bump_shots"),
        ("off_air_dribble", "off_air_dribble_shots"),
        ("off_ground_dribble", "off_ground_dribble_shots"),
        ("off_flick", "off_flick_shots"),
        ("pass_in_play", "pass_in_play_shots"),
        ("aerialing", "aerial_shots"),
        ("air_dribble", "air_dribble_shots"),
        ("ground_dribble", "ground_dribble_shots"),
        ("flick_shot", "flick_shots"),
        ("rebound", "rebound_shots"),
        ("off_flip_reset", "off_flip_reset_shots"),
        ("off_wall", "off_wall_shots"),
        ("off_ceiling", "off_ceiling_shots"),
    )

    aggregations = [
        pl.len().alias("event_count"),
        count_if(is_shot, "shots"),
        count_if(is_goal, "goals"),
    ]
    aggregations += [
        count_if(event_type == kind, output) for kind, output in typed_counts
    ]
    aggregations += [
        count_if(is_boost_pickup & (string_col("boost_pickup_type") == kind), output)
        for kind, output in pickup_kinds
    ]
    aggregations.append(
        sum_if(is_boost_pickup, number_col("boost_pickup_amount"), "boost_collected")
    )
    aggregations += [
        count_if(is_shot & flag_col(flag), output) for flag, output in shot_flags
    ]

    if has_xg:
        xg = number_col("xG")
        aggregations += [
            sum_if(is_shot, xg, "expected_goals"),
            sum_if(is_goal, xg, "expected_goals_on_goals"),
            sum_if(is_shot & ~is_goal, xg, "expected_goals_on_misses"),
        ]

    actor_id = string_col("event_player_1_id")
    attributed = events.filter(actor_id != "")
    grouped = attributed.group_by(["replay_id", actor_id.alias("player_id")])
    return grouped.agg(aggregations)
304
+
305
+
306
def secondary_event_stats(events):
    """Aggregate per-player totals for events where the player is player 2.

    Args:
        events: Event rows; rows with an empty ``event_player_2_id`` are
            ignored.

    Returns:
        One row per (``replay_id``, ``player_id``) with assists, passes
        received, demos and bumps taken, and challenge / kickoff losses.
    """
    event_type = pl.col("event_type")
    receiver_id = string_col("event_player_2_id")
    has_receiver = receiver_id != ""

    # NOTE(review): rows are filtered to has_receiver before aggregating, so
    # the second operand is always true there and every goal with a second
    # player counts as an assist - confirm whether only official assists
    # were intended.
    is_assist = (event_type == "goal") & (flag_col("official_assist") | has_receiver)

    # (event_type value, output column)
    typed_counts = (
        ("pass", "passes_received"),
        ("demo", "demos_taken"),
        ("bump", "bumps_taken"),
        ("challenge", "challenge_losses"),
        ("kickoff", "kickoff_losses"),
    )

    aggregations = [count_if(is_assist, "assists")]
    aggregations += [
        count_if(event_type == kind, output) for kind, output in typed_counts
    ]

    grouped = events.filter(has_receiver).group_by(
        ["replay_id", receiver_id.alias("player_id")]
    )
    return grouped.agg(aggregations)
330
+
331
+
332
def team_event_stats(events, has_xg):
    """Aggregate per-team "for" totals for each replay.

    Args:
        events: Event rows; only rows whose ``event_team`` is ``"blue"`` or
            ``"orange"`` are used.
        has_xg: When true, ``expected_goals_for`` is added, summed from the
            ``"xG"`` column over shots.

    Returns:
        One row per (``replay_id``, ``team``) with ``*_for`` columns.
    """
    event_type = pl.col("event_type")
    is_shot = event_type.is_in(["shot", "goal"])
    is_goal = event_type == "goal"

    # (event_type value, output column) - emitted in this order.
    typed_counts = (
        ("save", "saves_for"),
        ("demo", "demos_for"),
        ("challenge", "challenges_won_for"),
        ("entry", "entries_for"),
        ("exit", "exits_for"),
        ("retrieval", "retrievals_for"),
        ("air_dribble", "air_dribbles_for"),
        ("ground_dribble", "ground_dribbles_for"),
        ("flick", "flicks_for"),
        ("flip_reset", "flip_resets_for"),
        ("boost-pickup", "boost_pickups_for"),
    )

    aggregations = [
        count_if(is_shot, "shots_for"),
        count_if(is_goal, "goals_for"),
    ]
    aggregations += [
        count_if(event_type == kind, output) for kind, output in typed_counts
    ]
    aggregations.append(
        sum_if(
            event_type == "boost-pickup",
            number_col("boost_pickup_amount"),
            "boost_collected_for",
        )
    )

    if has_xg:
        aggregations.append(sum_if(is_shot, number_col("xG"), "expected_goals_for"))

    on_a_team = string_col("event_team").is_in(["blue", "orange"])
    grouped = events.filter(on_a_team).group_by(
        ["replay_id", pl.col("event_team").alias("team")]
    )
    return grouped.agg(aggregations)
370
+
371
+
372
def calculate_player_replay_stats(frames):
    """Compute one row of statistics per player per replay.

    Args:
        frames: Any input accepted by ``_to_lazy_frames``.

    Returns:
        A collected Polars ``DataFrame`` sorted by replay, team, player name
        and player id. It holds the player's identity columns, their own
        event totals, their team's ``*_for`` totals and the opposing team's
        totals as ``*_against``. Expected-goals columns are present only when
        the input has an ``"xG"`` column.
    """
    rows = _to_lazy_frames(frames)
    has_xg = "xG" in rows.collect_schema().names()
    rows = _ensure_columns(rows, requested_columns(has_xg))

    # Fall back to game_id where no replay_id is given, and normalise the
    # two event columns to text.
    replay_id = string_col("replay_id")
    rows = rows.with_columns(
        [
            pl.when(replay_id != "")
            .then(replay_id)
            .otherwise(string_col("game_id"))
            .alias("replay_id"),
            string_col("event_type").alias("event_type"),
            string_col("event_team").alias("event_team"),
        ]
    )
    events = rows.filter(string_col("event_type") != "")

    players = player_frame(rows)
    primary = primary_event_stats(events, has_xg)
    secondary = secondary_event_stats(events)
    team_for = team_event_stats(events, has_xg)

    # The opponent's "for" totals are this player's "against" totals.
    renames = {"team": "opponent_team"}
    for column in team_for.collect_schema().names():
        if column.endswith("_for"):
            renames[column] = column.replace("_for", "_against")
    team_against = team_for.rename(renames)

    opponent_team = (
        pl.when(pl.col("team") == "blue")
        .then(pl.lit("orange"))
        .otherwise(pl.lit("blue"))
        .alias("opponent_team")
    )
    player_keys = ["replay_id", "player_id"]
    stats = (
        players.with_columns([opponent_team])
        .join(primary, on=player_keys, how="left")
        .join(secondary, on=player_keys, how="left")
        .join(team_for, on=["replay_id", "team"], how="left")
        .join(team_against, on=["replay_id", "opponent_team"], how="left")
        .drop("opponent_team")
    )

    identity_columns = [
        "replay_id",
        "player_id",
        "network_id",
        "player_name",
        "team",
        "team_name",
        "platform",
        "rank",
        "rank_tier",
        "pro_player",
        "mmr",
        "car_id",
        "car_name",
        "is_bot",
        "time_in_game",
        "time_on_field",
    ]

    # Everything that is not identity came from a left join; a player or team
    # without matching events gets 0 instead of null.
    zero_filled = [
        pl.col(name).fill_null(0)
        for name in stats.collect_schema().names()
        if name not in identity_columns
    ]
    stats = stats.with_columns(zero_filled)

    if has_xg:
        shots = pl.col("shots")
        per_shot = (
            pl.when(shots > 0)
            .then(pl.col("expected_goals") / shots)
            .otherwise(0.0)
            .alias("expected_goals_per_shot")
        )
        own_delta = (pl.col("goals") - pl.col("expected_goals")).alias(
            "goals_minus_expected_goals"
        )
        for_delta = (pl.col("goals_for") - pl.col("expected_goals_for")).alias(
            "goals_for_minus_expected_goals_for"
        )
        against_delta = (
            pl.col("goals_against") - pl.col("expected_goals_against")
        ).alias("goals_against_minus_expected_goals_against")
        stats = stats.with_columns([per_shot, own_delta, for_delta, against_delta])

    ordered = stats.sort(["replay_id", "team", "player_name", "player_id"])
    return ordered.collect()
471
+
472
+
473
def calculate_stats(frames, return_type="polars", export=None):
    """Compute per-player replay statistics, optionally writing them to disk.

    Args:
        frames: Parsed frame data in any form accepted by
            ``calculate_player_replay_stats``.
        return_type: ``"polars"`` (DataFrame), ``"pandas"`` (DataFrame) or
            ``"list"`` (list of row dicts).
        export: Optional output file. A ``.parquet`` suffix (any letter case)
            writes Parquet; every other suffix writes CSV. Missing parent
            directories are created.

    Returns:
        The statistics in the representation selected by ``return_type``.

    Raises:
        ValueError: ``return_type`` is not one of the supported values.
    """
    # Validate first so a typo neither costs a full computation nor leaves an
    # export file behind before the error is raised.
    if return_type not in ("polars", "pandas", "list"):
        raise ValueError("return_type must be one of: 'polars', 'pandas', 'list'")

    stats = calculate_player_replay_stats(frames)

    if export is not None:
        export = Path(export)
        export.parent.mkdir(parents=True, exist_ok=True)

        # Case-insensitive so "stats.PARQUET" is not filled with CSV text.
        if export.suffix.lower() == ".parquet":
            stats.write_parquet(export)
        else:
            stats.write_csv(export)

    if return_type == "pandas":
        return stats.to_pandas()

    if return_type == "list":
        return stats.to_dicts()

    return stats
@@ -0,0 +1,3 @@
1
+ Metadata-Version: 2.4
2
+ Name: analyzerl_parser
3
+ Version: 0.1.2
@@ -0,0 +1,8 @@
1
+ analyzerl_parser/__init__.py,sha256=B0LYJ62s2F8lAfTE1zqtWzBVgqQBmRoIPfW1EzHLBzI,69
2
+ analyzerl_parser/_boxcars.pyd,sha256=kClVGucZRCh0i1CTISBwZL1xfdXzOOVf5B6eWaYPJbg,1551360
3
+ analyzerl_parser/parse.py,sha256=EbdpbCWBRKoLwYqh4gdsM9CS11NrVqsqVmK8LOD2jj8,2519
4
+ analyzerl_parser/stats.py,sha256=5jmMLZeYE-PYZngNWkzSLANBApevB2vsCBuwdHVyGS8,16512
5
+ analyzerl_parser-0.1.2.dist-info/METADATA,sha256=t7l6oxV5mzTgO_K33aLJW3GIHJTzJblp420TAPQEw1s,60
6
+ analyzerl_parser-0.1.2.dist-info/WHEEL,sha256=ZiBmjEMKX1iTyJmqXZl_jTcrFdOtPkEbr7SVi1lbRvI,95
7
+ analyzerl_parser-0.1.2.dist-info/sboms/analyzerl_boxcars.cyclonedx.json,sha256=4FlZMlzEPVRYofRbkDlpHpV0pOxB98mAsFAehoH_4J8,179469
8
+ analyzerl_parser-0.1.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.14.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp39-abi3-win_amd64