cs2df 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cs2df/validate.py ADDED
@@ -0,0 +1,551 @@
1
+ """Validate a cs2-demo-format v3 ZIP against the JSON Schemas + package-level QA.
2
+
3
+ Pure stdlib + jsonschema — works without the demoparser2/pandas stack, so
4
+ `cs2df validate` (and the thin tools/validate.py wrapper) run anywhere.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import math
11
+ import re
12
+ import sys
13
+ import zipfile
14
+ from pathlib import Path
15
+
16
+ REQUIRED_KEYS = {
17
+ "match", "players", "rounds", "playerStats", "playerEconomies",
18
+ "kills", "damages", "blinds", "bombs", "grenades", "clutches",
19
+ }
20
+ OPTIONAL_KEYS = {"shots", "replay", "duels"}
21
+ KNOWN_SCHEMA_VERSIONS = {"cs2-demo-format/3.0"}
22
+ EPS = 0.02
23
+
24
+ # stream track columns: replay/duel columns must each have length == frameCount; shot columns must all share one common length
25
+ _REPLAY_COLS = ("x", "y", "z", "yaw", "pitch", "hp", "armor", "money",
26
+ "equipValue", "weapon", "place", "flash", "flags")
27
+ _DUEL_COLS = ("x", "y", "z", "yaw", "pitch", "hp", "flash")
28
+ _SHOT_COLS = ("tick", "weapon", "x", "y", "z", "vx", "vy", "vz", "yaw", "pitch")
29
+
30
+
31
def decode_delta(values: list) -> list:
    """Decode a delta-encoded sequence into absolute values.

    Each output element is the running sum of the inputs up to and including
    that position, so ``[5, 1, -2]`` decodes to ``[5, 6, 4]``. An empty input
    yields an empty list.
    """
    running = 0
    # The walrus rebinds ``running`` in this function's scope on every step.
    return [(running := running + delta) for delta in values]
38
+
39
+
40
def find_invalid_json_values(text: str) -> list[tuple[int, str]]:
    """Locate bare ``NaN`` / ``Infinity`` / ``-Infinity`` tokens in JSON text.

    These tokens are accepted by Python's ``json`` module but are not valid
    JSON. Occurrences inside string literals (for example a player named
    ``"NaN"``) are legitimate data and are not reported.

    Args:
        text: Raw JSON document text.

    Returns:
        A list of ``(offset, token)`` pairs in document order, where ``offset``
        is the index of the token's first character (including a leading
        minus sign).
    """
    # First alternative consumes a whole string literal (honouring backslash
    # escapes) so its content is skipped; only the second alternative captures.
    pattern = r'"(?:[^"\\]|\\.)*"|(?<!\w)(-?Infinity|NaN)\b'
    return [
        (m.start(1), m.group(1))
        for m in re.finditer(pattern, text)
        if m.group(1) is not None
    ]
42
+
43
+
44
def load_schemas(spec_dir: Path) -> dict:
    """Load every ``*.schema.json`` file in *spec_dir*.

    Args:
        spec_dir: Directory that holds the JSON Schema files.

    Returns:
        A dict mapping the file name without its ``.schema.json`` suffix to the
        parsed schema. Files are visited in sorted order.

    Raises:
        json.JSONDecodeError: If a schema file is not valid JSON.
    """
    suffix = ".schema.json"
    schemas = {}
    for schema_file in sorted(spec_dir.glob(f"*{suffix}")):
        # Strip only the trailing suffix; a ".schema" inside the name is kept.
        key = schema_file.name.removesuffix(suffix)
        # JSON is UTF-8 by specification; do not depend on the locale default.
        schemas[key] = json.loads(schema_file.read_text(encoding="utf-8"))
    return schemas
50
+
51
+
52
def validate_zip(zip_path: Path, spec_dir: Path, strict: bool = False) -> bool:
    """Validate one exported ZIP package and print a human-readable report.

    The manifest and every file it declares are parsed and checked against the
    JSON Schemas found in *spec_dir*; the parsed data is then handed to the
    package-level QA checks.

    Args:
        zip_path: Path of the ZIP package to validate.
        spec_dir: Directory containing the ``*.schema.json`` files.
        strict: When true, warnings count as failures.

    Returns:
        ``True`` when the package passes, ``False`` otherwise.

    Exits the process with status 1 when ``jsonschema`` is not installed.
    """
    try:
        import jsonschema  # noqa: F401
    except ImportError:
        print("ERROR: jsonschema not installed. Run: pip install jsonschema", file=sys.stderr)
        sys.exit(1)

    print(f"Validating: {zip_path.name}\n")
    schemas = load_schemas(spec_dir)
    errors: list[str] = []
    warnings: list[str] = []
    data_by_key: dict[str, object] = {}
    known_keys = REQUIRED_KEYS | OPTIONAL_KEYS

    def err(msg: str):
        errors.append(msg)
        print(f" ✗ {msg}")

    def warn(msg: str):
        warnings.append(msg)
        print(f" ⚠ {msg}")

    def ok(msg: str):
        print(f" ✓ {msg}")

    def note(msg: str):
        print(f" • {msg}")

    with zipfile.ZipFile(zip_path) as zf:
        zip_names = set(zf.namelist())
        if "manifest.json" not in zip_names:
            err("manifest.json: missing from ZIP")
            return _finish(errors, warnings, strict)

        # _read_json returns None both on failure and for a literal `null`
        # document; only a failure appends to `errors`, so compare the count.
        errors_before = len(errors)
        manifest = _read_json(zf, "manifest.json", errors)
        if manifest is None and len(errors) > errors_before:
            return _finish(errors, warnings, strict)
        data_by_key["manifest"] = manifest

        _validate_schema("manifest", manifest, schemas, errors)
        # A non-object manifest is treated as empty so the checks below report
        # the missing pieces instead of raising AttributeError.
        manifest_obj = manifest if isinstance(manifest, dict) else {}
        schema_version = manifest_obj.get("schemaVersion", "")
        if not isinstance(schema_version, str) or schema_version not in KNOWN_SCHEMA_VERSIONS:
            err(f"manifest.json: unsupported schemaVersion '{schema_version}'")

        files_map = manifest_obj.get("files", {})
        if not isinstance(files_map, dict):
            err("manifest.files: must be an object mapping keys to file names")
            files_map = {}
        for key in sorted(REQUIRED_KEYS):
            if key not in files_map:
                err(f"manifest.files: missing required key '{key}'")
        for key in files_map:
            if key not in known_keys:
                err(f"manifest.files: unknown key '{key}'")

        for key, filename in files_map.items():
            if not isinstance(filename, str):
                err(f"manifest.files: file name for key '{key}' must be a string")
                continue
            if filename not in zip_names:
                if key in REQUIRED_KEYS:
                    err(f"{filename}: declared for required key '{key}' but missing from ZIP")
                else:
                    warn(f"{filename}: declared optional key '{key}' but file is missing")
                continue
            errors_before = len(errors)
            value = _read_json(zf, filename, errors)
            if value is None and len(errors) > errors_before:
                continue  # unreadable or unparsable; already reported
            # A literal `null` document falls through so the schema rejects it
            # instead of the file being silently skipped.
            data_by_key[key] = value
            _validate_schema(key, value, schemas, errors)
            count = len(value) if isinstance(value, list) else 1
            ok(f"{filename} ({count} {'rows' if count != 1 else 'row'})")

    _package_qa(data_by_key, errors, warnings, note)
    print()
    return _finish(errors, warnings, strict)
121
+
122
+
123
def _read_json(zf: zipfile.ZipFile, filename: str, errors: list[str]):
    """Read and parse one JSON member of the archive.

    Args:
        zf: Open ZIP archive.
        filename: Name of the member to read.
        errors: List that receives a message when the member cannot be used;
            the message is also printed.

    Returns:
        The parsed JSON value, or ``None`` when the member is corrupt, is not
        UTF-8, contains ``NaN``/``Infinity`` tokens, or fails to parse. A
        document that is literally ``null`` also yields ``None``, but without
        recording an error.
    """

    def fail(msg: str):
        errors.append(msg)
        print(f" ✗ {msg}")
        return None

    try:
        raw = zf.read(filename).decode("utf-8")
    except zipfile.BadZipFile as e:
        # Raised by ZipFile.read for a damaged member (for example a bad CRC).
        return fail(f"{filename}: corrupt ZIP member — {e}")
    except UnicodeDecodeError as e:
        return fail(f"{filename}: not valid UTF-8 — {e}")

    invalid = find_invalid_json_values(raw)
    if invalid:
        sample = ", ".join(f"{v}@{off}" for off, v in invalid[:5])
        return fail(f"{filename}: invalid JSON value(s): {sample}")
    try:
        return json.loads(raw)
    except json.JSONDecodeError as e:
        return fail(f"{filename}: JSON parse error — {e}")
137
+
138
+
139
+ def _validate_schema(key: str, value, schemas: dict, errors: list[str]):
140
+ schema = schemas.get(key)
141
+ if schema is None:
142
+ errors.append(f"{key}: no JSON Schema found")
143
+ print(f" ✗ {key}: no JSON Schema found")
144
+ return
145
+ import jsonschema
146
+
147
+ validator = jsonschema.Draft7Validator(schema)
148
+ schema_errors = sorted(validator.iter_errors(value), key=lambda e: list(e.absolute_path))
149
+ for e in schema_errors[:20]:
150
+ path = " → ".join(str(p) for p in e.absolute_path) or "(root)"
151
+ errors.append(f"{key}: [{path}] {e.message}")
152
+ print(f" ✗ {key}: [{path}] {e.message}")
153
+ if len(schema_errors) > 20:
154
+ errors.append(f"{key}: {len(schema_errors) - 20} additional schema error(s)")
155
+ print(f" ✗ {key}: {len(schema_errors) - 20} additional schema error(s)")
156
+
157
+
158
+ # ── package-level QA ─────────────────────────────────────────────────────────
159
+
160
def _package_qa(data: dict, errors: list[str], warnings: list[str], note):
    """Run the cross-file consistency checks over the parsed package.

    Args:
        data: Parsed JSON values keyed by manifest file key (``"players"``,
            ``"rounds"``, ...). Entries that are missing or of the wrong
            container type are treated as empty/absent.
        errors: List that receives one message per detected problem; every
            message is also printed.
        warnings: Not used by the checks in this function.
        note: Callable used to print an informational line.

    Values of the wrong type (for example JSON ``null`` where a tick is
    expected) are reported or skipped rather than raising, because this runs
    even when schema validation has already failed.
    """

    def err(msg: str):
        errors.append(msg)
        print(f" ✗ {msg}")

    def is_number(value) -> bool:
        return isinstance(value, (int, float))

    players = _as_list(data.get("players"))
    rounds = _as_list(data.get("rounds"))
    stats = _as_list(data.get("playerStats"))
    economies = _as_list(data.get("playerEconomies"))
    kills = _as_list(data.get("kills"))
    damages = _as_list(data.get("damages"))
    blinds = _as_list(data.get("blinds"))
    bombs = _as_list(data.get("bombs"))
    grenades = _as_list(data.get("grenades"))
    clutches = _as_list(data.get("clutches"))
    shots = data.get("shots") if isinstance(data.get("shots"), dict) else None
    replay = data.get("replay") if isinstance(data.get("replay"), dict) else None
    duels = data.get("duels") if isinstance(data.get("duels"), dict) else None
    match = data.get("match") if isinstance(data.get("match"), dict) else {}

    n_players = len(players)
    team_by_index = {i: p.get("teamKey") for i, p in enumerate(players) if isinstance(p, dict)}
    round_numbers = [r.get("roundNumber") for r in rounds if isinstance(r, dict)]
    round_set = set(round_numbers)
    rounds_by_number = {r.get("roundNumber"): r for r in rounds if isinstance(r, dict)}

    note(f"rows: players={n_players}, rounds={len(rounds)}, kills={len(kills)}, damages={len(damages)}")

    if round_numbers:
        numeric_rounds = [n for n in round_numbers if is_number(n)]
        # Compare against 1..len instead of 1..max: same verdict for numeric
        # input, but a bogus huge roundNumber cannot allocate a giant list and
        # a missing/null roundNumber cannot break max()/sorted().
        if (len(numeric_rounds) != len(round_numbers)
                or sorted(numeric_rounds) != list(range(1, len(numeric_rounds) + 1))):
            err("rounds.json: roundNumber must be continuous from 1 with no gaps/duplicates")

    bad_tick_rounds = []
    for r in rounds:
        if not isinstance(r, dict):
            continue
        if r.get("teamASide") == r.get("teamBSide"):
            err(f"rounds.json round {r.get('roundNumber')}: teamASide and teamBSide must differ")
        start_tick = r.get("startTick", 0)
        freeze_end_tick = r.get("freezeEndTick", 0)
        end_tick = r.get("endTick", 0)
        # Non-numeric ticks (e.g. null) count as a violation instead of raising.
        ticks_numeric = all(is_number(v) for v in (start_tick, freeze_end_tick, end_tick))
        if not ticks_numeric or not (start_tick < freeze_end_tick <= end_tick):
            bad_tick_rounds.append(r.get("roundNumber"))
    if bad_tick_rounds:
        sample = ", ".join(str(v) for v in bad_tick_rounds[:8])
        err(f"rounds.json: {len(bad_tick_rounds)} row(s) violate tick order start < freezeEnd <= end; sample rounds: {sample}")

    _check_match_score(match, rounds, err)
    _check_round_winner_sides(rounds, err)

    for name, rows in [("kills", kills), ("damages", damages), ("blinds", blinds),
                       ("bombs", bombs), ("grenades", grenades), ("clutches", clutches),
                       ("player-economies", economies)]:
        _check_event_rounds(name, rows, round_set, err)

    _check_tick_windows("kills", kills, rounds_by_number, err, ["tick"])
    _check_tick_windows("damages", damages, rounds_by_number, err, ["tick"])
    _check_tick_windows("blinds", blinds, rounds_by_number, err, ["tick"])
    _check_tick_windows("bombs", bombs, rounds_by_number, err, ["tick"])
    _check_tick_windows("grenades", grenades, rounds_by_number, err,
                        ["throwTick", "effectTick", "destroyTick"])
    _check_tick_windows("clutches", clutches, rounds_by_number, err, ["tick"])

    # playerIndex references must be in range
    for file_name, rows, fields in [
        ("kills", kills, ["killerIndex", "victimIndex", "assisterIndex", "flashAssisterIndex"]),
        ("damages", damages, ["attackerIndex", "victimIndex"]),
        ("blinds", blinds, ["flasherIndex", "flashedIndex"]),
        ("bombs", bombs, ["actorIndex"]),
        ("grenades", grenades, ["throwerIndex"]),
        ("clutches", clutches, ["clutcherIndex"]),
        ("player-economies", economies, ["playerIndex"]),
        ("player-stats", stats, ["playerIndex"]),
    ]:
        _check_player_refs(file_name, rows, fields, n_players, err)

    expected_economies = len(rounds) * n_players
    economy_keys = {(r.get("roundNumber"), r.get("playerIndex")) for r in economies if isinstance(r, dict)}
    if len(economy_keys) != expected_economies:
        err(f"player-economies.json: expected {expected_economies} round/player rows, got {len(economy_keys)} unique rows")

    # damages: cap equation
    for d in damages:
        if not isinstance(d, dict):
            continue
        raw = d.get("healthDamageRaw")
        effective = d.get("healthDamage")
        before = d.get("victimHealthBefore")
        if all(isinstance(v, (int, float)) for v in [raw, effective, before]):
            if effective != min(raw, before):
                err(f"damages.json round {d.get('roundNumber')} tick {d.get('tick')}: healthDamage must equal min(healthDamageRaw, victimHealthBefore)")

    # aggregate damage cross-check (anti-enemy, capped)
    damage_by_player: dict[int, int] = {}
    utility_by_player: dict[int, int] = {}
    utility_weapons = {"hegrenade", "inferno", "molotov", "incendiary"}
    for d in damages:
        if not isinstance(d, dict):
            continue
        atk = d.get("attackerIndex")
        vic = d.get("victimIndex")
        if atk is None or atk == vic:
            continue
        if team_by_index.get(atk) == team_by_index.get(vic):
            continue
        health = d.get("healthDamage")
        if not isinstance(health, int):
            continue
        damage_by_player[atk] = damage_by_player.get(atk, 0) + health
        if d.get("weapon") in utility_weapons:
            utility_by_player[atk] = utility_by_player.get(atk, 0) + health

    # (derived field, source field, scale) for the per-round ratio checks
    ratio_checks = (
        ("adr", "damageHealth", 1),
        ("averageUtilityDamagePerRound", "utilityDamage", 1),
        ("kast", "kastRounds", 100),
    )
    for s in stats:
        if not isinstance(s, dict):
            continue
        idx = s.get("playerIndex")
        rounds_count = s.get("rounds")
        label = f"player-stats.json playerIndex={idx}"
        if rounds_count != len(rounds):
            err(f"{label}: rounds must equal rounds.length ({len(rounds)})")
        _expect_equal(s, "damageHealth", damage_by_player.get(idx, 0), err, label)
        _expect_equal(s, "utilityDamage", utility_by_player.get(idx, 0), err, label)
        if isinstance(rounds_count, int) and rounds_count > 0:
            for derived, source, scale in ratio_checks:
                source_value = s.get(source, 0)
                if not is_number(source_value):
                    # Dividing null/strings would raise; report it instead.
                    err(f"{label}: {source} must be a number")
                    continue
                _expect_close(s.get(derived), source_value / rounds_count * scale, err, f"{label}: {derived}")
        for field in ["firstKillCount", "firstDeathCount", "kastRounds", "oneKillCount",
                      "twoKillCount", "threeKillCount", "fourKillCount", "fiveKillCount"]:
            if isinstance(s.get(field), int) and isinstance(rounds_count, int) and s[field] > rounds_count:
                err(f"{label}: {field} cannot exceed rounds")
        for n in ["One", "Two", "Three", "Four", "Five"]:
            count = s.get(f"vs{n}Count")
            won = s.get(f"vs{n}WonCount")
            lost = s.get(f"vs{n}LostCount")
            if all(isinstance(v, int) for v in [count, won, lost]) and won + lost != count:
                err(f"{label}: vs{n}WonCount + vs{n}LostCount must equal vs{n}Count")

    for k in kills:
        if isinstance(k, dict) and k.get("flashAssist") and k.get("flashAssisterIndex") is None:
            err(f"kills.json round {k.get('roundNumber')} tick {k.get('tick')}: flashAssist=true requires flashAssisterIndex")

    for b in bombs:
        if not isinstance(b, dict):
            continue
        if b.get("type") in {"planted", "defused"} and b.get("actorIndex") is None:
            err(f"bombs.json round {b.get('roundNumber')} tick {b.get('tick')}: {b.get('type')} requires actorIndex")
    _check_bomb_lifecycle(bombs, err)

    for g in grenades:
        if not isinstance(g, dict):
            continue
        destroy_tick = g.get("destroyTick")
        effect_tick = g.get("effectTick", 0)
        # A null effectTick cannot be ordered against destroyTick; skip it.
        if isinstance(destroy_tick, int) and is_number(effect_tick) and destroy_tick < effect_tick:
            err(f"grenades.json round {g.get('roundNumber')} tick {g.get('throwTick')}: destroyTick must be >= effectTick")

    # ── columnar stream QA ────────────────────────────────────────────────
    if shots is not None:
        _check_shots_stream(shots, n_players, round_set, rounds_by_number, err)
    if replay is not None:
        _check_replay_stream(replay, n_players, round_set, rounds_by_number, err)
    if duels is not None:
        _check_duels_stream(duels, n_players, round_set, rounds_by_number, err)
317
+
318
+
319
def _check_shots_stream(shots: dict, n_players: int, round_set: set,
                        rounds_by_number: dict, err):
    """Check every track of the shots stream.

    For each track this verifies the round and player references, that all
    shot columns are arrays of one common length, that weapon indexes point
    into ``weaponDict``, and that the delta-decoded ticks fall between the
    round's ``freezeEndTick`` and ``endTick``.

    Args:
        shots: Parsed shots document.
        n_players: Number of entries in players.json.
        round_set: Round numbers present in rounds.json.
        rounds_by_number: Round rows keyed by round number.
        err: Callable that records one error message.
    """
    wd = _as_list(shots.get("weaponDict"))
    for ti, t in enumerate(_as_list(shots.get("tracks"))):
        if not isinstance(t, dict):
            continue
        label = f"shots.json tracks[{ti}]"
        if t.get("roundNumber") not in round_set:
            err(f"{label}: roundNumber {t.get('roundNumber')} not in rounds.json")
            continue
        if not _index_ok(t.get("playerIndex"), n_players):
            err(f"{label}: playerIndex out of range")
        columns = {c: t.get(c, []) for c in _SHOT_COLS}
        not_arrays = [c for c, col in columns.items() if not isinstance(col, list)]
        if not_arrays:
            # len()/iteration on null or scalar columns would raise.
            err(f"{label}: column(s) must be arrays: {', '.join(not_arrays)}")
            continue
        lengths = {c: len(col) for c, col in columns.items()}
        if len(set(lengths.values())) > 1:
            err(f"{label}: column lengths differ: {lengths}")
            continue
        for w in columns["weapon"]:
            if not (isinstance(w, int) and 0 <= w < len(wd)):
                err(f"{label}: weapon index {w} out of weaponDict range")
                break
        deltas = columns["tick"]
        if not all(isinstance(delta, int) for delta in deltas):
            err(f"{label}: tick deltas must be integers")
            continue
        ticks = decode_delta(deltas)
        rd = rounds_by_number.get(t.get("roundNumber"))
        if rd and ticks:
            lo = rd.get("freezeEndTick", 0)
            hi = rd.get("endTick", 0)
            # Rounds with non-numeric bounds cannot be compared; skip them here.
            if (isinstance(lo, (int, float)) and isinstance(hi, (int, float))
                    and (min(ticks) < lo or max(ticks) > hi)):
                err(f"{label}: decoded ticks fall outside the round window")
343
+
344
+
345
def _check_track_frames(label: str, track: dict, cols: tuple, frame_count: int,
                        n_players: int, err) -> None:
    """Check one per-player track of a frame-based stream.

    Reports an error when the track is not an object, when its ``playerIndex``
    is out of range, or when any column named in *cols* is not an array of
    exactly *frame_count* entries (a missing column counts as length 0).

    Args:
        label: Prefix for error messages.
        track: Parsed track object.
        cols: Names of the columns to check.
        frame_count: Expected length of every column.
        n_players: Number of entries in players.json.
        err: Callable that records one error message.
    """
    if not isinstance(track, dict):
        err(f"{label}: track must be an object")
        return
    if not _index_ok(track.get("playerIndex"), n_players):
        err(f"{label}: playerIndex out of range")
    bad = {}
    for c in cols:
        column = track.get(c, [])
        if not isinstance(column, list):
            # Report the offending type instead of calling len() on it.
            bad[c] = f"<{type(column).__name__}>"
        elif len(column) != frame_count:
            bad[c] = len(column)
    if bad:
        err(f"{label}: column lengths != frameCount {frame_count}: {bad}")
352
+
353
+
354
def _check_replay_stream(replay: dict, n_players: int, round_set: set,
                         rounds_by_number: dict, err):
    """Check every round of the replay stream.

    For each round this verifies the round reference, that the frame grid
    (``startTick`` + ``tickStep`` * frame) lies between the round's
    ``freezeEndTick`` and ``endTick``, each player track's columns, the weapon
    and place dictionary indexes (``-1`` is accepted as a sentinel), and each
    projectile's coordinate arrays and thrower reference.

    Args:
        replay: Parsed replay document.
        n_players: Number of entries in players.json.
        round_set: Round numbers present in rounds.json.
        rounds_by_number: Round rows keyed by round number.
        err: Callable that records one error message.
    """

    def is_number(value) -> bool:
        return isinstance(value, (int, float))

    wd = _as_list(replay.get("weaponDict"))
    pld = _as_list(replay.get("placeDict"))
    for rd_obj in _as_list(replay.get("rounds")):
        if not isinstance(rd_obj, dict):
            continue
        rn = rd_obj.get("roundNumber")
        label = f"replay.json round {rn}"
        if rn not in round_set:
            err(f"{label}: roundNumber not in rounds.json")
            continue
        fc = rd_obj.get("frameCount", 0)
        rd = rounds_by_number.get(rn, {})
        start = rd_obj.get("startTick", 0)
        step = rd_obj.get("tickStep", 1)
        if not all(is_number(v) for v in (fc, start, step)):
            # Arithmetic on null/strings would raise; report and move on.
            err(f"{label}: frameCount, startTick and tickStep must be numbers")
        elif fc and rd:
            last_tick = start + (fc - 1) * step
            lo = rd.get("freezeEndTick", 0)
            hi = rd.get("endTick", 0)
            if is_number(lo) and is_number(hi) and (start < lo or last_tick > hi):
                err(f"{label}: frame grid [{start}, {last_tick}] outside round window")
        for pi, track in enumerate(_as_list(rd_obj.get("players"))):
            tlabel = f"{label} players[{pi}]"
            if not isinstance(track, dict):
                err(f"{tlabel}: track must be an object")
                continue
            _check_track_frames(tlabel, track, _REPLAY_COLS, fc, n_players, err)
            for w in _as_list(track.get("weapon")):
                if w != -1 and not (isinstance(w, int) and 0 <= w < len(wd)):
                    err(f"{tlabel}: weapon index {w} out of weaponDict range")
                    break
            for p in _as_list(track.get("place")):
                if p != -1 and not (isinstance(p, int) and 0 <= p < len(pld)):
                    err(f"{tlabel}: place index {p} out of placeDict range")
                    break
        for qi, proj in enumerate(_as_list(rd_obj.get("projectiles"))):
            if not isinstance(proj, dict):
                err(f"{label} projectiles[{qi}]: projectile must be an object")
                continue
            n = len(_as_list(proj.get("x")))
            if len(_as_list(proj.get("y"))) != n or len(_as_list(proj.get("z"))) != n:
                err(f"{label} projectiles[{qi}]: x/y/z lengths differ")
            if proj.get("throwerIndex") is not None and not _index_ok(proj.get("throwerIndex"), n_players):
                err(f"{label} projectiles[{qi}]: throwerIndex out of range")
391
+
392
+
393
def _check_duels_stream(duels: dict, n_players: int, round_set: set,
                        rounds_by_number: dict, err):
    """Check every window of the duels stream.

    For each window this verifies the round reference, that the frame grid
    lies between the round's ``freezeEndTick`` and ``endTick``, that at least
    one anchor exists, that every anchor's tick lies inside the window and its
    player references are in range, and each player track's columns.

    Args:
        duels: Parsed duels document.
        n_players: Number of entries in players.json.
        round_set: Round numbers present in rounds.json.
        rounds_by_number: Round rows keyed by round number.
        err: Callable that records one error message.
    """

    def is_number(value) -> bool:
        return isinstance(value, (int, float))

    for wi, w in enumerate(_as_list(duels.get("windows"))):
        if not isinstance(w, dict):
            continue
        rn = w.get("roundNumber")
        label = f"duels.json windows[{wi}] (round {rn})"
        if rn not in round_set:
            err(f"{label}: roundNumber not in rounds.json")
            continue
        fc = w.get("frameCount", 0)
        rd = rounds_by_number.get(rn, {})
        start = w.get("startTick", 0)
        step = w.get("tickStep", 1)
        grid_ok = all(is_number(v) for v in (fc, start, step))
        if not grid_ok:
            # Arithmetic on null/strings would raise; report and skip the
            # checks that depend on the grid.
            err(f"{label}: frameCount, startTick and tickStep must be numbers")
        elif fc and rd:
            last_tick = start + (fc - 1) * step
            lo = rd.get("freezeEndTick", 0)
            hi = rd.get("endTick", 0)
            if is_number(lo) and is_number(hi) and (start < lo or last_tick > hi):
                err(f"{label}: frame grid [{start}, {last_tick}] outside round window")
        anchors = _as_list(w.get("anchors"))
        if not anchors:
            err(f"{label}: empty anchors")
        window_end = start + max(0, fc - 1) * step if grid_ok else None
        for a in anchors:
            if not isinstance(a, dict):
                continue
            tick = a.get("tick", 0)
            if grid_ok and not (is_number(tick) and start <= tick <= window_end):
                err(f"{label}: anchor tick {a.get('tick')} outside the window")
            if not _index_ok(a.get("victimIndex"), n_players):
                err(f"{label}: anchor victimIndex out of range")
            ai = a.get("attackerIndex")
            if ai is not None and not _index_ok(ai, n_players):
                err(f"{label}: anchor attackerIndex out of range")
        for pi, track in enumerate(_as_list(w.get("players"))):
            tlabel = f"{label} players[{pi}]"
            if not isinstance(track, dict):
                err(f"{tlabel}: track must be an object")
                continue
            _check_track_frames(tlabel, track, _DUEL_COLS, fc, n_players, err)
426
+
427
+
428
+ # ── shared QA helpers ─────────────────────────────────────────────────────────
429
+
430
+ def _as_list(value) -> list:
431
+ return value if isinstance(value, list) else []
432
+
433
+
434
+ def _index_ok(idx, n_players: int) -> bool:
435
+ return isinstance(idx, int) and 0 <= idx < n_players
436
+
437
+
438
def _check_player_refs(name: str, rows: list, fields: list[str], n_players: int, err):
    """Report every player-index field in *rows* that is not a valid index.

    Non-dict rows are ignored and ``None`` (absent/null) references are
    allowed; any other value must pass ``_index_ok``. One error is emitted per
    offending field.
    """
    dict_rows = (row for row in rows if isinstance(row, dict))
    for row in dict_rows:
        for field in fields:
            ref = row.get(field)
            if ref is None or _index_ok(ref, n_players):
                continue
            err(f"{name}.json: {field} {ref!r} is not a valid players.json index")
446
+
447
+
448
+ def _check_event_rounds(name: str, rows: list, round_set: set, err):
449
+ missing: dict[object, int] = {}
450
+ for row in rows:
451
+ if isinstance(row, dict) and row.get("roundNumber") not in round_set:
452
+ missing[row.get("roundNumber")] = missing.get(row.get("roundNumber"), 0) + 1
453
+ if missing:
454
+ total = sum(missing.values())
455
+ sample = ", ".join(f"{k} ({v})" for k, v in list(missing.items())[:8])
456
+ err(f"{name}.json: {total} row(s) reference roundNumber not present in rounds.json; sample: {sample}")
457
+
458
+
459
+ def _check_match_score(match: dict, rounds: list, err):
460
+ if not isinstance(match, dict):
461
+ return
462
+ team_a = match.get("teamA") if isinstance(match.get("teamA"), dict) else {}
463
+ team_b = match.get("teamB") if isinstance(match.get("teamB"), dict) else {}
464
+ expected_a = sum(1 for r in rounds if isinstance(r, dict) and r.get("winnerTeamKey") == "teamA")
465
+ expected_b = sum(1 for r in rounds if isinstance(r, dict) and r.get("winnerTeamKey") == "teamB")
466
+ if team_a.get("score") != expected_a or team_b.get("score") != expected_b:
467
+ err(f"match.json: score must equal round winners ({expected_a}:{expected_b})")
468
+
469
+
470
+ def _check_round_winner_sides(rounds: list, err):
471
+ for r in rounds:
472
+ if not isinstance(r, dict):
473
+ continue
474
+ winner = r.get("winnerTeamKey")
475
+ expected = r.get("teamASide") if winner == "teamA" else r.get("teamBSide") if winner == "teamB" else None
476
+ if expected and r.get("winnerSide") != expected:
477
+ err(f"rounds.json round {r.get('roundNumber')}: winnerSide must match winnerTeamKey side")
478
+
479
+
480
+ def _check_tick_windows(name: str, rows: list, rounds_by_number: dict, err,
481
+ fields: list[str]):
482
+ bad: list[str] = []
483
+ for index, row in enumerate(rows):
484
+ if not isinstance(row, dict):
485
+ continue
486
+ round_row = rounds_by_number.get(row.get("roundNumber"))
487
+ if not isinstance(round_row, dict):
488
+ continue
489
+ for field in fields:
490
+ tick = row.get(field)
491
+ if not isinstance(tick, int):
492
+ continue
493
+ start = round_row.get("freezeEndTick")
494
+ end = round_row.get("endTick")
495
+ if not isinstance(start, int) or not isinstance(end, int):
496
+ continue
497
+ if tick < start or tick > end:
498
+ bad.append(f"row {index} round {row.get('roundNumber')} {field}={tick}")
499
+ break
500
+ if bad:
501
+ sample = "; ".join(bad[:8])
502
+ err(f"{name}.json: {len(bad)} row(s) have ticks outside their round window; sample: {sample}")
503
+
504
+
505
+ def _check_bomb_lifecycle(bombs: list, err):
506
+ by_round: dict[object, list[dict]] = {}
507
+ for b in bombs:
508
+ if isinstance(b, dict):
509
+ by_round.setdefault(b.get("roundNumber"), []).append(b)
510
+ bad: list[str] = []
511
+ for round_number, rows in by_round.items():
512
+ sorted_rows = sorted(rows, key=lambda b: b.get("tick") if isinstance(b.get("tick"), int) else -1)
513
+ planted_tick = None
514
+ for row in sorted_rows:
515
+ event_type = row.get("type")
516
+ tick = row.get("tick")
517
+ if event_type == "planted" and isinstance(tick, int):
518
+ planted_tick = tick
519
+ if event_type in {"exploded", "defused"} and (planted_tick is None or not isinstance(tick, int) or tick < planted_tick):
520
+ bad.append(f"round {round_number} {event_type}@{tick}")
521
+ break
522
+ if bad:
523
+ sample = "; ".join(bad[:8])
524
+ err(f"bombs.json: {len(bad)} round(s) have terminal bomb events before planted; sample: {sample}")
525
+
526
+
527
+ def _expect_equal(row: dict, field: str, expected: int, err, label: str):
528
+ if row.get(field) != expected:
529
+ err(f"{label}: {field} expected {expected}, got {row.get(field)}")
530
+
531
+
532
def _expect_close(actual, expected: float, err, label: str):
    """Report an error unless *actual* is a finite number within EPS of *expected*.

    Non-numeric and non-finite values of *actual* are always reported.
    """
    if not isinstance(actual, (int, float)):
        mismatch = True
    elif not math.isfinite(actual):
        mismatch = True
    else:
        mismatch = abs(actual - expected) > EPS
    if mismatch:
        err(f"{label} expected {expected:.3f}, got {actual}")
535
+
536
+
537
+ def _finish(errors: list, warnings: list, strict: bool) -> bool:
538
+ effective_errors = len(errors) + (len(warnings) if strict else 0)
539
+ if effective_errors:
540
+ parts = []
541
+ if errors:
542
+ parts.append(f"{len(errors)} error(s)")
543
+ if warnings:
544
+ parts.append(f"{len(warnings)} warning(s)" + (" [strict]" if strict else ""))
545
+ print(f"❌ FAIL — {', '.join(parts)}")
546
+ return False
547
+ if warnings:
548
+ print(f"⚠️ PASS with {len(warnings)} warning(s) (run --strict to treat as errors)")
549
+ return True
550
+ print("✅ PASS")
551
+ return True
@@ -0,0 +1,100 @@
1
+ Metadata-Version: 2.4
2
+ Name: cs2df
3
+ Version: 3.0.0
4
+ Summary: Reference exporter & validator CLI for cs2-demo-format v3 (CS2 demo → ZIP data package)
5
+ Project-URL: Homepage, https://github.com/Starfie1d1272/cs2-demo-format
6
+ Project-URL: Repository, https://github.com/Starfie1d1272/cs2-demo-format
7
+ Project-URL: Issues, https://github.com/Starfie1d1272/cs2-demo-format/issues
8
+ License: MIT
9
+ Requires-Python: >=3.11
10
+ Requires-Dist: demoparser2>=0.41.2
11
+ Requires-Dist: jsonschema>=4.21
12
+ Requires-Dist: numpy>=1.26
13
+ Requires-Dist: orjson>=3.9
14
+ Requires-Dist: pandas>=2.0
15
+ Description-Content-Type: text/markdown
16
+
17
+ # cs2df
18
+
19
+ `cs2df` is the reference Python exporter and validator for the
20
+ [`cs2-demo-format`](https://github.com/Starfie1d1272/cs2-demo-format) v3 ZIP
21
+ contract.
22
+
23
+ It parses CS2 `.dem` files with
24
+ [`demoparser2`](https://github.com/LaihoE/demoparser), writes strict v3 ZIP
25
+ packages, and validates exported packages with schema plus package-level QA.
26
+
27
+ ## Setup
28
+
29
+ ```bash
30
+ uv sync
31
+ ```
32
+
33
+ The CLI entrypoint is `cs2df`.
34
+
35
+ ## Export
36
+
37
+ ```bash
38
+ # Standard profile: required files + shots.json + replay.json
39
+ uv run cs2df export match.dem
40
+
41
+ # Research profile: also emit full-tick duels.json windows
42
+ uv run cs2df export match.dem --research
43
+
44
+ # Choose an output path
45
+ uv run cs2df export match.dem -o match.zip
46
+ ```
47
+
48
+ The default ZIP compression level is `3`, chosen from local benchmark results as
49
+ a speed/size balance. Use a higher level when smaller ZIPs matter more:
50
+
51
+ ```bash
52
+ uv run cs2df export match.dem --compress-level 6
53
+ uv run cs2df export match.dem --compress-level 9
54
+ ```
55
+
56
+ ## Batch Export
57
+
58
+ ```bash
59
+ uv run cs2df export-batch ./demos --workers 8 --descriptive
60
+ ```
61
+
62
+ `export-batch` scans one directory non-recursively for `.dem` files and writes
63
+ one ZIP per demo. It also writes `report.json` next to the outputs with:
64
+
65
+ - per-demo success/failure status
66
+ - output ZIP size
67
+ - source demo size
68
+ - compression level
69
+ - total duration
70
+ - aggregate throughput
71
+ - parse/package/write stage timings
72
+
73
+ Bad demos are reported as failed rows; a single parser failure does not crash the
74
+ whole batch. Use `--fail-fast` when you want the batch to stop after the first
75
+ failed demo.
76
+
77
+ ## Validate
78
+
79
+ ```bash
80
+ uv run cs2df validate match.zip
81
+ uv run cs2df validate match.zip --strict
82
+ ```
83
+
84
+ Validation checks JSON Schema conformance and package-level invariants such as cross-file
85
+ player indexes, round windows, column lengths, weapon dictionary indexes, and
86
+ formal-round consistency.
87
+
88
+ ## Role in the Repository
89
+
90
+ This is a reference implementation, not the contract itself. The authoritative
91
+ contract lives in [`../schemas/index.ts`](../schemas/index.ts) and
92
+ the generated JSON Schemas in the repository's `spec/` directory. Any producer
93
+ that emits a ZIP passing strict validation is conformant.
94
+
95
+ The exporter also serves as the performance baseline for the v3 format:
96
+ per-frame data stays in pandas DataFrames until the columnar stream builders
97
+ materialize compact integer arrays for JSON serialization.
98
+
99
+ Event-extraction logic was originally ported from `cs2-demo-analysis-kit`
100
+ (and before that `DrEAmSs59/CS2-insight-agent`, with the author's permission).
@@ -0,0 +1,13 @@
1
+ cs2df/__init__.py,sha256=OK7xNHY1V2uzmeDW3LzMu7BNNO6ERmMAjbV6mDfEOSc,338
2
+ cs2df/cli.py,sha256=uihLenM2yJNA0lrG5tC90b6QrnMLoa35lRtSSr8RFFU,14962
3
+ cs2df/enums.py,sha256=kRcTTUx3ICtbnt4INLE6lvr00GvSUTeboT_f1h8-qwM,9883
4
+ cs2df/events.py,sha256=Tpc2xbNAF4phkMj2a2kxPDrswZl2utScPIM0dsMkT5c,34907
5
+ cs2df/package.py,sha256=1NmKKSt0cRWyfJ_2Xcd6pYmpQQINgJogivaGkK0vpyc,7662
6
+ cs2df/parse.py,sha256=V_-PICwUGXApqOePbKMWDYAZ_ypb1hKgAVWSdHRYDmE,20804
7
+ cs2df/rounds.py,sha256=8u-8d_y6rIL8NWlMJmHmxBmCBH-8M1KabVa0bSycrAM,12217
8
+ cs2df/streams.py,sha256=7Gly0CioGprFXrLgz4OJ3_peD53gimHpIk2uSxBqt7w,17544
9
+ cs2df/validate.py,sha256=1xdUnAALdBbBGdt0KZHHAY4ej25ggpy_6nZqYILY6sQ,24742
10
+ cs2df-3.0.0.dist-info/METADATA,sha256=Z-tcHDr3MMlyznLhCpMaKRJzzi9ivSa6cS6ZpWZGwFU,3097
11
+ cs2df-3.0.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
12
+ cs2df-3.0.0.dist-info/entry_points.txt,sha256=9hU7Hemtce09cpoFwt0MM1qiqXqa6SKNopNH9MAqD4o,41
13
+ cs2df-3.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ cs2df = cs2df.cli:main