bobframes 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (130) hide show
  1. bobframes/__init__.py +3 -0
  2. bobframes/_version.py +1 -0
  3. bobframes/catalog.py +154 -0
  4. bobframes/cli.py +266 -0
  5. bobframes/derive_post_merge.py +365 -0
  6. bobframes/derives/__init__.py +0 -0
  7. bobframes/derives/pass_class_breakdown.py +102 -0
  8. bobframes/derives/texture_usage.py +121 -0
  9. bobframes/discovery.py +132 -0
  10. bobframes/global_entities.py +99 -0
  11. bobframes/html/__init__.py +0 -0
  12. bobframes/html/template.py +1056 -0
  13. bobframes/lint.py +114 -0
  14. bobframes/manifest.py +127 -0
  15. bobframes/parquetize.py +282 -0
  16. bobframes/parsers/__init__.py +0 -0
  17. bobframes/parsers/derive_program_transitions.py +73 -0
  18. bobframes/parsers/parse_init_state.py +675 -0
  19. bobframes/paths.py +111 -0
  20. bobframes/probes/__init__.py +0 -0
  21. bobframes/probes/whatif.py +165 -0
  22. bobframes/qrd_harness.py +119 -0
  23. bobframes/query_examples.py +222 -0
  24. bobframes/rdcmd.py +72 -0
  25. bobframes/replay/__init__.py +26 -0
  26. bobframes/replay/replay_main.py +2305 -0
  27. bobframes/reports/__init__.py +0 -0
  28. bobframes/reports/_dashboard.py +425 -0
  29. bobframes/reports/ab.py +88 -0
  30. bobframes/reports/base.py +114 -0
  31. bobframes/reports/cache.py +147 -0
  32. bobframes/reports/chrome.py +1306 -0
  33. bobframes/reports/cli.py +99 -0
  34. bobframes/reports/delta.py +167 -0
  35. bobframes/reports/discovery.py +118 -0
  36. bobframes/reports/draws_by_class.py +165 -0
  37. bobframes/reports/formatters.py +122 -0
  38. bobframes/reports/instancing_opportunities.py +276 -0
  39. bobframes/reports/orchestrator.py +59 -0
  40. bobframes/reports/overdraw.py +293 -0
  41. bobframes/reports/pass_gpu.py +190 -0
  42. bobframes/reports/shader_hotlist.py +240 -0
  43. bobframes/reports/trend_table.py +444 -0
  44. bobframes/resource_labels.py +162 -0
  45. bobframes/run.py +480 -0
  46. bobframes/schemas.py +426 -0
  47. bobframes/stable_keys.py +83 -0
  48. bobframes/tests/__init__.py +0 -0
  49. bobframes/tests/_render_util.py +84 -0
  50. bobframes/tests/data/golden/_reports/draws_by_class.html +323 -0
  51. bobframes/tests/data/golden/_reports/drill/District 01/2026-05-28_r110600/index.html +1560 -0
  52. bobframes/tests/data/golden/_reports/index.html +264 -0
  53. bobframes/tests/data/golden/_reports/instancing_opportunities.html +266 -0
  54. bobframes/tests/data/golden/_reports/overdraw.html +275 -0
  55. bobframes/tests/data/golden/_reports/pass_gpu.html +277 -0
  56. bobframes/tests/data/golden/_reports/shader_hotlist.html +265 -0
  57. bobframes/tests/data/golden/_reports/trend_table.html +390 -0
  58. bobframes/tests/data/golden/index.html +1175 -0
  59. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/_manifest.json +51 -0
  60. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/buffers.parquet +0 -0
  61. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/clears.parquet +0 -0
  62. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/counters_per_event.parquet +0 -0
  63. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/descriptor_access.parquet +0 -0
  64. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/dispatches.parquet +0 -0
  65. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/draw_bindings.parquet +0 -0
  66. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/draws.parquet +0 -0
  67. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/events.parquet +0 -0
  68. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/fbos.parquet +0 -0
  69. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/frame_totals.parquet +0 -0
  70. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/ibo_samples.parquet +0 -0
  71. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/indirect_args.parquet +0 -0
  72. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/passes.parquet +0 -0
  73. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/pixel_history.parquet +0 -0
  74. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/post_vs_samples.parquet +0 -0
  75. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/program_transitions.parquet +0 -0
  76. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/programs.parquet +0 -0
  77. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/render_targets.parquet +0 -0
  78. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/resource_creation.parquet +0 -0
  79. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/rt_event_timeline.parquet +0 -0
  80. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/samplers.parquet +0 -0
  81. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/shaders.parquet +0 -0
  82. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/state_change_events.parquet +0 -0
  83. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/texture_samples.parquet +0 -0
  84. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/textures.parquet +0 -0
  85. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/vbo_samples.parquet +0 -0
  86. bobframes/tests/data/synthetic/_data/District 01/2026-05-27_r110565/vertex_inputs.parquet +0 -0
  87. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/_manifest.json +51 -0
  88. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/buffers.parquet +0 -0
  89. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/clears.parquet +0 -0
  90. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/counters_per_event.parquet +0 -0
  91. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/descriptor_access.parquet +0 -0
  92. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/dispatches.parquet +0 -0
  93. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/draw_bindings.parquet +0 -0
  94. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/draws.parquet +0 -0
  95. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/events.parquet +0 -0
  96. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/fbos.parquet +0 -0
  97. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/frame_totals.parquet +0 -0
  98. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/ibo_samples.parquet +0 -0
  99. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/indirect_args.parquet +0 -0
  100. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/passes.parquet +0 -0
  101. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/pixel_history.parquet +0 -0
  102. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/post_vs_samples.parquet +0 -0
  103. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/program_transitions.parquet +0 -0
  104. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/programs.parquet +0 -0
  105. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/render_targets.parquet +0 -0
  106. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/resource_creation.parquet +0 -0
  107. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/rt_event_timeline.parquet +0 -0
  108. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/samplers.parquet +0 -0
  109. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/shaders.parquet +0 -0
  110. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/state_change_events.parquet +0 -0
  111. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/texture_samples.parquet +0 -0
  112. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/textures.parquet +0 -0
  113. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/vbo_samples.parquet +0 -0
  114. bobframes/tests/data/synthetic/_data/District 01/2026-05-28_r110600/vertex_inputs.parquet +0 -0
  115. bobframes/tests/make_synthetic.py +171 -0
  116. bobframes/tests/smoke.py +199 -0
  117. bobframes/tests/test_determinism.py +19 -0
  118. bobframes/tests/test_discovery.py +97 -0
  119. bobframes/tests/test_hardening.py +142 -0
  120. bobframes/tests/test_parity.py +22 -0
  121. bobframes/tests/test_perf.py +18 -0
  122. bobframes/tests/test_replay_drift.py +115 -0
  123. bobframes/tests/test_schemas.py +26 -0
  124. bobframes/tests/test_schemas_unit.py +55 -0
  125. bobframes/tests/test_stable_keys.py +61 -0
  126. bobframes-0.1.0.dist-info/METADATA +144 -0
  127. bobframes-0.1.0.dist-info/RECORD +130 -0
  128. bobframes-0.1.0.dist-info/WHEEL +4 -0
  129. bobframes-0.1.0.dist-info/entry_points.txt +2 -0
  130. bobframes-0.1.0.dist-info/licenses/LICENSE +21 -0
bobframes/lint.py ADDED
@@ -0,0 +1,114 @@
1
+ """Banned-token lint for HTML/markdown chrome.
2
+
3
+ Catches LLM-filler vocabulary and label scaffolding that the previous
4
+ iteration of the pipeline kept generating. Applies only to chrome around
5
+ data tables, not to CSV cell contents.
6
+
7
+ Run via `python -m bobframes.lint <file...>` or imported by run.py.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ import sys
14
+ from html.parser import HTMLParser
15
+
16
# Table of banned patterns. Each entry is (compiled regex, label); the label
# is the text the lint_* functions report for a hit. Patterns compiled without
# re.I are case-sensitive.
BANNED = [
    # Typographic characters.
    (re.compile(r'[—–]'), 'em/en dash anywhere'),
    (re.compile(r'[…]'), 'ellipsis unicode'),
    (re.compile(r'[“”‘’]'), 'curly quote'),
    (re.compile(r'[✓✅↑↓·×⏳→←⚠✨]'), 'decorative unicode'),
    # Shorthand words (whole-word, case-sensitive).
    (re.compile(r'\bcaps\b'), 'shorthand caps'),
    (re.compile(r'\bcap\b(?![A-Za-z])'), 'shorthand cap'),
    # Vocabulary and phrases, matched case-insensitively.
    (re.compile(r'\b(comprehensive|leverage|robust|polished|sleek|seamless)\b', re.I), 'LLM filler vocabulary'),
    (re.compile(r'\b(overview|insights?|breakdown of|deep dive|key findings)\b', re.I), 'report-prose noun'),
    (re.compile(r'\b(this (report|chart|table|section) shows|as (you can )?see|as shown|the following|let us|we (can )?see|note that|please note|observe that)\b', re.I), 'reader-address phrase'),
    (re.compile(r'\b(highlights?|takeaways?|notable|noteworthy|significant|interesting)\b', re.I), 'editorial verb'),
    (re.compile(r'\b(in conclusion|to summarize|in summary|overall)\b', re.I), 'summary opener'),
    (re.compile(r'\bN/A\b'), 'NA filler'),
    (re.compile(r'ranks remaining work', re.I), 'LLM filler phrase'),
    # Bold "Label:" markers in markdown (case-sensitive).
    (re.compile(r'\*\*(What to do|Why this matters|Verify|Effort|Impact|Detail|Fix|Severity|Title):\*\*'), 'label scaffolding'),
    (re.compile(r'\betc\.'), 'filler etc.'),
]
33
+
34
+
35
+ class _HtmlTextExtractor(HTMLParser):
36
+ """Collects text outside <table>, <script>, <style> ranges.
37
+
38
+ Each entry is (lineno, text).
39
+ """
40
+
41
+ _SKIP_TAGS = {'table', 'script', 'style'}
42
+
43
+ def __init__(self) -> None:
44
+ super().__init__(convert_charrefs=True)
45
+ self._skip_depth: dict[str, int] = {t: 0 for t in self._SKIP_TAGS}
46
+ self.chunks: list[tuple[int, str]] = []
47
+
48
+ def handle_starttag(self, tag, attrs):
49
+ t = tag.lower()
50
+ if t in self._SKIP_TAGS:
51
+ self._skip_depth[t] += 1
52
+
53
+ def handle_endtag(self, tag):
54
+ t = tag.lower()
55
+ if t in self._SKIP_TAGS and self._skip_depth[t] > 0:
56
+ self._skip_depth[t] -= 1
57
+
58
+ def handle_data(self, data):
59
+ if any(v > 0 for v in self._skip_depth.values()):
60
+ return
61
+ if not data.strip():
62
+ return
63
+ line, _col = self.getpos()
64
+ self.chunks.append((line, data))
65
+
66
+
67
def lint_html(path: str) -> list[tuple[int, str, str]]:
    """Return (lineno, pattern_label, snippet) for banned matches in an HTML file.

    Only text collected by _HtmlTextExtractor is checked. At most one hit per
    banned pattern is reported for each text chunk (the first match).

    lineno is the line of the match itself: the extractor records where a
    text chunk starts, so the newlines preceding the match inside the chunk
    are added on top. snippet is the stripped line containing the start of
    the match, truncated to 80 characters, so it is always a single line.
    """
    with open(path, 'r', encoding='utf-8') as f:
        body = f.read()
    extractor = _HtmlTextExtractor()
    extractor.feed(body)
    # Flush any text the parser is still buffering at end of input.
    extractor.close()
    hits: list[tuple[int, str, str]] = []
    for chunk_line, text in extractor.chunks:
        for rx, label in BANNED:
            m = rx.search(text)
            if m is None:
                continue
            start = m.start()
            lineno = chunk_line + text.count('\n', 0, start)
            line_begin = text.rfind('\n', 0, start) + 1
            line_end = text.find('\n', start)
            if line_end < 0:
                line_end = len(text)
            snippet = text[line_begin:line_end].strip()[:80]
            hits.append((lineno, label, snippet))
    return hits
81
+
82
+
83
def lint_markdown(path: str) -> list[tuple[int, str, str]]:
    """Return (lineno, pattern_label, snippet) for banned matches in a text file.

    Every line is tested against every BANNED pattern; a line yields one hit
    per pattern that matches it. lineno is 1-based and snippet is the line
    with trailing whitespace removed, truncated to 80 characters.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [
            (lineno, label, line.rstrip()[:80])
            for lineno, line in enumerate(f, start=1)
            for rx, label in BANNED
            if rx.search(line)
        ]
91
+
92
+
93
def lint_file(path: str) -> list[tuple[int, str, str]]:
    """Lint one file, choosing the linter from its extension.

    Paths ending in ``.md`` (case-insensitive) go through lint_markdown;
    everything else is treated as HTML.
    """
    linter = lint_markdown if path.lower().endswith('.md') else lint_html
    return linter(path)
98
+
99
+
100
def main(argv: list[str]) -> int:
    """Lint each path in argv and print every hit to stderr.

    Returns 0 when no hits were found. Returns 2 when at least one hit was
    found, and also when argv is empty (after printing a usage line).
    """
    if not argv:
        print('usage: lint.py <file...>', file=sys.stderr)
        return 2
    hit_count = 0
    for path in argv:
        for lineno, label, snippet in lint_file(path):
            print(f'{path}:{lineno}: [{label}] {snippet}', file=sys.stderr)
            hit_count += 1
    return 0 if hit_count == 0 else 2
111
+
112
+
113
# Script entry point: lint the files named on the command line and use
# main()'s return value as the process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
bobframes/manifest.py ADDED
@@ -0,0 +1,127 @@
1
+ """Per-drop _manifest.json writer.
2
+
3
+ Records schema version, build timestamp, per-capture replay status, row
4
+ counts per table, and rotated-dir name (if a previous _analysis_out was
5
+ rotated during this run).
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import datetime as _dt
11
+ import json
12
+ import os
13
+ import platform
14
+ import subprocess
15
+ from typing import Any
16
+
17
+ from . import qrd_harness, rdcmd, schemas
18
+ from ._version import __version__
19
+
20
+
21
def now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string with second precision.

    This is the single source of truth for timestamps.
    """
    utc_now = _dt.datetime.now(tz=_dt.timezone.utc)
    return utc_now.isoformat(timespec='seconds')
24
+
25
+
26
+ def _tool_version(path: str) -> str:
27
+ """Best-effort `<tool> --version` first line. Never raises."""
28
+ try:
29
+ p = subprocess.run([path, '--version'], capture_output=True, text=True, timeout=15)
30
+ out = (p.stdout or p.stderr or '').strip()
31
+ return out.splitlines()[0].strip() if out else 'unknown'
32
+ except Exception:
33
+ return 'unknown'
34
+
35
+
36
def gather_tool_versions() -> dict[str, str]:
    """Record renderdoccmd / qrenderdoc versions at ingest (G-6).

    Best-effort: a tool whose lookup or version probe fails for any reason is
    recorded as 'unknown'. The result always has both keys.
    """
    # Lookups are wrapped in lambdas so they run inside the try below.
    locators = (
        ('renderdoccmd', lambda: rdcmd.find_renderdoccmd()),
        ('qrenderdoc', lambda: qrd_harness.find_qrenderdoc()),
    )
    versions: dict[str, str] = {}
    for tool, locate in locators:
        try:
            versions[tool] = _tool_version(locate())
        except Exception:
            versions[tool] = 'unknown'
    return versions
48
+
49
+
50
def gather_host_info() -> dict[str, str]:
    """Record GPU/driver/CPU/OS + bobframes version at ingest (G-7).

    GPU name and driver version come from a PowerShell query of
    Win32_VideoController (first controller only). Best-effort: if the query
    cannot be run or parsed, both stay 'unknown'.
    """
    gpu = 'unknown'
    driver = 'unknown'
    query = (
        'Get-CimInstance Win32_VideoController | '
        'Select-Object -First 1 -Property Name,DriverVersion | ConvertTo-Json -Compress'
    )
    try:
        ps = subprocess.run(
            ['powershell', '-NoProfile', '-Command', query],
            capture_output=True, text=True, timeout=20,
        )
        if ps.returncode == 0 and ps.stdout.strip():
            info = json.loads(ps.stdout)
            # The JSON may be a list; use its first element.
            if isinstance(info, list):
                info = info[0] if info else {}
            gpu = (info.get('Name') or 'unknown').strip()
            driver = (info.get('DriverVersion') or 'unknown').strip()
    except Exception:
        pass
    host: dict[str, str] = {}
    host['gpu'] = gpu
    host['gpu_driver'] = driver
    host['cpu'] = platform.processor() or 'unknown'
    host['os'] = platform.platform()
    host['bobframes'] = __version__
    return host
75
+
76
+
77
def build_manifest(
    *,
    area: str,
    drop_date: str,
    drop_label: str,
    captures: list[str],
    capture_status: dict[str, str],
    row_counts: dict[str, int],
    rotated_from: str | None,
    build_timestamp: str | None = None,
    tool_versions: dict[str, str] | None = None,
    host_info: dict[str, str] | None = None,
) -> dict[str, Any]:
    """Assemble the manifest dictionary for one drop.

    Dict arguments are shallow-copied. ``captures`` is sorted by
    (length, text). A missing or empty ``build_timestamp`` is replaced by
    now_iso(); missing ``tool_versions`` / ``host_info`` become empty dicts.
    """
    manifest: dict[str, Any] = {}
    manifest['schema_version'] = schemas.SCHEMA_VERSION
    manifest['build_timestamp'] = build_timestamp if build_timestamp else now_iso()
    manifest['area'] = area
    manifest['drop_date'] = drop_date
    manifest['drop_label'] = drop_label
    manifest['captures'] = sorted(captures, key=lambda s: (len(s), s))
    manifest['capture_status'] = dict(capture_status)
    manifest['row_counts'] = dict(row_counts)
    manifest['tool_versions'] = dict(tool_versions or {})
    manifest['host_info'] = dict(host_info or {})
    manifest['rotated_from'] = rotated_from
    return manifest
103
+
104
+
105
def write_manifest(out_dir: str, manifest: dict[str, Any]) -> str:
    """Write ``_manifest.json`` into out_dir and return its path.

    The JSON is first written to ``_manifest.json.tmp`` and then moved into
    place with os.replace, so a crash mid-write never leaves a partial file
    the catalog would silently skip (R-1). On any failure the temp file is
    removed and the original exception is re-raised.
    """
    final_path = os.path.join(out_dir, '_manifest.json')
    scratch_path = final_path + '.tmp'
    try:
        with open(scratch_path, 'w', encoding='utf-8') as fh:
            fh.write(json.dumps(manifest, indent=2, sort_keys=False))
            fh.write('\n')
        os.replace(scratch_path, final_path)
    except BaseException:
        try:
            os.remove(scratch_path)
        except OSError:
            pass
        raise
    return final_path
122
+
123
+
124
def read_manifest(out_dir: str) -> dict[str, Any]:
    """Load and return the parsed ``_manifest.json`` found in out_dir."""
    with open(os.path.join(out_dir, '_manifest.json'), 'r', encoding='utf-8') as fh:
        text = fh.read()
    return json.loads(text)
@@ -0,0 +1,282 @@
1
+ """Merge per-capture CSV fragments into drop-level CSV + Parquet pairs.
2
+
3
+ For each table in schemas.TABLES:
4
+ 1. Read every _stage/<capture>/<table>.csv that exists.
5
+ 2. Verify the CSV header equals schemas.<TABLE>_COLS exactly (no drift).
6
+ 3. Concatenate (preserving capture order).
7
+ 4. Compute stable_key for entity tables.
8
+ 5. Coerce dtypes via schemas.infer_dtype.
9
+ 6. Write _analysis_out.tmp/<table>.parquet (snappy) and <table>.csv.
10
+
11
+ Also copies non-tabular sidecars (shader_src/, histogram/, jsonl) from the
12
+ stage tree into _analysis_out.tmp/.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import csv
18
+ import json
19
+ import os
20
+ import shutil
21
+ from typing import Iterable
22
+
23
+ import pyarrow as pa
24
+ import pyarrow.csv as pacsv
25
+ import pyarrow.parquet as papq
26
+
27
+ from . import schemas, stable_keys
28
+
29
+
30
+ def _list_stage_dirs(stage_root: str) -> list[str]:
31
+ if not os.path.isdir(stage_root):
32
+ return []
33
+ names = []
34
+ for entry in os.listdir(stage_root):
35
+ full = os.path.join(stage_root, entry)
36
+ if os.path.isdir(full):
37
+ names.append(entry)
38
+ names.sort(key=lambda s: (len(s), s))
39
+ return names
40
+
41
+
42
+ def _read_csv_compat(path: str, expected_cols: tuple[str, ...]) -> tuple[list[list[str]], list[int | None]]:
43
+ """Read CSV, return rows ordered into the expected_cols positions.
44
+
45
+ Any expected column missing from the CSV header is filled with empty
46
+ strings (post-merge derives populate them later). Extra columns in the
47
+ CSV are ignored. Reorders columns as needed to match expected order.
48
+
49
+ Returns (rows, position_map) where rows[i][j] is the value for
50
+ expected_cols[j]. position_map records which CSV column index maps to
51
+ each expected column (None if not present).
52
+ """
53
+ with open(path, 'r', encoding='utf-8', newline='') as f:
54
+ reader = csv.reader(f)
55
+ try:
56
+ header = next(reader)
57
+ except StopIteration:
58
+ return [], []
59
+ idx_for = {c: i for i, c in enumerate(header)}
60
+ pos_map: list[int | None] = [idx_for.get(c) for c in expected_cols]
61
+
62
+ out_rows: list[list[str]] = []
63
+ for raw in reader:
64
+ row: list[str] = []
65
+ for p in pos_map:
66
+ if p is None or p >= len(raw):
67
+ row.append('')
68
+ else:
69
+ row.append(raw[p])
70
+ out_rows.append(row)
71
+ return out_rows, pos_map
72
+
73
+
74
+ def _cast_value(v: str, dtype: str):
75
+ if v == '' or v is None:
76
+ if dtype == 'int': return 0
77
+ if dtype == 'float': return 0.0
78
+ if dtype == 'bool': return False
79
+ return ''
80
+ try:
81
+ if dtype == 'int':
82
+ try: return int(v)
83
+ except (ValueError, TypeError): return int(float(v))
84
+ if dtype == 'float': return float(v)
85
+ if dtype == 'bool': return v not in ('0', '', 'False', 'false')
86
+ except (ValueError, TypeError):
87
+ if dtype == 'int': return 0
88
+ if dtype == 'float': return 0.0
89
+ if dtype == 'bool': return False
90
+ return v
91
+
92
+
93
+ def _as_int(v) -> int:
94
+ try:
95
+ return int(v) if v not in ('', None) else 0
96
+ except (ValueError, TypeError):
97
+ return 0
98
+
99
+
100
+ def _apply_stable_key(table_stem: str, columns: dict[str, list]) -> None:
101
+ """For entity tables, fill the stable_key column from row content.
102
+
103
+ Called BEFORE dtype coercion; all column values are still strings here.
104
+ Numeric inputs are cast via _as_int.
105
+ """
106
+ n = len(next(iter(columns.values())))
107
+ if 'stable_key' not in columns:
108
+ return
109
+
110
+ keys: list[str] = ['' for _ in range(n)]
111
+
112
+ if table_stem == 'shaders':
113
+ for i in range(n):
114
+ keys[i] = (columns.get('src_hash') or [''] * n)[i] or ''
115
+ elif table_stem in ('textures', 'render_targets'):
116
+ for i in range(n):
117
+ keys[i] = stable_keys.texture_key(
118
+ (columns.get('label') or [''] * n)[i],
119
+ (columns.get('format') or [''] * n)[i],
120
+ _as_int((columns.get('width') or [''] * n)[i]),
121
+ _as_int((columns.get('height') or [''] * n)[i]),
122
+ _as_int((columns.get('depth') or [''] * n)[i]),
123
+ _as_int((columns.get('mip_levels') or [''] * n)[i]),
124
+ _as_int((columns.get('sample_count') or [''] * n)[i]),
125
+ )
126
+ elif table_stem == 'samplers':
127
+ for i in range(n):
128
+ keys[i] = stable_keys.sampler_key(
129
+ (columns.get('min_filter') or [''] * n)[i],
130
+ (columns.get('mag_filter') or [''] * n)[i],
131
+ (columns.get('wrap_s') or [''] * n)[i],
132
+ (columns.get('wrap_t') or [''] * n)[i],
133
+ (columns.get('wrap_r') or [''] * n)[i],
134
+ _as_int((columns.get('max_anisotropy') or [''] * n)[i]),
135
+ (columns.get('compare_mode') or [''] * n)[i],
136
+ (columns.get('compare_func') or [''] * n)[i],
137
+ )
138
+ elif table_stem == 'buffers':
139
+ for i in range(n):
140
+ tgts = (columns.get('target_history') or [''] * n)[i]
141
+ first_target = (tgts.split(';')[0] if tgts else '')
142
+ keys[i] = stable_keys.buffer_key(
143
+ (columns.get('usage_hint') or [''] * n)[i],
144
+ _as_int((columns.get('allocated_size_bytes') or [''] * n)[i]),
145
+ first_target,
146
+ )
147
+ elif table_stem == 'programs':
148
+ for i in range(n):
149
+ ids = (columns.get('attached_shader_ids') or [''] * n)[i]
150
+ id_list = [x for x in ids.split(';') if x] if ids else []
151
+ if id_list:
152
+ keys[i] = stable_keys.program_key(id_list)
153
+ elif table_stem == 'fbos':
154
+ for i in range(n):
155
+ rid = (columns.get('resource_id') or [''] * n)[i] or ''
156
+ keys[i] = stable_keys.fbo_key([rid]) if rid and rid != '0' else ''
157
+
158
+ columns['stable_key'] = keys
159
+
160
+
161
def _build_table(table_stem: str, stage_root: str) -> tuple[pa.Table | None, int]:
    """Merge every capture's ``<table_stem>.csv`` into one Arrow table.

    Captures are visited in _list_stage_dirs order and their rows are
    concatenated. Entity tables then get their stable_key column filled, and
    every column is converted to the Arrow type chosen by
    schemas.infer_dtype (int64, float64, bool, otherwise string).

    Returns (table, row_count), or (None, 0) when no capture had a fragment
    for this table.
    """
    expected_cols = schemas.expected_columns(table_stem)
    columns: dict[str, list] = {name: [] for name in expected_cols}

    found_any = False
    for capture in _list_stage_dirs(stage_root):
        fragment = os.path.join(stage_root, capture, f'{table_stem}.csv')
        if not os.path.exists(fragment):
            continue
        found_any = True
        rows, _unused_positions = _read_csv_compat(fragment, expected_cols)
        for row in rows:
            for name, value in zip(expected_cols, row):
                columns[name].append(value)

    if not found_any:
        return None, 0

    n_rows = len(columns[expected_cols[0]])

    if schemas.is_entity_table(table_stem):
        _apply_stable_key(table_stem, columns)

    # dtypes that need per-value casting, with their Arrow types.
    typed = {'int': pa.int64(), 'float': pa.float64(), 'bool': pa.bool_()}
    arrays: dict[str, pa.Array] = {}
    for name in expected_cols:
        dtype = schemas.infer_dtype(name)
        raw = columns[name]
        if dtype in typed:
            arrays[name] = pa.array([_cast_value(v, dtype) for v in raw], type=typed[dtype])
        else:
            arrays[name] = pa.array(raw, type=pa.string())

    return pa.table(arrays), n_rows
201
+
202
+
203
def _write_pair(table: pa.Table, out_dir: str, name: str) -> None:
    """Write ``<name>.parquet`` (snappy) and ``<name>.csv`` into out_dir.

    Both files are first written to ``.tmp`` siblings and only then moved
    into place with os.replace. If a write or a rename fails, any remaining
    tmp file is removed and the exception is re-raised, so a half-written
    file is never committed and no ``.tmp`` residue is left behind (R-2).

    The two renames are separate operations: if the Parquet rename succeeds
    and the CSV rename then fails, the new Parquet stays in place next to
    the previous CSV (if any).
    """
    pq_path = os.path.join(out_dir, f'{name}.parquet')
    csv_path = os.path.join(out_dir, f'{name}.csv')
    pq_tmp = pq_path + '.tmp'
    csv_tmp = csv_path + '.tmp'
    try:
        papq.write_table(table, pq_tmp, compression='snappy')
        pacsv.write_csv(table, csv_tmp)
        # Renames are inside the try so a failed rename also cleans up.
        os.replace(pq_tmp, pq_path)
        os.replace(csv_tmp, csv_path)
    except BaseException:
        for t in (pq_tmp, csv_tmp):
            try:
                os.remove(t)
            except OSError:
                pass
        raise
222
+
223
+
224
def _copy_sidecars(stage_root: str, out_dir: str) -> None:
    """Copy shader_src/, histogram/ and jsonl sidecars from stage to out.

    For every capture directory (in _list_stage_dirs order):
      * files in ``shader_src/`` and ``histogram/`` are copied into the
        same-named directories under out_dir; on a name clash the later
        capture overwrites the earlier one;
      * ``frame_metadata.json`` is re-serialized as one line of
        ``frame_metadata.jsonl``;
      * the non-blank lines of ``uniforms_per_pass.jsonl`` are appended to
        the merged ``uniforms_per_pass.jsonl``.

    Blank lines are dropped so that an empty per-capture file does not put
    an empty (non-JSON) line into the merged output. A merged jsonl file is
    written only when it would have at least one line.
    """
    captures = _list_stage_dirs(stage_root)
    shader_src_dst = os.path.join(out_dir, 'shader_src')
    histogram_dst = os.path.join(out_dir, 'histogram')
    os.makedirs(shader_src_dst, exist_ok=True)
    os.makedirs(histogram_dst, exist_ok=True)

    # jsonl merging across captures
    fm_path = os.path.join(out_dir, 'frame_metadata.jsonl')
    up_path = os.path.join(out_dir, 'uniforms_per_pass.jsonl')
    fm_lines: list[str] = []
    up_lines: list[str] = []

    for capture in captures:
        cap_dir = os.path.join(stage_root, capture)

        for sub, dst in (('shader_src', shader_src_dst), ('histogram', histogram_dst)):
            src = os.path.join(cap_dir, sub)
            if os.path.isdir(src):
                for entry in os.listdir(src):
                    shutil.copy2(os.path.join(src, entry), os.path.join(dst, entry))

        fm = os.path.join(cap_dir, 'frame_metadata.json')
        if os.path.exists(fm):
            with open(fm, 'r', encoding='utf-8') as f:
                obj = json.load(f)
            fm_lines.append(json.dumps(obj))

        up = os.path.join(cap_dir, 'uniforms_per_pass.jsonl')
        if os.path.exists(up):
            with open(up, 'r', encoding='utf-8') as f:
                # Split on '\n' only; str.splitlines() would also break on
                # other separators that may occur inside JSON strings.
                up_lines.extend(ln for ln in f.read().split('\n') if ln.strip())

    if fm_lines:
        with open(fm_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(fm_lines) + '\n')
    if up_lines:
        with open(up_path, 'w', encoding='utf-8') as f:
            f.write('\n'.join(up_lines) + '\n')
268
+
269
+
270
def merge_drop(stage_root: str, out_dir: str) -> dict[str, int]:
    """Merge all stage CSVs into out_dir as Parquet+CSV pairs.

    out_dir is created if needed. Every table in schemas.TABLES gets an
    entry in the returned row-count mapping; tables with no fragments are
    recorded as 0 and produce no files. Sidecars are copied after all
    tables have been written.
    """
    os.makedirs(out_dir, exist_ok=True)
    row_counts: dict[str, int] = {}
    for table_stem in schemas.TABLES:
        tbl, n_rows = _build_table(table_stem, stage_root)
        if tbl is not None:
            _write_pair(tbl, out_dir, table_stem)
        row_counts[table_stem] = n_rows if tbl is not None else 0
    _copy_sidecars(stage_root, out_dir)
    return row_counts
File without changes
@@ -0,0 +1,73 @@
1
+ """Derive program_transitions.parquet from draws.parquet.
2
+
3
+ Walk draws in event_id order; emit (from_program_id, to_program_id, count)
4
+ aggregated across the whole drop. Per (area, drop_date, drop_label, capture).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from collections import Counter
11
+
12
+ import pyarrow as pa
13
+ import pyarrow.parquet as papq
14
+ import pyarrow.csv as pacsv
15
+
16
+ from .. import schemas
17
+
18
+
19
def derive(out_dir: str) -> int:
    """Build program_transitions.parquet/.csv from draws.parquet in out_dir.

    Draws are grouped by (area, drop_date, drop_label, capture) and each
    group is walked in event_id order. Every change from one truthy
    program_id to a different truthy program_id counts as one transition; a
    falsy program_id resets the chain.

    Returns the number of rows written, or 0 (writing nothing) when
    draws.parquet is missing or has no rows.
    """
    source_path = os.path.join(out_dir, 'draws.parquet')
    if not os.path.exists(source_path):
        return 0

    wanted = list(schemas.ID_COLS) + ['event_id', 'program_id']
    draws = papq.read_table(source_path, columns=wanted)
    if draws.num_rows == 0:
        return 0

    # Bucket (event_id, program_id) pairs per capture identity.
    data = {name: draws.column(name).to_pylist() for name in draws.column_names}
    per_capture: dict[tuple, list[tuple[int, int]]] = {}
    row_iter = zip(
        data['area'], data['drop_date'], data['drop_label'], data['capture'],
        data['event_id'], data['program_id'],
    )
    for area, drop_date, drop_label, capture, event_id, program_id in row_iter:
        ident = (area, drop_date, drop_label, capture)
        per_capture.setdefault(ident, []).append((event_id, program_id))

    records: list[dict] = []
    for ident, sequence in per_capture.items():
        transitions: Counter = Counter()
        previous = 0
        for _event_id, current in sorted(sequence, key=lambda pair: pair[0]):
            if previous and current and previous != current:
                transitions[(previous, current)] += 1
            previous = current
        for (src, dst), hits in transitions.items():
            records.append({
                'area': ident[0], 'drop_date': ident[1], 'drop_label': ident[2], 'capture': ident[3],
                'from_program_id': src, 'to_program_id': dst, 'count': hits,
            })

    arrays: dict[str, pa.Array] = {}
    for name in list(schemas.PROG_TRANS_COLS):
        values = [record[name] for record in records]
        arrow_type = pa.int64() if schemas.infer_dtype(name) == 'int' else pa.string()
        arrays[name] = pa.array(values, type=arrow_type)

    table = pa.table(arrays)
    papq.write_table(table, os.path.join(out_dir, 'program_transitions.parquet'),
                     compression='snappy')
    pacsv.write_csv(table, os.path.join(out_dir, 'program_transitions.csv'))
    return table.num_rows
66
+
67
+
68
# Script entry point: derive program transitions for one output directory.
if __name__ == '__main__':
    import sys
    # Exactly one argument (the output directory) is required; otherwise
    # print usage and exit with status 2.
    if len(sys.argv) != 2:
        print('usage: derive_program_transitions.py <out_dir>', file=sys.stderr)
        sys.exit(2)
    print(f'wrote {derive(sys.argv[1])} program_transitions rows')