@leejungkiin/awkit 1.7.1 → 1.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245) hide show
  1. package/bin/awk.js +576 -84
  2. package/core/CLAUDE.md +1 -1
  3. package/core/GEMINI.md +148 -167
  4. package/core/GEMINI.md.bak +149 -116
  5. package/core/skill-runtime-manifest.json +3 -0
  6. package/docs/Claude Fable 5.md +3826 -0
  7. package/docs/android_kotlin_system_instruction.md +210 -0
  8. package/docs/brainstorm_ponytail_integration.md +146 -0
  9. package/docs/brainstorm_smart_setup.md +113 -0
  10. package/docs/deep-research-report (1).md +293 -0
  11. package/docs/history/GEMINI.v1.md +135 -0
  12. package/docs/history/brainstorm_antigravity_unified_architecture.v1.md +105 -0
  13. package/docs/history/implementation_plan.v1.md +58 -0
  14. package/package.json +4 -1
  15. package/scripts/artifact-storage.js +130 -0
  16. package/scripts/automation-gate.js +35 -2
  17. package/scripts/claude-plan.js +76 -0
  18. package/scripts/dependency-manager.js +210 -0
  19. package/scripts/exec-rtk.js +11 -5
  20. package/scripts/i18n-helper.js +381 -0
  21. package/scripts/multi-model-pipeline.js +144 -0
  22. package/skill-packs/mobile-ios/pack.json +4 -2
  23. package/skill-packs/reverse-engineering/pack.json +1 -0
  24. package/skills/CATALOG.md +20 -0
  25. package/skills/GEMINI.md +9 -1
  26. package/skills/TRIGGER_INDEX.md +10 -0
  27. package/skills/ai-music/SKILL.md +275 -0
  28. package/skills/android-re-analyzer/SKILL.md +238 -0
  29. package/skills/android-re-analyzer/references/api-extraction-patterns.md +119 -0
  30. package/skills/android-re-analyzer/references/call-flow-analysis.md +176 -0
  31. package/skills/android-re-analyzer/references/fernflower-usage.md +115 -0
  32. package/skills/android-re-analyzer/references/jadx-usage.md +116 -0
  33. package/skills/android-re-analyzer/references/setup-guide.md +221 -0
  34. package/skills/android-re-analyzer/scripts/check-deps.sh +129 -0
  35. package/skills/android-re-analyzer/scripts/decompile.sh +375 -0
  36. package/skills/android-re-analyzer/scripts/find-api-calls.sh +118 -0
  37. package/skills/android-re-analyzer/scripts/install-dep.sh +448 -0
  38. package/skills/animal-island-ui-style/SKILL.md +1450 -0
  39. package/skills/app-store-review-agent/SKILL.md +164 -0
  40. package/skills/app-store-review-agent/references/guidelines/README.md +154 -0
  41. package/skills/app-store-review-agent/references/guidelines/by-app-type/ai_apps.md +37 -0
  42. package/skills/app-store-review-agent/references/guidelines/by-app-type/all_apps.md +50 -0
  43. package/skills/app-store-review-agent/references/guidelines/by-app-type/crypto_finance.md +31 -0
  44. package/skills/app-store-review-agent/references/guidelines/by-app-type/games.md +31 -0
  45. package/skills/app-store-review-agent/references/guidelines/by-app-type/health_fitness.md +31 -0
  46. package/skills/app-store-review-agent/references/guidelines/by-app-type/kids.md +27 -0
  47. package/skills/app-store-review-agent/references/guidelines/by-app-type/macos.md +38 -0
  48. package/skills/app-store-review-agent/references/guidelines/by-app-type/social_ugc.md +32 -0
  49. package/skills/app-store-review-agent/references/guidelines/by-app-type/subscription_iap.md +34 -0
  50. package/skills/app-store-review-agent/references/guidelines/by-app-type/vpn.md +18 -0
  51. package/skills/app-store-review-agent/references/rules/design/minimum_functionality.md +96 -0
  52. package/skills/app-store-review-agent/references/rules/design/sign_in_with_apple.md +54 -0
  53. package/skills/app-store-review-agent/references/rules/entitlements/unused_entitlements.md +83 -0
  54. package/skills/app-store-review-agent/references/rules/metadata/accurate_metadata.md +54 -0
  55. package/skills/app-store-review-agent/references/rules/metadata/apple_trademark.md +99 -0
  56. package/skills/app-store-review-agent/references/rules/metadata/china_storefront.md +72 -0
  57. package/skills/app-store-review-agent/references/rules/metadata/competitor_terms.md +56 -0
  58. package/skills/app-store-review-agent/references/rules/metadata/subscription_metadata.md +81 -0
  59. package/skills/app-store-review-agent/references/rules/privacy/privacy_manifest.md +84 -0
  60. package/skills/app-store-review-agent/references/rules/privacy/unnecessary_data.md +60 -0
  61. package/skills/app-store-review-agent/references/rules/subscription/misleading_pricing.md +63 -0
  62. package/skills/app-store-review-agent/references/rules/subscription/missing_tos_pp.md +54 -0
  63. package/skills/awf-ponytail/SKILL.md +91 -0
  64. package/skills/awf-ponytail-review/SKILL.md +67 -0
  65. package/skills/awf-session-restore/SKILL.md +3 -3
  66. package/skills/brainstorm-agent/SKILL.md +11 -2
  67. package/skills/brainstorm-agent/templates/brief-template.md +8 -0
  68. package/skills/claude-planner/SKILL.md +47 -0
  69. package/skills/code-review/SKILL.md +87 -0
  70. package/skills/expo-game-development/SKILL.md +163 -0
  71. package/skills/flutter/LICENSE.txt +202 -0
  72. package/skills/flutter/SKILL.md +127 -0
  73. package/skills/flutter-project-creater/LICENSE.txt +202 -0
  74. package/skills/flutter-project-creater/SKILL.md +106 -0
  75. package/skills/game-developer/SKILL.md +163 -0
  76. package/skills/game-developer/references/ecs-patterns.md +501 -0
  77. package/skills/game-developer/references/multiplayer-networking.md +475 -0
  78. package/skills/game-developer/references/performance-optimization.md +422 -0
  79. package/skills/game-developer/references/unity-patterns.md +271 -0
  80. package/skills/game-developer/references/unreal-cpp.md +352 -0
  81. package/skills/generate-gui-assets/SKILL.md +305 -0
  82. package/skills/generate-gui-assets/agents/openai.yaml +4 -0
  83. package/skills/generate-gui-assets/references/catalog-schema.md +58 -0
  84. package/skills/generate-gui-assets/references/extraction-techniques.md +21 -0
  85. package/skills/generate-gui-assets/references/prompt-patterns.md +58 -0
  86. package/skills/generate-gui-assets/scripts/__pycache__/clean_chroma_edges.cpython-311.pyc +0 -0
  87. package/skills/generate-gui-assets/scripts/build_gui_contact_sheet.py +51 -0
  88. package/skills/generate-gui-assets/scripts/clean_chroma_edges.py +262 -0
  89. package/skills/generate-gui-assets/scripts/copy_approved_icons.py +64 -0
  90. package/skills/generate-gui-assets/scripts/prepare_gui_asset_run.py +91 -0
  91. package/skills/generate-gui-assets/scripts/suggest_grid_options.py +63 -0
  92. package/skills/generate-gui-assets/scripts/validate_gui_catalog.py +50 -0
  93. package/skills/godot-game-development/SKILL.md +142 -0
  94. package/skills/hatch-pet/LICENSE.txt +201 -0
  95. package/skills/hatch-pet/SKILL.md +420 -0
  96. package/skills/hatch-pet/agents/openai.yaml +4 -0
  97. package/skills/hatch-pet/references/animation-rows.md +29 -0
  98. package/skills/hatch-pet/references/codex-pet-contract.md +35 -0
  99. package/skills/hatch-pet/references/qa-rubric.md +60 -0
  100. package/skills/hatch-pet/scripts/__pycache__/clean_chroma_edges.cpython-311.pyc +0 -0
  101. package/skills/hatch-pet/scripts/clean_chroma_edges.py +262 -0
  102. package/skills/hatch-pet/scripts/compose_atlas.py +150 -0
  103. package/skills/hatch-pet/scripts/derive_running_left_from_running_right.py +143 -0
  104. package/skills/hatch-pet/scripts/extract_strip_frames.py +323 -0
  105. package/skills/hatch-pet/scripts/finalize_pet_run.py +382 -0
  106. package/skills/hatch-pet/scripts/generate_pet_images.py +287 -0
  107. package/skills/hatch-pet/scripts/inspect_frames.py +246 -0
  108. package/skills/hatch-pet/scripts/make_contact_sheet.py +96 -0
  109. package/skills/hatch-pet/scripts/package_custom_pet.py +108 -0
  110. package/skills/hatch-pet/scripts/pet_job_status.py +117 -0
  111. package/skills/hatch-pet/scripts/prepare_pet_run.py +673 -0
  112. package/skills/hatch-pet/scripts/queue_pet_repairs.py +172 -0
  113. package/skills/hatch-pet/scripts/record_imagegen_result.py +250 -0
  114. package/skills/hatch-pet/scripts/render_animation_videos.py +134 -0
  115. package/skills/hatch-pet/scripts/render_animation_videos.sh +5 -0
  116. package/skills/hatch-pet/scripts/validate_atlas.py +139 -0
  117. package/skills/i18n-orchestrator/SKILL.md +37 -0
  118. package/skills/ios-simulator-skill/SKILL.md +390 -0
  119. package/skills/ios-simulator-skill/scripts/accessibility_audit.py +300 -0
  120. package/skills/ios-simulator-skill/scripts/app_launcher.py +326 -0
  121. package/skills/ios-simulator-skill/scripts/app_state_capture.py +400 -0
  122. package/skills/ios-simulator-skill/scripts/appearance.py +385 -0
  123. package/skills/ios-simulator-skill/scripts/build_and_test.py +348 -0
  124. package/skills/ios-simulator-skill/scripts/clipboard.py +103 -0
  125. package/skills/ios-simulator-skill/scripts/common/__init__.py +61 -0
  126. package/skills/ios-simulator-skill/scripts/common/cache_utils.py +289 -0
  127. package/skills/ios-simulator-skill/scripts/common/device_utils.py +462 -0
  128. package/skills/ios-simulator-skill/scripts/common/env_config.py +35 -0
  129. package/skills/ios-simulator-skill/scripts/common/hang_pipeline.py +862 -0
  130. package/skills/ios-simulator-skill/scripts/common/hang_sessions.py +490 -0
  131. package/skills/ios-simulator-skill/scripts/common/idb_utils.py +180 -0
  132. package/skills/ios-simulator-skill/scripts/common/screenshot_utils.py +338 -0
  133. package/skills/ios-simulator-skill/scripts/container.py +668 -0
  134. package/skills/ios-simulator-skill/scripts/gesture.py +394 -0
  135. package/skills/ios-simulator-skill/scripts/hang_watcher.py +1533 -0
  136. package/skills/ios-simulator-skill/scripts/keyboard.py +391 -0
  137. package/skills/ios-simulator-skill/scripts/localization_audit.py +483 -0
  138. package/skills/ios-simulator-skill/scripts/location.py +467 -0
  139. package/skills/ios-simulator-skill/scripts/log_monitor.py +493 -0
  140. package/skills/ios-simulator-skill/scripts/model_inspector.py +645 -0
  141. package/skills/ios-simulator-skill/scripts/navigator.py +461 -0
  142. package/skills/ios-simulator-skill/scripts/privacy_manager.py +310 -0
  143. package/skills/ios-simulator-skill/scripts/push_notification.py +240 -0
  144. package/skills/ios-simulator-skill/scripts/screen_mapper.py +296 -0
  145. package/skills/ios-simulator-skill/scripts/sim_health_check.sh +245 -0
  146. package/skills/ios-simulator-skill/scripts/sim_list.py +299 -0
  147. package/skills/ios-simulator-skill/scripts/simctl_boot.py +312 -0
  148. package/skills/ios-simulator-skill/scripts/simctl_create.py +316 -0
  149. package/skills/ios-simulator-skill/scripts/simctl_delete.py +357 -0
  150. package/skills/ios-simulator-skill/scripts/simctl_erase.py +351 -0
  151. package/skills/ios-simulator-skill/scripts/simctl_shutdown.py +290 -0
  152. package/skills/ios-simulator-skill/scripts/simulator_selector.py +375 -0
  153. package/skills/ios-simulator-skill/scripts/status_bar.py +250 -0
  154. package/skills/ios-simulator-skill/scripts/test_recorder.py +323 -0
  155. package/skills/ios-simulator-skill/scripts/visual_diff.py +235 -0
  156. package/skills/ios-simulator-skill/scripts/xcode/__init__.py +13 -0
  157. package/skills/ios-simulator-skill/scripts/xcode/builder.py +397 -0
  158. package/skills/ios-simulator-skill/scripts/xcode/cache.py +204 -0
  159. package/skills/ios-simulator-skill/scripts/xcode/config.py +178 -0
  160. package/skills/ios-simulator-skill/scripts/xcode/reporter.py +343 -0
  161. package/skills/ios-simulator-skill/scripts/xcode/xcresult.py +451 -0
  162. package/skills/ios-visual-qa-strategist/SKILL.md +111 -0
  163. package/skills/ios-visual-qa-strategist/agents/openai.yaml +4 -0
  164. package/skills/ios-visual-qa-strategist/references/ios-tool-selection.md +61 -0
  165. package/skills/ios-visual-qa-strategist/references/minimal-capture-policy.md +56 -0
  166. package/skills/ios-visual-qa-strategist/references/visual-reasoning-heuristics.md +53 -0
  167. package/skills/orchestrator/SKILL.md +0 -20
  168. package/skills/persistent-storage/SKILL.md +55 -0
  169. package/skills/short-maker/SKILL.md +23 -0
  170. package/skills/short-maker/scripts/effects.js +56 -0
  171. package/skills/short-maker/scripts/shortmaker-bridge.js +332 -0
  172. package/skills/short-maker/scripts/videomix.js +601 -0
  173. package/skills/short-maker/templates/hyperframes/cinematic-character.template.html +172 -0
  174. package/skills/short-maker/templates/hyperframes/index.template.html +194 -0
  175. package/skills/smali-to-kotlin/SKILL.md +128 -0
  176. package/skills/smali-to-kotlin/examples/getting-started/tech-stack.md +58 -0
  177. package/skills/smali-to-kotlin/examples/pipeline/data-ui-parity.md +118 -0
  178. package/skills/smali-to-kotlin/examples/pipeline/scanner-and-bootstrap.md +106 -0
  179. package/skills/smali-to-kotlin/library-patterns.md +189 -0
  180. package/skills/smali-to-kotlin/phase-0-discovery.md +128 -0
  181. package/skills/smali-to-kotlin/phase-1-architecture.md +166 -0
  182. package/skills/smali-to-kotlin/phase-2-blueprint-ui.md +347 -0
  183. package/skills/smali-to-kotlin/phase-2-blueprint.md +228 -0
  184. package/skills/smali-to-kotlin/phase-3-build.md +248 -0
  185. package/skills/smali-to-kotlin/phase-3-logic-build.md +268 -0
  186. package/skills/smali-to-kotlin/smali-reading-guide.md +310 -0
  187. package/skills/smali-to-kotlin/templates/app-map.md +101 -0
  188. package/skills/smali-to-kotlin/templates/architecture.md +142 -0
  189. package/skills/smali-to-kotlin/templates/blueprint.md +145 -0
  190. package/skills/spec-gate/SKILL.md +6 -2
  191. package/skills/symphony-enforcer/SKILL.md +8 -0
  192. package/skills/symphony-enforcer/examples/mindful-stop.md +2 -0
  193. package/skills/symphony-enforcer/examples/three-phase.md +16 -0
  194. package/skills/symphony-enforcer/examples/trigger-points.md +7 -1
  195. package/skills/unity-game-development/SKILL.md +231 -0
  196. package/skills/video-edit/SKILL.md +36 -0
  197. package/skills/video-edit/scripts/video_edit.py +324 -0
  198. package/templates/project-identity/android.json +2 -2
  199. package/templates/project-identity/backend-nestjs.json +2 -2
  200. package/templates/project-identity/expo.json +2 -2
  201. package/templates/project-identity/ios.json +2 -2
  202. package/templates/project-identity/web-nextjs.json +2 -2
  203. package/templates/setup-mapping.json +48 -0
  204. package/templates/specs/design-template.md +161 -71
  205. package/templates/specs/requirements-template.md +65 -133
  206. package/templates/specs/task-spec-template.xml +3 -0
  207. package/workflows/_uncategorized/critic.md +40 -0
  208. package/workflows/_uncategorized/git-rebase-flow.md +81 -0
  209. package/workflows/_uncategorized/image-gen.md +118 -0
  210. package/workflows/_uncategorized/multi-model-pipeline.md +60 -0
  211. package/workflows/_uncategorized/pixel-gen.md +86 -0
  212. package/workflows/_uncategorized/pixel-setup.md +90 -0
  213. package/workflows/_uncategorized/ponytail-review.md +59 -0
  214. package/workflows/_uncategorized/reverse-android-build.md +222 -0
  215. package/workflows/_uncategorized/reverse-android-design.md +139 -0
  216. package/workflows/_uncategorized/reverse-android-discover.md +150 -0
  217. package/workflows/_uncategorized/reverse-android-scan.md +158 -0
  218. package/workflows/_uncategorized/reverse-android.md +143 -0
  219. package/workflows/_uncategorized/reverse-ios-build.md +240 -0
  220. package/workflows/_uncategorized/reverse-ios-design.md +112 -0
  221. package/workflows/_uncategorized/reverse-ios-discover.md +120 -0
  222. package/workflows/_uncategorized/reverse-ios-scan.md +155 -0
  223. package/workflows/_uncategorized/reverse-ios.md +152 -0
  224. package/workflows/_uncategorized/safety-router.md +34 -0
  225. package/workflows/_uncategorized/teach.md +89 -0
  226. package/workflows/_uncategorized/verify-ui.md +53 -0
  227. package/workflows/_uncategorized/visualize-screenshots.md +34 -0
  228. package/workflows/ads/ads-analyst.md +201 -0
  229. package/workflows/ads/ads-audit.md +106 -0
  230. package/workflows/ads/ads-optimize.md +97 -0
  231. package/workflows/ads/ads-targeting.md +241 -0
  232. package/workflows/ads/adsExpert.md +160 -0
  233. package/workflows/ads/smali-ads-config.md +400 -0
  234. package/workflows/ads/smali-ads-flow.md +331 -0
  235. package/workflows/ads/smali-ads-interstitial.md +377 -0
  236. package/workflows/ads/smali-ads-native.md +382 -0
  237. package/workflows/context/teach.md +89 -0
  238. package/workflows/gitnexus.md +8 -8
  239. package/workflows/lifecycle/brainstorm.md +43 -0
  240. package/workflows/lifecycle/code.md +5 -0
  241. package/workflows/lifecycle/init.md +23 -5
  242. package/workflows/lifecycle/multi-model-pipeline.md +60 -0
  243. package/workflows/quality/ponytail-review.md +59 -0
  244. package/workflows/roles/critic.md +40 -0
  245. package/workflows/roles/safety-router.md +34 -0
@@ -0,0 +1,862 @@
1
+ #!/usr/bin/env python3
2
+ """HangBuster filter pipeline — pure functions, no I/O.
3
+
4
+ Stages: parse → normalise → threshold → bucket → cluster → aggregate → rank → format.
5
+
6
+ Each function is independently testable; the worker and the `--stop` path both
7
+ compose them. Scoped to hang detection for now (AHA — promote to a generic
8
+ log-filter module when a second consumer needs it).
9
+
10
+ Token budgets are enforced via a documented char/4 heuristic
11
+ (`estimate_tokens`) — accurate to within ~10% of real tokenizers and
12
+ dependency-free.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import hashlib
18
+ import itertools
19
+ import json
20
+ import math
21
+ import re
22
+ from collections.abc import Callable
23
+ from dataclasses import asdict, dataclass, field
24
+ from datetime import datetime
25
+ from enum import StrEnum
26
+
27
+ # === CONSTANTS ===
28
+
29
# Version of the fingerprinting scheme; used as the default for
# SessionSummary.fingerprint_version.
FINGERPRINT_VERSION = 2
"""Bump when normalise_message, compute_fingerprint, or severity boundaries change.
`--diff` skips structural comparison across mismatched versions.

v2 (2026-05): compute_fingerprint() now hashes its input with sha256[:16].
v1 used the raw symbol / normalised prefix — collision risk when heavy upstream
normalisation reduced distinct messages to identical prefixes."""

# Volatile tokens that normalise_message() replaces with placeholders.
_HEX_ADDR = re.compile(r"0x[0-9a-fA-F]{4,}")  # "0x" followed by at least four hex digits
_PID_REF = re.compile(r"\bpid[:= ]\s*\d+\b", re.IGNORECASE)  # "pid: 123", "pid=123", "pid 123"
_BARE_INT = re.compile(r"\b\d{4,}\b")  # standalone integers of four or more digits
_WHITESPACE = re.compile(r"\s+")  # any whitespace run (collapsed to one space)
# Leading boilerplate that normalise_message() strips; only the first matching
# entry is removed, so order matters (longer "Hang detected by ..." comes first).
_BOILERPLATE_PREFIXES = (
    "Hang detected by RunningBoard:",
    "Hang detected:",
    "[RunningBoard]",
)
# Tried in order by extract_symbol(); group 1 is the symbol text.
_SYMBOL_PATTERNS = [
    re.compile(r"([+-]?\[[A-Za-z_][\w]*\s+[A-Za-z_][\w:]*\])"),  # [Foo bar:] / +[Foo bar:]
    re.compile(r"\b([A-Z][A-Za-z0-9_]+\.[A-Za-z_][\w]+(?:\([^)]*\))?)\b"),  # Swift Foo.bar()
]

# One log line. Captured groups, in order: timestamp (offset optional), pid,
# process name, message. The uncaptured columns are skipped by parse_log_line();
# NOTE(review): presumably thread id / type / activity id / TTL — confirm
# against real `log stream` output.
_LOG_LINE_PATTERN = re.compile(
    r"^(\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}:\d{2}\.\d+(?:[+-]\d{4})?)"
    r"\s+0x[\da-f]+"
    r"\s+\S+"
    r"\s+0x[\da-f]+"
    r"\s+(\d+)"
    r"\s+\d+"
    r"\s+([^:]+):"
    r"\s*(.*)",
    re.IGNORECASE,
)

# Index 0 matches millisecond durations, index 1 matches second durations;
# extract_duration_ms() relies on these positions.
_DURATION_PATTERNS = [
    # Order matters: ms before s, so "487ms" doesn't get parsed as 487 seconds.
    re.compile(r"(\d+(?:\.\d+)?)\s*(?:ms|milliseconds?)\b", re.IGNORECASE),
    re.compile(r"(\d+(?:\.\d+)?)\s*(?:s|seconds?)\b", re.IGNORECASE),
]
68
+
69
+
70
+ # === TYPES ===
71
+
72
+
73
+ class Severity(StrEnum):
74
+ """Hang severity bucket. String-valued for stable JSON serialisation."""
75
+
76
+ MINOR = "minor"
77
+ WARN = "warn"
78
+ CRITICAL = "critical"
79
+ FROZEN = "frozen"
80
+
81
+
82
# Ranking multiplier per severity band; each band weighs twice the one below it.
# Used to pick a cluster's worst severity and to score clusters for ranking.
_SEVERITY_WEIGHT = dict(
    (
        (Severity.MINOR, 1),
        (Severity.WARN, 2),
        (Severity.CRITICAL, 4),
        (Severity.FROZEN, 8),
    )
)
88
+
89
+
90
@dataclass
class NormalisedEvent:
    """One hang event after parsing, normalisation and severity bucketing."""

    # Milliseconds since session start (clamped at 0 by build_normalised_event).
    delta_ms: int
    # Process name and pid taken from the log line.
    process: str
    pid: int
    # Hang duration in milliseconds.
    duration_ms: float
    # Band derived from duration_ms.
    severity: Severity
    # First Obj-C / Swift symbol found in the message, if any.
    symbol: str | None
    # Redacted, truncated message text.
    message_prefix: str
    # Identity key used for clustering.
    fingerprint: str
    # Unmodified message text; empty when not supplied.
    raw_message: str = ""
103
+
104
+
105
@dataclass
class Cluster:
    """All NormalisedEvents that share one fingerprint, with roll-up stats."""

    # Shared identity key of the member events.
    fingerprint: str
    # Number of member events.
    count: int
    # Longest and summed hang duration across members, in milliseconds.
    max_duration_ms: float
    total_duration_ms: float
    # Earliest member offset from session start, in milliseconds.
    first_delta_ms: int
    # Worst severity among members.
    severity: Severity
    # Human-readable label: the sample's symbol, else its message prefix.
    symbol_or_prefix: str
    # Representative member (cluster_events picks the longest hang).
    sample_event: NormalisedEvent
    # Optional sampling payload(s); populated by code outside this view.
    auto_sample: dict | None = None
    auto_samples: list[dict] | None = None
119
+
120
+
121
@dataclass
class SessionSummary:
    """Final roll-up of one capture session; this is what lands in summary.json."""

    session_id: str
    # NOTE(review): timestamp format is not visible here — confirm with the writer.
    started_at: str
    # Length of the capture in milliseconds.
    duration_ms: int
    # Number of hang events that were kept.
    event_count: int
    # Number of events discarded for being under the threshold.
    dropped_below_threshold: int
    # Line counters reported as "matched X/Y" by the formatters.
    matched_lines: int
    total_lines: int
    # Formatters treat clusters[0] as the top cluster, so pass them ranked.
    clusters: list[Cluster]
    # Formatters read the "bursts", "quiet_periods" and "process_distribution" keys.
    aggregates: dict
    # Fingerprinting scheme the clusters were built with.
    fingerprint_version: int = FINGERPRINT_VERSION
135
+
136
+
137
+ # === STAGE 1: PARSE ===
138
+
139
+
140
def parse_log_line(line: str) -> dict | None:
    """Turn one log-stream line into a raw hang-event dict.

    Returns ``None`` when the line is blank, does not match the expected
    column layout, or carries a message that is not hang-related.

    The result always has ``timestamp``, ``pid``, ``process`` and ``message``
    keys; ``duration_ms`` is added only when a duration can be read from the
    message text.
    """
    if line.strip() == "":
        return None
    parsed = _LOG_LINE_PATTERN.match(line)
    if parsed is None:
        return None
    stamp, pid_text, proc_name, body = parsed.groups()
    body = body.strip()
    if not is_hang_message(body):
        return None
    raw: dict = {
        "timestamp": stamp.strip(),
        "pid": int(pid_text),
        "process": proc_name.strip(),
        "message": body,
    }
    hang_ms = extract_duration_ms(body)
    if hang_ms is not None:
        raw["duration_ms"] = hang_ms
    return raw
164
+
165
+
166
# Hang-related keywords, matched case-insensitively. The lookbehinds stop
# "hang" from matching inside "change"/"exchange" and "stall" from matching
# inside "install"/"uninstall"/"reinstall", which plain substring tests hit.
_HANG_KEYWORD = re.compile(
    r"(?<!c)hang|(?<!in)stall|unresponsive|watchdog|jetsam",
    re.IGNORECASE,
)


def is_hang_message(message: str) -> bool:
    """Return True if message text describes a hang/stall/watchdog event.

    Keywords may appear anywhere in a word (``AppHang``, ``stalled``), except
    that ``hang`` directly after ``c`` and ``stall`` directly after ``in`` are
    ignored so that ordinary "changed" / "installing" messages are not
    mistaken for hangs.
    """
    return _HANG_KEYWORD.search(message) is not None
170
+
171
+
172
def extract_duration_ms(message: str) -> float | None:
    """Read a hang duration out of ``message`` and return it in milliseconds.

    A millisecond figure anywhere in the text wins; only if none is found is
    a seconds figure looked for (and scaled by 1000). Returns ``None`` when
    the message carries no recognisable duration.
    """
    if (hit := _DURATION_PATTERNS[0].search(message)) is not None:
        return float(hit.group(1))
    if (hit := _DURATION_PATTERNS[1].search(message)) is not None:
        return float(hit.group(1)) * 1000
    return None
181
+
182
+
183
+ # === STAGE 2: NORMALISE ===
184
+
185
+
186
def normalise_message(message: str, max_len: int = 40) -> str:
    """Reduce a message to a short, stable prefix.

    Steps, in order: drop the first matching boilerplate prefix, replace hex
    addresses / pid references / long integers with placeholders, collapse
    whitespace, then cut to ``max_len`` characters (trailing space removed).
    """
    cleaned = message
    # Only the first boilerplate prefix that matches is removed.
    lead = next((p for p in _BOILERPLATE_PREFIXES if cleaned.startswith(p)), None)
    if lead is not None:
        cleaned = cleaned[len(lead) :].lstrip()
    redactions = ((_HEX_ADDR, "<addr>"), (_PID_REF, "<pid>"), (_BARE_INT, "<n>"))
    for pattern, placeholder in redactions:
        cleaned = pattern.sub(placeholder, cleaned)
    cleaned = _WHITESPACE.sub(" ", cleaned).strip()
    if len(cleaned) <= max_len:
        return cleaned
    return cleaned[:max_len].rstrip()
200
+
201
+
202
def extract_symbol(message: str) -> str | None:
    """Return the first Obj-C / Swift symbol found in ``message``, else ``None``.

    Patterns are tried in their declared order; the first pattern that
    matches supplies the result.
    """
    # Lazy generator: later patterns are not searched once one has matched.
    hits = (pattern.search(message) for pattern in _SYMBOL_PATTERNS)
    first_hit = next((hit for hit in hits if hit), None)
    return first_hit.group(1) if first_hit else None
209
+
210
+
211
+ # === STAGE 3: THRESHOLD ===
212
+
213
+
214
def above_threshold(duration_ms: float | None, min_hang_ms: int) -> bool:
    """Tell whether an event is long enough to keep.

    Events without a duration are never kept; otherwise the duration must be
    at least ``min_hang_ms`` (the boundary value itself is kept).
    """
    if duration_ms is None:
        return False
    return duration_ms >= min_hang_ms
217
+
218
+
219
+ # === STAGE 4: SEVERITY BUCKET ===
220
+
221
+
222
def bucket_severity(duration_ms: float) -> Severity:
    """Classify a hang duration (ms) into its severity band.

    Bands: under 250 → MINOR, under 500 → WARN, under 2000 → CRITICAL,
    anything else → FROZEN.
    """
    # (exclusive upper bound in ms, band) — checked from shortest to longest.
    bands = (
        (250, Severity.MINOR),
        (500, Severity.WARN),
        (2000, Severity.CRITICAL),
    )
    for upper_bound, band in bands:
        if duration_ms < upper_bound:
            return band
    return Severity.FROZEN
231
+
232
+
233
+ # === STAGE 5: NORMALISED EVENT + FINGERPRINT ===
234
+
235
+
236
def build_normalised_event(
    raw_event: dict, session_start_ms: int, current_ms: int | None = None
) -> NormalisedEvent | None:
    """Build a ``NormalisedEvent`` from a raw event dict.

    Applies normalisation, severity bucketing and fingerprinting in one go.

    ``current_ms`` is the event time in epoch milliseconds; when omitted it is
    derived from the event's ``timestamp`` field. A missing or unparseable
    time yields ``delta_ms == 0``, and offsets before the session start are
    clamped to 0.

    Returns ``None`` when the event has no ``duration_ms`` — such events
    should already have been removed by threshold filtering.
    """
    hang = raw_event.get("duration_ms")
    if hang is None:
        return None

    event_ms = current_ms
    if event_ms is None:
        event_ms = _timestamp_to_ms(raw_event.get("timestamp", ""))
    # A falsy time (None was replaced above; 0 means "unknown") maps to offset 0.
    if event_ms:
        offset_ms = max(0, event_ms - session_start_ms)
    else:
        offset_ms = 0

    text = raw_event.get("message", "")
    found_symbol = extract_symbol(text)
    short_text = normalise_message(text)
    identity = compute_fingerprint(found_symbol, short_text)
    return NormalisedEvent(
        delta_ms=offset_ms,
        process=raw_event.get("process", "unknown"),
        pid=int(raw_event.get("pid", 0)),
        duration_ms=float(hang),
        severity=bucket_severity(float(hang)),
        symbol=found_symbol,
        message_prefix=short_text,
        fingerprint=identity,
        raw_message=text,
    )
265
+
266
+
267
def compute_fingerprint(symbol: str | None, message_prefix: str) -> str:
    """Derive the identity key used for clustering and diffing.

    The key is ``"fp:"`` plus the first 16 hex digits of a SHA-256 digest.
    The digest input is the symbol when one is present (a non-empty string);
    otherwise it is the normalised message prefix. The two cases are tagged
    differently, so a symbol and a prefix with the same text do not collide.

    The key is opaque — the readable label is kept separately in
    ``Cluster.symbol_or_prefix``.
    """
    if symbol:
        identity = f"sym:{symbol}"
    else:
        identity = f"msg:{message_prefix}"
    digest = hashlib.sha256(identity.encode()).hexdigest()
    return f"fp:{digest[:16]}"
279
+
280
+
281
def _timestamp_to_ms(ts: str) -> int:
    """Parse an os_log timestamp like '2026-05-22 14:30:52.123456-0800' to ms epoch.

    Accepted shapes, tried from most to least specific:

    * fractional seconds with a colon-less UTC offset,
    * fractional seconds without an offset,
    * whole seconds only (anything after the first ``.`` is discarded).

    Timestamps without an offset are interpreted in the local timezone.
    Returns 0 for an empty or unparseable string.
    """
    if not ts:
        return 0
    # The offset-less fractional format keeps sub-second precision for lines
    # whose timestamp carries no UTC offset (the log-line regex allows that).
    for fmt in ("%Y-%m-%d %H:%M:%S.%f%z", "%Y-%m-%d %H:%M:%S.%f"):
        try:
            dt = datetime.strptime(ts, fmt)
        except ValueError:
            continue
        return int(dt.timestamp() * 1000)
    # Last resort: parse only the whole-second part.
    try:
        dt = datetime.strptime(ts.split(".", maxsplit=1)[0], "%Y-%m-%d %H:%M:%S")
    except ValueError:
        return 0
    return int(dt.timestamp() * 1000)
294
+
295
+
296
+ # === STAGE 6: CLUSTER ===
297
+
298
+
299
def cluster_events(events: list[NormalisedEvent]) -> list[Cluster]:
    """Collapse events into one ``Cluster`` per fingerprint.

    Clusters come back in order of first appearance of their fingerprint.
    Each cluster records member count, max / total duration, earliest offset,
    the worst severity among members, and the longest member as its sample.
    """
    grouped: dict[str, list[NormalisedEvent]] = {}
    for ev in events:
        if ev.fingerprint not in grouped:
            grouped[ev.fingerprint] = []
        grouped[ev.fingerprint].append(ev)

    def summarise(fp: str, members: list[NormalisedEvent]) -> Cluster:
        worst = max(members, key=lambda m: _SEVERITY_WEIGHT[m.severity])
        longest = max(members, key=lambda m: m.duration_ms)
        return Cluster(
            fingerprint=fp,
            count=len(members),
            max_duration_ms=max(m.duration_ms for m in members),
            total_duration_ms=sum(m.duration_ms for m in members),
            first_delta_ms=min(m.delta_ms for m in members),
            severity=worst.severity,
            # Prefer the symbol as the label; fall back to the message prefix.
            symbol_or_prefix=longest.symbol or longest.message_prefix,
            sample_event=longest,
        )

    return [summarise(fp, members) for fp, members in grouped.items()]
323
+
324
+
325
+ # === STAGE 7: AGGREGATE ===
326
+
327
+
328
def detect_temporal_bursts(
    events: list[NormalisedEvent], window_ms: int = 1000, min_count: int = 3
) -> list[dict]:
    """Report runs of at least ``min_count`` events packed into ``window_ms``.

    Events are ordered by ``delta_ms``. Each candidate window is anchored at
    an event and spans ``window_ms`` (inclusive). When a burst is recorded,
    scanning resumes after its last event, so reported bursts never overlap.

    Each burst is a dict with ``starts_at_ms``, ``ends_at_ms`` and ``count``.
    """
    if not events:
        return []
    ordered = sorted(events, key=lambda e: e.delta_ms)
    total = len(ordered)
    found: list[dict] = []
    start = 0
    while start < total:
        anchor = ordered[start].delta_ms
        stop = start
        while stop < total and ordered[stop].delta_ms - anchor <= window_ms:
            stop += 1
        size = stop - start
        if size < min_count:
            # Too sparse: slide the anchor forward by one event.
            start += 1
            continue
        found.append(
            {
                "starts_at_ms": anchor,
                "ends_at_ms": ordered[stop - 1].delta_ms,
                "count": size,
            }
        )
        start = stop
    return found
355
+
356
+
357
def detect_quiet_periods(events: list[NormalisedEvent], threshold_ms: int = 5000) -> list[dict]:
    """Report gaps of at least ``threshold_ms`` between consecutive events.

    Events are ordered by ``delta_ms`` first. Each gap is a dict with
    ``from_ms``, ``to_ms`` and ``gap_ms``. Fewer than two events means no gaps.
    """
    if len(events) < 2:
        return []
    stamps = sorted(e.delta_ms for e in events)
    return [
        {"from_ms": earlier, "to_ms": later, "gap_ms": later - earlier}
        for earlier, later in zip(stamps, stamps[1:])
        if later - earlier >= threshold_ms
    ]
368
+
369
+
370
def process_distribution(events: list[NormalisedEvent]) -> dict[str, int]:
    """Tally how many events each process produced.

    Keys appear in order of each process's first event.
    """
    tally: dict[str, int] = {}
    for ev in events:
        name = ev.process
        if name in tally:
            tally[name] += 1
        else:
            tally[name] = 1
    return tally
376
+
377
+
378
+ # === STAGE 8: RANK ===
379
+
380
+
381
def rank_clusters(clusters: list[Cluster], top_n: int | None = None) -> list[Cluster]:
    """Order clusters from most to least important.

    The score is ``severity_weight * max_duration_ms * log(count + 1)``.
    A new list is returned; ``top_n`` (when given) keeps only that many
    leading entries.
    """

    def priority(candidate: Cluster) -> float:
        severity_factor = _SEVERITY_WEIGHT[candidate.severity]
        return severity_factor * candidate.max_duration_ms * math.log(candidate.count + 1)

    ordered = sorted(clusters, key=priority, reverse=True)
    if top_n is None:
        return ordered
    return ordered[:top_n]
390
+
391
+
392
+ # === STAGE 9: FORMAT ===
393
+
394
+
395
def format_l0(summary: SessionSummary) -> str:
    """Render the one-line status (roughly 20 tokens).

    With no clusters, says so. Otherwise reports session length, event count,
    how many clusters are CRITICAL or FROZEN, and the first cluster as "top"
    — so ``summary.clusters`` is expected to be ranked already.
    """
    sid = summary.session_id
    ranked = summary.clusters
    if not ranked:
        return f"Session {sid}: no hangs above threshold."
    lead = ranked[0]
    severe_bands = (Severity.CRITICAL, Severity.FROZEN)
    severe_count = len([c for c in ranked if c.severity in severe_bands])
    pieces = [
        f"Session {sid}: {summary.duration_ms / 1000:.1f}s, ",
        f"{summary.event_count} hangs ({severe_count} critical), top: ",
        f"{lead.symbol_or_prefix} {lead.max_duration_ms:.0f}ms ×{lead.count}",
    ]
    return "".join(pieces)
408
+
409
+
410
def format_l1(summary: SessionSummary, top_n: int = 3) -> str:
    """Render the default report (roughly 80-120 tokens).

    Output is a header line, one line per cluster for the first ``top_n``
    clusters (severity icon, max duration, count, label, first offset), and
    a closing drill-down hint. With no clusters, a two-line "no hangs"
    message including the scanned-line counters is returned instead.
    """
    sid = summary.session_id
    captured = f"{summary.duration_ms / 1000:.1f}s"
    if not summary.clusters:
        return (
            f"Session {sid}: {captured}, "
            f"no hangs ≥ threshold (scanned {summary.matched_lines}/{summary.total_lines} lines).\n"
            f"Drill: hang_watcher.py --get-details {sid}"
        )

    marker = {
        Severity.MINOR: "·",
        Severity.WARN: "⚠",
        Severity.CRITICAL: "‼",
        Severity.FROZEN: "🛑",
    }
    out = [
        f"Session {sid}: {captured} captured, "
        f"{len(summary.clusters)} clusters ({summary.event_count} events)"
    ]
    out.extend(
        f"{marker[c.severity]} {c.max_duration_ms:.0f}ms × {c.count} — "
        f"{c.symbol_or_prefix} at {c.first_delta_ms / 1000:.1f}s"
        for c in summary.clusters[:top_n]
    )
    out.append(f"Drill: hang_watcher.py --get-details {sid} [--cluster N]")
    return "\n".join(out)
437
+
438
+
439
def format_l2(summary: SessionSummary) -> str:
    """Render the expanded report (roughly 300 tokens).

    Starts with the L1 report covering every cluster, then appends: a
    severity histogram (non-zero bands only), up to three bursts, a quiet
    period count with the longest gap, the three busiest processes (only
    when more than one process is involved), and the line counters.
    """
    sections = [format_l1(summary, top_n=len(summary.clusters))]

    # _severity_histogram is defined outside this view; its items are
    # rendered as "key=value" and zero/falsy values are skipped.
    histogram = _severity_histogram(summary.clusters)
    shown_bands = [f"{band}={n}" for band, n in histogram.items() if n]
    sections.append("Severity: " + ", ".join(shown_bands))

    agg = summary.aggregates or {}

    burst_list = agg.get("bursts", [])
    if burst_list:
        rendered = [
            f"{b['count']} in {(b['ends_at_ms'] - b['starts_at_ms'])}ms @ {b['starts_at_ms'] / 1000:.1f}s"
            for b in burst_list[:3]
        ]
        sections.append("Bursts: " + "; ".join(rendered))

    gaps = agg.get("quiet_periods", [])
    if gaps:
        longest_gap = max(g["gap_ms"] for g in gaps)
        sections.append(f"Quiet periods: {len(gaps)} (longest {longest_gap}ms)")

    per_process = agg.get("process_distribution", {})
    if len(per_process) > 1:
        busiest = sorted(per_process.items(), key=lambda item: item[1], reverse=True)[:3]
        sections.append("Processes: " + ", ".join(f"{name}({n})" for name, n in busiest))

    sections.append(
        f"Lines: matched {summary.matched_lines}/{summary.total_lines}, "
        f"dropped {summary.dropped_below_threshold} sub-threshold"
    )
    return "\n".join(sections)
464
+
465
+
466
def format_cluster_detail(cluster: Cluster, events: list[NormalisedEvent]) -> str:
    """Render the L3 drill-down for one cluster.

    The output is a three-line cluster header, then up to 20 events (each
    with offset, duration, process and pid, plus the first 120 characters of
    its raw message when present), then one rendered block per auto-sample
    attached to the cluster.
    """
    label_row = f"Cluster: {cluster.symbol_or_prefix}"
    identity_row = f" fingerprint={cluster.fingerprint} severity={cluster.severity.value}"
    stats_row = (
        f" count={cluster.count} max={cluster.max_duration_ms:.0f}ms "
        f"total={cluster.total_duration_ms:.0f}ms first@{cluster.first_delta_ms}ms"
    )
    out = [label_row, identity_row, stats_row]

    # Cap the per-event listing so the detail view stays bounded.
    for item in events[:20]:
        out.append(
            f" · t={item.delta_ms}ms duration={item.duration_ms:.0f}ms "
            f"process={item.process} pid={item.pid}"
        )
        if item.raw_message:
            out.append(f" msg: {item.raw_message[:120]}")

    for captured in _iter_auto_samples(cluster):
        out += _format_auto_sample(captured)
    return "\n".join(out)
484
+
485
+
486
+ def _iter_auto_samples(cluster: Cluster) -> list[dict]:
487
+ """Yield auto-samples for a cluster, preferring the multi-kind list and
488
+ falling back to the legacy single ``auto_sample`` field for old summaries."""
489
+ if cluster.auto_samples:
490
+ return cluster.auto_samples
491
+ if cluster.auto_sample:
492
+ return [cluster.auto_sample]
493
+ return []
494
+
495
+
496
# A frame address as printed in brackets, e.g. ``[0x1a2b]``; group 1 is the
# bare ``0x...`` token without the brackets.
_ADDRESS_RE = re.compile(r"\[(0x[0-9a-fA-F]+)\]")


def extract_stack_addresses(stack: str) -> list[str]:
    """Collect the distinct ``0x...`` addresses in ``stack``, first-seen order.

    Only the bracketed ``[0xADDR]`` form is recognised. Per the original
    note, ``sample`` and ``spindump`` print frame addresses that way, and a
    looser pattern would risk picking up unrelated hex tokens. Addresses are
    compared as written, so differently cased spellings count as distinct.
    """
    found = (hit.group(1) for hit in _ADDRESS_RE.finditer(stack))
    # dict keys keep insertion order and drop repeats in one pass.
    return list(dict.fromkeys(found))
514
+
515
+
516
def symbolicate_stack(stack: str, resolver: Callable[[list[str]], dict[str, str]]) -> str:
    """Annotate each ``[0xADDR]`` token in ``stack`` as ``[0xADDR → resolved]``.

    Args:
        stack: Multi-line stack text containing bracketed addresses.
        resolver: Called once with the deduplicated address list; returns a
            mapping of address to resolved text. A falsy return value is
            treated as "nothing resolved".

    Returns:
        ``stack`` with resolved addresses rewritten. The text is returned
        untouched (and ``resolver`` is not called) when it contains no
        addresses. Tokens whose resolution is missing, empty, or merely
        echoes the address are left as they were.
    """
    addresses = extract_stack_addresses(stack)
    if not addresses:
        return stack
    lookup = resolver(addresses) or {}

    def _annotate(hit: re.Match) -> str:
        address = hit.group(1)
        label = lookup.get(address)
        if label and label.strip() != address:
            return f"[{address} → {label.strip()}]"
        # Nothing useful to add; keep the original token verbatim.
        return hit.group(0)

    return _ADDRESS_RE.sub(_annotate, stack)
536
+
537
+
538
+ def _format_auto_sample(sample: dict) -> list[str]:
539
+ """Render one auto-sample block: header + first 10 stack lines or a reason."""
540
+ kind = sample.get("kind") or "auto-sample"
541
+ stack = sample.get("stack")
542
+ if not stack:
543
+ return [f"{kind}: unavailable ({sample.get('reason', 'unknown')})"]
544
+ # Stack is multi-line text from `sample` or `spindump`. Show the first 10
545
+ # non-empty lines so the cluster detail stays bounded.
546
+ head = [line for line in stack.splitlines() if line.strip()][:10]
547
+ return [f"{kind} stack (top 10):", *(f" {line}" for line in head)]
548
+
549
+
550
def format_diff(diff: dict) -> str:
    """Render a ``diff_sessions()`` result as text.

    A diff flagged with ``version_mismatch`` collapses to a single warning
    line. Otherwise the output is a verdict header followed by optional
    ``New``, ``Resolved`` and ``Drift`` sections (at most five rows each) and
    a ``Stable`` count, each emitted only when non-empty.
    """
    if diff.get("version_mismatch"):
        return (
            f"⚠ fingerprint_version mismatch: A={diff['fingerprint_version_a']} "
            f"B={diff['fingerprint_version_b']}. Structural compare skipped."
        )

    added = diff.get("new_clusters", [])
    gone = diff.get("resolved_clusters", [])
    drifted = diff.get("drift", [])
    unchanged = diff.get("stable_count", 0)
    verdict = diff.get("verdict", "no change")

    out = [f"Diff {diff['session_a']} → {diff['session_b']}: {verdict}"]

    if added:
        out.append(f"New ({len(added)}):")
        out.extend(
            f" + {item['severity']} {item['max_duration_ms']:.0f}ms × "
            f"{item['count']} — {item['symbol_or_prefix']}"
            for item in added[:5]
        )

    if gone:
        out.append(f"Resolved ({len(gone)}):")
        out.extend(
            f" - {item['severity']} {item['max_duration_ms']:.0f}ms × "
            f"{item['count']} — {item['symbol_or_prefix']}"
            for item in gone[:5]
        )

    if drifted:
        out.append(f"Drift ({len(drifted)}):")
        for row in drifted[:5]:
            change = row["delta_pct"]
            # An infinite delta means the duration went 0 → N, so it is shown
            # as "new" rather than as a percentage.
            if change == float("inf"):
                change_text = "new"
            else:
                change_text = f"{change:+.0f}%"
            out.append(
                f" ~ {row['symbol_or_prefix']}: "
                f"{row['max_duration_ms_a']:.0f} → {row['max_duration_ms_b']:.0f}ms "
                f"({change_text})"
            )

    if unchanged:
        out.append(f"Stable: {unchanged} cluster(s) unchanged")
    return "\n".join(out)
591
+
592
+
593
def _severity_histogram(clusters: list[Cluster]) -> dict[str, int]:
    """Sum cluster event counts per severity value.

    Every ``Severity`` member gets a bucket, initialised to zero, so bands
    with no clusters are still present in the result.
    """
    totals = dict.fromkeys((band.value for band in Severity), 0)
    for entry in clusters:
        totals[entry.severity.value] += entry.count
    return totals
599
+
600
+
601
+ # === STAGE 10: TOKEN BUDGET ===
602
+
603
+
604
def estimate_tokens(text: str) -> int:
    """Approximate the token count of ``text`` as its length divided by four.

    This is the documented heuristic used by the tests; the division floors,
    so strings shorter than four characters count as zero tokens.
    """
    chars_per_token = 4
    return len(text) // chars_per_token
607
+
608
+
609
def compress_to_budget(
    summary: SessionSummary, max_tokens: int | None, default_top_n: int = 3
) -> str:
    """Pick the densest level that fits ``max_tokens``.

    Order: L2 (full) → L1 (top-N, shrinking N) → L0 (one-liner).

    Args:
        summary: Session to render.
        max_tokens: Budget measured with ``estimate_tokens``. ``None`` means
            no budget: L1 with ``default_top_n`` is returned unconditionally.
        default_top_n: Cluster count to try first for L1.

    Returns:
        The rendered text. L2 is only attempted for budgets of at least 200
        and L1 for budgets of at least 60. The final L0 fallback is returned
        without a budget check, so it may exceed a very small ``max_tokens``.
    """
    if max_tokens is None:
        return format_l1(summary, top_n=default_top_n)
    if max_tokens >= 200:
        candidate = format_l2(summary)
        if estimate_tokens(candidate) <= max_tokens:
            return candidate
    if max_tokens >= 60:
        candidate = format_l1(summary, top_n=default_top_n)
        if estimate_tokens(candidate) <= max_tokens:
            return candidate
        # Shrink top-N one step at a time until it fits, never below 1.
        # Counting down from ``default_top_n - 1`` (rather than a fixed 2, 1)
        # keeps the densest fitting view when the caller asked for more than
        # three clusters; for the default of 3 the sequence is unchanged.
        for n in range(default_top_n - 1, 0, -1):
            candidate = format_l1(summary, top_n=n)
            if estimate_tokens(candidate) <= max_tokens:
                return candidate
    return format_l0(summary)
633
+
634
+
635
+ # === DIFF ===
636
+
637
+
638
def diff_sessions(
    summary_a: SessionSummary, summary_b: SessionSummary, drift_threshold_pct: float = 20.0
) -> dict:
    """Compare two SessionSummary instances. Returns a dict structured for format_diff.

    Clusters are matched by fingerprint. A fingerprint only in B is "new",
    only in A is "resolved". A shared fingerprint is "drift" when its max
    duration changed by at least ``drift_threshold_pct`` percent (or went
    from zero to non-zero), and "stable" otherwise.

    Args:
        summary_a: Baseline session.
        summary_b: Session being compared against the baseline.
        drift_threshold_pct: Minimum absolute percentage change in max
            duration for a shared cluster to be reported as drift.

    Returns:
        A dict with ``session_a``, ``session_b``, ``version_mismatch`` and
        ``verdict``. On a fingerprint-version mismatch it carries the two
        versions and no structural comparison is made. Otherwise it carries
        ``new_clusters``, ``resolved_clusters``, ``drift`` and
        ``stable_count``. New and drift entries follow the cluster order of
        ``summary_b``; resolved entries follow that of ``summary_a``.
    """
    if summary_a.fingerprint_version != summary_b.fingerprint_version:
        return {
            "session_a": summary_a.session_id,
            "session_b": summary_b.session_id,
            "version_mismatch": True,
            "fingerprint_version_a": summary_a.fingerprint_version,
            "fingerprint_version_b": summary_b.fingerprint_version,
            "verdict": "skipped (version mismatch)",
        }
    a_map = {c.fingerprint: c for c in summary_a.clusters}
    b_map = {c.fingerprint: c for c in summary_b.clusters}
    drift: list[dict] = []
    stable = 0
    # Walk the insertion-ordered dicts instead of using set algebra on their
    # keys. Set iteration order over strings varies between interpreter runs,
    # which made the output order (and which rows survive format_diff's
    # five-row truncation) non-deterministic.
    for key, cb in b_map.items():
        if key not in a_map:
            continue
        ca = a_map[key]
        if ca.max_duration_ms == 0 and cb.max_duration_ms == 0:
            stable += 1
            continue
        if ca.max_duration_ms == 0:
            # 0 → N: a previously-silent cluster now hangs; treat as max worsening.
            delta_pct: float = float("inf")
        elif cb.max_duration_ms == 0:
            # N → 0: cluster present in A but flat in B; fully improved.
            delta_pct = -100.0
        else:
            delta_pct = (cb.max_duration_ms - ca.max_duration_ms) / ca.max_duration_ms * 100
        if delta_pct == float("inf") or abs(delta_pct) >= drift_threshold_pct:
            drift.append(
                {
                    "fingerprint": key,
                    "symbol_or_prefix": cb.symbol_or_prefix,
                    "max_duration_ms_a": ca.max_duration_ms,
                    "max_duration_ms_b": cb.max_duration_ms,
                    "delta_pct": delta_pct,
                }
            )
        else:
            stable += 1
    new_clusters = [_cluster_to_dict(c) for key, c in b_map.items() if key not in a_map]
    resolved_clusters = [_cluster_to_dict(c) for key, c in a_map.items() if key not in b_map]
    new_critical = sum(
        1 for c in new_clusters if c["severity"] in (Severity.CRITICAL.value, Severity.FROZEN.value)
    )
    # Verdict precedence: any new cluster is a regression (critical ones are
    # called out first), then a pure improvement, then drift, then no change.
    if new_critical:
        verdict = f"regression: {new_critical} new critical"
    elif new_clusters:
        verdict = f"regression: {len(new_clusters)} new minor"
    elif resolved_clusters and not drift:
        verdict = f"improvement: {len(resolved_clusters)} resolved"
    elif drift:
        worsened = sum(1 for d in drift if d["delta_pct"] > 0)
        verdict = f"drift: {worsened} worsened, {len(drift) - worsened} improved"
    else:
        verdict = "no change"
    return {
        "session_a": summary_a.session_id,
        "session_b": summary_b.session_id,
        "version_mismatch": False,
        "new_clusters": new_clusters,
        "resolved_clusters": resolved_clusters,
        "drift": drift,
        "stable_count": stable,
        "verdict": verdict,
    }
709
+
710
+
711
+ def _cluster_to_dict(cluster: Cluster) -> dict:
712
+ """Lightweight dict view for diff output (skips full sample_event for token economy)."""
713
+ return {
714
+ "fingerprint": cluster.fingerprint,
715
+ "symbol_or_prefix": cluster.symbol_or_prefix,
716
+ "severity": cluster.severity.value,
717
+ "count": cluster.count,
718
+ "max_duration_ms": cluster.max_duration_ms,
719
+ "first_delta_ms": cluster.first_delta_ms,
720
+ }
721
+
722
+
723
+ # === SERIALISATION HELPERS ===
724
+
725
+
726
def cluster_to_json(cluster: Cluster) -> dict:
    """Return a plain-dict representation of a Cluster for JSON output.

    ``asdict()`` recurses into nested dataclasses (such as the sample event),
    lists and dicts. Other values, enum members included, are copied through
    unchanged rather than converted.
    """
    # NOTE(review): the original comment describes Severity as a StrEnum; if
    # so, json.dumps writes each member as its string value even though
    # asdict() leaves the member object in place. Confirm against the
    # Severity definition.
    payload = asdict(cluster)
    return payload
730
+
731
+
732
def summary_to_json(summary: SessionSummary) -> dict:
    """Return a plain-dict representation of a SessionSummary for JSON output.

    Scalar fields are copied as-is, each cluster goes through
    ``cluster_to_json``, and ``aggregates`` is passed through by reference.
    """
    scalar_fields = (
        "session_id",
        "started_at",
        "duration_ms",
        "event_count",
        "dropped_below_threshold",
        "matched_lines",
        "total_lines",
        "fingerprint_version",
    )
    payload = {name: getattr(summary, name) for name in scalar_fields}
    payload["clusters"] = [cluster_to_json(entry) for entry in summary.clusters]
    payload["aggregates"] = summary.aggregates
    return payload
746
+
747
+
748
def summary_from_json(payload: dict) -> SessionSummary:
    """Rebuild a SessionSummary from its decoded JSON payload.

    ``session_id``, ``started_at``, ``duration_ms`` and ``event_count`` are
    required and raise ``KeyError`` when missing. The counters default to 0,
    ``clusters`` to an empty list, ``aggregates`` to an empty dict and
    ``fingerprint_version`` to 1.
    """
    optional = payload.get
    rebuilt = [_cluster_from_json(item) for item in optional("clusters", [])]
    return SessionSummary(
        session_id=payload["session_id"],
        started_at=payload["started_at"],
        duration_ms=payload["duration_ms"],
        event_count=payload["event_count"],
        dropped_below_threshold=optional("dropped_below_threshold", 0),
        matched_lines=optional("matched_lines", 0),
        total_lines=optional("total_lines", 0),
        clusters=rebuilt,
        aggregates=optional("aggregates", {}),
        fingerprint_version=optional("fingerprint_version", 1),
    )
763
+
764
+
765
def _cluster_from_json(payload: dict) -> Cluster:
    """Rebuild a Cluster, including its nested sample event, from a JSON dict.

    Severity strings are converted back to ``Severity`` members. In the
    sample event, ``symbol`` defaults to ``None`` and ``raw_message`` to an
    empty string. ``auto_sample`` and ``auto_samples`` default to ``None``
    when absent.
    """
    event_data = payload["sample_event"]
    representative = NormalisedEvent(
        delta_ms=event_data["delta_ms"],
        process=event_data["process"],
        pid=event_data["pid"],
        duration_ms=event_data["duration_ms"],
        severity=Severity(event_data["severity"]),
        symbol=event_data.get("symbol"),
        message_prefix=event_data["message_prefix"],
        fingerprint=event_data["fingerprint"],
        raw_message=event_data.get("raw_message", ""),
    )
    rebuilt = Cluster(
        fingerprint=payload["fingerprint"],
        count=payload["count"],
        max_duration_ms=payload["max_duration_ms"],
        total_duration_ms=payload["total_duration_ms"],
        first_delta_ms=payload["first_delta_ms"],
        severity=Severity(payload["severity"]),
        symbol_or_prefix=payload["symbol_or_prefix"],
        sample_event=representative,
        auto_sample=payload.get("auto_sample"),
        auto_samples=payload.get("auto_samples"),
    )
    return rebuilt
790
+
791
+
792
def event_to_jsonl(event: NormalisedEvent) -> str:
    """Serialise one normalised event to a compact, single-line JSON string.

    Separators are tightened to ``,`` and ``:`` so no whitespace is emitted
    between tokens.
    """
    record = asdict(event)
    compact = (",", ":")
    return json.dumps(record, separators=compact)
795
+
796
+
797
def event_from_jsonl(line: str) -> NormalisedEvent:
    """Parse one JSONL line back into a NormalisedEvent.

    The severity string is converted to a ``Severity`` member; ``symbol``
    defaults to ``None`` and ``raw_message`` to an empty string when absent.
    """
    record = json.loads(line)
    restored = NormalisedEvent(
        delta_ms=record["delta_ms"],
        process=record["process"],
        pid=record["pid"],
        duration_ms=record["duration_ms"],
        severity=Severity(record["severity"]),
        symbol=record.get("symbol"),
        message_prefix=record["message_prefix"],
        fingerprint=record["fingerprint"],
        raw_message=record.get("raw_message", ""),
    )
    return restored
811
+
812
+
813
+ # === BUILDERS ===
814
+
815
+
816
@dataclass
class SummaryBuilder:
    """Assemble a SessionSummary from session metadata plus a list of events."""

    # Session metadata copied verbatim into the resulting summary.
    session_id: str
    started_at: str
    duration_ms: int
    matched_lines: int = 0
    total_lines: int = 0
    dropped_below_threshold: int = 0
    # Extra aggregate entries merged over the computed ones in build().
    extras: dict = field(default_factory=dict)

    def build(
        self,
        events: list[NormalisedEvent],
        top_n: int | None = None,
        auto_samples_by_fp: dict[str, list[dict]] | None = None,
    ) -> SessionSummary:
        """Cluster, rank and aggregate ``events`` into a SessionSummary.

        Args:
            events: Normalised events for the session.
            top_n: Forwarded to ``rank_clusters``.
            auto_samples_by_fp: Optional stack captures keyed by cluster
                fingerprint (from ``--auto-sample`` / ``--auto-spindump``).
                Matching clusters get them assigned to ``auto_samples`` so
                they are carried into the summary.

        Returns:
            A SessionSummary whose ``event_count`` is ``len(events)`` and
            whose aggregates hold bursts, quiet periods and the process
            distribution, with ``extras`` applied last so its keys win.
        """
        ranked = rank_clusters(cluster_events(events), top_n=top_n)

        if auto_samples_by_fp:
            for entry in ranked:
                captured = auto_samples_by_fp.get(entry.fingerprint)
                if captured:
                    entry.auto_samples = captured

        bursts = detect_temporal_bursts(events)
        quiet_periods = detect_quiet_periods(events)
        distribution = process_distribution(events)
        aggregates = {
            "bursts": bursts,
            "quiet_periods": quiet_periods,
            "process_distribution": distribution,
        }
        aggregates.update(self.extras)

        return SessionSummary(
            session_id=self.session_id,
            started_at=self.started_at,
            duration_ms=self.duration_ms,
            event_count=len(events),
            dropped_below_threshold=self.dropped_below_threshold,
            matched_lines=self.matched_lines,
            total_lines=self.total_lines,
            clusters=ranked,
            aggregates=aggregates,
        )