create-walle 0.9.11 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. package/README.md +3 -3
  2. package/package.json +2 -2
  3. package/template/bin/dev.sh +7 -1
  4. package/template/bin/setup.js +53 -9
  5. package/template/bin/sync-images.js +53 -0
  6. package/template/builder-journal.md +17 -0
  7. package/template/claude-task-manager/api-prompts.js +98 -13
  8. package/template/claude-task-manager/api-reviews.js +82 -5
  9. package/template/claude-task-manager/db.js +32 -5
  10. package/template/claude-task-manager/docs/session-capture-foundation-design.md +1273 -0
  11. package/template/claude-task-manager/lib/claude-desktop-sessions.js +696 -0
  12. package/template/claude-task-manager/lib/coding-agent-models.js +49 -1
  13. package/template/claude-task-manager/lib/session-capture.js +421 -0
  14. package/template/claude-task-manager/lib/session-history.js +135 -15
  15. package/template/claude-task-manager/lib/session-jobs.js +10 -5
  16. package/template/claude-task-manager/lib/session-stream.js +87 -19
  17. package/template/claude-task-manager/lib/setup-provider-config.js +115 -0
  18. package/template/claude-task-manager/lib/walle-ctm-history.js +72 -0
  19. package/template/claude-task-manager/lib/walle-session-context.js +61 -0
  20. package/template/claude-task-manager/lib/walle-transcript.js +176 -0
  21. package/template/claude-task-manager/public/css/setup.css +35 -8
  22. package/template/claude-task-manager/public/css/walle-session.css +56 -0
  23. package/template/claude-task-manager/public/css/walle.css +120 -0
  24. package/template/claude-task-manager/public/index.html +814 -181
  25. package/template/claude-task-manager/public/js/message-renderer.js +148 -19
  26. package/template/claude-task-manager/public/js/reviews.js +120 -62
  27. package/template/claude-task-manager/public/js/setup.js +75 -31
  28. package/template/claude-task-manager/public/js/stream-view.js +115 -55
  29. package/template/claude-task-manager/public/js/walle-session.js +84 -2
  30. package/template/claude-task-manager/public/js/walle.js +308 -54
  31. package/template/claude-task-manager/server.js +1092 -146
  32. package/template/claude-task-manager/session-integrity.js +181 -54
  33. package/template/claude-task-manager/session-utils.js +123 -41
  34. package/template/claude-task-manager/workers/state-detectors/codex.js +5 -2
  35. package/template/package.json +1 -1
  36. package/template/wall-e/adapters/ctm.js +39 -18
  37. package/template/wall-e/agent-runners/contract.js +17 -0
  38. package/template/wall-e/agent-runners/index.js +22 -0
  39. package/template/wall-e/agent-runtime/harness.js +212 -0
  40. package/template/wall-e/agent-runtime/index.js +8 -0
  41. package/template/wall-e/agent-runtime/registry.js +67 -0
  42. package/template/wall-e/agent-runtime/session-store.js +179 -0
  43. package/template/wall-e/agent-runtime/spawn.js +208 -0
  44. package/template/wall-e/api-walle.js +174 -7
  45. package/template/wall-e/brain.js +266 -28
  46. package/template/wall-e/channels/policy.js +88 -0
  47. package/template/wall-e/channels/registry.js +15 -1
  48. package/template/wall-e/channels/reply-dispatcher.js +70 -0
  49. package/template/wall-e/channels/session-bindings.js +51 -0
  50. package/template/wall-e/chat/code-review-context.js +29 -0
  51. package/template/wall-e/chat.js +188 -42
  52. package/template/wall-e/coding/acp-adapter.js +188 -0
  53. package/template/wall-e/coding/agent-catalog.js +129 -0
  54. package/template/wall-e/coding/compaction-service.js +247 -0
  55. package/template/wall-e/coding/execution-trace.js +3 -0
  56. package/template/wall-e/coding/instruction-service.js +224 -0
  57. package/template/wall-e/coding/model-message.js +67 -0
  58. package/template/wall-e/coding/permission-rules-store.js +111 -0
  59. package/template/wall-e/coding/permission-service.js +266 -0
  60. package/template/wall-e/coding/prompt-bundle.js +67 -0
  61. package/template/wall-e/coding/prompt-runtime.js +243 -0
  62. package/template/wall-e/coding/provider-transform.js +188 -0
  63. package/template/wall-e/coding/runtime-mode.js +132 -0
  64. package/template/wall-e/coding/snapshot-service.js +155 -0
  65. package/template/wall-e/coding/stream-processor.js +268 -0
  66. package/template/wall-e/coding/task-tool.js +255 -0
  67. package/template/wall-e/coding/tool-registry.js +361 -0
  68. package/template/wall-e/coding/transcript-writer.js +143 -0
  69. package/template/wall-e/coding/workspace-replay.js +324 -0
  70. package/template/wall-e/coding-context.js +4 -22
  71. package/template/wall-e/coding-orchestrator.js +307 -18
  72. package/template/wall-e/coding-prompts.js +44 -3
  73. package/template/wall-e/context/context-builder.js +43 -1
  74. package/template/wall-e/context/topic-matcher.js +1 -1
  75. package/template/wall-e/eval/agent-runner.js +59 -13
  76. package/template/wall-e/eval/benchmarks/memory-retrieval.json +155 -57
  77. package/template/wall-e/eval/benchmarks.js +100 -16
  78. package/template/wall-e/eval/eval-orchestrator.js +218 -8
  79. package/template/wall-e/eval/harvester.js +62 -5
  80. package/template/wall-e/eval/head-to-head.js +23 -2
  81. package/template/wall-e/eval/humaneval-adapter.js +30 -5
  82. package/template/wall-e/eval/livecodebench-adapter.js +29 -5
  83. package/template/wall-e/eval/manifest.js +186 -0
  84. package/template/wall-e/eval/run-agent-benchmarks.js +66 -2
  85. package/template/wall-e/eval/session-retrieval-benchmark.js +150 -0
  86. package/template/wall-e/eval/session-transcripts.js +57 -4
  87. package/template/wall-e/eval/swebench-adapter.js +109 -3
  88. package/template/wall-e/evaluation/agent-router.js +53 -1
  89. package/template/wall-e/evaluation/coding-quorum.js +48 -1
  90. package/template/wall-e/evaluation/router.js +4 -2
  91. package/template/wall-e/evaluation/tier-selector.js +11 -1
  92. package/template/wall-e/extraction/contradiction.js +2 -2
  93. package/template/wall-e/extraction/indexer.js +2 -1
  94. package/template/wall-e/extraction/knowledge-extractor.js +2 -2
  95. package/template/wall-e/hooks/cli.js +92 -0
  96. package/template/wall-e/hooks/discovery.js +119 -0
  97. package/template/wall-e/hooks/index.js +7 -0
  98. package/template/wall-e/hooks/manifest.js +55 -0
  99. package/template/wall-e/hooks/runtime.js +84 -0
  100. package/template/wall-e/hooks/session-memory.js +225 -0
  101. package/template/wall-e/http/auth.js +6 -2
  102. package/template/wall-e/http/chat-api.js +54 -8
  103. package/template/wall-e/integrations/claude-plugin/hooks/hooks.json +27 -0
  104. package/template/wall-e/integrations/claude-plugin/hooks/walle-precompact-hook.sh +5 -0
  105. package/template/wall-e/integrations/claude-plugin/hooks/walle-stop-hook.sh +5 -0
  106. package/template/wall-e/integrations/codex-plugin/hooks/walle-hook.sh +7 -0
  107. package/template/wall-e/integrations/codex-plugin/hooks.json +37 -0
  108. package/template/wall-e/listening/calendar.js +3 -1
  109. package/template/wall-e/llm/client.js +64 -10
  110. package/template/wall-e/llm/google.js +39 -5
  111. package/template/wall-e/llm/ollama.js +1 -1
  112. package/template/wall-e/llm/ollama.plugin.json +1 -1
  113. package/template/wall-e/llm/provider-availability.js +10 -0
  114. package/template/wall-e/llm/provider-error.js +269 -0
  115. package/template/wall-e/llm/tool-adapter.js +48 -12
  116. package/template/wall-e/loops/boot.js +2 -1
  117. package/template/wall-e/loops/initiative.js +2 -2
  118. package/template/wall-e/loops/tasks.js +8 -47
  119. package/template/wall-e/loops/workspace-prompts.js +20 -0
  120. package/template/wall-e/mcp-server.js +442 -1
  121. package/template/wall-e/memory/session-ingest-service.js +159 -0
  122. package/template/wall-e/memory/source-indexer.js +289 -0
  123. package/template/wall-e/plugins/discovery.js +83 -0
  124. package/template/wall-e/plugins/manifest-loader.js +50 -10
  125. package/template/wall-e/plugins/manifest-schema.js +69 -0
  126. package/template/wall-e/plugins/model-catalog.js +55 -0
  127. package/template/wall-e/prompts/coding/base.txt +2 -0
  128. package/template/wall-e/prompts/coding/deepseek.txt +1 -0
  129. package/template/wall-e/prompts/coding/memory-protocol.md +9 -0
  130. package/template/wall-e/prompts/coding/plan.txt +1 -0
  131. package/template/wall-e/runtime/execution-trace.js +220 -0
  132. package/template/wall-e/security/audit.js +266 -0
  133. package/template/wall-e/security/ssrf.js +236 -0
  134. package/template/wall-e/session-files.js +303 -0
  135. package/template/wall-e/skills/_bundled/slack-backfill/SKILL.md +3 -0
  136. package/template/wall-e/skills/_bundled/slack-sync/SKILL.md +3 -0
  137. package/template/wall-e/skills/internal-skill-registry.js +2 -2
  138. package/template/wall-e/skills/script-skill-runner.js +143 -0
  139. package/template/wall-e/skills/skill-executor.js +5 -6
  140. package/template/wall-e/skills/skill-fallback.js +3 -1
  141. package/template/wall-e/skills/skill-harness-registry.js +7 -8
  142. package/template/wall-e/skills/skill-planner.js +52 -4
  143. package/template/wall-e/skills/slack-ingest.js +11 -3
  144. package/template/wall-e/sources/base.js +90 -0
  145. package/template/wall-e/sources/builtin.js +33 -0
  146. package/template/wall-e/sources/claude-code-jsonl.js +78 -0
  147. package/template/wall-e/sources/codex-jsonl.js +125 -0
  148. package/template/wall-e/sources/coding-session-utils.js +117 -0
  149. package/template/wall-e/sources/contract-suite.js +59 -0
  150. package/template/wall-e/sources/gemini-jsonl.js +85 -0
  151. package/template/wall-e/sources/index.js +9 -0
  152. package/template/wall-e/sources/jsonl-utils.js +181 -0
  153. package/template/wall-e/sources/record-types.js +252 -0
  154. package/template/wall-e/sources/registry.js +92 -0
  155. package/template/wall-e/sources/transforms.js +100 -0
  156. package/template/wall-e/sources/walle-jsonl.js +108 -0
  157. package/template/wall-e/tools/coding-middleware.js +31 -1
  158. package/template/wall-e/tools/file-tracker.js +25 -1
  159. package/template/wall-e/tools/local-tools.js +75 -47
  160. package/template/wall-e/tools/session-sharing.js +68 -1
  161. package/template/wall-e/tools/shell-analyzer.js +1 -1
  162. package/template/wall-e/tools/shell-policy.js +47 -0
  163. package/template/wall-e/tools/snapshot.js +42 -0
  164. package/template/wall-e/training/harvester.js +62 -5
  165. package/template/wall-e/utils/repair.js +253 -1
  166. package/template/website/index.html +3 -3
  167. package/template/wall-e/skills/_bundled/slack-mentions/.watched-threads.json +0 -18
@@ -0,0 +1,1273 @@
1
+ # Session Capture Foundation Design
2
+
3
+ Date: 2026-04-28
4
+ Status: design draft, source-code reuse pass applied
5
+ Owner: CTM
6
+
7
+ ## Summary
8
+
9
+ Build a shared `SessionCapture` foundation by promoting the existing
10
+ `SessionStream`/session-history/status/approval machinery into a clearer
11
+ provider-neutral contract. The codebase already captures most of the requested
12
+ surface: live transcript events, persisted conversation messages, active-session
13
+ status, hover summaries, prompt queues, approval decisions, and restart
14
+ scrollback. The foundation should reuse those pieces first.
15
+
16
+ This should not replace the existing terminal scrollback recorder or the
17
+ existing provider-specific JSONL stream tailers. The foundation should sit above
18
+ them as a normalization and projection layer:
19
+
20
+ 1. Capture from the richest source available for each provider.
21
+ 2. Normalize provider events into a small CTM event vocabulary.
22
+ 3. Maintain cheap in-memory live projections for UI and automation.
23
+ 4. Persist durable message/search/history data through existing tables.
24
+ 5. Let downstream features subscribe to the same substrate instead of each
25
+ feature scraping the terminal or re-parsing provider files independently.
26
+
27
+ The key architectural decision is to make `SessionStream` the first-class live
28
+ capture bus, make raw PTY bytes a fallback signal, and avoid adding new tables
29
+ where existing tables already own the data.
30
+
31
+ ## Why This Matters
32
+
33
+ Several features want the same facts:
34
+
35
+ - What did the user ask recently?
36
+ - What has the coding agent emitted recently?
37
+ - Is the session running, waiting for input, waiting for approval, idle, exited,
38
+ or unknown?
39
+ - Is there an approval request, and what exact command or action needs a
40
+ decision?
41
+ - What is the recent work summary for a tooltip or active-session preview?
42
+ - Should a monitor agent intervene because the session is stuck, looping,
43
+ blocked, failing, or drifting?
44
+
45
+ Today these questions are answered by separate mechanisms:
46
+
47
+ - PTY scrollback capture restores terminal output.
48
+ - Provider-specific stream readers tail structured files where possible.
49
+ - Hooks and telemetry provide partial status signals.
50
+ - The approver still relies heavily on screen parsing.
51
+ - UI status and summaries are computed through existing stream APIs.
52
+
53
+ Those pieces are valuable, but they are not yet exposed as one reusable
54
+ foundation. `SessionCapture` should be a thin contract over existing code first,
55
+ not a duplicate recorder.
56
+
57
+ ## Current Codebase Findings
58
+
59
+ ### Existing Session And Scrollback Capture
60
+
61
+ The codebase already captures raw terminal output for active sessions.
62
+
63
+ Relevant files:
64
+
65
+ - `claude-task-manager/server.js`
66
+ - `claude-task-manager/workers/scrollback-worker.js`
67
+ - `claude-task-manager/workers/headless-term-worker.js`
68
+ - `claude-task-manager/lib/session-history.js`
69
+ - `claude-task-manager/db.js`
70
+
71
+ Observed behavior:
72
+
73
+ - PTY output is observed in the server and fed unthrottled to the headless xterm
74
+ worker for current terminal state snapshots.
75
+ - PTY output is also batched into `scrollback_log` for restart survival.
76
+ - `scrollback_log` is explicitly cleared on normal session exit, so it is not a
77
+ long-term semantic capture store.
78
+ - This is useful for restoring the terminal view and as a fallback, but raw
79
+ terminal bytes are expensive and ambiguous for semantic features.
80
+
81
+ Implication:
82
+
83
+ - Keep scrollback capture.
84
+ - Do not use it as the primary semantic conversation source when structured
85
+ provider logs exist.
86
+ - Use it for:
87
+ - visual restore,
88
+ - coarse activity heartbeat,
89
+ - fallback for providers without structured transcripts,
90
+ - debugging capture gaps.
91
+
92
+ ### Existing Structured Stream Layer
93
+
94
+ The codebase already has a structured stream layer.
95
+
96
+ Relevant files:
97
+
98
+ - `claude-task-manager/lib/session-stream.js`
99
+ - `claude-task-manager/lib/session-state-bus.js`
100
+ - `claude-task-manager/lib/telemetry-receiver.js`
101
+ - `claude-task-manager/public/js/stream-view.js`
102
+ - `claude-task-manager/public/index.html`
103
+
104
+ Observed behavior:
105
+
106
+ - Provider session files are tailed and converted into events/status.
107
+ - The server exposes stream status and session stream APIs.
108
+ - The frontend subscribes to stream events over websocket.
109
+ - The UI already uses stream summaries/status for active sessions in some
110
+ places.
111
+ - `SessionStream` already has:
112
+ - `JsonlTailer` with byte offsets and partial-line handling,
113
+ - a per-agent ring buffer,
114
+ - CTM session ID to agent session ID mapping,
115
+ - Claude and Codex JSONL parsing,
116
+ - user prompt cache,
117
+ - debounced summary generation,
118
+ - `getRecentEvents`, `getSummary`, and `getAllStatuses`.
119
+ - `stream-view.js` already consumes `/api/sessions/:id/summary`,
120
+ `/api/stream/status`, `stream-init`, `stream-event`, and `stream-status`.
121
+
122
+ Implication:
123
+
124
+ - `SessionCapture` should evolve out of this layer rather than start as an
125
+ unrelated subsystem.
126
+ - The new foundation should preserve existing APIs while tightening vocabulary,
127
+ storage, projections, and downstream contracts.
128
+
129
+ ### Existing Durable Session Tables
130
+
131
+ Relevant file:
132
+
133
+ - `claude-task-manager/db.js`
134
+
135
+ Existing durable data includes:
136
+
137
+ - `ctm_sessions`
138
+ - `agent_sessions`
139
+ - `session_conversations`
140
+ - `session_messages`
141
+ - `session_messages_fts`
142
+ - `session_analyses`
143
+ - `session_analyses_fts`
144
+ - `scrollback_log`
145
+ - `startup_tasks`
146
+ - `approval_decisions`
147
+ - `approval_rules`
148
+ - `permission_rules` / `perm_rules`
149
+ - `prompt_queues`
150
+
151
+ Implication:
152
+
153
+ - There is already a durable conversation cache, message search index, session
154
+ identity model, active-session restore model, prompt queue store, and approval
155
+ audit log.
156
+ - Do not add `session_live_state` or `session_turns` in the first implementation.
157
+ - Do not duplicate `session_messages` for user/assistant conversation text.
158
+ - The only plausible new durable table is an append-only capture/event table for
159
+ facts that existing tables do not own: status transitions, approval-request
160
+ lifecycle, tool calls/results, capture health, and monitor-agent annotations.
161
+
162
+ ### Existing Approval Path
163
+
164
+ Relevant files:
165
+
166
+ - `claude-task-manager/approval-agent.js`
167
+ - `claude-task-manager/lib/session-jobs.js`
168
+ - provider-specific approval parsers
169
+ - `claude-task-manager/server.js`
170
+
171
+ Observed behavior:
172
+
173
+ - The approver has substantial logic for interpreting terminal screens and
174
+ provider-specific approval surfaces.
175
+ - `approval_decisions` already audits approved/escalated decisions.
176
+ - `approval_rules` already stores learned auto-approval rules and command
177
+ signatures.
178
+ - This works as a compatibility strategy, but it couples approval automation to
179
+ screen rendering.
180
+
181
+ Implication:
182
+
183
+ - A normalized event like `approval.requested` should become the preferred input
184
+ to future monitor/automation, but the first version should reuse the existing
185
+ provider parsers and `approval_decisions` audit path.
186
+ - Screen parsing should remain as fallback.
187
+
188
+ ### Source-Code Reuse Matrix
189
+
190
+ | Need | Reuse first | Why |
191
+ | --- | --- | --- |
192
+ | CTM session identity | `ctm_sessions` | Existing root row for tab/session title, cwd, provider, starred state. |
193
+ | Provider transcript identity | `agent_sessions` | Already maps provider session IDs to CTM IDs and stores provider, `jsonl_path`, model, branch, counts, slug. |
194
+ | Active-session restore | `startup_tasks` | Already tracks live CTM tasks, command, cwd, model, provider type, chat/session IDs, worktree, branch. |
195
+ | Live transcript stream | `SessionStream` | Already tails JSONL, normalizes Claude/Codex user/assistant/tool-result events, holds a per-agent ring buffer, emits WS events/status, and builds summaries. |
196
+ | File watching | `JsonlWatcher` + `session-jobs` reconciliation | Already combines `fs.watch`, periodic rescans, symlink hardening, compact `.bak` handling, and 10-minute reconciliation. |
197
+ | Durable rendered conversation | `session_conversations` | Existing JSON message cache used by Review and Conversation views. |
198
+ | Durable message search | `session_messages` + `session_messages_fts` | Existing per-message table and FTS index for deep search. Extend only if capture needs metadata columns. |
199
+ | Historical AI analysis | `session_analyses` + FTS | Existing title/summary/topics/category table for completed/offline session analysis. |
200
+ | Live terminal restore | `headless-term-worker` and `scrollback_log` | Existing xterm snapshot source and restart scrollback persistence. Do not use as semantic history. |
201
+ | Status signals | `telemetry-receiver`, `status-hooks`, `SessionStream.getAllStatuses` | Existing hook/OTEL/stream status paths already feed active-session UI. |
202
+ | Approval automation | `approval-agent`, provider parsers, `approval_decisions`, `approval_rules` | Existing parser/rule/decision pipeline should remain the execution path. |
203
+ | Prompt queue input | `queue-engine`, `prompt_queues` | Existing queued prompts and auto-advance state can emit capture-side prompt-submitted events. |
204
+ | Active-session UI | `stream-view.js`, `getSessionStatus`, `SessionActivityUtils` | Existing tooltip/status/grouping surface already consumes stream and authoritative status signals. |
205
+
206
+ Reuse rule: add new schema only where the fact is not already represented above,
207
+ or where overloading an existing table would break its current contract.
208
+
209
+ ## Online Research
210
+
211
+ ### Claude Code Transcripts
212
+
213
+ Source: https://code.claude.com/docs/en/claude-directory
214
+
215
+ Claude Code stores conversation history under the user-level Claude directory,
216
+ including project-scoped transcript JSONL files. This means CTM can usually
217
+ read a structured conversation stream without scraping terminal output.
218
+
219
+ Design implication:
220
+
221
+ - Prefer Claude transcript JSONL as the source for user prompts, assistant
222
+ responses, tool calls, tool results, and session IDs.
223
+ - Tail incrementally by byte offset and inode.
224
+ - Treat file rotation, compaction, and permission failures as recoverable.
225
+
226
+ ### Claude Code Hooks
227
+
228
+ Source: https://code.claude.com/docs/en/hooks
229
+
230
+ Claude Code supports lifecycle hooks around prompts, tool use, stop events, and
231
+ related agent activity.
232
+
233
+ Design implication:
234
+
235
+ - Hooks are strong status and event signals.
236
+ - They should not be the only source of truth because hooks can be disabled,
237
+ misconfigured, or fail, but they are useful for low-latency status changes and
238
+ approval/tool events.
239
+
240
+ ### Claude Agent SDK Sessions
241
+
242
+ Source: https://code.claude.com/docs/en/agent-sdk/sessions
243
+
244
+ Claude's SDK session model supports continuing and resuming sessions by ID.
245
+
246
+ Design implication:
247
+
248
+ - CTM should persist provider session IDs and map them to CTM session IDs.
249
+ - Capture should keep the provider session ID in every normalized event for replay and
250
+ cross-provider debugging.
251
+
252
+ ### Gemini CLI Save/Resume And Checkpointing
253
+
254
+ Sources:
255
+
256
+ - https://google-gemini.github.io/gemini-cli/docs/cli/commands.html
257
+ - https://google-gemini.github.io/gemini-cli/docs/checkpointing.html
258
+
259
+ Gemini CLI has explicit chat save/resume behavior and checkpointing features.
260
+
261
+ Design implication:
262
+
263
+ - Provider adapters must not assume every coding agent exposes a Claude-like
264
+ JSONL transcript.
265
+ - The adapter boundary should allow:
266
+ - durable transcript tailing,
267
+ - exported/saved chat files,
268
+ - checkpoint metadata,
269
+ - PTY fallback.
270
+
271
+ ### GitHub Copilot CLI Chronicle
272
+
273
+ Source: https://docs.github.com/en/copilot/concepts/agents/copilot-cli/chronicle
274
+
275
+ GitHub Copilot CLI documents local session data and a session history concept.
276
+
277
+ Design implication:
278
+
279
+ - "Agent session chronicle" is a solved product pattern.
280
+ - CTM should model a provider-neutral chronicle for coding-agent sessions, not a
281
+ provider-specific log parser API.
282
+
283
+ ### OpenTelemetry Logs And GenAI Events
284
+
285
+ Sources:
286
+
287
+ - https://opentelemetry.io/docs/specs/otel/logs/data-model/
288
+ - https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-events/
289
+
290
+ OpenTelemetry log records and GenAI semantic conventions provide useful prior
291
+ art for event attributes, timestamps, trace/session correlation, and GenAI
292
+ message events.
293
+
294
+ Design implication:
295
+
296
+ - Use OTel-inspired event fields:
297
+ - timestamp,
298
+ - severity,
299
+ - body,
300
+ - attributes,
301
+ - trace/span/session correlation where available.
302
+ - Keep CTM's internal schema small, but align naming where it does not make the
303
+ system awkward.
304
+
305
+ ### Terminal Recording Prior Art
306
+
307
+ Sources:
308
+
309
+ - https://docs.asciinema.org/how-it-works/
310
+ - https://docs.asciinema.org/manual/asciicast/v1/
311
+ - https://github.com/microsoft/node-pty
312
+ - https://www.npmjs.com/package/@xterm/addon-serialize/v/0.12.0
313
+
314
+ asciinema records terminal sessions as timed output events. `node-pty` is the
315
+ common primitive for spawning and observing terminal processes. xterm's
316
+ serialize addon can serialize terminal buffer state.
317
+
318
+ Design implication:
319
+
320
+ - Terminal capture is a solved primitive for visual replay.
321
+ - Semantic capture should not be built from ANSI parsing unless no structured
322
+ source is available.
323
+ - If CTM wants later visual replay, raw PTY chunks plus periodic xterm
324
+ snapshots are appropriate.
325
+
326
+ ### File Watching Caveats
327
+
328
+ Source: https://nodejs.org/api/fs.html
329
+
330
+ Node's `fs.watch` has platform-specific caveats and can miss or coalesce events.
331
+
332
+ Design implication:
333
+
334
+ - Transcript tailers should combine watch notifications with periodic polling.
335
+ - Store byte offsets.
336
+ - Detect truncation and inode changes.
337
+ - Make replay idempotent with provider event IDs or content hashes.
338
+
339
+ ## Goals
340
+
341
+ 1. Provide near-realtime normalized events for active coding sessions.
342
+ 2. Capture both user prompts and coding agent output.
343
+ 3. Support many concurrent sessions with low CPU and memory overhead.
344
+ 4. Make session status derivable from shared signals.
345
+ 5. Feed approver automation without screen scraping when possible.
346
+ 6. Generate rolling summaries and active-session tooltips.
347
+ 7. Enable monitor agents and future automation.
348
+ 8. Preserve current terminal restore behavior.
349
+ 9. Allow provider-specific adapters without leaking provider details into every
350
+ downstream feature.
351
+
352
+ ## Non-Goals
353
+
354
+ 1. Full visual terminal replay in the first implementation.
355
+ 2. Replacing every existing stream/status API at once.
356
+ 3. Perfect semantic parsing from raw PTY output.
357
+ 4. Capturing secrets or hidden terminal input.
358
+ 5. Running a summarizer on every output chunk.
359
+ 6. Building a distributed event bus unless local-process scaling becomes a real
360
+ bottleneck.
361
+
362
+ ## Proposed Architecture
363
+
364
+ ### High-Level Flow
365
+
366
+ ```text
367
+ Existing provider files/hooks/PTY/input events
368
+ |
369
+ v
370
+ Existing adapters:
371
+ JsonlWatcher + SessionStream + telemetry-receiver + status-hooks + approver
372
+ |
373
+ v
374
+ SessionCapture contract over SessionStream
375
+ |
376
+ +--> existing hot ring buffers
377
+ +--> existing session_conversations/session_messages projections
378
+ +--> optional capture_events for non-message lifecycle facts
379
+ +--> existing websocket/API stream
380
+ +--> existing active-session status/tooltip UI
381
+ +--> existing approver + future monitor agents
382
+ ```
383
+
384
+ ### Source Adapters
385
+
386
+ Do not start by creating parallel adapters. Start by wrapping and extending the
387
+ adapters that already exist.
388
+
389
+ Initial reuse mapping:
390
+
391
+ - `JsonlWatcher`
392
+ - Existing source of Claude JSONL `file-new` / `file-change` events.
393
+ - Already hardened against symlink/path traversal issues.
394
+
395
+ - `SessionStream.JsonlTailer`
396
+ - Existing byte-offset incremental reader.
397
+ - Reuse it for live transcript tailing rather than adding another tailer.
398
+
399
+ - `SessionStream._processEntry`
400
+ - Existing Claude user/assistant/tool-result normalizer.
401
+ - Extend it with normalized `kind`/`source` metadata instead of reparsing JSONL
402
+ somewhere else.
403
+
404
+ - `SessionStream._processCodexEntry`
405
+ - Existing Codex user/assistant normalizer using `session-history.js`.
406
+ - Extend here for Codex capture rather than adding a second Codex parser.
407
+
408
+ - `telemetry-receiver`
409
+ - Existing hook/OTEL fan-in for Claude, Codex, and Gemini status.
410
+ - Add capture-state notifications here if needed, but preserve the existing
411
+ `session.status` websocket contract.
412
+
413
+ - `approval-agent` + provider parsers
414
+ - Existing screen-derived approval context parser and decision engine.
415
+ - Emit capture lifecycle events from this path; do not duplicate approval
416
+ parsing.
417
+
418
+ - `queue-engine` and `handleInput`
419
+ - Existing paths that write user/queued prompts to PTY.
420
+ - These are useful low-latency prompt-submission signals before provider JSONL
421
+ catches up, but transcript events should still be the high-confidence source.
422
+
423
+ Future adapters:
424
+
425
+ - Gemini transcript/checkpoint adapter only if Gemini exposes a durable
426
+ conversation source CTM does not already ingest.
427
+ - A provider-neutral adapter interface only after at least two providers need
428
+ different code paths that cannot live inside `SessionStream` cleanly.
429
+
430
+ ### Normalized Event Shape
431
+
432
+ ```ts
433
+ type SessionCaptureEvent = {
434
+ id: string;
435
+ sessionId: string; // CTM session id
436
+ provider: 'claude' | 'codex' | 'gemini' | 'unknown';
437
+ providerSessionId?: string;
438
+ source: 'transcript' | 'hook' | 'pty' | 'ctm-input' | 'telemetry';
439
+ kind:
440
+ | 'session.started'
441
+ | 'session.exited'
442
+ | 'turn.started'
443
+ | 'turn.completed'
444
+ | 'user.prompt'
445
+ | 'assistant.delta'
446
+ | 'assistant.message'
447
+ | 'tool.call'
448
+ | 'tool.result'
449
+ | 'approval.requested'
450
+ | 'approval.resolved'
451
+ | 'status.changed'
452
+ | 'error'
453
+ | 'heartbeat';
454
+ createdAt: string; // ISO timestamp
455
+ observedAt: string; // ISO timestamp when CTM observed it
456
+ sequence: number; // per CTM session monotonic sequence
457
+ turnId?: string;
458
+ parentId?: string;
459
+ text?: string;
460
+ data?: Record<string, unknown>;
461
+ confidence: 'high' | 'medium' | 'low';
462
+ };
463
+ ```
464
+
465
+ Important rules:
466
+
467
+ - `createdAt` comes from the provider if available.
468
+ - `observedAt` always comes from CTM.
469
+ - `sequence` is assigned by CTM after normalization.
470
+ - `confidence` lets downstream users prefer transcript events over PTY fallback.
471
+ - Large raw payloads should not be copied into every event. Store compact text
472
+ and structured metadata; keep raw payload references when needed.
473
+
474
+ ### Event Identity And Idempotency
475
+
476
+ Events need stable IDs because file watchers can replay lines.
477
+
478
+ Preferred ID order:
479
+
480
+ 1. Provider event/message ID if available.
481
+ 2. Provider session ID plus transcript byte offset.
482
+ 3. Hash of provider session ID, source, kind, timestamp, and compact payload.
483
+
484
+ The event bus should dedupe per session by event ID.
485
+
486
+ ### Hot In-Memory State
487
+
488
+ Maintain a compact per-session live object:
489
+
490
+ ```ts
491
+ type SessionLiveState = {
492
+ sessionId: string;
493
+ status: 'running' | 'waiting' | 'waiting_approval' | 'idle' | 'exited' | 'unknown';
494
+ provider: string;
495
+ providerSessionId?: string;
496
+ lastEventAt?: string;
497
+ lastUserPromptAt?: string;
498
+ lastAssistantOutputAt?: string;
499
+ activeTurnId?: string;
500
+ pendingApprovalId?: string;
501
+ recentEvents: RingBuffer<SessionCaptureEvent>;
502
+ recentText: RingBuffer<CompactTextChunk>;
503
+ recentTurns: RingBuffer<SessionTurn>;
504
+ summary?: SessionSummary;
505
+ };
506
+ ```
507
+
508
+ Default ring sizes:
509
+
510
+ - recent events: 200 per session
511
+ - recent text chunks: 64 KB per session
512
+ - recent turns: 10 per session
513
+
514
+ For 100 active sessions, this stays small enough for one Node process (roughly
515
+ 6.4 MB of text rings plus 20,000 compact events) if events are compact and raw PTY chunks are not duplicated.
516
+
517
+ ### Durable Storage
518
+
519
+ Reuse existing tables first.
520
+
521
+ Keep using:
522
+
523
+ - `ctm_sessions` for CTM tab/session identity.
524
+ - `agent_sessions` for provider session identity, provider type, transcript
525
+ path, model, branch, file size, modified time, and user-message count.
526
+ - `startup_tasks` for active process restore and live task metadata.
527
+ - `session_conversations` for rendered durable conversation JSON.
528
+ - `session_messages` and `session_messages_fts` for durable per-message search.
529
+ - `session_analyses` and `session_analyses_fts` for completed-session summaries,
530
+ title/category/topic analysis, and search enrichment.
531
+ - `approval_decisions` for approval audit records.
532
+ - `approval_rules` and `perm_rules` / `permission_rules` for approval policy.
533
+ - `prompt_queues` for queued prompt state.
534
+ - `scrollback_log` only for restart scrollback, not semantic history.
535
+
536
+ Do not add in phase 1:
537
+
538
+ - `session_live_state`: `SessionStream`, `sessions`, frontend `_streamStatus`,
539
+ and authoritative `session.status` already form the live state projection.
540
+ - `session_turns`: turns can be computed from `session_messages` and the
541
+ existing `SessionStream.userPromptCache` for the first tooltip/monitoring
542
+ use cases.
543
+ - A second message table: `session_messages` already exists for per-message
544
+ persistence/search.
545
+
546
+ Possible later addition:
547
+
548
+ ```sql
549
+ CREATE TABLE IF NOT EXISTS session_capture_events (
550
+ id TEXT PRIMARY KEY,
551
+ ctm_session_id TEXT NOT NULL,
552
+ agent_session_id TEXT,
553
+ provider TEXT,
554
+ source TEXT NOT NULL,
555
+ kind TEXT NOT NULL,
556
+ sequence INTEGER NOT NULL,
557
+ parent_id TEXT,
558
+ provider_event_id TEXT,
559
+ created_at TEXT,
560
+ observed_at TEXT NOT NULL,
561
+ text TEXT,
562
+ data_json TEXT,
563
+ confidence TEXT NOT NULL,
564
+ inserted_at TEXT DEFAULT (datetime('now')),
565
+ UNIQUE(ctm_session_id, sequence)
566
+ );
567
+
568
+ CREATE INDEX IF NOT EXISTS idx_capture_events_session_kind_time
569
+ ON session_capture_events(ctm_session_id, kind, observed_at);
570
+ ```
571
+
572
+ Only add this table when there is a real consumer for durable non-message
573
+ events. Good candidates:
574
+
575
+ - `approval.requested`
576
+ - `approval.resolved`
577
+ - `status.changed`
578
+ - `tool.call`
579
+ - `tool.result`
580
+ - `session.started`
581
+ - `session.exited`
582
+ - monitor-agent annotations
583
+
584
+ For user and assistant text, continue writing through `session_conversations`
585
+ and `session_messages`; duplicating full message text into
586
+ `session_capture_events` would increase storage and create two sources of truth.
587
+
588
+ ### Status Projection
589
+
590
+ Session status is already a derived projection with multiple inputs:
591
+
592
+ - `telemetry-receiver` emits authoritative `session.status` from hooks/OTEL.
593
+ - `SessionStream` emits `stream-status` from JSONL and filtered PTY activity.
594
+ - `status-hooks` owns an idle/busy/waiting-input state bus for user hooks.
595
+ - The frontend `getSessionStatus` merges authoritative status, stream status,
596
+ and local PTY fallback.
597
+
598
+ The capture foundation should consolidate vocabulary and expose the evidence
599
+ behind the status, not replace all of those paths in one patch.
600
+
601
+ Suggested status vocabulary:
602
+
603
+ - `running`
604
+ - Recent assistant/tool output, active hook event, or active turn.
605
+
606
+ - `waiting`
607
+ - Provider is waiting for user prompt and no approval is pending.
608
+
609
+ - `waiting_approval`
610
+ - A high-confidence approval request is pending.
611
+
612
+ - `idle`
613
+ - No recent output for a configurable window, process still alive, no known
614
+ prompt/approval wait state.
615
+
616
+ - `exited`
617
+ - PTY/process ended.
618
+
619
+ - `unknown`
620
+ - Session exists but capture has insufficient evidence.
621
+
622
+ Priority order:
623
+
624
+ 1. exited
625
+ 2. waiting_approval
626
+ 3. running
627
+ 4. waiting
628
+ 5. idle
629
+ 6. unknown
630
+
631
+ Recommended idle thresholds:
632
+
633
+ - 30 seconds with no output while active turn is open: still `running`.
634
+ - 2 minutes with no output and no open turn: `idle`.
635
+ - Provider-specific prompt-ready signal: `waiting`.
636
+
637
+ Avoid deriving `waiting` solely from "no output"; that confuses long-running
638
+ commands with user-wait states.
639
+
640
+ Mapping to existing vocabulary:
641
+
642
+ - `SessionStream.running` maps to capture `running`.
643
+ - `SessionStream.waiting` maps to capture `waiting`.
644
+ - `SessionStream.idle` maps to capture `idle`.
645
+ - `SessionStateBus.busy` maps to capture `running`.
646
+ - `SessionStateBus.waiting_input` maps to capture `waiting`, or to capture
646
+ `waiting_approval` when the source reason is an approval prompt.
648
+ - `session.status working=true` maps to capture `running`.
649
+ - `session.status working=false` maps to `waiting` or `idle` depending on prompt
650
+ evidence.
651
+
652
+ ### Turn Projection
653
+
654
+ Turns are the user-facing unit for summaries and tooltips.
655
+
656
+ A turn starts on:
657
+
658
+ - `user.prompt`
659
+
660
+ A turn may include:
661
+
662
+ - assistant deltas/messages,
663
+ - tool calls/results,
664
+ - approvals,
665
+ - errors,
666
+ - status changes.
667
+
668
+ A turn completes on:
669
+
670
+ - provider stop event,
671
+ - hook stop event,
672
+ - next user prompt,
673
+ - session exit,
674
+ - timeout plus prompt-ready signal.
675
+
676
+ Store:
677
+
678
+ - prompt text,
679
+ - compact assistant text,
680
+ - tool call summary,
681
+ - approval summary,
682
+ - status,
683
+ - started/completed timestamps.
684
+
685
+ ### API And Websocket Surface
686
+
687
+ Keep existing stream APIs as the public compatibility surface. Add capture-specific
688
+ endpoint or websocket event names only when a new consumer needs them.
689
+
690
+ Existing endpoints to reuse:
691
+
692
+ - `GET /api/stream/status`
693
+ - `GET /api/sessions/:id/stream`
694
+ - `GET /api/sessions/:id/summary`
695
+ - `GET /api/session/messages`
696
+
697
+ Existing websocket events to reuse:
698
+
699
+ - `subscribe-stream`
700
+ - `stream-init`
701
+ - `stream-event`
702
+ - `stream-status`
703
+ - `session.status`
704
+ - `waiting-for-input`
705
+ - `approval-decision`
706
+
707
+ Possible later additions:
708
+
709
+ - `GET /api/sessions/:id/capture/events?after=<sequence>&limit=<n>` only after
710
+ `session_capture_events` exists.
711
+ - `capture-event` only for non-message lifecycle events that do not fit the
712
+ existing stream-event contract.
713
+ - Whichever of these is added, do not force the frontend migration into the first backend patch.
714
+
715
+ ## Downstream Consumers
716
+
717
+ ### Approver
718
+
719
+ Current pain:
720
+
721
+ - Approval automation has to inspect terminal output directly.
722
+ - Terminal rendering is not a stable API.
723
+
724
+ New path:
725
+
726
+ 1. Provider transcript or hook emits `approval.requested`.
727
+ 2. Capture projection stores pending approval with command/action metadata.
728
+ 3. Approver consumes pending approval event.
729
+ 4. Approver sends decision through the existing provider-specific input path.
730
+ 5. Capture emits `approval.resolved`.
731
+
732
+ Fallback:
733
+
734
+ - If no structured approval event arrives but the screen parser finds an
735
+ approval prompt, emit a low-confidence `approval.requested` event sourced from
736
+ `pty`.
737
+
738
+ This lets approval automation move to structured events without losing
739
+ compatibility.
740
+
741
+ ### Active Sessions UI
742
+
743
+ Use existing active-session state for:
744
+
745
+ - status pill,
746
+ - activity timestamp,
747
+ - pending approval badge,
748
+ - recent prompt preview,
749
+ - tooltip summary,
750
+ - "what is this session doing?" detail.
751
+
752
+ The screenshot in the original request shows active sessions as the natural
753
+ surface for this. The UI should not need to read raw terminal output to populate
754
+ those affordances.
755
+
756
+ Current implementation already covers much of this:
757
+
758
+ - `stream-view.js` fetches `/api/sessions/:id/summary` on hover.
759
+ - `getSessionStatus` uses authoritative status and stream status before PTY
760
+ fallback.
761
+ - Active sessions are grouped by Running, Waiting, Idle, Exited.
762
+
763
+ Recommended change:
764
+
765
+ - Improve the existing summary/status payload and tooltip rather than adding a
766
+ new `session_live_state` table.
767
+
768
+ ### Rolling Summaries
769
+
770
+ Generate summaries over the last N turns, default N = 5.
771
+
772
+ Inputs:
773
+
774
+ - prompt text,
775
+ - assistant summary text,
776
+ - tool calls,
777
+ - approvals,
778
+ - errors,
779
+ - status.
780
+
781
+ Trigger policy:
782
+
783
+ - On completed turn.
784
+ - On session exit.
785
+ - On demand if summary missing.
786
+ - Rate limit per session, for example no more than once every 30 seconds unless
787
+ a turn completes.
788
+
789
+ Storage:
790
+
791
+ - Keep live tooltip summary in `SessionStream.cachedSummary`.
792
+ - For completed/offline analysis, reuse `session_analyses.summary`.
793
+ - If live summaries need restart persistence later, first consider adding
794
+ `live_summary`, `live_summary_updated_at`, and `live_summary_source` to
795
+ `session_conversations` or `agent_sessions` before creating a new table.
796
+
797
+ Recommended tooltip shape:
798
+
799
+ ```text
800
+ Current: waiting for approval to run npm test
801
+ Recent: implemented stream parser changes, fixed status projection, ran server tests
802
+ Last prompt: "Add capture foundation design"
803
+ ```
804
+
805
+ Keep this short. The active sessions UI needs a preview, not a full transcript.
806
+
807
+ ### Monitor Agent
808
+
809
+ The monitor agent should subscribe to capture state/events instead of watching
810
+ the terminal.
811
+
812
+ Potential monitor behaviors:
813
+
814
+ - Detect stuck sessions:
815
+ - active turn open,
816
+ - no output for threshold,
817
+ - no long-running command marker,
818
+ - process alive.
819
+
820
+ - Detect repeated failures:
821
+ - repeated test failures,
822
+ - repeated same command,
823
+ - repeated edit/revert loops.
824
+
825
+ - Detect approval backlog:
826
+ - pending approval too long,
827
+ - approval requested for risky command,
828
+ - multiple sessions blocked.
829
+
830
+ - Detect user attention needs:
831
+ - provider asks a question,
832
+ - merge conflict appears,
833
+ - auth/login failure,
834
+ - rate limit,
835
+ - network failure.
836
+
837
+ - Detect completion:
838
+ - task summary emitted,
839
+ - tests pass,
840
+ - no active turn,
841
+ - provider waiting.
842
+
843
+ Actions should start conservative:
844
+
845
+ - annotate session,
846
+ - update tooltip/status,
847
+ - notify user,
848
+ - request approval,
849
+ - queue a suggested next prompt.
850
+
851
+ Autonomous intervention should require a separate policy layer.
852
+
853
+ ## Additional Product Opportunities
854
+
855
+ The same capture foundation can support:
856
+
857
+ - Cross-session command center:
858
+ - "show sessions waiting on me"
859
+ - "show sessions running tests"
860
+ - "show sessions with failures"
861
+
862
+ - Global work feed:
863
+ - timeline of prompts, tool calls, approvals, completions.
864
+
865
+ - Session search:
866
+ - search recent prompts, assistant output, commands, errors.
867
+
868
+ - Smart resume:
869
+ - restore last five turns plus summary when reopening a session.
870
+
871
+ - Automated handoff:
872
+ - create concise handoff note from capture history.
873
+
874
+ - Cost/performance attribution:
875
+ - estimate expensive sessions by duration, tool volume, token metadata where
876
+ providers expose it.
877
+
878
+ - Quality analytics:
879
+ - number of turns to completion,
880
+ - test-fix loops,
881
+ - approval latency,
882
+ - idle time.
883
+
884
+ - Failure taxonomy:
885
+ - auth,
886
+ - missing dependency,
887
+ - test failure,
888
+ - type error,
889
+ - merge conflict,
890
+ - rate limit,
891
+ - provider crash.
892
+
893
+ - Alerting:
894
+ - notify when a long job completes,
895
+ - notify when a session waits for input,
896
+ - notify when a session reaches a risky approval.
897
+
898
+ - Dataset creation:
899
+ - collect sanitized prompt/output/tool sequences for internal evaluation.
900
+
901
+ - Replay and audit:
902
+ - reconstruct what happened without needing terminal scrollback.
903
+
904
+ - Better auto-titles:
905
+ - derive active-session names from last user intent and current activity.
906
+
907
+ - Multi-agent coordination:
908
+ - let one supervisor understand what multiple coding agents are doing.
909
+
910
+ ## Efficiency Design
911
+
912
+ ### Principle 1: Prefer Structured Deltas
913
+
914
+ Provider transcript lines are usually much smaller and more meaningful than PTY
915
+ screen updates. Tail transcript files incrementally and store offsets.
916
+
917
+ ### Principle 2: Avoid Re-Parsing Per Consumer
918
+
919
+ Adapters parse once, then publish normalized events. Consumers subscribe to the
920
+ bus or query projections.
921
+
922
+ ### Principle 3: Keep Raw Payloads Out Of Hot Paths
923
+
924
+ Do not store full raw provider messages in every in-memory projection. Keep:
925
+
926
+ - compact text,
927
+ - structured metadata,
928
+ - pointer to raw source if needed,
929
+ - rolling rings.
930
+
931
+ ### Principle 4: Batch Durable Writes
932
+
933
+ Use small batch inserts for high-volume streams:
934
+
935
+ - flush every 100 to 250 ms,
936
+ - or every N events,
937
+ - whichever comes first.
938
+
939
+ Repeated updates to the live status projection can be coalesced because only the latest value matters; distinct status transitions must still be recorded.
940
+
941
+ ### Principle 5: Backpressure And Drop Policy
942
+
943
+ For each session:
944
+
945
+ - never drop durable high-value events:
946
+ - user prompts,
947
+ - assistant final messages,
948
+ - approvals,
949
+ - errors,
950
+ - status transitions,
951
+ - tool calls/results.
952
+
953
+ - allow coalescing of:
954
+ - assistant deltas,
955
+ - heartbeat,
956
+ - raw PTY snippets.
957
+
958
+ If a consumer is slow, it should resume from the existing stream snapshot plus
959
+ `session_conversations`/`session_messages`. If the optional
960
+ `session_capture_events` table is added later, non-message event consumers can
961
+ resume from that table by sequence.
962
+
963
+ ### Principle 6: Lazy Summarization
964
+
965
+ Summarization is the expensive part. It should run:
966
+
967
+ - after turn completion,
968
+ - with rate limits,
969
+ - on compact turn text,
970
+ - only for sessions visible in active UI or recently active unless explicitly
971
+ requested.
972
+
973
+ ### Principle 7: Watch Plus Poll
974
+
975
+ Use file watch notifications for latency, but backstop with polling because
976
+ watchers can miss events. Store offsets and detect truncation/rotation.
977
+
978
+ Recommended default:
979
+
980
+ - watch transcript directory where possible.
981
+ - poll active transcript files every 500 to 1000 ms.
982
+ - use exponential backoff for idle sessions, and stop polling once a session has exited.
983
+
984
+ ## Privacy And Safety
985
+
986
+ Capture will contain user prompts, file paths, commands, errors, and possibly
987
+ secrets accidentally pasted into a terminal.
988
+
989
+ Rules:
990
+
991
+ - Do not capture hidden password input.
992
+ - Redact known secret patterns before summaries or monitor-agent prompts.
993
+ - Keep raw event retention configurable.
994
+ - Separate local-only raw capture from any exported telemetry.
995
+ - Mark low-confidence PTY-derived text so automations avoid over-trusting it.
996
+ - Allow per-session capture disablement if needed.
997
+
998
+ Redaction should apply before:
999
+
1000
+ - summaries,
1001
+ - monitor-agent prompts,
1002
+ - notifications,
1003
+ - external exports.
1004
+
1005
+ Durable local event storage can retain original text by default if CTM already
1006
+ stores local transcripts, but exports should use redacted text.
1007
+
1008
+ ## Migration Plan
1009
+
1010
+ ### Phase 0: Inventory And Contracts
1011
+
1012
+ - Document existing stream event shapes.
1013
+ - Document current session status values and frontend consumers.
1014
+ - Confirm provider transcript paths for Claude, Codex, and Gemini in this
1015
+ installation.
1016
+ - Treat `SessionStream` as the live bus unless a concrete incompatibility is
1017
+ found.
1018
+ - Treat `session_conversations` and `session_messages` as the durable message
1019
+ projection.
1020
+ - Do not add schema until a consumer needs data that the existing tables cannot
1021
+ represent.
1022
+
1023
+ Deliverable:
1024
+
1025
+ - design doc and implementation checklist.
1026
+
1027
+ ### Phase 1: Capture Core
1028
+
1029
+ - Add a small `session-capture` module or wrapper that delegates to
1030
+ `SessionStream` rather than competing with it.
1031
+ - Define a normalized event vocabulary as an adapter over existing
1032
+ `stream-event` payloads.
1033
+ - Add `kind`, `source`, `confidence`, and provider metadata to emitted stream
1034
+ events where possible without breaking the frontend.
1035
+ - Reuse existing ring buffers and `getRecentEvents`.
1036
+ - Reuse existing `/api/stream/status`, `/api/sessions/:id/stream`, and
1037
+ websocket `stream-*` contracts.
1038
+
1039
+ No UI rewrite yet.
1040
+
1041
+ ### Phase 2: Status Projection
1042
+
1043
+ - Write a reducer that merges existing evidence:
1044
+ - `SessionStream` status,
1045
+ - `telemetry-receiver` `session.status`,
1046
+ - `status-hooks` state,
1047
+ - approval detection,
1048
+ - prompt-ready fallback.
1049
+ - Keep existing frontend status behavior, but expose debug/evidence fields for
1050
+ monitor agents.
1051
+ - Add tests for status transitions:
1052
+ - running,
1053
+ - waiting,
1054
+ - waiting approval,
1055
+ - idle,
1056
+ - exited.
1057
+
1058
+ ### Phase 3: Approver Integration
1059
+
1060
+ - Emit normalized approval lifecycle events from the existing approver path.
1061
+ - Keep `approval-agent` as the execution and parsing owner.
1062
+ - Keep writing `approval_decisions`.
1063
+ - Consider `decision='pending'` only if the UI/monitor needs a durable pending
1064
+ record; otherwise keep pending approval in hot capture state.
1065
+ - Add tests for approval request/resolution lifecycle.
1066
+
1067
+ ### Phase 4: Summaries And Tooltips
1068
+
1069
+ - Extend existing `SessionStream.getSummary` from the current prompt-cache model
1070
+ toward a last-five-prompt/turn model.
1071
+ - Reuse the existing `/api/sessions/:id/summary` endpoint and stream tooltip.
1072
+ - Reuse `session_analyses.summary` for completed/offline sessions.
1073
+ - Add rate limits and redaction.
1074
+
1075
+ ### Phase 5: Monitor Agent Substrate
1076
+
1077
+ - Add capture subscription for monitor agent.
1078
+ - Start with read-only classification and notifications.
1079
+ - Add policy-gated actions later.
1080
+
1081
+ ### Phase 6: Provider Expansion And Hardening
1082
+
1083
+ - Harden Codex and Gemini adapters.
1084
+ - Add replay from durable events on restart.
1085
+ - Add capture health dashboard:
1086
+ - active adapters,
1087
+ - lag,
1088
+ - dropped/coalesced events,
1089
+ - last event time,
1090
+ - errors.
1091
+
1092
+ Only in this phase should we revisit a durable `session_capture_events` table,
1093
+ and only if monitor/replay/approval consumers need non-message event history
1094
+ that cannot be reconstructed from transcripts plus existing tables.
1095
+
1096
+ ## Testing Strategy
1097
+
1098
+ ### Unit Tests
1099
+
1100
+ - Event normalization per provider.
1101
+ - Event ID/idempotency logic.
1102
+ - Status reducer.
1103
+ - Turn reducer.
1104
+ - Summary input builder.
1105
+ - Redaction.
1106
+
1107
+ ### Integration Tests
1108
+
1109
+ - Tail a fixture transcript and verify emitted events.
1110
+ - Simulate file truncation/rotation.
1111
+ - Simulate duplicate watcher notifications.
1112
+ - Simulate PTY fallback when transcript is absent.
1113
+ - Verify websocket subscribers receive ordered events.
1114
+ - Verify slow consumer can catch up by sequence.
1115
+
1116
+ ### Regression Tests
1117
+
1118
+ - Existing session stream APIs still work.
1119
+ - Existing terminal scrollback restore still works.
1120
+ - Existing approver screen parsing still works as fallback.
1121
+
1122
+ ### Performance Tests
1123
+
1124
+ Simulate:
1125
+
1126
+ - 10 active sessions,
1127
+ - 50 active sessions,
1128
+ - 100 active sessions.
1129
+
1130
+ Measure:
1131
+
1132
+ - CPU while tailing idle files,
1133
+ - CPU while streaming output,
1134
+ - memory per session,
1135
+ - database write rate,
1136
+ - websocket fanout overhead,
1137
+ - summary invocation count.
1138
+
1139
+ Target:
1140
+
1141
+ - Near-zero CPU for idle sessions.
1142
+ - No per-consumer transcript parsing.
1143
+ - Bounded memory per session.
1144
+ - Durable writes batched under load.
1145
+
1146
+ ## Open Decisions
1147
+
1148
+ 1. Should durable non-message events get a new table, or can we stay entirely on
1149
+ existing tables for now?
1150
+
1151
+ Recommendation: stay on existing tables for phase 1. Add
1152
+ `session_capture_events` later only for non-message events such as approval
1153
+ request lifecycle, status transitions, monitor annotations, and tool events.
1154
+ Keep `session_messages` as the user/assistant text projection.
1155
+
1156
+ 2. Should summaries be generated locally by the coding-agent provider, by CTM's
1157
+ chosen model, or by a lightweight heuristic first?
1158
+
1159
+ Recommendation: reuse `SessionStream`'s current cloud/local/fallback summary
1160
+ tiers, but change the input from "last prompt cache only" toward the last five
1161
+ prompt/assistant turns where available. Keep completed-session summaries in
1162
+ `session_analyses`.
1163
+
1164
+ 3. How much raw assistant delta text should be stored?
1165
+
1166
+ Recommendation: keep final/rendered messages in `session_conversations` and
1167
+ `session_messages`; keep deltas in the existing hot ring only. Do not persist
1168
+ token-level deltas unless visual replay becomes a goal.
1169
+
1170
+ 4. How should monitor agents be allowed to act?
1171
+
1172
+ Recommendation: start read-only. Add action policies separately.
1173
+
1174
+ 5. Should capture run for exited sessions?
1175
+
1176
+ Recommendation: no active watchers after exit. Keep durable replay and final
1177
+ summary generation through `session_conversations`, `session_messages`, and
1178
+ `session_analyses`.
1179
+
1180
+ ## Main Risks
1181
+
1182
+ ### Provider Format Drift
1183
+
1184
+ Provider transcript schemas can change.
1185
+
1186
+ Mitigation:
1187
+
1188
+ - Keep adapters small and fixture-tested.
1189
+ - Preserve unknown payload fields in `data`.
1190
+ - Use confidence levels.
1191
+
1192
+ ### Watcher Misses
1193
+
1194
+ File watching can miss changes.
1195
+
1196
+ Mitigation:
1197
+
1198
+ - Watch plus poll.
1199
+ - Store offsets.
1200
+ - Detect truncation/rotation.
1201
+ - Make parsing idempotent.
1202
+
1203
+ ### Over-Capture
1204
+
1205
+ Capturing everything can create privacy and performance problems.
1206
+
1207
+ Mitigation:
1208
+
1209
+ - Compact events.
1210
+ - Redact summaries/exports.
1211
+ - Bound hot buffers.
1212
+ - Configurable retention.
1213
+
1214
+ ### Status Misclassification
1215
+
1216
+ Idle/running/waiting can be ambiguous.
1217
+
1218
+ Mitigation:
1219
+
1220
+ - Prefer explicit provider/hook signals.
1221
+ - Use priority reducer.
1222
+ - Expose confidence and "last evidence" in debug state.
1223
+
1224
+ ### Duplicate Systems
1225
+
1226
+ Adding a new foundation could duplicate what the existing `session-stream.js` already does.
1227
+
1228
+ Mitigation:
1229
+
1230
+ - Build by evolving/wrapping the existing stream layer.
1231
+ - Keep old APIs as compatibility wrappers.
1232
+ - Move consumers gradually.
1233
+
1234
+ ## Recommended First Implementation
1235
+
1236
+ The first patch should be deliberately small:
1237
+
1238
+ 1. Add a `session-capture` adapter module that wraps `SessionStream` and
1239
+ existing status/approval signals.
1240
+ 2. Add normalized event vocabulary fields to `SessionStream` events in a
1241
+ backwards-compatible way.
1242
+ 3. Add a status evidence reducer that consumes existing `stream-status`,
1243
+ `session.status`, `waiting-for-input`, and approval signals.
1244
+ 4. Reuse existing `/api/stream/status`, `/api/sessions/:id/stream`, and
1245
+ `/api/sessions/:id/summary`; add no tables in the first patch.
1246
+ 5. Add tests around the reducer and around event compatibility with
1247
+ `stream-view.js`.
1248
+
1249
+ Then:
1250
+
1251
+ 1. Extend `SessionStream.getSummary` to use the last five meaningful
1252
+ prompt/assistant turns.
1253
+ 2. Emit approval lifecycle events from the existing approver path.
1254
+ 3. Teach monitor-agent code to consume the capture adapter.
1255
+ 4. Add `session_capture_events` only if a durable monitor/replay use case needs
1256
+ non-message event history.
1257
+
1258
+ This lets the foundation prove value quickly without destabilizing terminal
1259
+ restore or provider-specific session handling.
1260
+
1261
+ ## Bottom Line
1262
+
1263
+ The codebase already has more than raw ingredients: it has a live structured
1264
+ stream (`SessionStream`), durable conversation/message tables, FTS search,
1265
+ hook/OTEL state, active-session status UI, hover summaries, prompt queues,
1266
+ headless terminal snapshots, restart scrollback, and approval automation. The
1267
+ missing layer is a provider-neutral capture contract that reuses those systems
1268
+ and fills only the gaps.
1269
+
1270
+ Build `SessionCapture` as that contract. Treat `SessionStream` as the live bus,
1271
+ structured provider transcripts and hooks as primary evidence, PTY output as
1272
+ fallback, existing message tables as durable conversation storage, and any new
1273
+ event table as a later, narrow store for non-message lifecycle facts.