create-walle 0.9.13 → 0.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/README.md +8 -3
  2. package/bin/create-walle.js +232 -32
  3. package/bin/mcp-inject.js +18 -53
  4. package/package.json +3 -1
  5. package/template/claude-task-manager/api-prompts.js +11 -2
  6. package/template/claude-task-manager/approval-agent.js +7 -0
  7. package/template/claude-task-manager/db.js +94 -75
  8. package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
  9. package/template/claude-task-manager/docs/session-tooltip-freshness-design.md +224 -0
  10. package/template/claude-task-manager/docs/session-ux-issue-review-2026-05-01.md +369 -0
  11. package/template/claude-task-manager/fuzzy-utils.js +10 -2
  12. package/template/claude-task-manager/git-utils.js +140 -10
  13. package/template/claude-task-manager/lib/agent-capabilities.js +1 -1
  14. package/template/claude-task-manager/lib/agent-presets.js +38 -5
  15. package/template/claude-task-manager/lib/codex-terminal-final.js +53 -0
  16. package/template/claude-task-manager/lib/ctm-session-context-api.js +222 -0
  17. package/template/claude-task-manager/lib/session-diagnostics.js +56 -0
  18. package/template/claude-task-manager/lib/session-history.js +309 -16
  19. package/template/claude-task-manager/lib/session-standup.js +409 -0
  20. package/template/claude-task-manager/lib/session-stream.js +253 -20
  21. package/template/claude-task-manager/lib/standup-attention.js +200 -0
  22. package/template/claude-task-manager/lib/status-hooks.js +8 -2
  23. package/template/claude-task-manager/lib/update-telemetry.js +114 -0
  24. package/template/claude-task-manager/lib/walle-ctm-history.js +49 -6
  25. package/template/claude-task-manager/lib/walle-default-model.js +55 -0
  26. package/template/claude-task-manager/lib/walle-mcp-auto-config.js +66 -0
  27. package/template/claude-task-manager/lib/walle-supervisor.js +86 -19
  28. package/template/claude-task-manager/lib/walle-transcript.js +1 -3
  29. package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
  30. package/template/claude-task-manager/package.json +1 -0
  31. package/template/claude-task-manager/providers/codex-mcp.js +104 -0
  32. package/template/claude-task-manager/providers/index.js +2 -0
  33. package/template/claude-task-manager/public/css/setup.css +2 -1
  34. package/template/claude-task-manager/public/css/walle.css +71 -0
  35. package/template/claude-task-manager/public/index.html +2388 -429
  36. package/template/claude-task-manager/public/js/message-renderer.js +314 -35
  37. package/template/claude-task-manager/public/js/session-search-utils.js +185 -3
  38. package/template/claude-task-manager/public/js/session-status-precedence.js +125 -0
  39. package/template/claude-task-manager/public/js/setup.js +62 -19
  40. package/template/claude-task-manager/public/js/stream-view.js +396 -55
  41. package/template/claude-task-manager/public/js/terminal-restore-state.js +57 -0
  42. package/template/claude-task-manager/public/js/walle-session.js +234 -26
  43. package/template/claude-task-manager/public/js/walle.js +143 -2
  44. package/template/claude-task-manager/server.js +1402 -433
  45. package/template/claude-task-manager/session-integrity.js +77 -28
  46. package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
  47. package/template/claude-task-manager/workers/scrollback-worker.js +5 -6
  48. package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
  49. package/template/package.json +1 -1
  50. package/template/wall-e/agent-runners/claude-code.js +2 -0
  51. package/template/wall-e/agent.js +63 -8
  52. package/template/wall-e/api-walle.js +330 -52
  53. package/template/wall-e/brain.js +291 -42
  54. package/template/wall-e/chat.js +172 -15
  55. package/template/wall-e/coding/compaction-service.js +19 -5
  56. package/template/wall-e/coding/stream-processor.js +22 -2
  57. package/template/wall-e/coding/workspace-replay.js +1 -4
  58. package/template/wall-e/coding-orchestrator.js +250 -80
  59. package/template/wall-e/compat.js +0 -28
  60. package/template/wall-e/context/context-builder.js +3 -1
  61. package/template/wall-e/embeddings.js +2 -7
  62. package/template/wall-e/eval/agent-runner.js +30 -9
  63. package/template/wall-e/eval/benchmark-generator.js +21 -1
  64. package/template/wall-e/eval/benchmarks/chat-eval.json +66 -6
  65. package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
  66. package/template/wall-e/eval/cc-replay.js +1 -0
  67. package/template/wall-e/eval/codex-cli-baseline.js +633 -0
  68. package/template/wall-e/eval/debug-agent003.js +1 -0
  69. package/template/wall-e/eval/eval-orchestrator.js +3 -3
  70. package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
  71. package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
  72. package/template/wall-e/eval/run-model-comparison.js +1 -0
  73. package/template/wall-e/eval/swebench-adapter.js +1 -0
  74. package/template/wall-e/evaluation/quorum-evaluator.js +0 -1
  75. package/template/wall-e/extraction/knowledge-extractor.js +1 -2
  76. package/template/wall-e/lib/mcp-integration.js +336 -0
  77. package/template/wall-e/llm/ollama.js +47 -8
  78. package/template/wall-e/llm/ollama.plugin.json +1 -1
  79. package/template/wall-e/llm/tool-adapter.js +1 -0
  80. package/template/wall-e/loops/ingest.js +42 -8
  81. package/template/wall-e/loops/initiative.js +87 -2
  82. package/template/wall-e/mcp-server.js +872 -19
  83. package/template/wall-e/memory/ctm-context-client.js +230 -0
  84. package/template/wall-e/memory/ctm-session-context.js +1376 -0
  85. package/template/wall-e/prompts/coding/memory-protocol.md +6 -0
  86. package/template/wall-e/server.js +30 -1
  87. package/template/wall-e/skills/_bundled/memory-search/SKILL.md +8 -0
  88. package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
  89. package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
  90. package/template/wall-e/skills/_bundled/slack-mentions/run.js +471 -188
  91. package/template/wall-e/skills/skill-planner.js +86 -4
  92. package/template/wall-e/slack/socket-mode-listener.js +276 -0
  93. package/template/wall-e/telemetry.js +70 -2
  94. package/template/wall-e/tools/builtin-middleware.js +55 -2
  95. package/template/wall-e/tools/shell-policy.js +1 -1
  96. package/template/wall-e/tools/slack-owner.js +104 -0
  97. package/template/website/index.html +4 -4
  98. package/template/builder-journal.md +0 -17
@@ -0,0 +1,224 @@
1
+ # Session Tooltip Freshness Design
2
+
3
+ ## Problem
4
+
5
+ The active-session tooltip can show an AI summary that is technically cached
6
+ correctly but no longer represents the current task. The most visible failure is
7
+ a running session whose tooltip headline still describes an older task while the
8
+ latest prompt and progress have already moved on.
9
+
10
+ This is worse than an empty tooltip because it gives false confidence. The
11
+ operator uses the tooltip to decide which session needs attention, so stale
12
+ intent text can send the user to the wrong session or hide a current blocker.
13
+
14
+ ## Existing Behavior
15
+
16
+ Current flow:
17
+
18
+ 1. `stream-view.js` opens the tooltip after hover and fetches
19
+ `/api/sessions/:id/summary?turns=3`.
20
+ 2. `SessionStream` stores cleaned user prompts in `userPromptCache`.
21
+ 3. A new user prompt triggers AI summary generation, debounced by 2 seconds.
22
+ 4. The configured provider generates a 10-15 word summary over the cached
23
+ prompt list.
24
+ 5. `getSummary()` returns `summary`, `intent`, `displayPrompt`, `lastPrompt`,
25
+ and `progress`.
26
+
27
+ Current failure modes:
28
+
29
+ - Cached `intent.source = ai-summary` is treated as authoritative even when its
30
+ timestamp is older than the latest prompt.
31
+ - The tooltip does not refetch or rerender while it is already open for the same
32
+ session.
33
+ - The AI summary input is a flat list of recent prompts, so older work can
34
+ dominate the summary after a task switch.
35
+ - The UI label `Intent` does not explain freshness or distinguish current
36
+ prompt evidence from slower AI synthesis.
37
+
38
+ ## Design Principles
39
+
40
+ - Latest user intent is the primary truth. AI is a compression layer, not a
41
+ source of freshness.
42
+ - Stale AI should be visible as context, never promoted as the current task.
43
+ - Refresh should be scoped to visible UI. Do not add broad polling across every
44
+ session.
45
+ - The tooltip should be copyable, stable, dense, and operational.
46
+ - Existing API fields must stay backward compatible for Session Overview and
47
+ older clients.
48
+
49
+ ## API Contract
50
+
51
+ `GET /api/sessions/:id/summary` keeps existing fields and adds:
52
+
53
+ ```js
54
+ {
55
+ currentTask: {
56
+ text: string | null,
57
+ source: 'latest-prompt' | 'ai-summary' | 'prompt-fallback' | 'title-fallback' | 'missing',
58
+ freshness: 'fresh' | 'updating' | 'stale' | 'missing',
59
+ updatedAt: number,
60
+ promptTimestamp: number,
61
+ staleReason?: string
62
+ },
63
+ latestPrompt: {
64
+ text: string | null,
65
+ timestamp: number
66
+ },
67
+ aiSummary: {
68
+ text: string | null,
69
+ source: string | null,
70
+ status: 'fresh' | 'updating' | 'stale' | 'fallback' | 'unavailable',
71
+ updatedAt: number,
72
+ promptTimestamp: number,
73
+ promptCount: number,
74
+ staleReason?: string
75
+ }
76
+ }
77
+ ```
78
+
79
+ Compatibility mapping:
80
+
81
+ - `intent` remains present.
82
+ - `summary` remains present.
83
+ - When AI is stale, `intent.text` should follow `currentTask.text` so existing
84
+ clients do not keep rendering stale AI as the primary task.
85
+ - `aiSummary.text` retains the older AI text for secondary display.
86
+
87
+ Freshness rules:
88
+
89
+ - If usable AI summary exists and `aiSummary.updatedAt >= latestPrompt.timestamp`,
90
+ `currentTask.source = ai-summary` and `freshness = fresh`.
91
+ - If usable AI summary exists but is older than the latest prompt,
92
+ `currentTask.source = latest-prompt`, `freshness = updating`, and
93
+ `aiSummary.status = stale` or `updating`.
94
+ - If no AI summary exists, use the most recent content-rich prompt and mark the
95
+ AI summary as `unavailable` or `fallback`.
96
+ - If a fallback summary is raw prompt text, keep current task on prompt evidence
97
+ and mark `aiSummary.status = fallback`.
98
+
99
+ ## Summary Generation
100
+
101
+ Change the AI prompt from a flat prompt list to latest-prompt-weighted input.
102
+
103
+ Recommended input:
104
+
105
+ ```text
106
+ Latest prompt:
107
+ <most recent cleaned prompt>
108
+
109
+ Recent context:
110
+ 1. <older prompt>
111
+ 2. <older prompt>
112
+ 3. <older prompt>
113
+ ```
114
+
115
+ Recommended system prompt:
116
+
117
+ ```text
118
+ Summarize the user's current task. Prioritize the latest prompt. Use older
119
+ prompts only as context. Return 8-14 words. Return only the summary, no quotes
120
+ or prefix.
121
+ ```
122
+
123
+ The cached summary should record the prompt timestamp and prompt count used to
124
+ generate it. This makes freshness testable without depending on provider speed.
125
+
126
+ ## Tooltip UX
127
+
128
+ Replace the main `Intent` section with `Current Task`.
129
+
130
+ Fresh state:
131
+
132
+ ```text
133
+ CURRENT TASK
134
+ [AI SUMMARY] [FRESH] [now]
135
+ Fixing Wall-E session history restore and tooltip freshness.
136
+ ```
137
+
138
+ Updating state:
139
+
140
+ ```text
141
+ CURRENT TASK
142
+ [LATEST PROMPT] [UPDATING] [now]
143
+ Does WallE coding agent have auto compact logic like OpenCode or Claude?
144
+
145
+ AI SUMMARY
146
+ [STALE] [2m ago]
147
+ Previous: Fixing session tab drag behavior and search issues.
148
+ ```
149
+
150
+ Unavailable state:
151
+
152
+ ```text
153
+ CURRENT TASK
154
+ [LATEST PROMPT] [FALLBACK] [now]
155
+ Fix the session tooltip freshness behavior.
156
+ ```
157
+
158
+ Progress remains separate and should keep using assistant-event evidence.
159
+
160
+ Interaction behavior:
161
+
162
+ - Opening a tooltip fetches summary immediately.
163
+ - If the tooltip is already open for that session, new stream events rerender it.
164
+ - While visible, perform a lightweight refresh every 10 seconds.
165
+ - Stop the refresh timer when the tooltip is hidden.
166
+ - Clicking inside the tooltip continues to preserve it for copy/select.
167
+ - Activating a different session tooltip replaces the current tooltip.
168
+
169
+ ## Implementation Plan
170
+
171
+ Phase 1: documentation
172
+
173
+ - Add this design note.
174
+ - Commit documentation by itself after TL review.
175
+
176
+ Phase 2: backend freshness
177
+
178
+ - Extend `SessionStream` cached summary metadata with prompt timestamp/count.
179
+ - Add `latestPrompt`, `aiSummary`, and `currentTask` to `getSummary()`.
180
+ - Ensure `intent` maps to `currentTask`, not stale AI.
181
+ - Update summary generation prompt to prioritize latest prompt.
182
+ - Add unit tests for fresh, stale, fallback, and provider-lag cases.
183
+
184
+ Phase 3: tooltip UI
185
+
186
+ - Render `Current Task` from `currentTask`.
187
+ - Render stale AI only as secondary context.
188
+ - Add freshness/status pills.
189
+ - Add visible-tooltip refresh and stream-event rerender.
190
+ - Add browser/render coverage for a tooltip that updates while open.
191
+
192
+ Phase 4: dev validation
193
+
194
+ - Start isolated CTM via `ctm-dev`.
195
+ - Verify service health on the dev port pair.
196
+ - Exercise stale summary behavior through a real browser.
197
+ - Confirm no browser console errors for the tooltip flow.
198
+ - Commit implementation after TL review.
199
+
200
+ ## Test Matrix
201
+
202
+ Backend:
203
+
204
+ - AI summary generated after latest prompt: primary task is AI summary.
205
+ - AI summary timestamp older than latest prompt: primary task is latest prompt.
206
+ - AI provider slow or failed: primary task remains latest prompt.
207
+ - Fallback summary raw prompt: primary task remains prompt evidence.
208
+ - Summary generation input puts latest prompt before recent context.
209
+
210
+ Frontend:
211
+
212
+ - Tooltip opens and labels `Current Task`.
213
+ - Stale AI is demoted into `AI Summary`.
214
+ - New user stream event updates an already-open tooltip.
215
+ - Fresh summary event promotes AI summary.
216
+ - Tooltip remains copyable and dismisses only on outside click or another
217
+ tooltip activation.
218
+
219
+ Dev validation:
220
+
221
+ - CTM dev server starts on a random non-primary port.
222
+ - `/api/services/status` succeeds on the dev port.
223
+ - Browser test uses the dev port, never `3456` or `3457`.
224
+ - Screenshot or DOM assertions prove the freshness state.
@@ -0,0 +1,369 @@
1
+ # Claude/Codex Session UX Issue Review
2
+
3
+ Date: 2026-05-01
4
+ Scope: CTM session terminal UX, restore/restart behavior, status accuracy,
5
+ search/review history, and Wall-E coding session integration.
6
+
7
+ ## Evidence Used
8
+
9
+ - Wall-E MCP status check: the Wall-E MCP server is installed and exposes the
10
+ CTM session tools, but the live CTM DB health path currently reports
11
+ `ctm_db_schema_read_failed` with a `disk I/O error`.
12
+ - CTM cached DB fallback: `~/.walle/data/task-manager.db` has the expected CTM
13
+ session tables and conversation blobs, but `session_messages` and
14
+ `session_messages_fts` are empty in the sampled cache.
15
+ - Session corpus scan: recent Claude and Codex session JSONLs plus CTM
16
+ `session_conversations.messages` were scanned for direct user reports from
17
+ April 15 onward. The largest clusters were search/review, Wall-E/model, blank
18
+ or restore behavior, identity/restart, malformed output, input/approval, and
19
+ status accuracy.
20
+ - Git history: recent CTM fixes show repeated work around blank tabs, Codex
21
+ terminal restore, output budget handling, status projection, search identity,
22
+ Wall-E default model hydration, and Codex rollout recovery.
23
+ - Regression tests: the render and session tests now cover blank-tab retry,
24
+ worker saturation, Codex panel restore gaps, TUI redraw, terminal query
25
+ handling, output freeze, status typing, search dedupe, title preservation, and
26
+ Wall-E default model behavior.
27
+ - Code review: current review focused on `public/index.html`, `server.js`,
28
+ `lib/session-history.js`, Wall-E MCP/session context code, and the Playwright
29
+ regression tests.
30
+
31
+ ## Issue Clusters
32
+
33
+ ### 1. Blank tabs, lost output, and restore gaps
34
+
35
+ Observed symptoms:
36
+
37
+ - Switching back to a long-running tab could show a blank terminal.
38
+ - Restart or panel restore could lose visible output or show a large blank tail.
39
+ - Codex tabs sometimes opened at the top or with the prompt far below the useful
40
+ output.
41
+ - Review or live tab state could be overwritten by a stale shorter snapshot.
42
+
43
+ Likely causes:
44
+
45
+ - Snapshot cache was consumed too early, so a retry had no usable fallback.
46
+ - Headless worker saturation caused soft timeouts; late worker responses were
47
+ dropped instead of cached.
48
+ - Hidden terminals restored with stale dimensions, then painted an invalid
49
+ snapshot.
50
+ - Browser WebGL state and xterm state could diverge after tab switches or
51
+ context loss.
52
+ - Codex emits full-screen terminal clear sequences and can leave a viewport-sized
53
+ blank tail after ratatui redraws.
54
+
55
+ Fixes already landed:
56
+
57
+ - Preserve cached snapshots across attach/retry and keep late worker responses
58
+ usable after soft timeout.
59
+ - Recreate WebGL renderers, force xterm refresh, and reflow on activation.
60
+ - Reject dimension-mismatched snapshots until resize/reflow catches up.
61
+ - Preserve browser-visible output when a stale server snapshot is shorter.
62
+ - Compact Codex internal blank gaps and anchor follow behavior to the useful
63
+ prompt viewport.
64
+
65
+ Representative coverage:
66
+
67
+ - `blank-tab-retry-recovery.spec.js`
68
+ - `blank-tab-worker-saturation.spec.js`
69
+ - `codex-panel-restore-gap.spec.js`
70
+ - `snapshot-restore-ordering.spec.js`
71
+ - `dimension-mismatch.spec.js`
72
+
73
+ ### 2. Malformed TUI output and terminal freezes
74
+
75
+ Observed symptoms:
76
+
77
+ - Claude Code sticky status blocks duplicated or drifted.
78
+ - Codex output could become visually "messed up" while typing.
79
+ - Codex skill picker and helper input alignment could break.
80
+ - Output sometimes froze after high-volume bursts or after a TUI exit.
81
+ - Startup terminal queries could leak into the UI or back into the PTY.
82
+
83
+ Likely causes:
84
+
85
+ - The output-budget path split stateful ANSI frames across throttled writes.
86
+ - Raw post-snapshot bytes could be double-applied after a snapshot restore.
87
+ - Terminal query responses such as DSR/DA mode probes were not always consumed
88
+ silently.
89
+ - Helper textarea position was not consistently aligned to the current xterm
90
+ viewport.
91
+ - Blank-gap compaction was unsafe while Codex interactive picker UI was visible.
92
+
93
+ Fixes already landed:
94
+
95
+ - Treat headless xterm snapshots as the source of truth during output-budget
96
+ suppression.
97
+ - Add a short post-snapshot grace/buffer so late raw bytes are applied in order.
98
+ - Restore via reset plus FIFO writes to avoid stale state after snapshot reset.
99
+ - Consume known terminal queries and add a real Codex DSR regression.
100
+ - Align the Codex helper textarea to the viewport and avoid compacting during
101
+ active skill picker UI.
102
+
103
+ Representative coverage:
104
+
105
+ - `claude-tui-sticky-block.spec.js`
106
+ - `tui-redraw.spec.js`
107
+ - `tui-rerender.spec.js`
108
+ - `output-freeze.spec.js`
109
+ - `real-codex-dsr-query.spec.js`
110
+ - `codex-clear-scrollback.spec.js`
111
+
112
+ ### 3. Running/idle/waiting status inaccuracies
113
+
114
+ Observed symptoms:
115
+
116
+ - Idle Claude and Codex sessions were shown as Running.
117
+ - Active session list, overview, and command tab disagreed.
118
+ - Switching tabs could cause an idle session to become Running.
119
+ - Typing or UI-induced resize/reflow could be treated as agent activity.
120
+
121
+ Likely causes:
122
+
123
+ - TUI redraws and status-only terminal output were interpreted as fresh work.
124
+ - Multiple status sources competed: server session payloads, stream status,
125
+ client PTY detection, standup projection, and overview cards.
126
+ - Codex running holds were renewed from the wrong signal in some cases.
127
+ - Freshness windows differed across UI surfaces.
128
+
129
+ Fixes already landed:
130
+
131
+ - Add status hooks/state bus and make live server status the authoritative
132
+ source while it is fresh.
133
+ - Filter status-only ANSI/TUI noise and stop counting UI-refresh output as activity.
134
+ - Renew Codex running holds from detector receipts rather than incidental tab
135
+ redraws.
136
+ - Align standup and active-session projections on the same live status payload.
137
+
138
+ Representative coverage:
139
+
140
+ - `status-detector.spec.js`
141
+ - `session-status-typing.spec.js`
142
+ - overview and standup browser checks from the recent fix passes
143
+
144
+ ### 4. Identity, title, context, and restart recovery
145
+
146
+ Observed symptoms:
147
+
148
+ - Sessions lost title or context after restart.
149
+ - Some Codex sessions disappeared after CTM crashes.
150
+ - Review tabs resolved to old prompts or wrong titles.
151
+ - Duplicate titles such as repeated `task-manager` appeared.
152
+ - Codex rollout lookup resolved to the wrong file when IDs overlapped.
153
+
154
+ Likely causes:
155
+
156
+ - CTM tab IDs and provider session IDs are separate identities and were not
157
+ linked consistently on every path.
158
+ - Codex `state_5.sqlite` could be corrupt, missing, or incomplete.
159
+ - JSONL fallback did not always replay the full rollout history.
160
+ - Rollout lookup used weak matching instead of exact UUID resolution.
161
+ - Auto-title refresh could overwrite user or session-owned titles.
162
+
163
+ Fixes already landed:
164
+
165
+ - Use dual CTM/agent session IDs and backfill/migrate identities.
166
+ - Resolve Codex rollouts by exact UUID and fall back to JSONL when state DB is
167
+ unavailable.
168
+ - Replay full Codex rollout history instead of a partial latest fragment.
169
+ - Harden title ownership, preserve explicit renames, and clean branch/title UI.
170
+
171
+ Representative coverage:
172
+
173
+ - `codex-title-preservation.spec.js`
174
+ - `session-title-branch-badge.spec.js`
175
+ - recent commits around Codex exact UUID resolution, JSONL fallback, and full
176
+ rollout replay
177
+
178
+ ### 5. Search, review, and session summary reliability
179
+
180
+ Observed symptoms:
181
+
182
+ - Search missed obvious terms such as `$publish`.
183
+ - Search returned duplicate sessions for the same underlying agent run.
184
+ - A summary could say "1 msg" even when the session had many messages.
185
+ - Review panes could be empty while the live terminal had content.
186
+ - Prompt history or prompt dropdown state could be stale.
187
+
188
+ Likely causes:
189
+
190
+ - Raw grep over Codex rollouts included developer context and skill files, which
191
+ produced false positives and hid user-message semantics.
192
+ - Session dedupe did not consistently collapse CTM tab IDs, provider IDs,
193
+ provisional IDs, and JSONL paths.
194
+ - Search filters and active-state filters could hide relevant historical rows.
195
+ - In the sampled Wall-E cache, `session_conversations.messages` has content but
196
+ `session_messages` and `session_messages_fts` are empty, so any MCP/search path
197
+ relying only on message FTS will miss conversation content.
198
+ - Prompt preview caches were not refreshed on every relevant session update.
199
+
200
+ Fixes already landed:
201
+
202
+ - Parse Codex sessions through the session-history layer instead of raw text
203
+ grep for user-visible search semantics.
204
+ - Normalize command prefixes and dedupe session identities more aggressively.
205
+ - Stabilize overview/session search state and live prompt preview refresh.
206
+
207
+ Remaining gap:
208
+
209
+ - Wall-E/CTM DB session search should either backfill `session_messages` and FTS
210
+ from `session_conversations.messages` or query the conversation JSON blobs as a
211
+ fallback. The cache sample shows this is still a real blind spot.
212
+
213
+ ### 6. Wall-E coding session model and MCP setup
214
+
215
+ Observed symptoms:
216
+
217
+ - New Wall-E coding sessions could have no default model.
218
+ - Sending a command with no model selected could enter a busy loop.
219
+ - Wall-E MCP showed as unsupported or unavailable for some users.
220
+ - `npx` installs did not reliably make the MCP usable in downstream agents.
221
+ - MCP health did not clearly distinguish "installed" from "DB usable".
222
+
223
+ Likely causes:
224
+
225
+ - Model hydration happened too late or was not persisted into the session UI
226
+ state.
227
+ - Duplicate sends were not blocked while the model handle was unresolved.
228
+ - MCP install/setup checks focused on config presence, not full tool and DB
229
+ health.
230
+ - The live CTM DB path can be unreadable from Wall-E even when the cached Wall-E
231
+ DB is healthy.
232
+
233
+ Fixes already landed:
234
+
235
+ - Hydrate a default model for Wall-E sessions and ignore duplicate busy sends.
236
+ - Add browser coverage for Wall-E default model behavior.
237
+ - Improve MCP health reporting so DB schema failures are visible.
238
+
239
+ Remaining gap:
240
+
241
+ - Make Wall-E MCP degrade gracefully when the configured CTM DB cannot be read:
242
+ fall back to the healthy Wall-E cache or CTM HTTP APIs, and surface the
243
+ degraded source in `walle_memory_status`.
244
+
245
+ ### 7. Input, focus, and approval UX
246
+
247
+ Observed symptoms:
248
+
249
+ - Input lag became severe under high CPU or heavy output.
250
+ - Input could freeze after websocket reconnect.
251
+ - Renaming, queue input, and toolbar focus could be interrupted by session
252
+ events.
253
+ - Auto-approval sometimes missed Codex approval prompts.
254
+
255
+ Likely causes:
256
+
257
+ - High-volume output saturated the UI path and delayed input handling.
258
+ - Reconnect and toolbar code did not always restore focus and local echo state.
259
+ - Approval prompt parsing was too dependent on provider-specific terminal text.
260
+
261
+ Fixes already landed:
262
+
263
+ - Add input-freeze regression tests and reconnect-focused fixes.
264
+ - Improve approval transition snapshots so prompts and decisions do not corrupt
265
+ the terminal state.
266
+ - Expand approval/provider parsing in the recent fix passes.
267
+
268
+ ## Codebase Improvement Opportunities
269
+
270
+ ### P0: Fix DB-first Wall-E session retrieval
271
+
272
+ The current MCP surface is configured, but live CTM DB health can fail and the
273
+ cached DB has conversation blobs without populated message/FTS rows. This is the
274
+ largest remaining observability hole because it affects future debugging and
275
+ agent memory retrieval.
276
+
277
+ Recommended change:
278
+
279
+ - In Wall-E session context search, if `session_messages_fts` is empty or the
280
+ configured CTM DB fails schema checks, fall back to:
281
+ - `session_conversations.messages` JSON search in the healthy cache, then
282
+ - CTM HTTP/session APIs if available.
283
+ - Add a backfill/repair command to populate `session_messages` and
284
+ `session_messages_fts` from existing `session_conversations`.
285
+ - Make `walle_memory_status` report the active source: live CTM DB, Wall-E cache,
286
+ or API fallback.
287
+
288
+ ### P1: Extract terminal restore and status state machines
289
+
290
+ `public/index.html` now owns a large amount of terminal restore, blank watchdog,
291
+ WebGL recovery, scroll anchoring, Codex compaction, and status precedence logic.
292
+ The recent fixes are directionally correct, but the code is hard to reason about
293
+ because the state machine is distributed across inline UI handlers.
294
+
295
+ Recommended change:
296
+
297
+ - Extract terminal restore logic into a small module with explicit states:
298
+ `inactive`, `attaching`, `snapshotRequested`, `restoring`, `live`,
299
+ `retrying`, `reflowing`, and `failed`.
300
+ - Extract status precedence into a shared module or generated fixture used by
301
+ server/client tests.
302
+ - Keep the UI layer as a renderer of state transitions, not the owner of every
303
+ transition rule.
304
+
305
+ ### P1: Add per-session diagnostic ring buffers
306
+
307
+ Many past reports required reconstructing what happened from screenshots,
308
+ terminal state, DB state, and git history.
309
+
310
+ Recommended change:
311
+
312
+ - Add a lightweight per-session ring buffer, served by a debug endpoint, that
312
+ records the last N restore and status decisions:
314
+ - snapshot source and dimensions,
315
+ - cache hit/miss and timeout path,
316
+ - blank watchdog attempts,
317
+ - WebGL recreate/reflow events,
318
+ - status source and freshness,
319
+ - identity mapping and title owner.
320
+ - Expose it in dev/debug mode so future agent fixes start from facts instead of
321
+ screenshots alone.
322
+
323
+ ### P2: Reconcile stale restore invariants in tests and comments
324
+
325
+ One reviewed test comment says activation flushes the writer queue, while the
326
+ current activation path clears queued writes before requesting an authoritative
327
+ snapshot. That may be an intentional newer invariant, but it should be made
328
+ explicit.
329
+
330
+ Recommended change:
331
+
332
+ - Update the test comments if the new invariant is correct.
333
+ - Add a narrow regression proving post-snapshot buffered bytes are not lost when
334
+ activation clears stale queued bytes.
335
+
336
+ ### P2: Promote the session UX test pack into a standard smoke suite
337
+
338
+ The project now has good focused tests, but the failure modes recur when fixes
339
+ touch adjacent code.
340
+
341
+ Recommended change:
342
+
343
+ - Define a small "session UX smoke" command covering:
344
+ - blank tab restore,
345
+ - worker saturation retry,
346
+ - Codex panel restore gap,
347
+ - Codex clear scrollback,
348
+ - status typing,
349
+ - search dedupe,
350
+ - Wall-E default model.
351
+ - Run it before CTM releases and after changes to `public/index.html`,
352
+ `server.js`, `lib/session-history.js`, Wall-E MCP code, or DB migrations.
353
+
354
+ ## Bottom Line
355
+
356
+ Most of the user-visible regressions were not caused by a single missing check.
357
+ They came from CTM treating live terminal bytes, headless snapshots, structured
358
+ provider logs, persisted DB rows, and client-only UI state as interchangeable.
359
+ The recent fixes work because they make one source authoritative for each job:
360
+
361
+ - headless snapshots for visual terminal recovery,
362
+ - structured session history for search/review semantics,
363
+ - fresh server live status for status badges,
364
+ - explicit CTM/agent IDs for identity,
365
+ - hydrated model state for Wall-E command sends.
366
+
367
+ The next quality jump is to make those authority boundaries explicit in code and
368
+ diagnostics, especially for Wall-E DB-first retrieval and the terminal
369
+ restore/status state machines.
@@ -384,8 +384,16 @@ function keyboardProximityScore(a, b) {
384
384
 
385
385
  // --- Query Expansion Orchestrator ---
386
386
 
387
+ function normalizeFuzzySearchValue(value) {
388
+ return String(value || '')
389
+ .trim()
390
+ .toLowerCase()
391
+ .replace(/(^|[\s([{])[$/]+(?=[a-z0-9_-])/g, '$1')
392
+ .replace(/\s+/g, ' ');
393
+ }
394
+
387
395
  function expandQueryFuzzy(query) {
388
- const terms = (query || '').trim().split(/\s+/).filter(t => t.length >= 2);
396
+ const terms = normalizeFuzzySearchValue(query).split(/\s+/).filter(t => t.length >= 2);
389
397
  if (terms.length === 0) return { ftsQuery: '', expansions: [] };
390
398
 
391
399
  const MAX_VARIANTS_PER_TERM = 8;
@@ -451,5 +459,5 @@ module.exports = {
451
459
  levenshtein, generate1EditVariants,
452
460
  doubleMetaphone, phoneticMatch,
453
461
  keyboardProximityScore, QWERTY_NEIGHBORS,
454
- expandQueryFuzzy,
462
+ expandQueryFuzzy, normalizeFuzzySearchValue,
455
463
  };