create-walle 0.9.13 → 0.9.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -3
- package/bin/create-walle.js +232 -32
- package/bin/mcp-inject.js +18 -53
- package/package.json +3 -1
- package/template/claude-task-manager/api-prompts.js +11 -2
- package/template/claude-task-manager/approval-agent.js +7 -0
- package/template/claude-task-manager/db.js +94 -75
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
- package/template/claude-task-manager/docs/session-tooltip-freshness-design.md +224 -0
- package/template/claude-task-manager/docs/session-ux-issue-review-2026-05-01.md +369 -0
- package/template/claude-task-manager/fuzzy-utils.js +10 -2
- package/template/claude-task-manager/git-utils.js +140 -10
- package/template/claude-task-manager/lib/agent-capabilities.js +1 -1
- package/template/claude-task-manager/lib/agent-presets.js +38 -5
- package/template/claude-task-manager/lib/codex-terminal-final.js +53 -0
- package/template/claude-task-manager/lib/ctm-session-context-api.js +222 -0
- package/template/claude-task-manager/lib/session-diagnostics.js +56 -0
- package/template/claude-task-manager/lib/session-history.js +309 -16
- package/template/claude-task-manager/lib/session-standup.js +409 -0
- package/template/claude-task-manager/lib/session-stream.js +253 -20
- package/template/claude-task-manager/lib/standup-attention.js +200 -0
- package/template/claude-task-manager/lib/status-hooks.js +8 -2
- package/template/claude-task-manager/lib/update-telemetry.js +114 -0
- package/template/claude-task-manager/lib/walle-ctm-history.js +49 -6
- package/template/claude-task-manager/lib/walle-default-model.js +55 -0
- package/template/claude-task-manager/lib/walle-mcp-auto-config.js +66 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +86 -19
- package/template/claude-task-manager/lib/walle-transcript.js +1 -3
- package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
- package/template/claude-task-manager/package.json +1 -0
- package/template/claude-task-manager/providers/codex-mcp.js +104 -0
- package/template/claude-task-manager/providers/index.js +2 -0
- package/template/claude-task-manager/public/css/setup.css +2 -1
- package/template/claude-task-manager/public/css/walle.css +71 -0
- package/template/claude-task-manager/public/index.html +2388 -429
- package/template/claude-task-manager/public/js/message-renderer.js +314 -35
- package/template/claude-task-manager/public/js/session-search-utils.js +185 -3
- package/template/claude-task-manager/public/js/session-status-precedence.js +125 -0
- package/template/claude-task-manager/public/js/setup.js +62 -19
- package/template/claude-task-manager/public/js/stream-view.js +396 -55
- package/template/claude-task-manager/public/js/terminal-restore-state.js +57 -0
- package/template/claude-task-manager/public/js/walle-session.js +234 -26
- package/template/claude-task-manager/public/js/walle.js +143 -2
- package/template/claude-task-manager/server.js +1402 -433
- package/template/claude-task-manager/session-integrity.js +77 -28
- package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
- package/template/claude-task-manager/workers/scrollback-worker.js +5 -6
- package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
- package/template/package.json +1 -1
- package/template/wall-e/agent-runners/claude-code.js +2 -0
- package/template/wall-e/agent.js +63 -8
- package/template/wall-e/api-walle.js +330 -52
- package/template/wall-e/brain.js +291 -42
- package/template/wall-e/chat.js +172 -15
- package/template/wall-e/coding/compaction-service.js +19 -5
- package/template/wall-e/coding/stream-processor.js +22 -2
- package/template/wall-e/coding/workspace-replay.js +1 -4
- package/template/wall-e/coding-orchestrator.js +250 -80
- package/template/wall-e/compat.js +0 -28
- package/template/wall-e/context/context-builder.js +3 -1
- package/template/wall-e/embeddings.js +2 -7
- package/template/wall-e/eval/agent-runner.js +30 -9
- package/template/wall-e/eval/benchmark-generator.js +21 -1
- package/template/wall-e/eval/benchmarks/chat-eval.json +66 -6
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
- package/template/wall-e/eval/cc-replay.js +1 -0
- package/template/wall-e/eval/codex-cli-baseline.js +633 -0
- package/template/wall-e/eval/debug-agent003.js +1 -0
- package/template/wall-e/eval/eval-orchestrator.js +3 -3
- package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
- package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
- package/template/wall-e/eval/run-model-comparison.js +1 -0
- package/template/wall-e/eval/swebench-adapter.js +1 -0
- package/template/wall-e/evaluation/quorum-evaluator.js +0 -1
- package/template/wall-e/extraction/knowledge-extractor.js +1 -2
- package/template/wall-e/lib/mcp-integration.js +336 -0
- package/template/wall-e/llm/ollama.js +47 -8
- package/template/wall-e/llm/ollama.plugin.json +1 -1
- package/template/wall-e/llm/tool-adapter.js +1 -0
- package/template/wall-e/loops/ingest.js +42 -8
- package/template/wall-e/loops/initiative.js +87 -2
- package/template/wall-e/mcp-server.js +872 -19
- package/template/wall-e/memory/ctm-context-client.js +230 -0
- package/template/wall-e/memory/ctm-session-context.js +1376 -0
- package/template/wall-e/prompts/coding/memory-protocol.md +6 -0
- package/template/wall-e/server.js +30 -1
- package/template/wall-e/skills/_bundled/memory-search/SKILL.md +8 -0
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
- package/template/wall-e/skills/_bundled/slack-mentions/run.js +471 -188
- package/template/wall-e/skills/skill-planner.js +86 -4
- package/template/wall-e/slack/socket-mode-listener.js +276 -0
- package/template/wall-e/telemetry.js +70 -2
- package/template/wall-e/tools/builtin-middleware.js +55 -2
- package/template/wall-e/tools/shell-policy.js +1 -1
- package/template/wall-e/tools/slack-owner.js +104 -0
- package/template/website/index.html +4 -4
- package/template/builder-journal.md +0 -17
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
# Session Tooltip Freshness Design
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
The active-session tooltip can show an AI summary that is technically cached
|
|
6
|
+
correctly but no longer represents the current task. The most visible failure is
|
|
7
|
+
a running session whose tooltip headline still describes an older task while the
|
|
8
|
+
latest prompt and progress have already moved on.
|
|
9
|
+
|
|
10
|
+
This is worse than an empty tooltip because it gives false confidence. The
|
|
11
|
+
operator uses the tooltip to decide which session needs attention, so stale
|
|
12
|
+
intent text can send the user to the wrong session or hide a current blocker.
|
|
13
|
+
|
|
14
|
+
## Existing Behavior
|
|
15
|
+
|
|
16
|
+
Current flow:
|
|
17
|
+
|
|
18
|
+
1. `stream-view.js` opens the tooltip after hover and fetches
|
|
19
|
+
`/api/sessions/:id/summary?turns=3`.
|
|
20
|
+
2. `SessionStream` stores cleaned user prompts in `userPromptCache`.
|
|
21
|
+
3. A new user prompt debounces AI summary generation by 2 seconds.
|
|
22
|
+
4. The configured provider generates a 10-15 word summary over the cached
|
|
23
|
+
prompt list.
|
|
24
|
+
5. `getSummary()` returns `summary`, `intent`, `displayPrompt`, `lastPrompt`,
|
|
25
|
+
and `progress`.
|
|
26
|
+
|
|
27
|
+
Current failure modes:
|
|
28
|
+
|
|
29
|
+
- Cached `intent.source = ai-summary` is treated as authoritative even when its
|
|
30
|
+
timestamp is older than the latest prompt.
|
|
31
|
+
- The tooltip does not refetch or rerender while it is already open for the same
|
|
32
|
+
session.
|
|
33
|
+
- The AI summary input is a flat list of recent prompts, so older work can
|
|
34
|
+
dominate the summary after a task switch.
|
|
35
|
+
- The UI label `Intent` does not explain freshness or distinguish current
|
|
36
|
+
prompt evidence from slower AI synthesis.
|
|
37
|
+
|
|
38
|
+
## Design Principles
|
|
39
|
+
|
|
40
|
+
- Latest user intent is the primary truth. AI is a compression layer, not a
|
|
41
|
+
source of freshness.
|
|
42
|
+
- Stale AI should be visible as context, never promoted as the current task.
|
|
43
|
+
- Refresh should be scoped to visible UI. Do not add broad polling across every
|
|
44
|
+
session.
|
|
45
|
+
- The tooltip should be copyable, stable, dense, and operational.
|
|
46
|
+
- Existing API fields must stay backward compatible for Session Overview and
|
|
47
|
+
older clients.
|
|
48
|
+
|
|
49
|
+
## API Contract
|
|
50
|
+
|
|
51
|
+
`GET /api/sessions/:id/summary` keeps existing fields and adds:
|
|
52
|
+
|
|
53
|
+
```js
|
|
54
|
+
{
|
|
55
|
+
currentTask: {
|
|
56
|
+
text: string | null,
|
|
57
|
+
source: 'latest-prompt' | 'ai-summary' | 'prompt-fallback' | 'title-fallback' | 'missing',
|
|
58
|
+
freshness: 'fresh' | 'updating' | 'stale' | 'missing',
|
|
59
|
+
updatedAt: number,
|
|
60
|
+
promptTimestamp: number,
|
|
61
|
+
staleReason?: string
|
|
62
|
+
},
|
|
63
|
+
latestPrompt: {
|
|
64
|
+
text: string | null,
|
|
65
|
+
timestamp: number
|
|
66
|
+
},
|
|
67
|
+
aiSummary: {
|
|
68
|
+
text: string | null,
|
|
69
|
+
source: string | null,
|
|
70
|
+
status: 'fresh' | 'updating' | 'stale' | 'fallback' | 'unavailable',
|
|
71
|
+
updatedAt: number,
|
|
72
|
+
promptTimestamp: number,
|
|
73
|
+
promptCount: number,
|
|
74
|
+
staleReason?: string
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
Compatibility mapping:
|
|
80
|
+
|
|
81
|
+
- `intent` remains present.
|
|
82
|
+
- `summary` remains present.
|
|
83
|
+
- When AI is stale, `intent.text` should follow `currentTask.text` so existing
|
|
84
|
+
clients do not keep rendering stale AI as the primary task.
|
|
85
|
+
- `aiSummary.text` retains the older AI text for secondary display.
|
|
86
|
+
|
|
87
|
+
Freshness rules:
|
|
88
|
+
|
|
89
|
+
- If a usable AI summary exists and `aiSummary.updatedAt >= latestPrompt.timestamp`,
|
|
90
|
+
`currentTask.source = ai-summary` and `freshness = fresh`.
|
|
91
|
+
- If a usable AI summary exists but is older than the latest prompt,
|
|
92
|
+
`currentTask.source = latest-prompt`, `freshness = updating`, and
|
|
93
|
+
`aiSummary.status = stale` or `updating`.
|
|
94
|
+
- If no AI summary exists, use the most recent content-rich prompt and mark the
|
|
95
|
+
AI summary as `unavailable` or `fallback`.
|
|
96
|
+
- If a fallback summary is raw prompt text, keep current task on prompt evidence
|
|
97
|
+
and mark `aiSummary.status = fallback`.
|
|
98
|
+
|
|
99
|
+
## Summary Generation
|
|
100
|
+
|
|
101
|
+
Change the AI prompt from a flat prompt list to latest-prompt-weighted input.
|
|
102
|
+
|
|
103
|
+
Recommended input:
|
|
104
|
+
|
|
105
|
+
```text
|
|
106
|
+
Latest prompt:
|
|
107
|
+
<most recent cleaned prompt>
|
|
108
|
+
|
|
109
|
+
Recent context:
|
|
110
|
+
1. <older prompt>
|
|
111
|
+
2. <older prompt>
|
|
112
|
+
3. <older prompt>
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Recommended system prompt:
|
|
116
|
+
|
|
117
|
+
```text
|
|
118
|
+
Summarize the user's current task. Prioritize the latest prompt. Use older
|
|
119
|
+
prompts only as context. Return 8-14 words. Return only the summary, no quotes
|
|
120
|
+
or prefix.
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
The cached summary should record the prompt timestamp and prompt count used to
|
|
124
|
+
generate it. This makes freshness testable without depending on provider speed.
|
|
125
|
+
|
|
126
|
+
## Tooltip UX
|
|
127
|
+
|
|
128
|
+
Replace the main `Intent` section with `Current Task`.
|
|
129
|
+
|
|
130
|
+
Fresh state:
|
|
131
|
+
|
|
132
|
+
```text
|
|
133
|
+
CURRENT TASK
|
|
134
|
+
[AI SUMMARY] [FRESH] [now]
|
|
135
|
+
Fixing Wall-E session history restore and tooltip freshness.
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
Updating state:
|
|
139
|
+
|
|
140
|
+
```text
|
|
141
|
+
CURRENT TASK
|
|
142
|
+
[LATEST PROMPT] [UPDATING] [now]
|
|
143
|
+
Does WallE coding agent have auto compact logic like OpenCode or Claude?
|
|
144
|
+
|
|
145
|
+
AI SUMMARY
|
|
146
|
+
[STALE] [2m ago]
|
|
147
|
+
Previous: Fixing session tab drag behavior and search issues.
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
Unavailable state:
|
|
151
|
+
|
|
152
|
+
```text
|
|
153
|
+
CURRENT TASK
|
|
154
|
+
[LATEST PROMPT] [FALLBACK] [now]
|
|
155
|
+
Fix the session tooltip freshness behavior.
|
|
156
|
+
```
|
|
157
|
+
|
|
158
|
+
Progress remains separate and should keep using assistant-event evidence.
|
|
159
|
+
|
|
160
|
+
Interaction behavior:
|
|
161
|
+
|
|
162
|
+
- Opening a tooltip fetches summary immediately.
|
|
163
|
+
- If the tooltip is already open for that session, new stream events rerender it.
|
|
164
|
+
- While visible, perform a lightweight refresh every 10 seconds.
|
|
165
|
+
- Stop the refresh timer when the tooltip is hidden.
|
|
166
|
+
- Clicking inside the tooltip continues to preserve it for copy/select.
|
|
167
|
+
- Activating a different session tooltip replaces the current tooltip.
|
|
168
|
+
|
|
169
|
+
## Implementation Plan
|
|
170
|
+
|
|
171
|
+
Phase 1: documentation
|
|
172
|
+
|
|
173
|
+
- Add this design note.
|
|
174
|
+
- Commit documentation by itself after tech-lead (TL) review.
|
|
175
|
+
|
|
176
|
+
Phase 2: backend freshness
|
|
177
|
+
|
|
178
|
+
- Extend `SessionStream` cached summary metadata with prompt timestamp/count.
|
|
179
|
+
- Add `latestPrompt`, `aiSummary`, and `currentTask` to `getSummary()`.
|
|
180
|
+
- Ensure `intent` maps to `currentTask`, not stale AI.
|
|
181
|
+
- Update summary generation prompt to prioritize latest prompt.
|
|
182
|
+
- Add unit tests for fresh, stale, fallback, and provider-lag cases.
|
|
183
|
+
|
|
184
|
+
Phase 3: tooltip UI
|
|
185
|
+
|
|
186
|
+
- Render `Current Task` from `currentTask`.
|
|
187
|
+
- Render stale AI only as secondary context.
|
|
188
|
+
- Add freshness/status pills.
|
|
189
|
+
- Add visible-tooltip refresh and stream-event rerender.
|
|
190
|
+
- Add browser/render coverage for a tooltip that updates while open.
|
|
191
|
+
|
|
192
|
+
Phase 4: dev validation
|
|
193
|
+
|
|
194
|
+
- Start isolated CTM via `ctm-dev`.
|
|
195
|
+
- Verify service health on the dev port pair.
|
|
196
|
+
- Exercise stale summary behavior through a real browser.
|
|
197
|
+
- Confirm no browser console errors for the tooltip flow.
|
|
198
|
+
- Commit implementation after TL review.
|
|
199
|
+
|
|
200
|
+
## Test Matrix
|
|
201
|
+
|
|
202
|
+
Backend:
|
|
203
|
+
|
|
204
|
+
- AI summary generated after latest prompt: primary task is AI summary.
|
|
205
|
+
- AI summary timestamp older than latest prompt: primary task is latest prompt.
|
|
206
|
+
- AI provider slow or failed: primary task remains latest prompt.
|
|
207
|
+
- Fallback summary raw prompt: primary task remains prompt evidence.
|
|
208
|
+
- Summary generation input puts latest prompt before recent context.
|
|
209
|
+
|
|
210
|
+
Frontend:
|
|
211
|
+
|
|
212
|
+
- Tooltip opens and labels `Current Task`.
|
|
213
|
+
- Stale AI is demoted into `AI Summary`.
|
|
214
|
+
- New user stream event updates an already-open tooltip.
|
|
215
|
+
- Fresh summary event promotes AI summary.
|
|
216
|
+
- Tooltip remains copyable and dismisses only on outside click or another
|
|
217
|
+
tooltip activation.
|
|
218
|
+
|
|
219
|
+
Dev validation:
|
|
220
|
+
|
|
221
|
+
- CTM dev server starts on a random non-primary port.
|
|
222
|
+
- `/api/services/status` succeeds on the dev port.
|
|
223
|
+
- Browser test uses the dev port, never `3456` or `3457`.
|
|
224
|
+
- Screenshot or DOM assertions prove the freshness state.
|
|
@@ -0,0 +1,369 @@
|
|
|
1
|
+
# Claude/Codex Session UX Issue Review
|
|
2
|
+
|
|
3
|
+
Date: 2026-05-01
|
|
4
|
+
Scope: CTM session terminal UX, restore/restart behavior, status accuracy,
|
|
5
|
+
search/review history, and Wall-E coding session integration.
|
|
6
|
+
|
|
7
|
+
## Evidence Used
|
|
8
|
+
|
|
9
|
+
- Wall-E MCP status check: the Wall-E MCP server is installed and exposes the
|
|
10
|
+
CTM session tools, but the live CTM DB health path currently reports
|
|
11
|
+
`ctm_db_schema_read_failed` with a `disk I/O error`.
|
|
12
|
+
- CTM cached DB fallback: `~/.walle/data/task-manager.db` has the expected CTM
|
|
13
|
+
session tables and conversation blobs, but `session_messages` and
|
|
14
|
+
`session_messages_fts` are empty in the sampled cache.
|
|
15
|
+
- Session corpus scan: recent Claude and Codex session JSONLs plus CTM
|
|
16
|
+
`session_conversations.messages` were scanned for direct user reports from
|
|
17
|
+
April 15 onward. The largest clusters were search/review, Wall-E/model, blank
|
|
18
|
+
or restore behavior, identity/restart, malformed output, input/approval, and
|
|
19
|
+
status accuracy.
|
|
20
|
+
- Git history: recent CTM fixes show repeated work around blank tabs, Codex
|
|
21
|
+
terminal restore, output budget handling, status projection, search identity,
|
|
22
|
+
Wall-E default model hydration, and Codex rollout recovery.
|
|
23
|
+
- Regression tests: the render and session tests now cover blank-tab retry,
|
|
24
|
+
worker saturation, Codex panel restore gaps, TUI redraw, terminal query
|
|
25
|
+
handling, output freeze, status typing, search dedupe, title preservation, and
|
|
26
|
+
Wall-E default model behavior.
|
|
27
|
+
- Code review: current review focused on `public/index.html`, `server.js`,
|
|
28
|
+
`lib/session-history.js`, Wall-E MCP/session context code, and the Playwright
|
|
29
|
+
regression tests.
|
|
30
|
+
|
|
31
|
+
## Issue Clusters
|
|
32
|
+
|
|
33
|
+
### 1. Blank tabs, lost output, and restore gaps
|
|
34
|
+
|
|
35
|
+
Observed symptoms:
|
|
36
|
+
|
|
37
|
+
- Switching back to a long-running tab could show a blank terminal.
|
|
38
|
+
- Restart or panel restore could lose visible output or show a large blank tail.
|
|
39
|
+
- Codex tabs sometimes opened at the top or with the prompt far below the useful
|
|
40
|
+
output.
|
|
41
|
+
- Review or live tab state could be overwritten by a stale shorter snapshot.
|
|
42
|
+
|
|
43
|
+
Likely causes:
|
|
44
|
+
|
|
45
|
+
- Snapshot cache was consumed too early, so a retry had no usable fallback.
|
|
46
|
+
- Headless worker saturation caused soft timeouts; late worker responses were
|
|
47
|
+
dropped instead of cached.
|
|
48
|
+
- Hidden terminals restored with stale dimensions, then painted an invalid
|
|
49
|
+
snapshot.
|
|
50
|
+
- Browser WebGL state and xterm state could diverge after tab switches or
|
|
51
|
+
context loss.
|
|
52
|
+
- Codex emits full-screen terminal clear sequences and can leave a viewport-sized
|
|
53
|
+
blank tail after ratatui redraws.
|
|
54
|
+
|
|
55
|
+
Fixes already landed:
|
|
56
|
+
|
|
57
|
+
- Preserve cached snapshots across attach/retry and keep late worker responses
|
|
58
|
+
usable after soft timeout.
|
|
59
|
+
- Recreate WebGL renderers, force xterm refresh, and reflow on activation.
|
|
60
|
+
- Reject dimension-mismatched snapshots until resize/reflow catches up.
|
|
61
|
+
- Preserve browser-visible output when a stale server snapshot is shorter.
|
|
62
|
+
- Compact Codex internal blank gaps and anchor follow behavior to the useful
|
|
63
|
+
prompt viewport.
|
|
64
|
+
|
|
65
|
+
Representative coverage:
|
|
66
|
+
|
|
67
|
+
- `blank-tab-retry-recovery.spec.js`
|
|
68
|
+
- `blank-tab-worker-saturation.spec.js`
|
|
69
|
+
- `codex-panel-restore-gap.spec.js`
|
|
70
|
+
- `snapshot-restore-ordering.spec.js`
|
|
71
|
+
- `dimension-mismatch.spec.js`
|
|
72
|
+
|
|
73
|
+
### 2. Malformed TUI output and terminal freezes
|
|
74
|
+
|
|
75
|
+
Observed symptoms:
|
|
76
|
+
|
|
77
|
+
- Claude Code sticky status blocks duplicated or drifted.
|
|
78
|
+
- Codex output could become visually "messed up" while typing.
|
|
79
|
+
- Codex skill picker and helper input alignment could break.
|
|
80
|
+
- Output sometimes froze after high-volume bursts or after a TUI exit.
|
|
81
|
+
- Startup terminal queries could leak into the UI or back into the PTY.
|
|
82
|
+
|
|
83
|
+
Likely causes:
|
|
84
|
+
|
|
85
|
+
- The output-budget path split stateful ANSI frames across throttled writes.
|
|
86
|
+
- Raw post-snapshot bytes could be double-applied after a snapshot restore.
|
|
87
|
+
- Terminal query responses such as DSR/DA mode probes were not always consumed
|
|
88
|
+
silently.
|
|
89
|
+
- Helper textarea position was not consistently aligned to the current xterm
|
|
90
|
+
viewport.
|
|
91
|
+
- Blank-gap compaction was unsafe while Codex interactive picker UI was visible.
|
|
92
|
+
|
|
93
|
+
Fixes already landed:
|
|
94
|
+
|
|
95
|
+
- Treat headless xterm snapshots as the source of truth during output-budget
|
|
96
|
+
suppression.
|
|
97
|
+
- Add a short post-snapshot grace/buffer so late raw bytes are applied in order.
|
|
98
|
+
- Restore via reset plus FIFO writes to avoid stale state after snapshot reset.
|
|
99
|
+
- Consume known terminal queries and add a real Codex DSR regression.
|
|
100
|
+
- Align the Codex helper textarea to the viewport and avoid compacting during
|
|
101
|
+
active skill picker UI.
|
|
102
|
+
|
|
103
|
+
Representative coverage:
|
|
104
|
+
|
|
105
|
+
- `claude-tui-sticky-block.spec.js`
|
|
106
|
+
- `tui-redraw.spec.js`
|
|
107
|
+
- `tui-rerender.spec.js`
|
|
108
|
+
- `output-freeze.spec.js`
|
|
109
|
+
- `real-codex-dsr-query.spec.js`
|
|
110
|
+
- `codex-clear-scrollback.spec.js`
|
|
111
|
+
|
|
112
|
+
### 3. Running/idle/waiting status inaccuracies
|
|
113
|
+
|
|
114
|
+
Observed symptoms:
|
|
115
|
+
|
|
116
|
+
- Idle Claude and Codex sessions were shown as Running.
|
|
117
|
+
- Active session list, overview, and command tab disagreed.
|
|
118
|
+
- Switching tabs could cause an idle session to become Running.
|
|
119
|
+
- Typing or UI-induced resize/reflow could be treated as agent activity.
|
|
120
|
+
|
|
121
|
+
Likely causes:
|
|
122
|
+
|
|
123
|
+
- TUI redraws and status-only terminal output were interpreted as fresh work.
|
|
124
|
+
- Multiple status sources competed: server session payloads, stream status,
|
|
125
|
+
client PTY detection, standup projection, and overview cards.
|
|
126
|
+
- Codex running holds were renewed from the wrong signal in some cases.
|
|
127
|
+
- Freshness windows differed across UI surfaces.
|
|
128
|
+
|
|
129
|
+
Fixes already landed:
|
|
130
|
+
|
|
131
|
+
- Add status hooks/state bus and make live server status the authoritative
|
|
132
|
+
source while it is fresh.
|
|
133
|
+
- Filter status-only ANSI/TUI noise and suppress UI-refresh output as activity.
|
|
134
|
+
- Renew Codex running holds from detector receipts rather than incidental tab
|
|
135
|
+
redraws.
|
|
136
|
+
- Align standup and active-session projections on the same live status payload.
|
|
137
|
+
|
|
138
|
+
Representative coverage:
|
|
139
|
+
|
|
140
|
+
- `status-detector.spec.js`
|
|
141
|
+
- `session-status-typing.spec.js`
|
|
142
|
+
- overview and standup browser checks from the recent fix passes
|
|
143
|
+
|
|
144
|
+
### 4. Identity, title, context, and restart recovery
|
|
145
|
+
|
|
146
|
+
Observed symptoms:
|
|
147
|
+
|
|
148
|
+
- Sessions lost title or context after restart.
|
|
149
|
+
- Some Codex sessions disappeared after CTM crashes.
|
|
150
|
+
- Review tabs resolved to old prompts or wrong titles.
|
|
151
|
+
- Duplicate titles such as repeated `task-manager` appeared.
|
|
152
|
+
- Codex rollouts were found by the wrong file when IDs overlapped.
|
|
153
|
+
|
|
154
|
+
Likely causes:
|
|
155
|
+
|
|
156
|
+
- CTM tab IDs and provider session IDs are separate identities and were not
|
|
157
|
+
linked consistently on every path.
|
|
158
|
+
- Codex `state_5.sqlite` could be corrupt, missing, or incomplete.
|
|
159
|
+
- JSONL fallback did not always replay the full rollout history.
|
|
160
|
+
- Rollout lookup used weak matching instead of exact UUID resolution.
|
|
161
|
+
- Auto-title refresh could overwrite user or session-owned titles.
|
|
162
|
+
|
|
163
|
+
Fixes already landed:
|
|
164
|
+
|
|
165
|
+
- Use dual CTM/agent session IDs and backfill/migrate identities.
|
|
166
|
+
- Resolve Codex rollouts by exact UUID and fall back to JSONL when state DB is
|
|
167
|
+
unavailable.
|
|
168
|
+
- Replay full Codex rollout history instead of a partial latest fragment.
|
|
169
|
+
- Harden title ownership, preserve explicit renames, and clean branch/title UI.
|
|
170
|
+
|
|
171
|
+
Representative coverage:
|
|
172
|
+
|
|
173
|
+
- `codex-title-preservation.spec.js`
|
|
174
|
+
- `session-title-branch-badge.spec.js`
|
|
175
|
+
- recent commits around Codex exact UUID resolution, JSONL fallback, and full
|
|
176
|
+
rollout replay
|
|
177
|
+
|
|
178
|
+
### 5. Search, review, and session summary reliability
|
|
179
|
+
|
|
180
|
+
Observed symptoms:
|
|
181
|
+
|
|
182
|
+
- Search missed obvious terms such as `$publish`.
|
|
183
|
+
- Search returned duplicate sessions for the same underlying agent run.
|
|
184
|
+
- A summary could say "1 msg" even when the session had many messages.
|
|
185
|
+
- Review panes could be empty while the live terminal had content.
|
|
186
|
+
- Prompt history or prompt dropdown state could be stale.
|
|
187
|
+
|
|
188
|
+
Likely causes:
|
|
189
|
+
|
|
190
|
+
- Raw grep over Codex rollouts included developer context and skill files, which
|
|
191
|
+
produced false positives and hid user-message semantics.
|
|
192
|
+
- Session dedupe did not consistently collapse CTM tab IDs, provider IDs,
|
|
193
|
+
provisional IDs, and JSONL paths.
|
|
194
|
+
- Search filters and active-state filters could hide relevant historical rows.
|
|
195
|
+
- In the sampled Wall-E cache, `session_conversations.messages` has content but
|
|
196
|
+
`session_messages` and `session_messages_fts` are empty, so any MCP/search path
|
|
197
|
+
relying only on message FTS will miss conversation content.
|
|
198
|
+
- Prompt preview caches were not refreshed on every relevant session update.
|
|
199
|
+
|
|
200
|
+
Fixes already landed:
|
|
201
|
+
|
|
202
|
+
- Parse Codex sessions through the session-history layer instead of raw text
|
|
203
|
+
grep for user-visible search semantics.
|
|
204
|
+
- Normalize command prefixes and dedupe session identities more aggressively.
|
|
205
|
+
- Stabilize overview/session search state and live prompt preview refresh.
|
|
206
|
+
|
|
207
|
+
Remaining gap:
|
|
208
|
+
|
|
209
|
+
- Wall-E/CTM DB session search should either backfill `session_messages` and FTS
|
|
210
|
+
from `session_conversations.messages` or query the conversation JSON blobs as a
|
|
211
|
+
fallback. The cache sample shows this is still a real blind spot.
|
|
212
|
+
|
|
213
|
+
### 6. Wall-E coding session model and MCP setup
|
|
214
|
+
|
|
215
|
+
Observed symptoms:
|
|
216
|
+
|
|
217
|
+
- New Wall-E coding sessions could have no default model.
|
|
218
|
+
- Sending a command with no model selected could enter a busy loop.
|
|
219
|
+
- Wall-E MCP showed as unsupported or unavailable for some users.
|
|
220
|
+
- `npx` installs did not reliably make the MCP usable in downstream agents.
|
|
221
|
+
- MCP health did not clearly distinguish "installed" from "DB usable".
|
|
222
|
+
|
|
223
|
+
Likely causes:
|
|
224
|
+
|
|
225
|
+
- Model hydration happened too late or was not persisted into the session UI
|
|
226
|
+
state.
|
|
227
|
+
- Duplicate sends were not blocked while the model handle was unresolved.
|
|
228
|
+
- MCP install/setup checks focused on config presence, not full tool and DB
|
|
229
|
+
health.
|
|
230
|
+
- The live CTM DB path can be unreadable from Wall-E even when the cached Wall-E
|
|
231
|
+
DB is healthy.
|
|
232
|
+
|
|
233
|
+
Fixes already landed:
|
|
234
|
+
|
|
235
|
+
- Hydrate a default model for Wall-E sessions and ignore duplicate busy sends.
|
|
236
|
+
- Add browser coverage for Wall-E default model behavior.
|
|
237
|
+
- Improve MCP health reporting so DB schema failures are visible.
|
|
238
|
+
|
|
239
|
+
Remaining gap:
|
|
240
|
+
|
|
241
|
+
- Make Wall-E MCP degrade gracefully when the configured CTM DB cannot be read:
|
|
242
|
+
fall back to the healthy Wall-E cache or CTM HTTP APIs, and surface the
|
|
243
|
+
degraded source in `walle_memory_status`.
|
|
244
|
+
|
|
245
|
+
### 7. Input, focus, and approval UX
|
|
246
|
+
|
|
247
|
+
Observed symptoms:
|
|
248
|
+
|
|
249
|
+
- Input lag became severe under high CPU or heavy output.
|
|
250
|
+
- Input could freeze after websocket reconnect.
|
|
251
|
+
- Renaming, queue input, and toolbar focus could be interrupted by session
|
|
252
|
+
events.
|
|
253
|
+
- Auto-approval sometimes missed Codex approval prompts.
|
|
254
|
+
|
|
255
|
+
Likely causes:
|
|
256
|
+
|
|
257
|
+
- High-volume output saturated the UI path and delayed input handling.
|
|
258
|
+
- Reconnect and toolbar code did not always restore focus and local echo state.
|
|
259
|
+
- Approval prompt parsing was too dependent on provider-specific terminal text.
|
|
260
|
+
|
|
261
|
+
Fixes already landed:
|
|
262
|
+
|
|
263
|
+
- Add input-freeze regressions and reconnect-focused fixes.
|
|
264
|
+
- Improve approval transition snapshots so prompts and decisions do not corrupt
|
|
265
|
+
the terminal state.
|
|
266
|
+
- Expand approval/provider parsing in the recent fix passes.
|
|
267
|
+
|
|
268
|
+
## Codebase Improvement Opportunities
|
|
269
|
+
|
|
270
|
+
### P0: Fix DB-first Wall-E session retrieval
|
|
271
|
+
|
|
272
|
+
The current MCP surface is configured, but live CTM DB health can fail and the
|
|
273
|
+
cached DB has conversation blobs without populated message/FTS rows. This is the
|
|
274
|
+
largest remaining observability hole because it affects future debugging and
|
|
275
|
+
agent memory retrieval.
|
|
276
|
+
|
|
277
|
+
Recommended change:
|
|
278
|
+
|
|
279
|
+
- In Wall-E session context search, if `session_messages_fts` is empty or the
|
|
280
|
+
configured CTM DB fails schema checks, fall back to:
|
|
281
|
+
- `session_conversations.messages` JSON search in the healthy cache, then
|
|
282
|
+
- CTM HTTP/session APIs if available.
|
|
283
|
+
- Add a backfill/repair command to populate `session_messages` and
|
|
284
|
+
`session_messages_fts` from existing `session_conversations`.
|
|
285
|
+
- Make `walle_memory_status` report the active source: live CTM DB, Wall-E cache,
|
|
286
|
+
or API fallback.
|
|
287
|
+
|
|
288
|
+
### P1: Extract terminal restore and status state machines
|
|
289
|
+
|
|
290
|
+
`public/index.html` now owns a large amount of terminal restore, blank watchdog,
|
|
291
|
+
WebGL recovery, scroll anchoring, Codex compaction, and status precedence logic.
|
|
292
|
+
The recent fixes are directionally correct, but the code is hard to reason about
|
|
293
|
+
because the state machine is distributed across inline UI handlers.
|
|
294
|
+
|
|
295
|
+
Recommended change:
|
|
296
|
+
|
|
297
|
+
- Extract terminal restore logic into a small module with explicit states:
|
|
298
|
+
`inactive`, `attaching`, `snapshotRequested`, `restoring`, `live`,
|
|
299
|
+
`retrying`, `reflowing`, and `failed`.
|
|
300
|
+
- Extract status precedence into a shared module or generated fixture used by
|
|
301
|
+
server/client tests.
|
|
302
|
+
- Keep the UI layer as a renderer of state transitions, not the owner of every
|
|
303
|
+
transition rule.
|
|
304
|
+
|
|
305
|
+
### P1: Add per-session diagnostic ring buffers
|
|
306
|
+
|
|
307
|
+
Many past reports required reconstructing what happened from screenshots,
|
|
308
|
+
terminal state, DB state, and git history.
|
|
309
|
+
|
|
310
|
+
Recommended change:
|
|
311
|
+
|
|
312
|
+
- Add a lightweight per-session debug ring buffer that records the last N restore
|
|
313
|
+
and status decisions:
|
|
314
|
+
- snapshot source and dimensions,
|
|
315
|
+
- cache hit/miss and timeout path,
|
|
316
|
+
- blank watchdog attempts,
|
|
317
|
+
- WebGL recreate/reflow events,
|
|
318
|
+
- status source and freshness,
|
|
319
|
+
- identity mapping and title owner.
|
|
320
|
+
- Expose it in dev/debug mode so future agent fixes start from facts instead of
|
|
321
|
+
screenshots alone.
|
|
322
|
+
|
|
323
|
+
### P2: Reconcile stale restore invariants in tests and comments
|
|
324
|
+
|
|
325
|
+
One reviewed test comment says activation flushes the writer queue, while the
|
|
326
|
+
current activation path clears queued writes before requesting an authoritative
|
|
327
|
+
snapshot. That may be an intentional newer invariant, but it should be made
|
|
328
|
+
explicit.
|
|
329
|
+
|
|
330
|
+
Recommended change:
|
|
331
|
+
|
|
332
|
+
- Update the test comments if the new invariant is correct.
|
|
333
|
+
- Add a narrow regression proving post-snapshot buffered bytes are not lost when
|
|
334
|
+
activation clears stale queued bytes.
|
|
335
|
+
|
|
336
|
+
### P2: Promote the session UX test pack into a standard smoke suite
|
|
337
|
+
|
|
338
|
+
The project now has good focused tests, but the failure modes recur when fixes
|
|
339
|
+
touch adjacent code.
|
|
340
|
+
|
|
341
|
+
Recommended change:
|
|
342
|
+
|
|
343
|
+
- Define a small "session UX smoke" command covering:
|
|
344
|
+
- blank tab restore,
|
|
345
|
+
- worker saturation retry,
|
|
346
|
+
- Codex panel restore gap,
|
|
347
|
+
- Codex clear scrollback,
|
|
348
|
+
- status typing,
|
|
349
|
+
- search dedupe,
|
|
350
|
+
- Wall-E default model.
|
|
351
|
+
- Run it before CTM releases and after changes to `public/index.html`,
|
|
352
|
+
`server.js`, `lib/session-history.js`, Wall-E MCP code, or DB migrations.
|
|
353
|
+
|
|
354
|
+
## Bottom Line
|
|
355
|
+
|
|
356
|
+
Most of the user-visible regressions were not caused by a single missing check.
|
|
357
|
+
They came from CTM treating live terminal bytes, headless snapshots, structured
|
|
358
|
+
provider logs, persisted DB rows, and client-only UI state as interchangeable.
|
|
359
|
+
The recent fixes work because they make one source authoritative for each job:
|
|
360
|
+
|
|
361
|
+
- headless snapshots for visual terminal recovery,
|
|
362
|
+
- structured session history for search/review semantics,
|
|
363
|
+
- fresh server live status for status badges,
|
|
364
|
+
- explicit CTM/agent IDs for identity,
|
|
365
|
+
- hydrated model state for Wall-E command sends.
|
|
366
|
+
|
|
367
|
+
The next quality jump is to make those authority boundaries explicit in code and
|
|
368
|
+
diagnostics, especially for Wall-E DB-first retrieval and the terminal
|
|
369
|
+
restore/status state machines.
|
|
@@ -384,8 +384,16 @@ function keyboardProximityScore(a, b) {
|
|
|
384
384
|
|
|
385
385
|
// --- Query Expansion Orchestrator ---
|
|
386
386
|
|
|
387
|
+
/**
 * Normalizes a raw search value into a canonical form for fuzzy matching.
 *
 * Steps, in order: coerce to a string (any falsy input becomes ''), trim
 * surrounding whitespace, lowercase, strip leading runs of `$` or `/` from
 * tokens, and collapse every whitespace run into a single space.
 *
 * NOTE(review): presumably the sigil stripping exists so command-style queries
 * such as `$publish` match their bare term — confirm against callers.
 *
 * @param {*} value - Raw query or candidate text; falsy values are treated as ''.
 * @returns {string} The trimmed, lowercased, sigil-stripped, single-spaced string.
 */
function normalizeFuzzySearchValue(value) {
|
|
388
|
+
return String(value || '')
|
|
389
|
+
.trim()
|
|
390
|
+
.toLowerCase()
|
|
391
|
+
// Drop one or more `$` or `/` characters when they sit at the start of the
// string or right after whitespace, `(`, `[` or `{`, and are followed by
// a-z, 0-9, `_` or `-`. The captured prefix ($1) is kept.
.replace(/(^|[\s([{])[$/]+(?=[a-z0-9_-])/g, '$1')
|
|
392
|
+
.replace(/\s+/g, ' ');
|
|
393
|
+
}
|
|
394
|
+
|
|
387
395
|
function expandQueryFuzzy(query) {
|
|
388
|
-
const terms = (query
|
|
396
|
+
const terms = normalizeFuzzySearchValue(query).split(/\s+/).filter(t => t.length >= 2);
|
|
389
397
|
if (terms.length === 0) return { ftsQuery: '', expansions: [] };
|
|
390
398
|
|
|
391
399
|
const MAX_VARIANTS_PER_TERM = 8;
|
|
@@ -451,5 +459,5 @@ module.exports = {
|
|
|
451
459
|
levenshtein, generate1EditVariants,
|
|
452
460
|
doubleMetaphone, phoneticMatch,
|
|
453
461
|
keyboardProximityScore, QWERTY_NEIGHBORS,
|
|
454
|
-
expandQueryFuzzy,
|
|
462
|
+
expandQueryFuzzy, normalizeFuzzySearchValue,
|
|
455
463
|
};
|