create-walle 0.9.13 → 0.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/README.md +8 -3
  2. package/bin/create-walle.js +232 -32
  3. package/bin/mcp-inject.js +18 -53
  4. package/package.json +3 -1
  5. package/template/claude-task-manager/api-prompts.js +11 -2
  6. package/template/claude-task-manager/approval-agent.js +7 -0
  7. package/template/claude-task-manager/db.js +94 -75
  8. package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
  9. package/template/claude-task-manager/docs/session-tooltip-freshness-design.md +224 -0
  10. package/template/claude-task-manager/docs/session-ux-issue-review-2026-05-01.md +369 -0
  11. package/template/claude-task-manager/fuzzy-utils.js +10 -2
  12. package/template/claude-task-manager/git-utils.js +140 -10
  13. package/template/claude-task-manager/lib/agent-capabilities.js +1 -1
  14. package/template/claude-task-manager/lib/agent-presets.js +38 -5
  15. package/template/claude-task-manager/lib/codex-terminal-final.js +53 -0
  16. package/template/claude-task-manager/lib/ctm-session-context-api.js +222 -0
  17. package/template/claude-task-manager/lib/session-diagnostics.js +56 -0
  18. package/template/claude-task-manager/lib/session-history.js +309 -16
  19. package/template/claude-task-manager/lib/session-standup.js +409 -0
  20. package/template/claude-task-manager/lib/session-stream.js +253 -20
  21. package/template/claude-task-manager/lib/standup-attention.js +200 -0
  22. package/template/claude-task-manager/lib/status-hooks.js +8 -2
  23. package/template/claude-task-manager/lib/update-telemetry.js +114 -0
  24. package/template/claude-task-manager/lib/walle-ctm-history.js +49 -6
  25. package/template/claude-task-manager/lib/walle-default-model.js +55 -0
  26. package/template/claude-task-manager/lib/walle-mcp-auto-config.js +66 -0
  27. package/template/claude-task-manager/lib/walle-supervisor.js +86 -19
  28. package/template/claude-task-manager/lib/walle-transcript.js +1 -3
  29. package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
  30. package/template/claude-task-manager/package.json +1 -0
  31. package/template/claude-task-manager/providers/codex-mcp.js +104 -0
  32. package/template/claude-task-manager/providers/index.js +2 -0
  33. package/template/claude-task-manager/public/css/setup.css +2 -1
  34. package/template/claude-task-manager/public/css/walle.css +71 -0
  35. package/template/claude-task-manager/public/index.html +2388 -429
  36. package/template/claude-task-manager/public/js/message-renderer.js +314 -35
  37. package/template/claude-task-manager/public/js/session-search-utils.js +185 -3
  38. package/template/claude-task-manager/public/js/session-status-precedence.js +125 -0
  39. package/template/claude-task-manager/public/js/setup.js +62 -19
  40. package/template/claude-task-manager/public/js/stream-view.js +396 -55
  41. package/template/claude-task-manager/public/js/terminal-restore-state.js +57 -0
  42. package/template/claude-task-manager/public/js/walle-session.js +234 -26
  43. package/template/claude-task-manager/public/js/walle.js +143 -2
  44. package/template/claude-task-manager/server.js +1402 -433
  45. package/template/claude-task-manager/session-integrity.js +77 -28
  46. package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
  47. package/template/claude-task-manager/workers/scrollback-worker.js +5 -6
  48. package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
  49. package/template/package.json +1 -1
  50. package/template/wall-e/agent-runners/claude-code.js +2 -0
  51. package/template/wall-e/agent.js +63 -8
  52. package/template/wall-e/api-walle.js +330 -52
  53. package/template/wall-e/brain.js +291 -42
  54. package/template/wall-e/chat.js +172 -15
  55. package/template/wall-e/coding/compaction-service.js +19 -5
  56. package/template/wall-e/coding/stream-processor.js +22 -2
  57. package/template/wall-e/coding/workspace-replay.js +1 -4
  58. package/template/wall-e/coding-orchestrator.js +250 -80
  59. package/template/wall-e/compat.js +0 -28
  60. package/template/wall-e/context/context-builder.js +3 -1
  61. package/template/wall-e/embeddings.js +2 -7
  62. package/template/wall-e/eval/agent-runner.js +30 -9
  63. package/template/wall-e/eval/benchmark-generator.js +21 -1
  64. package/template/wall-e/eval/benchmarks/chat-eval.json +66 -6
  65. package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
  66. package/template/wall-e/eval/cc-replay.js +1 -0
  67. package/template/wall-e/eval/codex-cli-baseline.js +633 -0
  68. package/template/wall-e/eval/debug-agent003.js +1 -0
  69. package/template/wall-e/eval/eval-orchestrator.js +3 -3
  70. package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
  71. package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
  72. package/template/wall-e/eval/run-model-comparison.js +1 -0
  73. package/template/wall-e/eval/swebench-adapter.js +1 -0
  74. package/template/wall-e/evaluation/quorum-evaluator.js +0 -1
  75. package/template/wall-e/extraction/knowledge-extractor.js +1 -2
  76. package/template/wall-e/lib/mcp-integration.js +336 -0
  77. package/template/wall-e/llm/ollama.js +47 -8
  78. package/template/wall-e/llm/ollama.plugin.json +1 -1
  79. package/template/wall-e/llm/tool-adapter.js +1 -0
  80. package/template/wall-e/loops/ingest.js +42 -8
  81. package/template/wall-e/loops/initiative.js +87 -2
  82. package/template/wall-e/mcp-server.js +872 -19
  83. package/template/wall-e/memory/ctm-context-client.js +230 -0
  84. package/template/wall-e/memory/ctm-session-context.js +1376 -0
  85. package/template/wall-e/prompts/coding/memory-protocol.md +6 -0
  86. package/template/wall-e/server.js +30 -1
  87. package/template/wall-e/skills/_bundled/memory-search/SKILL.md +8 -0
  88. package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
  89. package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
  90. package/template/wall-e/skills/_bundled/slack-mentions/run.js +471 -188
  91. package/template/wall-e/skills/skill-planner.js +86 -4
  92. package/template/wall-e/slack/socket-mode-listener.js +276 -0
  93. package/template/wall-e/telemetry.js +70 -2
  94. package/template/wall-e/tools/builtin-middleware.js +55 -2
  95. package/template/wall-e/tools/shell-policy.js +1 -1
  96. package/template/wall-e/tools/slack-owner.js +104 -0
  97. package/template/website/index.html +4 -4
  98. package/template/builder-journal.md +0 -17
@@ -282,6 +282,101 @@ function _providerConfigFromRegistryRow(row) {
282
282
  return config;
283
283
  }
284
284
 
285
+ function _createChatProvider(type, config = {}, opts = {}) {
286
+ if (typeof opts.providerFactory === 'function') return opts.providerFactory(type, config);
287
+ return createClient(type, config);
288
+ }
289
+
290
+ function _providerRuntimeType(row = {}) {
291
+ if (row.type === 'anthropic' && row.auth_method === 'claude_cli') return 'claude-cli';
292
+ if (row.type === 'openai' && row.auth_method === 'codex_cli') return 'codex-cli';
293
+ return row.type;
294
+ }
295
+
296
+ function _providerRuntimeConfig(row = {}) {
297
+ if (row.type === 'anthropic' && row.auth_method === 'oauth_proxy') {
298
+ return {
299
+ apiKey: 'oauth-proxy-placeholder',
300
+ baseUrl: `http://127.0.0.1:${process.env.OAUTH_PROXY_PORT || '3458'}`,
301
+ };
302
+ }
303
+ return _providerConfigFromRegistryRow(row);
304
+ }
305
+
306
+ function _providerRowHasRuntimeAccess(row = {}) {
307
+ if (!row.enabled) return false;
308
+ if (row.type === 'ollama' || row.type === 'mlx') return true;
309
+ if (row.auth_method && row.auth_method !== 'api_key') return true;
310
+ if (row.api_key_encrypted) return true;
311
+ if (row.type === 'anthropic' && process.env.ANTHROPIC_API_KEY) return true;
312
+ if (row.type === 'openai' && process.env.OPENAI_API_KEY) return true;
313
+ if (row.type === 'google' && (process.env.GOOGLE_API_KEY || process.env.GEMINI_API_KEY)) return true;
314
+ if (row.type === 'deepseek' && process.env.DEEPSEEK_API_KEY) return true;
315
+ return false;
316
+ }
317
+
318
+ function _fallbackErrorType(providerError = {}) {
319
+ return new Set(['rate_limited', 'network', 'provider_unavailable', 'timeout']).has(providerError.type);
320
+ }
321
+
322
+ function _selectFallbackModel(providerId, runtimeType, taskType) {
323
+ try {
324
+ const preferred = brain.getModelDefault(taskType || 'chat');
325
+ if (preferred?.model_registry_id) {
326
+ const entry = brain.getModelRegistryEntry?.(preferred.model_registry_id);
327
+ if (entry?.provider_id === providerId && entry.model_id) return entry.model_id;
328
+ }
329
+ } catch {}
330
+ try {
331
+ const models = brain.listModelsByProvider?.(providerId) || [];
332
+ const first = models.find(m => m.enabled !== 0);
333
+ if (first?.model_id) return first.model_id;
334
+ } catch {}
335
+ return getDefaultModelForProvider(runtimeType);
336
+ }
337
+
338
+ function _listChatFallbackRows({ attemptedProviderTypes, providerAvailability }) {
339
+ let rows = [];
340
+ try {
341
+ rows = (brain.listModelProviders?.() || [])
342
+ .map(row => brain.getModelProviderWithKey?.(row.id) || row)
343
+ .filter(row => row && _providerRowHasRuntimeAccess(row));
344
+ } catch {
345
+ rows = [];
346
+ }
347
+ const candidates = [];
348
+ for (const row of rows) {
349
+ const runtimeType = _providerRuntimeType(row);
350
+ if (!runtimeType) continue;
351
+ if (attemptedProviderTypes.has(row.type) || attemptedProviderTypes.has(runtimeType)) continue;
352
+ if (providerAvailability?.isProviderUsable) {
353
+ if (!providerAvailability.isProviderUsable(row.id) || !providerAvailability.isProviderUsable(runtimeType)) continue;
354
+ }
355
+ candidates.push({ row, runtimeType });
356
+ }
357
+ return candidates;
358
+ }
359
+
360
+ function _findChatProviderFallback({ attemptedProviderTypes, providerError, taskType, opts, providerAvailability }) {
361
+ if (opts.provider || opts.model || _clientOverride) return null;
362
+ if (!_fallbackErrorType(providerError)) return null;
363
+ for (const { row, runtimeType } of _listChatFallbackRows({ attemptedProviderTypes, providerAvailability })) {
364
+ const model = resolveCompatibleModel(
365
+ _selectFallbackModel(row.id, runtimeType, taskType),
366
+ runtimeType,
367
+ );
368
+ try {
369
+ return {
370
+ provider: _createChatProvider(runtimeType, _providerRuntimeConfig(row), opts),
371
+ providerId: row.id,
372
+ providerType: runtimeType,
373
+ model,
374
+ };
375
+ } catch {}
376
+ }
377
+ return null;
378
+ }
379
+
285
380
  function resolveModelSelection(model, explicitProvider) {
286
381
  if (!model) {
287
382
  return { input: model, model: null, provider: explicitProvider || null, providerConfig: null, registryId: null };
@@ -547,11 +642,18 @@ async function chat(message, opts = {}) {
547
642
  }
548
643
  }
549
644
 
645
+ let workspaceContextBlock = '';
646
+ if (effectiveCwd) {
647
+ workspaceContextBlock = '\n\n## Current Workspace\n'
648
+ + `Working directory: ${effectiveCwd}\n`
649
+ + 'When the user refers to this project, current project, codebase, repo, or asks to study/review/summarize code without a path, treat that as this working directory. Do not ask for a project path first; explore it with glob, search_files, grep_files, read_file, read_project_file, or search_project. Ask for a path only if this directory is unavailable or the user explicitly asks for a different project.';
650
+ }
651
+
550
652
  const promptStart = Date.now();
551
653
  const baseSystemPrompt = await buildSystemPrompt(message, channel, {
552
654
  sessionSummary: existingSession?.summary || null,
553
655
  intent,
554
- }) + reviewContextBlock + codeReviewContextBlock;
656
+ }) + reviewContextBlock + codeReviewContextBlock + workspaceContextBlock;
555
657
  // Item D (multi-agent deep-dive): opt-in workspace directives override
556
658
  // for the chat surface. Tutorial-style or persona-tweak directives can
557
659
  // live in `wall-e/loops/chat.directives.md` (or
@@ -579,8 +681,8 @@ async function chat(message, opts = {}) {
579
681
  const routeConfig = selectedRoute.providerConfig || {};
580
682
  const providerConfig = { ...routeConfig, ...(opts.providerConfig || {}) };
581
683
  const hasSpecificConfig = !!(providerConfig.apiKey || providerConfig.baseUrl || providerConfig.customHeaders);
582
- if (targetProviderType !== defaultProviderType || opts.provider || hasSpecificConfig) {
583
- provider = createClient(targetProviderType, providerConfig);
684
+ if (opts.providerFactory || targetProviderType !== defaultProviderType || opts.provider || hasSpecificConfig) {
685
+ provider = _createChatProvider(targetProviderType, providerConfig, opts);
584
686
  } else {
585
687
  provider = getDefaultClient();
586
688
  }
@@ -591,7 +693,7 @@ async function chat(message, opts = {}) {
591
693
  try {
592
694
  const localEngine = brain.getDb().prepare("SELECT value FROM brain_metadata WHERE key = 'local_engine'").get()?.value;
593
695
  if (localEngine === 'mlx') {
594
- provider = createClient('mlx', {});
696
+ provider = _createChatProvider('mlx', {}, opts);
595
697
  }
596
698
  } catch (e) {
597
699
  console.warn('[chat] MLX engine swap failed, staying on Ollama:', e.message);
@@ -667,7 +769,7 @@ async function chat(message, opts = {}) {
667
769
  },
668
770
  {
669
771
  name: 'search_memories',
670
- description: 'Hybrid search (BM25 + semantic vectors). Call MULTIPLE searches in ONE turn to batch them. Finds both exact keyword matches AND semantically related content.',
772
+ description: 'Hybrid search (BM25 + semantic vectors) across private/user memory. Use before public web search for remembered context, prior discussions, decisions, preferences, people, projects, tools, or Slack/email/calendar work context. Call MULTIPLE searches in ONE turn to batch them.',
671
773
  input_schema: { type: 'object', properties: { query: { type: 'string', description: 'Search query. Finds keyword matches AND semantically similar content.' }, source: { type: 'string', description: 'Filter: slack, ctm, wall-e-chat' }, limit: { type: 'number', default: 15 } }, required: ['query'] },
672
774
  },
673
775
  {
@@ -900,6 +1002,8 @@ async function chat(message, opts = {}) {
900
1002
  if (effectiveCwd && input && typeof input === 'object') {
901
1003
  if (name === 'run_shell' && !input.cwd) {
902
1004
  input = { ...input, cwd: effectiveCwd };
1005
+ } else if (name === 'search_files' && !input.directory) {
1006
+ input = { ...input, directory: effectiveCwd };
903
1007
  } else if (name === 'glob' && !input.directory) {
904
1008
  input = { ...input, directory: effectiveCwd, projectRoot: effectiveCwd };
905
1009
  } else if (name === 'grep_files' && !input.directory) {
@@ -1480,6 +1584,7 @@ async function chat(message, opts = {}) {
1480
1584
  console.log('[chat] Intent:', intent, '| topics:', queryTopics.join(','), '| limits: turns=', MAX_TURNS, 'tools=', MAX_TOOL_CALLS, 'timeout=', MESSAGE_TIMEOUT_MS, 'ms');
1481
1585
 
1482
1586
  const chatStart = Date.now();
1587
+ const attemptedProviderTypes = new Set([targetProviderType, provider.type].filter(Boolean));
1483
1588
  for (let turn = 0; turn < MAX_TURNS; turn++) {
1484
1589
  if (opts.abortSignal?.aborted) {
1485
1590
  clearTimeout(timeout);
@@ -1552,15 +1657,19 @@ async function chat(message, opts = {}) {
1552
1657
  }
1553
1658
 
1554
1659
  resetTurnTimeout();
1660
+ const { withRetry } = require('./llm/retry');
1661
+ const toolsForTurn = (() => {
1662
+ if (codeReviewFastPath) return [];
1663
+ let tools = opts.allowedTools ? chatTools.filter(t => opts.allowedTools.includes(t.name)) : filterToolsForIntent(chatTools, intent);
1664
+ if (channel === 'task' || channel === 'eval') tools = tools.filter(t => !TASK_CHANNEL_EXCLUDE.has(t.name));
1665
+ return tools;
1666
+ })();
1667
+ const primaryRetries = (!opts.provider && !opts.model && !_clientOverride
1668
+ && _listChatFallbackRows({ attemptedProviderTypes, providerAvailability }).length > 0)
1669
+ ? 0
1670
+ : 2;
1555
1671
  let response;
1556
1672
  try {
1557
- const { withRetry } = require('./llm/retry');
1558
- const toolsForTurn = (() => {
1559
- if (codeReviewFastPath) return [];
1560
- let tools = opts.allowedTools ? chatTools.filter(t => opts.allowedTools.includes(t.name)) : filterToolsForIntent(chatTools, intent);
1561
- if (channel === 'task' || channel === 'eval') tools = tools.filter(t => !TASK_CHANNEL_EXCLUDE.has(t.name));
1562
- return tools;
1563
- })();
1564
1673
  response = await withRetry(() => provider.chat({
1565
1674
  model: selectedModel,
1566
1675
  maxTokens: codeReviewFastPath ? 2048 : 4096,
@@ -1569,7 +1678,7 @@ async function chat(message, opts = {}) {
1569
1678
  tools: toolsForTurn,
1570
1679
  ...reasoningOptions,
1571
1680
  signal: controller.signal,
1572
- }), 2); // max 2 retries for chat (faster failure than coding agent)
1681
+ }), primaryRetries); // Fast-fail automatic routing when another provider can handle fallback.
1573
1682
  } catch (llmErr) {
1574
1683
  // Track provider health — failed LLM call
1575
1684
  try {
@@ -1581,8 +1690,56 @@ async function chat(message, opts = {}) {
1581
1690
  provider: usedProvider || targetProviderType || getDefaultProviderType(),
1582
1691
  model: selectedModel,
1583
1692
  });
1584
- recordProviderFailureAlert(decorated.providerError, brain);
1585
- throw decorated;
1693
+ const fallback = _findChatProviderFallback({
1694
+ attemptedProviderTypes,
1695
+ providerError: decorated.providerError,
1696
+ taskType: opts.taskType || 'chat',
1697
+ opts,
1698
+ providerAvailability,
1699
+ });
1700
+ if (fallback) {
1701
+ try {
1702
+ provider = fallback.provider;
1703
+ selectedModel = fallback.model;
1704
+ usedProvider = fallback.provider.type || fallback.providerType;
1705
+ attemptedProviderTypes.add(fallback.providerType);
1706
+ attemptedProviderTypes.add(fallback.provider?.type);
1707
+ try {
1708
+ _telemetry.track('chat_provider_failover', {
1709
+ from_provider: decorated.providerError.provider || targetProviderType,
1710
+ from_model: decorated.providerError.model || selectedModel,
1711
+ to_provider: fallback.providerType,
1712
+ to_model: fallback.model,
1713
+ reason: decorated.providerError.type,
1714
+ });
1715
+ } catch {}
1716
+ response = await withRetry(() => provider.chat({
1717
+ model: selectedModel,
1718
+ maxTokens: codeReviewFastPath ? 2048 : 4096,
1719
+ system: systemPrompt,
1720
+ messages,
1721
+ tools: toolsForTurn,
1722
+ ...reasoningOptions,
1723
+ signal: controller.signal,
1724
+ }), 2);
1725
+ try {
1726
+ if (fallback.providerId) providerAvailability.recordSuccess(fallback.providerId);
1727
+ } catch {}
1728
+ } catch (fallbackErr) {
1729
+ try {
1730
+ if (fallback.providerId) providerAvailability.recordFailure(fallback.providerId, fallbackErr.message);
1731
+ } catch {}
1732
+ const fallbackDecorated = decorateProviderError(fallbackErr, {
1733
+ provider: fallback.providerType,
1734
+ model: fallback.model,
1735
+ });
1736
+ recordProviderFailureAlert(fallbackDecorated.providerError, brain);
1737
+ throw fallbackDecorated;
1738
+ }
1739
+ } else {
1740
+ recordProviderFailureAlert(decorated.providerError, brain);
1741
+ throw decorated;
1742
+ }
1586
1743
  }
1587
1744
  const modelElapsed = Date.now() - turnStart;
1588
1745
  timings.modelMs += modelElapsed;
@@ -97,6 +97,8 @@ class CompactionService {
97
97
  sessionMemory = null,
98
98
  tailTokenBudget,
99
99
  keepRecentUserTurns,
100
+ tailMode = 'preserve',
101
+ continuePrompt = '',
100
102
  } = {}) {
101
103
  if (sessionMemory && typeof sessionMemory.precompact === 'function') {
102
104
  await sessionMemory.precompact({ messages, sessionId, cwd, reason });
@@ -104,7 +106,11 @@ class CompactionService {
104
106
 
105
107
  const selection = this.selectTail(messages, { tailTokenBudget, keepRecentUserTurns });
106
108
  const tokensBefore = estimateMessagesTokens(messages);
107
- if (selection.head.length === 0) {
109
+ const summarizeAndContinue = tailMode === 'continue';
110
+ const summaryMessages = summarizeAndContinue
111
+ ? cloneMessages(messages)
112
+ : selection.head;
113
+ if (summaryMessages.length === 0) {
108
114
  const noCompaction = {
109
115
  compacted: false,
110
116
  reason: 'no_compactable_head',
@@ -117,7 +123,8 @@ class CompactionService {
117
123
  }
118
124
 
119
125
  const compactionId = newId('compaction');
120
- const summary = await this._summarize(selection.head, selection);
126
+ const summarySelection = { ...selection, head: summaryMessages };
127
+ const summary = await this._summarize(summaryMessages, summarySelection);
121
128
  const timestamp = this.now();
122
129
  const userMessage = {
123
130
  role: 'user',
@@ -127,7 +134,13 @@ class CompactionService {
127
134
  role: 'assistant',
128
135
  content: `Compaction summary:\n${summary}`,
129
136
  };
130
- const compactedMessages = [userMessage, assistantMessage, ...selection.tail];
137
+ const continuationMessage = {
138
+ role: 'user',
139
+ content: continuePrompt || 'Continue the Wall-E coding task from the compaction summary above. Re-read files or rerun commands if any exact detail is missing.',
140
+ };
141
+ const compactedMessages = summarizeAndContinue
142
+ ? [userMessage, assistantMessage, continuationMessage]
143
+ : [userMessage, assistantMessage, ...selection.tail];
131
144
  const tokensAfter = estimateMessagesTokens(compactedMessages);
132
145
  const metadata = {
133
146
  compactionId,
@@ -140,8 +153,9 @@ class CompactionService {
140
153
  tail_start_id: selection.tailStartId,
141
154
  tail_start_index: selection.tailStartIndex,
142
155
  tail_message_ids: selection.tail.map((msg, index) => getMessageId(msg, selection.tailStartIndex + index)),
143
- compacted_message_count: selection.head.length,
144
- retained_message_count: selection.tail.length,
156
+ tail_mode: summarizeAndContinue ? 'continue' : 'preserve',
157
+ compacted_message_count: summaryMessages.length,
158
+ retained_message_count: summarizeAndContinue ? 1 : selection.tail.length,
145
159
  tokens_before: tokensBefore,
146
160
  tokens_after: tokensAfter,
147
161
  summary,
@@ -84,6 +84,9 @@ class StreamProcessor extends EventEmitter {
84
84
  stopReason: '',
85
85
  status: 'running',
86
86
  errors: [],
87
+ toolErrors: [],
88
+ hadEdit: false,
89
+ verified: false,
87
90
  events: [],
88
91
  };
89
92
 
@@ -111,7 +114,7 @@ class StreamProcessor extends EventEmitter {
111
114
  const snapshot = await this.snapshotService.captureStepFinish({ sessionId, cwd, messageId: assistantMessageId });
112
115
  if (snapshot) await this._record(sessionId, cwd, 'snapshot', snapshot);
113
116
  }
114
- state.status = state.errors.length > 0 ? 'error' : 'finished';
117
+ state.status = 'finished';
115
118
  } catch (err) {
116
119
  state.status = 'error';
117
120
  state.errors.push(err.message);
@@ -135,6 +138,8 @@ class StreamProcessor extends EventEmitter {
135
138
  toolCalls: state.toolCalls,
136
139
  }),
137
140
  toolResultMessage: state.toolResults.length > 0 ? toolResultMessage(state.toolResults) : null,
141
+ hadEdit: state.hadEdit,
142
+ verified: state.verified,
138
143
  next: state.status === 'error' ? 'stop' : state.toolResults.length > 0 ? 'continue' : 'stop',
139
144
  };
140
145
  }
@@ -223,6 +228,8 @@ class StreamProcessor extends EventEmitter {
223
228
  input: call.input,
224
229
  });
225
230
  const result = await this.toolExecutor(call, { sessionId, cwd, model: state.model, provider: state.provider });
231
+ if (isEditTool(call.name) && !result?.error) state.hadEdit = true;
232
+ if (isSuccessfulTestCommand(call, result)) state.verified = true;
226
233
  state.toolResults.push({ toolCallId: call.id, name: call.name, result });
227
234
  await this._record(sessionId, cwd, 'tool', {
228
235
  state: 'completed',
@@ -231,7 +238,7 @@ class StreamProcessor extends EventEmitter {
231
238
  result,
232
239
  });
233
240
  } catch (err) {
234
- state.errors.push(err.message);
241
+ state.toolErrors.push(err.message);
235
242
  state.toolResults.push({ toolCallId: call.id, name: call.name, error: err.message });
236
243
  await this._record(sessionId, cwd, 'tool', {
237
244
  state: 'error',
@@ -262,7 +269,20 @@ class StreamProcessor extends EventEmitter {
262
269
  }
263
270
  }
264
271
 
272
+ function isEditTool(name) {
273
+ return ['edit_file', 'write_file', 'apply_patch', 'multi_edit'].includes(name);
274
+ }
275
+
276
+ function isSuccessfulTestCommand(call, result) {
277
+ if (call?.name !== 'run_shell') return false;
278
+ const command = String(call.input?.command || '');
279
+ if (!/\b(test|spec|jest|mocha|pytest|npm\s+test|node\s+test\.js)\b/i.test(command)) return false;
280
+ if (result?.error || result?.exitCode) return false;
281
+ return true;
282
+ }
283
+
265
284
  module.exports = {
266
285
  StreamProcessor,
267
286
  streamFromChat,
287
+ isSuccessfulTestCommand,
268
288
  };
@@ -153,9 +153,7 @@ function defaultSessionsDir(env = process.env) {
153
153
  if (env.WALL_E_DATA_DIR && env.WALL_E_DATA_DIR !== path.join(os.homedir(), '.walle', 'data')) {
154
154
  return path.join(env.WALL_E_DATA_DIR, 'sessions');
155
155
  }
156
- if (env.CTM_DATA_DIR && env.CTM_DATA_DIR !== path.join(os.homedir(), '.walle', 'data')) {
157
- return path.join(env.CTM_DATA_DIR, 'sessions');
158
- }
156
+ // CTM_DATA_DIR belongs to the task manager; Wall-E transcripts stay in Wall-E-owned storage.
159
157
  return path.join(os.homedir(), '.walle', 'sessions');
160
158
  }
161
159
 
@@ -180,7 +178,6 @@ function findSessionPath(sessionId, { transcriptPath = '', sessionsDir = default
180
178
  process.env.WALL_E_SESSIONS_DIR,
181
179
  process.env.WALLE_DEV_DIR ? path.join(process.env.WALLE_DEV_DIR, 'sessions') : '',
182
180
  process.env.WALL_E_DATA_DIR ? path.join(process.env.WALL_E_DATA_DIR, 'sessions') : '',
183
- process.env.CTM_DATA_DIR ? path.join(process.env.CTM_DATA_DIR, 'sessions') : '',
184
181
  path.join(os.homedir(), '.walle', 'sessions'),
185
182
  ].filter(Boolean);
186
183
  for (const root of [...new Set(roots)]) {