create-walle 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/bin/create-walle.js +195 -30
- package/bin/mcp-inject.js +18 -53
- package/package.json +3 -1
- package/template/claude-task-manager/approval-agent.js +7 -0
- package/template/claude-task-manager/docs/session-standup-command-center-design.md +242 -0
- package/template/claude-task-manager/git-utils.js +111 -3
- package/template/claude-task-manager/lib/session-history.js +144 -16
- package/template/claude-task-manager/lib/session-standup.js +409 -0
- package/template/claude-task-manager/lib/standup-attention.js +200 -0
- package/template/claude-task-manager/lib/status-hooks.js +8 -2
- package/template/claude-task-manager/lib/update-telemetry.js +114 -0
- package/template/claude-task-manager/lib/walle-default-model.js +55 -0
- package/template/claude-task-manager/lib/walle-mcp-auto-config.js +62 -0
- package/template/claude-task-manager/lib/walle-supervisor.js +83 -19
- package/template/claude-task-manager/lib/worktree-cwd.js +82 -0
- package/template/claude-task-manager/providers/codex-mcp.js +104 -0
- package/template/claude-task-manager/providers/index.js +2 -0
- package/template/claude-task-manager/public/css/setup.css +2 -1
- package/template/claude-task-manager/public/css/walle.css +5 -0
- package/template/claude-task-manager/public/index.html +1596 -283
- package/template/claude-task-manager/public/js/session-search-utils.js +171 -1
- package/template/claude-task-manager/public/js/setup.js +62 -19
- package/template/claude-task-manager/public/js/stream-view.js +55 -6
- package/template/claude-task-manager/public/js/walle-session.js +73 -16
- package/template/claude-task-manager/public/js/walle.js +34 -2
- package/template/claude-task-manager/server.js +780 -177
- package/template/claude-task-manager/session-integrity.js +58 -15
- package/template/claude-task-manager/workers/approval-widget-validator.js +15 -5
- package/template/claude-task-manager/workers/state-detectors/codex.js +6 -0
- package/template/package.json +1 -1
- package/template/wall-e/agent.js +36 -7
- package/template/wall-e/api-walle.js +72 -20
- package/template/wall-e/coding/stream-processor.js +22 -2
- package/template/wall-e/coding-orchestrator.js +26 -6
- package/template/wall-e/eval/agent-runner.js +16 -4
- package/template/wall-e/eval/benchmark-generator.js +21 -1
- package/template/wall-e/eval/benchmarks/coding-agent.json +0 -596
- package/template/wall-e/eval/codex-cli-baseline.js +633 -0
- package/template/wall-e/eval/eval-orchestrator.js +3 -3
- package/template/wall-e/eval/run-agent-benchmarks.js +11 -3
- package/template/wall-e/eval/run-codex-cli-baseline.js +177 -0
- package/template/wall-e/lib/mcp-integration.js +220 -0
- package/template/wall-e/llm/ollama.js +47 -8
- package/template/wall-e/llm/ollama.plugin.json +1 -1
- package/template/wall-e/llm/tool-adapter.js +1 -0
- package/template/wall-e/loops/ingest.js +42 -8
- package/template/wall-e/mcp-server.js +272 -10
- package/template/wall-e/memory/ctm-session-context.js +910 -0
- package/template/wall-e/server.js +26 -1
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/SKILL.md +20 -0
- package/template/wall-e/skills/_bundled/scan-ctm-sessions/run.js +43 -0
- package/template/wall-e/skills/skill-planner.js +52 -3
- package/template/wall-e/tools/builtin-middleware.js +55 -2
- package/template/wall-e/tools/shell-policy.js +1 -1
- package/template/wall-e/tools/slack-owner.js +104 -0
- package/template/website/index.html +2 -2
- package/template/builder-journal.md +0 -17
|
@@ -15,11 +15,60 @@
|
|
|
15
15
|
const fs = require('fs');
|
|
16
16
|
const path = require('path');
|
|
17
17
|
const claudeDesktopSessions = require('./lib/claude-desktop-sessions');
|
|
18
|
+
const { codexRolloutIdFromPath, findCodexSessionFiles } = require('./lib/session-history');
|
|
18
19
|
|
|
19
20
|
const CLAUDE_PROJECTS_DIR = path.join(process.env.HOME, '.claude', 'projects');
|
|
20
21
|
|
|
21
22
|
// --- Detection ---
|
|
22
23
|
|
|
24
|
+
/**
 * Derive the id used to index a session file from its path.
 *
 * Resolution order:
 *   1. `sessionId` of whatever `claudeDesktopSessions.parseVirtualSessionPath`
 *      returns, when that result is truthy.
 *   2. The value of `codexRolloutIdFromPath`, when truthy.
 *   3. The basename with a trailing `.jsonl` / `.jsonl.bak` removed. If that
 *      stem ends in a UUID, only the UUID is returned, lowercased; otherwise
 *      the stem is returned unchanged.
 *
 * @param {string} filePath - Path of the session file.
 * @returns {string} Session-file id.
 */
function sessionFileIdFromPath(filePath) {
  const virtualSession = claudeDesktopSessions.parseVirtualSessionPath(filePath);
  if (virtualSession) {
    return virtualSession.sessionId;
  }

  const rolloutId = codexRolloutIdFromPath(filePath);
  if (rolloutId) {
    return rolloutId;
  }

  const stem = path.basename(filePath).replace(/\.jsonl(\.bak)?$/, '');
  const trailingUuid = /([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$/i.exec(stem);
  if (trailingUuid === null) {
    return stem;
  }
  // Lowercase so ids compare equal regardless of how the file name was cased.
  return trailingUuid[1].toLowerCase();
}
|
|
33
|
+
|
|
34
|
+
/**
 * Build a file-index entry for `filePath`, or return null when it is unusable.
 *
 * @param {string} filePath - Candidate session file path; falsy yields null.
 * @param {?string} expectedFileId - When truthy, the id derived from the path
 *   must equal this value or null is returned.
 * @param {string} [projectEntry=''] - Passed through unchanged onto the entry.
 * @returns {?{filePath: string, stat: fs.Stats, projectEntry: string}} The
 *   entry, or null when the path is empty, the id does not match, the stat
 *   call throws, or the stat target is not a regular file.
 */
function fileEntryFromPath(filePath, expectedFileId, projectEntry = '') {
  if (!filePath) {
    return null;
  }

  // The id is derived before the comparison is considered, so any error from
  // id derivation propagates to the caller (it is outside the try below).
  const derivedId = sessionFileIdFromPath(filePath);
  const idAccepted = !expectedFileId || derivedId === expectedFileId;
  if (!idAccepted) {
    return null;
  }

  try {
    // The stat target comes from sourcePathForStat rather than filePath itself.
    const stat = fs.statSync(claudeDesktopSessions.sourcePathForStat(filePath));
    return stat.isFile() ? { filePath, stat, projectEntry } : null;
  } catch {
    // Missing or unreadable file: report "no entry" instead of throwing.
    return null;
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Register `filePath` in `fileIndex` under its derived session-file id.
 *
 * Nothing is written when no entry can be built for the path. An existing
 * entry under the same id is overwritten.
 *
 * @param {Object<string, object>} fileIndex - Index mutated in place.
 * @param {string} filePath - Session file path to register.
 * @param {string} projectEntry - Stored on the entry as-is.
 * @returns {void}
 */
function addFileIndexEntry(fileIndex, filePath, projectEntry) {
  // No expected id: every path that stats as a regular file is accepted.
  const entry = fileEntryFromPath(filePath, null, projectEntry);
  if (!entry) {
    return;
  }
  const indexKey = sessionFileIdFromPath(filePath);
  fileIndex[indexKey] = entry;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Locate the on-disk file entry for a DB session row.
 *
 * Lookup order:
 *   1. `fileIndex[expectedFileId]`.
 *   2. The row's stored `jsonl_path`, if it yields an entry with that id.
 *   3. For rows whose provider is 'codex' or whose stored path contains a
 *      `.codex/sessions/` segment: the paths yielded by
 *      `findCodexSessionFiles(expectedFileId)`, first match wins.
 *
 * @param {?{provider?: string, jsonl_path?: string}} row - DB row (may be nullish).
 * @param {string} expectedFileId - Id the file must resolve to; falsy yields null.
 * @param {Object<string, object>} fileIndex - Index built from the file scan.
 * @returns {?object} Matching file entry, or null when none is found.
 */
function resolveDbSessionFile(row, expectedFileId, fileIndex) {
  if (!expectedFileId) {
    return null;
  }

  const indexed = fileIndex[expectedFileId];
  if (indexed) {
    return indexed;
  }

  const fromStoredPath = fileEntryFromPath(row?.jsonl_path, expectedFileId);
  if (fromStoredPath) {
    return fromStoredPath;
  }

  const codexSessionsSegment = `${path.sep}.codex${path.sep}sessions${path.sep}`;
  const isCodexRow = row?.provider === 'codex'
    || String(row?.jsonl_path || '').includes(codexSessionsSegment);
  if (!isCodexRow) {
    return null;
  }

  try {
    for (const candidatePath of findCodexSessionFiles(expectedFileId)) {
      const entry = fileEntryFromPath(candidatePath, expectedFileId);
      if (entry) {
        return entry;
      }
    }
  } catch {
    // Best-effort lookup: any failure here is treated as "not found".
  }

  return null;
}
|
|
71
|
+
|
|
23
72
|
function dbTimestampFromIso(value) {
|
|
24
73
|
if (!value) return '';
|
|
25
74
|
const ms = new Date(value).getTime();
|
|
@@ -138,7 +187,7 @@ function detectMismatches(db, getAllSessionFiles) {
|
|
|
138
187
|
} catch {}
|
|
139
188
|
const slugCol = hasSlugColumn ? 'a.slug' : "'' AS slug";
|
|
140
189
|
allSessions = db.prepare(`
|
|
141
|
-
SELECT c.id, c.title, c.user_renamed, c.starred, c.project_path, c.cwd,
|
|
190
|
+
SELECT c.id, c.provider, c.title, c.user_renamed, c.starred, c.project_path, c.cwd,
|
|
142
191
|
c.created_at, c.updated_at,
|
|
143
192
|
a.agent_session_id, a.jsonl_path, a.file_size, a.first_message,
|
|
144
193
|
a.modified_at, a.hostname, a.model, a.git_branch, a.user_msg_count,
|
|
@@ -156,12 +205,7 @@ function detectMismatches(db, getAllSessionFiles) {
|
|
|
156
205
|
const fileIndex = {}; // uuid -> { filePath, stat, projectEntry }
|
|
157
206
|
try {
|
|
158
207
|
for (const { filePath, projectEntry } of getAllSessionFiles()) {
|
|
159
|
-
|
|
160
|
-
const uuid = virtual ? virtual.sessionId : path.basename(filePath).replace(/\.jsonl(\.bak)?$/, '');
|
|
161
|
-
try {
|
|
162
|
-
const stat = fs.statSync(claudeDesktopSessions.sourcePathForStat(filePath));
|
|
163
|
-
fileIndex[uuid] = { filePath, stat, projectEntry };
|
|
164
|
-
} catch {}
|
|
208
|
+
addFileIndexEntry(fileIndex, filePath, projectEntry);
|
|
165
209
|
}
|
|
166
210
|
} catch (e) {
|
|
167
211
|
issues.push({ type: 'scan_error', severity: 'warning', sessionId: null,
|
|
@@ -183,7 +227,7 @@ function detectMismatches(db, getAllSessionFiles) {
|
|
|
183
227
|
|
|
184
228
|
// Skip DB-only rows with no file expectation (legacy tabs with no agent_session_id)
|
|
185
229
|
const expectedFileId = (agentId && agentId !== sid) ? agentId : sid;
|
|
186
|
-
const file = fileIndex
|
|
230
|
+
const file = resolveDbSessionFile(row, expectedFileId, fileIndex);
|
|
187
231
|
|
|
188
232
|
// Check 1: Missing file
|
|
189
233
|
if (!file && row.file_size > 0) {
|
|
@@ -193,6 +237,7 @@ function detectMismatches(db, getAllSessionFiles) {
|
|
|
193
237
|
expected_file_id: expectedFileId,
|
|
194
238
|
db_file_size: row.file_size,
|
|
195
239
|
db_jsonl_path: row.jsonl_path || '',
|
|
240
|
+
db_provider: row.provider || '',
|
|
196
241
|
db_title: row.title || '',
|
|
197
242
|
},
|
|
198
243
|
suggestion: 'File may have been deleted or moved. Check .jsonl.bak variant.',
|
|
@@ -215,6 +260,7 @@ function detectMismatches(db, getAllSessionFiles) {
|
|
|
215
260
|
db_file_size: row.file_size,
|
|
216
261
|
actual_file_size: file.stat.size,
|
|
217
262
|
size_diff: sizeDiff,
|
|
263
|
+
db_jsonl_path: row.jsonl_path || '',
|
|
218
264
|
},
|
|
219
265
|
suggestion: 'DB metadata is stale — will be refreshed on next session list load.',
|
|
220
266
|
});
|
|
@@ -439,12 +485,7 @@ function recoverMismatches(db, issues, getAllSessionFiles) {
|
|
|
439
485
|
const fileIndex = {};
|
|
440
486
|
try {
|
|
441
487
|
for (const { filePath, projectEntry } of getAllSessionFiles()) {
|
|
442
|
-
|
|
443
|
-
const uuid = virtual ? virtual.sessionId : path.basename(filePath).replace(/\.jsonl(\.bak)?$/, '');
|
|
444
|
-
try {
|
|
445
|
-
const stat = fs.statSync(claudeDesktopSessions.sourcePathForStat(filePath));
|
|
446
|
-
fileIndex[uuid] = { filePath, stat, projectEntry };
|
|
447
|
-
} catch {}
|
|
488
|
+
addFileIndexEntry(fileIndex, filePath, projectEntry);
|
|
448
489
|
}
|
|
449
490
|
} catch {}
|
|
450
491
|
|
|
@@ -505,7 +546,9 @@ function recoverMismatches(db, issues, getAllSessionFiles) {
|
|
|
505
546
|
case 'stale_metadata': {
|
|
506
547
|
// Refresh metadata from actual file
|
|
507
548
|
const fileId = issue.details.file_id;
|
|
508
|
-
const file = fileIndex[fileId]
|
|
549
|
+
const file = fileIndex[fileId]
|
|
550
|
+
|| fileEntryFromPath(issue.details?.db_jsonl_path, fileId)
|
|
551
|
+
|| resolveDbSessionFile({ provider: 'codex', jsonl_path: issue.details?.db_jsonl_path || '' }, fileId, fileIndex);
|
|
509
552
|
if (!file) { result.skipped++; break; }
|
|
510
553
|
try {
|
|
511
554
|
db.prepare('UPDATE agent_sessions SET file_size = ?, modified_at = ?, updated_at = datetime(\'now\') WHERE ctm_session_id = ?')
|
|
@@ -28,12 +28,12 @@ const ABOVE_ANCHOR_DEPTH = 40;
|
|
|
28
28
|
// Claude Code: "Esc to cancel". Codex: "Press enter to confirm or esc to cancel".
|
|
29
29
|
const ANCHOR_RE = /Esc to cancel|esc to cancel|Press enter to confirm/;
|
|
30
30
|
|
|
31
|
-
//
|
|
31
|
+
// Approval-option pattern. Accepts an optional selection-arrow prefix in any of
|
|
32
32
|
// the forms different CLIs use: ❯ (Claude Code), ›/▶/▸ (Cursor/others), or
|
|
33
33
|
// plain ASCII > (Codex). Without this, Codex's "> 1. Yes, proceed (y)" would
|
|
34
34
|
// be skipped over and the validator would lock onto option 2 ("2. Yes, ...")
|
|
35
35
|
// — which is unstyled in Codex's renderer and trips no-widget-formatting.
|
|
36
|
-
const YES_RE = /^\s*(?:[❯›▶▸>]\s*)?\d+\.\s*Yes\b/i;
|
|
36
|
+
const YES_RE = /^\s*(?:[❯›▶▸>]\s*)?\d+\.\s*(?:Yes|Allow)\b/i;
|
|
37
37
|
|
|
38
38
|
/**
|
|
39
39
|
* Check if the terminal is currently displaying an active approval widget.
|
|
@@ -142,9 +142,10 @@ function _hasWidgetFormatting(buf, yesRow, totalRows) {
|
|
|
142
142
|
const yesText = yesLine.translateToString(true);
|
|
143
143
|
if (/[❯›▶▸]/.test(yesText)) return true;
|
|
144
144
|
|
|
145
|
-
// Check for
|
|
146
|
-
|
|
147
|
-
|
|
145
|
+
// Check for a selection marker near the approval options. Codex MCP forms can
|
|
146
|
+
// select option 2 ("Allow for this session"), while option 1 is the first
|
|
147
|
+
// approval-shaped line used for anchoring.
|
|
148
|
+
for (let row = Math.max(0, yesRow - 1); row < Math.min(totalRows, yesRow + 8); row++) {
|
|
148
149
|
const line = buf.getLine(buf.viewportY + row);
|
|
149
150
|
if (!line) continue;
|
|
150
151
|
const text = line.translateToString(true);
|
|
@@ -152,6 +153,15 @@ function _hasWidgetFormatting(buf, yesRow, totalRows) {
|
|
|
152
153
|
if (/[❯›▶▸]/.test(text)) return true;
|
|
153
154
|
}
|
|
154
155
|
|
|
156
|
+
// Check for "❯" marker anywhere in bottom 5 rows for prompts whose option
|
|
157
|
+
// block is pushed down by long wrapped content.
|
|
158
|
+
for (let row = Math.max(0, totalRows - 5); row < totalRows; row++) {
|
|
159
|
+
const line = buf.getLine(buf.viewportY + row);
|
|
160
|
+
if (!line) continue;
|
|
161
|
+
const text = line.translateToString(true);
|
|
162
|
+
if (/[❯›▶▸]/.test(text)) return true;
|
|
163
|
+
}
|
|
164
|
+
|
|
155
165
|
// Check for ANSI foreground color on the Yes-option line.
|
|
156
166
|
// xterm's BufferLine.getCell(x) returns an IBufferCell with .getFgColor()
|
|
157
167
|
// (0 = default). Any non-default fg color = styled = widget.
|
|
@@ -47,6 +47,12 @@ function isCodexStatusRedraw(data) {
|
|
|
47
47
|
module.exports = {
|
|
48
48
|
...baseDetector,
|
|
49
49
|
id: 'codex',
|
|
50
|
+
// Codex's ratatui status frames arrive in bursts. A short Claude-style
|
|
51
|
+
// debounce lets the sidebar bounce between Running and Waiting/Idle while the
|
|
52
|
+
// terminal still says "Working". Keep the busy state stable long enough for
|
|
53
|
+
// multiple server heartbeats to confirm or renew it, while explicit
|
|
54
|
+
// approval/choice prompts still bypass this elsewhere.
|
|
55
|
+
idleDebounceMs: 15000,
|
|
50
56
|
|
|
51
57
|
isActiveChunk(data) {
|
|
52
58
|
if (!baseDetector.isActiveChunk(data)) return false;
|
package/template/package.json
CHANGED
package/template/wall-e/agent.js
CHANGED
|
@@ -109,7 +109,8 @@ function bootstrapSkills() {
|
|
|
109
109
|
description: 'Scan Claude Code session files for new conversations',
|
|
110
110
|
trigger_type: 'interval',
|
|
111
111
|
trigger_config: JSON.stringify({ interval_ms: 60000 }),
|
|
112
|
-
prompt_template: '
|
|
112
|
+
prompt_template: 'INTERNAL_SKILL:scan-ctm-sessions',
|
|
113
|
+
execution: 'script',
|
|
113
114
|
});
|
|
114
115
|
|
|
115
116
|
brain.insertSkill({
|
|
@@ -140,16 +141,43 @@ function bootstrapSkills() {
|
|
|
140
141
|
function syncBundledSkills() {
|
|
141
142
|
const filesystemSkills = loadAllSkills();
|
|
142
143
|
const dbSkills = brain.listSkills({});
|
|
143
|
-
const
|
|
144
|
+
const dbByName = new Map(dbSkills.map(s => [s.name, s]));
|
|
145
|
+
const dbNames = new Set(dbByName.keys());
|
|
144
146
|
|
|
145
147
|
let added = 0;
|
|
148
|
+
let updated = 0;
|
|
146
149
|
for (const skill of filesystemSkills) {
|
|
147
|
-
if (dbNames.has(skill.name)) continue;
|
|
148
|
-
|
|
149
150
|
const triggerType = (skill.trigger && skill.trigger.type) || skill.execution || 'manual';
|
|
150
151
|
const triggerConfig = skill.trigger && skill.trigger.interval_ms
|
|
151
152
|
? JSON.stringify({ interval_ms: skill.trigger.interval_ms })
|
|
152
153
|
: null;
|
|
154
|
+
const promptTemplate = skill.execution === 'script'
|
|
155
|
+
? `INTERNAL_SKILL:${skill.name}`
|
|
156
|
+
: skill.instructions || '';
|
|
157
|
+
|
|
158
|
+
if (dbNames.has(skill.name)) {
|
|
159
|
+
const existing = dbByName.get(skill.name);
|
|
160
|
+
// Upgrade legacy prompt-based CTM scanning to the deterministic script
|
|
161
|
+
// path. Session continuity must not depend on live LLM/network access.
|
|
162
|
+
if (skill.name === 'scan-ctm-sessions' && skill.execution === 'script' && existing) {
|
|
163
|
+
const updates = {};
|
|
164
|
+
if (existing.execution !== 'script') updates.execution = 'script';
|
|
165
|
+
if (existing.prompt_template !== promptTemplate) updates.prompt_template = promptTemplate;
|
|
166
|
+
if (existing.trigger_type !== triggerType) updates.trigger_type = triggerType;
|
|
167
|
+
if (triggerConfig && existing.trigger_config !== triggerConfig) updates.trigger_config = triggerConfig;
|
|
168
|
+
if (existing.auto_disabled_at) {
|
|
169
|
+
updates.enabled = 1;
|
|
170
|
+
updates.auto_disabled_at = null;
|
|
171
|
+
}
|
|
172
|
+
if (existing.auto_disabled_reason) updates.auto_disabled_reason = null;
|
|
173
|
+
if (Object.keys(updates).length > 0) {
|
|
174
|
+
brain.updateSkill(existing.id, updates);
|
|
175
|
+
updated++;
|
|
176
|
+
console.log(`[wall-e] Updated bundled skill: ${skill.name}`);
|
|
177
|
+
}
|
|
178
|
+
}
|
|
179
|
+
continue;
|
|
180
|
+
}
|
|
153
181
|
|
|
154
182
|
brain.insertSkill({
|
|
155
183
|
name: skill.name,
|
|
@@ -159,9 +187,7 @@ function syncBundledSkills() {
|
|
|
159
187
|
// Persist the legacy `INTERNAL_SKILL:` marker so downgrades can still
|
|
160
188
|
// dispatch script skills via the prompt_template fallback. The schema
|
|
161
189
|
// column `execution` is the authoritative source going forward.
|
|
162
|
-
prompt_template:
|
|
163
|
-
? `INTERNAL_SKILL:${skill.name}`
|
|
164
|
-
: skill.instructions || '',
|
|
190
|
+
prompt_template: promptTemplate,
|
|
165
191
|
execution: skill.execution === 'script' ? 'script' : 'agent',
|
|
166
192
|
});
|
|
167
193
|
added++;
|
|
@@ -171,6 +197,9 @@ function syncBundledSkills() {
|
|
|
171
197
|
if (added > 0) {
|
|
172
198
|
console.log(`[wall-e] Synced ${added} new bundled skill(s) to DB`);
|
|
173
199
|
}
|
|
200
|
+
if (updated > 0) {
|
|
201
|
+
console.log(`[wall-e] Updated ${updated} bundled skill(s) in DB`);
|
|
202
|
+
}
|
|
174
203
|
}
|
|
175
204
|
|
|
176
205
|
function bootstrapTasks() {
|
|
@@ -364,25 +364,78 @@ function handleWalleApi(req, res, url) {
|
|
|
364
364
|
if (p === '/api/wall-e/slack/status' && m === 'GET') {
|
|
365
365
|
try {
|
|
366
366
|
const slackMcp = require('./tools/slack-mcp');
|
|
367
|
+
const { getSlackOwnerRepairState } = require('./tools/slack-owner');
|
|
367
368
|
const token = slackMcp.loadToken();
|
|
368
|
-
|
|
369
|
+
const owner = getSlackOwnerRepairState();
|
|
370
|
+
jsonResponse(res, {
|
|
371
|
+
data: {
|
|
372
|
+
authenticated: !!token?.access_token,
|
|
373
|
+
team: token?.team_name,
|
|
374
|
+
user: token?.user_id,
|
|
375
|
+
obtained_at: token?.obtained_at,
|
|
376
|
+
owner_configured: owner.configured,
|
|
377
|
+
owner_can_repair: owner.canRepair,
|
|
378
|
+
},
|
|
379
|
+
});
|
|
369
380
|
} catch (e) {
|
|
370
381
|
jsonResponse(res, { data: { authenticated: false } });
|
|
371
382
|
}
|
|
372
383
|
return true;
|
|
373
384
|
}
|
|
374
385
|
|
|
386
|
+
// POST /api/wall-e/slack/repair-owner — derive Slack owner id from OAuth token
|
|
387
|
+
if (p === '/api/wall-e/slack/repair-owner' && m === 'POST') {
|
|
388
|
+
try {
|
|
389
|
+
const { repairSlackOwnerIdentity } = require('./tools/slack-owner');
|
|
390
|
+
const { clearServiceAlerts } = require('./skills/skill-planner');
|
|
391
|
+
const result = repairSlackOwnerIdentity({ persist: true });
|
|
392
|
+
if (!result.ok) {
|
|
393
|
+
return jsonResponse(res, {
|
|
394
|
+
ok: false,
|
|
395
|
+
error: result.error || 'Could not repair Slack owner identity',
|
|
396
|
+
needsSlackAuth: !!result.needsSlackAuth,
|
|
397
|
+
}, result.needsSlackAuth ? 409 : 500), true;
|
|
398
|
+
}
|
|
399
|
+
clearServiceAlerts('slack');
|
|
400
|
+
return jsonResponse(res, {
|
|
401
|
+
ok: true,
|
|
402
|
+
user_id_configured: true,
|
|
403
|
+
source: result.source,
|
|
404
|
+
persisted: !!result.persisted,
|
|
405
|
+
already_configured: !!result.alreadyConfigured,
|
|
406
|
+
}), true;
|
|
407
|
+
} catch (e) {
|
|
408
|
+
return jsonResponse(res, { ok: false, error: e.message }, 500), true;
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
|
|
375
412
|
// POST /api/wall-e/slack/auth — start OAuth flow (opens browser)
|
|
376
413
|
if (p === '/api/wall-e/slack/auth' && m === 'POST') {
|
|
377
414
|
try {
|
|
378
415
|
const slackMcp = require('./tools/slack-mcp');
|
|
379
416
|
// If already authenticated, return immediately
|
|
380
417
|
if (slackMcp.isAuthenticatedSync()) {
|
|
418
|
+
try {
|
|
419
|
+
const { repairSlackOwnerIdentity } = require('./tools/slack-owner');
|
|
420
|
+
const { clearServiceAlerts } = require('./skills/skill-planner');
|
|
421
|
+
const repaired = repairSlackOwnerIdentity({ persist: true });
|
|
422
|
+
if (repaired.ok) clearServiceAlerts('slack');
|
|
423
|
+
} catch (repairErr) {
|
|
424
|
+
console.warn('[wall-e] Slack owner repair skipped:', repairErr.message);
|
|
425
|
+
}
|
|
381
426
|
jsonResponse(res, { ok: true, already: true });
|
|
382
427
|
return true;
|
|
383
428
|
}
|
|
384
429
|
// Start OAuth — opens browser, temp server on port 3118 handles callback
|
|
385
430
|
slackMcp.authenticate().then(() => {
|
|
431
|
+
try {
|
|
432
|
+
const { repairSlackOwnerIdentity } = require('./tools/slack-owner');
|
|
433
|
+
const { clearServiceAlerts } = require('./skills/skill-planner');
|
|
434
|
+
const repaired = repairSlackOwnerIdentity({ persist: true });
|
|
435
|
+
if (repaired.ok) clearServiceAlerts('slack');
|
|
436
|
+
} catch (repairErr) {
|
|
437
|
+
console.error('[wall-e] Slack owner repair failed:', repairErr.message);
|
|
438
|
+
}
|
|
386
439
|
console.log('[wall-e] Slack OAuth completed');
|
|
387
440
|
}).catch(err => {
|
|
388
441
|
console.error('[wall-e] Slack OAuth failed:', err.message);
|
|
@@ -714,24 +767,9 @@ function handleWalleApi(req, res, url) {
|
|
|
714
767
|
// GET /api/wall-e/mcp/integrations — check which AI tools have Wall-E MCP configured
|
|
715
768
|
if (p === '/api/wall-e/mcp/integrations' && m === 'GET') {
|
|
716
769
|
try {
|
|
717
|
-
const
|
|
718
|
-
const { MCP_TARGETS } = require('../create-walle/bin/mcp-inject');
|
|
770
|
+
const { detectMcpIntegrations } = require('./lib/mcp-integration');
|
|
719
771
|
const wallePort = parseInt(process.env.WALL_E_PORT) || 3457;
|
|
720
|
-
const
|
|
721
|
-
const results = MCP_TARGETS.map(target => {
|
|
722
|
-
const detectPath = path.join(home, target.detectDir);
|
|
723
|
-
const configPath = path.join(home, target.configPath);
|
|
724
|
-
if (!fs.existsSync(detectPath)) return { tool: target.tool, status: 'not_installed' };
|
|
725
|
-
try {
|
|
726
|
-
const config = JSON.parse(fs.readFileSync(configPath, 'utf8'));
|
|
727
|
-
const entry = config?.mcpServers?.['wall-e'];
|
|
728
|
-
if (entry && entry.url === `http://localhost:${wallePort}/mcp`) return { tool: target.tool, status: 'configured', configPath };
|
|
729
|
-
if (entry) return { tool: target.tool, status: 'wrong_port', configPath };
|
|
730
|
-
return { tool: target.tool, status: 'not_configured', configPath };
|
|
731
|
-
} catch {
|
|
732
|
-
return { tool: target.tool, status: 'not_configured', configPath };
|
|
733
|
-
}
|
|
734
|
-
});
|
|
772
|
+
const results = detectMcpIntegrations(wallePort);
|
|
735
773
|
jsonResponse(res, { data: results, wallePort });
|
|
736
774
|
} catch (e) {
|
|
737
775
|
jsonResponse(res, { data: [], error: e.message });
|
|
@@ -742,9 +780,9 @@ function handleWalleApi(req, res, url) {
|
|
|
742
780
|
// POST /api/wall-e/mcp/inject — run MCP config injection for all detected AI tools
|
|
743
781
|
if (p === '/api/wall-e/mcp/inject' && m === 'POST') {
|
|
744
782
|
try {
|
|
745
|
-
const {
|
|
783
|
+
const { ensureMcpIntegrations } = require('./lib/mcp-integration');
|
|
746
784
|
const wallePort = parseInt(process.env.WALL_E_PORT) || 3457;
|
|
747
|
-
const results =
|
|
785
|
+
const results = ensureMcpIntegrations(wallePort);
|
|
748
786
|
const added = results.filter(r => r.action === 'added' || r.action === 'updated').length;
|
|
749
787
|
try { require('./telemetry').track('mcp_inject', { added, total: results.length }); } catch {}
|
|
750
788
|
jsonResponse(res, { ok: true, results });
|
|
@@ -754,6 +792,20 @@ function handleWalleApi(req, res, url) {
|
|
|
754
792
|
return true;
|
|
755
793
|
}
|
|
756
794
|
|
|
795
|
+
// GET /api/wall-e/mcp/test - verify the live Wall-E MCP endpoint responds
|
|
796
|
+
if (p === '/api/wall-e/mcp/test' && m === 'GET') {
|
|
797
|
+
try {
|
|
798
|
+
const { testWallEMcpEndpoint } = require('./lib/mcp-integration');
|
|
799
|
+
const wallePort = parseInt(process.env.WALL_E_PORT) || 3457;
|
|
800
|
+
testWallEMcpEndpoint(wallePort, { timeoutMs: 1500 })
|
|
801
|
+
.then(result => jsonResponse(res, { data: result, wallePort }))
|
|
802
|
+
.catch(e => jsonResponse(res, { data: { ok: false, error: e.message }, wallePort }, 500));
|
|
803
|
+
} catch (e) {
|
|
804
|
+
jsonResponse(res, { data: { ok: false, error: e.message } }, 500);
|
|
805
|
+
}
|
|
806
|
+
return true;
|
|
807
|
+
}
|
|
808
|
+
|
|
757
809
|
// GET /api/wall-e/status
|
|
758
810
|
if (p === '/api/wall-e/status' && m === 'GET') {
|
|
759
811
|
const result = getStatus();
|
|
@@ -84,6 +84,9 @@ class StreamProcessor extends EventEmitter {
|
|
|
84
84
|
stopReason: '',
|
|
85
85
|
status: 'running',
|
|
86
86
|
errors: [],
|
|
87
|
+
toolErrors: [],
|
|
88
|
+
hadEdit: false,
|
|
89
|
+
verified: false,
|
|
87
90
|
events: [],
|
|
88
91
|
};
|
|
89
92
|
|
|
@@ -111,7 +114,7 @@ class StreamProcessor extends EventEmitter {
|
|
|
111
114
|
const snapshot = await this.snapshotService.captureStepFinish({ sessionId, cwd, messageId: assistantMessageId });
|
|
112
115
|
if (snapshot) await this._record(sessionId, cwd, 'snapshot', snapshot);
|
|
113
116
|
}
|
|
114
|
-
state.status =
|
|
117
|
+
state.status = 'finished';
|
|
115
118
|
} catch (err) {
|
|
116
119
|
state.status = 'error';
|
|
117
120
|
state.errors.push(err.message);
|
|
@@ -135,6 +138,8 @@ class StreamProcessor extends EventEmitter {
|
|
|
135
138
|
toolCalls: state.toolCalls,
|
|
136
139
|
}),
|
|
137
140
|
toolResultMessage: state.toolResults.length > 0 ? toolResultMessage(state.toolResults) : null,
|
|
141
|
+
hadEdit: state.hadEdit,
|
|
142
|
+
verified: state.verified,
|
|
138
143
|
next: state.status === 'error' ? 'stop' : state.toolResults.length > 0 ? 'continue' : 'stop',
|
|
139
144
|
};
|
|
140
145
|
}
|
|
@@ -223,6 +228,8 @@ class StreamProcessor extends EventEmitter {
|
|
|
223
228
|
input: call.input,
|
|
224
229
|
});
|
|
225
230
|
const result = await this.toolExecutor(call, { sessionId, cwd, model: state.model, provider: state.provider });
|
|
231
|
+
if (isEditTool(call.name) && !result?.error) state.hadEdit = true;
|
|
232
|
+
if (isSuccessfulTestCommand(call, result)) state.verified = true;
|
|
226
233
|
state.toolResults.push({ toolCallId: call.id, name: call.name, result });
|
|
227
234
|
await this._record(sessionId, cwd, 'tool', {
|
|
228
235
|
state: 'completed',
|
|
@@ -231,7 +238,7 @@ class StreamProcessor extends EventEmitter {
|
|
|
231
238
|
result,
|
|
232
239
|
});
|
|
233
240
|
} catch (err) {
|
|
234
|
-
state.
|
|
241
|
+
state.toolErrors.push(err.message);
|
|
235
242
|
state.toolResults.push({ toolCallId: call.id, name: call.name, error: err.message });
|
|
236
243
|
await this._record(sessionId, cwd, 'tool', {
|
|
237
244
|
state: 'error',
|
|
@@ -262,7 +269,20 @@ class StreamProcessor extends EventEmitter {
|
|
|
262
269
|
}
|
|
263
270
|
}
|
|
264
271
|
|
|
272
|
+
/**
 * Report whether a tool name is one of the file-modifying tools.
 *
 * @param {string} name - Tool name from a tool call.
 * @returns {boolean} True for 'edit_file', 'write_file', 'apply_patch' or
 *   'multi_edit' (exact, case-sensitive match); false for anything else.
 */
function isEditTool(name) {
  switch (name) {
    case 'edit_file':
    case 'write_file':
    case 'apply_patch':
    case 'multi_edit':
      return true;
    default:
      return false;
  }
}
|
|
275
|
+
|
|
276
|
+
/**
 * Decide whether a tool call was a test run that completed successfully.
 *
 * @param {?{name?: string, input?: {command?: string}}} call - Tool call to
 *   inspect. Only `run_shell` calls can qualify.
 * @param {*} result - Value produced by the tool executor for this call.
 * @returns {boolean} True only when the command text looks like a test
 *   invocation AND a result was produced AND that result carries neither a
 *   truthy `error` nor a non-zero `exitCode`.
 */
function isSuccessfulTestCommand(call, result) {
  if (call?.name !== 'run_shell') return false;

  // Heuristic match on the command text: any whole-word occurrence of a known
  // test-runner keyword counts as a test invocation.
  const command = String(call.input?.command || '');
  if (!/\b(test|spec|jest|mocha|pytest|npm\s+test|node\s+test\.js)\b/i.test(command)) return false;

  // A missing result is no evidence that the tests ran, let alone passed.
  // Without this guard a null/undefined result slips through the checks below
  // and is reported as a verified test run.
  if (result == null) return false;

  // exitCode 0 (or an absent exitCode) is not a failure; any truthy error or
  // non-zero exit code is.
  if (result.error || result.exitCode) return false;
  return true;
}
|
|
283
|
+
|
|
265
284
|
module.exports = {
|
|
266
285
|
StreamProcessor,
|
|
267
286
|
streamFromChat,
|
|
287
|
+
isSuccessfulTestCommand,
|
|
268
288
|
};
|
|
@@ -750,7 +750,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
750
750
|
|
|
751
751
|
const mw = opts.middleware || (() => {
|
|
752
752
|
const m = new CodingMiddleware();
|
|
753
|
-
registerBuiltinMiddleware(m, { cwd, provider: llm?.type, model: modelId, claudeMd: opts.claudeMd, mode: opts.mode, taskEnv: opts.env });
|
|
753
|
+
registerBuiltinMiddleware(m, { cwd, provider: llm?.type, model: modelId, claudeMd: opts.claudeMd, mode: opts.mode, taskEnv: opts.env, benchmark: opts.benchmark });
|
|
754
754
|
return m;
|
|
755
755
|
})();
|
|
756
756
|
const events = opts.events || new CodingEvents();
|
|
@@ -812,6 +812,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
812
812
|
const questionManager = opts.questionManager || new QuestionManager(events);
|
|
813
813
|
|
|
814
814
|
// projectInfo already detected above (before system prompt)
|
|
815
|
+
const llmCtxRef = { current: null }; // populated each turn (see llmCtx below)
|
|
815
816
|
|
|
816
817
|
// Stream-native runtime: model deltas, tool states, snapshots, permissions,
|
|
817
818
|
// and step boundaries are persisted as typed transcript parts while the loop
|
|
@@ -835,9 +836,15 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
835
836
|
if (call.name === 'list_directory' && input.directory && !path.isAbsolute(input.directory)) {
|
|
836
837
|
input.directory = path.join(resolvedCwd, input.directory);
|
|
837
838
|
}
|
|
839
|
+
if (call.name === 'run_shell' && !input.cwd) {
|
|
840
|
+
input.cwd = resolvedCwd;
|
|
841
|
+
}
|
|
838
842
|
input.sessionId = sid;
|
|
839
843
|
input.projectRoot = resolvedCwd;
|
|
840
|
-
|
|
844
|
+
const toolCtx = { sessionId: sid, cwd: resolvedCwd, model: modelId, provider: llm.type, runtimeMode: runtimeMode.id };
|
|
845
|
+
const finalInput = await mw.run('tool.before', toolCtx, call.name, input);
|
|
846
|
+
const result = await toolRegistry.execute(call.name, finalInput, toolCtx);
|
|
847
|
+
return mw.run('tool.after', toolCtx, call.name, finalInput, result);
|
|
841
848
|
},
|
|
842
849
|
});
|
|
843
850
|
processor.on('event', (evt) => emitProgress({
|
|
@@ -851,6 +858,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
851
858
|
let streamStopReason = '';
|
|
852
859
|
let streamModel = modelId;
|
|
853
860
|
const streamErrors = [];
|
|
861
|
+
let streamHadEdit = false;
|
|
854
862
|
for (let turnIndex = opts._resumeTurn || 0; turnIndex < turns; turnIndex++) {
|
|
855
863
|
const remaining = deadline - Date.now();
|
|
856
864
|
if (remaining <= 0) {
|
|
@@ -878,14 +886,24 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
878
886
|
runtimeMode: runtimeMode.id,
|
|
879
887
|
cwd: resolvedCwd,
|
|
880
888
|
});
|
|
889
|
+
const llmCtx = { params: { maxTokens: taskFileHints.length >= 4 ? 8192 : 4096 }, system: systemPrompt, cwd: resolvedCwd,
|
|
890
|
+
provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {},
|
|
891
|
+
toolsAvailable: toolsForTurn.length > 0 };
|
|
892
|
+
llmCtxRef.current = llmCtx;
|
|
893
|
+
await mw.run('llm.before', llmCtx);
|
|
881
894
|
turn = await processor.runTurn({
|
|
882
895
|
sessionId: sid,
|
|
883
896
|
cwd: resolvedCwd,
|
|
884
|
-
system:
|
|
897
|
+
system: llmCtx.system,
|
|
885
898
|
messages,
|
|
886
899
|
tools: toolsForTurn,
|
|
887
900
|
maxTokens: taskFileHints.length >= 4 ? 8192 : 4096,
|
|
888
901
|
signal: ac.signal,
|
|
902
|
+
maxTokens: llmCtx.params.maxTokens,
|
|
903
|
+
temperature: llmCtx.params.temperature,
|
|
904
|
+
thinking: llmCtx.params.thinking,
|
|
905
|
+
reasoningEffort: llmCtx.params.reasoningEffort,
|
|
906
|
+
options: llmCtx.params.options,
|
|
889
907
|
});
|
|
890
908
|
} finally {
|
|
891
909
|
clearTimeout(timer);
|
|
@@ -911,6 +929,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
911
929
|
content: turn.text,
|
|
912
930
|
stopReason: turn.stopReason,
|
|
913
931
|
});
|
|
932
|
+
if (turn.hadEdit) streamHadEdit = true;
|
|
914
933
|
|
|
915
934
|
if (turn.status === 'error') break;
|
|
916
935
|
if ((turn.toolCalls || []).length === 0) {
|
|
@@ -931,6 +950,7 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
931
950
|
}
|
|
932
951
|
if (turn.assistantMessage) messages.push(turn.assistantMessage);
|
|
933
952
|
if (turn.toolResultMessage) messages.push(turn.toolResultMessage);
|
|
953
|
+
if (turn.verified && streamHadEdit) break;
|
|
934
954
|
if (turn.next !== 'continue') break;
|
|
935
955
|
}
|
|
936
956
|
|
|
@@ -971,7 +991,6 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
971
991
|
// ── Bridge: event bus → middleware (A2) ──
|
|
972
992
|
// When the event bus fires, propagate to middleware's onEvent hook so
|
|
973
993
|
// registered middleware can react to file edits, reads, and context overflow.
|
|
974
|
-
const llmCtxRef = { current: null }; // populated each turn (see llmCtx below)
|
|
975
994
|
const _bridgeHandlers = {};
|
|
976
995
|
for (const evtType of ['file.edited', 'file.read', 'context.overflow']) {
|
|
977
996
|
const handler = (data) => {
|
|
@@ -1073,8 +1092,10 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1073
1092
|
const timer = setTimeout(() => ac.abort(), Math.min(remaining, perTurnCap));
|
|
1074
1093
|
|
|
1075
1094
|
// Middleware: prepare LLM call
|
|
1095
|
+
const turnsRemaining = turns - turn;
|
|
1076
1096
|
const llmCtx = { params: { maxTokens: taskFileHints.length >= 4 ? 8192 : 4096 }, system: systemPrompt, cwd: resolvedCwd,
|
|
1077
|
-
provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {}
|
|
1097
|
+
provider: llm.type, model: modelId, mode: opts.mode, runtimeMode: runtimeMode.id, claudeMd: opts.claudeMd, log: {},
|
|
1098
|
+
toolsAvailable: turnsRemaining > 1 };
|
|
1078
1099
|
llmCtxRef.current = llmCtx; // expose to event bridge (A2)
|
|
1079
1100
|
await mw.run('llm.before', llmCtx);
|
|
1080
1101
|
let adaptedTools = await toolRegistry.getDefinitions(llmCtx);
|
|
@@ -1136,7 +1157,6 @@ async function runAgentLoop(prompt, opts = {}) {
|
|
|
1136
1157
|
// Graceful max-steps degradation (6n)
|
|
1137
1158
|
// Note: warnings are appended to the LAST message's content (not as separate
|
|
1138
1159
|
// user messages) to avoid consecutive user messages which the API rejects.
|
|
1139
|
-
const turnsRemaining = turns - turn;
|
|
1140
1160
|
if (turnsRemaining <= 1) {
|
|
1141
1161
|
// Final turn: disable tools, force structured summary
|
|
1142
1162
|
adaptedTools = [];
|
|
@@ -100,10 +100,13 @@ async function runAgentBenchmark(benchmark, options = {}) {
|
|
|
100
100
|
}
|
|
101
101
|
|
|
102
102
|
// Run the agent loop with hard timeout safety net
|
|
103
|
-
const
|
|
103
|
+
const maxTurns = expectations.maxTurns || 20;
|
|
104
|
+
const turnBudgetTimeout = maxTurns * 30000;
|
|
105
|
+
const effectiveTimeout = Math.min(timeoutMs || turnBudgetTimeout, turnBudgetTimeout);
|
|
104
106
|
const agentPromise = runAgentLoop(benchmark.prompt, {
|
|
105
107
|
cwd: sandboxDir,
|
|
106
108
|
timeoutMs: effectiveTimeout,
|
|
109
|
+
maxTurns,
|
|
107
110
|
provider,
|
|
108
111
|
model,
|
|
109
112
|
mode: 'build',
|
|
@@ -156,6 +159,10 @@ async function runAgentBenchmark(benchmark, options = {}) {
|
|
|
156
159
|
const inputTokens = usage.inputTokens ?? usage.input ?? 0;
|
|
157
160
|
const expectedFileChanges = expectations.expectedFileChanges || [];
|
|
158
161
|
const missingExpectedWork = expectedFileChanges.length > 0 && actualFileChanges.length === 0;
|
|
162
|
+
const attemptedFileChange = actualToolCalls.some((call) => {
|
|
163
|
+
const name = typeof call === 'string' ? call : call?.name;
|
|
164
|
+
return /edit|write|patch|create|delete|modify/i.test(String(name || ''));
|
|
165
|
+
});
|
|
159
166
|
const testRegression = (expectations.testCommand && testsPassed === false);
|
|
160
167
|
const rawError = result.stderr || result.error || null;
|
|
161
168
|
const validatedByTests = Boolean(
|
|
@@ -199,7 +206,7 @@ async function runAgentBenchmark(benchmark, options = {}) {
|
|
|
199
206
|
: testRegression
|
|
200
207
|
? 'tests_failed'
|
|
201
208
|
: missingExpectedWork
|
|
202
|
-
? 'no_file_changes'
|
|
209
|
+
? attemptedFileChange ? 'missing_expected_changes' : 'no_file_changes'
|
|
203
210
|
: 'no_effort' },
|
|
204
211
|
};
|
|
205
212
|
}
|
|
@@ -296,6 +303,10 @@ function scoreAgentResult(benchmark, actual) {
|
|
|
296
303
|
});
|
|
297
304
|
}
|
|
298
305
|
|
|
306
|
+
/**
 * Report whether an agent benchmark result meets the bar for being trusted.
 *
 * A result is trusted only when all of the following hold:
 *   - `success` is strictly `true` (truthy stand-ins such as `1` do not count),
 *   - `error` is falsy (absent, `null`, empty string, ...),
 *   - `testsPassed` is strictly `true` (`null`/`undefined` do not count).
 *
 * @param {object|null} [result={}] - Result record to inspect; `undefined`
 *   and `null` are both treated as "no result".
 * @returns {boolean} `true` when the result satisfies every condition above.
 */
function isTrustedAgentResult(result = {}) {
  // The default parameter only replaces `undefined`; an explicit `null`
  // would otherwise throw on property access, so treat it as untrusted.
  if (result === null) return false;
  return result.success === true && !result.error && result.testsPassed === true;
}
|
|
309
|
+
|
|
299
310
|
/**
|
|
300
311
|
* Run a multi-turn benchmark — sends each turn's prompt sequentially,
|
|
301
312
|
* accumulating conversation context. Scores after the final turn.
|
|
@@ -507,7 +518,7 @@ async function runAgentBenchmarkSuite(options = {}) {
|
|
|
507
518
|
outputTokens: result.outputTokens ?? null,
|
|
508
519
|
scorerVersion: DEFAULT_SCORER_VERSION,
|
|
509
520
|
scoringMethod,
|
|
510
|
-
trusted:
|
|
521
|
+
trusted: isTrustedAgentResult(result),
|
|
511
522
|
runConfig: { timeoutMs, scoringMethod },
|
|
512
523
|
}, {
|
|
513
524
|
suite: 'coding-agent',
|
|
@@ -517,7 +528,7 @@ async function runAgentBenchmarkSuite(options = {}) {
|
|
|
517
528
|
model: resolveModelName(model),
|
|
518
529
|
scoringMethod,
|
|
519
530
|
scorerVersion: DEFAULT_SCORER_VERSION,
|
|
520
|
-
trusted:
|
|
531
|
+
trusted: isTrustedAgentResult(result),
|
|
521
532
|
runConfig: { timeoutMs, scoringMethod },
|
|
522
533
|
}));
|
|
523
534
|
} catch { /* non-fatal */ }
|
|
@@ -666,6 +677,7 @@ module.exports = {
|
|
|
666
677
|
runMultiTurnBenchmark,
|
|
667
678
|
runAgentBenchmarkSuite,
|
|
668
679
|
scoreAgentResult,
|
|
680
|
+
isTrustedAgentResult,
|
|
669
681
|
extractToolCalls,
|
|
670
682
|
extractToolCallDetails,
|
|
671
683
|
countTests,
|