create-walle 0.9.20 → 0.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. package/README.md +2 -2
  2. package/package.json +1 -1
  3. package/template/claude-task-manager/db.js +131 -0
  4. package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +58 -50
  5. package/template/claude-task-manager/docs/phone-access-design.md +23 -7
  6. package/template/claude-task-manager/docs/walle-session-model-preferences.md +119 -0
  7. package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +32 -48
  8. package/template/claude-task-manager/lib/remote-relay-protocol.js +5 -0
  9. package/template/claude-task-manager/lib/walle-external-actions.js +20 -3
  10. package/template/claude-task-manager/public/index.html +25 -0
  11. package/template/claude-task-manager/public/js/setup.js +16 -12
  12. package/template/claude-task-manager/public/js/walle-session.js +31 -3
  13. package/template/claude-task-manager/public/js/walle.js +93 -23
  14. package/template/claude-task-manager/public/m/app.css +417 -21
  15. package/template/claude-task-manager/public/m/app.js +831 -44
  16. package/template/claude-task-manager/public/m/claim.html +1 -1
  17. package/template/claude-task-manager/public/m/index.html +41 -7
  18. package/template/claude-task-manager/public/m/sw.js +1 -1
  19. package/template/claude-task-manager/server.js +377 -30
  20. package/template/claude-task-manager/workers/state-detectors/codex.js +18 -3
  21. package/template/package.json +1 -1
  22. package/template/wall-e/chat.js +32 -2
  23. package/template/wall-e/coding/stream-processor.js +36 -0
  24. package/template/wall-e/coding-orchestrator.js +45 -0
  25. package/template/wall-e/docs/external-action-controller.md +60 -2
  26. package/template/wall-e/external-action-controller.js +23 -1
  27. package/template/wall-e/external-action-gateway.js +163 -0
  28. package/template/wall-e/fly.toml +1 -0
  29. package/template/wall-e/tools/local-tools.js +122 -4
  30. package/template/website/index.html +2 -2
@@ -31,6 +31,7 @@ const {
31
31
  inputForExternalActionEnvelope,
32
32
  reviewExternalAction,
33
33
  } = require('./external-action-controller');
34
+ const { reviewExternalActionGateway } = require('./external-action-gateway');
34
35
  const { runShadow } = require('./eval/shadow');
35
36
  const {
36
37
  buildCodeReviewContextBlock,
@@ -984,7 +985,14 @@ function _externalActionLine(envelope = {}, result = {}) {
984
985
  const suffix = target ? `: ${target}` : '';
985
986
  if (result.alreadyExecuted) return `Already ${verb}${suffix}; I did not run it again.`;
986
987
  if (result.error) return `Failed to ${_externalActionVerb(envelope, 'present')}${suffix}: ${result.error}`;
987
- return `Approved action ${verb}${suffix}.`;
988
+ if (_externalActionResultVerified(result)) return `Approved action ${verb} and verified${suffix}.`;
989
+ const verifyReason = result?.verification?.reason || result?.verification?.error || 'no read-after-write verification evidence was returned';
990
+ return `Approved action ${verb}${suffix}, but verification did not confirm it: ${verifyReason}`;
991
+ }
992
+
993
/**
 * Report whether an external-action result carries positive verification evidence.
 *
 * A result counts as verified only when it has no failure marker (`error`,
 * `ok === false`, `success === false`) and at least one of `verified`,
 * `verification.ok`, or `alreadyExecuted` is strictly `true`.
 *
 * @param {object} [result] - Result object returned by an external-action tool.
 * @returns {boolean} True when the result is a non-failure with verification evidence.
 */
function _externalActionResultVerified(result = {}) {
  // The default parameter does not cover null, so check for a missing result first.
  if (!result) return false;

  // Any failure marker overrides whatever verification flags are present.
  if (result.error) return false;
  if (result.ok === false) return false;
  if (result.success === false) return false;

  // Only a literal `true` is accepted; truthy strings or numbers do not count.
  if (result.verified === true) return true;
  if (result.verification?.ok === true) return true;
  return result.alreadyExecuted === true;
}
989
997
 
990
998
  function _progressToolResultPayload(result, resultStr) {
@@ -1057,6 +1065,7 @@ async function _executeApprovedExternalActions({
1057
1065
  }
1058
1066
 
1059
1067
  const failed = !!(result && (result.error || result.ok === false || result.success === false));
1068
+ const verified = _externalActionResultVerified(result || {});
1060
1069
  try {
1061
1070
  const confidence = require('./decision/confidence');
1062
1071
  if (envelope.domain && !result?.alreadyExecuted) confidence.recordAction(envelope.domain, !failed);
@@ -1071,7 +1080,9 @@ async function _executeApprovedExternalActions({
1071
1080
  ? 'Approved action was already executed'
1072
1081
  : failed
1073
1082
  ? 'Approved action failed'
1074
- : 'Approved action executed',
1083
+ : verified
1084
+ ? 'Approved action executed and verified'
1085
+ : 'Approved action executed without verification',
1075
1086
  error: failed ? (result.error || 'External action failed') : null,
1076
1087
  });
1077
1088
  lines.push(_externalActionLine(envelope, result || {}));
@@ -2191,6 +2202,15 @@ async function chat(message, opts = {}) {
2191
2202
  name = normalizedCall.name;
2192
2203
  input = normalizedCall.input;
2193
2204
 
2205
+ const gatewayReview = reviewExternalActionGateway({
2206
+ toolName: name,
2207
+ input,
2208
+ userMessage: routingMessage || message,
2209
+ });
2210
+ if (!gatewayReview.admitted) {
2211
+ return gatewayReview.result;
2212
+ }
2213
+
2194
2214
  // Eval hook: allow test harness to intercept tool calls with mock results
2195
2215
  if (typeof opts.toolInterceptor === 'function') {
2196
2216
  const intercepted = await opts.toolInterceptor(name, input);
@@ -2252,6 +2272,16 @@ async function chat(message, opts = {}) {
2252
2272
  args: name === 'run_shell' ? input.args : undefined,
2253
2273
  reason: perm.reason,
2254
2274
  source: perm.source,
2275
+ approval_options: [
2276
+ { id: 'allow_once', label: 'Yes', scope: 'this_tool_call' },
2277
+ { id: 'allow_always', label: 'Always yes for this project/pattern', scope: 'project_permission_pattern' },
2278
+ { id: 'deny', label: 'No', scope: 'this_tool_call' },
2279
+ ],
2280
+ approval_policy: {
2281
+ kind: 'local_permission',
2282
+ allow_always: true,
2283
+ approval_scope: 'project_permission_pattern',
2284
+ },
2255
2285
  });
2256
2286
  // Store resolver so server can call it when user responds
2257
2287
  if (!opts._permissionResolvers) opts._permissionResolvers = new Map();
@@ -12,6 +12,7 @@ const { recoverAllowedTextToolCalls } = require('../llm/text-tool-calls');
12
12
  const { ArtifactStore } = require('./artifact-store');
13
13
  const { normalizeResponse, transformRequest, providerId } = require('./provider-transform');
14
14
  const { Confidence, EventName, FailureClass, Provenance, createLaneEvent } = require('./lane-events');
15
+ const { reviewExternalActionGateway } = require('../external-action-gateway');
15
16
 
16
17
  async function* streamFromChat(provider, request) {
17
18
  const response = normalizeResponse(recoverAllowedTextToolCalls(await provider.chat(request), request.tools), {
@@ -345,6 +346,41 @@ class StreamProcessor extends EventEmitter {
345
346
  name: call.name,
346
347
  data: { input: call.input },
347
348
  });
349
+ const gatewayReview = reviewExternalActionGateway({
350
+ toolName: call.name,
351
+ input: call.input,
352
+ });
353
+ if (!gatewayReview.admitted) {
354
+ const result = gatewayReview.result;
355
+ state.toolResults.push({ toolCallId: call.id, name: call.name, result });
356
+ await this._runtimeItem(sessionId, cwd, {
357
+ threadId: state.threadId,
358
+ turnId: state.turnId,
359
+ itemId: call.id,
360
+ role: 'tool',
361
+ status: 'completed',
362
+ name: call.name,
363
+ data: { result },
364
+ });
365
+ await this._record(sessionId, cwd, 'tool', {
366
+ state: 'blocked',
367
+ toolCallId: call.id,
368
+ name: call.name,
369
+ input: call.input,
370
+ result,
371
+ });
372
+ await this._laneEvent({
373
+ name: EventName.TOOL_FINISHED,
374
+ sessionId,
375
+ cwd,
376
+ provider: state.provider,
377
+ model: state.model,
378
+ runId: state.messageId,
379
+ confidence: Confidence.HIGH,
380
+ data: { toolCallId: call.id, name: call.name, blocked: true, reason: result.reason },
381
+ });
382
+ return;
383
+ }
348
384
  if (this.permissionService?.authorize) {
349
385
  await this._record(sessionId, cwd, 'tool', {
350
386
  state: 'permission_check',
@@ -565,6 +565,27 @@ function hasToolCall(toolCallHistory = [], names = new Set()) {
565
565
  return (toolCallHistory || []).some((call) => names.has(call.name));
566
566
  }
567
567
 
568
/**
 * Decide whether a recorded tool call counts as verification evidence.
 *
 * - `browser_screenshot` always counts.
 * - `run_shell` counts when its input text mentions a test/lint/build style command.
 * - Any other tool counts when its name contains test, verify, screenshot,
 *   diagnostic, lint, or build.
 *
 * @param {object} [call] - Tool-call record; `name`, `inputHash` and `input` are read.
 * @returns {boolean} True when the call looks like a verification step.
 */
function isVerificationToolCall(call = {}) {
  const toolName = String(call.name || '');
  // A truthy `inputHash` is used instead of the serialized `input`.
  // NOTE(review): if `inputHash` is an opaque digest rather than command text,
  // the run_shell pattern below cannot match it — confirm what callers store there.
  const inputText = String(call.inputHash || JSON.stringify(call.input || {}));

  switch (toolName) {
    case 'browser_screenshot':
      return true;
    case 'run_shell':
      return /\b(?:test|spec|lint|build|typecheck|tsc|pytest|jest|mocha|vitest|playwright|node\s+--(?:test|check)|npm\s+(?:test|run)|pnpm\s+(?:test|run)|yarn\s+(?:test|run)|git\s+diff\s+--check)\b/i.test(inputText);
    default:
      return /(?:test|verify|screenshot|diagnostic|lint|build)/i.test(toolName);
  }
}
577
+
578
/**
 * Check whether any call in the tool-call history counts as verification evidence.
 *
 * @param {Array<object>} [toolCallHistory] - Recorded tool calls; a falsy value is treated as empty.
 * @returns {boolean} True when at least one entry satisfies `isVerificationToolCall`.
 */
function hasVerificationEvidence(toolCallHistory = []) {
  const history = toolCallHistory || [];
  return history.some((entry) => isVerificationToolCall(entry));
}
581
+
582
/**
 * Detect a response that states verification could not be performed.
 *
 * The content is converted with `contentToText`; blank text never matches.
 * Two phrasings are recognised, each within a 120-character window:
 * an inability phrase followed by test/verify/build/run/execute, or
 * tests/verification/build followed by not run/not available/blocked/unavailable/missing.
 *
 * @param {*} content - Assistant response content accepted by `contentToText`.
 * @returns {boolean} True when the text reads as a verification blocker.
 */
function isVerificationBlockerResponse(content) {
  const responseText = contentToText(content);
  if (responseText.trim() === '') return false;

  const blockerPatterns = [
    /\b(?:could not|couldn'?t|unable to|not able to|cannot)\b[\s\S]{0,120}\b(?:test|verify|build|run|execute)\b/i,
    /\b(?:tests?|verification|build)\b[\s\S]{0,120}\b(?:not run|not available|blocked|unavailable|missing)\b/i,
  ];
  return blockerPatterns.some((pattern) => pattern.test(responseText));
}
588
+
568
589
  function stripPathLikeTokens(text) {
569
590
  return String(text || '').replace(/(?:^|[\s`'"(])((?:\.?[A-Za-z0-9_.@-]+\/)+[A-Za-z0-9_.@-]+)(?=[\s`'",):;.\]]|$)/g, ' ');
570
591
  }
@@ -610,6 +631,29 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
610
631
  if (!isActionRequiredPrompt(prompt, { mode })) return null;
611
632
 
612
633
  const madeEdits = hasToolCall(toolCallHistory, EDIT_TOOL_NAMES);
634
+ if (madeEdits && !hasVerificationEvidence(toolCallHistory) && !isVerificationBlockerResponse(content)) {
635
+ const reason = 'The assistant made file changes but ended before running verification.';
636
+ if (!toolsAvailable) {
637
+ return {
638
+ action: 'fail',
639
+ reason: `${reason} No tool turns remain.`,
640
+ };
641
+ }
642
+ if (nudges >= maxNudges) {
643
+ return {
644
+ action: 'fail',
645
+ reason: `${reason} Verification continuation limit reached.`,
646
+ };
647
+ }
648
+ return {
649
+ action: 'continue',
650
+ reason,
651
+ message: `[SYSTEM] ${reason} This is not complete.\n` +
652
+ `Run the relevant verification now: tests, lint, build, typecheck, browser screenshot, or at minimum git diff --check when no project test exists.\n` +
653
+ `Only summarize success after a tool result proves the work. If verification is genuinely impossible, state the blocker with tool-backed evidence.\n` +
654
+ `Working directory: ${cwd}`,
655
+ };
656
+ }
613
657
  if (madeEdits) return null;
614
658
  if (isLegitimateNoEditResponse(content, toolCallHistory)) return null;
615
659
 
@@ -3294,6 +3338,7 @@ module.exports = {
3294
3338
  isActionRequiredPrompt,
3295
3339
  isPrematureActionResponse,
3296
3340
  getNoActionContinuation,
3341
+ hasVerificationEvidence,
3297
3342
  subtaskRequiresFileChanges,
3298
3343
  screenshotTrackerHook,
3299
3344
  collectEmptyChangedFiles,
@@ -32,6 +32,38 @@ user confirmation, and the exact approved envelope is replayed back to Wall-E.
32
32
  Wall-E then executes the original payload directly rather than asking the model
33
33
  to recreate it.
34
34
 
35
+ ## Approval Tiers
36
+
37
+ Wall-E uses two approval tiers:
38
+
39
+ 1. **Local permission approval** is for reversible project-local work such as
40
+ shell test commands, builds, and file edits. The UI may offer `Yes`,
41
+ `Always yes for this project/pattern`, and `No`. The persisted rule is scoped
42
+ to the project plus the permission pattern, never to vague model wording.
43
+ 2. **External action approval** is for real-world side effects: email, calendar,
44
+ Slack, SMS, reminders, and notifications. These approvals are exact-payload
45
+ approvals. The default choices are `Approve once` or `Cancel`; broad
46
+ `always yes` is intentionally disabled because the next payload may target a
47
+ different person, account, calendar, or time.
48
+
49
+ This keeps the fast Claude Code-style flow for local coding work without giving
50
+ models an ambient ability to send messages or schedule events.
51
+
52
+ ## Side-Effect Gateway
53
+
54
+ All side-effecting routes pass through a host-side gateway before normal
55
+ permission checks. The gateway blocks shell, AppleScript, and generic MCP calls
56
+ that try to dispatch external actions directly, for example:
57
+
58
+ - `osascript`/JXA creating Calendar events.
59
+ - `gws calendar events insert`.
60
+ - Gmail `messages.send` through shell, curl, or MCP.
61
+ - Slack `chat.postMessage` through shell or MCP.
62
+
63
+ The gateway returns a structured `external_action_gateway` tool result that
64
+ tells the model which dedicated tool to use. The action is not executed, and the
65
+ model is instructed not to claim success.
66
+
35
67
  Sequence:
36
68
 
37
69
  1. Wall-E blocks `mail_send`, `mail_reply`, `calendar_create`, and other external actions and
@@ -46,8 +78,9 @@ Sequence:
46
78
  to `chat()`.
47
79
  5. `chat()` validates that each approval id/hash still matches the reconstructed
48
80
  tool input, checks validation issues, applies a session-scoped idempotency
49
- guard, executes the local tool, and emits normal `tool_call` / `tool_result`
50
- progress events.
81
+ guard, executes the local tool, runs read-after-write verification where the
82
+ connector supports it, and emits normal `tool_call` / `tool_result` progress
83
+ events.
51
84
 
52
85
  This means provider wording is not part of the safety decision. DeepSeek,
53
86
  Anthropic, OpenAI, and other providers all use the same envelope replay path.
@@ -81,14 +114,39 @@ Anthropic, OpenAI, and other providers all use the same envelope replay path.
81
114
  approve mail/calendar side effects.
82
115
  - Approved envelopes are idempotent per Wall-E session and payload hash to avoid
83
116
  accidental duplicate sends from double-submit or retry.
117
+ - Calendar approval envelopes preserve `account`, `source`, `calendarId`,
118
+ `calendar`, `location`, and time fields so replay cannot silently fall back to
119
+ a different provider or calendar.
120
+ - Google Calendar event creation is verified with `calendar.events.get` against
121
+ the same account, calendar id, and event id before Wall-E can summarize the
122
+ action as verified.
123
+ - Gmail sends and replies are verified with `gmail.messages.get`. Replies keep the
124
+ original message id separately from the sent reply id so thread evidence is
125
+ not confused with source evidence.
126
+ - Final summary guards treat `sent` or `created` without `verified` evidence as
127
+ incomplete. Shell stdout, AppleScript UIDs, or model prose do not count as
128
+ external-action completion evidence.
129
+
130
+ ## Coding-Agent Completion Contract
131
+
132
+ Wall-E coding sessions use the same evidence rule. If an action-oriented coding
133
+ prompt caused file edits, the agent must run a relevant verification tool before
134
+ ending with a success summary. Accepted evidence includes tests, lint, build,
135
+ typecheck, Playwright/browser screenshots for UI work, or `git diff --check`
136
+ when no project-specific verifier exists. If verification is impossible, the
137
+ agent must say so with tool-backed evidence instead of claiming success.
84
138
 
85
139
  ## Tests
86
140
 
87
141
  Focused regressions:
88
142
 
89
143
  - `claude-task-manager/tests/walle-permission-policy.test.js`
144
+ - `claude-task-manager/tests/walle-external-actions.test.js`
90
145
  - `wall-e/tests/external-action-controller.test.js`
146
+ - `wall-e/tests/external-action-gateway.test.js`
91
147
  - `wall-e/tests/local-tools-gws-live-files.test.js`
148
+ - `wall-e/tests/coding-orchestrator.test.js`
149
+ - `wall-e/tests/coding-stream-processor.test.js`
92
150
  - `wall-e/tests/execution-trace.test.js`
93
151
  - `wall-e/tests/chat.test.js` with `stages a draft email`
94
152
 
@@ -133,7 +133,13 @@ function targetForTool(toolName, input = {}) {
133
133
  return { channel: input.channel_name || input.channel || null };
134
134
  }
135
135
  if (toolName === 'calendar_create') {
136
- return { calendar: input.calendar || null, attendees: normalizeAddressList(input.attendees) };
136
+ return {
137
+ calendar: input.calendar || input.calendar_name || input.calendarId || input.calendar_id || null,
138
+ calendarId: input.calendarId || input.calendar_id || null,
139
+ account: input.account || null,
140
+ source: input.source || input.provider || null,
141
+ attendees: normalizeAddressList(input.attendees),
142
+ };
137
143
  }
138
144
  if (toolName === 'reminder_create') {
139
145
  return { list: input.list || null };
@@ -163,6 +169,7 @@ function payloadForTool(toolName, input = {}) {
163
169
  title: input.title || '',
164
170
  start: calendarStart(input),
165
171
  end: calendarEnd(input),
172
+ location: input.location || '',
166
173
  notes: input.notes || '',
167
174
  };
168
175
  }
@@ -208,10 +215,14 @@ function inputForExternalActionEnvelope(envelope = {}) {
208
215
  if (toolName === 'calendar_create') {
209
216
  return {
210
217
  calendar: target.calendar || null,
218
+ calendarId: target.calendarId || null,
219
+ account: actor.account || target.account || null,
220
+ source: target.source || null,
211
221
  attendees: normalizeAddressList(target.attendees),
212
222
  title: payload.title || '',
213
223
  start_date: payload.start || null,
214
224
  end_date: payload.end || null,
225
+ location: payload.location || '',
215
226
  notes: payload.notes || '',
216
227
  };
217
228
  }
@@ -315,6 +326,17 @@ function buildBlockedToolResult(envelope, decision) {
315
326
  ? 'External action was staged because the user asked for prepared content, not dispatch.'
316
327
  : 'External action requires validation and explicit confirmation before execution.',
317
328
  action: envelope,
329
+ approval_options: isPreview
330
+ ? []
331
+ : [
332
+ { id: 'approve_once', label: 'Approve once', scope: 'exact_action_payload' },
333
+ { id: 'cancel', label: 'Cancel', scope: 'exact_action_payload' },
334
+ ],
335
+ approval_policy: {
336
+ kind: 'external_action',
337
+ allow_always: false,
338
+ approval_scope: 'exact_action_payload',
339
+ },
318
340
  draft: envelope.domain === 'email'
319
341
  ? {
320
342
  to: envelope.toolName === 'mail_reply' ? 'derived_from_original_message' : envelope.target.to,
@@ -0,0 +1,163 @@
1
+ 'use strict';
2
+
3
// Shell/AppleScript command patterns that indicate a direct external side effect.
// Declared per domain and expanded into rule objects of the shape
// { domain, operation, recommendedTool, reason, patterns }, in declaration order.
const SHELL_SIDE_EFFECT_RULES = Object.entries({
  calendar: {
    operation: 'create_event',
    recommendedTool: 'calendar_create',
    reason: 'Calendar event creation must use calendar_create so account, calendar, approval, and verification are tracked.',
    patterns: [
      /\bgws\b[\s\S]*\bcalendar\b[\s\S]*\bevents?\b[\s\S]*\binsert\b/i,
      /calendar\.googleapis\.com[\s\S]*\/events/i,
      /osascript[\s\S]*(?:tell\s+application\s+"Calendar"|Application\(["']Calendar["']\))[\s\S]*(?:make\s+new\s+event|events\.push|new\s+event\s+with\s+properties)/i,
      /(?:tell\s+application\s+"Calendar"|Application\(["']Calendar["']\))[\s\S]*(?:make\s+new\s+event|events\.push|new\s+event\s+with\s+properties)/i,
    ],
  },
  email: {
    operation: 'send_email',
    recommendedTool: 'mail_send or mail_reply',
    reason: 'Email dispatch must use mail_send or mail_reply so recipients, threading, approval, and sent-mail verification are tracked.',
    patterns: [
      /\bgws\b[\s\S]*\bgmail\b[\s\S]*(?:\+send|messages\s+send|users\s+messages\s+send)\b/i,
      /gmail\.googleapis\.com[\s\S]*\/messages\/send\b/i,
      /osascript[\s\S]*(?:tell\s+application\s+"Mail"|Application\(["']Mail["']\))[\s\S]*(?:make\s+new\s+outgoing\s+message|send\b)/i,
      /(?:tell\s+application\s+"Mail"|Application\(["']Mail["']\))[\s\S]*(?:make\s+new\s+outgoing\s+message|send\b)/i,
    ],
  },
  slack: {
    operation: 'send_message',
    recommendedTool: 'slack_send_message',
    reason: 'Slack message dispatch must use slack_send_message so channel, approval, and execution evidence are tracked.',
    patterns: [
      /slack\.com\/api\/chat\.postMessage/i,
      /\bslack\b[\s\S]*\bchat\.postMessage\b/i,
    ],
  },
  reminder: {
    operation: 'create_reminder',
    recommendedTool: 'reminder_create',
    reason: 'Reminder creation must use reminder_create so approval and execution evidence are tracked.',
    patterns: [
      /osascript[\s\S]*(?:tell\s+application\s+"Reminders"|Application\(["']Reminders["']\))[\s\S]*(?:make\s+new\s+reminder|new\s+reminder)/i,
      /(?:tell\s+application\s+"Reminders"|Application\(["']Reminders["']\))[\s\S]*(?:make\s+new\s+reminder|new\s+reminder)/i,
    ],
  },
}).map(([domain, rule]) => ({ domain, ...rule }));
49
+
50
// MCP call patterns that indicate an external side effect. Declared per domain
// and expanded into rule objects of the shape
// { domain, operation, recommendedTool, pattern }, in declaration order.
const MCP_SIDE_EFFECT_RULES = Object.entries({
  calendar: {
    operation: 'create_event',
    recommendedTool: 'calendar_create',
    pattern: /(?:calendar|google[-_ ]?calendar).*?(?:create|insert|add|schedule)|(?:create|insert|add|schedule).*?(?:calendar|event)/i,
  },
  email: {
    operation: 'send_email',
    recommendedTool: 'mail_send or mail_reply',
    pattern: /(?:gmail|mail|email).*?(?:send|reply|respond)|(?:send|reply|respond).*?(?:gmail|mail|email)/i,
  },
  slack: {
    operation: 'send_message',
    recommendedTool: 'slack_send_message',
    pattern: /(?:slack).*?(?:send|post|message)|(?:send|post).*?(?:slack|channel)/i,
  },
}).map(([domain, rule]) => ({ domain, ...rule }));
70
+
71
/**
 * Flatten a shell/AppleScript tool input into the text scanned by the gateway.
 *
 * A string `command` wins: it is returned with any non-empty `args` array
 * appended, space-separated. Otherwise a string `script` is returned. Anything
 * else yields an empty string.
 *
 * @param {object} [input] - Tool input; `command`, `args` and `script` are read.
 * @returns {string} Command text, or '' when none can be derived.
 */
function commandText(input = {}) {
  const isObject = Boolean(input) && typeof input === 'object';
  if (!isObject) return '';

  const { command, args, script } = input;
  if (typeof command === 'string') {
    const hasArgs = Array.isArray(args) && args.length > 0;
    return hasArgs ? `${command} ${args.join(' ')}` : command;
  }
  return typeof script === 'string' ? script : '';
}
80
+
81
/**
 * Detect a tool call that tries to perform an external side effect through a
 * generic shell/AppleScript tool or an MCP call instead of a dedicated tool.
 *
 * Shell-like tools (`run_shell`, `shell`, `bash`, `terminal`, `applescript`)
 * are matched against SHELL_SIDE_EFFECT_RULES using the text from `commandText`.
 * MCP tools (`mcp_call` or any name starting with `mcp__`) are matched against
 * MCP_SIDE_EFFECT_RULES using the call name plus the serialized
 * `arguments`/`args`, truncated to 1000 characters.
 *
 * @param {string} toolName - Name of the tool being invoked.
 * @param {object} [input] - Tool input; null or non-object values are treated as `{}`.
 * @returns {object|null} `{ toolName, domain, operation, recommendedTool, reason, evidence }`
 *   for the first matching rule, or null when nothing matches.
 */
function classifyExternalActionBypass(toolName, input = {}) {
  const name = String(toolName || '').trim();
  // A default parameter only replaces `undefined`. An explicit null input would
  // otherwise throw on the property reads in the MCP branch, so normalise once.
  const safeInput = input !== null && typeof input === 'object' ? input : {};

  const isShellLike = ['run_shell', 'shell', 'bash', 'terminal', 'applescript'].includes(name);
  if (isShellLike) {
    const text = commandText(safeInput);
    if (!text) return null;
    const rule = SHELL_SIDE_EFFECT_RULES.find(
      ({ patterns }) => patterns.some((pattern) => pattern.test(text)),
    );
    if (rule) {
      return {
        toolName: name,
        domain: rule.domain,
        operation: rule.operation,
        recommendedTool: rule.recommendedTool,
        reason: rule.reason,
        // Evidence is capped so a long command cannot bloat the tool result.
        evidence: text.slice(0, 500),
      };
    }
  }

  if (name === 'mcp_call' || name.startsWith('mcp__')) {
    // `mcp_call` names its target in the input; `mcp__*` tools carry it in the tool name.
    const callName = name === 'mcp_call'
      ? `${safeInput.server || ''}.${safeInput.tool || ''}`
      : name;
    const text = `${callName} ${JSON.stringify(safeInput.arguments || safeInput.args || {})}`.slice(0, 1000);
    const rule = MCP_SIDE_EFFECT_RULES.find(({ pattern }) => pattern.test(text));
    if (rule) {
      return {
        toolName: name,
        domain: rule.domain,
        operation: rule.operation,
        recommendedTool: rule.recommendedTool,
        reason: `MCP side-effect tools must be routed through ${rule.recommendedTool} so approval and verification are tracked.`,
        evidence: text.slice(0, 500),
      };
    }
  }

  return null;
}
121
+
122
/**
 * Build the structured tool result returned when the gateway blocks a call.
 *
 * The result marks the action as blocked, not executed and not verified, echoes
 * the finding's fields, and carries an instruction telling the model to use the
 * recommended dedicated tool instead.
 *
 * @param {object} finding - Finding with `toolName`, `domain`, `operation`,
 *   `recommendedTool`, `reason` and `evidence`.
 * @returns {object} Blocked tool-result payload.
 */
function buildGatewayBlockedResult(finding) {
  const {
    toolName,
    domain,
    operation,
    recommendedTool,
    reason,
    evidence,
  } = finding;

  const instructionSentences = [
    `Do not retry this ${domain} side effect through ${toolName}.`,
    `Use ${recommendedTool} instead so Wall-E can stage an exact approval envelope, execute the approved payload, and verify the result before summarizing success.`,
    'Do not claim this action was executed.',
  ];

  return {
    external_action: true,
    external_action_gateway: true,
    blocked: true,
    executed: false,
    verified: false,
    decision: 'dedicated_tool_required',
    domain,
    operation,
    original_tool: toolName,
    recommended_tool: recommendedTool,
    reason,
    evidence,
    approval_policy: {
      kind: 'external_action',
      allow_always: false,
      approval_scope: 'exact_action_payload',
    },
    model_instruction: instructionSentences.join(' '),
  };
}
148
+
149
/**
 * Gateway entry point: admit a tool call, or block it with a structured result.
 *
 * Only `toolName` and `input` are read from the options object; any other
 * property a caller passes (for example `userMessage`) is ignored.
 *
 * @param {object} [options]
 * @param {string} [options.toolName] - Name of the tool being invoked.
 * @param {object} [options.input] - Tool input, defaulting to `{}`.
 * @returns {{admitted: true} | {admitted: false, finding: object, result: object}}
 *   `admitted: true` when no bypass is detected; otherwise the finding and the
 *   blocked tool result to hand back to the model.
 */
function reviewExternalActionGateway({ toolName, input = {} } = {}) {
  const finding = classifyExternalActionBypass(toolName, input);
  return finding
    ? { admitted: false, finding, result: buildGatewayBlockedResult(finding) }
    : { admitted: true };
}
158
+
159
+ module.exports = {
160
+ buildGatewayBlockedResult,
161
+ classifyExternalActionBypass,
162
+ reviewExternalActionGateway,
163
+ };
@@ -20,6 +20,7 @@ primary_region = "sjc" # Change to your nearest region: https://fly.io/docs/ref
20
20
  WALL_E_PORT = "3457"
21
21
  NODE_ENV = "production"
22
22
  WALLE_TELEMETRY_SERVER = "1"
23
+ WALLE_TELEMETRY_CLEANUP_ENABLED = "1"
23
24
 
24
25
  [[vm]]
25
26
  size = "shared-cpu-1x"