npm - create-walle - Versions diffs - 0.9.20 → 0.9.21 - Mend

create-walle 0.9.20 → 0.9.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

package/README.md +2 -2
package/package.json +1 -1
package/template/claude-task-manager/db.js +131 -0
package/template/claude-task-manager/docs/microsoft-dev-tunnel-phone-access-design.md +58 -50
package/template/claude-task-manager/docs/phone-access-design.md +23 -7
package/template/claude-task-manager/docs/walle-session-model-preferences.md +119 -0
package/template/claude-task-manager/lib/microsoft-dev-tunnel-setup.js +32 -48
package/template/claude-task-manager/lib/remote-relay-protocol.js +5 -0
package/template/claude-task-manager/lib/walle-external-actions.js +20 -3
package/template/claude-task-manager/public/index.html +25 -0
package/template/claude-task-manager/public/js/setup.js +16 -12
package/template/claude-task-manager/public/js/walle-session.js +31 -3
package/template/claude-task-manager/public/js/walle.js +93 -23
package/template/claude-task-manager/public/m/app.css +417 -21
package/template/claude-task-manager/public/m/app.js +831 -44
package/template/claude-task-manager/public/m/claim.html +1 -1
package/template/claude-task-manager/public/m/index.html +41 -7
package/template/claude-task-manager/public/m/sw.js +1 -1
package/template/claude-task-manager/server.js +377 -30
package/template/claude-task-manager/workers/state-detectors/codex.js +18 -3
package/template/package.json +1 -1
package/template/wall-e/chat.js +32 -2
package/template/wall-e/coding/stream-processor.js +36 -0
package/template/wall-e/coding-orchestrator.js +45 -0
package/template/wall-e/docs/external-action-controller.md +60 -2
package/template/wall-e/external-action-controller.js +23 -1
package/template/wall-e/external-action-gateway.js +163 -0
package/template/wall-e/fly.toml +1 -0
package/template/wall-e/tools/local-tools.js +122 -4
package/template/website/index.html +2 -2

package/template/wall-e/chat.js CHANGED Viewed

@@ -31,6 +31,7 @@ const {
   inputForExternalActionEnvelope,
   reviewExternalAction,
 } = require('./external-action-controller');
+const { reviewExternalActionGateway } = require('./external-action-gateway');
 const { runShadow } = require('./eval/shadow');
 const {
   buildCodeReviewContextBlock,
@@ -984,7 +985,14 @@ function _externalActionLine(envelope = {}, result = {}) {
   const suffix = target ? `: ${target}` : '';
   if (result.alreadyExecuted) return `Already ${verb}${suffix}; I did not run it again.`;
   if (result.error) return `Failed to ${_externalActionVerb(envelope, 'present')}${suffix}: ${result.error}`;
-  return `Approved action ${verb}${suffix}.`;
+  if (_externalActionResultVerified(result)) return `Approved action ${verb} and verified${suffix}.`;
+  const verifyReason = result?.verification?.reason || result?.verification?.error || 'no read-after-write verification evidence was returned';
+  return `Approved action ${verb}${suffix}, but verification did not confirm it: ${verifyReason}`;
+}
+/**
+ * Decide whether an external-action result carries positive verification evidence.
+ *
+ * A result that is missing, has an `error`, or explicitly reports `ok === false`
+ * or `success === false` is never treated as verified. Otherwise any one of
+ * `verified === true`, `verification.ok === true`, or `alreadyExecuted === true`
+ * counts as evidence.
+ *
+ * @param {object} [result] - Result object returned by the executed action.
+ * @returns {boolean} True only when one of the evidence flags is strictly `true`.
+ */
+function _externalActionResultVerified(result = {}) {
+  const failed = !result || result.error || result.ok === false || result.success === false;
+  if (failed) return false;
+  // Only a strict boolean `true` counts; truthy strings or numbers do not.
+  const evidenceFlags = [
+    result.verified,
+    result.verification?.ok,
+    result.alreadyExecuted,
+  ];
+  return evidenceFlags.some((flag) => flag === true);
 }
 function _progressToolResultPayload(result, resultStr) {
@@ -1057,6 +1065,7 @@ async function _executeApprovedExternalActions({
     }
     const failed = !!(result && (result.error || result.ok === false || result.success === false));
+    const verified = _externalActionResultVerified(result || {});
     try {
       const confidence = require('./decision/confidence');
       if (envelope.domain && !result?.alreadyExecuted) confidence.recordAction(envelope.domain, !failed);
@@ -1071,7 +1080,9 @@ async function _executeApprovedExternalActions({
         ? 'Approved action was already executed'
         : failed
         ? 'Approved action failed'
-        : 'Approved action executed',
+        : verified
+        ? 'Approved action executed and verified'
+        : 'Approved action executed without verification',
       error: failed ? (result.error || 'External action failed') : null,
     });
     lines.push(_externalActionLine(envelope, result || {}));
@@ -2191,6 +2202,15 @@ async function chat(message, opts = {}) {
     name = normalizedCall.name;
     input = normalizedCall.input;
+    const gatewayReview = reviewExternalActionGateway({
+      toolName: name,
+      input,
+      userMessage: routingMessage || message,
+    });
+    if (!gatewayReview.admitted) {
+      return gatewayReview.result;
+    }
     // Eval hook: allow test harness to intercept tool calls with mock results
     if (typeof opts.toolInterceptor === 'function') {
       const intercepted = await opts.toolInterceptor(name, input);
@@ -2252,6 +2272,16 @@ async function chat(message, opts = {}) {
             args: name === 'run_shell' ? input.args : undefined,
             reason: perm.reason,
             source: perm.source,
+            approval_options: [
+              { id: 'allow_once', label: 'Yes', scope: 'this_tool_call' },
+              { id: 'allow_always', label: 'Always yes for this project/pattern', scope: 'project_permission_pattern' },
+              { id: 'deny', label: 'No', scope: 'this_tool_call' },
+            ],
+            approval_policy: {
+              kind: 'local_permission',
+              allow_always: true,
+              approval_scope: 'project_permission_pattern',
+            },
           });
           // Store resolver so server can call it when user responds
           if (!opts._permissionResolvers) opts._permissionResolvers = new Map();

package/template/wall-e/coding/stream-processor.js CHANGED Viewed

@@ -12,6 +12,7 @@ const { recoverAllowedTextToolCalls } = require('../llm/text-tool-calls');
 const { ArtifactStore } = require('./artifact-store');
 const { normalizeResponse, transformRequest, providerId } = require('./provider-transform');
 const { Confidence, EventName, FailureClass, Provenance, createLaneEvent } = require('./lane-events');
+const { reviewExternalActionGateway } = require('../external-action-gateway');
 async function* streamFromChat(provider, request) {
   const response = normalizeResponse(recoverAllowedTextToolCalls(await provider.chat(request), request.tools), {
@@ -345,6 +346,41 @@ class StreamProcessor extends EventEmitter {
         name: call.name,
         data: { input: call.input },
       });
+      const gatewayReview = reviewExternalActionGateway({
+        toolName: call.name,
+        input: call.input,
+      });
+      if (!gatewayReview.admitted) {
+        const result = gatewayReview.result;
+        state.toolResults.push({ toolCallId: call.id, name: call.name, result });
+        await this._runtimeItem(sessionId, cwd, {
+          threadId: state.threadId,
+          turnId: state.turnId,
+          itemId: call.id,
+          role: 'tool',
+          status: 'completed',
+          name: call.name,
+          data: { result },
+        });
+        await this._record(sessionId, cwd, 'tool', {
+          state: 'blocked',
+          toolCallId: call.id,
+          name: call.name,
+          input: call.input,
+          result,
+        });
+        await this._laneEvent({
+          name: EventName.TOOL_FINISHED,
+          sessionId,
+          cwd,
+          provider: state.provider,
+          model: state.model,
+          runId: state.messageId,
+          confidence: Confidence.HIGH,
+          data: { toolCallId: call.id, name: call.name, blocked: true, reason: result.reason },
+        });
+        return;
+      }
       if (this.permissionService?.authorize) {
         await this._record(sessionId, cwd, 'tool', {
           state: 'permission_check',

package/template/wall-e/coding-orchestrator.js CHANGED Viewed

@@ -565,6 +565,27 @@ function hasToolCall(toolCallHistory = [], names = new Set()) {
   return (toolCallHistory || []).some((call) => names.has(call.name));
 }
+/**
+ * Classify a recorded tool call as verification evidence (tests, lint, build,
+ * screenshots, ...) or not.
+ *
+ * - `browser_screenshot` always counts.
+ * - `run_shell` counts only when its serialized input mentions a known
+ *   verification command.
+ * - Any other tool counts when its name contains a verification keyword.
+ *
+ * NOTE(review): `inputHash` takes precedence over `input` for the shell check.
+ * If `inputHash` is an opaque digest rather than readable command text, the
+ * shell pattern can never match — confirm against the history producer.
+ *
+ * @param {{name?: string, inputHash?: string, input?: object}} [call] - Tool call history entry.
+ * @returns {boolean} Whether the call counts as verification.
+ */
+function isVerificationToolCall(call = {}) {
+  const toolName = String(call.name || '');
+  // Serialized before the name dispatch, so the evaluation order is the same for every tool.
+  const serializedInput = String(call.inputHash || JSON.stringify(call.input || {}));
+  switch (toolName) {
+    case 'browser_screenshot':
+      return true;
+    case 'run_shell':
+      return /\b(?:test|spec|lint|build|typecheck|tsc|pytest|jest|mocha|vitest|playwright|node\s+--(?:test|check)|npm\s+(?:test|run)|pnpm\s+(?:test|run)|yarn\s+(?:test|run)|git\s+diff\s+--check)\b/i.test(serializedInput);
+    default:
+      return /(?:test|verify|screenshot|diagnostic|lint|build)/i.test(toolName);
+  }
+}
+/**
+ * Report whether any entry in the tool call history counts as verification.
+ *
+ * @param {Array<object>} [toolCallHistory] - Recorded tool calls; a falsy value is treated as empty.
+ * @returns {boolean} True when at least one entry satisfies isVerificationToolCall().
+ */
+function hasVerificationEvidence(toolCallHistory = []) {
+  const history = toolCallHistory || [];
+  return history.some((entry) => isVerificationToolCall(entry));
+}
+/**
+ * Detect a final response in which the assistant says verification could not
+ * be performed (e.g. "unable to run the tests", "build not available").
+ *
+ * @param {*} content - Assistant response content; converted with contentToText().
+ * @returns {boolean} True when the text matches one of the blocker phrasings;
+ *   false for blank text.
+ */
+function isVerificationBlockerResponse(content) {
+  const responseText = contentToText(content);
+  if (responseText.trim() === '') return false;
+  const blockerPatterns = [
+    // "<inability phrase> ... <verification verb>" within 120 characters.
+    /\b(?:could not|couldn'?t|unable to|not able to|cannot)\b[\s\S]{0,120}\b(?:test|verify|build|run|execute)\b/i,
+    // "<verification noun> ... <unavailable phrase>" within 120 characters.
+    /\b(?:tests?|verification|build)\b[\s\S]{0,120}\b(?:not run|not available|blocked|unavailable|missing)\b/i,
+  ];
+  return blockerPatterns.some((pattern) => pattern.test(responseText));
+}
 function stripPathLikeTokens(text) {
   return String(text || '').replace(/(?:^|[\s`'"(])((?:\.?[A-Za-z0-9_.@-]+\/)+[A-Za-z0-9_.@-]+)(?=[\s`'",):;.\]]|$)/g, ' ');
 }
@@ -610,6 +631,29 @@ function getNoActionContinuation({ prompt, content, toolCallHistory = [], mode,
   if (!isActionRequiredPrompt(prompt, { mode })) return null;
   const madeEdits = hasToolCall(toolCallHistory, EDIT_TOOL_NAMES);
+  if (madeEdits && !hasVerificationEvidence(toolCallHistory) && !isVerificationBlockerResponse(content)) {
+    const reason = 'The assistant made file changes but ended before running verification.';
+    if (!toolsAvailable) {
+      return {
+        action: 'fail',
+        reason: `${reason} No tool turns remain.`,
+      };
+    }
+    if (nudges >= maxNudges) {
+      return {
+        action: 'fail',
+        reason: `${reason} Verification continuation limit reached.`,
+      };
+    }
+    return {
+      action: 'continue',
+      reason,
+      message: `[SYSTEM] ${reason} This is not complete.\n` +
+        `Run the relevant verification now: tests, lint, build, typecheck, browser screenshot, or at minimum git diff --check when no project test exists.\n` +
+        `Only summarize success after a tool result proves the work. If verification is genuinely impossible, state the blocker with tool-backed evidence.\n` +
+        `Working directory: ${cwd}`,
+    };
+  }
   if (madeEdits) return null;
   if (isLegitimateNoEditResponse(content, toolCallHistory)) return null;
@@ -3294,6 +3338,7 @@ module.exports = {
   isActionRequiredPrompt,
   isPrematureActionResponse,
   getNoActionContinuation,
+  hasVerificationEvidence,
   subtaskRequiresFileChanges,
   screenshotTrackerHook,
   collectEmptyChangedFiles,

package/template/wall-e/docs/external-action-controller.md CHANGED Viewed

@@ -32,6 +32,38 @@ user confirmation, and the exact approved envelope is replayed back to Wall-E.
 Wall-E then executes the original payload directly rather than asking the model
 to recreate it.
+## Approval Tiers
+Wall-E uses two approval tiers:
+1. **Local permission approval** is for reversible project-local work such as
+   shell test commands, builds, and file edits. The UI may offer `Yes`,
+   `Always yes for this project/pattern`, and `No`. The persisted rule is scoped
+   to the project plus the permission pattern, never to vague model wording.
+2. **External action approval** is for real-world side effects: email, calendar,
+   Slack, SMS, reminders, and notifications. These approvals are exact-payload
+   approvals. The default choices are `Approve once` or `Cancel`; broad
+   `always yes` is intentionally disabled because the next payload may target a
+   different person, account, calendar, or time.
+This keeps the fast Claude Code-style flow for local coding work without giving
+models an ambient ability to send messages or schedule events.
+## Side-Effect Gateway
+All side-effecting routes pass through a host-side gateway before normal
+permission checks. The gateway blocks shell, AppleScript, and generic MCP calls
+that try to dispatch external actions directly, for example:
+- `osascript`/JXA creating Calendar events.
+- `gws calendar events insert`.
+- Gmail `messages.send` through shell, curl, or MCP.
+- Slack `chat.postMessage` through shell or MCP.
+The gateway returns a structured `external_action_gateway` tool result that
+tells the model which dedicated tool to use. The action is not executed, and the
+model is instructed not to claim success.
 Sequence:
 1. Wall-E blocks `mail_send`, `mail_reply`, `calendar_create`, and other external actions and
@@ -46,8 +78,9 @@ Sequence:
    to `chat()`.
 5. `chat()` validates that each approval id/hash still matches the reconstructed
    tool input, checks validation issues, applies a session-scoped idempotency
-   guard, executes the local tool, and emits normal `tool_call` / `tool_result`
-   progress events.
+   guard, executes the local tool, runs read-after-write verification where the
+   connector supports it, and emits normal `tool_call` / `tool_result` progress
+   events.
 This means provider wording is not part of the safety decision. DeepSeek,
 Anthropic, OpenAI, and other providers all use the same envelope replay path.
@@ -81,14 +114,39 @@ Anthropic, OpenAI, and other providers all use the same envelope replay path.
   approve mail/calendar side effects.
 - Approved envelopes are idempotent per Wall-E session and payload hash to avoid
   accidental duplicate sends from double-submit or retry.
+- Calendar approval envelopes preserve `account`, `source`, `calendarId`,
+  `calendar`, `location`, and time fields so replay cannot silently fall back to
+  a different provider or calendar.
+- Google Calendar creates verify with `calendar.events.get` against the same
+  account, calendar id, and event id before Wall-E can summarize the action as
+  verified.
+- Gmail sends and replies verify with `gmail.messages.get`. Replies keep the
+  original message id separately from the sent reply id so thread evidence is
+  not confused with source evidence.
+- Final summary guards treat `sent` or `created` without `verified` evidence as
+  incomplete. Shell stdout, AppleScript UIDs, or model prose do not count as
+  external-action completion evidence.
+## Coding-Agent Completion Contract
+Wall-E coding sessions use the same evidence rule. If an action-oriented coding
+prompt caused file edits, the agent must run a relevant verification tool before
+ending with a success summary. Accepted evidence includes tests, lint, build,
+typecheck, Playwright/browser screenshots for UI work, or `git diff --check`
+when no project-specific verifier exists. If verification is impossible, the
+agent must say so with tool-backed evidence instead of claiming success.
 ## Tests
 Focused regressions:
 - `claude-task-manager/tests/walle-permission-policy.test.js`
+- `claude-task-manager/tests/walle-external-actions.test.js`
 - `wall-e/tests/external-action-controller.test.js`
+- `wall-e/tests/external-action-gateway.test.js`
 - `wall-e/tests/local-tools-gws-live-files.test.js`
+- `wall-e/tests/coding-orchestrator.test.js`
+- `wall-e/tests/coding-stream-processor.test.js`
 - `wall-e/tests/execution-trace.test.js`
 - `wall-e/tests/chat.test.js` with `stages a draft email`

package/template/wall-e/external-action-controller.js CHANGED Viewed

@@ -133,7 +133,13 @@ function targetForTool(toolName, input = {}) {
     return { channel: input.channel_name || input.channel || null };
   }
   if (toolName === 'calendar_create') {
-    return { calendar: input.calendar || null, attendees: normalizeAddressList(input.attendees) };
+    return {
+      calendar: input.calendar || input.calendar_name || input.calendarId || input.calendar_id || null,
+      calendarId: input.calendarId || input.calendar_id || null,
+      account: input.account || null,
+      source: input.source || input.provider || null,
+      attendees: normalizeAddressList(input.attendees),
+    };
   }
   if (toolName === 'reminder_create') {
     return { list: input.list || null };
@@ -163,6 +169,7 @@ function payloadForTool(toolName, input = {}) {
       title: input.title || '',
       start: calendarStart(input),
       end: calendarEnd(input),
+      location: input.location || '',
       notes: input.notes || '',
     };
   }
@@ -208,10 +215,14 @@ function inputForExternalActionEnvelope(envelope = {}) {
   if (toolName === 'calendar_create') {
     return {
       calendar: target.calendar || null,
+      calendarId: target.calendarId || null,
+      account: actor.account || target.account || null,
+      source: target.source || null,
       attendees: normalizeAddressList(target.attendees),
       title: payload.title || '',
       start_date: payload.start || null,
       end_date: payload.end || null,
+      location: payload.location || '',
       notes: payload.notes || '',
     };
   }
@@ -315,6 +326,17 @@ function buildBlockedToolResult(envelope, decision) {
       ? 'External action was staged because the user asked for prepared content, not dispatch.'
       : 'External action requires validation and explicit confirmation before execution.',
     action: envelope,
+    approval_options: isPreview
+      ? []
+      : [
+        { id: 'approve_once', label: 'Approve once', scope: 'exact_action_payload' },
+        { id: 'cancel', label: 'Cancel', scope: 'exact_action_payload' },
+      ],
+    approval_policy: {
+      kind: 'external_action',
+      allow_always: false,
+      approval_scope: 'exact_action_payload',
+    },
     draft: envelope.domain === 'email'
       ? {
         to: envelope.toolName === 'mail_reply' ? 'derived_from_original_message' : envelope.target.to,

package/template/wall-e/external-action-gateway.js ADDED Viewed

@@ -0,0 +1,163 @@
+'use strict';
+// Rules for spotting external side effects hidden inside shell / AppleScript
+// command text. Each rule names the affected domain and operation, the
+// dedicated tool to recommend instead, a human-readable reason, and the regex
+// patterns that trigger it. classifyExternalActionBypass() walks the rules in
+// array order and reports the first rule with any matching pattern.
+//
+// In the calendar, email, and reminder rules the `osascript`-prefixed pattern
+// only matches text that the unprefixed pattern right after it also matches.
+const SHELL_SIDE_EFFECT_RULES = [
+  {
+    domain: 'calendar',
+    operation: 'create_event',
+    recommendedTool: 'calendar_create',
+    reason: 'Calendar event creation must use calendar_create so account, calendar, approval, and verification are tracked.',
+    patterns: [
+      // `gws ... calendar ... event(s) ... insert` CLI invocations.
+      /\bgws\b[\s\S]*\bcalendar\b[\s\S]*\bevents?\b[\s\S]*\binsert\b/i,
+      // Google Calendar API host followed by an /events path. The HTTP method
+      // is not inspected, so read requests to /events match as well.
+      /calendar\.googleapis\.com[\s\S]*\/events/i,
+      // AppleScript / JXA scripts that create a Calendar event.
+      /osascript[\s\S]*(?:tell\s+application\s+"Calendar"|Application\(["']Calendar["']\))[\s\S]*(?:make\s+new\s+event|events\.push|new\s+event\s+with\s+properties)/i,
+      /(?:tell\s+application\s+"Calendar"|Application\(["']Calendar["']\))[\s\S]*(?:make\s+new\s+event|events\.push|new\s+event\s+with\s+properties)/i,
+    ],
+  },
+  {
+    domain: 'email',
+    operation: 'send_email',
+    recommendedTool: 'mail_send or mail_reply',
+    reason: 'Email dispatch must use mail_send or mail_reply so recipients, threading, approval, and sent-mail verification are tracked.',
+    patterns: [
+      // `gws ... gmail ... +send | messages send | users messages send`.
+      /\bgws\b[\s\S]*\bgmail\b[\s\S]*(?:\+send|messages\s+send|users\s+messages\s+send)\b/i,
+      // Gmail API host followed by a /messages/send path.
+      /gmail\.googleapis\.com[\s\S]*\/messages\/send\b/i,
+      // AppleScript / JXA scripts that address Mail and then create or send a message.
+      /osascript[\s\S]*(?:tell\s+application\s+"Mail"|Application\(["']Mail["']\))[\s\S]*(?:make\s+new\s+outgoing\s+message|send\b)/i,
+      /(?:tell\s+application\s+"Mail"|Application\(["']Mail["']\))[\s\S]*(?:make\s+new\s+outgoing\s+message|send\b)/i,
+    ],
+  },
+  {
+    domain: 'slack',
+    operation: 'send_message',
+    recommendedTool: 'slack_send_message',
+    reason: 'Slack message dispatch must use slack_send_message so channel, approval, and execution evidence are tracked.',
+    patterns: [
+      // Direct Web API URL, or any text mentioning slack followed by chat.postMessage.
+      /slack\.com\/api\/chat\.postMessage/i,
+      /\bslack\b[\s\S]*\bchat\.postMessage\b/i,
+    ],
+  },
+  {
+    domain: 'reminder',
+    operation: 'create_reminder',
+    recommendedTool: 'reminder_create',
+    reason: 'Reminder creation must use reminder_create so approval and execution evidence are tracked.',
+    patterns: [
+      // AppleScript / JXA scripts that address Reminders and create a reminder.
+      /osascript[\s\S]*(?:tell\s+application\s+"Reminders"|Application\(["']Reminders["']\))[\s\S]*(?:make\s+new\s+reminder|new\s+reminder)/i,
+      /(?:tell\s+application\s+"Reminders"|Application\(["']Reminders["']\))[\s\S]*(?:make\s+new\s+reminder|new\s+reminder)/i,
+    ],
+  },
+];
+// Rules for spotting external side effects requested through MCP tools. Each
+// rule has a single `pattern`, which classifyExternalActionBypass() tests
+// against "<call name> <JSON-serialized arguments>" (truncated to 1000
+// characters). Rules are checked in array order; the first match wins.
+//
+// The patterns carry no word boundaries, so they match substrings: for
+// example "add" also matches inside "address" and "post" inside "postgres".
+// Argument values are part of the tested text, not just the tool name.
+const MCP_SIDE_EFFECT_RULES = [
+  {
+    domain: 'calendar',
+    operation: 'create_event',
+    recommendedTool: 'calendar_create',
+    // calendar-ish word followed by a create-ish verb, or the reverse order.
+    pattern: /(?:calendar|google[-_ ]?calendar).*?(?:create|insert|add|schedule)|(?:create|insert|add|schedule).*?(?:calendar|event)/i,
+  },
+  {
+    domain: 'email',
+    operation: 'send_email',
+    recommendedTool: 'mail_send or mail_reply',
+    // mail-ish word followed by a send-ish verb, or the reverse order.
+    pattern: /(?:gmail|mail|email).*?(?:send|reply|respond)|(?:send|reply|respond).*?(?:gmail|mail|email)/i,
+  },
+  {
+    domain: 'slack',
+    operation: 'send_message',
+    recommendedTool: 'slack_send_message',
+    // "slack" followed by send/post/message, or send/post followed by slack/channel.
+    pattern: /(?:slack).*?(?:send|post|message)|(?:send|post).*?(?:slack|channel)/i,
+  },
+];
+/**
+ * Flatten a shell-like tool input into the single string that the side-effect
+ * patterns are matched against.
+ *
+ * A string `command` wins: it is returned with any non-empty `args` array
+ * appended, space-separated. Otherwise a string `script` is returned as-is.
+ *
+ * @param {object} [input] - Tool input; non-objects yield an empty string.
+ * @returns {string} Command text, or '' when nothing usable is present.
+ */
+function commandText(input = {}) {
+  const isObject = Boolean(input) && typeof input === 'object';
+  if (!isObject) return '';
+  const { command, args, script } = input;
+  if (typeof command === 'string') {
+    const hasArgs = Array.isArray(args) && args.length > 0;
+    return hasArgs ? [command, ...args].join(' ') : command;
+  }
+  return typeof script === 'string' ? script : '';
+}
+/**
+ * Detect tool calls that try to perform an external side effect (calendar,
+ * email, Slack, reminders) through a generic shell, AppleScript, or MCP route
+ * instead of the dedicated tool.
+ *
+ * Shell-like tools are matched by command text against SHELL_SIDE_EFFECT_RULES;
+ * MCP calls are matched by "<call name> <JSON arguments>" against
+ * MCP_SIDE_EFFECT_RULES. The first matching rule wins.
+ *
+ * This function never throws on malformed input: a `null` or non-object
+ * `input`, or arguments that cannot be JSON-serialized, are classified from
+ * the tool name alone.
+ *
+ * @param {string} toolName - Name of the tool being invoked.
+ * @param {object} [input] - Tool input as supplied by the model.
+ * @returns {{toolName: string, domain: string, operation: string,
+ *   recommendedTool: string, reason: string, evidence: string} | null}
+ *   A finding describing the bypass attempt, or null when the call is admitted.
+ */
+function classifyExternalActionBypass(toolName, input = {}) {
+  const name = String(toolName || '').trim();
+  // The `= {}` default only covers `undefined`. A model-supplied `null` (or a
+  // primitive) would otherwise throw on property access in the MCP branch.
+  const safeInput = input && typeof input === 'object' ? input : {};
+  if (name === 'run_shell' || name === 'shell' || name === 'bash' || name === 'terminal' || name === 'applescript') {
+    const text = commandText(safeInput);
+    if (!text) return null;
+    for (const rule of SHELL_SIDE_EFFECT_RULES) {
+      if (rule.patterns.some((pattern) => pattern.test(text))) {
+        return {
+          toolName: name,
+          domain: rule.domain,
+          operation: rule.operation,
+          recommendedTool: rule.recommendedTool,
+          reason: rule.reason,
+          // Cap evidence so a huge command is not echoed back in full.
+          evidence: text.slice(0, 500),
+        };
+      }
+    }
+  }
+  if (name === 'mcp_call' || name.startsWith('mcp__')) {
+    const callName = name === 'mcp_call'
+      ? `${safeInput.server || ''}.${safeInput.tool || ''}`
+      : name;
+    let serializedArgs = '';
+    try {
+      serializedArgs = JSON.stringify(safeInput.arguments || safeInput.args || {});
+    } catch (_err) {
+      // Cyclic structures and BigInt values make JSON.stringify throw. The
+      // gateway must still reach a decision, so classify by call name only.
+      serializedArgs = '';
+    }
+    const text = `${callName} ${serializedArgs}`.slice(0, 1000);
+    for (const rule of MCP_SIDE_EFFECT_RULES) {
+      if (rule.pattern.test(text)) {
+        return {
+          toolName: name,
+          domain: rule.domain,
+          operation: rule.operation,
+          recommendedTool: rule.recommendedTool,
+          reason: `MCP side-effect tools must be routed through ${rule.recommendedTool} so approval and verification are tracked.`,
+          evidence: text.slice(0, 500),
+        };
+      }
+    }
+  }
+  return null;
+}
+/**
+ * Build the structured tool result returned to the model when the gateway
+ * refuses a call. The result marks the action as blocked, not executed, and
+ * not verified, and tells the model which dedicated tool to use instead.
+ *
+ * @param {{toolName: string, domain: string, operation: string,
+ *   recommendedTool: string, reason: string, evidence: string}} finding
+ *   Finding produced by classifyExternalActionBypass().
+ * @returns {object} The `external_action_gateway` tool result.
+ */
+function buildGatewayBlockedResult(finding) {
+  const { domain, operation, toolName, recommendedTool, reason, evidence } = finding;
+  const noRetry = `Do not retry this ${domain} side effect through ${toolName}.`;
+  const useDedicated = `Use ${recommendedTool} instead so Wall-E can stage an exact approval envelope, execute the approved payload, and verify the result before summarizing success.`;
+  const noClaim = 'Do not claim this action was executed.';
+  // External actions are approved per exact payload; "always allow" is never offered.
+  const approvalPolicy = {
+    kind: 'external_action',
+    allow_always: false,
+    approval_scope: 'exact_action_payload',
+  };
+  return {
+    external_action: true,
+    external_action_gateway: true,
+    blocked: true,
+    executed: false,
+    verified: false,
+    decision: 'dedicated_tool_required',
+    domain,
+    operation,
+    original_tool: toolName,
+    recommended_tool: recommendedTool,
+    reason,
+    evidence,
+    approval_policy: approvalPolicy,
+    model_instruction: `${noRetry} ${useDedicated} ${noClaim}`,
+  };
+}
+/**
+ * Gateway entry point: decide whether a tool call may proceed to the normal
+ * permission checks or must be blocked and redirected to a dedicated tool.
+ *
+ * Only `toolName` and `input` are read; any other properties on the options
+ * object are ignored.
+ *
+ * @param {{toolName?: string, input?: object}} [options] - The call under review.
+ * @returns {{admitted: true} | {admitted: false, finding: object, result: object}}
+ *   `admitted: true` when nothing was detected; otherwise the finding plus the
+ *   blocked tool result to hand back to the model.
+ */
+function reviewExternalActionGateway({ toolName, input = {} } = {}) {
+  const finding = classifyExternalActionBypass(toolName, input);
+  if (finding) {
+    const result = buildGatewayBlockedResult(finding);
+    return { admitted: false, finding, result };
+  }
+  return { admitted: true };
+}
+// Public surface of the gateway module: the blocked-result builder, the
+// classifier, and the review entry point. The rule tables and commandText()
+// are not exported.
+module.exports = {
+  buildGatewayBlockedResult,
+  classifyExternalActionBypass,
+  reviewExternalActionGateway,
+};

package/template/wall-e/fly.toml CHANGED Viewed

@@ -20,6 +20,7 @@ primary_region = "sjc"  # Change to your nearest region: https://fly.io/docs/ref
   WALL_E_PORT = "3457"
   NODE_ENV = "production"
   WALLE_TELEMETRY_SERVER = "1"
+  WALLE_TELEMETRY_CLEANUP_ENABLED = "1"
 [[vm]]
   size = "shared-cpu-1x"