npm - agent-tempo - Versions diffs - 1.7.0-beta.3 → 1.7.0-beta.5 - Mend

agent-tempo 1.7.0-beta.3 → 1.7.0-beta.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dashboard/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "agent-tempo-dashboard",
   "private": true,
-  "version": "1.7.0-beta.3",
+  "version": "1.7.0-beta.5",
   "type": "module",
   "description": "Web dashboard for agent-tempo. Bundled into the npm package; served by the daemon at /dashboard/*.",
   "scripts": {

package/dist/pi/headless.d.ts CHANGED Viewed

@@ -38,42 +38,37 @@ export interface RunHeadlessPiOptions {
     continueSessionId?: string;
 }
 /**
- * Build the `DefaultResourceLoader` options for a headless Pi player.
+ * #715 — compute the registration-level `excludeTools` denylist for
+ * `createAgentSession`. Excluded tools are never registered → ABSENT from the
+ * model's toolset AND system prompt: the LLM cannot request what it never sees.
  *
- * SECURITY — S2 (MD-C deny-list soundness). The `restricted` tool gate is a
- * DENY-LIST over shell/exec tool *names* (tool-capability.ts EXEC_TOOLS, via
- * `classify(name) === 'exec'` — F1 replaced extension.ts's former local set). That
- * guarantee — "restricted = no host execution" — holds ONLY IF no third-party
- * extension can register an un-blacklisted execution tool (e.g. a custom
- * `python` / `npm` / `run` tool). It therefore depends on a hard structural
- * fact: which extensions Pi loads.
+ * This is a registration-level FLOOR beneath the call-time MD-C handler + #712
+ * gate. It is tamper-RESISTANT, NOT tamper-PROOF: it defends a PROMPT-INJECTED
+ * agent (and holds even if the call-time gate had a bug — the tool simply isn't
+ * there), but it does NOT defend against PROCESS COMPROMISE — a tampered /
+ * modified extension can re-register or un-exclude tools (this is OUR code
+ * passing a denylist; an attacker who modifies the code/process bypasses it).
+ * That residual is OS-sandbox + supply-chain integrity, tracked as #724.
  *
- * Verified against the installed Pi SDK 0.78 source (NOT assumed):
- *   - `DefaultResourceLoader.reload()` (resource-loader.js:271-276) builds
- *     `extensionPaths = noExtensions ? cliEnabledExtensions
- *                                     : merge(cliEnabledExtensions, enabledExtensions)`
- *     where `enabledExtensions` (line 229) are the DISK/package extensions from
- *     `packageManager.resolve()` (`~/.pi/agent/extensions/`, `<cwd>/.pi/extensions/`,
- *     installed packages). `loadExtensions(extensionPaths)` then loads them and
- *     MERGES with our inline factories (lines 274-276).
- *   - `noExtensions` defaults to `false` (constructor, line 132) — so the naive
- *     loader DOES load disk extensions. That is the S2 gap.
+ * `excludeTools` is matched by NAME against BOTH Pi built-ins AND
+ * extension-registered tools (incl. agent-tempo's MCP tools via `renderToPi`), so
+ * this list contains ONLY Pi-built-in / exec names — never agent-tempo tool names
+ * (`cue`/`report`/`recruit`/…). Posture:
+ *   - `toolAccess === 'restricted'` → exclude {@link EXEC_TOOLS} (exec/bash
+ *     registration-absent; a strict upgrade of the prior call-time block, and the
+ *     headless default → the model never even sees exec).
+ *   - `guardrailPolicy === 'observe-only'` → also exclude the Pi built-in act
+ *     tools ({@link PI_BUILTIN_ACT_TOOLS}); read/grep/glob stay. The agent-tempo
+ *     MCP act tools (recruit/destroy/…) stay covered by the client-side no-act
+ *     handler (commit 5) — excludeTools handles the Pi built-ins only.
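+ *   - `monitored` / `supervised` / `autonomous` → the policy itself adds NO
+ *     exclusion: with a non-`restricted` `toolAccess`, exec tools stay REGISTERED
+ *     so they can be gated/approved per-use (#712). (`supervised` =
+ *     approve-and-run, NOT exec-absent.) `toolAccess === 'restricted'` still
+ *     excludes exec under any of these policies.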
  *
- * Fix (= security's "exclude the extensions dir", done structurally):
- *   - `noExtensions: true` → `extensionPaths` collapses to `cliEnabledExtensions`,
- *     which is empty because we pass NO `additionalExtensionPaths`. So
- *     `loadExtensions([])` registers nothing from disk/packages.
- *   - Inline `extensionFactories` load UNCONDITIONALLY (reload() line 275 is not
- *     gated by `noExtensions`), so our agent-tempo extension still attaches.
- * Net: the ONLY tools present are Pi's built-ins (bash/read/edit/write/grep —
- * all covered by the deny-list) + our agent-tempo MCP tools (no exec). No
- * third-party tool can slip past the deny-list. Skills/prompts/themes cannot
- * register tools, so they are not a vector and are left at defaults.
- *
- * Kept as a pure, exported helper so the `noExtensions: true` invariant has a
- * unit regression test (test/pi-headless-loader.test.ts) without needing the Pi
- * SDK installed.
+ * Pure + exported so the registration-absence invariant has a unit regression
+ * test without the Pi SDK (mirrors {@link buildPiResourceLoaderOptions}).
  */
+export declare function computeExcludeTools(toolAccess: PiToolAccess, guardrailPolicy: GuardrailPolicy | undefined): string[];
 export declare function buildPiResourceLoaderOptions(params: {
     cwd: string;
     agentDir: string;

package/dist/pi/headless.js CHANGED Viewed

@@ -1,5 +1,6 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
+exports.computeExcludeTools = computeExcludeTools;
 exports.buildPiResourceLoaderOptions = buildPiResourceLoaderOptions;
 exports.runHeadlessPi = runHeadlessPi;
 /**
@@ -31,6 +32,7 @@ exports.runHeadlessPi = runHeadlessPi;
 const config_1 = require("../config");
 const sdk_probe_1 = require("../utils/sdk-probe");
 const extension_1 = require("./extension");
+const tool_capability_1 = require("../security/tool-capability");
 const probe_1 = require("./probe");
 const session_seed_1 = require("./session-seed");
 const log = (...args) => {
@@ -114,6 +116,61 @@ async function resolveModel(modelStr) {
  * unit regression test (test/pi-headless-loader.test.ts) without needing the Pi
  * SDK installed.
  */
+/**
+ * Pi BUILT-IN mutating ("act") tool names, excluded at registration for the
+ * `observe-only` no-act posture (#715). Pi's default built-ins are
+ * read/bash/edit/write; `multiedit` is listed defensively (a no-op if Pi doesn't
+ * register it). `bash`/exec names are deliberately NOT in this list — they are
+ * covered separately by {@link EXEC_TOOLS} (from `../security/tool-capability`),
+ * which `computeExcludeTools` adds alongside this list for `observe-only`. We
+ * keep the READ built-ins (read/grep/glob/ls). These are Pi BUILT-IN names —
+ * deliberately NOT agent-tempo MCP tool names, so excluding them never removes
+ * an agent-tempo coordination tool (those stay handler-gated, commit 5).
+ */
+const PI_BUILTIN_ACT_TOOLS = ['write', 'edit', 'multiedit'];
+/**
+ * #715 — compute the registration-level `excludeTools` denylist for
+ * `createAgentSession`. Excluded tools are never registered → ABSENT from the
+ * model's toolset AND system prompt: the LLM cannot request what it never sees.
+ *
+ * This is a registration-level FLOOR beneath the call-time MD-C handler + #712
+ * gate. It is tamper-RESISTANT, NOT tamper-PROOF: it defends a PROMPT-INJECTED
+ * agent (and holds even if the call-time gate had a bug — the tool simply isn't
+ * there), but it does NOT defend against PROCESS COMPROMISE — a tampered /
+ * modified extension can re-register or un-exclude tools (this is OUR code
+ * passing a denylist; an attacker who modifies the code/process bypasses it).
+ * That residual is OS-sandbox + supply-chain integrity, tracked as #724.
+ *
+ * `excludeTools` is matched by NAME against BOTH Pi built-ins AND
+ * extension-registered tools (incl. agent-tempo's MCP tools via `renderToPi`), so
+ * this list contains ONLY Pi-built-in / exec names — never agent-tempo tool names
+ * (`cue`/`report`/`recruit`/…). Posture:
+ *   - `toolAccess === 'restricted'` → exclude {@link EXEC_TOOLS} (exec/bash
+ *     registration-absent; a strict upgrade of the prior call-time block, and the
+ *     headless default → the model never even sees exec). This applies
+ *     regardless of `guardrailPolicy`.
+ *   - `guardrailPolicy === 'observe-only'` → exclude {@link EXEC_TOOLS} AND the
+ *     Pi built-in act tools ({@link PI_BUILTIN_ACT_TOOLS}); read/grep/glob stay.
+ *     The agent-tempo MCP act tools (recruit/destroy/…) stay covered by the
+ *     client-side no-act handler (commit 5) — excludeTools handles the Pi
+ *     built-ins only.
+ *   - `monitored` / `supervised` / `autonomous` (or `undefined`) → the policy
+ *     itself adds NO exclusion: with a non-`restricted` `toolAccess`, exec tools
+ *     stay REGISTERED so they can be gated/approved per-use (#712).
+ *     (`supervised` = approve-and-run, NOT exec-absent.)
+ *
+ * Pure + exported so the registration-absence invariant has a unit regression
+ * test without the Pi SDK (mirrors {@link buildPiResourceLoaderOptions}).
+ *
+ * @param toolAccess - Only the value `'restricted'` triggers an exclusion here;
+ *   any other value contributes nothing.
+ * @param guardrailPolicy - Only the value `'observe-only'` triggers an exclusion
+ *   here; any other value, including `undefined`, contributes nothing.
+ * @returns A fresh array of de-duplicated tool names in insertion order (exec
+ *   names first, then the Pi built-in act names); empty when neither condition
+ *   holds.
+ */
+function computeExcludeTools(toolAccess, guardrailPolicy) {
+    const exclude = new Set();
+    if (toolAccess === 'restricted') {
+        for (const t of tool_capability_1.EXEC_TOOLS)
+            exclude.add(t);
+    }
+    if (guardrailPolicy === 'observe-only') {
+        for (const t of tool_capability_1.EXEC_TOOLS)
+            exclude.add(t); // no-act ⊇ no-exec; the Set dedupes names already added by the 'restricted' branch
+        for (const t of PI_BUILTIN_ACT_TOOLS)
+            exclude.add(t);
+    }
+    return [...exclude];
+}
 function buildPiResourceLoaderOptions(params) {
     return {
         cwd: params.cwd,
@@ -192,10 +249,20 @@ async function runHeadlessPi(opts = {}) {
     // heartbeat). The SDK's own doc comment (sdk.js:74-83) prescribes this exact
     // construct → reload() → pass-as-resourceLoader sequence.
     await resourceLoader.reload();
+    // #715 — registration-level exec/act exclusion (the true "agent physically
+    // lacks the tools" boundary; see computeExcludeTools). Excluded tools are never
+    // registered, so they're absent from the model's toolset + system prompt — a
+    // hard layer beyond the call-time MD-C handler + #712 gate (kept as
+    // belt-and-suspenders). Empty for monitored/supervised/autonomous+standard.
+    const excludeTools = computeExcludeTools(toolAccess, opts.guardrailPolicy);
+    if (excludeTools.length > 0) {
+        log(`#715: excluding ${excludeTools.length} tool(s) at registration (toolAccess=${toolAccess}, guardrailPolicy=${opts.guardrailPolicy ?? 'autonomous'}): ${excludeTools.join(', ')}`);
+    }
     const { session } = await createAgentSession({
         cwd: process.cwd(),
         agentDir,
         ...(model ? { model } : {}),
+        ...(excludeTools.length > 0 ? { excludeTools } : {}),
         resourceLoader,
         // H1 (#645): in-memory session (seeded above via the session-seed chokepoint).
         // H2 will seed it from agent-tempo durable state (ENV.PI_CONTINUE_SESSION

package/dist/types.d.ts CHANGED Viewed

@@ -238,24 +238,39 @@ export interface SessionMetadata {
      * the real posture on EVERY attach (across restart / migrate / re-attach), so
      * a previously-`supervised` agent stays supervised. (tempo-architect ruling.)
      *
-     * **★ Enforcement scope (#712 — claims EXACTLY this, no more).** The daemon
-     * enforces fail-closed for a `supervised` player's gate ENGAGEMENTS: the gate's
-     * `open()` cross-check derives the `failMode` from THIS durable policy (read by
-     * the daemon, populated at spawn + lazily resolved on a cache-miss), so the
-     * `failMode` an engaging agent stamps on its `gate_pending` frame can't
-     * self-downgrade a supervised player out of fail-closed (a `'open'` claim against
-     * a `supervised` policy is overridden to `closed` and a `failmode-override`
-     * audit record is written). This is **NOT a hard boundary** against an agent
-     * that SKIPS gate-engagement entirely (runs the tool without emitting a
-     * `gate_pending` frame) or bypasses the client-side MD-C exec-block — a true
-     * boundary requires spawn-time tool-restriction (the agent physically lacks the
-     * tools), tracked separately in **#715**.
+     * **★ Enforcement scope (#712/#715).** `supervised` is the daemon-enforced
+     * approval boundary for the realistic threat: a prompt-injected agent. A
+     * manipulated LLM can only *emit* tool-call requests — Pi routes every one to
+     * agent-tempo's `tool_call` handler, which engages the gate (non-`low-risk`;
+     * #712 daemon-computes `failMode` from this durable policy, falling `closed` on
+     * any lookup failure — no-fail-open, so an engaging agent can't self-downgrade).
+     * The agent **cannot** skip the gate — it doesn't control the hook.
+     *
+     * **#715 adds a registration-level floor.** For `toolAccess: 'restricted'` (and
+     * `observe-only`'s act tools) the exec/act tools are EXCLUDED at
+     * `createAgentSession` (`excludeTools`) → **absent** from the model's toolset and
+     * system prompt entirely; the LLM cannot request what it never sees. That is
+     * stronger than a call-time block — it holds even if the call-time gate had a bug
+     * (the tool simply isn't there). A `supervised` player with `toolAccess:
+     * 'standard'` keeps exec **registered + gated** (approve-per-use), so this floor
+     * applies only to the no-exec (`restricted`) and no-act (`observe-only`)
+     * postures, not to a `supervised`+`standard` player.
+     *
+     * **Residual (all postures): process compromise** — code execution *inside* the
+     * Pi process (in-process syscalls; host RCE bypassing the handler), OR a
+     * tampered / modified extension that un-excludes or re-registers tools.
+     * `excludeTools` is OUR code passing a denylist; an attacker who modifies that
+     * code or the process bypasses it. The only defense is OS-level sandboxing +
+     * supply-chain integrity, a separate future `'sandboxed'` posture (#724). So:
+     * **tamper-RESISTANT** vs prompt-injection + an honest gate bug; **NOT
+     * tamper-PROOF** vs a compromised process. Against prompt-injection — the
+     * realistic threat — it **is** a real enforcement boundary; #724 is not a gap in
+     * that scope.
      *
      * **Post-restart window:** on daemon restart the in-memory ingest tokens are
      * invalidated, so existing players' gate engagements are rejected (403) until a
      * re-spawn re-mints. In that window a `supervised` player's gate-client
-     * fail-closes on its own derived deadline (client-side safety holds), but the
-     * gate is NOT daemon-mediated — the #715 client-cooperative residual.
+     * fail-closes on its own derived deadline (client-side safety holds, not
+     * daemon-mediated) — same process-compromise residual, not a distinct gap.
      */
     guardrailPolicy?: GuardrailPolicy;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-tempo",
-  "version": "1.7.0-beta.3",
+  "version": "1.7.0-beta.5",
   "description": "Many agents, one tempo. Durable coordination for multi-agent work via Temporal.",
   "keywords": [
     "mcp",
@@ -72,12 +72,12 @@
     "copilot-bridge": "ts-node src/adapters/copilot/adapter.ts",
     "clean:test": "node -e \"require('fs').rmSync('dist-test',{recursive:true,force:true})\"",
     "build:test": "npm run clean:test && tsc -p test/tsconfig.json",
-    "pretest": "npm run build:test",
+    "pretest": "node scripts/check-bundle-present.js && npm run build:test",
     "test:tui": "vitest run",
     "test:conformance": "npm run build:test && mocha --config .mocharc.conformance.yml",
-    "pretest:shard-1": "npm run build:test && npm run build:scripts",
+    "pretest:shard-1": "node scripts/check-bundle-present.js && npm run build:test && npm run build:scripts",
     "test:shard-1": "node dist/scripts/run-shard.js 1",
-    "pretest:shard-2": "npm run build:test && npm run build:scripts",
+    "pretest:shard-2": "node scripts/check-bundle-present.js && npm run build:test && npm run build:scripts",
     "test:shard-2": "node dist/scripts/run-shard.js 2",
     "test": "mocha && vitest run",
     "lint:surface-drift": "node scripts/check-surface-drift.js",