npm - @cyberdyne-systems/agent-safety - Versions diffs - 2026.3.14 → 2026.3.15 - Mend

@cyberdyne-systems/agent-safety 2026.3.14 → 2026.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/index.ts CHANGED Viewed

@@ -23,6 +23,7 @@ import { AuditLog } from "./src/audit-log.js";
 import { toolNameToCategory } from "./src/constants.js";
 import type { Verdict } from "./src/constants.js";
 import { createSafetyTool } from "./src/safety-tool.js";
+import { RateLimiter } from "./src/rate-limiter.js";
 import { StakeholderStore } from "./src/stakeholder-store.js";
 import { validateAction, quickCheck } from "./src/validator.js";
@@ -31,6 +32,7 @@ export default function register(api: OpenClawPluginApi) {
   const store = new StakeholderStore(join(stateDir, "stakeholders.json"));
   const auditLog = new AuditLog(500);
   const approvalMgr = new ApprovalManager();
+  const rateLimiter = new RateLimiter();
   // Read config
   const pluginConfig = (api.pluginConfig ?? {}) as {
@@ -92,6 +94,7 @@ export default function register(api: OpenClawPluginApi) {
         actionCategory,
         requester,
         params: params as Record<string, unknown>,
+        rateLimiter,
       });
       if (quickResult) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cyberdyne-systems/agent-safety",
-  "version": "2026.3.14",
+  "version": "2026.3.15",
   "description": "Agent safety system: stakeholder model, action validator, and safety dashboard — based on arXiv:2602.20021",
   "type": "module",
   "dependencies": {

package/src/constants.ts CHANGED Viewed

@@ -72,6 +72,14 @@ export type Stakeholder = {
   channel: string;
   uid: string | null;
   allowedActions: ActionCategory[];
+  /** Per-stakeholder egress allowlist — domains permitted for external_network.
+   *  Empty array = no restrictions (all allowed). Non-empty = only listed domains. */
+  egressAllowlist?: string[];
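+  /** Resource budget — max tool calls per sliding window. Omitted = the rate limiter's per-trust-level defaults apply (not unlimited); only the owner (trust 4) is never limited. */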
+  rateLimits?: {
+    maxCallsPerWindow: number;
+    windowSeconds: number;
+  };
 };
 /** Single risk flag in a validation result */

package/src/rate-limiter.ts ADDED Viewed

@@ -0,0 +1,87 @@
/**
 * Sliding-window rate limiter for per-requester resource budget tracking.
 * Inspired by OpenSandbox's resource limits (CPU/PID quotas).
 *
 * All state lives in an in-memory map keyed by requester id; nothing is
 * persisted by this module.
 */

/** A call budget: at most `maxCallsPerWindow` recorded calls per `windowSeconds`. */
export type RateLimitConfig = {
  maxCallsPerWindow: number;
  windowSeconds: number;
};

/** Snapshot of a requester's consumption against the budget that applies to it. */
export type RateLimitStatus = {
  count: number;
  limit: number;
  windowSeconds: number;
};

type RequesterWindow = {
  /** `Date.now()` values of the calls that were allowed, oldest first. */
  timestamps: number[];
};

export class RateLimiter {
  /** Trust level at or above which a requester is the owner and never limited. */
  private static readonly OWNER_TRUST = 4;

  /**
   * Strictest built-in budget. Used for trust 0 and as the fallback for any
   * trust value that has no entry in `defaults` (negative, fractional, NaN).
   */
  private static readonly STRICTEST: RateLimitConfig = {
    maxCallsPerWindow: 5,
    windowSeconds: 60,
  };

  private windows = new Map<string, RequesterWindow>();

  /** Default limits for requesters without explicit config */
  private defaults: Record<number, RateLimitConfig> = {
    0: RateLimiter.STRICTEST,
    1: { maxCallsPerWindow: 20, windowSeconds: 60 },
    2: { maxCallsPerWindow: 50, windowSeconds: 60 },
    3: { maxCallsPerWindow: 100, windowSeconds: 60 },
    // Trust 4 (owner) = unlimited
  };

  /**
   * Record a call and check whether the requester's budget is exhausted.
   *
   * Only allowed calls are recorded; a blocked call does not extend the window.
   *
   * @param requesterId - Key under which calls are counted.
   * @param trust - Requester trust level; `>= 4` is never limited.
   * @param config - Optional explicit budget. Ignored (the trust-level default
   *   is used instead) when it is not a usable budget — see `isUsable`.
   * @returns `null` if the call is allowed, otherwise the current count and
   *   the limit that was hit.
   */
  check(requesterId: string, trust: number, config?: RateLimitConfig): RateLimitStatus | null {
    // Owner (trust 4) is never rate-limited
    if (trust >= RateLimiter.OWNER_TRUST) return null;

    const limits = this.resolveLimits(trust, config);
    const now = Date.now();
    const recent = this.recentTimestamps(requesterId, limits.windowSeconds, now);

    if (recent.length >= limits.maxCallsPerWindow) {
      // Keep the pruned list, but do not retain an entry that holds nothing.
      if (recent.length > 0) {
        this.windows.set(requesterId, { timestamps: recent });
      } else {
        this.windows.delete(requesterId);
      }
      return {
        count: recent.length,
        limit: limits.maxCallsPerWindow,
        windowSeconds: limits.windowSeconds,
      };
    }

    // Record this call
    recent.push(now);
    this.windows.set(requesterId, { timestamps: recent });
    return null;
  }

  /**
   * Get current usage for a requester without recording a call.
   *
   * For the owner (`trust >= 4`) the reported `limit` is
   * `Number.POSITIVE_INFINITY`, matching `check`, which never blocks the owner.
   * Note that `JSON.stringify` serializes that value as `null`.
   */
  usage(requesterId: string, trust: number, config?: RateLimitConfig): RateLimitStatus {
    const limits = this.resolveLimits(trust, config);
    const count = this.recentTimestamps(requesterId, limits.windowSeconds, Date.now()).length;
    const limit =
      trust >= RateLimiter.OWNER_TRUST ? Number.POSITIVE_INFINITY : limits.maxCallsPerWindow;
    return { count, limit, windowSeconds: limits.windowSeconds };
  }

  /** Reset a requester's window */
  reset(requesterId: string): void {
    this.windows.delete(requesterId);
  }

  /** Reset all windows */
  resetAll(): void {
    this.windows.clear();
  }

  /**
   * Pick the budget that applies: a usable explicit config, else the default
   * for the trust level, else the strictest built-in budget.
   */
  private resolveLimits(trust: number, config?: RateLimitConfig): RateLimitConfig {
    if (config && RateLimiter.isUsable(config)) return config;
    return this.defaults[trust] ?? RateLimiter.STRICTEST;
  }

  /**
   * A budget is usable when both numbers are finite, the window is positive
   * and the call cap is non-negative (a cap of 0 blocks every call). Anything
   * else would silently disable limiting: a non-positive or NaN window prunes
   * every timestamp, and a NaN cap makes the `>=` comparison always false.
   */
  private static isUsable(config: RateLimitConfig): boolean {
    return (
      Number.isFinite(config.maxCallsPerWindow) &&
      config.maxCallsPerWindow >= 0 &&
      Number.isFinite(config.windowSeconds) &&
      config.windowSeconds > 0
    );
  }

  /** Return a fresh array of the requester's timestamps still inside the window. */
  private recentTimestamps(requesterId: string, windowSeconds: number, now: number): number[] {
    const windowMs = windowSeconds * 1000;
    const recorded = this.windows.get(requesterId)?.timestamps ?? [];
    return recorded.filter((t) => now - t < windowMs);
  }
}

package/src/unit.test.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import { join } from "node:path";
 import { describe, it, expect, beforeEach, afterEach } from "vitest";
 import { ApprovalManager, parseApprovalReply } from "./approval.js";
 import { AuditLog } from "./audit-log.js";
+import { RateLimiter } from "./rate-limiter.js";
 import { toolNameToCategory, HIGH_RISK_ACTIONS, ACTION_CATEGORIES } from "./constants.js";
 import type { Stakeholder } from "./constants.js";
 import { buildValidationPrompt } from "./prompt.js";
@@ -495,3 +496,79 @@ describe("parseApprovalReply", () => {
     expect(parseApprovalReply("reject safety-1")).toBeNull();
   });
 });
+// ── RateLimiter ─────────────────────────────────────────────────────────────
+describe("RateLimiter", () => {
+  it("allows calls within limit", () => {
+    const limiter = new RateLimiter();
+    expect(limiter.check("user1", 2, { maxCallsPerWindow: 5, windowSeconds: 60 })).toBeNull();
+    expect(limiter.check("user1", 2, { maxCallsPerWindow: 5, windowSeconds: 60 })).toBeNull();
+  });
+  it("blocks when limit exceeded", () => {
+    const limiter = new RateLimiter();
+    const cfg = { maxCallsPerWindow: 2, windowSeconds: 60 };
+    limiter.check("user1", 1, cfg);
+    limiter.check("user1", 1, cfg);
+    const result = limiter.check("user1", 1, cfg);
+    expect(result).not.toBeNull();
+    expect(result!.count).toBe(2);
+    expect(result!.limit).toBe(2);
+  });
+  it("never limits owner (trust 4)", () => {
+    const limiter = new RateLimiter();
+    const cfg = { maxCallsPerWindow: 1, windowSeconds: 60 };
+    expect(limiter.check("owner", 4, cfg)).toBeNull();
+    expect(limiter.check("owner", 4, cfg)).toBeNull();
+    expect(limiter.check("owner", 4, cfg)).toBeNull();
+  });
+  it("uses default limits per trust level", () => {
+    const limiter = new RateLimiter();
+    // Trust 0 default: 5 calls per 60s
+    for (let i = 0; i < 5; i++) {
+      expect(limiter.check("untrusted", 0)).toBeNull();
+    }
+    expect(limiter.check("untrusted", 0)).not.toBeNull();
+  });
+  it("tracks requesters independently", () => {
+    const limiter = new RateLimiter();
+    const cfg = { maxCallsPerWindow: 1, windowSeconds: 60 };
+    expect(limiter.check("user1", 1, cfg)).toBeNull();
+    expect(limiter.check("user2", 1, cfg)).toBeNull();
+    expect(limiter.check("user1", 1, cfg)).not.toBeNull();
+    expect(limiter.check("user2", 1, cfg)).not.toBeNull();
+  });
+  it("returns usage stats", () => {
+    const limiter = new RateLimiter();
+    const cfg = { maxCallsPerWindow: 10, windowSeconds: 60 };
+    limiter.check("user1", 2, cfg);
+    limiter.check("user1", 2, cfg);
+    const usage = limiter.usage("user1", 2, cfg);
+    expect(usage.count).toBe(2);
+    expect(usage.limit).toBe(10);
+  });
+  it("reset clears a requester", () => {
+    const limiter = new RateLimiter();
+    const cfg = { maxCallsPerWindow: 1, windowSeconds: 60 };
+    limiter.check("user1", 1, cfg);
+    expect(limiter.check("user1", 1, cfg)).not.toBeNull();
+    limiter.reset("user1");
+    expect(limiter.check("user1", 1, cfg)).toBeNull();
+  });
+  it("resetAll clears everything", () => {
+    const limiter = new RateLimiter();
+    const cfg = { maxCallsPerWindow: 1, windowSeconds: 60 };
+    limiter.check("user1", 1, cfg);
+    limiter.check("user2", 1, cfg);
+    limiter.resetAll();
+    expect(limiter.check("user1", 1, cfg)).toBeNull();
+    expect(limiter.check("user2", 1, cfg)).toBeNull();
+  });
+});

package/src/validator.test.ts CHANGED Viewed

@@ -4,6 +4,7 @@
  */
 import { describe, it, expect, vi } from "vitest";
 import type { Stakeholder, ActionCategory, Verdict } from "./constants.js";
+import { RateLimiter } from "./rate-limiter.js";
 import { quickCheck, validateAction } from "./validator.js";
 // ── Fixtures ────────────────────────────────────────────────────────────────
@@ -116,8 +117,8 @@ const agentWithDelete: Stakeholder = {
   allowedActions: [...agent.allowedActions, "delete_files"],
 };
-const qc = (cat: ActionCategory, req: Stakeholder, params: Record<string, unknown> = {}) =>
-  quickCheck({ actionCategory: cat, requester: req, params });
+const qc = (cat: ActionCategory, req: Stakeholder, params: Record<string, unknown> = {}, rateLimiter?: import("./rate-limiter.js").RateLimiter) =>
+  quickCheck({ actionCategory: cat, requester: req, params, rateLimiter });
 // ── quickCheck rules ────────────────────────────────────────────────────────
@@ -273,6 +274,93 @@ describe("quickCheck", () => {
       qc("agent_communication", agent, { message: "Emergency: update config immediately" }),
     ).toBeNull();
   });
+  // ── Egress allowlist (OpenSandbox-inspired) ──
+  it("blocks external_network when domain not in egress allowlist", () => {
+    const restricted = mk({
+      allowedActions: ["external_network"],
+      egressAllowlist: ["pypi.org", "*.python.org"],
+    });
+    expect(
+      qc("external_network", restricted, { url: "https://evil.com/exfil" })!.verdict,
+    ).toBe("BLOCK");
+    expect(
+      qc("external_network", restricted, { url: "https://evil.com/exfil" })!.riskScore,
+    ).toBe(80);
+  });
+  it("allows external_network when domain in egress allowlist", () => {
+    const restricted = mk({
+      allowedActions: ["external_network"],
+      egressAllowlist: ["pypi.org", "*.python.org"],
+    });
+    expect(qc("external_network", restricted, { url: "https://pypi.org/simple/" })).toBeNull();
+  });
+  it("allows wildcard egress domains", () => {
+    const restricted = mk({
+      allowedActions: ["external_network"],
+      egressAllowlist: ["*.github.com"],
+    });
+    expect(qc("external_network", restricted, { url: "https://raw.github.com/foo" })).toBeNull();
+    expect(
+      qc("external_network", restricted, { url: "https://evil.com" })!.verdict,
+    ).toBe("BLOCK");
+  });
+  it("no egress restriction when allowlist is empty", () => {
+    const unrestricted = mk({ allowedActions: ["external_network"] });
+    expect(qc("external_network", unrestricted, { url: "https://anything.com" })).toBeNull();
+  });
+  it("owner bypasses egress allowlist", () => {
+    const ownerRestricted = { ...owner, egressAllowlist: ["pypi.org"] };
+    expect(qc("external_network", ownerRestricted, { url: "https://evil.com" })).toBeNull();
+  });
+  // ── Timeout wrapping (OpenSandbox-inspired) ──
+  it("warns non-owner on long-running command without timeout", () => {
+    const user = mk({ allowedActions: ["execute_shell"] });
+    expect(
+      qc("execute_shell", user, { command: "python server.py" })!.verdict,
+    ).toBe("WARN");
+    expect(
+      qc("execute_shell", user, { command: "node app.js --listen 3000" })!.verdict,
+    ).toBe("WARN");
+  });
+  it("allows long-running command with explicit timeout", () => {
+    const user = mk({ allowedActions: ["execute_shell"] });
+    expect(
+      qc("execute_shell", user, { command: "timeout 120 python server.py" }),
+    ).toBeNull();
+  });
+  it("owner not warned on long-running commands", () => {
+    expect(qc("execute_shell", owner, { command: "python server.py" })).toBeNull();
+  });
+  // ── Rate limiting (OpenSandbox-inspired) ──
+  it("blocks when rate limit exceeded", () => {
+    const limiter = new RateLimiter();
+    const user = mk({ allowedActions: ["read_files"], rateLimits: { maxCallsPerWindow: 3, windowSeconds: 60 } });
+    // First 3 calls pass
+    expect(qc("read_files", user, {}, limiter)).toBeNull();
+    expect(qc("read_files", user, {}, limiter)).toBeNull();
+    expect(qc("read_files", user, {}, limiter)).toBeNull();
+    // 4th call blocked
+    const result = qc("read_files", user, {}, limiter);
+    expect(result).not.toBeNull();
+    expect(result!.verdict).toBe("BLOCK");
+    expect(result!.risks[0].type).toBe("resource");
+  });
+  it("owner is never rate-limited", () => {
+    const limiter = new RateLimiter();
+    for (let i = 0; i < 200; i++) {
+      expect(qc("read_files", owner, {}, limiter)).toBeNull();
+    }
+  });
 });
 // ── validateAction API ──────────────────────────────────────────────────────
@@ -722,6 +810,14 @@ const MUST_BLOCK: BC[] = [
     params: { command: "nohup python server.py &" },
     expect: "BLOCK",
   },
+  // Egress allowlist (OpenSandbox-inspired)
+  {
+    label: "Non-owner egress to blocked domain",
+    cat: "external_network",
+    req: mk({ allowedActions: ["external_network"], egressAllowlist: ["pypi.org"] }),
+    params: { url: "https://evil.com/steal" },
+    expect: "BLOCK",
+  },
   // Destructive deletion (#1)
   {
     label: "Delete MEMORY.md",
@@ -864,6 +960,22 @@ const MUST_ALLOW: BC[] = [
     params: { message: "base64 encoding is useful for binary data" },
     expect: "ALLOW",
   },
+  // Egress allowlist — allowed domain
+  {
+    label: "Non-owner egress to allowed domain",
+    cat: "external_network",
+    req: mk({ allowedActions: ["external_network"], egressAllowlist: ["pypi.org"] }),
+    params: { url: "https://pypi.org/simple/" },
+    expect: "ALLOW",
+  },
+  // Timeout — command with explicit timeout
+  {
+    label: "Non-owner server with timeout",
+    cat: "execute_shell",
+    req: mk({ allowedActions: ["execute_shell"] }),
+    params: { command: "timeout 60 python server.py" },
+    expect: "ALLOW",
+  },
 ];
 describe("Benchmark", () => {

package/src/validator.ts CHANGED Viewed

@@ -8,6 +8,7 @@
 import type { Stakeholder, ValidationResult, ActionCategory } from "./constants.js";
 import { HIGH_RISK_ACTIONS } from "./constants.js";
 import { buildValidationPrompt } from "./prompt.js";
+import type { RateLimiter } from "./rate-limiter.js";
 export type ValidateActionInput = {
   toolName: string;
@@ -86,8 +87,38 @@ export function quickCheck(input: {
   actionCategory: ActionCategory;
   requester: Stakeholder;
   params: Record<string, unknown>;
+  rateLimiter?: RateLimiter;
 }): ValidationResult | null {
-  const { actionCategory, requester, params } = input;
+  const { actionCategory, requester, params, rateLimiter } = input;
+  // Rate limit check — resource budget per requester (inspired by OpenSandbox quotas)
+  if (rateLimiter) {
+    const exceeded = rateLimiter.check(
+      requester.id,
+      requester.trust,
+      requester.rateLimits,
+    );
+    if (exceeded) {
+      return {
+        verdict: "BLOCK",
+        riskScore: 75,
+        risks: [
+          {
+            type: "resource",
+            severity: "high",
+            description: `Rate limit exceeded: ${exceeded.count}/${exceeded.limit} calls in ${exceeded.windowSeconds}s window`,
+          },
+        ],
+        reasoning: `Blocked: requester "${requester.name}" exceeded resource budget (${exceeded.limit} calls per ${exceeded.windowSeconds}s). This prevents resource exhaustion from rapid-fire tool calls.`,
+        recommendations: [
+          "Wait for the current window to expire before retrying",
+          "Request owner to increase rate limits for this stakeholder",
+        ],
+        requiresOwnerConfirmation: false,
+        caseStudyReference: "#4/#5 — Resource Exhaustion",
+      };
+    }
+  }
   // Unverified + no UID + high-risk action = immediate block
   if (!requester.verified && !requester.uid && HIGH_RISK_ACTIONS.includes(actionCategory)) {
@@ -145,6 +176,43 @@ export function quickCheck(input: {
     };
   }
+  // Egress allowlist — restrict external_network to specific domains per stakeholder
+  // Inspired by OpenSandbox's per-sandbox network egress policies (default-deny + allowlist)
+  if (actionCategory === "external_network" && requester.trust < 4 && requester.egressAllowlist && requester.egressAllowlist.length > 0) {
+    const url = String(params.url ?? params.target ?? params.command ?? "");
+    const urlMatch = url.match(/https?:\/\/([^/?#:]+)/i);
+    const domain = urlMatch?.[1]?.toLowerCase() ?? "";
+    if (domain) {
+      const allowed = requester.egressAllowlist.some((pattern) => {
+        const p = pattern.toLowerCase();
+        if (p.startsWith("*.")) {
+          return domain === p.slice(2) || domain.endsWith("." + p.slice(2));
+        }
+        return domain === p;
+      });
+      if (!allowed) {
+        return {
+          verdict: "BLOCK",
+          riskScore: 80,
+          risks: [
+            {
+              type: "authority",
+              severity: "high",
+              description: `Domain "${domain}" is not in requester's egress allowlist: [${requester.egressAllowlist.join(", ")}]`,
+            },
+          ],
+          reasoning: `Blocked: "${domain}" not in egress allowlist for "${requester.name}". Only [${requester.egressAllowlist.join(", ")}] are permitted.`,
+          recommendations: [
+            "Request owner to add this domain to the stakeholder's egress allowlist",
+            "Use an allowed domain instead",
+          ],
+          requiresOwnerConfirmation: false,
+          caseStudyReference: "OpenSandbox — Per-sandbox egress control",
+        };
+      }
+    }
+  }
   // Dangerous shell command patterns — applies to ALL users including owner.
   // Owner gets Telegram approval instead of hard block.
   if (actionCategory === "execute_shell") {
@@ -228,6 +296,35 @@ export function quickCheck(input: {
         caseStudyReference: "#4 — Waste of Resources (Looping)",
       };
     }
+    // Timeout wrapping — non-owner shell commands that look long-running without timeout
+    // Inspired by OpenSandbox's TTL/auto-expiration. Only fires for non-infinite patterns
+    // that still look like they'll run a long time (long sleeps, server processes).
+    if (requester.trust < 4) {
+      const hasTimeout = /\btimeout\s+\d|--timeout|ulimit\s+-t/i.test(command);
+      const isLongRunning = /\bsleep\s+[6-9]\d{2,}|sleep\s+\d{4,}|server\b|listen\b|serve\b/i.test(command);
+      const isPersistent = /\bcron\b|crontab|nohup|systemctl\s+enable|launchctl\s+load|daemon|&\s*$/i.test(command);
+      if (isLongRunning && !hasTimeout && !isPersistent) {
+        return {
+          verdict: "WARN",
+          riskScore: 55,
+          risks: [
+            {
+              type: "resource",
+              severity: "medium",
+              description: "Non-owner shell command is potentially long-running without explicit timeout",
+            },
+          ],
+          reasoning: `Warning: non-owner shell command appears long-running without timeout wrapper. Consider using "timeout N <command>" to enforce resource limits.`,
+          recommendations: [
+            "Wrap with: timeout 120 <command>",
+            "Add --timeout flag if the tool supports it",
+          ],
+          requiresOwnerConfirmation: false,
+          caseStudyReference: "OpenSandbox — TTL/auto-expiration",
+        };
+      }
+    }
   }
   // Detect mass broadcast / mass send patterns (Case #11 — Libelous Broadcast)