@cyberdyne-systems/agent-safety 2026.3.14 → 2026.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -23,6 +23,7 @@ import { AuditLog } from "./src/audit-log.js";
23
23
  import { toolNameToCategory } from "./src/constants.js";
24
24
  import type { Verdict } from "./src/constants.js";
25
25
  import { createSafetyTool } from "./src/safety-tool.js";
26
+ import { RateLimiter } from "./src/rate-limiter.js";
26
27
  import { StakeholderStore } from "./src/stakeholder-store.js";
27
28
  import { validateAction, quickCheck } from "./src/validator.js";
28
29
 
@@ -31,6 +32,7 @@ export default function register(api: OpenClawPluginApi) {
31
32
  const store = new StakeholderStore(join(stateDir, "stakeholders.json"));
32
33
  const auditLog = new AuditLog(500);
33
34
  const approvalMgr = new ApprovalManager();
35
+ const rateLimiter = new RateLimiter();
34
36
 
35
37
  // Read config
36
38
  const pluginConfig = (api.pluginConfig ?? {}) as {
@@ -92,6 +94,7 @@ export default function register(api: OpenClawPluginApi) {
92
94
  actionCategory,
93
95
  requester,
94
96
  params: params as Record<string, unknown>,
97
+ rateLimiter,
95
98
  });
96
99
 
97
100
  if (quickResult) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@cyberdyne-systems/agent-safety",
3
- "version": "2026.3.14",
3
+ "version": "2026.3.15",
4
4
  "description": "Agent safety system: stakeholder model, action validator, and safety dashboard — based on arXiv:2602.20021",
5
5
  "type": "module",
6
6
  "dependencies": {
package/src/constants.ts CHANGED
@@ -72,6 +72,14 @@ export type Stakeholder = {
72
72
  channel: string;
73
73
  uid: string | null;
74
74
  allowedActions: ActionCategory[];
75
+ /** Per-stakeholder egress allowlist — domains permitted for external_network.
76
+ * Empty array = no restrictions (all allowed). Non-empty = only listed domains. */
77
+ egressAllowlist?: string[];
78
+ /** Resource budget — max tool calls per window. Omitted = the rate limiter's per-trust defaults apply. */
79
+ rateLimits?: {
80
+ maxCallsPerWindow: number;
81
+ windowSeconds: number;
82
+ };
75
83
  };
76
84
 
77
85
  /** Single risk flag in a validation result */
@@ -0,0 +1,87 @@
1
/**
 * Sliding-window rate limiter for per-requester resource budget tracking.
 * Inspired by OpenSandbox's resource limits (CPU/PID quotas).
 */

/** Call budget: at most `maxCallsPerWindow` recorded calls within the trailing `windowSeconds`. */
export type RateLimitConfig = {
  maxCallsPerWindow: number;
  windowSeconds: number;
};

/** Per-requester state: `Date.now()` timestamps (ms) of the calls that were allowed. */
type RequesterWindow = {
  timestamps: number[];
};

/** Trust level at or above which a requester (the owner) is never rate-limited. */
const OWNER_TRUST = 4;

/** Strictest built-in budget; used for trust 0 and for any trust level without a default entry. */
const STRICTEST_DEFAULT: RateLimitConfig = { maxCallsPerWindow: 5, windowSeconds: 60 };

/**
 * Returns true when an explicit config can actually enforce a limit.
 *
 * A NaN/missing `maxCallsPerWindow` makes `count >= limit` always false, and a
 * NaN or non-positive `windowSeconds` prunes every timestamp on each call; both
 * would silently disable rate limiting, so such configs are rejected.
 */
function isUsableConfig(config: RateLimitConfig | null | undefined): config is RateLimitConfig {
  if (config == null) return false;
  const { maxCallsPerWindow, windowSeconds } = config;
  return (
    typeof maxCallsPerWindow === "number" &&
    !Number.isNaN(maxCallsPerWindow) &&
    typeof windowSeconds === "number" &&
    windowSeconds > 0
  );
}

export class RateLimiter {
  /** Sliding windows keyed by requester id. */
  private windows = new Map<string, RequesterWindow>();

  /** Default limits for requesters without explicit config */
  private defaults: Record<number, RateLimitConfig> = {
    0: STRICTEST_DEFAULT,
    1: { maxCallsPerWindow: 20, windowSeconds: 60 },
    2: { maxCallsPerWindow: 50, windowSeconds: 60 },
    3: { maxCallsPerWindow: 100, windowSeconds: 60 },
    // Trust 4 (owner) = unlimited
  };

  /**
   * Record a call and check if the rate limit is exceeded.
   *
   * Calls that are blocked are not recorded, so they do not extend the window.
   *
   * @param requesterId Key under which the requester's window is tracked.
   * @param trust Trust level; 4 or higher is never limited and never recorded.
   * @param config Explicit budget. When omitted or unusable (see
   *   `isUsableConfig`), the default for `trust` applies.
   * @returns null if allowed, or `{ count, limit, windowSeconds }` if blocked.
   */
  check(
    requesterId: string,
    trust: number,
    config?: RateLimitConfig,
  ): { count: number; limit: number; windowSeconds: number } | null {
    // Owner (trust 4) is never rate-limited
    if (trust >= OWNER_TRUST) return null;

    const limits = this.resolveLimits(trust, config);
    const now = Date.now();
    const windowMs = limits.windowSeconds * 1000;

    let window = this.windows.get(requesterId);
    if (!window) {
      window = { timestamps: [] };
      this.windows.set(requesterId, window);
    }

    // Prune timestamps outside the window
    window.timestamps = window.timestamps.filter((t) => now - t < windowMs);

    if (window.timestamps.length >= limits.maxCallsPerWindow) {
      return {
        count: window.timestamps.length,
        limit: limits.maxCallsPerWindow,
        windowSeconds: limits.windowSeconds,
      };
    }

    // Record this call
    window.timestamps.push(now);
    return null;
  }

  /**
   * Get current usage for a requester without recording a call.
   *
   * @returns The number of recorded calls still inside the window, the
   *   effective limit (`Infinity` for trust 4 or higher, matching `check`),
   *   and the window length in seconds.
   */
  usage(requesterId: string, trust: number, config?: RateLimitConfig): {
    count: number;
    limit: number;
    windowSeconds: number;
  } {
    const limits = this.resolveLimits(trust, config);
    const now = Date.now();
    const windowMs = limits.windowSeconds * 1000;
    const window = this.windows.get(requesterId);
    const count = window
      ? window.timestamps.filter((t) => now - t < windowMs).length
      : 0;
    // check() never blocks the owner, so do not report a finite budget for them.
    const limit = trust >= OWNER_TRUST ? Number.POSITIVE_INFINITY : limits.maxCallsPerWindow;
    return { count, limit, windowSeconds: limits.windowSeconds };
  }

  /** Reset a requester's window */
  reset(requesterId: string): void {
    this.windows.delete(requesterId);
  }

  /** Reset all windows */
  resetAll(): void {
    this.windows.clear();
  }

  /** Picks the explicit config when usable, else the trust default, else the strictest default. */
  private resolveLimits(trust: number, config?: RateLimitConfig): RateLimitConfig {
    if (isUsableConfig(config)) return config;
    return this.defaults[trust] ?? STRICTEST_DEFAULT;
  }
}
package/src/unit.test.ts CHANGED
@@ -7,6 +7,7 @@ import { join } from "node:path";
7
7
  import { describe, it, expect, beforeEach, afterEach } from "vitest";
8
8
  import { ApprovalManager, parseApprovalReply } from "./approval.js";
9
9
  import { AuditLog } from "./audit-log.js";
10
+ import { RateLimiter } from "./rate-limiter.js";
10
11
  import { toolNameToCategory, HIGH_RISK_ACTIONS, ACTION_CATEGORIES } from "./constants.js";
11
12
  import type { Stakeholder } from "./constants.js";
12
13
  import { buildValidationPrompt } from "./prompt.js";
@@ -495,3 +496,79 @@ describe("parseApprovalReply", () => {
495
496
  expect(parseApprovalReply("reject safety-1")).toBeNull();
496
497
  });
497
498
  });
499
+
500
+ // ── RateLimiter ─────────────────────────────────────────────────────────────
501
+
502
describe("RateLimiter", () => {
  /** Builds a 60-second budget that allows `maxCalls` calls. */
  const budget = (maxCalls: number) => ({ maxCallsPerWindow: maxCalls, windowSeconds: 60 });

  // Every case starts from a limiter with no recorded calls.
  let limiter: RateLimiter;
  beforeEach(() => {
    limiter = new RateLimiter();
  });

  it("allows calls within limit", () => {
    for (let call = 0; call < 2; call++) {
      expect(limiter.check("user1", 2, budget(5))).toBeNull();
    }
  });

  it("blocks when limit exceeded", () => {
    const twoCalls = budget(2);
    // Use up the whole budget, then probe once more.
    limiter.check("user1", 1, twoCalls);
    limiter.check("user1", 1, twoCalls);
    const blocked = limiter.check("user1", 1, twoCalls);
    expect(blocked).not.toBeNull();
    expect(blocked!.count).toBe(2);
    expect(blocked!.limit).toBe(2);
  });

  it("never limits owner (trust 4)", () => {
    const oneCall = budget(1);
    for (let call = 0; call < 3; call++) {
      expect(limiter.check("owner", 4, oneCall)).toBeNull();
    }
  });

  it("uses default limits per trust level", () => {
    // Trust 0 default: 5 calls per 60s
    let allowed = 0;
    while (allowed < 5) {
      expect(limiter.check("untrusted", 0)).toBeNull();
      allowed += 1;
    }
    expect(limiter.check("untrusted", 0)).not.toBeNull();
  });

  it("tracks requesters independently", () => {
    const oneCall = budget(1);
    // First call from each requester is allowed...
    expect(limiter.check("user1", 1, oneCall)).toBeNull();
    expect(limiter.check("user2", 1, oneCall)).toBeNull();
    // ...and each one's second call is blocked on its own window.
    expect(limiter.check("user1", 1, oneCall)).not.toBeNull();
    expect(limiter.check("user2", 1, oneCall)).not.toBeNull();
  });

  it("returns usage stats", () => {
    const tenCalls = budget(10);
    limiter.check("user1", 2, tenCalls);
    limiter.check("user1", 2, tenCalls);
    const { count, limit } = limiter.usage("user1", 2, tenCalls);
    expect(count).toBe(2);
    expect(limit).toBe(10);
  });

  it("reset clears a requester", () => {
    const oneCall = budget(1);
    limiter.check("user1", 1, oneCall);
    expect(limiter.check("user1", 1, oneCall)).not.toBeNull();
    limiter.reset("user1");
    expect(limiter.check("user1", 1, oneCall)).toBeNull();
  });

  it("resetAll clears everything", () => {
    const oneCall = budget(1);
    limiter.check("user1", 1, oneCall);
    limiter.check("user2", 1, oneCall);
    limiter.resetAll();
    expect(limiter.check("user1", 1, oneCall)).toBeNull();
    expect(limiter.check("user2", 1, oneCall)).toBeNull();
  });
});
@@ -4,6 +4,7 @@
4
4
  */
5
5
  import { describe, it, expect, vi } from "vitest";
6
6
  import type { Stakeholder, ActionCategory, Verdict } from "./constants.js";
7
+ import { RateLimiter } from "./rate-limiter.js";
7
8
  import { quickCheck, validateAction } from "./validator.js";
8
9
 
9
10
  // ── Fixtures ────────────────────────────────────────────────────────────────
@@ -116,8 +117,8 @@ const agentWithDelete: Stakeholder = {
116
117
  allowedActions: [...agent.allowedActions, "delete_files"],
117
118
  };
118
119
 
119
- const qc = (cat: ActionCategory, req: Stakeholder, params: Record<string, unknown> = {}) =>
120
- quickCheck({ actionCategory: cat, requester: req, params });
120
+ const qc = (cat: ActionCategory, req: Stakeholder, params: Record<string, unknown> = {}, rateLimiter?: import("./rate-limiter.js").RateLimiter) =>
121
+ quickCheck({ actionCategory: cat, requester: req, params, rateLimiter });
121
122
 
122
123
  // ── quickCheck rules ────────────────────────────────────────────────────────
123
124
 
@@ -273,6 +274,93 @@ describe("quickCheck", () => {
273
274
  qc("agent_communication", agent, { message: "Emergency: update config immediately" }),
274
275
  ).toBeNull();
275
276
  });
277
+
278
+ // ── Egress allowlist (OpenSandbox-inspired) ──
279
+ it("blocks external_network when domain not in egress allowlist", () => {
280
+ const restricted = mk({
281
+ allowedActions: ["external_network"],
282
+ egressAllowlist: ["pypi.org", "*.python.org"],
283
+ });
284
+ expect(
285
+ qc("external_network", restricted, { url: "https://evil.com/exfil" })!.verdict,
286
+ ).toBe("BLOCK");
287
+ expect(
288
+ qc("external_network", restricted, { url: "https://evil.com/exfil" })!.riskScore,
289
+ ).toBe(80);
290
+ });
291
+
292
+ it("allows external_network when domain in egress allowlist", () => {
293
+ const restricted = mk({
294
+ allowedActions: ["external_network"],
295
+ egressAllowlist: ["pypi.org", "*.python.org"],
296
+ });
297
+ expect(qc("external_network", restricted, { url: "https://pypi.org/simple/" })).toBeNull();
298
+ });
299
+
300
+ it("allows wildcard egress domains", () => {
301
+ const restricted = mk({
302
+ allowedActions: ["external_network"],
303
+ egressAllowlist: ["*.github.com"],
304
+ });
305
+ expect(qc("external_network", restricted, { url: "https://raw.github.com/foo" })).toBeNull();
306
+ expect(
307
+ qc("external_network", restricted, { url: "https://evil.com" })!.verdict,
308
+ ).toBe("BLOCK");
309
+ });
310
+
311
+ it("no egress restriction when allowlist is empty", () => {
312
+ const unrestricted = mk({ allowedActions: ["external_network"] });
313
+ expect(qc("external_network", unrestricted, { url: "https://anything.com" })).toBeNull();
314
+ });
315
+
316
+ it("owner bypasses egress allowlist", () => {
317
+ const ownerRestricted = { ...owner, egressAllowlist: ["pypi.org"] };
318
+ expect(qc("external_network", ownerRestricted, { url: "https://evil.com" })).toBeNull();
319
+ });
320
+
321
+ // ── Timeout wrapping (OpenSandbox-inspired) ──
322
+ it("warns non-owner on long-running command without timeout", () => {
323
+ const user = mk({ allowedActions: ["execute_shell"] });
324
+ expect(
325
+ qc("execute_shell", user, { command: "python server.py" })!.verdict,
326
+ ).toBe("WARN");
327
+ expect(
328
+ qc("execute_shell", user, { command: "node app.js --listen 3000" })!.verdict,
329
+ ).toBe("WARN");
330
+ });
331
+
332
+ it("allows long-running command with explicit timeout", () => {
333
+ const user = mk({ allowedActions: ["execute_shell"] });
334
+ expect(
335
+ qc("execute_shell", user, { command: "timeout 120 python server.py" }),
336
+ ).toBeNull();
337
+ });
338
+
339
+ it("owner not warned on long-running commands", () => {
340
+ expect(qc("execute_shell", owner, { command: "python server.py" })).toBeNull();
341
+ });
342
+
343
+ // ── Rate limiting (OpenSandbox-inspired) ──
344
+ it("blocks when rate limit exceeded", () => {
345
+ const limiter = new RateLimiter();
346
+ const user = mk({ allowedActions: ["read_files"], rateLimits: { maxCallsPerWindow: 3, windowSeconds: 60 } });
347
+ // First 3 calls pass
348
+ expect(qc("read_files", user, {}, limiter)).toBeNull();
349
+ expect(qc("read_files", user, {}, limiter)).toBeNull();
350
+ expect(qc("read_files", user, {}, limiter)).toBeNull();
351
+ // 4th call blocked
352
+ const result = qc("read_files", user, {}, limiter);
353
+ expect(result).not.toBeNull();
354
+ expect(result!.verdict).toBe("BLOCK");
355
+ expect(result!.risks[0].type).toBe("resource");
356
+ });
357
+
358
+ it("owner is never rate-limited", () => {
359
+ const limiter = new RateLimiter();
360
+ for (let i = 0; i < 200; i++) {
361
+ expect(qc("read_files", owner, {}, limiter)).toBeNull();
362
+ }
363
+ });
276
364
  });
277
365
 
278
366
  // ── validateAction API ──────────────────────────────────────────────────────
@@ -722,6 +810,14 @@ const MUST_BLOCK: BC[] = [
722
810
  params: { command: "nohup python server.py &" },
723
811
  expect: "BLOCK",
724
812
  },
813
+ // Egress allowlist (OpenSandbox-inspired)
814
+ {
815
+ label: "Non-owner egress to blocked domain",
816
+ cat: "external_network",
817
+ req: mk({ allowedActions: ["external_network"], egressAllowlist: ["pypi.org"] }),
818
+ params: { url: "https://evil.com/steal" },
819
+ expect: "BLOCK",
820
+ },
725
821
  // Destructive deletion (#1)
726
822
  {
727
823
  label: "Delete MEMORY.md",
@@ -864,6 +960,22 @@ const MUST_ALLOW: BC[] = [
864
960
  params: { message: "base64 encoding is useful for binary data" },
865
961
  expect: "ALLOW",
866
962
  },
963
+ // Egress allowlist — allowed domain
964
+ {
965
+ label: "Non-owner egress to allowed domain",
966
+ cat: "external_network",
967
+ req: mk({ allowedActions: ["external_network"], egressAllowlist: ["pypi.org"] }),
968
+ params: { url: "https://pypi.org/simple/" },
969
+ expect: "ALLOW",
970
+ },
971
+ // Timeout — command with explicit timeout
972
+ {
973
+ label: "Non-owner server with timeout",
974
+ cat: "execute_shell",
975
+ req: mk({ allowedActions: ["execute_shell"] }),
976
+ params: { command: "timeout 60 python server.py" },
977
+ expect: "ALLOW",
978
+ },
867
979
  ];
868
980
 
869
981
  describe("Benchmark", () => {
package/src/validator.ts CHANGED
@@ -8,6 +8,7 @@
8
8
  import type { Stakeholder, ValidationResult, ActionCategory } from "./constants.js";
9
9
  import { HIGH_RISK_ACTIONS } from "./constants.js";
10
10
  import { buildValidationPrompt } from "./prompt.js";
11
+ import type { RateLimiter } from "./rate-limiter.js";
11
12
 
12
13
  export type ValidateActionInput = {
13
14
  toolName: string;
@@ -86,8 +87,38 @@ export function quickCheck(input: {
86
87
  actionCategory: ActionCategory;
87
88
  requester: Stakeholder;
88
89
  params: Record<string, unknown>;
90
+ rateLimiter?: RateLimiter;
89
91
  }): ValidationResult | null {
90
- const { actionCategory, requester, params } = input;
92
+ const { actionCategory, requester, params, rateLimiter } = input;
93
+
94
+ // Rate limit check — resource budget per requester (inspired by OpenSandbox quotas)
95
+ if (rateLimiter) {
96
+ const exceeded = rateLimiter.check(
97
+ requester.id,
98
+ requester.trust,
99
+ requester.rateLimits,
100
+ );
101
+ if (exceeded) {
102
+ return {
103
+ verdict: "BLOCK",
104
+ riskScore: 75,
105
+ risks: [
106
+ {
107
+ type: "resource",
108
+ severity: "high",
109
+ description: `Rate limit exceeded: ${exceeded.count}/${exceeded.limit} calls in ${exceeded.windowSeconds}s window`,
110
+ },
111
+ ],
112
+ reasoning: `Blocked: requester "${requester.name}" exceeded resource budget (${exceeded.limit} calls per ${exceeded.windowSeconds}s). This prevents resource exhaustion from rapid-fire tool calls.`,
113
+ recommendations: [
114
+ "Wait for the current window to expire before retrying",
115
+ "Request owner to increase rate limits for this stakeholder",
116
+ ],
117
+ requiresOwnerConfirmation: false,
118
+ caseStudyReference: "#4/#5 — Resource Exhaustion",
119
+ };
120
+ }
121
+ }
91
122
 
92
123
  // Unverified + no UID + high-risk action = immediate block
93
124
  if (!requester.verified && !requester.uid && HIGH_RISK_ACTIONS.includes(actionCategory)) {
@@ -145,6 +176,43 @@ export function quickCheck(input: {
145
176
  };
146
177
  }
147
178
 
179
+ // Egress allowlist — restrict external_network to specific domains per stakeholder
180
+ // Inspired by OpenSandbox's per-sandbox network egress policies (default-deny + allowlist)
181
+ if (actionCategory === "external_network" && requester.trust < 4 && requester.egressAllowlist && requester.egressAllowlist.length > 0) {
182
+ const url = String(params.url ?? params.target ?? params.command ?? "");
183
+ const urlMatch = url.match(/https?:\/\/([^/?#:]+)/i);
184
+ const domain = urlMatch?.[1]?.toLowerCase() ?? "";
185
+ if (domain) {
186
+ const allowed = requester.egressAllowlist.some((pattern) => {
187
+ const p = pattern.toLowerCase();
188
+ if (p.startsWith("*.")) {
189
+ return domain === p.slice(2) || domain.endsWith("." + p.slice(2));
190
+ }
191
+ return domain === p;
192
+ });
193
+ if (!allowed) {
194
+ return {
195
+ verdict: "BLOCK",
196
+ riskScore: 80,
197
+ risks: [
198
+ {
199
+ type: "authority",
200
+ severity: "high",
201
+ description: `Domain "${domain}" is not in requester's egress allowlist: [${requester.egressAllowlist.join(", ")}]`,
202
+ },
203
+ ],
204
+ reasoning: `Blocked: "${domain}" not in egress allowlist for "${requester.name}". Only [${requester.egressAllowlist.join(", ")}] are permitted.`,
205
+ recommendations: [
206
+ "Request owner to add this domain to the stakeholder's egress allowlist",
207
+ "Use an allowed domain instead",
208
+ ],
209
+ requiresOwnerConfirmation: false,
210
+ caseStudyReference: "OpenSandbox — Per-sandbox egress control",
211
+ };
212
+ }
213
+ }
214
+ }
215
+
148
216
  // Dangerous shell command patterns — applies to ALL users including owner.
149
217
  // Owner gets Telegram approval instead of hard block.
150
218
  if (actionCategory === "execute_shell") {
@@ -228,6 +296,35 @@ export function quickCheck(input: {
228
296
  caseStudyReference: "#4 — Waste of Resources (Looping)",
229
297
  };
230
298
  }
299
+
300
+ // Timeout wrapping — non-owner shell commands that look long-running without timeout
301
+ // Inspired by OpenSandbox's TTL/auto-expiration. Only fires for non-infinite patterns
302
+ // that still look like they'll run a long time (long sleeps, server processes).
303
+ if (requester.trust < 4) {
304
+ const hasTimeout = /\btimeout\s+\d|--timeout|ulimit\s+-t/i.test(command);
305
+ const isLongRunning = /\bsleep\s+[6-9]\d{2,}|sleep\s+\d{4,}|server\b|listen\b|serve\b/i.test(command);
306
+ const isPersistent = /\bcron\b|crontab|nohup|systemctl\s+enable|launchctl\s+load|daemon|&\s*$/i.test(command);
307
+ if (isLongRunning && !hasTimeout && !isPersistent) {
308
+ return {
309
+ verdict: "WARN",
310
+ riskScore: 55,
311
+ risks: [
312
+ {
313
+ type: "resource",
314
+ severity: "medium",
315
+ description: "Non-owner shell command is potentially long-running without explicit timeout",
316
+ },
317
+ ],
318
+ reasoning: `Warning: non-owner shell command appears long-running without timeout wrapper. Consider using "timeout N <command>" to enforce resource limits.`,
319
+ recommendations: [
320
+ "Wrap with: timeout 120 <command>",
321
+ "Add --timeout flag if the tool supports it",
322
+ ],
323
+ requiresOwnerConfirmation: false,
324
+ caseStudyReference: "OpenSandbox — TTL/auto-expiration",
325
+ };
326
+ }
327
+ }
231
328
  }
232
329
 
233
330
  // Detect mass broadcast / mass send patterns (Case #11 — Libelous Broadcast)