@openape/ape-agent 2.6.3 → 2.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/dist/bridge.mjs +94 -6
  2. package/package.json +6 -5
package/dist/bridge.mjs CHANGED
@@ -1333,6 +1333,66 @@ async function ensureFreshIdpAuth(now = Math.floor(Date.now() / 1e3)) {
1333
1333
  return next;
1334
1334
  }
1335
1335
 
1336
+ // ../../packages/prompt-injection-detector/dist/index.js
1337
+ var DEFAULT_THRESHOLD = 0.7;
1338
+ var DEFAULT_OWNER_THRESHOLD = 0.95;
1339
+ async function decide(detector, input, opts = {}) {
1340
+ const threshold = input.sender.isOwner ? opts.ownerThreshold ?? DEFAULT_OWNER_THRESHOLD : opts.threshold ?? DEFAULT_THRESHOLD;
1341
+ const result = await detector.classify(input);
1342
+ return {
1343
+ ...result,
1344
+ threshold,
1345
+ blocked: result.score >= threshold
1346
+ };
1347
+ }
1348
+ var PATTERNS = [
1349
+ // Instruction-override family. The defining phrase of prompt
1350
+ // injection — telling the model to discard its instructions in
1351
+ // favour of new ones.
1352
+ { re: /\bignore (?:all |any |the |your )?(?:previous|prior|above|earlier|preceding) (?:instructions?|rules?|context|prompts?|messages?)\b/i, weight: 0.6, reason: "instruction-override" },
1353
+ { re: /\bdisregard (?:all |any |the |your )?(?:previous|prior|above|earlier|preceding)?\s*(?:instructions?|rules?|context)\b/i, weight: 0.6, reason: "instruction-override" },
1354
+ { re: /\b(?:you are|act as|pretend to be|roleplay as) (?:now |a |an )?(?:different|new|unrestricted|jailbroken|dan|do anything now)\b/i, weight: 0.55, reason: "role-override" },
1355
+ { re: /\b(?:forget|drop|reset) (?:everything|all|your) (?:above|prior|previous|instructions?|rules?|context)\b/i, weight: 0.55, reason: "context-reset" },
1356
+ // Filesystem-exfiltration. Specific paths that have no business
1357
+ // appearing in normal chat — auth tokens, SSH keys, agent config.
1358
+ // `\b` would fail on `/etc/passwd` (slash is non-word, no boundary
1359
+ // with preceding space) — match the literal forms instead.
1360
+ { re: /(?:~\/\.config\/apes|~\/\.openape|~\/\.ssh|\/etc\/passwd|\/etc\/shadow|\bid_rsa\b|\bid_ed25519\b|\bauth\.json\b|\.env(?:\.[\w-]+)?\b)/i, weight: 0.45, reason: "sensitive-path" },
1361
+ // Tool-call coercion. Phrases that try to talk the agent into
1362
+ // executing tools or running shell commands as part of the reply.
1363
+ { re: /\b(?:run|execute|invoke|call)\s+(?:the\s+)?(?:shell|bash|sh|cmd|powershell|tool|command|script)\b/i, weight: 0.35, reason: "tool-coercion" },
1364
+ { re: /\b(?:and\s+)?(?:post|send|share|paste|return|reply with|output)\s+(?:the\s+)?(?:contents?|output|result|file|secret|token|api[-_ ]?key)\b/i, weight: 0.3, reason: "exfil-request" },
1365
+ // Override + override-and-do (combined "do X without telling Y" forms).
1366
+ { re: /\bwithout (?:telling|asking|informing|notifying|consulting|the consent of)\b/i, weight: 0.4, reason: "covert-action" },
1367
+ // System-prompt extraction.
1368
+ { re: /\b(?:show|print|reveal|repeat|tell me|what is|what's) (?:your |the )?(?:system prompt|initial prompt|instructions|rules|directives|guidelines)\b/i, weight: 0.5, reason: "prompt-extraction" },
1369
+ // Encoding-based bypass attempts.
1370
+ { re: /\b(?:base64|rot13|decode|decrypt) (?:this|the following|below)\b/i, weight: 0.3, reason: "encoding-bypass" }
1371
+ ];
1372
+ function classifyHeuristic(input) {
1373
+ const text = input.text;
1374
+ let total = 0;
1375
+ const reasons = [];
1376
+ for (const p of PATTERNS) {
1377
+ if (p.re.test(text)) {
1378
+ total += p.weight;
1379
+ if (!reasons.includes(p.reason)) reasons.push(p.reason);
1380
+ if (total >= 1) break;
1381
+ }
1382
+ }
1383
+ const score = Math.min(1, total);
1384
+ return {
1385
+ score,
1386
+ backend: "heuristic",
1387
+ ...reasons.length > 0 ? { reason: reasons.join(", ") } : {}
1388
+ };
1389
+ }
1390
+ function createHeuristicDetector() {
1391
+ return {
1392
+ classify: async (input) => classifyHeuristic(input)
1393
+ };
1394
+ }
1395
+
1336
1396
  // src/bridge.ts
1337
1397
  import { decodeJwt } from "jose";
1338
1398
  import WebSocket from "ws";
@@ -3112,10 +3172,8 @@ var consola = createConsola2();
3112
3172
  // ../../packages/apes/dist/chunk-DYSFQ26B.js
3113
3173
  var import_shell_quote = __toESM(require_shell_quote(), 1);
3114
3174
 
3115
- // ../../node_modules/.pnpm/consola@3.4.2/node_modules/consola/dist/utils.mjs
3116
- import "tty";
3117
-
3118
- // ../../node_modules/.pnpm/citty@0.1.6/node_modules/citty/dist/index.mjs
3175
+ // ../../node_modules/.pnpm/citty@0.2.2/node_modules/citty/dist/index.mjs
3176
+ import { parseArgs as parseArgs$1 } from "util";
3119
3177
  function defineCommand(def) {
3120
3178
  return def;
3121
3179
  }
@@ -4334,6 +4392,12 @@ function sleep(ms) {
4334
4392
  function truncate(s2, n2) {
4335
4393
  return s2.length <= n2 ? s2 : `${s2.slice(0, n2 - 1)}\u2026`;
4336
4394
  }
4395
+ function refusalText(reason) {
4396
+ const base = "I won't process this message \u2014 it looks like a prompt-injection attempt.";
4397
+ return reason ? `${base}
4398
+
4399
+ (matched: ${reason})` : base;
4400
+ }
4337
4401
  var Bridge = class {
4338
4402
  constructor(cfg, selfEmail, ownerEmail) {
4339
4403
  this.cfg = cfg;
@@ -4364,6 +4428,10 @@ var Bridge = class {
4364
4428
  chat;
4365
4429
  bearer;
4366
4430
  cron;
4431
+ // Prompt-injection gate (#277). Pure heuristic by default — pluggable
4432
+ // backend later. The bridge is the choke-point for every chat message
4433
+ // before it reaches the agent runtime, so this is the right place.
4434
+ injectionDetector = createHeuristicDetector();
4367
4435
  /**
4368
4436
  * RuntimeConfig is shared across thread sessions and the cron runner.
4369
4437
  * The bridge resolves it from its own env at boot and reuses for the
@@ -4414,7 +4482,7 @@ var Bridge = class {
4414
4482
  if (accepted.length > 0) log(`accepted: ${accepted.join(", ")}`);
4415
4483
  if (skipped.length > 0) log(`skipped (not on allowlist): ${skipped.join(", ")}`);
4416
4484
  }
4417
- handleInbound(msg) {
4485
+ async handleInbound(msg) {
4418
4486
  if (msg.senderEmail === this.selfEmail) return;
4419
4487
  if (!msg.body.trim()) return;
4420
4488
  if (this.cfg.roomFilter && msg.roomId !== this.cfg.roomFilter) return;
@@ -4423,6 +4491,26 @@ var Bridge = class {
4423
4491
  return;
4424
4492
  }
4425
4493
  log(`[${msg.roomId}/${msg.threadId.slice(0, 8)}] in: ${truncate(msg.body, 80)}`);
4494
+ const decision = await decide(this.injectionDetector, {
4495
+ text: msg.body,
4496
+ sender: {
4497
+ email: msg.senderEmail,
4498
+ isOwner: msg.senderEmail === this.ownerEmail
4499
+ }
4500
+ });
4501
+ if (decision.blocked) {
4502
+ log(`[${msg.roomId}/${msg.threadId.slice(0, 8)}] BLOCKED prompt-injection (score=${decision.score.toFixed(2)}, reason=${decision.reason ?? "n/a"})`);
4503
+ try {
4504
+ await this.chat.postMessage(msg.roomId, refusalText(decision.reason), {
4505
+ replyTo: msg.id,
4506
+ threadId: msg.threadId
4507
+ });
4508
+ } catch (err) {
4509
+ const m2 = err instanceof Error ? err.message : String(err);
4510
+ log(`[${msg.roomId}] failed to post refusal: ${m2}`);
4511
+ }
4512
+ return;
4513
+ }
4426
4514
  const session = this.getOrCreateThread(msg.roomId, msg.threadId);
4427
4515
  session.enqueue(msg.body, msg.id);
4428
4516
  }
@@ -4489,7 +4577,7 @@ var Bridge = class {
4489
4577
  return;
4490
4578
  }
4491
4579
  if (frame.type !== "message") return;
4492
- this.handleInbound(frame.payload);
4580
+ void this.handleInbound(frame.payload);
4493
4581
  });
4494
4582
  ws.on("close", () => {
4495
4583
  if (pingTimer) clearInterval(pingTimer);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openape/ape-agent",
3
- "version": "2.6.3",
3
+ "version": "2.7.0",
4
4
  "description": "OpenApe agent runtime: per-agent process that connects to chat.openape.ai, runs the LLM loop with tools + cron tasks, and streams replies back to owners.",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -23,17 +23,18 @@
23
23
  "ofetch": "^1.4.1",
24
24
  "ws": "^8.18.0",
25
25
  "yaml": "^2.8.0",
26
- "@openape/apes": "1.25.0",
27
- "@openape/cli-auth": "0.4.0"
26
+ "@openape/apes": "1.25.1",
27
+ "@openape/prompt-injection-detector": "0.1.0",
28
+ "@openape/cli-auth": "0.4.1"
28
29
  },
29
30
  "devDependencies": {
30
31
  "@antfu/eslint-config": "^7.6.1",
31
32
  "@types/node": "^22.19.13",
32
33
  "@types/ws": "^8.5.13",
33
- "eslint": "^9.35.0",
34
+ "eslint": "^10.4.0",
34
35
  "tsup": "^8.5.1",
35
36
  "typescript": "^5.9.3",
36
- "vitest": "^3.2.4"
37
+ "vitest": "^4.1.7"
37
38
  },
38
39
  "engines": {
39
40
  "node": ">=22"