@openape/ape-agent 2.6.3 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bridge.mjs +94 -6
- package/package.json +6 -5
package/dist/bridge.mjs
CHANGED
|
@@ -1333,6 +1333,66 @@ async function ensureFreshIdpAuth(now = Math.floor(Date.now() / 1e3)) {
|
|
|
1333
1333
|
return next;
|
|
1334
1334
|
}
|
|
1335
1335
|
|
|
1336
|
+
// ../../packages/prompt-injection-detector/dist/index.js
|
|
1337
|
+
// Score at or above which a message from a non-owner sender is blocked.
var DEFAULT_THRESHOLD = 0.7;
// Score at or above which a message from the owner is blocked; higher than
// the default, so owner messages need a stronger signal to be refused.
var DEFAULT_OWNER_THRESHOLD = 0.95;
/**
 * Classifies `input` with `detector` and converts the score into a
 * block/allow decision.
 *
 * @param {{ classify: Function }} detector - Object whose `classify(input)`
 *   returns (or resolves to) a result carrying a numeric `score`.
 * @param {object} input - Forwarded unchanged to `detector.classify`. When
 *   `input.sender.isOwner` is truthy the owner threshold applies; a missing
 *   `sender` is treated as a non-owner rather than throwing.
 * @param {{ threshold?: number, ownerThreshold?: number }} [opts] - Optional
 *   threshold overrides; `null`/`undefined` fall back to the defaults.
 * @returns {Promise<object>} The classifier result plus `threshold` (the bar
 *   that was applied) and `blocked` (`score >= threshold`).
 */
async function decide(detector, input, opts = {}) {
  // Null-safe: an input without `sender` gets the stricter (lower) non-owner
  // threshold instead of crashing the caller with a TypeError.
  const isOwner = Boolean(input?.sender?.isOwner);
  const threshold = isOwner
    ? opts?.ownerThreshold ?? DEFAULT_OWNER_THRESHOLD
    : opts?.threshold ?? DEFAULT_THRESHOLD;
  const result = await detector.classify(input);
  // NOTE(review): a non-numeric / NaN score compares false here, so such a
  // result is reported as not blocked — confirm that is the intended policy.
  return {
    // Spread first so the computed fields below always win over any
    // same-named keys returned by the detector.
    ...result,
    threshold,
    blocked: result.score >= threshold
  };
}
|
|
1348
|
+
// Weighted regex rules. Each entry contributes `weight` to the score once if
// its `re` matches anywhere in the message; `reason` labels the rule family.
var PATTERNS = [
  // Instruction-override family. The defining phrase of prompt
  // injection — telling the model to discard its instructions in
  // favour of new ones.
  { re: /\bignore (?:all |any |the |your )?(?:previous|prior|above|earlier|preceding) (?:instructions?|rules?|context|prompts?|messages?)\b/i, weight: 0.6, reason: "instruction-override" },
  { re: /\bdisregard (?:all |any |the |your )?(?:previous|prior|above|earlier|preceding)?\s*(?:instructions?|rules?|context)\b/i, weight: 0.6, reason: "instruction-override" },
  { re: /\b(?:you are|act as|pretend to be|roleplay as) (?:now |a |an )?(?:different|new|unrestricted|jailbroken|dan|do anything now)\b/i, weight: 0.55, reason: "role-override" },
  { re: /\b(?:forget|drop|reset) (?:everything|all|your) (?:above|prior|previous|instructions?|rules?|context)\b/i, weight: 0.55, reason: "context-reset" },
  // Filesystem-exfiltration. Specific paths that have no business
  // appearing in normal chat — auth tokens, SSH keys, agent config.
  // `\b` would fail on `/etc/passwd` (slash is non-word, no boundary
  // with preceding space) — match the literal forms instead.
  { re: /(?:~\/\.config\/apes|~\/\.openape|~\/\.ssh|\/etc\/passwd|\/etc\/shadow|\bid_rsa\b|\bid_ed25519\b|\bauth\.json\b|\.env(?:\.[\w-]+)?\b)/i, weight: 0.45, reason: "sensitive-path" },
  // Tool-call coercion. Phrases that try to talk the agent into
  // executing tools or running shell commands as part of the reply.
  { re: /\b(?:run|execute|invoke|call)\s+(?:the\s+)?(?:shell|bash|sh|cmd|powershell|tool|command|script)\b/i, weight: 0.35, reason: "tool-coercion" },
  { re: /\b(?:and\s+)?(?:post|send|share|paste|return|reply with|output)\s+(?:the\s+)?(?:contents?|output|result|file|secret|token|api[-_ ]?key)\b/i, weight: 0.3, reason: "exfil-request" },
  // Override + override-and-do (combined "do X without telling Y" forms).
  { re: /\bwithout (?:telling|asking|informing|notifying|consulting|the consent of)\b/i, weight: 0.4, reason: "covert-action" },
  // System-prompt extraction.
  { re: /\b(?:show|print|reveal|repeat|tell me|what is|what's) (?:your |the )?(?:system prompt|initial prompt|instructions|rules|directives|guidelines)\b/i, weight: 0.5, reason: "prompt-extraction" },
  // Encoding-based bypass attempts.
  { re: /\b(?:base64|rot13|decode|decrypt) (?:this|the following|below)\b/i, weight: 0.3, reason: "encoding-bypass" }
];
/**
 * Scores a message against PATTERNS.
 *
 * @param {{ text?: unknown }} input - Message wrapper; `text` is coerced to a
 *   string and a missing/nullish `text` (or `input`) is scored as empty.
 * @returns {{ score: number, backend: "heuristic", reason?: string }} `score`
 *   is the sum of matched weights capped at 1; `reason` (comma-separated rule
 *   families, in match order) is present only when at least one rule matched.
 */
function classifyHeuristic(input) {
  // Most rules spell their separators as a single literal space, so collapse
  // every whitespace run (double spaces, tabs, newlines) to one space first.
  // Otherwise "ignore  all\nprevious instructions" slips past the gate.
  const text = String(input?.text ?? "").replace(/\s+/g, " ");
  let total = 0;
  const reasons = [];
  for (const p of PATTERNS) {
    if (p.re.test(text)) {
      total += p.weight;
      if (!reasons.includes(p.reason)) reasons.push(p.reason);
      // The score is capped at 1 below, so further matches cannot change it.
      if (total >= 1) break;
    }
  }
  const score = Math.min(1, total);
  return {
    score,
    backend: "heuristic",
    ...reasons.length > 0 ? { reason: reasons.join(", ") } : {}
  };
}
|
|
1390
|
+
/**
 * Builds a detector object around the heuristic classifier.
 *
 * @returns {{ classify: (input: object) => Promise<object> }} Detector whose
 *   `classify` resolves to whatever `classifyHeuristic(input)` returns.
 */
function createHeuristicDetector() {
  // Declared async so the synchronous heuristic result comes back as a promise.
  const classify = async (input) => classifyHeuristic(input);
  return { classify };
}
|
|
1395
|
+
|
|
1336
1396
|
// src/bridge.ts
|
|
1337
1397
|
import { decodeJwt } from "jose";
|
|
1338
1398
|
import WebSocket from "ws";
|
|
@@ -3112,10 +3172,8 @@ var consola = createConsola2();
|
|
|
3112
3172
|
// ../../packages/apes/dist/chunk-DYSFQ26B.js
|
|
3113
3173
|
var import_shell_quote = __toESM(require_shell_quote(), 1);
|
|
3114
3174
|
|
|
3115
|
-
// ../../node_modules/.pnpm/
|
|
3116
|
-
import "
|
|
3117
|
-
|
|
3118
|
-
// ../../node_modules/.pnpm/citty@0.1.6/node_modules/citty/dist/index.mjs
|
|
3175
|
+
// ../../node_modules/.pnpm/citty@0.2.2/node_modules/citty/dist/index.mjs
|
|
3176
|
+
import { parseArgs as parseArgs$1 } from "util";
|
|
3119
3177
|
/**
 * Identity helper: hands back the very object it was given, unmodified.
 *
 * @param {object} def - Command definition.
 * @returns {object} The same `def` reference.
 */
function defineCommand(def) {
  const definition = def;
  return definition;
}
|
|
@@ -4334,6 +4392,12 @@ function sleep(ms) {
|
|
|
4334
4392
|
/**
 * Shortens a string to at most `n2` UTF-16 code units, marking the cut with
 * a trailing ellipsis character.
 *
 * @param {string} s2 - Text to shorten.
 * @param {number} n2 - Maximum length of the result, ellipsis included.
 * @returns {string} `s2` itself when it already fits; otherwise its first
 *   `n2 - 1` code units followed by "\u2026". A non-positive `n2` yields "".
 */
function truncate(s2, n2) {
  if (s2.length <= n2) return s2;
  // Guard: with n2 <= 0 the slice end (n2 - 1) would be negative and count
  // from the END of the string, producing output longer than the limit.
  if (n2 <= 0) return "";
  return `${s2.slice(0, n2 - 1)}\u2026`;
}
|
|
4395
|
+
/**
 * Builds the reply posted when a message is refused by the injection gate.
 *
 * @param {string} [reason] - Matched rule label(s); when falsy only the base
 *   sentence is returned.
 * @returns {string} The base refusal sentence, followed (when `reason` is
 *   truthy) by a blank line and "(matched: <reason>)".
 */
function refusalText(reason) {
  const base = "I won't process this message \u2014 it looks like a prompt-injection attempt.";
  if (!reason) {
    return base;
  }
  // Blank line between the sentence and the matched-rule suffix.
  return `${base}\n\n(matched: ${reason})`;
}
|
|
4337
4401
|
var Bridge = class {
|
|
4338
4402
|
constructor(cfg, selfEmail, ownerEmail) {
|
|
4339
4403
|
this.cfg = cfg;
|
|
@@ -4364,6 +4428,10 @@ var Bridge = class {
|
|
|
4364
4428
|
chat;
|
|
4365
4429
|
bearer;
|
|
4366
4430
|
cron;
|
|
4431
|
+
// Prompt-injection gate (#277). Pure heuristic by default — pluggable
|
|
4432
|
+
// backend later. The bridge is the choke-point for every chat message
|
|
4433
|
+
// before it reaches the agent runtime, so this is the right place.
|
|
4434
|
+
injectionDetector = createHeuristicDetector();
|
|
4367
4435
|
/**
|
|
4368
4436
|
* RuntimeConfig is shared across thread sessions and the cron runner.
|
|
4369
4437
|
* The bridge resolves it from its own env at boot and reuses for the
|
|
@@ -4414,7 +4482,7 @@ var Bridge = class {
|
|
|
4414
4482
|
if (accepted.length > 0) log(`accepted: ${accepted.join(", ")}`);
|
|
4415
4483
|
if (skipped.length > 0) log(`skipped (not on allowlist): ${skipped.join(", ")}`);
|
|
4416
4484
|
}
|
|
4417
|
-
handleInbound(msg) {
|
|
4485
|
+
async handleInbound(msg) {
|
|
4418
4486
|
if (msg.senderEmail === this.selfEmail) return;
|
|
4419
4487
|
if (!msg.body.trim()) return;
|
|
4420
4488
|
if (this.cfg.roomFilter && msg.roomId !== this.cfg.roomFilter) return;
|
|
@@ -4423,6 +4491,26 @@ var Bridge = class {
|
|
|
4423
4491
|
return;
|
|
4424
4492
|
}
|
|
4425
4493
|
log(`[${msg.roomId}/${msg.threadId.slice(0, 8)}] in: ${truncate(msg.body, 80)}`);
|
|
4494
|
+
const decision = await decide(this.injectionDetector, {
|
|
4495
|
+
text: msg.body,
|
|
4496
|
+
sender: {
|
|
4497
|
+
email: msg.senderEmail,
|
|
4498
|
+
isOwner: msg.senderEmail === this.ownerEmail
|
|
4499
|
+
}
|
|
4500
|
+
});
|
|
4501
|
+
if (decision.blocked) {
|
|
4502
|
+
log(`[${msg.roomId}/${msg.threadId.slice(0, 8)}] BLOCKED prompt-injection (score=${decision.score.toFixed(2)}, reason=${decision.reason ?? "n/a"})`);
|
|
4503
|
+
try {
|
|
4504
|
+
await this.chat.postMessage(msg.roomId, refusalText(decision.reason), {
|
|
4505
|
+
replyTo: msg.id,
|
|
4506
|
+
threadId: msg.threadId
|
|
4507
|
+
});
|
|
4508
|
+
} catch (err) {
|
|
4509
|
+
const m2 = err instanceof Error ? err.message : String(err);
|
|
4510
|
+
log(`[${msg.roomId}] failed to post refusal: ${m2}`);
|
|
4511
|
+
}
|
|
4512
|
+
return;
|
|
4513
|
+
}
|
|
4426
4514
|
const session = this.getOrCreateThread(msg.roomId, msg.threadId);
|
|
4427
4515
|
session.enqueue(msg.body, msg.id);
|
|
4428
4516
|
}
|
|
@@ -4489,7 +4577,7 @@ var Bridge = class {
|
|
|
4489
4577
|
return;
|
|
4490
4578
|
}
|
|
4491
4579
|
if (frame.type !== "message") return;
|
|
4492
|
-
this.handleInbound(frame.payload);
|
|
4580
|
+
void this.handleInbound(frame.payload);
|
|
4493
4581
|
});
|
|
4494
4582
|
ws.on("close", () => {
|
|
4495
4583
|
if (pingTimer) clearInterval(pingTimer);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@openape/ape-agent",
|
|
3
|
-
"version": "2.6.3",
|
|
3
|
+
"version": "2.7.0",
|
|
4
4
|
"description": "OpenApe agent runtime: per-agent process that connects to chat.openape.ai, runs the LLM loop with tools + cron tasks, and streams replies back to owners.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"license": "MIT",
|
|
@@ -23,17 +23,18 @@
|
|
|
23
23
|
"ofetch": "^1.4.1",
|
|
24
24
|
"ws": "^8.18.0",
|
|
25
25
|
"yaml": "^2.8.0",
|
|
26
|
-
"@openape/apes": "1.25.
|
|
27
|
-
"@openape/
|
|
26
|
+
"@openape/apes": "1.25.1",
|
|
27
|
+
"@openape/prompt-injection-detector": "0.1.0",
|
|
28
|
+
"@openape/cli-auth": "0.4.1"
|
|
28
29
|
},
|
|
29
30
|
"devDependencies": {
|
|
30
31
|
"@antfu/eslint-config": "^7.6.1",
|
|
31
32
|
"@types/node": "^22.19.13",
|
|
32
33
|
"@types/ws": "^8.5.13",
|
|
33
|
-
"eslint": "^
|
|
34
|
+
"eslint": "^10.4.0",
|
|
34
35
|
"tsup": "^8.5.1",
|
|
35
36
|
"typescript": "^5.9.3",
|
|
36
|
-
"vitest": "^
|
|
37
|
+
"vitest": "^4.1.7"
|
|
37
38
|
},
|
|
38
39
|
"engines": {
|
|
39
40
|
"node": ">=22"
|