npm - @openape/ape-agent - Versions diffs - 2.6.3 → 2.7.0 - Mend

@openape/ape-agent 2.6.3 → 2.7.0

This diff compares the published contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.

Files changed (2)

package/dist/bridge.mjs +94 -6
package/package.json +6 -5

package/dist/bridge.mjs CHANGED Viewed

@@ -1333,6 +1333,66 @@ async function ensureFreshIdpAuth(now = Math.floor(Date.now() / 1e3)) {
   return next;
 }
+// ../../packages/prompt-injection-detector/dist/index.js
+var DEFAULT_THRESHOLD = 0.7;
+var DEFAULT_OWNER_THRESHOLD = 0.95;
+async function decide(detector, input, opts = {}) {
+  const threshold = input.sender.isOwner ? opts.ownerThreshold ?? DEFAULT_OWNER_THRESHOLD : opts.threshold ?? DEFAULT_THRESHOLD;
+  const result = await detector.classify(input);
+  return {
+    ...result,
+    threshold,
+    blocked: result.score >= threshold
+  };
+}
+var PATTERNS = [
+  // Instruction-override family. The defining phrase of prompt
+  // injection — telling the model to discard its instructions in
+  // favour of new ones.
+  { re: /\bignore (?:all |any |the |your )?(?:previous|prior|above|earlier|preceding) (?:instructions?|rules?|context|prompts?|messages?)\b/i, weight: 0.6, reason: "instruction-override" },
+  { re: /\bdisregard (?:all |any |the |your )?(?:previous|prior|above|earlier|preceding)?\s*(?:instructions?|rules?|context)\b/i, weight: 0.6, reason: "instruction-override" },
+  { re: /\b(?:you are|act as|pretend to be|roleplay as) (?:now |a |an )?(?:different|new|unrestricted|jailbroken|dan|do anything now)\b/i, weight: 0.55, reason: "role-override" },
+  { re: /\b(?:forget|drop|reset) (?:everything|all|your) (?:above|prior|previous|instructions?|rules?|context)\b/i, weight: 0.55, reason: "context-reset" },
+  // Filesystem-exfiltration. Specific paths that have no business
+  // appearing in normal chat — auth tokens, SSH keys, agent config.
+  // `\b` would fail on `/etc/passwd` (slash is non-word, no boundary
+  // with preceding space) — match the literal forms instead.
+  { re: /(?:~\/\.config\/apes|~\/\.openape|~\/\.ssh|\/etc\/passwd|\/etc\/shadow|\bid_rsa\b|\bid_ed25519\b|\bauth\.json\b|\.env(?:\.[\w-]+)?\b)/i, weight: 0.45, reason: "sensitive-path" },
+  // Tool-call coercion. Phrases that try to talk the agent into
+  // executing tools or running shell commands as part of the reply.
+  { re: /\b(?:run|execute|invoke|call)\s+(?:the\s+)?(?:shell|bash|sh|cmd|powershell|tool|command|script)\b/i, weight: 0.35, reason: "tool-coercion" },
+  { re: /\b(?:and\s+)?(?:post|send|share|paste|return|reply with|output)\s+(?:the\s+)?(?:contents?|output|result|file|secret|token|api[-_ ]?key)\b/i, weight: 0.3, reason: "exfil-request" },
+  // Override + override-and-do (combined "do X without telling Y" forms).
+  { re: /\bwithout (?:telling|asking|informing|notifying|consulting|the consent of)\b/i, weight: 0.4, reason: "covert-action" },
+  // System-prompt extraction.
+  { re: /\b(?:show|print|reveal|repeat|tell me|what is|what's) (?:your |the )?(?:system prompt|initial prompt|instructions|rules|directives|guidelines)\b/i, weight: 0.5, reason: "prompt-extraction" },
+  // Encoding-based bypass attempts.
+  { re: /\b(?:base64|rot13|decode|decrypt) (?:this|the following|below)\b/i, weight: 0.3, reason: "encoding-bypass" }
+];
+function classifyHeuristic(input) {
+  const text = input.text;
+  let total = 0;
+  const reasons = [];
+  for (const p of PATTERNS) {
+    if (p.re.test(text)) {
+      total += p.weight;
+      if (!reasons.includes(p.reason)) reasons.push(p.reason);
+      if (total >= 1) break;
+    }
+  }
+  const score = Math.min(1, total);
+  return {
+    score,
+    backend: "heuristic",
+    ...reasons.length > 0 ? { reason: reasons.join(", ") } : {}
+  };
+}
+function createHeuristicDetector() {
+  return {
+    classify: async (input) => classifyHeuristic(input)
+  };
+}
 // src/bridge.ts
 import { decodeJwt } from "jose";
 import WebSocket from "ws";
@@ -3112,10 +3172,8 @@ var consola = createConsola2();
 // ../../packages/apes/dist/chunk-DYSFQ26B.js
 var import_shell_quote = __toESM(require_shell_quote(), 1);
-// ../../node_modules/.pnpm/consola@3.4.2/node_modules/consola/dist/utils.mjs
-import "tty";
-// ../../node_modules/.pnpm/citty@0.1.6/node_modules/citty/dist/index.mjs
+// ../../node_modules/.pnpm/citty@0.2.2/node_modules/citty/dist/index.mjs
+import { parseArgs as parseArgs$1 } from "util";
 function defineCommand(def) {
   return def;
 }
@@ -4334,6 +4392,12 @@ function sleep(ms) {
 function truncate(s2, n2) {
   return s2.length <= n2 ? s2 : `${s2.slice(0, n2 - 1)}\u2026`;
 }
+function refusalText(reason) {
+  const base = "I won't process this message \u2014 it looks like a prompt-injection attempt.";
+  return reason ? `${base}
+(matched: ${reason})` : base;
+}
 var Bridge = class {
   constructor(cfg, selfEmail, ownerEmail) {
     this.cfg = cfg;
@@ -4364,6 +4428,10 @@ var Bridge = class {
   chat;
   bearer;
   cron;
+  // Prompt-injection gate (#277). Pure heuristic by default — pluggable
+  // backend later. The bridge is the choke-point for every chat message
+  // before it reaches the agent runtime, so this is the right place.
+  injectionDetector = createHeuristicDetector();
   /**
    * RuntimeConfig is shared across thread sessions and the cron runner.
    * The bridge resolves it from its own env at boot and reuses for the
@@ -4414,7 +4482,7 @@ var Bridge = class {
     if (accepted.length > 0) log(`accepted: ${accepted.join(", ")}`);
     if (skipped.length > 0) log(`skipped (not on allowlist): ${skipped.join(", ")}`);
   }
-  handleInbound(msg) {
+  async handleInbound(msg) {
     if (msg.senderEmail === this.selfEmail) return;
     if (!msg.body.trim()) return;
     if (this.cfg.roomFilter && msg.roomId !== this.cfg.roomFilter) return;
@@ -4423,6 +4491,26 @@ var Bridge = class {
       return;
     }
     log(`[${msg.roomId}/${msg.threadId.slice(0, 8)}] in: ${truncate(msg.body, 80)}`);
+    const decision = await decide(this.injectionDetector, {
+      text: msg.body,
+      sender: {
+        email: msg.senderEmail,
+        isOwner: msg.senderEmail === this.ownerEmail
+      }
+    });
+    if (decision.blocked) {
+      log(`[${msg.roomId}/${msg.threadId.slice(0, 8)}] BLOCKED prompt-injection (score=${decision.score.toFixed(2)}, reason=${decision.reason ?? "n/a"})`);
+      try {
+        await this.chat.postMessage(msg.roomId, refusalText(decision.reason), {
+          replyTo: msg.id,
+          threadId: msg.threadId
+        });
+      } catch (err) {
+        const m2 = err instanceof Error ? err.message : String(err);
+        log(`[${msg.roomId}] failed to post refusal: ${m2}`);
+      }
+      return;
+    }
     const session = this.getOrCreateThread(msg.roomId, msg.threadId);
     session.enqueue(msg.body, msg.id);
   }
@@ -4489,7 +4577,7 @@ var Bridge = class {
           return;
         }
         if (frame.type !== "message") return;
-        this.handleInbound(frame.payload);
+        void this.handleInbound(frame.payload);
       });
       ws.on("close", () => {
         if (pingTimer) clearInterval(pingTimer);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@openape/ape-agent",
-  "version": "2.6.3",
+  "version": "2.7.0",
   "description": "OpenApe agent runtime: per-agent process that connects to chat.openape.ai, runs the LLM loop with tools + cron tasks, and streams replies back to owners.",
   "type": "module",
   "license": "MIT",
@@ -23,17 +23,18 @@
     "ofetch": "^1.4.1",
     "ws": "^8.18.0",
     "yaml": "^2.8.0",
-    "@openape/apes": "1.25.0",
-    "@openape/cli-auth": "0.4.0"
+    "@openape/apes": "1.25.1",
+    "@openape/prompt-injection-detector": "0.1.0",
+    "@openape/cli-auth": "0.4.1"
   },
   "devDependencies": {
     "@antfu/eslint-config": "^7.6.1",
     "@types/node": "^22.19.13",
     "@types/ws": "^8.5.13",
-    "eslint": "^9.35.0",
+    "eslint": "^10.4.0",
     "tsup": "^8.5.1",
     "typescript": "^5.9.3",
-    "vitest": "^3.2.4"
+    "vitest": "^4.1.7"
   },
   "engines": {
     "node": ">=22"