npm - @strav/brain - Versions diffs - 0.3.32 → 0.4.0 - Mend

@strav/brain 0.3.32 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +3 -3
package/src/helpers.ts +6 -7
package/src/providers/openai_responses_provider.ts +3 -1
package/src/utils/error_scrub.ts +5 -0
package/src/utils/prompt.ts +65 -0
package/src/utils/retry.ts +9 -5

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@strav/brain",
-  "version": "0.3.32",
+  "version": "0.4.0",
   "type": "module",
   "description": "AI module for the Strav framework",
   "license": "MIT",
@@ -15,10 +15,10 @@
     "CHANGELOG.md"
   ],
   "peerDependencies": {
-    "@strav/kernel": "0.3.32"
+    "@strav/kernel": "0.4.0"
   },
   "dependencies": {
-    "@strav/workflow": "0.3.32",
+    "@strav/workflow": "0.4.0",
     "zod": "^3.25 || ^4.0"
   },
   "scripts": {

package/src/helpers.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import BrainManager from './brain_manager.ts'
 import { Agent } from './agent.ts'
 import { Workflow } from './workflow.ts'
 import { zodToJsonSchema } from './utils/schema.ts'
+import { interpolateInstructions } from './utils/prompt.ts'
 import { MemoryManager } from './memory/memory_manager.ts'
 import { ContextBudget } from './memory/context_budget.ts'
 import type { MemoryConfig, SerializedMemoryThread, Fact } from './memory/types.ts'
@@ -335,12 +336,12 @@ export class AgentRunner<T extends Agent = Agent> {
       }
     }
-    // Build system prompt with context interpolation
+    // Build system prompt with context interpolation. interpolateInstructions
+    // warns when a context value looks like a prompt-injection attempt — see
+    // packages/brain/CLAUDE.md ("Prompt-injection threat model").
     let system: string | undefined = agent.instructions || undefined
     if (system) {
-      for (const [key, value] of Object.entries(this._context)) {
-        system = system.replaceAll(`{{${key}}}`, String(value))
-      }
+      system = interpolateInstructions(system, this._context)
     }
     // Prepare structured output schema
@@ -483,9 +484,7 @@ export class AgentRunner<T extends Agent = Agent> {
     let system: string | undefined = agent.instructions || undefined
     if (system) {
-      for (const [key, value] of Object.entries(this._context)) {
-        system = system.replaceAll(`{{${key}}}`, String(value))
-      }
+      system = interpolateInstructions(system, this._context)
     }
     let schema: JsonSchema | undefined

package/src/providers/openai_responses_provider.ts CHANGED Viewed

@@ -1,6 +1,7 @@
 import { parseSSE } from '../utils/sse_parser.ts'
 import { retryableFetch, type RetryOptions } from '../utils/retry.ts'
 import { ExternalServiceError } from '@strav/kernel'
+import { scrubProviderError } from '../utils/error_scrub.ts'
 import type {
   AIProvider,
   CompletionRequest,
@@ -150,7 +151,8 @@ export class OpenAIResponsesProvider implements AIProvider {
       // ── Error ─────────────────────────────────────────────────────
       if (eventType === 'error') {
-        throw new ExternalServiceError('OpenAI', undefined, data.message ?? JSON.stringify(data))
+        const message = typeof data.message === 'string' ? data.message : JSON.stringify(data)
+        throw new ExternalServiceError('OpenAI', undefined, scrubProviderError(message))
       }
     }
   }

package/src/utils/error_scrub.ts ADDED Viewed

@@ -0,0 +1,5 @@
+// Re-export the shared kernel helper so the same scrubber is used across
+// every package that wraps upstream-provider errors. Keeping this thin
+// re-export avoids a breaking import-path change for callers that
+// already pulled `scrubProviderError` from `@strav/brain/utils/error_scrub`.
+export { scrubProviderError } from '@strav/kernel'

package/src/utils/prompt.ts ADDED Viewed

@@ -0,0 +1,65 @@
+/**
+ * Heuristic detector for prompt-injection markers in untrusted strings
+ * destined for the system prompt. The interpolation in
+ * `agent.instructions` does naïve `replaceAll` of `{{key}}` placeholders
+ * with string values — any user-controlled value flowing through is a
+ * prompt-injection vector against the LLM.
+ *
+ * We can't fully solve this at the template layer (the proper fix is to
+ * pass values as structured user-role messages, not interpolate them
+ * into the system role). What we can do is detect the easy cases and
+ * warn the developer that a value looks suspicious. Detection here is
+ * deliberately loose — false positives are cheap, missed cases let
+ * exploits through silently.
+ */
+const INJECTION_PATTERNS: readonly RegExp[] = [
+  /ignore\s+(?:[\w\s]{0,30}\s+)?(?:instructions?|prompts?|messages?)/i,
+  /disregard\s+(?:[\w\s]{0,30}\s+)?(?:instructions?|prompts?|messages?)/i,
+  /(?:^|\n)\s*system\s*[:>]/i,
+  /(?:^|\n)\s*assistant\s*[:>]/i,
+  /\bsystem\s*:\s*\S/i,
+  /you\s+are\s+now\s+(?:a|an|the)/i,
+  /act\s+as\s+(?:a|an|the)\s+(?:different|new)/i,
+  /\[INST\]|\[\/INST\]/i,
+  /<\|im_(?:start|end)\|>/i,
+  /<\|system\|>|<\|user\|>|<\|assistant\|>/i,
+  /###\s*(?:system|instruction|new\s+instruction)/i,
+]
+/** Return true if the string contains a known prompt-injection marker. */
+export function looksLikePromptInjection(value: string): boolean {
+  if (!value || typeof value !== 'string') return false
+  return INJECTION_PATTERNS.some(re => re.test(value))
+}
+/**
+ * Substitute `{{key}}` placeholders in a system-prompt template with
+ * values from `context`. Emits a `console.warn` when a value matches
+ * `looksLikePromptInjection()` so developers notice when untrusted
+ * input is reaching the system prompt.
+ *
+ * The replacement still happens — the warning is informational. Callers
+ * who need hard rejection should validate context themselves before
+ * calling. The framework cannot decide whether a given context value is
+ * trusted; only the application can.
+ */
+export function interpolateInstructions(
+  template: string,
+  context: Record<string, unknown>
+): string {
+  let out = template
+  for (const [key, rawValue] of Object.entries(context)) {
+    const stringValue = String(rawValue)
+    if (looksLikePromptInjection(stringValue)) {
+      console.warn(
+        `[brain] Possible prompt-injection in agent context.${key} — ` +
+          `the value contains markers commonly used to override system ` +
+          `instructions. Treat untrusted user input as user-role messages, ` +
+          `not as interpolated system-prompt context. ` +
+          `See packages/brain/CLAUDE.md ("Prompt-injection threat model").`
+      )
+    }
+    out = out.replaceAll(`{{${key}}}`, stringValue)
+  }
+  return out
+}

package/src/utils/retry.ts CHANGED Viewed

@@ -1,4 +1,5 @@
 import { ExternalServiceError } from '@strav/kernel'
+import { scrubProviderError } from './error_scrub.ts'
 export interface RetryOptions {
   maxRetries?: number
@@ -36,12 +37,14 @@ export async function retryableFetch(
     try {
       response = await fetch(url, init)
     } catch (err) {
-      // Network error (DNS, connection refused, etc.)
+      // Network error (DNS, connection refused, etc.). Some Bun/Node
+      // network errors include the URL — scrub before surfacing in
+      // case it carries credentials in query params.
       if (attempt === maxRetries) {
         throw new ExternalServiceError(
           service,
           undefined,
-          err instanceof Error ? err.message : String(err)
+          scrubProviderError(err instanceof Error ? err.message : String(err))
         )
       }
       await sleep(backoffDelay(attempt, baseDelay, maxDelay))
@@ -50,16 +53,17 @@ export async function retryableFetch(
     if (response.ok) return response
-    // Non-retryable status — fail immediately
+    // Non-retryable status — fail immediately. Provider response bodies
+    // can echo request headers or other context; scrub before wrapping.
     if (!retryable.includes(response.status)) {
       const text = await response.text()
-      throw new ExternalServiceError(service, response.status, text)
+      throw new ExternalServiceError(service, response.status, scrubProviderError(text))
     }
     // Retryable status — wait and retry (unless last attempt)
     if (attempt === maxRetries) {
       const text = await response.text()
-      throw new ExternalServiceError(service, response.status, text)
+      throw new ExternalServiceError(service, response.status, scrubProviderError(text))
     }
     const delay = parseRetryAfter(response) ?? backoffDelay(attempt, baseDelay, maxDelay)