npm - palaryn - Versions diffs - 0.3.7 → 0.4.4 - Mend

palaryn 0.3.7 → 0.4.4

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (98)

package/README.md +2 -1
package/dist/src/auth/routes.d.ts.map +1 -1
package/dist/src/auth/routes.js +5 -1
package/dist/src/auth/routes.js.map +1 -1
package/dist/src/config/defaults.d.ts.map +1 -1
package/dist/src/config/defaults.js +7 -2
package/dist/src/config/defaults.js.map +1 -1
package/dist/src/dlp/composite-scanner.d.ts.map +1 -1
package/dist/src/dlp/composite-scanner.js +26 -1
package/dist/src/dlp/composite-scanner.js.map +1 -1
package/dist/src/dlp/heuristic-scorer.d.ts +31 -0
package/dist/src/dlp/heuristic-scorer.d.ts.map +1 -0
package/dist/src/dlp/heuristic-scorer.js +314 -0
package/dist/src/dlp/heuristic-scorer.js.map +1 -0
package/dist/src/dlp/llm-classifier.d.ts +38 -0
package/dist/src/dlp/llm-classifier.d.ts.map +1 -0
package/dist/src/dlp/llm-classifier.js +152 -0
package/dist/src/dlp/llm-classifier.js.map +1 -0
package/dist/src/dlp/patterns.d.ts.map +1 -1
package/dist/src/dlp/patterns.js +1 -0
package/dist/src/dlp/patterns.js.map +1 -1
package/dist/src/dlp/prompt-injection-backend.d.ts.map +1 -1
package/dist/src/dlp/prompt-injection-backend.js +17 -0
package/dist/src/dlp/prompt-injection-backend.js.map +1 -1
package/dist/src/dlp/prompt-injection-patterns.d.ts.map +1 -1
package/dist/src/dlp/prompt-injection-patterns.js +36 -0
package/dist/src/dlp/prompt-injection-patterns.js.map +1 -1
package/dist/src/dlp/regex-backend.d.ts.map +1 -1
package/dist/src/dlp/regex-backend.js +2 -38
package/dist/src/dlp/regex-backend.js.map +1 -1
package/dist/src/dlp/scanner.d.ts.map +1 -1
package/dist/src/dlp/scanner.js +38 -6
package/dist/src/dlp/scanner.js.map +1 -1
package/dist/src/dlp/text-normalizer.d.ts +10 -1
package/dist/src/dlp/text-normalizer.d.ts.map +1 -1
package/dist/src/dlp/text-normalizer.js +124 -2
package/dist/src/dlp/text-normalizer.js.map +1 -1
package/dist/src/mcp/http-transport.d.ts +2 -0
package/dist/src/mcp/http-transport.d.ts.map +1 -1
package/dist/src/mcp/http-transport.js +25 -6
package/dist/src/mcp/http-transport.js.map +1 -1
package/dist/src/policy/engine.d.ts.map +1 -1
package/dist/src/policy/engine.js +109 -0
package/dist/src/policy/engine.js.map +1 -1
package/dist/src/saas/routes.d.ts.map +1 -1
package/dist/src/saas/routes.js +19 -5
package/dist/src/saas/routes.js.map +1 -1
package/dist/src/server/app.d.ts.map +1 -1
package/dist/src/server/app.js +7 -0
package/dist/src/server/app.js.map +1 -1
package/dist/src/server/gateway.d.ts +1 -0
package/dist/src/server/gateway.d.ts.map +1 -1
package/dist/src/server/gateway.js +160 -1
package/dist/src/server/gateway.js.map +1 -1
package/dist/src/types/config.d.ts +14 -1
package/dist/src/types/config.d.ts.map +1 -1
package/dist/tests/security/pentest-payloads.d.ts +46 -0
package/dist/tests/security/pentest-payloads.d.ts.map +1 -0
package/dist/tests/security/pentest-payloads.js +475 -0
package/dist/tests/security/pentest-payloads.js.map +1 -0
package/dist/tests/unit/adversarial-pipeline.test.d.ts +15 -0
package/dist/tests/unit/adversarial-pipeline.test.d.ts.map +1 -0
package/dist/tests/unit/adversarial-pipeline.test.js +1557 -0
package/dist/tests/unit/adversarial-pipeline.test.js.map +1 -0
package/dist/tests/unit/dlp-scanner.test.js +5 -5
package/dist/tests/unit/gateway-branches.test.js +137 -0
package/dist/tests/unit/gateway-branches.test.js.map +1 -1
package/dist/tests/unit/heuristic-scorer.test.d.ts +2 -0
package/dist/tests/unit/heuristic-scorer.test.d.ts.map +1 -0
package/dist/tests/unit/heuristic-scorer.test.js +248 -0
package/dist/tests/unit/heuristic-scorer.test.js.map +1 -0
package/dist/tests/unit/llm-classifier.test.d.ts +2 -0
package/dist/tests/unit/llm-classifier.test.d.ts.map +1 -0
package/dist/tests/unit/llm-classifier.test.js +349 -0
package/dist/tests/unit/llm-classifier.test.js.map +1 -0
package/dist/tests/unit/prompt-injection-backend.test.js +122 -0
package/dist/tests/unit/prompt-injection-backend.test.js.map +1 -1
package/dist/tests/unit/text-normalizer.test.js +52 -1
package/dist/tests/unit/text-normalizer.test.js.map +1 -1
package/package.json +1 -1
package/policy-packs/default.yaml +88 -0
package/src/auth/routes.ts +6 -1
package/src/config/defaults.ts +7 -2
package/src/dlp/composite-scanner.ts +27 -1
package/src/dlp/heuristic-scorer.ts +342 -0
package/src/dlp/llm-classifier.ts +191 -0
package/src/dlp/patterns.ts +1 -0
package/src/dlp/prompt-injection-backend.ts +19 -1
package/src/dlp/prompt-injection-patterns.ts +38 -0
package/src/dlp/regex-backend.ts +2 -45
package/src/dlp/scanner.ts +36 -6
package/src/dlp/text-normalizer.ts +130 -2
package/src/mcp/http-transport.ts +29 -6
package/src/policy/engine.ts +102 -0
package/src/saas/routes.ts +22 -5
package/src/server/app.ts +7 -0
package/src/server/gateway.ts +196 -1
package/src/types/config.ts +15 -1

package/src/server/gateway.ts CHANGED Viewed

@@ -9,6 +9,8 @@ import { DLPScanner } from '../dlp/scanner';
 import { CompositeDLPScanner } from '../dlp/composite-scanner';
 import { DLPBackend } from '../dlp/interfaces';
 import { PromptInjectionBackend } from '../dlp/prompt-injection-backend';
+import { HeuristicScorerBackend } from '../dlp/heuristic-scorer';
+import { scorePromptInjection } from '../dlp/heuristic-scorer';
 import { TruffleHogBackend } from '../dlp/trufflehog-backend';
 import { BudgetManager, CostRecord } from '../budget/manager';
 import { UsageExtractor } from '../budget/usage-extractor';
@@ -29,6 +31,7 @@ import { UsageData } from '../types/tool-result';
 import { GatewayMetrics } from '../metrics';
 import { GatewayTracer } from '../tracing';
 import { AnomalyDetector } from '../anomaly';
+import { LlmPromptInjectionClassifier } from '../dlp/llm-classifier';
 import { log as devLog, logger } from './logger';
 export interface PreExecuteResult {
@@ -118,6 +121,7 @@ export class Gateway {
   private rateLimitConfigStore?: RateLimitConfigStore;
   private budgetConfigStore?: BudgetConfigStore;
   private usageExtractor: UsageExtractor;
+  private llmClassifier?: LlmPromptInjectionClassifier;
   private inFlightCleanupInterval?: ReturnType<typeof setInterval>;
   /**
    * Tracks tool_call_ids currently being processed to prevent TOCTOU races.
@@ -137,6 +141,7 @@ export class Gateway {
       dlpBackends.push(new PromptInjectionBackend({
         scan_output: config.dlp.scan_output,
       }));
+      dlpBackends.push(new HeuristicScorerBackend());
     }
     if (config.dlp.trufflehog?.enabled) {
       dlpBackends.push(new TruffleHogBackend({
@@ -165,6 +170,14 @@ export class Gateway {
       this.opaEngine = new OPAEngine(config.policy.opa);
     }
+    // Set up LLM-based prompt injection classifier if enabled
+    if (config.dlp.llm_classifier?.enabled) {
+      this.llmClassifier = new LlmPromptInjectionClassifier(config.dlp.llm_classifier);
+      console.log(`[Gateway] LLM classifier enabled (model: ${config.dlp.llm_classifier.model || 'default'})`);
+    } else {
+      console.log(`[Gateway] LLM classifier disabled (PALARYN_LLM_API_KEY ${process.env.PALARYN_LLM_API_KEY ? 'set' : 'NOT set'})`);
+    }
     this.usageExtractor = new UsageExtractor(config.budget.token_pricing);
     // Set up executor registry with HTTP as default + catch-all fallback
@@ -310,7 +323,9 @@ export class Gateway {
     // Prompt injection blocking check (before policy, so it always runs)
     const piAction = this.config.dlp.prompt_injection_action || 'log';
     if (piAction === 'block' && argsDlp.detected.length > 0) {
-      const piDetections = argsDlp.detected.filter((d: string) => d.startsWith('prompt_injection_'));
+      const piDetections = argsDlp.detected.filter((d: string) =>
+        d.startsWith('prompt_injection_') || d.startsWith('heuristic_prompt_injection')
+      );
       if (piDetections.length > 0) {
         const threshold = this.config.dlp.prompt_injection_block_threshold || 'high';
         const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
@@ -355,6 +370,115 @@ export class Gateway {
       }
     }
+    // Heuristic scoring — force LLM classifier when structural signals are elevated
+    let forceLlmClassification = false;
+    const inputText = JSON.stringify(toolCall);
+    const heuristicResult = scorePromptInjection(inputText);
+    if (heuristicResult.score >= 0.4) {
+      forceLlmClassification = true;
+      devLog.pipelineStep('🔍', 'HEURISTIC_SCORER',
+        `score=${heuristicResult.score.toFixed(2)} signals=[${heuristicResult.signals.join(',')}]`);
+      // Fallback: if heuristic score >= 0.5 and no LLM classifier is available, block directly.
+      // Without an LLM classifier the forceLlmClassification flag has no effect,
+      // so we treat a score of 0.5+ (3+ structural signals) as sufficient evidence to block.
+      if (heuristicResult.score >= 0.5 && !this.llmClassifier && piAction === 'block') {
+        const heuristicDetection = `heuristic_prompt_injection (score=${heuristicResult.score.toFixed(2)}, signals=${heuristicResult.signals.join(',')})`;
+        argsDlp.detected.push(heuristicDetection);
+        argsDlp.severity = this.maxSeverity(argsDlp.severity, 'high');
+        this.auditLogger.logDLPScanned(toolCall, [heuristicDetection], 'high', 0);
+        this.metrics?.recordDLPDetection('heuristic_prompt_injection', 'high');
+        devLog.pipelineStep('🛡️', 'HEURISTIC_BLOCK', `score=${heuristicResult.score.toFixed(2)} — no LLM classifier available, blocking`);
+        const durationSec = (Date.now() - startTime) / 1000;
+        this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
+        const result = this.buildResult(toolCall, 'blocked', {
+          decision: 'deny',
+          rule_id: 'heuristic_prompt_injection_block',
+          rule_name: 'Heuristic prompt injection detected',
+          reasons: [`Heuristic prompt injection detected: ${heuristicResult.signals.join(', ')} (score=${heuristicResult.score.toFixed(2)})`],
+        }, startTime, undefined,
+          `Blocked by heuristic prompt injection scorer (score=${heuristicResult.score.toFixed(2)}, signals: ${heuristicResult.signals.join(', ')})`,
+          undefined, argsDlp);
+        devLog.pipelineEnd('blocked', Date.now() - startTime);
+        return { allowed: false, result, stepTimings, startTime };
+      }
+    }
+    // LLM-based prompt injection classification on INPUT (async, runs after sync DLP scan)
+    if ((this.llmClassifier && this.config.dlp.llm_classifier?.scan_input !== false) || (forceLlmClassification && this.llmClassifier)) {
+      const llmInputStart = Date.now();
+      const llmInputResult = await asyncChildSpan(otel, 'gateway.llm_classifier_input', () =>
+        this.llmClassifier!.classify(inputText)
+      );
+      stepTimings.llm_classifier_input = Date.now() - llmInputStart;
+      const llmInputClassifications = llmInputResult.classifications;
+      // Degraded signal: LLM classifier failed while heuristic flagged suspicious content
+      if (llmInputResult.error && forceLlmClassification) {
+        const degradedName = 'llm_classifier_unavailable';
+        argsDlp.detected.push(degradedName);
+        argsDlp.severity = this.maxSeverity(argsDlp.severity, 'medium');
+        this.auditLogger.logDLPScanned(toolCall, [degradedName], 'medium', 0);
+        this.metrics?.recordDLPDetection(degradedName, 'medium');
+        devLog.pipelineStep('⚠️', 'LLM_CLASSIFIER_UNAVAILABLE',
+          `LLM classifier failed with heuristic score=${heuristicResult.score.toFixed(2)} — emitting degraded signal`);
+      }
+      if (llmInputClassifications.length > 0) {
+        const llmDetections = LlmPromptInjectionClassifier.toDLPDetections(llmInputClassifications, inputText);
+        const llmDetectionNames = llmDetections.map(d => d.pattern_name);
+        argsDlp.detected.push(...llmDetectionNames);
+        for (const d of llmDetections) {
+          argsDlp.severity = this.maxSeverity(argsDlp.severity, d.severity);
+        }
+        this.auditLogger.logDLPScanned(toolCall, llmDetectionNames, argsDlp.severity, 0);
+        for (const name of llmDetectionNames) {
+          this.metrics?.recordDLPDetection(name, argsDlp.severity);
+        }
+        // Re-check prompt injection blocking with LLM classifier detections.
+        // LLM classifier uses 'high' threshold to reduce false positives on benign content —
+        // it's more aggressive than regex/heuristic and needs higher confidence to block.
+        if (piAction === 'block') {
+          const llmPiDetections = argsDlp.detected.filter((d: string) => d.startsWith('llm_classifier_'));
+          if (llmPiDetections.length > 0) {
+            // Use max of configured threshold and 'high' — LLM-only blocks require high severity
+            const configThreshold = this.config.dlp.prompt_injection_block_threshold || 'high';
+            const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
+            const hasHeuristicDetection = argsDlp.detected.some((d: string) => d.startsWith('heuristic_prompt_injection'));
+            // If heuristic also flagged it, trust the LLM at configured threshold;
+            // otherwise require 'high' to avoid false positives on benign content.
+            const threshold = hasHeuristicDetection ? configThreshold : 'high';
+            const thresholdRank = severityRank[threshold] ?? 2;
+            const maxLlmSeverity = llmInputClassifications.reduce((max, c) => {
+              const rank = severityRank[c.severity] ?? 0;
+              return rank > max ? rank : max;
+            }, 0);
+            if (maxLlmSeverity >= thresholdRank) {
+              devLog.pipelineStep('🛡️', 'LLM_CLASSIFIER_INPUT_BLOCK',
+                `Blocked: ${llmPiDetections.join(', ')} (severity: ${argsDlp.severity}, threshold: ${threshold})`);
+              const durationSec = (Date.now() - startTime) / 1000;
+              this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
+              const result = this.buildResult(toolCall, 'blocked', {
+                decision: 'deny',
+                rule_id: 'llm_classifier_input_block',
+                rule_name: 'LLM classifier detected prompt injection in input',
+                reasons: [`LLM classifier detected: ${llmPiDetections.join(', ')}`],
+              }, startTime, undefined,
+                `Blocked by LLM classifier: ${llmPiDetections.join(', ')} (severity: ${argsDlp.severity})`,
+                undefined, argsDlp);
+              devLog.pipelineEnd('blocked', Date.now() - startTime);
+              return { allowed: false, result, stepTimings, startTime };
+            }
+          }
+        }
+      }
+    }
     // Policy evaluation — DLP context is passed so DLP-conditioned rules
     // compete with all other rules in a single priority-ordered pass.
     stepStart = Date.now();
@@ -533,6 +657,77 @@ export class Gateway {
     }
     stepTimings.dlp_out = Date.now() - stepStart;
+    // LLM-based prompt injection classification (async, runs after sync DLP scan)
+    if (this.llmClassifier && output.body && this.config.dlp.llm_classifier?.scan_output !== false) {
+      const llmStart = Date.now();
+      const text = typeof output.body === 'string' ? output.body : JSON.stringify(output.body);
+      const llmOutputResult = await asyncChildSpan(otel, 'gateway.llm_classifier', () =>
+        this.llmClassifier!.classify(text)
+      );
+      stepTimings.llm_classifier = Date.now() - llmStart;
+      const llmClassifications = llmOutputResult.classifications;
+      if (llmClassifications.length > 0) {
+        const llmDetections = LlmPromptInjectionClassifier.toDLPDetections(llmClassifications, text);
+        const llmDetectionNames = llmDetections.map(d => d.pattern_name);
+        outputDlp.detected.push(...llmDetectionNames);
+        // Recalculate severity: take the max of existing and LLM detections
+        for (const d of llmDetections) {
+          outputDlp.severity = this.maxSeverity(outputDlp.severity, d.severity);
+        }
+        this.auditLogger.logDLPScanned(toolCall, llmDetectionNames, outputDlp.severity, 0);
+        for (const name of llmDetectionNames) {
+          this.metrics?.recordDLPDetection(name, outputDlp.severity);
+        }
+      }
+    }
+    // Prompt injection blocking check on output (regex + LLM classifier detections)
+    const piOutputAction = this.config.dlp.prompt_injection_action || 'log';
+    if (piOutputAction === 'block' && outputDlp.detected.length > 0) {
+      const piDetections = outputDlp.detected.filter((d: string) =>
+        d.startsWith('prompt_injection_') || d.startsWith('llm_classifier_')
+      );
+      if (piDetections.length > 0) {
+        const threshold = this.config.dlp.prompt_injection_block_threshold || 'medium';
+        const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
+        const thresholdRank = severityRank[threshold] ?? 1;
+        const maxSeverityRank = severityRank[outputDlp.severity] ?? 0;
+        if (maxSeverityRank >= thresholdRank) {
+          // Release budget reservation since we're blocking
+          if (reservationKey) {
+            this.budgetManager.commitReservation(reservationKey, 0);
+          }
+          devLog.pipelineStep('🛡️', 'PROMPT_INJECTION_OUTPUT_BLOCK',
+            `Blocked output: ${piDetections.join(', ')} (severity: ${outputDlp.severity}, threshold: ${threshold})`);
+          const durationSec = (Date.now() - startTime) / 1000;
+          this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
+          const defaultPolicy = policyResult || { decision: 'allow' as const, rule_id: 'passthrough', rule_name: 'Passthrough', reasons: [] };
+          const argsDlpSafe = argsDlp || { detected: [], redactions: [], severity: 'low' as DLPSeverity };
+          const mergedDlp = {
+            detected: [...new Set([...argsDlpSafe.detected, ...outputDlp.detected])],
+            redactions: [...argsDlpSafe.redactions, ...outputDlp.redactions],
+            severity: this.maxSeverity(argsDlpSafe.severity, outputDlp.severity),
+          };
+          const result = this.buildResult(toolCall, 'blocked', {
+            decision: 'deny',
+            rule_id: 'prompt_injection_output_block',
+            rule_name: 'Prompt injection detected in output',
+            reasons: [`Prompt injection detected in output: ${piDetections.join(', ')}`],
+          }, startTime, undefined,
+            `Blocked by prompt injection detection in output: ${piDetections.join(', ')} (severity: ${outputDlp.severity})`,
+            undefined, mergedDlp);
+          this.auditLogger.logToolResultReturned(toolCall, 'blocked', Date.now() - startTime, { prompt_injection_blocked: true, detections: piDetections });
+          devLog.pipelineEnd('blocked', Date.now() - startTime);
+          return result;
+        }
+      }
+    }
     // Extract usage data from response
     const headerUsage = this.usageExtractor.extractFromHeaders(output.headers);
     const bodyUsage = this.usageExtractor.extractFromBody(output.body);

package/src/types/config.ts CHANGED Viewed

@@ -140,6 +140,16 @@ export interface OPAConfig {
   package_name?: string;
 }
+export interface LlmClassifierConfig {
+  enabled: boolean;
+  model?: string;
+  confidence_threshold?: number;
+  /** Run LLM classifier on input/args (default: true) */
+  scan_input?: boolean;
+  /** Run LLM classifier on output (default: true) */
+  scan_output?: boolean;
+}
 export interface DLPConfig {
   enabled: boolean;
   scan_args: boolean;
@@ -149,7 +159,7 @@ export interface DLPConfig {
   prompt_injection_detection?: boolean;
   /** Action when prompt injection is detected: 'log' (default), 'flag', or 'block' */
   prompt_injection_action?: 'log' | 'flag' | 'block';
-  /** Minimum severity to trigger blocking (when action is 'block'). Default: 'high' */
+  /** Minimum severity to trigger blocking (when action is 'block'). Default: 'medium' */
   prompt_injection_block_threshold?: 'medium' | 'high';
   /** Response mode when injection is blocked: 'deny' (default), 'sanitize', or 'require_approval' */
   prompt_injection_response?: 'deny' | 'sanitize' | 'require_approval';
@@ -159,6 +169,10 @@ export interface DLPConfig {
     binary_path?: string;
     timeout_ms?: number;
   };
+  /** Maximum scan depth for nested structures (default: 64) */
+  max_scan_depth?: number;
+  /** LLM-based prompt injection classifier (async, semantic analysis) */
+  llm_classifier?: LlmClassifierConfig;
 }
 export interface AuditConfig {