palaryn 0.3.6 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101)
  1. package/README.md +2 -1
  2. package/dist/src/auth/routes.d.ts.map +1 -1
  3. package/dist/src/auth/routes.js +5 -1
  4. package/dist/src/auth/routes.js.map +1 -1
  5. package/dist/src/config/defaults.d.ts.map +1 -1
  6. package/dist/src/config/defaults.js +7 -2
  7. package/dist/src/config/defaults.js.map +1 -1
  8. package/dist/src/dlp/composite-scanner.d.ts.map +1 -1
  9. package/dist/src/dlp/composite-scanner.js +26 -1
  10. package/dist/src/dlp/composite-scanner.js.map +1 -1
  11. package/dist/src/dlp/heuristic-scorer.d.ts +31 -0
  12. package/dist/src/dlp/heuristic-scorer.d.ts.map +1 -0
  13. package/dist/src/dlp/heuristic-scorer.js +286 -0
  14. package/dist/src/dlp/heuristic-scorer.js.map +1 -0
  15. package/dist/src/dlp/llm-classifier.d.ts +33 -0
  16. package/dist/src/dlp/llm-classifier.d.ts.map +1 -0
  17. package/dist/src/dlp/llm-classifier.js +145 -0
  18. package/dist/src/dlp/llm-classifier.js.map +1 -0
  19. package/dist/src/dlp/patterns.d.ts.map +1 -1
  20. package/dist/src/dlp/patterns.js +1 -0
  21. package/dist/src/dlp/patterns.js.map +1 -1
  22. package/dist/src/dlp/prompt-injection-backend.d.ts.map +1 -1
  23. package/dist/src/dlp/prompt-injection-backend.js +17 -0
  24. package/dist/src/dlp/prompt-injection-backend.js.map +1 -1
  25. package/dist/src/dlp/prompt-injection-patterns.d.ts.map +1 -1
  26. package/dist/src/dlp/prompt-injection-patterns.js +36 -0
  27. package/dist/src/dlp/prompt-injection-patterns.js.map +1 -1
  28. package/dist/src/dlp/scanner.d.ts.map +1 -1
  29. package/dist/src/dlp/scanner.js +38 -6
  30. package/dist/src/dlp/scanner.js.map +1 -1
  31. package/dist/src/dlp/text-normalizer.d.ts +5 -0
  32. package/dist/src/dlp/text-normalizer.d.ts.map +1 -1
  33. package/dist/src/dlp/text-normalizer.js +118 -0
  34. package/dist/src/dlp/text-normalizer.js.map +1 -1
  35. package/dist/src/mcp/http-transport.d.ts +2 -0
  36. package/dist/src/mcp/http-transport.d.ts.map +1 -1
  37. package/dist/src/mcp/http-transport.js +25 -6
  38. package/dist/src/mcp/http-transport.js.map +1 -1
  39. package/dist/src/mcp/oauth-provider.d.ts +1 -0
  40. package/dist/src/mcp/oauth-provider.d.ts.map +1 -1
  41. package/dist/src/mcp/oauth-provider.js +26 -1
  42. package/dist/src/mcp/oauth-provider.js.map +1 -1
  43. package/dist/src/policy/engine.d.ts.map +1 -1
  44. package/dist/src/policy/engine.js +109 -0
  45. package/dist/src/policy/engine.js.map +1 -1
  46. package/dist/src/saas/routes.d.ts.map +1 -1
  47. package/dist/src/saas/routes.js +19 -5
  48. package/dist/src/saas/routes.js.map +1 -1
  49. package/dist/src/server/app.d.ts.map +1 -1
  50. package/dist/src/server/app.js +7 -0
  51. package/dist/src/server/app.js.map +1 -1
  52. package/dist/src/server/gateway.d.ts +1 -0
  53. package/dist/src/server/gateway.d.ts.map +1 -1
  54. package/dist/src/server/gateway.js +113 -0
  55. package/dist/src/server/gateway.js.map +1 -1
  56. package/dist/src/types/config.d.ts +14 -1
  57. package/dist/src/types/config.d.ts.map +1 -1
  58. package/dist/tests/security/pentest-payloads.d.ts +46 -0
  59. package/dist/tests/security/pentest-payloads.d.ts.map +1 -0
  60. package/dist/tests/security/pentest-payloads.js +459 -0
  61. package/dist/tests/security/pentest-payloads.js.map +1 -0
  62. package/dist/tests/unit/adversarial-pipeline.test.d.ts +15 -0
  63. package/dist/tests/unit/adversarial-pipeline.test.d.ts.map +1 -0
  64. package/dist/tests/unit/adversarial-pipeline.test.js +1552 -0
  65. package/dist/tests/unit/adversarial-pipeline.test.js.map +1 -0
  66. package/dist/tests/unit/dlp-scanner.test.js +5 -5
  67. package/dist/tests/unit/gateway-branches.test.js +131 -0
  68. package/dist/tests/unit/gateway-branches.test.js.map +1 -1
  69. package/dist/tests/unit/heuristic-scorer.test.d.ts +2 -0
  70. package/dist/tests/unit/heuristic-scorer.test.d.ts.map +1 -0
  71. package/dist/tests/unit/heuristic-scorer.test.js +248 -0
  72. package/dist/tests/unit/heuristic-scorer.test.js.map +1 -0
  73. package/dist/tests/unit/llm-classifier.test.d.ts +2 -0
  74. package/dist/tests/unit/llm-classifier.test.d.ts.map +1 -0
  75. package/dist/tests/unit/llm-classifier.test.js +343 -0
  76. package/dist/tests/unit/llm-classifier.test.js.map +1 -0
  77. package/dist/tests/unit/mcp-oauth.test.js +6 -2
  78. package/dist/tests/unit/mcp-oauth.test.js.map +1 -1
  79. package/dist/tests/unit/prompt-injection-backend.test.js +122 -0
  80. package/dist/tests/unit/prompt-injection-backend.test.js.map +1 -1
  81. package/dist/tests/unit/text-normalizer.test.js +45 -0
  82. package/dist/tests/unit/text-normalizer.test.js.map +1 -1
  83. package/package.json +1 -1
  84. package/policy-packs/default.yaml +88 -0
  85. package/src/auth/routes.ts +6 -1
  86. package/src/config/defaults.ts +7 -2
  87. package/src/dlp/composite-scanner.ts +27 -1
  88. package/src/dlp/heuristic-scorer.ts +312 -0
  89. package/src/dlp/llm-classifier.ts +176 -0
  90. package/src/dlp/patterns.ts +1 -0
  91. package/src/dlp/prompt-injection-backend.ts +19 -1
  92. package/src/dlp/prompt-injection-patterns.ts +38 -0
  93. package/src/dlp/scanner.ts +36 -6
  94. package/src/dlp/text-normalizer.ts +124 -0
  95. package/src/mcp/http-transport.ts +29 -6
  96. package/src/mcp/oauth-provider.ts +22 -1
  97. package/src/policy/engine.ts +102 -0
  98. package/src/saas/routes.ts +22 -5
  99. package/src/server/app.ts +7 -0
  100. package/src/server/gateway.ts +142 -0
  101. package/src/types/config.ts +15 -1
package/src/server/gateway.ts +142 -0
@@ -9,6 +9,8 @@ import { DLPScanner } from '../dlp/scanner';
9
9
  import { CompositeDLPScanner } from '../dlp/composite-scanner';
10
10
  import { DLPBackend } from '../dlp/interfaces';
11
11
  import { PromptInjectionBackend } from '../dlp/prompt-injection-backend';
12
+ import { HeuristicScorerBackend } from '../dlp/heuristic-scorer';
13
+ import { scorePromptInjection } from '../dlp/heuristic-scorer';
12
14
  import { TruffleHogBackend } from '../dlp/trufflehog-backend';
13
15
  import { BudgetManager, CostRecord } from '../budget/manager';
14
16
  import { UsageExtractor } from '../budget/usage-extractor';
@@ -29,6 +31,7 @@ import { UsageData } from '../types/tool-result';
29
31
  import { GatewayMetrics } from '../metrics';
30
32
  import { GatewayTracer } from '../tracing';
31
33
  import { AnomalyDetector } from '../anomaly';
34
+ import { LlmPromptInjectionClassifier } from '../dlp/llm-classifier';
32
35
  import { log as devLog, logger } from './logger';
33
36
 
34
37
  export interface PreExecuteResult {
@@ -118,6 +121,7 @@ export class Gateway {
118
121
  private rateLimitConfigStore?: RateLimitConfigStore;
119
122
  private budgetConfigStore?: BudgetConfigStore;
120
123
  private usageExtractor: UsageExtractor;
124
+ private llmClassifier?: LlmPromptInjectionClassifier;
121
125
  private inFlightCleanupInterval?: ReturnType<typeof setInterval>;
122
126
  /**
123
127
  * Tracks tool_call_ids currently being processed to prevent TOCTOU races.
@@ -137,6 +141,7 @@ export class Gateway {
137
141
  dlpBackends.push(new PromptInjectionBackend({
138
142
  scan_output: config.dlp.scan_output,
139
143
  }));
144
+ dlpBackends.push(new HeuristicScorerBackend());
140
145
  }
141
146
  if (config.dlp.trufflehog?.enabled) {
142
147
  dlpBackends.push(new TruffleHogBackend({
@@ -165,6 +170,11 @@ export class Gateway {
165
170
  this.opaEngine = new OPAEngine(config.policy.opa);
166
171
  }
167
172
 
173
+ // Set up LLM-based prompt injection classifier if enabled
174
+ if (config.dlp.llm_classifier?.enabled) {
175
+ this.llmClassifier = new LlmPromptInjectionClassifier(config.dlp.llm_classifier);
176
+ }
177
+
168
178
  this.usageExtractor = new UsageExtractor(config.budget.token_pricing);
169
179
 
170
180
  // Set up executor registry with HTTP as default + catch-all fallback
@@ -355,6 +365,68 @@ export class Gateway {
355
365
  }
356
366
  }
357
367
 
368
+ // Heuristic scoring — force LLM classifier when structural signals are elevated
369
+ let forceLlmClassification = false;
370
+ const inputText = JSON.stringify(toolCall);
371
+ const heuristicResult = scorePromptInjection(inputText);
372
+ if (heuristicResult.score >= 0.4) {
373
+ forceLlmClassification = true;
374
+ devLog.pipelineStep('🔍', 'HEURISTIC_SCORER',
375
+ `score=${heuristicResult.score.toFixed(2)} signals=[${heuristicResult.signals.join(',')}]`);
376
+ }
377
+
378
+ // LLM-based prompt injection classification on INPUT (async, runs after sync DLP scan)
379
+ if ((this.llmClassifier && this.config.dlp.llm_classifier?.scan_input !== false) || (forceLlmClassification && this.llmClassifier)) {
380
+ const llmInputStart = Date.now();
381
+ const llmInputClassifications = await asyncChildSpan(otel, 'gateway.llm_classifier_input', () =>
382
+ this.llmClassifier!.classify(inputText)
383
+ );
384
+ stepTimings.llm_classifier_input = Date.now() - llmInputStart;
385
+
386
+ if (llmInputClassifications.length > 0) {
387
+ const llmDetections = LlmPromptInjectionClassifier.toDLPDetections(llmInputClassifications, inputText);
388
+ const llmDetectionNames = llmDetections.map(d => d.pattern_name);
389
+ argsDlp.detected.push(...llmDetectionNames);
390
+
391
+ for (const d of llmDetections) {
392
+ argsDlp.severity = this.maxSeverity(argsDlp.severity, d.severity);
393
+ }
394
+
395
+ this.auditLogger.logDLPScanned(toolCall, llmDetectionNames, argsDlp.severity, 0);
396
+ for (const name of llmDetectionNames) {
397
+ this.metrics?.recordDLPDetection(name, argsDlp.severity);
398
+ }
399
+
400
+ // Re-check prompt injection blocking with LLM classifier detections
401
+ if (piAction === 'block') {
402
+ const llmPiDetections = argsDlp.detected.filter((d: string) => d.startsWith('llm_classifier_'));
403
+ if (llmPiDetections.length > 0) {
404
+ const threshold = this.config.dlp.prompt_injection_block_threshold || 'high';
405
+ const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
406
+ const thresholdRank = severityRank[threshold] ?? 2;
407
+ const maxSeverityRank = severityRank[argsDlp.severity] ?? 0;
408
+
409
+ if (maxSeverityRank >= thresholdRank) {
410
+ devLog.pipelineStep('🛡️', 'LLM_CLASSIFIER_INPUT_BLOCK',
411
+ `Blocked: ${llmPiDetections.join(', ')} (severity: ${argsDlp.severity}, threshold: ${threshold})`);
412
+ const durationSec = (Date.now() - startTime) / 1000;
413
+ this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
414
+ const result = this.buildResult(toolCall, 'blocked', {
415
+ decision: 'deny',
416
+ rule_id: 'llm_classifier_input_block',
417
+ rule_name: 'LLM classifier detected prompt injection in input',
418
+ reasons: [`LLM classifier detected: ${llmPiDetections.join(', ')}`],
419
+ }, startTime, undefined,
420
+ `Blocked by LLM classifier: ${llmPiDetections.join(', ')} (severity: ${argsDlp.severity})`,
421
+ undefined, argsDlp);
422
+ devLog.pipelineEnd('blocked', Date.now() - startTime);
423
+ return { allowed: false, result, stepTimings, startTime };
424
+ }
425
+ }
426
+ }
427
+ }
428
+ }
429
+
358
430
  // Policy evaluation — DLP context is passed so DLP-conditioned rules
359
431
  // compete with all other rules in a single priority-ordered pass.
360
432
  stepStart = Date.now();
@@ -533,6 +605,76 @@ export class Gateway {
533
605
  }
534
606
  stepTimings.dlp_out = Date.now() - stepStart;
535
607
 
608
+ // LLM-based prompt injection classification (async, runs after sync DLP scan)
609
+ if (this.llmClassifier && output.body && this.config.dlp.llm_classifier?.scan_output !== false) {
610
+ const llmStart = Date.now();
611
+ const text = typeof output.body === 'string' ? output.body : JSON.stringify(output.body);
612
+ const llmClassifications = await asyncChildSpan(otel, 'gateway.llm_classifier', () =>
613
+ this.llmClassifier!.classify(text)
614
+ );
615
+ stepTimings.llm_classifier = Date.now() - llmStart;
616
+
617
+ if (llmClassifications.length > 0) {
618
+ const llmDetections = LlmPromptInjectionClassifier.toDLPDetections(llmClassifications, text);
619
+ const llmDetectionNames = llmDetections.map(d => d.pattern_name);
620
+ outputDlp.detected.push(...llmDetectionNames);
621
+
622
+ // Recalculate severity: take the max of existing and LLM detections
623
+ for (const d of llmDetections) {
624
+ outputDlp.severity = this.maxSeverity(outputDlp.severity, d.severity);
625
+ }
626
+
627
+ this.auditLogger.logDLPScanned(toolCall, llmDetectionNames, outputDlp.severity, 0);
628
+ for (const name of llmDetectionNames) {
629
+ this.metrics?.recordDLPDetection(name, outputDlp.severity);
630
+ }
631
+ }
632
+ }
633
+
634
+ // Prompt injection blocking check on output (regex + LLM classifier detections)
635
+ const piOutputAction = this.config.dlp.prompt_injection_action || 'log';
636
+ if (piOutputAction === 'block' && outputDlp.detected.length > 0) {
637
+ const piDetections = outputDlp.detected.filter((d: string) =>
638
+ d.startsWith('prompt_injection_') || d.startsWith('llm_classifier_')
639
+ );
640
+ if (piDetections.length > 0) {
641
+ const threshold = this.config.dlp.prompt_injection_block_threshold || 'medium';
642
+ const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
643
+ const thresholdRank = severityRank[threshold] ?? 1;
644
+ const maxSeverityRank = severityRank[outputDlp.severity] ?? 0;
645
+
646
+ if (maxSeverityRank >= thresholdRank) {
647
+ // Release budget reservation since we're blocking
648
+ if (reservationKey) {
649
+ this.budgetManager.commitReservation(reservationKey, 0);
650
+ }
651
+
652
+ devLog.pipelineStep('🛡️', 'PROMPT_INJECTION_OUTPUT_BLOCK',
653
+ `Blocked output: ${piDetections.join(', ')} (severity: ${outputDlp.severity}, threshold: ${threshold})`);
654
+ const durationSec = (Date.now() - startTime) / 1000;
655
+ this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
656
+ const defaultPolicy = policyResult || { decision: 'allow' as const, rule_id: 'passthrough', rule_name: 'Passthrough', reasons: [] };
657
+ const argsDlpSafe = argsDlp || { detected: [], redactions: [], severity: 'low' as DLPSeverity };
658
+ const mergedDlp = {
659
+ detected: [...new Set([...argsDlpSafe.detected, ...outputDlp.detected])],
660
+ redactions: [...argsDlpSafe.redactions, ...outputDlp.redactions],
661
+ severity: this.maxSeverity(argsDlpSafe.severity, outputDlp.severity),
662
+ };
663
+ const result = this.buildResult(toolCall, 'blocked', {
664
+ decision: 'deny',
665
+ rule_id: 'prompt_injection_output_block',
666
+ rule_name: 'Prompt injection detected in output',
667
+ reasons: [`Prompt injection detected in output: ${piDetections.join(', ')}`],
668
+ }, startTime, undefined,
669
+ `Blocked by prompt injection detection in output: ${piDetections.join(', ')} (severity: ${outputDlp.severity})`,
670
+ undefined, mergedDlp);
671
+ this.auditLogger.logToolResultReturned(toolCall, 'blocked', Date.now() - startTime, { prompt_injection_blocked: true, detections: piDetections });
672
+ devLog.pipelineEnd('blocked', Date.now() - startTime);
673
+ return result;
674
+ }
675
+ }
676
+ }
677
+
536
678
  // Extract usage data from response
537
679
  const headerUsage = this.usageExtractor.extractFromHeaders(output.headers);
538
680
  const bodyUsage = this.usageExtractor.extractFromBody(output.body);
package/src/types/config.ts +15 -1
@@ -140,6 +140,16 @@ export interface OPAConfig {
140
140
  package_name?: string;
141
141
  }
142
142
 
143
+ export interface LlmClassifierConfig {
144
+ enabled: boolean;
145
+ model?: string;
146
+ confidence_threshold?: number;
147
+ /** Run LLM classifier on input/args (default: true) */
148
+ scan_input?: boolean;
149
+ /** Run LLM classifier on output (default: true) */
150
+ scan_output?: boolean;
151
+ }
152
+
143
153
  export interface DLPConfig {
144
154
  enabled: boolean;
145
155
  scan_args: boolean;
@@ -149,7 +159,7 @@ export interface DLPConfig {
149
159
  prompt_injection_detection?: boolean;
150
160
  /** Action when prompt injection is detected: 'log' (default), 'flag', or 'block' */
151
161
  prompt_injection_action?: 'log' | 'flag' | 'block';
152
- /** Minimum severity to trigger blocking (when action is 'block'). Default: 'high' */
162
+ /** Minimum severity to trigger blocking (when action is 'block'). Default: 'medium' */
153
163
  prompt_injection_block_threshold?: 'medium' | 'high';
154
164
  /** Response mode when injection is blocked: 'deny' (default), 'sanitize', or 'require_approval' */
155
165
  prompt_injection_response?: 'deny' | 'sanitize' | 'require_approval';
@@ -159,6 +169,10 @@ export interface DLPConfig {
159
169
  binary_path?: string;
160
170
  timeout_ms?: number;
161
171
  };
172
+ /** Maximum scan depth for nested structures (default: 64) */
173
+ max_scan_depth?: number;
174
+ /** LLM-based prompt injection classifier (async, semantic analysis) */
175
+ llm_classifier?: LlmClassifierConfig;
162
176
  }
163
177
 
164
178
  export interface AuditConfig {