palaryn 0.3.7 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/src/auth/routes.d.ts.map +1 -1
- package/dist/src/auth/routes.js +5 -1
- package/dist/src/auth/routes.js.map +1 -1
- package/dist/src/config/defaults.d.ts.map +1 -1
- package/dist/src/config/defaults.js +7 -2
- package/dist/src/config/defaults.js.map +1 -1
- package/dist/src/dlp/composite-scanner.d.ts.map +1 -1
- package/dist/src/dlp/composite-scanner.js +26 -1
- package/dist/src/dlp/composite-scanner.js.map +1 -1
- package/dist/src/dlp/heuristic-scorer.d.ts +31 -0
- package/dist/src/dlp/heuristic-scorer.d.ts.map +1 -0
- package/dist/src/dlp/heuristic-scorer.js +314 -0
- package/dist/src/dlp/heuristic-scorer.js.map +1 -0
- package/dist/src/dlp/llm-classifier.d.ts +38 -0
- package/dist/src/dlp/llm-classifier.d.ts.map +1 -0
- package/dist/src/dlp/llm-classifier.js +152 -0
- package/dist/src/dlp/llm-classifier.js.map +1 -0
- package/dist/src/dlp/patterns.d.ts.map +1 -1
- package/dist/src/dlp/patterns.js +1 -0
- package/dist/src/dlp/patterns.js.map +1 -1
- package/dist/src/dlp/prompt-injection-backend.d.ts.map +1 -1
- package/dist/src/dlp/prompt-injection-backend.js +17 -0
- package/dist/src/dlp/prompt-injection-backend.js.map +1 -1
- package/dist/src/dlp/prompt-injection-patterns.d.ts.map +1 -1
- package/dist/src/dlp/prompt-injection-patterns.js +36 -0
- package/dist/src/dlp/prompt-injection-patterns.js.map +1 -1
- package/dist/src/dlp/regex-backend.d.ts.map +1 -1
- package/dist/src/dlp/regex-backend.js +2 -38
- package/dist/src/dlp/regex-backend.js.map +1 -1
- package/dist/src/dlp/scanner.d.ts.map +1 -1
- package/dist/src/dlp/scanner.js +38 -6
- package/dist/src/dlp/scanner.js.map +1 -1
- package/dist/src/dlp/text-normalizer.d.ts +10 -1
- package/dist/src/dlp/text-normalizer.d.ts.map +1 -1
- package/dist/src/dlp/text-normalizer.js +124 -2
- package/dist/src/dlp/text-normalizer.js.map +1 -1
- package/dist/src/mcp/http-transport.d.ts +2 -0
- package/dist/src/mcp/http-transport.d.ts.map +1 -1
- package/dist/src/mcp/http-transport.js +25 -6
- package/dist/src/mcp/http-transport.js.map +1 -1
- package/dist/src/policy/engine.d.ts.map +1 -1
- package/dist/src/policy/engine.js +109 -0
- package/dist/src/policy/engine.js.map +1 -1
- package/dist/src/saas/routes.d.ts.map +1 -1
- package/dist/src/saas/routes.js +19 -5
- package/dist/src/saas/routes.js.map +1 -1
- package/dist/src/server/app.d.ts.map +1 -1
- package/dist/src/server/app.js +7 -0
- package/dist/src/server/app.js.map +1 -1
- package/dist/src/server/gateway.d.ts +1 -0
- package/dist/src/server/gateway.d.ts.map +1 -1
- package/dist/src/server/gateway.js +160 -1
- package/dist/src/server/gateway.js.map +1 -1
- package/dist/src/types/config.d.ts +14 -1
- package/dist/src/types/config.d.ts.map +1 -1
- package/dist/tests/security/pentest-payloads.d.ts +46 -0
- package/dist/tests/security/pentest-payloads.d.ts.map +1 -0
- package/dist/tests/security/pentest-payloads.js +475 -0
- package/dist/tests/security/pentest-payloads.js.map +1 -0
- package/dist/tests/unit/adversarial-pipeline.test.d.ts +15 -0
- package/dist/tests/unit/adversarial-pipeline.test.d.ts.map +1 -0
- package/dist/tests/unit/adversarial-pipeline.test.js +1557 -0
- package/dist/tests/unit/adversarial-pipeline.test.js.map +1 -0
- package/dist/tests/unit/dlp-scanner.test.js +5 -5
- package/dist/tests/unit/gateway-branches.test.js +137 -0
- package/dist/tests/unit/gateway-branches.test.js.map +1 -1
- package/dist/tests/unit/heuristic-scorer.test.d.ts +2 -0
- package/dist/tests/unit/heuristic-scorer.test.d.ts.map +1 -0
- package/dist/tests/unit/heuristic-scorer.test.js +248 -0
- package/dist/tests/unit/heuristic-scorer.test.js.map +1 -0
- package/dist/tests/unit/llm-classifier.test.d.ts +2 -0
- package/dist/tests/unit/llm-classifier.test.d.ts.map +1 -0
- package/dist/tests/unit/llm-classifier.test.js +349 -0
- package/dist/tests/unit/llm-classifier.test.js.map +1 -0
- package/dist/tests/unit/prompt-injection-backend.test.js +122 -0
- package/dist/tests/unit/prompt-injection-backend.test.js.map +1 -1
- package/dist/tests/unit/text-normalizer.test.js +52 -1
- package/dist/tests/unit/text-normalizer.test.js.map +1 -1
- package/package.json +1 -1
- package/policy-packs/default.yaml +88 -0
- package/src/auth/routes.ts +6 -1
- package/src/config/defaults.ts +7 -2
- package/src/dlp/composite-scanner.ts +27 -1
- package/src/dlp/heuristic-scorer.ts +342 -0
- package/src/dlp/llm-classifier.ts +191 -0
- package/src/dlp/patterns.ts +1 -0
- package/src/dlp/prompt-injection-backend.ts +19 -1
- package/src/dlp/prompt-injection-patterns.ts +38 -0
- package/src/dlp/regex-backend.ts +2 -45
- package/src/dlp/scanner.ts +36 -6
- package/src/dlp/text-normalizer.ts +130 -2
- package/src/mcp/http-transport.ts +29 -6
- package/src/policy/engine.ts +102 -0
- package/src/saas/routes.ts +22 -5
- package/src/server/app.ts +7 -0
- package/src/server/gateway.ts +196 -1
- package/src/types/config.ts +15 -1
package/src/server/gateway.ts
CHANGED
|
@@ -9,6 +9,8 @@ import { DLPScanner } from '../dlp/scanner';
|
|
|
9
9
|
import { CompositeDLPScanner } from '../dlp/composite-scanner';
|
|
10
10
|
import { DLPBackend } from '../dlp/interfaces';
|
|
11
11
|
import { PromptInjectionBackend } from '../dlp/prompt-injection-backend';
|
|
12
|
+
import { HeuristicScorerBackend } from '../dlp/heuristic-scorer';
|
|
13
|
+
import { scorePromptInjection } from '../dlp/heuristic-scorer';
|
|
12
14
|
import { TruffleHogBackend } from '../dlp/trufflehog-backend';
|
|
13
15
|
import { BudgetManager, CostRecord } from '../budget/manager';
|
|
14
16
|
import { UsageExtractor } from '../budget/usage-extractor';
|
|
@@ -29,6 +31,7 @@ import { UsageData } from '../types/tool-result';
|
|
|
29
31
|
import { GatewayMetrics } from '../metrics';
|
|
30
32
|
import { GatewayTracer } from '../tracing';
|
|
31
33
|
import { AnomalyDetector } from '../anomaly';
|
|
34
|
+
import { LlmPromptInjectionClassifier } from '../dlp/llm-classifier';
|
|
32
35
|
import { log as devLog, logger } from './logger';
|
|
33
36
|
|
|
34
37
|
export interface PreExecuteResult {
|
|
@@ -118,6 +121,7 @@ export class Gateway {
|
|
|
118
121
|
private rateLimitConfigStore?: RateLimitConfigStore;
|
|
119
122
|
private budgetConfigStore?: BudgetConfigStore;
|
|
120
123
|
private usageExtractor: UsageExtractor;
|
|
124
|
+
private llmClassifier?: LlmPromptInjectionClassifier;
|
|
121
125
|
private inFlightCleanupInterval?: ReturnType<typeof setInterval>;
|
|
122
126
|
/**
|
|
123
127
|
* Tracks tool_call_ids currently being processed to prevent TOCTOU races.
|
|
@@ -137,6 +141,7 @@ export class Gateway {
|
|
|
137
141
|
dlpBackends.push(new PromptInjectionBackend({
|
|
138
142
|
scan_output: config.dlp.scan_output,
|
|
139
143
|
}));
|
|
144
|
+
dlpBackends.push(new HeuristicScorerBackend());
|
|
140
145
|
}
|
|
141
146
|
if (config.dlp.trufflehog?.enabled) {
|
|
142
147
|
dlpBackends.push(new TruffleHogBackend({
|
|
@@ -165,6 +170,14 @@ export class Gateway {
|
|
|
165
170
|
this.opaEngine = new OPAEngine(config.policy.opa);
|
|
166
171
|
}
|
|
167
172
|
|
|
173
|
+
// Set up LLM-based prompt injection classifier if enabled
|
|
174
|
+
if (config.dlp.llm_classifier?.enabled) {
|
|
175
|
+
this.llmClassifier = new LlmPromptInjectionClassifier(config.dlp.llm_classifier);
|
|
176
|
+
console.log(`[Gateway] LLM classifier enabled (model: ${config.dlp.llm_classifier.model || 'default'})`);
|
|
177
|
+
} else {
|
|
178
|
+
console.log(`[Gateway] LLM classifier disabled (PALARYN_LLM_API_KEY ${process.env.PALARYN_LLM_API_KEY ? 'set' : 'NOT set'})`);
|
|
179
|
+
}
|
|
180
|
+
|
|
168
181
|
this.usageExtractor = new UsageExtractor(config.budget.token_pricing);
|
|
169
182
|
|
|
170
183
|
// Set up executor registry with HTTP as default + catch-all fallback
|
|
@@ -310,7 +323,9 @@ export class Gateway {
|
|
|
310
323
|
// Prompt injection blocking check (before policy, so it always runs)
|
|
311
324
|
const piAction = this.config.dlp.prompt_injection_action || 'log';
|
|
312
325
|
if (piAction === 'block' && argsDlp.detected.length > 0) {
|
|
313
|
-
const piDetections = argsDlp.detected.filter((d: string) =>
|
|
326
|
+
const piDetections = argsDlp.detected.filter((d: string) =>
|
|
327
|
+
d.startsWith('prompt_injection_') || d.startsWith('heuristic_prompt_injection')
|
|
328
|
+
);
|
|
314
329
|
if (piDetections.length > 0) {
|
|
315
330
|
const threshold = this.config.dlp.prompt_injection_block_threshold || 'high';
|
|
316
331
|
const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
|
|
@@ -355,6 +370,115 @@ export class Gateway {
|
|
|
355
370
|
}
|
|
356
371
|
}
|
|
357
372
|
|
|
373
|
+
// Heuristic scoring — force LLM classifier when structural signals are elevated
|
|
374
|
+
let forceLlmClassification = false;
|
|
375
|
+
const inputText = JSON.stringify(toolCall);
|
|
376
|
+
const heuristicResult = scorePromptInjection(inputText);
|
|
377
|
+
if (heuristicResult.score >= 0.4) {
|
|
378
|
+
forceLlmClassification = true;
|
|
379
|
+
devLog.pipelineStep('🔍', 'HEURISTIC_SCORER',
|
|
380
|
+
`score=${heuristicResult.score.toFixed(2)} signals=[${heuristicResult.signals.join(',')}]`);
|
|
381
|
+
|
|
382
|
+
// Fallback: if heuristic score >= 0.5, no LLM classifier is available, and prompt_injection_action is 'block', block directly.
|
|
383
|
+
// Without an LLM classifier the forceLlmClassification flag has no effect,
|
|
384
|
+
// so we treat a score of 0.5+ (3+ structural signals) as sufficient evidence to block.
|
|
385
|
+
if (heuristicResult.score >= 0.5 && !this.llmClassifier && piAction === 'block') {
|
|
386
|
+
const heuristicDetection = `heuristic_prompt_injection (score=${heuristicResult.score.toFixed(2)}, signals=${heuristicResult.signals.join(',')})`;
|
|
387
|
+
argsDlp.detected.push(heuristicDetection);
|
|
388
|
+
argsDlp.severity = this.maxSeverity(argsDlp.severity, 'high');
|
|
389
|
+
this.auditLogger.logDLPScanned(toolCall, [heuristicDetection], 'high', 0);
|
|
390
|
+
this.metrics?.recordDLPDetection('heuristic_prompt_injection', 'high');
|
|
391
|
+
|
|
392
|
+
devLog.pipelineStep('🛡️', 'HEURISTIC_BLOCK', `score=${heuristicResult.score.toFixed(2)} — no LLM classifier available, blocking`);
|
|
393
|
+
const durationSec = (Date.now() - startTime) / 1000;
|
|
394
|
+
this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
|
|
395
|
+
const result = this.buildResult(toolCall, 'blocked', {
|
|
396
|
+
decision: 'deny',
|
|
397
|
+
rule_id: 'heuristic_prompt_injection_block',
|
|
398
|
+
rule_name: 'Heuristic prompt injection detected',
|
|
399
|
+
reasons: [`Heuristic prompt injection detected: ${heuristicResult.signals.join(', ')} (score=${heuristicResult.score.toFixed(2)})`],
|
|
400
|
+
}, startTime, undefined,
|
|
401
|
+
`Blocked by heuristic prompt injection scorer (score=${heuristicResult.score.toFixed(2)}, signals: ${heuristicResult.signals.join(', ')})`,
|
|
402
|
+
undefined, argsDlp);
|
|
403
|
+
devLog.pipelineEnd('blocked', Date.now() - startTime);
|
|
404
|
+
return { allowed: false, result, stepTimings, startTime };
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// LLM-based prompt injection classification on INPUT (async, runs after sync DLP scan)
|
|
409
|
+
if ((this.llmClassifier && this.config.dlp.llm_classifier?.scan_input !== false) || (forceLlmClassification && this.llmClassifier)) {
|
|
410
|
+
const llmInputStart = Date.now();
|
|
411
|
+
const llmInputResult = await asyncChildSpan(otel, 'gateway.llm_classifier_input', () =>
|
|
412
|
+
this.llmClassifier!.classify(inputText)
|
|
413
|
+
);
|
|
414
|
+
stepTimings.llm_classifier_input = Date.now() - llmInputStart;
|
|
415
|
+
const llmInputClassifications = llmInputResult.classifications;
|
|
416
|
+
|
|
417
|
+
// Degraded signal: LLM classifier failed while heuristic flagged suspicious content
|
|
418
|
+
if (llmInputResult.error && forceLlmClassification) {
|
|
419
|
+
const degradedName = 'llm_classifier_unavailable';
|
|
420
|
+
argsDlp.detected.push(degradedName);
|
|
421
|
+
argsDlp.severity = this.maxSeverity(argsDlp.severity, 'medium');
|
|
422
|
+
this.auditLogger.logDLPScanned(toolCall, [degradedName], 'medium', 0);
|
|
423
|
+
this.metrics?.recordDLPDetection(degradedName, 'medium');
|
|
424
|
+
devLog.pipelineStep('⚠️', 'LLM_CLASSIFIER_UNAVAILABLE',
|
|
425
|
+
`LLM classifier failed with heuristic score=${heuristicResult.score.toFixed(2)} — emitting degraded signal`);
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
if (llmInputClassifications.length > 0) {
|
|
429
|
+
const llmDetections = LlmPromptInjectionClassifier.toDLPDetections(llmInputClassifications, inputText);
|
|
430
|
+
const llmDetectionNames = llmDetections.map(d => d.pattern_name);
|
|
431
|
+
argsDlp.detected.push(...llmDetectionNames);
|
|
432
|
+
|
|
433
|
+
for (const d of llmDetections) {
|
|
434
|
+
argsDlp.severity = this.maxSeverity(argsDlp.severity, d.severity);
|
|
435
|
+
}
|
|
436
|
+
|
|
437
|
+
this.auditLogger.logDLPScanned(toolCall, llmDetectionNames, argsDlp.severity, 0);
|
|
438
|
+
for (const name of llmDetectionNames) {
|
|
439
|
+
this.metrics?.recordDLPDetection(name, argsDlp.severity);
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
// Re-check prompt injection blocking with LLM classifier detections.
|
|
443
|
+
// LLM classifier uses 'high' threshold to reduce false positives on benign content —
|
|
444
|
+
// it's more aggressive than regex/heuristic and needs higher confidence to block.
|
|
445
|
+
if (piAction === 'block') {
|
|
446
|
+
const llmPiDetections = argsDlp.detected.filter((d: string) => d.startsWith('llm_classifier_'));
|
|
447
|
+
if (llmPiDetections.length > 0) {
|
|
448
|
+
// Choose the blocking threshold: the configured one only when the heuristic also flagged the input, otherwise a fixed 'high'
|
|
449
|
+
const configThreshold = this.config.dlp.prompt_injection_block_threshold || 'high';
|
|
450
|
+
const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
|
|
451
|
+
const hasHeuristicDetection = argsDlp.detected.some((d: string) => d.startsWith('heuristic_prompt_injection'));
|
|
452
|
+
// If heuristic also flagged it, trust the LLM at configured threshold;
|
|
453
|
+
// otherwise require 'high' to avoid false positives on benign content.
|
|
454
|
+
const threshold = hasHeuristicDetection ? configThreshold : 'high';
|
|
455
|
+
const thresholdRank = severityRank[threshold] ?? 2;
|
|
456
|
+
const maxLlmSeverity = llmInputClassifications.reduce((max, c) => {
|
|
457
|
+
const rank = severityRank[c.severity] ?? 0;
|
|
458
|
+
return rank > max ? rank : max;
|
|
459
|
+
}, 0);
|
|
460
|
+
|
|
461
|
+
if (maxLlmSeverity >= thresholdRank) {
|
|
462
|
+
devLog.pipelineStep('🛡️', 'LLM_CLASSIFIER_INPUT_BLOCK',
|
|
463
|
+
`Blocked: ${llmPiDetections.join(', ')} (severity: ${argsDlp.severity}, threshold: ${threshold})`);
|
|
464
|
+
const durationSec = (Date.now() - startTime) / 1000;
|
|
465
|
+
this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
|
|
466
|
+
const result = this.buildResult(toolCall, 'blocked', {
|
|
467
|
+
decision: 'deny',
|
|
468
|
+
rule_id: 'llm_classifier_input_block',
|
|
469
|
+
rule_name: 'LLM classifier detected prompt injection in input',
|
|
470
|
+
reasons: [`LLM classifier detected: ${llmPiDetections.join(', ')}`],
|
|
471
|
+
}, startTime, undefined,
|
|
472
|
+
`Blocked by LLM classifier: ${llmPiDetections.join(', ')} (severity: ${argsDlp.severity})`,
|
|
473
|
+
undefined, argsDlp);
|
|
474
|
+
devLog.pipelineEnd('blocked', Date.now() - startTime);
|
|
475
|
+
return { allowed: false, result, stepTimings, startTime };
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
|
|
358
482
|
// Policy evaluation — DLP context is passed so DLP-conditioned rules
|
|
359
483
|
// compete with all other rules in a single priority-ordered pass.
|
|
360
484
|
stepStart = Date.now();
|
|
@@ -533,6 +657,77 @@ export class Gateway {
|
|
|
533
657
|
}
|
|
534
658
|
stepTimings.dlp_out = Date.now() - stepStart;
|
|
535
659
|
|
|
660
|
+
// LLM-based prompt injection classification on OUTPUT (async, runs after sync DLP scan)
|
|
661
|
+
if (this.llmClassifier && output.body && this.config.dlp.llm_classifier?.scan_output !== false) {
|
|
662
|
+
const llmStart = Date.now();
|
|
663
|
+
const text = typeof output.body === 'string' ? output.body : JSON.stringify(output.body);
|
|
664
|
+
const llmOutputResult = await asyncChildSpan(otel, 'gateway.llm_classifier', () =>
|
|
665
|
+
this.llmClassifier!.classify(text)
|
|
666
|
+
);
|
|
667
|
+
stepTimings.llm_classifier = Date.now() - llmStart;
|
|
668
|
+
const llmClassifications = llmOutputResult.classifications;
|
|
669
|
+
|
|
670
|
+
if (llmClassifications.length > 0) {
|
|
671
|
+
const llmDetections = LlmPromptInjectionClassifier.toDLPDetections(llmClassifications, text);
|
|
672
|
+
const llmDetectionNames = llmDetections.map(d => d.pattern_name);
|
|
673
|
+
outputDlp.detected.push(...llmDetectionNames);
|
|
674
|
+
|
|
675
|
+
// Recalculate severity: take the max of existing and LLM detections
|
|
676
|
+
for (const d of llmDetections) {
|
|
677
|
+
outputDlp.severity = this.maxSeverity(outputDlp.severity, d.severity);
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
this.auditLogger.logDLPScanned(toolCall, llmDetectionNames, outputDlp.severity, 0);
|
|
681
|
+
for (const name of llmDetectionNames) {
|
|
682
|
+
this.metrics?.recordDLPDetection(name, outputDlp.severity);
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
|
|
687
|
+
// Prompt injection blocking check on output (regex + LLM classifier detections)
|
|
688
|
+
const piOutputAction = this.config.dlp.prompt_injection_action || 'log';
|
|
689
|
+
if (piOutputAction === 'block' && outputDlp.detected.length > 0) {
|
|
690
|
+
const piDetections = outputDlp.detected.filter((d: string) =>
|
|
691
|
+
d.startsWith('prompt_injection_') || d.startsWith('llm_classifier_')
|
|
692
|
+
);
|
|
693
|
+
if (piDetections.length > 0) {
|
|
694
|
+
const threshold = this.config.dlp.prompt_injection_block_threshold || 'medium';
|
|
695
|
+
const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
|
|
696
|
+
const thresholdRank = severityRank[threshold] ?? 1;
|
|
697
|
+
const maxSeverityRank = severityRank[outputDlp.severity] ?? 0;
|
|
698
|
+
|
|
699
|
+
if (maxSeverityRank >= thresholdRank) {
|
|
700
|
+
// Release budget reservation since we're blocking
|
|
701
|
+
if (reservationKey) {
|
|
702
|
+
this.budgetManager.commitReservation(reservationKey, 0);
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
devLog.pipelineStep('🛡️', 'PROMPT_INJECTION_OUTPUT_BLOCK',
|
|
706
|
+
`Blocked output: ${piDetections.join(', ')} (severity: ${outputDlp.severity}, threshold: ${threshold})`);
|
|
707
|
+
const durationSec = (Date.now() - startTime) / 1000;
|
|
708
|
+
this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
|
|
709
|
+
const defaultPolicy = policyResult || { decision: 'allow' as const, rule_id: 'passthrough', rule_name: 'Passthrough', reasons: [] };
|
|
710
|
+
const argsDlpSafe = argsDlp || { detected: [], redactions: [], severity: 'low' as DLPSeverity };
|
|
711
|
+
const mergedDlp = {
|
|
712
|
+
detected: [...new Set([...argsDlpSafe.detected, ...outputDlp.detected])],
|
|
713
|
+
redactions: [...argsDlpSafe.redactions, ...outputDlp.redactions],
|
|
714
|
+
severity: this.maxSeverity(argsDlpSafe.severity, outputDlp.severity),
|
|
715
|
+
};
|
|
716
|
+
const result = this.buildResult(toolCall, 'blocked', {
|
|
717
|
+
decision: 'deny',
|
|
718
|
+
rule_id: 'prompt_injection_output_block',
|
|
719
|
+
rule_name: 'Prompt injection detected in output',
|
|
720
|
+
reasons: [`Prompt injection detected in output: ${piDetections.join(', ')}`],
|
|
721
|
+
}, startTime, undefined,
|
|
722
|
+
`Blocked by prompt injection detection in output: ${piDetections.join(', ')} (severity: ${outputDlp.severity})`,
|
|
723
|
+
undefined, mergedDlp);
|
|
724
|
+
this.auditLogger.logToolResultReturned(toolCall, 'blocked', Date.now() - startTime, { prompt_injection_blocked: true, detections: piDetections });
|
|
725
|
+
devLog.pipelineEnd('blocked', Date.now() - startTime);
|
|
726
|
+
return result;
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
}
|
|
730
|
+
|
|
536
731
|
// Extract usage data from response
|
|
537
732
|
const headerUsage = this.usageExtractor.extractFromHeaders(output.headers);
|
|
538
733
|
const bodyUsage = this.usageExtractor.extractFromBody(output.body);
|
package/src/types/config.ts
CHANGED
|
@@ -140,6 +140,16 @@ export interface OPAConfig {
|
|
|
140
140
|
package_name?: string;
|
|
141
141
|
}
|
|
142
142
|
|
|
143
|
+
export interface LlmClassifierConfig {
|
|
144
|
+
enabled: boolean;
|
|
145
|
+
model?: string;
|
|
146
|
+
confidence_threshold?: number;
|
|
147
|
+
/** Run LLM classifier on input/args (default: true) */
|
|
148
|
+
scan_input?: boolean;
|
|
149
|
+
/** Run LLM classifier on output (default: true) */
|
|
150
|
+
scan_output?: boolean;
|
|
151
|
+
}
|
|
152
|
+
|
|
143
153
|
export interface DLPConfig {
|
|
144
154
|
enabled: boolean;
|
|
145
155
|
scan_args: boolean;
|
|
@@ -149,7 +159,7 @@ export interface DLPConfig {
|
|
|
149
159
|
prompt_injection_detection?: boolean;
|
|
150
160
|
/** Action when prompt injection is detected: 'log' (default), 'flag', or 'block' */
|
|
151
161
|
prompt_injection_action?: 'log' | 'flag' | 'block';
|
|
152
|
-
/** Minimum severity to trigger blocking (when action is 'block'). Default: '
|
|
162
|
+
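/** Minimum severity to trigger blocking (when action is 'block'). Default: 'medium'. NOTE: when this is left unset, the gateway's input checks fall back to 'high'; only its output check falls back to 'medium'. */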
|
|
153
163
|
prompt_injection_block_threshold?: 'medium' | 'high';
|
|
154
164
|
/** Response mode when injection is blocked: 'deny' (default), 'sanitize', or 'require_approval' */
|
|
155
165
|
prompt_injection_response?: 'deny' | 'sanitize' | 'require_approval';
|
|
@@ -159,6 +169,10 @@ export interface DLPConfig {
|
|
|
159
169
|
binary_path?: string;
|
|
160
170
|
timeout_ms?: number;
|
|
161
171
|
};
|
|
172
|
+
/** Maximum scan depth for nested structures (default: 64) */
|
|
173
|
+
max_scan_depth?: number;
|
|
174
|
+
/** LLM-based prompt injection classifier (async, semantic analysis) */
|
|
175
|
+
llm_classifier?: LlmClassifierConfig;
|
|
162
176
|
}
|
|
163
177
|
|
|
164
178
|
export interface AuditConfig {
|