palaryn 0.3.6 → 0.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/src/auth/routes.d.ts.map +1 -1
- package/dist/src/auth/routes.js +5 -1
- package/dist/src/auth/routes.js.map +1 -1
- package/dist/src/config/defaults.d.ts.map +1 -1
- package/dist/src/config/defaults.js +7 -2
- package/dist/src/config/defaults.js.map +1 -1
- package/dist/src/dlp/composite-scanner.d.ts.map +1 -1
- package/dist/src/dlp/composite-scanner.js +26 -1
- package/dist/src/dlp/composite-scanner.js.map +1 -1
- package/dist/src/dlp/heuristic-scorer.d.ts +31 -0
- package/dist/src/dlp/heuristic-scorer.d.ts.map +1 -0
- package/dist/src/dlp/heuristic-scorer.js +286 -0
- package/dist/src/dlp/heuristic-scorer.js.map +1 -0
- package/dist/src/dlp/llm-classifier.d.ts +33 -0
- package/dist/src/dlp/llm-classifier.d.ts.map +1 -0
- package/dist/src/dlp/llm-classifier.js +145 -0
- package/dist/src/dlp/llm-classifier.js.map +1 -0
- package/dist/src/dlp/patterns.d.ts.map +1 -1
- package/dist/src/dlp/patterns.js +1 -0
- package/dist/src/dlp/patterns.js.map +1 -1
- package/dist/src/dlp/prompt-injection-backend.d.ts.map +1 -1
- package/dist/src/dlp/prompt-injection-backend.js +17 -0
- package/dist/src/dlp/prompt-injection-backend.js.map +1 -1
- package/dist/src/dlp/prompt-injection-patterns.d.ts.map +1 -1
- package/dist/src/dlp/prompt-injection-patterns.js +36 -0
- package/dist/src/dlp/prompt-injection-patterns.js.map +1 -1
- package/dist/src/dlp/scanner.d.ts.map +1 -1
- package/dist/src/dlp/scanner.js +38 -6
- package/dist/src/dlp/scanner.js.map +1 -1
- package/dist/src/dlp/text-normalizer.d.ts +5 -0
- package/dist/src/dlp/text-normalizer.d.ts.map +1 -1
- package/dist/src/dlp/text-normalizer.js +118 -0
- package/dist/src/dlp/text-normalizer.js.map +1 -1
- package/dist/src/mcp/http-transport.d.ts +2 -0
- package/dist/src/mcp/http-transport.d.ts.map +1 -1
- package/dist/src/mcp/http-transport.js +25 -6
- package/dist/src/mcp/http-transport.js.map +1 -1
- package/dist/src/mcp/oauth-provider.d.ts +1 -0
- package/dist/src/mcp/oauth-provider.d.ts.map +1 -1
- package/dist/src/mcp/oauth-provider.js +26 -1
- package/dist/src/mcp/oauth-provider.js.map +1 -1
- package/dist/src/policy/engine.d.ts.map +1 -1
- package/dist/src/policy/engine.js +109 -0
- package/dist/src/policy/engine.js.map +1 -1
- package/dist/src/saas/routes.d.ts.map +1 -1
- package/dist/src/saas/routes.js +19 -5
- package/dist/src/saas/routes.js.map +1 -1
- package/dist/src/server/app.d.ts.map +1 -1
- package/dist/src/server/app.js +7 -0
- package/dist/src/server/app.js.map +1 -1
- package/dist/src/server/gateway.d.ts +1 -0
- package/dist/src/server/gateway.d.ts.map +1 -1
- package/dist/src/server/gateway.js +113 -0
- package/dist/src/server/gateway.js.map +1 -1
- package/dist/src/types/config.d.ts +14 -1
- package/dist/src/types/config.d.ts.map +1 -1
- package/dist/tests/security/pentest-payloads.d.ts +46 -0
- package/dist/tests/security/pentest-payloads.d.ts.map +1 -0
- package/dist/tests/security/pentest-payloads.js +459 -0
- package/dist/tests/security/pentest-payloads.js.map +1 -0
- package/dist/tests/unit/adversarial-pipeline.test.d.ts +15 -0
- package/dist/tests/unit/adversarial-pipeline.test.d.ts.map +1 -0
- package/dist/tests/unit/adversarial-pipeline.test.js +1552 -0
- package/dist/tests/unit/adversarial-pipeline.test.js.map +1 -0
- package/dist/tests/unit/dlp-scanner.test.js +5 -5
- package/dist/tests/unit/gateway-branches.test.js +131 -0
- package/dist/tests/unit/gateway-branches.test.js.map +1 -1
- package/dist/tests/unit/heuristic-scorer.test.d.ts +2 -0
- package/dist/tests/unit/heuristic-scorer.test.d.ts.map +1 -0
- package/dist/tests/unit/heuristic-scorer.test.js +248 -0
- package/dist/tests/unit/heuristic-scorer.test.js.map +1 -0
- package/dist/tests/unit/llm-classifier.test.d.ts +2 -0
- package/dist/tests/unit/llm-classifier.test.d.ts.map +1 -0
- package/dist/tests/unit/llm-classifier.test.js +343 -0
- package/dist/tests/unit/llm-classifier.test.js.map +1 -0
- package/dist/tests/unit/mcp-oauth.test.js +6 -2
- package/dist/tests/unit/mcp-oauth.test.js.map +1 -1
- package/dist/tests/unit/prompt-injection-backend.test.js +122 -0
- package/dist/tests/unit/prompt-injection-backend.test.js.map +1 -1
- package/dist/tests/unit/text-normalizer.test.js +45 -0
- package/dist/tests/unit/text-normalizer.test.js.map +1 -1
- package/package.json +1 -1
- package/policy-packs/default.yaml +88 -0
- package/src/auth/routes.ts +6 -1
- package/src/config/defaults.ts +7 -2
- package/src/dlp/composite-scanner.ts +27 -1
- package/src/dlp/heuristic-scorer.ts +312 -0
- package/src/dlp/llm-classifier.ts +176 -0
- package/src/dlp/patterns.ts +1 -0
- package/src/dlp/prompt-injection-backend.ts +19 -1
- package/src/dlp/prompt-injection-patterns.ts +38 -0
- package/src/dlp/scanner.ts +36 -6
- package/src/dlp/text-normalizer.ts +124 -0
- package/src/mcp/http-transport.ts +29 -6
- package/src/mcp/oauth-provider.ts +22 -1
- package/src/policy/engine.ts +102 -0
- package/src/saas/routes.ts +22 -5
- package/src/server/app.ts +7 -0
- package/src/server/gateway.ts +142 -0
- package/src/types/config.ts +15 -1
package/src/server/gateway.ts
CHANGED
|
@@ -9,6 +9,8 @@ import { DLPScanner } from '../dlp/scanner';
|
|
|
9
9
|
import { CompositeDLPScanner } from '../dlp/composite-scanner';
|
|
10
10
|
import { DLPBackend } from '../dlp/interfaces';
|
|
11
11
|
import { PromptInjectionBackend } from '../dlp/prompt-injection-backend';
|
|
12
|
+
import { HeuristicScorerBackend } from '../dlp/heuristic-scorer';
|
|
13
|
+
import { scorePromptInjection } from '../dlp/heuristic-scorer';
|
|
12
14
|
import { TruffleHogBackend } from '../dlp/trufflehog-backend';
|
|
13
15
|
import { BudgetManager, CostRecord } from '../budget/manager';
|
|
14
16
|
import { UsageExtractor } from '../budget/usage-extractor';
|
|
@@ -29,6 +31,7 @@ import { UsageData } from '../types/tool-result';
|
|
|
29
31
|
import { GatewayMetrics } from '../metrics';
|
|
30
32
|
import { GatewayTracer } from '../tracing';
|
|
31
33
|
import { AnomalyDetector } from '../anomaly';
|
|
34
|
+
import { LlmPromptInjectionClassifier } from '../dlp/llm-classifier';
|
|
32
35
|
import { log as devLog, logger } from './logger';
|
|
33
36
|
|
|
34
37
|
export interface PreExecuteResult {
|
|
@@ -118,6 +121,7 @@ export class Gateway {
|
|
|
118
121
|
private rateLimitConfigStore?: RateLimitConfigStore;
|
|
119
122
|
private budgetConfigStore?: BudgetConfigStore;
|
|
120
123
|
private usageExtractor: UsageExtractor;
|
|
124
|
+
private llmClassifier?: LlmPromptInjectionClassifier;
|
|
121
125
|
private inFlightCleanupInterval?: ReturnType<typeof setInterval>;
|
|
122
126
|
/**
|
|
123
127
|
* Tracks tool_call_ids currently being processed to prevent TOCTOU races.
|
|
@@ -137,6 +141,7 @@ export class Gateway {
|
|
|
137
141
|
dlpBackends.push(new PromptInjectionBackend({
|
|
138
142
|
scan_output: config.dlp.scan_output,
|
|
139
143
|
}));
|
|
144
|
+
dlpBackends.push(new HeuristicScorerBackend());
|
|
140
145
|
}
|
|
141
146
|
if (config.dlp.trufflehog?.enabled) {
|
|
142
147
|
dlpBackends.push(new TruffleHogBackend({
|
|
@@ -165,6 +170,11 @@ export class Gateway {
|
|
|
165
170
|
this.opaEngine = new OPAEngine(config.policy.opa);
|
|
166
171
|
}
|
|
167
172
|
|
|
173
|
+
// Set up LLM-based prompt injection classifier if enabled
|
|
174
|
+
if (config.dlp.llm_classifier?.enabled) {
|
|
175
|
+
this.llmClassifier = new LlmPromptInjectionClassifier(config.dlp.llm_classifier);
|
|
176
|
+
}
|
|
177
|
+
|
|
168
178
|
this.usageExtractor = new UsageExtractor(config.budget.token_pricing);
|
|
169
179
|
|
|
170
180
|
// Set up executor registry with HTTP as default + catch-all fallback
|
|
@@ -355,6 +365,68 @@ export class Gateway {
|
|
|
355
365
|
}
|
|
356
366
|
}
|
|
357
367
|
|
|
368
|
+
// Heuristic scoring — force LLM classifier when structural signals are elevated
|
|
369
|
+
let forceLlmClassification = false;
|
|
370
|
+
const inputText = JSON.stringify(toolCall);
|
|
371
|
+
const heuristicResult = scorePromptInjection(inputText);
|
|
372
|
+
if (heuristicResult.score >= 0.4) {
|
|
373
|
+
forceLlmClassification = true;
|
|
374
|
+
devLog.pipelineStep('🔍', 'HEURISTIC_SCORER',
|
|
375
|
+
`score=${heuristicResult.score.toFixed(2)} signals=[${heuristicResult.signals.join(',')}]`);
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
// LLM-based prompt injection classification on INPUT (async, runs after sync DLP scan)
|
|
379
|
+
if ((this.llmClassifier && this.config.dlp.llm_classifier?.scan_input !== false) || (forceLlmClassification && this.llmClassifier)) {
|
|
380
|
+
const llmInputStart = Date.now();
|
|
381
|
+
const llmInputClassifications = await asyncChildSpan(otel, 'gateway.llm_classifier_input', () =>
|
|
382
|
+
this.llmClassifier!.classify(inputText)
|
|
383
|
+
);
|
|
384
|
+
stepTimings.llm_classifier_input = Date.now() - llmInputStart;
|
|
385
|
+
|
|
386
|
+
if (llmInputClassifications.length > 0) {
|
|
387
|
+
const llmDetections = LlmPromptInjectionClassifier.toDLPDetections(llmInputClassifications, inputText);
|
|
388
|
+
const llmDetectionNames = llmDetections.map(d => d.pattern_name);
|
|
389
|
+
argsDlp.detected.push(...llmDetectionNames);
|
|
390
|
+
|
|
391
|
+
for (const d of llmDetections) {
|
|
392
|
+
argsDlp.severity = this.maxSeverity(argsDlp.severity, d.severity);
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
this.auditLogger.logDLPScanned(toolCall, llmDetectionNames, argsDlp.severity, 0);
|
|
396
|
+
for (const name of llmDetectionNames) {
|
|
397
|
+
this.metrics?.recordDLPDetection(name, argsDlp.severity);
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
// Re-check prompt injection blocking with LLM classifier detections
|
|
401
|
+
if (piAction === 'block') {
|
|
402
|
+
const llmPiDetections = argsDlp.detected.filter((d: string) => d.startsWith('llm_classifier_'));
|
|
403
|
+
if (llmPiDetections.length > 0) {
|
|
404
|
+
const threshold = this.config.dlp.prompt_injection_block_threshold || 'high';
|
|
405
|
+
const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
|
|
406
|
+
const thresholdRank = severityRank[threshold] ?? 2;
|
|
407
|
+
const maxSeverityRank = severityRank[argsDlp.severity] ?? 0;
|
|
408
|
+
|
|
409
|
+
if (maxSeverityRank >= thresholdRank) {
|
|
410
|
+
devLog.pipelineStep('🛡️', 'LLM_CLASSIFIER_INPUT_BLOCK',
|
|
411
|
+
`Blocked: ${llmPiDetections.join(', ')} (severity: ${argsDlp.severity}, threshold: ${threshold})`);
|
|
412
|
+
const durationSec = (Date.now() - startTime) / 1000;
|
|
413
|
+
this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
|
|
414
|
+
const result = this.buildResult(toolCall, 'blocked', {
|
|
415
|
+
decision: 'deny',
|
|
416
|
+
rule_id: 'llm_classifier_input_block',
|
|
417
|
+
rule_name: 'LLM classifier detected prompt injection in input',
|
|
418
|
+
reasons: [`LLM classifier detected: ${llmPiDetections.join(', ')}`],
|
|
419
|
+
}, startTime, undefined,
|
|
420
|
+
`Blocked by LLM classifier: ${llmPiDetections.join(', ')} (severity: ${argsDlp.severity})`,
|
|
421
|
+
undefined, argsDlp);
|
|
422
|
+
devLog.pipelineEnd('blocked', Date.now() - startTime);
|
|
423
|
+
return { allowed: false, result, stepTimings, startTime };
|
|
424
|
+
}
|
|
425
|
+
}
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
|
|
358
430
|
// Policy evaluation — DLP context is passed so DLP-conditioned rules
|
|
359
431
|
// compete with all other rules in a single priority-ordered pass.
|
|
360
432
|
stepStart = Date.now();
|
|
@@ -533,6 +605,76 @@ export class Gateway {
|
|
|
533
605
|
}
|
|
534
606
|
stepTimings.dlp_out = Date.now() - stepStart;
|
|
535
607
|
|
|
608
|
+
// LLM-based prompt injection classification (async, runs after sync DLP scan)
|
|
609
|
+
if (this.llmClassifier && output.body && this.config.dlp.llm_classifier?.scan_output !== false) {
|
|
610
|
+
const llmStart = Date.now();
|
|
611
|
+
const text = typeof output.body === 'string' ? output.body : JSON.stringify(output.body);
|
|
612
|
+
const llmClassifications = await asyncChildSpan(otel, 'gateway.llm_classifier', () =>
|
|
613
|
+
this.llmClassifier!.classify(text)
|
|
614
|
+
);
|
|
615
|
+
stepTimings.llm_classifier = Date.now() - llmStart;
|
|
616
|
+
|
|
617
|
+
if (llmClassifications.length > 0) {
|
|
618
|
+
const llmDetections = LlmPromptInjectionClassifier.toDLPDetections(llmClassifications, text);
|
|
619
|
+
const llmDetectionNames = llmDetections.map(d => d.pattern_name);
|
|
620
|
+
outputDlp.detected.push(...llmDetectionNames);
|
|
621
|
+
|
|
622
|
+
// Recalculate severity: take the max of existing and LLM detections
|
|
623
|
+
for (const d of llmDetections) {
|
|
624
|
+
outputDlp.severity = this.maxSeverity(outputDlp.severity, d.severity);
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
this.auditLogger.logDLPScanned(toolCall, llmDetectionNames, outputDlp.severity, 0);
|
|
628
|
+
for (const name of llmDetectionNames) {
|
|
629
|
+
this.metrics?.recordDLPDetection(name, outputDlp.severity);
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
// Prompt injection blocking check on output (regex + LLM classifier detections)
|
|
635
|
+
const piOutputAction = this.config.dlp.prompt_injection_action || 'log';
|
|
636
|
+
if (piOutputAction === 'block' && outputDlp.detected.length > 0) {
|
|
637
|
+
const piDetections = outputDlp.detected.filter((d: string) =>
|
|
638
|
+
d.startsWith('prompt_injection_') || d.startsWith('llm_classifier_')
|
|
639
|
+
);
|
|
640
|
+
if (piDetections.length > 0) {
|
|
641
|
+
const threshold = this.config.dlp.prompt_injection_block_threshold || 'medium';
|
|
642
|
+
const severityRank: Record<string, number> = { low: 0, medium: 1, high: 2 };
|
|
643
|
+
const thresholdRank = severityRank[threshold] ?? 1;
|
|
644
|
+
const maxSeverityRank = severityRank[outputDlp.severity] ?? 0;
|
|
645
|
+
|
|
646
|
+
if (maxSeverityRank >= thresholdRank) {
|
|
647
|
+
// Release budget reservation since we're blocking
|
|
648
|
+
if (reservationKey) {
|
|
649
|
+
this.budgetManager.commitReservation(reservationKey, 0);
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
devLog.pipelineStep('🛡️', 'PROMPT_INJECTION_OUTPUT_BLOCK',
|
|
653
|
+
`Blocked output: ${piDetections.join(', ')} (severity: ${outputDlp.severity}, threshold: ${threshold})`);
|
|
654
|
+
const durationSec = (Date.now() - startTime) / 1000;
|
|
655
|
+
this.metrics?.recordRequest('blocked', toolCall.tool.name, toolCall.tool.capability, durationSec);
|
|
656
|
+
const defaultPolicy = policyResult || { decision: 'allow' as const, rule_id: 'passthrough', rule_name: 'Passthrough', reasons: [] };
|
|
657
|
+
const argsDlpSafe = argsDlp || { detected: [], redactions: [], severity: 'low' as DLPSeverity };
|
|
658
|
+
const mergedDlp = {
|
|
659
|
+
detected: [...new Set([...argsDlpSafe.detected, ...outputDlp.detected])],
|
|
660
|
+
redactions: [...argsDlpSafe.redactions, ...outputDlp.redactions],
|
|
661
|
+
severity: this.maxSeverity(argsDlpSafe.severity, outputDlp.severity),
|
|
662
|
+
};
|
|
663
|
+
const result = this.buildResult(toolCall, 'blocked', {
|
|
664
|
+
decision: 'deny',
|
|
665
|
+
rule_id: 'prompt_injection_output_block',
|
|
666
|
+
rule_name: 'Prompt injection detected in output',
|
|
667
|
+
reasons: [`Prompt injection detected in output: ${piDetections.join(', ')}`],
|
|
668
|
+
}, startTime, undefined,
|
|
669
|
+
`Blocked by prompt injection detection in output: ${piDetections.join(', ')} (severity: ${outputDlp.severity})`,
|
|
670
|
+
undefined, mergedDlp);
|
|
671
|
+
this.auditLogger.logToolResultReturned(toolCall, 'blocked', Date.now() - startTime, { prompt_injection_blocked: true, detections: piDetections });
|
|
672
|
+
devLog.pipelineEnd('blocked', Date.now() - startTime);
|
|
673
|
+
return result;
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
}
|
|
677
|
+
|
|
536
678
|
// Extract usage data from response
|
|
537
679
|
const headerUsage = this.usageExtractor.extractFromHeaders(output.headers);
|
|
538
680
|
const bodyUsage = this.usageExtractor.extractFromBody(output.body);
|
package/src/types/config.ts
CHANGED
|
@@ -140,6 +140,16 @@ export interface OPAConfig {
|
|
|
140
140
|
package_name?: string;
|
|
141
141
|
}
|
|
142
142
|
|
|
143
|
+
export interface LlmClassifierConfig {
|
|
144
|
+
enabled: boolean;
|
|
145
|
+
model?: string;
|
|
146
|
+
confidence_threshold?: number;
|
|
147
|
+
/** Run LLM classifier on input/args (default: true) */
|
|
148
|
+
scan_input?: boolean;
|
|
149
|
+
/** Run LLM classifier on output (default: true) */
|
|
150
|
+
scan_output?: boolean;
|
|
151
|
+
}
|
|
152
|
+
|
|
143
153
|
export interface DLPConfig {
|
|
144
154
|
enabled: boolean;
|
|
145
155
|
scan_args: boolean;
|
|
@@ -149,7 +159,7 @@ export interface DLPConfig {
|
|
|
149
159
|
prompt_injection_detection?: boolean;
|
|
150
160
|
/** Action when prompt injection is detected: 'log' (default), 'flag', or 'block' */
|
|
151
161
|
prompt_injection_action?: 'log' | 'flag' | 'block';
|
|
152
|
-
/** Minimum severity to trigger blocking (when action is 'block'). Default: 'high' */
|
|
162
|
+
/** Minimum severity to trigger blocking (when action is 'block'). Default: 'medium' */
|
|
153
163
|
prompt_injection_block_threshold?: 'medium' | 'high';
|
|
154
164
|
/** Response mode when injection is blocked: 'deny' (default), 'sanitize', or 'require_approval' */
|
|
155
165
|
prompt_injection_response?: 'deny' | 'sanitize' | 'require_approval';
|
|
@@ -159,6 +169,10 @@ export interface DLPConfig {
|
|
|
159
169
|
binary_path?: string;
|
|
160
170
|
timeout_ms?: number;
|
|
161
171
|
};
|
|
172
|
+
/** Maximum scan depth for nested structures (default: 64) */
|
|
173
|
+
max_scan_depth?: number;
|
|
174
|
+
/** LLM-based prompt injection classifier (async, semantic analysis) */
|
|
175
|
+
llm_classifier?: LlmClassifierConfig;
|
|
162
176
|
}
|
|
163
177
|
|
|
164
178
|
export interface AuditConfig {
|