@vpdeva/blackwall-llm-shield-js 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +46 -9
- package/package.json +9 -2
- package/src/index.js +271 -26
- package/src/providers.js +152 -0
package/README.md
CHANGED
|
@@ -12,6 +12,7 @@ JavaScript security middleware for LLM applications in Node.js and Next.js. Blac
|
|
|
12
12
|
- Supports shadow mode and side-by-side policy-pack evaluation
|
|
13
13
|
- Notifies webhooks or alert handlers when risky traffic appears
|
|
14
14
|
- Emits structured telemetry for prompt risk, masking volume, and output review outcomes
|
|
15
|
+
- Includes first-class provider adapters for OpenAI, Anthropic, Gemini, and OpenRouter
|
|
15
16
|
- Inspects model outputs for leaks, unsafe code, grounding drift, and tone violations
|
|
16
17
|
- Ships Express, LangChain, and LlamaIndex integration helpers
|
|
17
18
|
- Enforces allowlists, denylists, validators, and approval-gated tools
|
|
@@ -74,6 +75,10 @@ console.log(guarded.report);
|
|
|
74
75
|
|
|
75
76
|
Use `shadowMode` with `shadowPolicyPacks` or `comparePolicyPacks` to record what would have been blocked without interrupting traffic.
|
|
76
77
|
|
|
78
|
+
### Provider adapters and stable wrappers
|
|
79
|
+
|
|
80
|
+
Use `createOpenAIAdapter()`, `createAnthropicAdapter()`, `createGeminiAdapter()`, or `createOpenRouterAdapter()` with `protectWithAdapter()` when you want Blackwall to wrap the provider call end to end.
|
|
81
|
+
|
|
77
82
|
### Output grounding and tone review
|
|
78
83
|
|
|
79
84
|
`OutputFirewall` can compare responses against retrieved documents and flag hallucination-style unsupported claims or unprofessional tone.
|
|
@@ -86,13 +91,15 @@ Use `createExpressMiddleware()`, `createLangChainCallbacks()`, or `createLlamaIn
|
|
|
86
91
|
|
|
87
92
|
Use `require('blackwall-llm-shield-js/integrations')` for callback wrappers and `require('blackwall-llm-shield-js/semantic')` for optional local semantic scoring adapters.
|
|
88
93
|
|
|
94
|
+
Use `require('blackwall-llm-shield-js/providers')` for provider adapter factories.
|
|
95
|
+
|
|
89
96
|
## Core Building Blocks
|
|
90
97
|
|
|
91
98
|
### `BlackwallShield`
|
|
92
99
|
|
|
93
100
|
Use it to sanitize inbound messages, mask sensitive data, score prompt-injection risk, and decide whether the request should continue to the model provider.
|
|
94
101
|
|
|
95
|
-
It also exposes `protectModelCall()` and `reviewModelResponse()` so you can enforce request checks before
|
|
102
|
+
It also exposes `protectModelCall()`, `protectWithAdapter()`, and `reviewModelResponse()` so you can enforce request checks before provider calls and review outputs before they go back to the user.
|
|
96
103
|
|
|
97
104
|
### `OutputFirewall`
|
|
98
105
|
|
|
@@ -127,19 +134,22 @@ if (!guarded.allowed) {
|
|
|
127
134
|
### Wrap a provider call end to end
|
|
128
135
|
|
|
129
136
|
```js
|
|
137
|
+
const { BlackwallShield, createOpenAIAdapter } = require('blackwall-llm-shield-js');
|
|
138
|
+
|
|
130
139
|
const shield = new BlackwallShield({
|
|
131
|
-
|
|
140
|
+
preset: 'shadowFirst',
|
|
132
141
|
onTelemetry: async (event) => console.log(JSON.stringify(event)),
|
|
133
142
|
});
|
|
134
143
|
|
|
135
|
-
const
|
|
144
|
+
const adapter = createOpenAIAdapter({
|
|
145
|
+
client: openai,
|
|
146
|
+
model: 'gpt-4.1-mini',
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
const result = await shield.protectWithAdapter({
|
|
150
|
+
adapter,
|
|
136
151
|
messages: [{ role: 'user', content: 'Summarize this shipment exception.' }],
|
|
137
152
|
metadata: { route: '/api/chat', tenantId: 'au-commerce', userId: 'ops-7' },
|
|
138
|
-
callModel: async ({ messages }) => openai.responses.create({
|
|
139
|
-
model: 'gpt-4.1-mini',
|
|
140
|
-
input: messages.map((msg) => `${msg.role}: ${msg.content}`).join('\n'),
|
|
141
|
-
}),
|
|
142
|
-
mapOutput: (response) => response.output_text,
|
|
143
153
|
firewallOptions: {
|
|
144
154
|
retrievalDocuments: [
|
|
145
155
|
{ id: 'kb-1', content: 'Shipment exceptions should include the parcel ID, lane, and next action.' },
|
|
@@ -150,6 +160,30 @@ const result = await shield.protectModelCall({
|
|
|
150
160
|
console.log(result.stage, result.allowed);
|
|
151
161
|
```
|
|
152
162
|
|
|
163
|
+
### Use presets and route-level policy overrides
|
|
164
|
+
|
|
165
|
+
```js
|
|
166
|
+
const shield = new BlackwallShield({
|
|
167
|
+
preset: 'shadowFirst',
|
|
168
|
+
routePolicies: [
|
|
169
|
+
{
|
|
170
|
+
route: '/api/admin/*',
|
|
171
|
+
options: {
|
|
172
|
+
preset: 'strict',
|
|
173
|
+
policyPack: 'finance',
|
|
174
|
+
},
|
|
175
|
+
},
|
|
176
|
+
{
|
|
177
|
+
route: '/api/health',
|
|
178
|
+
options: {
|
|
179
|
+
shadowMode: true,
|
|
180
|
+
suppressPromptRules: ['ignore_instructions'],
|
|
181
|
+
},
|
|
182
|
+
},
|
|
183
|
+
],
|
|
184
|
+
});
|
|
185
|
+
```
|
|
186
|
+
|
|
153
187
|
### Inspect model output
|
|
154
188
|
|
|
155
189
|
```js
|
|
@@ -190,12 +224,15 @@ console.log(tools.inspectCall({ tool: 'lookupCustomer', args: { id: 'cus_123' }
|
|
|
190
224
|
- `npm run release:check` runs the JS test suite before release
|
|
191
225
|
- `npm run release:pack` creates the local npm tarball
|
|
192
226
|
- `npm run release:publish` publishes the package to npm
|
|
227
|
+
- `npm run changeset` creates a version/changelog entry for the next release
|
|
228
|
+
- `npm run version-packages` applies pending Changesets locally
|
|
193
229
|
|
|
194
230
|
## Rollout Notes
|
|
195
231
|
|
|
196
|
-
- Start with `shadowMode: true` and inspect `report.telemetry` plus `onTelemetry` events before enabling hard blocking.
|
|
232
|
+
- Start with `preset: 'shadowFirst'` or `shadowMode: true` and inspect `report.telemetry` plus `onTelemetry` events before enabling hard blocking.
|
|
197
233
|
- Use `RetrievalSanitizer` and `ToolPermissionFirewall` in front of RAG, search, admin actions, and tool-calling flows.
|
|
198
234
|
- Add regression prompts for instruction overrides, prompt leaks, token leaks, and Australian PII samples so upgrades stay safe.
|
|
235
|
+
- Expect some latency increase from grounding checks, output review, and custom detectors; benchmark with your real prompt and response sizes before enforcing globally.
|
|
199
236
|
|
|
200
237
|
## Support
|
|
201
238
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@vpdeva/blackwall-llm-shield-js",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.3",
|
|
4
4
|
"description": "Open-source JavaScript enterprise LLM protection toolkit for Node.js and Next.js",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"author": "Vish <hello@vish.au> (https://vish.au)",
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
"exports": {
|
|
10
10
|
".": "./src/index.js",
|
|
11
11
|
"./integrations": "./src/integrations.js",
|
|
12
|
+
"./providers": "./src/providers.js",
|
|
12
13
|
"./semantic": "./src/semantic.js"
|
|
13
14
|
},
|
|
14
15
|
"bin": {
|
|
@@ -16,6 +17,9 @@
|
|
|
16
17
|
},
|
|
17
18
|
"scripts": {
|
|
18
19
|
"test": "node --test tests/*.test.js",
|
|
20
|
+
"changeset": "changeset",
|
|
21
|
+
"version-packages": "changeset version",
|
|
22
|
+
"release": "changeset publish",
|
|
19
23
|
"release:check": "npm test",
|
|
20
24
|
"release:pack": "npm pack",
|
|
21
25
|
"release:publish": "npm publish --access public --provenance"
|
|
@@ -49,5 +53,8 @@
|
|
|
49
53
|
"enterprise",
|
|
50
54
|
"nextjs",
|
|
51
55
|
"node"
|
|
52
|
-
]
|
|
56
|
+
],
|
|
57
|
+
"devDependencies": {
|
|
58
|
+
"@changesets/cli": "^2.29.6"
|
|
59
|
+
}
|
|
53
60
|
}
|
package/src/index.js
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
const crypto = require('crypto');
|
|
2
2
|
const RED_TEAM_PROMPT_LIBRARY = require('./red_team_prompts.json');
|
|
3
|
+
const {
|
|
4
|
+
createOpenAIAdapter,
|
|
5
|
+
createAnthropicAdapter,
|
|
6
|
+
createGeminiAdapter,
|
|
7
|
+
createOpenRouterAdapter,
|
|
8
|
+
} = require('./providers');
|
|
3
9
|
|
|
4
10
|
const SENSITIVE_PATTERNS = {
|
|
5
11
|
email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
|
|
@@ -100,6 +106,44 @@ const POLICY_PACKS = {
|
|
|
100
106
|
},
|
|
101
107
|
};
|
|
102
108
|
|
|
109
|
+
/**
 * Named option bundles selectable through the `preset` shield option.
 * Each entry lists only the options that the preset sets.
 */
const SHIELD_PRESETS = {
  // Enforcing preset: blocks from 'high' risk, notifies from 'medium'.
  balanced: {
    blockOnPromptInjection: true,
    promptInjectionThreshold: 'high',
    notifyOnRiskLevel: 'medium',
    shadowMode: false,
  },

  // Same thresholds as `strict`, but with shadow mode switched on.
  shadowFirst: {
    blockOnPromptInjection: true,
    promptInjectionThreshold: 'medium',
    notifyOnRiskLevel: 'medium',
    shadowMode: true,
  },

  // Enforcing preset with the lower 'medium' threshold and system messages disallowed.
  strict: {
    blockOnPromptInjection: true,
    promptInjectionThreshold: 'medium',
    notifyOnRiskLevel: 'medium',
    shadowMode: false,
    allowSystemMessages: false,
  },

  // Shadow-mode preset with 'high' thresholds and system messages allowed.
  developerFriendly: {
    blockOnPromptInjection: true,
    promptInjectionThreshold: 'high',
    notifyOnRiskLevel: 'high',
    shadowMode: true,
    allowSystemMessages: true,
  },
};
|
|
137
|
+
|
|
138
|
+
// Version string shared by every entry of CORE_INTERFACES.
const CORE_INTERFACE_VERSION = '1.0';

/**
 * Frozen map of core interface name -> interface version.
 * All listed interfaces currently carry the same version.
 */
const CORE_INTERFACES = Object.freeze(
  Object.fromEntries(
    [
      'guardModelRequest',
      'reviewModelResponse',
      'protectModelCall',
      'toolPermissionFirewall',
      'retrievalSanitizer',
    ].map((interfaceName) => [interfaceName, CORE_INTERFACE_VERSION]),
  ),
);
|
|
146
|
+
|
|
103
147
|
const RISK_ORDER = ['low', 'medium', 'high', 'critical'];
|
|
104
148
|
const LEETSPEAK_MAP = { '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's', '7': 't', '@': 'a', '$': 's' };
|
|
105
149
|
const TOXICITY_PATTERNS = [
|
|
@@ -223,6 +267,129 @@ function createTelemetryEvent(type, payload = {}) {
|
|
|
223
267
|
};
|
|
224
268
|
}
|
|
225
269
|
|
|
270
|
+
/**
 * Looks up a preset by name in SHIELD_PRESETS.
 *
 * @param {string} [name] - Preset name; falsy or unknown names resolve to no options.
 * @returns {object} A shallow copy of the preset's options, or an empty object.
 */
function resolveShieldPreset(name) {
  const preset = name ? SHIELD_PRESETS[name] : undefined;
  // Copy so callers can never mutate the shared preset table.
  return preset ? { ...preset } : {};
}
|
|
274
|
+
|
|
275
|
+
/**
 * Returns the distinct truthy entries of an array in first-seen order.
 *
 * @param {Array} [values] - Candidate values; anything that is not an array yields [].
 * @returns {Array} New array without duplicates or falsy entries.
 */
function dedupeArray(values = []) {
  if (!Array.isArray(values)) return [];
  const unique = new Set();
  for (const value of values) {
    // Falsy entries (null, undefined, '', 0, false) are dropped, not deduplicated.
    if (value) unique.add(value);
  }
  return Array.from(unique);
}
|
|
278
|
+
|
|
279
|
+
/**
 * Decides whether a route-policy pattern applies to a route.
 *
 * Supported pattern kinds:
 *  - function: called as `pattern(route, metadata)`; its result is coerced to a boolean.
 *  - RegExp: tested against the route.
 *  - string: exact match, or a glob where each `*` matches any run of characters.
 *
 * @param {string|RegExp|Function} pattern - Pattern taken from a route policy entry.
 * @param {string} [route] - Route of the current request.
 * @param {object} [metadata] - Request metadata, forwarded to function patterns.
 * @returns {boolean} True when the pattern matches the route.
 */
function routePatternMatches(pattern, route = '', metadata = {}) {
  if (!pattern) return false;
  if (typeof pattern === 'function') return Boolean(pattern(route, metadata));

  if (pattern instanceof RegExp) {
    // Global/sticky regexes keep `lastIndex` between `.test()` calls. Without a
    // reset, the same route would alternately match and not match across
    // requests, silently toggling the route policy.
    const isStateful = pattern.global || pattern.sticky;
    if (isStateful) pattern.lastIndex = 0;
    const matched = pattern.test(route);
    if (isStateful) pattern.lastIndex = 0;
    return matched;
  }

  if (typeof pattern !== 'string') return false;
  if (pattern === route) return true;
  if (!pattern.includes('*')) return false;

  // Escape regex metacharacters in the literal pieces, then join them with `.*`.
  const source = pattern
    .split('*')
    .map((part) => part.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('.*');
  return new RegExp(`^${source}$`).test(route);
}
|
|
292
|
+
|
|
293
|
+
/**
 * Merges the options of every route policy whose pattern matches the request route.
 *
 * The route is read from `metadata.route`, then `metadata.path`. Later matching
 * entries override scalar options of earlier ones; the list-valued options
 * (shadowPolicyPacks, entityDetectors, customPromptDetectors, suppressPromptRules)
 * are concatenated instead, and the two string lists are de-duplicated.
 *
 * @param {Array<{route: *, options: object}>} [routePolicies] - Configured route policies.
 * @param {object} [metadata] - Request metadata.
 * @returns {object|null} Merged options, or null when no entry matches.
 */
function resolveRoutePolicy(routePolicies = [], metadata = {}) {
  const route = metadata.route || metadata.path || '';
  const candidates = Array.isArray(routePolicies) ? routePolicies : [];

  let merged = null;
  for (const entry of candidates) {
    if (!routePatternMatches(entry && entry.route, route, metadata)) continue;

    const previous = merged || {};
    const options = entry && entry.options ? entry.options : {};
    merged = {
      ...previous,
      ...options,
      shadowPolicyPacks: dedupeArray([
        ...(previous.shadowPolicyPacks || []),
        ...(options.shadowPolicyPacks || []),
      ]),
      entityDetectors: [
        ...(previous.entityDetectors || []),
        ...(options.entityDetectors || []),
      ],
      customPromptDetectors: [
        ...(previous.customPromptDetectors || []),
        ...(options.customPromptDetectors || []),
      ],
      suppressPromptRules: dedupeArray([
        ...(previous.suppressPromptRules || []),
        ...(options.suppressPromptRules || []),
      ]),
    };
  }
  return merged;
}
|
|
309
|
+
|
|
310
|
+
/**
 * Removes suppressed rule hits from a prompt-injection result and rescores it.
 *
 * When at least one rule id is suppressed, the score is recomputed as the sum
 * of the remaining matches' scores (capped at 100), and `level` and
 * `blockedByDefault` are derived from that new score.
 *
 * @param {object} injection - Prompt-injection result with `matches` and `score`.
 * @param {string[]} [suppressedIds] - Rule ids to ignore.
 * @returns {object} The original object when nothing is suppressed, otherwise a rescored copy.
 */
function applyPromptRuleSuppressions(injection, suppressedIds = []) {
  const suppressed = new Set(dedupeArray(suppressedIds));
  if (suppressed.size === 0) return injection;

  const matches = [];
  let total = 0;
  for (const match of injection.matches || []) {
    if (suppressed.has(match.id)) continue;
    matches.push(match);
    total += match.score || 0;
  }

  const score = Math.min(total, 100);
  return {
    ...injection,
    matches,
    score,
    level: riskLevelFromScore(score),
    // 45 is the score at which a result counts as blocked by default.
    blockedByDefault: score >= 45,
  };
}
|
|
323
|
+
|
|
324
|
+
/**
 * Runs caller-supplied prompt detectors and folds their findings into an
 * existing prompt-injection result.
 *
 * Each detector is called synchronously with `{ text, injection, metadata, options }`
 * and may return a single finding, an array of findings, or nothing. A finding
 * needs an `id`; ids already present in `injection.matches` (or produced by an
 * earlier detector) are ignored. A finding's score is clamped to the range
 * 0..40 before it is added to the total, and the total is capped at 100.
 *
 * @param {object} injection - Prompt-injection result with `matches` and `score`.
 * @param {string} text - Text handed to the detectors.
 * @param {object} [options] - Shield options; `customPromptDetectors` is read from here.
 * @param {object} [metadata] - Request metadata handed to the detectors.
 * @returns {object} The original object when no detectors are configured, otherwise a rescored copy.
 */
function applyCustomPromptDetectors(injection, text, options = {}, metadata = {}) {
  const detectors = Array.isArray(options.customPromptDetectors) ? options.customPromptDetectors : [];
  if (!detectors.length) return injection;

  const matches = [...(injection.matches || [])];
  const seen = new Set(matches.map((item) => item.id));
  let score = injection.score || 0;

  for (const detector of detectors) {
    if (typeof detector !== 'function') continue;
    const result = detector({ text, injection, metadata, options }) || [];
    const findings = Array.isArray(result) ? result : [result];

    for (const finding of findings) {
      if (!finding || !finding.id || seen.has(finding.id)) continue;
      seen.add(finding.id);

      // A non-numeric score must not become NaN: NaN would propagate into the
      // total, make `>= 45` false, and hide hits from the built-in rules.
      const rawScore = Number(finding.score);
      const contribution = Number.isNaN(rawScore) ? 0 : Math.max(0, Math.min(rawScore, 40));

      matches.push({
        id: finding.id,
        score: contribution,
        reason: finding.reason || 'Custom prompt detector triggered',
        source: finding.source || 'custom',
      });
      score += contribution;
    }
  }

  const cappedScore = Math.min(score, 100);
  return {
    ...injection,
    matches,
    score: cappedScore,
    level: riskLevelFromScore(cappedScore),
    // 45 is the score at which a result counts as blocked by default.
    blockedByDefault: cappedScore >= 45,
  };
}
|
|
355
|
+
|
|
356
|
+
/**
 * Computes the options that apply to one request by layering, from lowest to
 * highest precedence: the base options, the base preset, the preset named by
 * the matching route policy, and the matching route policy itself.
 *
 * The list-valued options are not overridden but concatenated in the order
 * preset, route preset, base options, route policy; `shadowPolicyPacks` and
 * `suppressPromptRules` are additionally de-duplicated.
 *
 * @param {object} [baseOptions] - Shield options (may carry `preset` and `routePolicies`).
 * @param {object} [metadata] - Request metadata used to select route policies.
 * @returns {object} Effective options, plus `routePolicy` (merged matching policy or null).
 */
function resolveEffectiveShieldOptions(baseOptions = {}, metadata = {}) {
  const presetOptions = resolveShieldPreset(baseOptions.preset);
  const routePolicy = resolveRoutePolicy(baseOptions.routePolicies, metadata);
  const routePresetOptions = resolveShieldPreset(routePolicy && routePolicy.preset);

  // Concatenation order for the list-valued options.
  const layers = [presetOptions, routePresetOptions, baseOptions, routePolicy];
  const collect = (key) => {
    const combined = [];
    for (const layer of layers) {
      combined.push(...((layer && layer[key]) || []));
    }
    return combined;
  };

  return {
    ...baseOptions,
    ...presetOptions,
    ...routePresetOptions,
    ...(routePolicy || {}),
    shadowPolicyPacks: dedupeArray(collect('shadowPolicyPacks')),
    entityDetectors: collect('entityDetectors'),
    customPromptDetectors: collect('customPromptDetectors'),
    suppressPromptRules: dedupeArray(collect('suppressPromptRules')),
    routePolicy,
  };
}
|
|
392
|
+
|
|
226
393
|
function cloneRegex(regex) {
|
|
227
394
|
return new RegExp(regex.source, regex.flags);
|
|
228
395
|
}
|
|
@@ -748,14 +915,19 @@ class BlackwallShield {
|
|
|
748
915
|
maxLength: 5000,
|
|
749
916
|
allowSystemMessages: false,
|
|
750
917
|
shadowMode: false,
|
|
918
|
+
preset: null,
|
|
751
919
|
policyPack: null,
|
|
752
920
|
shadowPolicyPacks: [],
|
|
753
921
|
entityDetectors: [],
|
|
922
|
+
customPromptDetectors: [],
|
|
923
|
+
suppressPromptRules: [],
|
|
924
|
+
routePolicies: [],
|
|
754
925
|
detectNamedEntities: false,
|
|
755
926
|
semanticScorer: null,
|
|
756
927
|
sessionBuffer: null,
|
|
757
928
|
tokenBudgetFirewall: null,
|
|
758
929
|
systemPrompt: null,
|
|
930
|
+
outputFirewallDefaults: {},
|
|
759
931
|
onAlert: null,
|
|
760
932
|
onTelemetry: null,
|
|
761
933
|
webhookUrl: null,
|
|
@@ -764,10 +936,13 @@ class BlackwallShield {
|
|
|
764
936
|
}
|
|
765
937
|
|
|
766
938
|
inspectText(text) {
|
|
767
|
-
const
|
|
768
|
-
const
|
|
939
|
+
const effectiveOptions = resolveEffectiveShieldOptions(this.options);
|
|
940
|
+
const pii = maskValue(text, effectiveOptions);
|
|
941
|
+
let injection = detectPromptInjection(text, effectiveOptions);
|
|
942
|
+
injection = applyCustomPromptDetectors(injection, String(text || ''), effectiveOptions, {});
|
|
943
|
+
injection = applyPromptRuleSuppressions(injection, effectiveOptions.suppressPromptRules);
|
|
769
944
|
return {
|
|
770
|
-
sanitized: pii.original || sanitizeText(text,
|
|
945
|
+
sanitized: pii.original || sanitizeText(text, effectiveOptions.maxLength),
|
|
771
946
|
promptInjection: injection,
|
|
772
947
|
sensitiveData: {
|
|
773
948
|
findings: pii.findings,
|
|
@@ -792,35 +967,43 @@ class BlackwallShield {
|
|
|
792
967
|
}
|
|
793
968
|
|
|
794
969
|
async guardModelRequest({ messages = [], metadata = {}, allowSystemMessages = this.options.allowSystemMessages, comparePolicyPacks = [] } = {}) {
|
|
970
|
+
const effectiveOptions = resolveEffectiveShieldOptions(this.options, metadata);
|
|
971
|
+
const effectiveAllowSystemMessages = allowSystemMessages === this.options.allowSystemMessages
|
|
972
|
+
? effectiveOptions.allowSystemMessages
|
|
973
|
+
: allowSystemMessages;
|
|
795
974
|
const normalizedMessages = normalizeMessages(messages, {
|
|
796
|
-
maxMessages:
|
|
797
|
-
allowSystemMessages,
|
|
975
|
+
maxMessages: effectiveOptions.maxMessages,
|
|
976
|
+
allowSystemMessages: effectiveAllowSystemMessages,
|
|
798
977
|
});
|
|
799
978
|
const masked = maskMessages(normalizedMessages, {
|
|
800
|
-
includeOriginals:
|
|
801
|
-
syntheticReplacement:
|
|
802
|
-
maxLength:
|
|
803
|
-
allowSystemMessages,
|
|
979
|
+
includeOriginals: effectiveOptions.includeOriginals,
|
|
980
|
+
syntheticReplacement: effectiveOptions.syntheticReplacement,
|
|
981
|
+
maxLength: effectiveOptions.maxLength,
|
|
982
|
+
allowSystemMessages: effectiveAllowSystemMessages,
|
|
983
|
+
entityDetectors: effectiveOptions.entityDetectors,
|
|
984
|
+
detectNamedEntities: effectiveOptions.detectNamedEntities,
|
|
804
985
|
});
|
|
805
986
|
const promptCandidate = normalizedMessages.filter((msg) => msg.role !== 'assistant');
|
|
806
|
-
const sessionBuffer =
|
|
987
|
+
const sessionBuffer = effectiveOptions.sessionBuffer;
|
|
807
988
|
if (sessionBuffer && typeof sessionBuffer.record === 'function') {
|
|
808
989
|
promptCandidate.forEach((msg) => sessionBuffer.record(msg.content));
|
|
809
990
|
}
|
|
810
991
|
const sessionContext = sessionBuffer && typeof sessionBuffer.render === 'function'
|
|
811
992
|
? sessionBuffer.render()
|
|
812
993
|
: promptCandidate;
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
const
|
|
818
|
-
const
|
|
819
|
-
const
|
|
820
|
-
const
|
|
821
|
-
const
|
|
822
|
-
const
|
|
823
|
-
|
|
994
|
+
let injection = detectPromptInjection(sessionContext, effectiveOptions);
|
|
995
|
+
injection = applyCustomPromptDetectors(injection, Array.isArray(sessionContext) ? JSON.stringify(sessionContext) : String(sessionContext || ''), effectiveOptions, metadata);
|
|
996
|
+
injection = applyPromptRuleSuppressions(injection, effectiveOptions.suppressPromptRules);
|
|
997
|
+
|
|
998
|
+
const primaryPolicy = resolvePolicyPack(effectiveOptions.policyPack);
|
|
999
|
+
const threshold = (primaryPolicy && primaryPolicy.promptInjectionThreshold) || effectiveOptions.promptInjectionThreshold;
|
|
1000
|
+
const wouldBlock = effectiveOptions.blockOnPromptInjection && compareRisk(injection.level, threshold);
|
|
1001
|
+
const shouldBlock = effectiveOptions.shadowMode ? false : wouldBlock;
|
|
1002
|
+
const shouldNotify = compareRisk(injection.level, effectiveOptions.notifyOnRiskLevel);
|
|
1003
|
+
const policyNames = [...new Set([...(effectiveOptions.shadowPolicyPacks || []), ...comparePolicyPacks].filter(Boolean))];
|
|
1004
|
+
const policyComparisons = policyNames.map((name) => evaluatePolicyPack(injection, name, effectiveOptions.promptInjectionThreshold));
|
|
1005
|
+
const budgetResult = effectiveOptions.tokenBudgetFirewall && typeof effectiveOptions.tokenBudgetFirewall.inspect === 'function'
|
|
1006
|
+
? effectiveOptions.tokenBudgetFirewall.inspect({
|
|
824
1007
|
userId: metadata.userId || metadata.user_id || 'anonymous',
|
|
825
1008
|
tenantId: metadata.tenantId || metadata.tenant_id || 'default',
|
|
826
1009
|
messages: normalizedMessages,
|
|
@@ -838,7 +1021,7 @@ class BlackwallShield {
|
|
|
838
1021
|
hasSensitiveData: masked.hasSensitiveData,
|
|
839
1022
|
},
|
|
840
1023
|
enforcement: {
|
|
841
|
-
shadowMode:
|
|
1024
|
+
shadowMode: effectiveOptions.shadowMode,
|
|
842
1025
|
wouldBlock: wouldBlock || !budgetResult.allowed,
|
|
843
1026
|
blocked: shouldBlock || !budgetResult.allowed,
|
|
844
1027
|
threshold,
|
|
@@ -846,6 +1029,13 @@ class BlackwallShield {
|
|
|
846
1029
|
policyPack: primaryPolicy ? primaryPolicy.name : null,
|
|
847
1030
|
policyComparisons,
|
|
848
1031
|
tokenBudget: budgetResult,
|
|
1032
|
+
coreInterfaces: CORE_INTERFACES,
|
|
1033
|
+
routePolicy: effectiveOptions.routePolicy ? {
|
|
1034
|
+
route: metadata.route || metadata.path || null,
|
|
1035
|
+
suppressPromptRules: effectiveOptions.routePolicy.suppressPromptRules || [],
|
|
1036
|
+
policyPack: effectiveOptions.routePolicy.policyPack || null,
|
|
1037
|
+
preset: effectiveOptions.routePolicy.preset || null,
|
|
1038
|
+
} : null,
|
|
849
1039
|
telemetry: {
|
|
850
1040
|
eventType: 'llm_request_reviewed',
|
|
851
1041
|
promptInjectionRuleHits: countFindingsByType(injection.matches),
|
|
@@ -861,7 +1051,7 @@ class BlackwallShield {
|
|
|
861
1051
|
await this.emitTelemetry(createTelemetryEvent('llm_request_reviewed', {
|
|
862
1052
|
metadata,
|
|
863
1053
|
blocked: shouldBlock || !budgetResult.allowed,
|
|
864
|
-
shadowMode:
|
|
1054
|
+
shadowMode: effectiveOptions.shadowMode,
|
|
865
1055
|
report,
|
|
866
1056
|
}));
|
|
867
1057
|
|
|
@@ -886,14 +1076,17 @@ class BlackwallShield {
|
|
|
886
1076
|
}
|
|
887
1077
|
|
|
888
1078
|
async reviewModelResponse({ output, metadata = {}, outputFirewall = null, firewallOptions = {} } = {}) {
|
|
889
|
-
const
|
|
1079
|
+
const effectiveOptions = resolveEffectiveShieldOptions(this.options, metadata);
|
|
1080
|
+
const primaryPolicy = resolvePolicyPack(effectiveOptions.policyPack);
|
|
890
1081
|
const firewall = outputFirewall || new OutputFirewall({
|
|
891
1082
|
riskThreshold: (primaryPolicy && primaryPolicy.outputRiskThreshold) || 'high',
|
|
892
|
-
systemPrompt:
|
|
1083
|
+
systemPrompt: effectiveOptions.systemPrompt,
|
|
1084
|
+
...effectiveOptions.outputFirewallDefaults,
|
|
893
1085
|
...firewallOptions,
|
|
894
1086
|
});
|
|
895
1087
|
const review = firewall.inspect(output, {
|
|
896
|
-
systemPrompt:
|
|
1088
|
+
systemPrompt: effectiveOptions.systemPrompt,
|
|
1089
|
+
...(effectiveOptions.outputFirewallDefaults || {}),
|
|
897
1090
|
...firewallOptions,
|
|
898
1091
|
});
|
|
899
1092
|
const report = {
|
|
@@ -902,6 +1095,7 @@ class BlackwallShield {
|
|
|
902
1095
|
metadata,
|
|
903
1096
|
outputReview: {
|
|
904
1097
|
...review,
|
|
1098
|
+
coreInterfaces: CORE_INTERFACES,
|
|
905
1099
|
telemetry: {
|
|
906
1100
|
eventType: 'llm_output_reviewed',
|
|
907
1101
|
findingCounts: countFindingsByType(review.findings),
|
|
@@ -988,6 +1182,38 @@ class BlackwallShield {
|
|
|
988
1182
|
review,
|
|
989
1183
|
};
|
|
990
1184
|
}
|
|
1185
|
+
|
|
1186
|
+
async protectWithAdapter({
|
|
1187
|
+
adapter,
|
|
1188
|
+
messages = [],
|
|
1189
|
+
metadata = {},
|
|
1190
|
+
allowSystemMessages = this.options.allowSystemMessages,
|
|
1191
|
+
comparePolicyPacks = [],
|
|
1192
|
+
outputFirewall = null,
|
|
1193
|
+
firewallOptions = {},
|
|
1194
|
+
} = {}) {
|
|
1195
|
+
if (!adapter || typeof adapter.invoke !== 'function') {
|
|
1196
|
+
throw new TypeError('adapter.invoke must be a function');
|
|
1197
|
+
}
|
|
1198
|
+
return this.protectModelCall({
|
|
1199
|
+
messages,
|
|
1200
|
+
metadata,
|
|
1201
|
+
allowSystemMessages,
|
|
1202
|
+
comparePolicyPacks,
|
|
1203
|
+
outputFirewall,
|
|
1204
|
+
firewallOptions,
|
|
1205
|
+
callModel: async (payload) => {
|
|
1206
|
+
const result = await adapter.invoke(payload);
|
|
1207
|
+
return result && Object.prototype.hasOwnProperty.call(result, 'response') ? result.response : result;
|
|
1208
|
+
},
|
|
1209
|
+
mapOutput: async (response, request) => {
|
|
1210
|
+
if (typeof adapter.extractOutput === 'function') {
|
|
1211
|
+
return adapter.extractOutput(response, request);
|
|
1212
|
+
}
|
|
1213
|
+
return response && Object.prototype.hasOwnProperty.call(response, 'output') ? response.output : response;
|
|
1214
|
+
},
|
|
1215
|
+
});
|
|
1216
|
+
}
|
|
991
1217
|
}
|
|
992
1218
|
|
|
993
1219
|
function validateGrounding(text, documents = [], options = {}) {
|
|
@@ -1641,6 +1867,18 @@ function createLlamaIndexCallback({ shield, metadata = {} } = {}) {
|
|
|
1641
1867
|
};
|
|
1642
1868
|
}
|
|
1643
1869
|
|
|
1870
|
+
/**
 * Expands a `preset` name into concrete shield options.
 *
 * Explicit options win over the preset's values; `shadowPolicyPacks` from both
 * sources are concatenated (preset first) and de-duplicated.
 *
 * @param {object} [options] - Shield options, optionally carrying `preset`.
 * @returns {object} New options object with the preset applied.
 */
function buildShieldOptions(options = {}) {
  const presetOptions = resolveShieldPreset(options.preset);
  const shadowPolicyPacks = dedupeArray([
    ...(presetOptions.shadowPolicyPacks || []),
    ...(options.shadowPolicyPacks || []),
  ]);
  return { ...presetOptions, ...options, shadowPolicyPacks };
}
|
|
1881
|
+
|
|
1644
1882
|
module.exports = {
|
|
1645
1883
|
AgenticCapabilityGater,
|
|
1646
1884
|
AgentIdentityRegistry,
|
|
@@ -1659,6 +1897,8 @@ module.exports = {
|
|
|
1659
1897
|
SENSITIVE_PATTERNS,
|
|
1660
1898
|
PROMPT_INJECTION_RULES,
|
|
1661
1899
|
POLICY_PACKS,
|
|
1900
|
+
SHIELD_PRESETS,
|
|
1901
|
+
CORE_INTERFACES,
|
|
1662
1902
|
sanitizeText,
|
|
1663
1903
|
deobfuscateText,
|
|
1664
1904
|
maskText,
|
|
@@ -1680,6 +1920,11 @@ module.exports = {
|
|
|
1680
1920
|
buildAdminDashboardModel,
|
|
1681
1921
|
getRedTeamPromptLibrary,
|
|
1682
1922
|
runRedTeamSuite,
|
|
1923
|
+
buildShieldOptions,
|
|
1924
|
+
createOpenAIAdapter,
|
|
1925
|
+
createAnthropicAdapter,
|
|
1926
|
+
createGeminiAdapter,
|
|
1927
|
+
createOpenRouterAdapter,
|
|
1683
1928
|
createExpressMiddleware,
|
|
1684
1929
|
createLangChainCallbacks,
|
|
1685
1930
|
createLlamaIndexCallback,
|
package/src/providers.js
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
 * Flattens message content into plain text.
 *
 * - strings are returned unchanged;
 * - arrays are mapped item by item (string items and items with a string
 *   `text` property contribute text, everything else is skipped) and joined
 *   with newlines;
 * - objects contribute their string `text` property, or nothing;
 * - other values go through `String()`, with falsy values becoming ''.
 *
 * @param {*} content - Message content in any of the shapes above.
 * @returns {string} Text form of the content.
 */
function stringifyContent(content) {
  if (typeof content === 'string') return content;

  if (Array.isArray(content)) {
    return content
      .map((item) => {
        if (typeof item === 'string') return item;
        return item && typeof item.text === 'string' ? item.text : '';
      })
      .filter(Boolean)
      .join('\n');
  }

  if (content && typeof content === 'object') {
    // Non-text parts (for example tool-call blocks) carry no text. Returning ''
    // matches the array branch and avoids emitting "[object Object]".
    return typeof content.text === 'string' ? content.text : '';
  }

  return String(content || '');
}
|
|
14
|
+
|
|
15
|
+
/**
 * Converts shield messages into `{ role, content }` entries with text-only content.
 *
 * @param {Array<{role: string, content: *}>} [messages] - Conversation messages.
 * @returns {Array<{role: string, content: string}>} One entry per message, in order.
 */
function toOpenAIInput(messages = []) {
  const toTextMessage = ({ role, content }) => ({
    role,
    content: stringifyContent(content),
  });
  return messages.map(toTextMessage);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Converts shield messages into Anthropic-style conversation turns.
 *
 * System messages are left out here. Every remaining message becomes an
 * 'assistant' turn when its role is 'assistant' and a 'user' turn otherwise,
 * with its content flattened to text.
 *
 * @param {Array<{role: string, content: *}>} [messages] - Conversation messages.
 * @returns {Array<{role: string, content: string}>} Non-system turns, in order.
 */
function toAnthropicMessages(messages = []) {
  return messages.reduce((turns, message) => {
    if (message.role === 'system') return turns;
    const role = message.role === 'assistant' ? 'assistant' : 'user';
    turns.push({ role, content: stringifyContent(message.content) });
    return turns;
  }, []);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Collects the text of all system messages into one prompt.
 *
 * @param {Array<{role: string, content: *}>} [messages] - Conversation messages.
 * @returns {string} System message texts joined by a blank line; '' when there are none.
 */
function extractSystemPrompt(messages = []) {
  const systemParts = [];
  messages.forEach((message) => {
    if (message.role === 'system') {
      systemParts.push(stringifyContent(message.content));
    }
  });
  return systemParts.join('\n\n');
}
|
|
34
|
+
|
|
35
|
+
/**
 * Builds the `{ response, output }` pair returned by an adapter's `invoke()`.
 *
 * @param {*} response - Raw provider response.
 * @param {*} output - Text extracted from the response.
 * @returns {{response: *, output: *}} The adapter result.
 */
function defaultAdapterResult(response, output) {
  const result = { response };
  result.output = output;
  return result;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Creates a provider adapter around an OpenAI-style client.
 *
 * `invoke()` calls `client.chat.completions.create()` when `method` is
 * 'chat.completions' and `client.responses.create()` otherwise. The payload is
 * `{ model, messages | input, metadata, ...request }`, so keys in `request`
 * override the generated ones.
 *
 * Output text is extracted the same way by `invoke()` and `extractOutput()`:
 * the custom `extractOutput` callback if given, else `response.output_text`,
 * else the first choice's message content, else ''.
 *
 * NOTE(review): the request metadata is forwarded to the provider unchanged;
 * confirm that the target endpoint accepts it in the shape your callers pass.
 *
 * @param {object} [config]
 * @param {object} config.client - OpenAI-style client instance.
 * @param {string} [config.model] - Model name sent with every call.
 * @param {object} [config.request] - Extra request fields merged into the payload.
 * @param {string} [config.method] - 'responses' (default) or 'chat.completions'.
 * @param {Function|null} [config.extractOutput] - Custom `(response) => text` extractor; may be async.
 * @returns {{provider: string, invoke: Function, extractOutput: Function}} The adapter.
 * @throws {TypeError} When `client` is missing.
 */
function createOpenAIAdapter({ client, model, request = {}, method = 'responses', extractOutput = null } = {}) {
  if (!client) throw new TypeError('client is required');

  // Single extraction routine so invoke().output and extractOutput() always agree.
  const readOutput = (response) => {
    if (typeof extractOutput === 'function') return extractOutput(response);
    if (response && typeof response.output_text === 'string') return response.output_text;
    const firstChoice = response && response.choices && response.choices[0];
    return firstChoice && firstChoice.message ? stringifyContent(firstChoice.message.content) : '';
  };

  return {
    provider: 'openai',
    async invoke({ messages, metadata = {} } = {}) {
      const useChatCompletions = method === 'chat.completions';
      const response = useChatCompletions
        ? await client.chat.completions.create({
          model,
          messages: toOpenAIInput(messages),
          metadata,
          ...request,
        })
        : await client.responses.create({
          model,
          input: toOpenAIInput(messages),
          metadata,
          ...request,
        });
      // Awaited so that an async custom extractor still yields text, not a Promise.
      return defaultAdapterResult(response, await readOutput(response));
    },
    extractOutput(response) {
      return readOutput(response);
    },
  };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Creates a provider adapter around an Anthropic-style client.
 *
 * `invoke()` calls `client.messages.create()` with
 * `{ model, system, messages, metadata, ...request }`, so keys in `request`
 * override the generated ones. System messages are moved into `system`
 * (omitted when there are none); all other messages become user/assistant turns.
 *
 * Only a user identifier is forwarded as provider metadata (`{ user_id }`,
 * taken from `metadata.userId` or `metadata.user_id`). The Messages API
 * documents `user_id` as the only metadata field, so shield-specific keys
 * such as `route` or `tenantId` are not sent.
 *
 * Output text is extracted the same way by `invoke()` and `extractOutput()`:
 * the custom `extractOutput` callback if given, else the text of the
 * response's content blocks joined by newlines, else ''.
 *
 * NOTE(review): the Messages API also expects `max_tokens`; supply it through `request`.
 *
 * @param {object} [config]
 * @param {object} config.client - Anthropic-style client instance.
 * @param {string} [config.model] - Model name sent with every call.
 * @param {object} [config.request] - Extra request fields merged into the payload.
 * @param {Function|null} [config.extractOutput] - Custom `(response) => text` extractor; may be async.
 * @returns {{provider: string, invoke: Function, extractOutput: Function}} The adapter.
 * @throws {TypeError} When `client` is missing.
 */
function createAnthropicAdapter({ client, model, request = {}, extractOutput = null } = {}) {
  if (!client) throw new TypeError('client is required');

  // Single extraction routine so invoke().output and extractOutput() always agree.
  const readOutput = (response) => {
    if (typeof extractOutput === 'function') return extractOutput(response);
    return Array.isArray(response && response.content)
      ? response.content.map((item) => stringifyContent(item)).filter(Boolean).join('\n')
      : '';
  };

  // Reduce shield metadata to the one field the provider schema defines.
  const toProviderMetadata = (metadata) => {
    const userId = metadata && (metadata.userId || metadata.user_id);
    return userId ? { user_id: String(userId) } : undefined;
  };

  return {
    provider: 'anthropic',
    async invoke({ messages, metadata = {} } = {}) {
      const response = await client.messages.create({
        model,
        system: extractSystemPrompt(messages) || undefined,
        messages: toAnthropicMessages(messages),
        metadata: toProviderMetadata(metadata),
        ...request,
      });
      // Awaited so that an async custom extractor still yields text, not a Promise.
      return defaultAdapterResult(response, await readOutput(response));
    },
    extractOutput(response) {
      return readOutput(response);
    },
  };
}
|
|
98
|
+
|
|
99
|
+
/**
 * Creates a provider adapter around a Gemini-style client.
 *
 * `invoke()` calls `client.models.generateContent()` with
 * `{ model, contents, ...request }`, so keys in `request` override the
 * generated ones. Assistant messages become 'model' turns; every other role,
 * including 'system', is sent as a 'user' turn.
 *
 * Output text is extracted the same way by `invoke()` and `extractOutput()`:
 * the custom `extractOutput` callback if given, else `response.text`, else
 * the response itself when it is a string, else ''.
 *
 * @param {object} [config]
 * @param {object} config.client - Gemini-style client instance.
 * @param {string} [config.model] - Model name sent with every call.
 * @param {object} [config.request] - Extra request fields merged into the payload.
 * @param {Function|null} [config.extractOutput] - Custom `(response) => text` extractor; may be async.
 * @returns {{provider: string, invoke: Function, extractOutput: Function}} The adapter.
 * @throws {TypeError} When `client` is missing.
 */
function createGeminiAdapter({ client, model, request = {}, extractOutput = null } = {}) {
  if (!client) throw new TypeError('client is required');

  // Single extraction routine so invoke().output and extractOutput() always agree.
  const readOutput = (response) => {
    if (typeof extractOutput === 'function') return extractOutput(response);
    if (response && typeof response.text === 'string') return response.text;
    if (typeof response === 'string') return response;
    return '';
  };

  return {
    provider: 'gemini',
    async invoke({ messages = [] } = {}) {
      const contents = messages.map((message) => ({
        role: message.role === 'assistant' ? 'model' : 'user',
        parts: [{ text: stringifyContent(message.content) }],
      }));
      const response = await client.models.generateContent({
        model,
        contents,
        ...request,
      });
      // Awaited so that an async custom extractor still yields text, not a Promise.
      return defaultAdapterResult(response, await readOutput(response));
    },
    extractOutput(response) {
      return readOutput(response);
    },
  };
}
|
|
122
|
+
|
|
123
|
+
/**
 * Creates a provider adapter around an OpenRouter client that exposes the
 * chat-completions interface.
 *
 * `invoke()` calls `client.chat.completions.create()` with
 * `{ model, messages, ...request }`, so keys in `request` override the
 * generated ones. Request metadata is not forwarded.
 *
 * Output text is extracted the same way by `invoke()` and `extractOutput()`:
 * the custom `extractOutput` callback if given, else the first choice's
 * message content, else ''.
 *
 * @param {object} [config]
 * @param {object} config.client - Client exposing `chat.completions.create()`.
 * @param {string} [config.model] - Model name sent with every call.
 * @param {object} [config.request] - Extra request fields merged into the payload.
 * @param {Function|null} [config.extractOutput] - Custom `(response) => text` extractor; may be async.
 * @returns {{provider: string, invoke: Function, extractOutput: Function}} The adapter.
 * @throws {TypeError} When `client` is missing.
 */
function createOpenRouterAdapter({ client, model, request = {}, extractOutput = null } = {}) {
  if (!client) throw new TypeError('client is required');

  // Single extraction routine so invoke().output and extractOutput() always agree.
  const readOutput = (response) => {
    if (typeof extractOutput === 'function') return extractOutput(response);
    const firstChoice = response && response.choices && response.choices[0];
    return firstChoice && firstChoice.message ? stringifyContent(firstChoice.message.content) : '';
  };

  return {
    provider: 'openrouter',
    async invoke({ messages } = {}) {
      const response = await client.chat.completions.create({
        model,
        messages: toOpenAIInput(messages),
        ...request,
      });
      // Awaited so that an async custom extractor still yields text, not a Promise.
      return defaultAdapterResult(response, await readOutput(response));
    },
    extractOutput(response) {
      return readOutput(response);
    },
  };
}
|
|
146
|
+
|
|
147
|
+
module.exports = {
|
|
148
|
+
createOpenAIAdapter,
|
|
149
|
+
createAnthropicAdapter,
|
|
150
|
+
createGeminiAdapter,
|
|
151
|
+
createOpenRouterAdapter,
|
|
152
|
+
};
|