@vpdeva/blackwall-llm-shield-js 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -12,6 +12,7 @@ JavaScript security middleware for LLM applications in Node.js and Next.js. Blac
12
12
  - Supports shadow mode and side-by-side policy-pack evaluation
13
13
  - Notifies webhooks or alert handlers when risky traffic appears
14
14
  - Emits structured telemetry for prompt risk, masking volume, and output review outcomes
15
+ - Includes first-class provider adapters for OpenAI, Anthropic, Gemini, and OpenRouter
15
16
  - Inspects model outputs for leaks, unsafe code, grounding drift, and tone violations
16
17
  - Ships Express, LangChain, and LlamaIndex integration helpers
17
18
  - Enforces allowlists, denylists, validators, and approval-gated tools
@@ -74,6 +75,10 @@ console.log(guarded.report);
74
75
 
75
76
  Use `shadowMode` with `shadowPolicyPacks` or `comparePolicyPacks` to record what would have been blocked without interrupting traffic.
76
77
 
78
+ ### Provider adapters and stable wrappers
79
+
80
+ Use `createOpenAIAdapter()`, `createAnthropicAdapter()`, `createGeminiAdapter()`, or `createOpenRouterAdapter()` with `protectWithAdapter()` when you want Blackwall to wrap the provider call end to end.
81
+
77
82
  ### Output grounding and tone review
78
83
 
79
84
  `OutputFirewall` can compare responses against retrieved documents and flag hallucination-style unsupported claims or unprofessional tone.
@@ -86,13 +91,15 @@ Use `createExpressMiddleware()`, `createLangChainCallbacks()`, or `createLlamaIn
86
91
 
87
92
  Use `require('blackwall-llm-shield-js/integrations')` for callback wrappers and `require('blackwall-llm-shield-js/semantic')` for optional local semantic scoring adapters.
88
93
 
94
+ Use `require('@vpdeva/blackwall-llm-shield-js/providers')` for provider adapter factories.
95
+
89
96
  ## Core Building Blocks
90
97
 
91
98
  ### `BlackwallShield`
92
99
 
93
100
  Use it to sanitize inbound messages, mask sensitive data, score prompt-injection risk, and decide whether the request should continue to the model provider.
94
101
 
95
- It also exposes `protectModelCall()` and `reviewModelResponse()` so you can enforce request checks before OpenAI or Anthropic calls and review outputs before they go back to the user.
102
+ It also exposes `protectModelCall()`, `protectWithAdapter()`, and `reviewModelResponse()` so you can enforce request checks before provider calls and review outputs before they go back to the user.
96
103
 
97
104
  ### `OutputFirewall`
98
105
 
@@ -127,19 +134,22 @@ if (!guarded.allowed) {
127
134
  ### Wrap a provider call end to end
128
135
 
129
136
  ```js
137
+ const { BlackwallShield, createOpenAIAdapter } = require('@vpdeva/blackwall-llm-shield-js');
138
+
130
139
  const shield = new BlackwallShield({
131
- shadowMode: true,
140
+ preset: 'shadowFirst',
132
141
  onTelemetry: async (event) => console.log(JSON.stringify(event)),
133
142
  });
134
143
 
135
- const result = await shield.protectModelCall({
144
+ const adapter = createOpenAIAdapter({
145
+ client: openai,
146
+ model: 'gpt-4.1-mini',
147
+ });
148
+
149
+ const result = await shield.protectWithAdapter({
150
+ adapter,
136
151
  messages: [{ role: 'user', content: 'Summarize this shipment exception.' }],
137
152
  metadata: { route: '/api/chat', tenantId: 'au-commerce', userId: 'ops-7' },
138
- callModel: async ({ messages }) => openai.responses.create({
139
- model: 'gpt-4.1-mini',
140
- input: messages.map((msg) => `${msg.role}: ${msg.content}`).join('\n'),
141
- }),
142
- mapOutput: (response) => response.output_text,
143
153
  firewallOptions: {
144
154
  retrievalDocuments: [
145
155
  { id: 'kb-1', content: 'Shipment exceptions should include the parcel ID, lane, and next action.' },
@@ -150,6 +160,30 @@ const result = await shield.protectModelCall({
150
160
  console.log(result.stage, result.allowed);
151
161
  ```
152
162
 
163
+ ### Use presets and route-level policy overrides
164
+
165
+ ```js
166
+ const shield = new BlackwallShield({
167
+ preset: 'shadowFirst',
168
+ routePolicies: [
169
+ {
170
+ route: '/api/admin/*',
171
+ options: {
172
+ preset: 'strict',
173
+ policyPack: 'finance',
174
+ },
175
+ },
176
+ {
177
+ route: '/api/health',
178
+ options: {
179
+ shadowMode: true,
180
+ suppressPromptRules: ['ignore_instructions'],
181
+ },
182
+ },
183
+ ],
184
+ });
185
+ ```
186
+
153
187
  ### Inspect model output
154
188
 
155
189
  ```js
@@ -190,12 +224,15 @@ console.log(tools.inspectCall({ tool: 'lookupCustomer', args: { id: 'cus_123' }
190
224
  - `npm run release:check` runs the JS test suite before release
191
225
  - `npm run release:pack` creates the local npm tarball
192
226
  - `npm run release:publish` publishes the package to npm
227
+ - `npm run changeset` creates a version/changelog entry for the next release
228
+ - `npm run version-packages` applies pending Changesets locally
193
229
 
194
230
  ## Rollout Notes
195
231
 
196
- - Start with `shadowMode: true` and inspect `report.telemetry` plus `onTelemetry` events before enabling hard blocking.
232
+ - Start with `preset: 'shadowFirst'` or `shadowMode: true` and inspect `report.telemetry` plus `onTelemetry` events before enabling hard blocking.
197
233
  - Use `RetrievalSanitizer` and `ToolPermissionFirewall` in front of RAG, search, admin actions, and tool-calling flows.
198
234
  - Add regression prompts for instruction overrides, prompt leaks, token leaks, and Australian PII samples so upgrades stay safe.
235
+ - Expect some latency increase from grounding checks, output review, and custom detectors; benchmark with your real prompt and response sizes before enforcing globally.
199
236
 
200
237
  ## Support
201
238
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vpdeva/blackwall-llm-shield-js",
3
- "version": "0.1.2",
3
+ "version": "0.1.3",
4
4
  "description": "Open-source JavaScript enterprise LLM protection toolkit for Node.js and Next.js",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Vish <hello@vish.au> (https://vish.au)",
@@ -9,6 +9,7 @@
9
9
  "exports": {
10
10
  ".": "./src/index.js",
11
11
  "./integrations": "./src/integrations.js",
12
+ "./providers": "./src/providers.js",
12
13
  "./semantic": "./src/semantic.js"
13
14
  },
14
15
  "bin": {
@@ -16,6 +17,9 @@
16
17
  },
17
18
  "scripts": {
18
19
  "test": "node --test tests/*.test.js",
20
+ "changeset": "changeset",
21
+ "version-packages": "changeset version",
22
+ "release": "changeset publish",
19
23
  "release:check": "npm test",
20
24
  "release:pack": "npm pack",
21
25
  "release:publish": "npm publish --access public --provenance"
@@ -49,5 +53,8 @@
49
53
  "enterprise",
50
54
  "nextjs",
51
55
  "node"
52
- ]
56
+ ],
57
+ "devDependencies": {
58
+ "@changesets/cli": "^2.29.6"
59
+ }
53
60
  }
package/src/index.js CHANGED
@@ -1,5 +1,11 @@
1
1
  const crypto = require('crypto');
2
2
  const RED_TEAM_PROMPT_LIBRARY = require('./red_team_prompts.json');
3
+ const {
4
+ createOpenAIAdapter,
5
+ createAnthropicAdapter,
6
+ createGeminiAdapter,
7
+ createOpenRouterAdapter,
8
+ } = require('./providers');
3
9
 
4
10
  const SENSITIVE_PATTERNS = {
5
11
  email: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g,
@@ -100,6 +106,44 @@ const POLICY_PACKS = {
100
106
  },
101
107
  };
102
108
 
109
+ const SHIELD_PRESETS = {
110
+ balanced: {
111
+ blockOnPromptInjection: true,
112
+ promptInjectionThreshold: 'high',
113
+ notifyOnRiskLevel: 'medium',
114
+ shadowMode: false,
115
+ },
116
+ shadowFirst: {
117
+ blockOnPromptInjection: true,
118
+ promptInjectionThreshold: 'medium',
119
+ notifyOnRiskLevel: 'medium',
120
+ shadowMode: true,
121
+ },
122
+ strict: {
123
+ blockOnPromptInjection: true,
124
+ promptInjectionThreshold: 'medium',
125
+ notifyOnRiskLevel: 'medium',
126
+ shadowMode: false,
127
+ allowSystemMessages: false,
128
+ },
129
+ developerFriendly: {
130
+ blockOnPromptInjection: true,
131
+ promptInjectionThreshold: 'high',
132
+ notifyOnRiskLevel: 'high',
133
+ shadowMode: true,
134
+ allowSystemMessages: true,
135
+ },
136
+ };
137
+
138
+ const CORE_INTERFACE_VERSION = '1.0';
139
+ const CORE_INTERFACES = Object.freeze({
140
+ guardModelRequest: CORE_INTERFACE_VERSION,
141
+ reviewModelResponse: CORE_INTERFACE_VERSION,
142
+ protectModelCall: CORE_INTERFACE_VERSION,
143
+ toolPermissionFirewall: CORE_INTERFACE_VERSION,
144
+ retrievalSanitizer: CORE_INTERFACE_VERSION,
145
+ });
146
+
103
147
  const RISK_ORDER = ['low', 'medium', 'high', 'critical'];
104
148
  const LEETSPEAK_MAP = { '0': 'o', '1': 'i', '3': 'e', '4': 'a', '5': 's', '7': 't', '@': 'a', '$': 's' };
105
149
  const TOXICITY_PATTERNS = [
@@ -223,6 +267,129 @@ function createTelemetryEvent(type, payload = {}) {
223
267
  };
224
268
  }
225
269
 
270
+ function resolveShieldPreset(name) {
271
+ if (!name) return {};
272
+ return SHIELD_PRESETS[name] ? { ...SHIELD_PRESETS[name] } : {};
273
+ }
274
+
275
+ function dedupeArray(values = []) {
276
+ return [...new Set((Array.isArray(values) ? values : []).filter(Boolean))];
277
+ }
278
+
279
+ function routePatternMatches(pattern, route = '', metadata = {}) {
280
+ if (!pattern) return false;
281
+ if (typeof pattern === 'function') return !!pattern(route, metadata);
282
+ if (pattern instanceof RegExp) return pattern.test(route);
283
+ if (typeof pattern === 'string') {
284
+ if (pattern === route) return true;
285
+ if (pattern.includes('*')) {
286
+ const regex = new RegExp(`^${pattern.split('*').map((part) => part.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')).join('.*')}$`);
287
+ return regex.test(route);
288
+ }
289
+ }
290
+ return false;
291
+ }
292
+
293
+ function resolveRoutePolicy(routePolicies = [], metadata = {}) {
294
+ const route = metadata.route || metadata.path || '';
295
+ const matched = (Array.isArray(routePolicies) ? routePolicies : []).filter((entry) => routePatternMatches(entry && entry.route, route, metadata));
296
+ if (!matched.length) return null;
297
+ return matched.reduce((acc, entry) => {
298
+ const options = entry && entry.options ? entry.options : {};
299
+ return {
300
+ ...acc,
301
+ ...options,
302
+ shadowPolicyPacks: dedupeArray([...(acc.shadowPolicyPacks || []), ...(options.shadowPolicyPacks || [])]),
303
+ entityDetectors: [...(acc.entityDetectors || []), ...(options.entityDetectors || [])],
304
+ customPromptDetectors: [...(acc.customPromptDetectors || []), ...(options.customPromptDetectors || [])],
305
+ suppressPromptRules: dedupeArray([...(acc.suppressPromptRules || []), ...(options.suppressPromptRules || [])]),
306
+ };
307
+ }, {});
308
+ }
309
+
310
+ function applyPromptRuleSuppressions(injection, suppressedIds = []) {
311
+ const suppressionSet = new Set(dedupeArray(suppressedIds));
312
+ if (!suppressionSet.size) return injection;
313
+ const matches = (injection.matches || []).filter((item) => !suppressionSet.has(item.id));
314
+ const score = Math.min(matches.reduce((sum, item) => sum + (item.score || 0), 0), 100);
315
+ return {
316
+ ...injection,
317
+ matches,
318
+ score,
319
+ level: riskLevelFromScore(score),
320
+ blockedByDefault: score >= 45,
321
+ };
322
+ }
323
+
324
+ function applyCustomPromptDetectors(injection, text, options = {}, metadata = {}) {
325
+ const detectors = Array.isArray(options.customPromptDetectors) ? options.customPromptDetectors : [];
326
+ if (!detectors.length) return injection;
327
+ const matches = [...(injection.matches || [])];
328
+ const seen = new Set(matches.map((item) => item.id));
329
+ let score = injection.score || 0;
330
+ for (const detector of detectors) {
331
+ if (typeof detector !== 'function') continue;
332
+ const result = detector({ text, injection, metadata, options }) || [];
333
+ const findings = Array.isArray(result) ? result : [result];
334
+ for (const finding of findings) {
335
+ if (!finding || !finding.id || seen.has(finding.id)) continue;
336
+ seen.add(finding.id);
337
+ matches.push({
338
+ id: finding.id,
339
+ score: Math.max(0, Math.min(finding.score || 0, 40)),
340
+ reason: finding.reason || 'Custom prompt detector triggered',
341
+ source: finding.source || 'custom',
342
+ });
343
+ score += Math.max(0, Math.min(finding.score || 0, 40));
344
+ }
345
+ }
346
+ const cappedScore = Math.min(score, 100);
347
+ return {
348
+ ...injection,
349
+ matches,
350
+ score: cappedScore,
351
+ level: riskLevelFromScore(cappedScore),
352
+ blockedByDefault: cappedScore >= 45,
353
+ };
354
+ }
355
+
356
+ function resolveEffectiveShieldOptions(baseOptions = {}, metadata = {}) {
357
+ const presetOptions = resolveShieldPreset(baseOptions.preset);
358
+ const routePolicy = resolveRoutePolicy(baseOptions.routePolicies, metadata);
359
+ const routePresetOptions = resolveShieldPreset(routePolicy && routePolicy.preset);
360
+ return {
361
+ ...baseOptions,
362
+ ...presetOptions,
363
+ ...routePresetOptions,
364
+ ...(routePolicy || {}),
365
+ shadowPolicyPacks: dedupeArray([
366
+ ...((presetOptions && presetOptions.shadowPolicyPacks) || []),
367
+ ...((routePresetOptions && routePresetOptions.shadowPolicyPacks) || []),
368
+ ...(baseOptions.shadowPolicyPacks || []),
369
+ ...((routePolicy && routePolicy.shadowPolicyPacks) || []),
370
+ ]),
371
+ entityDetectors: [
372
+ ...((presetOptions && presetOptions.entityDetectors) || []),
373
+ ...((routePresetOptions && routePresetOptions.entityDetectors) || []),
374
+ ...(baseOptions.entityDetectors || []),
375
+ ...((routePolicy && routePolicy.entityDetectors) || []),
376
+ ],
377
+ customPromptDetectors: [
378
+ ...((presetOptions && presetOptions.customPromptDetectors) || []),
379
+ ...((routePresetOptions && routePresetOptions.customPromptDetectors) || []),
380
+ ...(baseOptions.customPromptDetectors || []),
381
+ ...((routePolicy && routePolicy.customPromptDetectors) || []),
382
+ ],
383
+ suppressPromptRules: dedupeArray([
384
+ ...((presetOptions && presetOptions.suppressPromptRules) || []),
385
+ ...((routePresetOptions && routePresetOptions.suppressPromptRules) || []),
386
+ ...(baseOptions.suppressPromptRules || []),
387
+ ...((routePolicy && routePolicy.suppressPromptRules) || []),
388
+ ]),
389
+ routePolicy,
390
+ };
391
+ }
392
+
226
393
  function cloneRegex(regex) {
227
394
  return new RegExp(regex.source, regex.flags);
228
395
  }
@@ -748,14 +915,19 @@ class BlackwallShield {
748
915
  maxLength: 5000,
749
916
  allowSystemMessages: false,
750
917
  shadowMode: false,
918
+ preset: null,
751
919
  policyPack: null,
752
920
  shadowPolicyPacks: [],
753
921
  entityDetectors: [],
922
+ customPromptDetectors: [],
923
+ suppressPromptRules: [],
924
+ routePolicies: [],
754
925
  detectNamedEntities: false,
755
926
  semanticScorer: null,
756
927
  sessionBuffer: null,
757
928
  tokenBudgetFirewall: null,
758
929
  systemPrompt: null,
930
+ outputFirewallDefaults: {},
759
931
  onAlert: null,
760
932
  onTelemetry: null,
761
933
  webhookUrl: null,
@@ -764,10 +936,13 @@ class BlackwallShield {
764
936
  }
765
937
 
766
938
  inspectText(text) {
767
- const pii = maskValue(text, this.options);
768
- const injection = detectPromptInjection(text, this.options);
939
+ const effectiveOptions = resolveEffectiveShieldOptions(this.options);
940
+ const pii = maskValue(text, effectiveOptions);
941
+ let injection = detectPromptInjection(text, effectiveOptions);
942
+ injection = applyCustomPromptDetectors(injection, String(text || ''), effectiveOptions, {});
943
+ injection = applyPromptRuleSuppressions(injection, effectiveOptions.suppressPromptRules);
769
944
  return {
770
- sanitized: pii.original || sanitizeText(text, this.options.maxLength),
945
+ sanitized: pii.original || sanitizeText(text, effectiveOptions.maxLength),
771
946
  promptInjection: injection,
772
947
  sensitiveData: {
773
948
  findings: pii.findings,
@@ -792,35 +967,43 @@ class BlackwallShield {
792
967
  }
793
968
 
794
969
  async guardModelRequest({ messages = [], metadata = {}, allowSystemMessages = this.options.allowSystemMessages, comparePolicyPacks = [] } = {}) {
970
+ const effectiveOptions = resolveEffectiveShieldOptions(this.options, metadata);
971
+ const effectiveAllowSystemMessages = allowSystemMessages === this.options.allowSystemMessages
972
+ ? effectiveOptions.allowSystemMessages
973
+ : allowSystemMessages;
795
974
  const normalizedMessages = normalizeMessages(messages, {
796
- maxMessages: this.options.maxMessages,
797
- allowSystemMessages,
975
+ maxMessages: effectiveOptions.maxMessages,
976
+ allowSystemMessages: effectiveAllowSystemMessages,
798
977
  });
799
978
  const masked = maskMessages(normalizedMessages, {
800
- includeOriginals: this.options.includeOriginals,
801
- syntheticReplacement: this.options.syntheticReplacement,
802
- maxLength: this.options.maxLength,
803
- allowSystemMessages,
979
+ includeOriginals: effectiveOptions.includeOriginals,
980
+ syntheticReplacement: effectiveOptions.syntheticReplacement,
981
+ maxLength: effectiveOptions.maxLength,
982
+ allowSystemMessages: effectiveAllowSystemMessages,
983
+ entityDetectors: effectiveOptions.entityDetectors,
984
+ detectNamedEntities: effectiveOptions.detectNamedEntities,
804
985
  });
805
986
  const promptCandidate = normalizedMessages.filter((msg) => msg.role !== 'assistant');
806
- const sessionBuffer = this.options.sessionBuffer;
987
+ const sessionBuffer = effectiveOptions.sessionBuffer;
807
988
  if (sessionBuffer && typeof sessionBuffer.record === 'function') {
808
989
  promptCandidate.forEach((msg) => sessionBuffer.record(msg.content));
809
990
  }
810
991
  const sessionContext = sessionBuffer && typeof sessionBuffer.render === 'function'
811
992
  ? sessionBuffer.render()
812
993
  : promptCandidate;
813
- const injection = detectPromptInjection(sessionContext, this.options);
814
-
815
- const primaryPolicy = resolvePolicyPack(this.options.policyPack);
816
- const threshold = (primaryPolicy && primaryPolicy.promptInjectionThreshold) || this.options.promptInjectionThreshold;
817
- const wouldBlock = this.options.blockOnPromptInjection && compareRisk(injection.level, threshold);
818
- const shouldBlock = this.options.shadowMode ? false : wouldBlock;
819
- const shouldNotify = compareRisk(injection.level, this.options.notifyOnRiskLevel);
820
- const policyNames = [...new Set([...(this.options.shadowPolicyPacks || []), ...comparePolicyPacks].filter(Boolean))];
821
- const policyComparisons = policyNames.map((name) => evaluatePolicyPack(injection, name, this.options.promptInjectionThreshold));
822
- const budgetResult = this.options.tokenBudgetFirewall && typeof this.options.tokenBudgetFirewall.inspect === 'function'
823
- ? this.options.tokenBudgetFirewall.inspect({
994
+ let injection = detectPromptInjection(sessionContext, effectiveOptions);
995
+ injection = applyCustomPromptDetectors(injection, Array.isArray(sessionContext) ? JSON.stringify(sessionContext) : String(sessionContext || ''), effectiveOptions, metadata);
996
+ injection = applyPromptRuleSuppressions(injection, effectiveOptions.suppressPromptRules);
997
+
998
+ const primaryPolicy = resolvePolicyPack(effectiveOptions.policyPack);
999
+ const threshold = (primaryPolicy && primaryPolicy.promptInjectionThreshold) || effectiveOptions.promptInjectionThreshold;
1000
+ const wouldBlock = effectiveOptions.blockOnPromptInjection && compareRisk(injection.level, threshold);
1001
+ const shouldBlock = effectiveOptions.shadowMode ? false : wouldBlock;
1002
+ const shouldNotify = compareRisk(injection.level, effectiveOptions.notifyOnRiskLevel);
1003
+ const policyNames = [...new Set([...(effectiveOptions.shadowPolicyPacks || []), ...comparePolicyPacks].filter(Boolean))];
1004
+ const policyComparisons = policyNames.map((name) => evaluatePolicyPack(injection, name, effectiveOptions.promptInjectionThreshold));
1005
+ const budgetResult = effectiveOptions.tokenBudgetFirewall && typeof effectiveOptions.tokenBudgetFirewall.inspect === 'function'
1006
+ ? effectiveOptions.tokenBudgetFirewall.inspect({
824
1007
  userId: metadata.userId || metadata.user_id || 'anonymous',
825
1008
  tenantId: metadata.tenantId || metadata.tenant_id || 'default',
826
1009
  messages: normalizedMessages,
@@ -838,7 +1021,7 @@ class BlackwallShield {
838
1021
  hasSensitiveData: masked.hasSensitiveData,
839
1022
  },
840
1023
  enforcement: {
841
- shadowMode: this.options.shadowMode,
1024
+ shadowMode: effectiveOptions.shadowMode,
842
1025
  wouldBlock: wouldBlock || !budgetResult.allowed,
843
1026
  blocked: shouldBlock || !budgetResult.allowed,
844
1027
  threshold,
@@ -846,6 +1029,13 @@ class BlackwallShield {
846
1029
  policyPack: primaryPolicy ? primaryPolicy.name : null,
847
1030
  policyComparisons,
848
1031
  tokenBudget: budgetResult,
1032
+ coreInterfaces: CORE_INTERFACES,
1033
+ routePolicy: effectiveOptions.routePolicy ? {
1034
+ route: metadata.route || metadata.path || null,
1035
+ suppressPromptRules: effectiveOptions.routePolicy.suppressPromptRules || [],
1036
+ policyPack: effectiveOptions.routePolicy.policyPack || null,
1037
+ preset: effectiveOptions.routePolicy.preset || null,
1038
+ } : null,
849
1039
  telemetry: {
850
1040
  eventType: 'llm_request_reviewed',
851
1041
  promptInjectionRuleHits: countFindingsByType(injection.matches),
@@ -861,7 +1051,7 @@ class BlackwallShield {
861
1051
  await this.emitTelemetry(createTelemetryEvent('llm_request_reviewed', {
862
1052
  metadata,
863
1053
  blocked: shouldBlock || !budgetResult.allowed,
864
- shadowMode: this.options.shadowMode,
1054
+ shadowMode: effectiveOptions.shadowMode,
865
1055
  report,
866
1056
  }));
867
1057
 
@@ -886,14 +1076,17 @@ class BlackwallShield {
886
1076
  }
887
1077
 
888
1078
  async reviewModelResponse({ output, metadata = {}, outputFirewall = null, firewallOptions = {} } = {}) {
889
- const primaryPolicy = resolvePolicyPack(this.options.policyPack);
1079
+ const effectiveOptions = resolveEffectiveShieldOptions(this.options, metadata);
1080
+ const primaryPolicy = resolvePolicyPack(effectiveOptions.policyPack);
890
1081
  const firewall = outputFirewall || new OutputFirewall({
891
1082
  riskThreshold: (primaryPolicy && primaryPolicy.outputRiskThreshold) || 'high',
892
- systemPrompt: this.options.systemPrompt,
1083
+ systemPrompt: effectiveOptions.systemPrompt,
1084
+ ...effectiveOptions.outputFirewallDefaults,
893
1085
  ...firewallOptions,
894
1086
  });
895
1087
  const review = firewall.inspect(output, {
896
- systemPrompt: this.options.systemPrompt,
1088
+ systemPrompt: effectiveOptions.systemPrompt,
1089
+ ...(effectiveOptions.outputFirewallDefaults || {}),
897
1090
  ...firewallOptions,
898
1091
  });
899
1092
  const report = {
@@ -902,6 +1095,7 @@ class BlackwallShield {
902
1095
  metadata,
903
1096
  outputReview: {
904
1097
  ...review,
1098
+ coreInterfaces: CORE_INTERFACES,
905
1099
  telemetry: {
906
1100
  eventType: 'llm_output_reviewed',
907
1101
  findingCounts: countFindingsByType(review.findings),
@@ -988,6 +1182,38 @@ class BlackwallShield {
988
1182
  review,
989
1183
  };
990
1184
  }
1185
+
1186
+ async protectWithAdapter({
1187
+ adapter,
1188
+ messages = [],
1189
+ metadata = {},
1190
+ allowSystemMessages = this.options.allowSystemMessages,
1191
+ comparePolicyPacks = [],
1192
+ outputFirewall = null,
1193
+ firewallOptions = {},
1194
+ } = {}) {
1195
+ if (!adapter || typeof adapter.invoke !== 'function') {
1196
+ throw new TypeError('adapter.invoke must be a function');
1197
+ }
1198
+ return this.protectModelCall({
1199
+ messages,
1200
+ metadata,
1201
+ allowSystemMessages,
1202
+ comparePolicyPacks,
1203
+ outputFirewall,
1204
+ firewallOptions,
1205
+ callModel: async (payload) => {
1206
+ const result = await adapter.invoke(payload);
1207
+ return result && Object.prototype.hasOwnProperty.call(result, 'response') ? result.response : result;
1208
+ },
1209
+ mapOutput: async (response, request) => {
1210
+ if (typeof adapter.extractOutput === 'function') {
1211
+ return adapter.extractOutput(response, request);
1212
+ }
1213
+ return response && Object.prototype.hasOwnProperty.call(response, 'output') ? response.output : response;
1214
+ },
1215
+ });
1216
+ }
991
1217
  }
992
1218
 
993
1219
  function validateGrounding(text, documents = [], options = {}) {
@@ -1641,6 +1867,18 @@ function createLlamaIndexCallback({ shield, metadata = {} } = {}) {
1641
1867
  };
1642
1868
  }
1643
1869
 
1870
+ function buildShieldOptions(options = {}) {
1871
+ const presetOptions = resolveShieldPreset(options.preset);
1872
+ return {
1873
+ ...presetOptions,
1874
+ ...options,
1875
+ shadowPolicyPacks: dedupeArray([
1876
+ ...(presetOptions.shadowPolicyPacks || []),
1877
+ ...(options.shadowPolicyPacks || []),
1878
+ ]),
1879
+ };
1880
+ }
1881
+
1644
1882
  module.exports = {
1645
1883
  AgenticCapabilityGater,
1646
1884
  AgentIdentityRegistry,
@@ -1659,6 +1897,8 @@ module.exports = {
1659
1897
  SENSITIVE_PATTERNS,
1660
1898
  PROMPT_INJECTION_RULES,
1661
1899
  POLICY_PACKS,
1900
+ SHIELD_PRESETS,
1901
+ CORE_INTERFACES,
1662
1902
  sanitizeText,
1663
1903
  deobfuscateText,
1664
1904
  maskText,
@@ -1680,6 +1920,11 @@ module.exports = {
1680
1920
  buildAdminDashboardModel,
1681
1921
  getRedTeamPromptLibrary,
1682
1922
  runRedTeamSuite,
1923
+ buildShieldOptions,
1924
+ createOpenAIAdapter,
1925
+ createAnthropicAdapter,
1926
+ createGeminiAdapter,
1927
+ createOpenRouterAdapter,
1683
1928
  createExpressMiddleware,
1684
1929
  createLangChainCallbacks,
1685
1930
  createLlamaIndexCallback,
@@ -0,0 +1,152 @@
1
+ function stringifyContent(content) {
2
+ if (typeof content === 'string') return content;
3
+ if (Array.isArray(content)) {
4
+ return content.map((item) => {
5
+ if (typeof item === 'string') return item;
6
+ if (item && typeof item.text === 'string') return item.text;
7
+ if (item && item.type === 'text' && typeof item.text === 'string') return item.text;
8
+ return '';
9
+ }).filter(Boolean).join('\n');
10
+ }
11
+ if (content && typeof content.text === 'string') return content.text;
12
+ return String(content || '');
13
+ }
14
+
15
+ function toOpenAIInput(messages = []) {
16
+ return messages.map((message) => ({
17
+ role: message.role,
18
+ content: stringifyContent(message.content),
19
+ }));
20
+ }
21
+
22
+ function toAnthropicMessages(messages = []) {
23
+ return messages
24
+ .filter((message) => message.role !== 'system')
25
+ .map((message) => ({
26
+ role: message.role === 'assistant' ? 'assistant' : 'user',
27
+ content: stringifyContent(message.content),
28
+ }));
29
+ }
30
+
31
+ function extractSystemPrompt(messages = []) {
32
+ return messages.filter((message) => message.role === 'system').map((message) => stringifyContent(message.content)).join('\n\n');
33
+ }
34
+
35
+ function defaultAdapterResult(response, output) {
36
+ return { response, output };
37
+ }
38
+
39
+ function createOpenAIAdapter({ client, model, request = {}, method = 'responses', extractOutput = null } = {}) {
40
+ if (!client) throw new TypeError('client is required');
41
+ return {
42
+ provider: 'openai',
43
+ async invoke({ messages, metadata = {} }) {
44
+ if (method === 'chat.completions') {
45
+ const response = await client.chat.completions.create({
46
+ model,
47
+ messages: toOpenAIInput(messages),
48
+ metadata,
49
+ ...request,
50
+ });
51
+ return defaultAdapterResult(response, response && response.choices && response.choices[0] && response.choices[0].message
52
+ ? stringifyContent(response.choices[0].message.content)
53
+ : '');
54
+ }
55
+ const response = await client.responses.create({
56
+ model,
57
+ input: toOpenAIInput(messages),
58
+ metadata,
59
+ ...request,
60
+ });
61
+ return defaultAdapterResult(response, response && typeof response.output_text === 'string' ? response.output_text : '');
62
+ },
63
+ extractOutput(response) {
64
+ if (typeof extractOutput === 'function') return extractOutput(response);
65
+ if (response && typeof response.output_text === 'string') return response.output_text;
66
+ return response && response.choices && response.choices[0] && response.choices[0].message
67
+ ? stringifyContent(response.choices[0].message.content)
68
+ : '';
69
+ },
70
+ };
71
+ }
72
+
73
+ function createAnthropicAdapter({ client, model, request = {}, extractOutput = null } = {}) {
74
+ if (!client) throw new TypeError('client is required');
75
+ return {
76
+ provider: 'anthropic',
77
+ async invoke({ messages, metadata = {} }) {
78
+ const response = await client.messages.create({
79
+ model,
80
+ system: extractSystemPrompt(messages) || undefined,
81
+ messages: toAnthropicMessages(messages),
82
+ metadata,
83
+ ...request,
84
+ });
85
+ const output = Array.isArray(response && response.content)
86
+ ? response.content.map((item) => stringifyContent(item)).filter(Boolean).join('\n')
87
+ : '';
88
+ return defaultAdapterResult(response, output);
89
+ },
90
+ extractOutput(response) {
91
+ if (typeof extractOutput === 'function') return extractOutput(response);
92
+ return Array.isArray(response && response.content)
93
+ ? response.content.map((item) => stringifyContent(item)).filter(Boolean).join('\n')
94
+ : '';
95
+ },
96
+ };
97
+ }
98
+
99
+ function createGeminiAdapter({ client, model, request = {}, extractOutput = null } = {}) {
100
+ if (!client) throw new TypeError('client is required');
101
+ return {
102
+ provider: 'gemini',
103
+ async invoke({ messages }) {
104
+ const response = await client.models.generateContent({
105
+ model,
106
+ contents: messages.map((message) => ({
107
+ role: message.role === 'assistant' ? 'model' : 'user',
108
+ parts: [{ text: stringifyContent(message.content) }],
109
+ })),
110
+ ...request,
111
+ });
112
+ return defaultAdapterResult(response, response && typeof response.text === 'string' ? response.text : '');
113
+ },
114
+ extractOutput(response) {
115
+ if (typeof extractOutput === 'function') return extractOutput(response);
116
+ if (response && typeof response.text === 'string') return response.text;
117
+ if (typeof response === 'string') return response;
118
+ return '';
119
+ },
120
+ };
121
+ }
122
+
123
+ function createOpenRouterAdapter({ client, model, request = {}, extractOutput = null } = {}) {
124
+ if (!client) throw new TypeError('client is required');
125
+ return {
126
+ provider: 'openrouter',
127
+ async invoke({ messages }) {
128
+ const response = await client.chat.completions.create({
129
+ model,
130
+ messages: toOpenAIInput(messages),
131
+ ...request,
132
+ });
133
+ const output = response && response.choices && response.choices[0] && response.choices[0].message
134
+ ? stringifyContent(response.choices[0].message.content)
135
+ : '';
136
+ return defaultAdapterResult(response, output);
137
+ },
138
+ extractOutput(response) {
139
+ if (typeof extractOutput === 'function') return extractOutput(response);
140
+ return response && response.choices && response.choices[0] && response.choices[0].message
141
+ ? stringifyContent(response.choices[0].message.content)
142
+ : '';
143
+ },
144
+ };
145
+ }
146
+
147
+ module.exports = {
148
+ createOpenAIAdapter,
149
+ createAnthropicAdapter,
150
+ createGeminiAdapter,
151
+ createOpenRouterAdapter,
152
+ };