@vpdeva/blackwall-llm-shield-js 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -81,6 +81,10 @@ Use `shadowMode` with `shadowPolicyPacks` or `comparePolicyPacks` to record what
81
81
 
82
82
  Use `createOpenAIAdapter()`, `createAnthropicAdapter()`, `createGeminiAdapter()`, or `createOpenRouterAdapter()` with `protectWithAdapter()` when you want Blackwall to wrap the provider call end to end.
83
83
 
84
+ ### Controlled-pilot rollout
85
+
86
+ The current recommendation for enterprise teams is a controlled pilot first: start in shadow mode, aggregate route-level telemetry, tune suppressions explicitly, then promote the cleanest routes to enforcement.
87
+
84
88
  ### Observability and control-plane support
85
89
 
86
90
  Use `summarizeOperationalTelemetry()` with emitted telemetry events when you want route-level summaries, blocked-event counts, and rollout visibility for operators.
@@ -105,7 +109,7 @@ Use `require('@vpdeva/blackwall-llm-shield-js/providers')` for provider adapter
105
109
 
106
110
  Use it to sanitize inbound messages, mask sensitive data, score prompt-injection risk, and decide whether the request should continue to the model provider.
107
111
 
108
- It also exposes `protectModelCall()`, `protectWithAdapter()`, and `reviewModelResponse()` so you can enforce request checks before provider calls and review outputs before they go back to the user.
112
+ It also exposes `protectModelCall()`, `protectJsonModelCall()`, `protectWithAdapter()`, and `reviewModelResponse()` so you can enforce request checks before provider calls and review outputs before they go back to the user.
109
113
 
110
114
  ### `OutputFirewall`
111
115
 
@@ -129,6 +133,10 @@ Recommended presets:
129
133
  - `strict` for high-sensitivity routes
130
134
  - `ragSafe` for retrieval-heavy flows
131
135
  - `agentTools` for tool-calling and approval-gated agent actions
136
+ - `agentPlanner` for JSON-heavy planner and internal ops routes
137
+ - `documentReview` for classification and document-review pipelines
138
+ - `ragSearch` for search-heavy retrieval endpoints
139
+ - `toolCalling` for routes that broker external actions
132
140
 
133
141
  ### `AuditTrail`
134
142
 
@@ -177,6 +185,35 @@ const result = await shield.protectWithAdapter({
177
185
  console.log(result.stage, result.allowed);
178
186
  ```
179
187
 
188
+ ### Wrap Blackwall behind your own app adapter
189
+
190
+ ```js
191
+ function createModelShield(shield) {
192
+ return {
193
+ async run({ messages, metadata, callProvider }) {
194
+ return shield.protectModelCall({
195
+ messages,
196
+ metadata,
197
+ callModel: callProvider,
198
+ });
199
+ },
200
+ };
201
+ }
202
+ ```
203
+
204
+ ### Protect a strict JSON workflow
205
+
206
+ ```js
207
+ const result = await shield.protectJsonModelCall({
208
+ messages: [{ role: 'user', content: 'Return the shipment triage plan as JSON.' }],
209
+ metadata: { route: '/api/planner', feature: 'planner' },
210
+ requiredSchema: { steps: 'object' },
211
+ callModel: async () => JSON.stringify({ steps: ['triage', 'notify-ops'] }),
212
+ });
213
+
214
+ console.log(result.json.parsed);
215
+ ```
216
+
180
217
  ### Use presets and route-level policy overrides
181
218
 
182
219
  ```js
@@ -201,6 +238,18 @@ const shield = new BlackwallShield({
201
238
  });
202
239
  ```
203
240
 
241
+ ### Next.js App Router plus Gemini pattern
242
+
243
+ For App Router route handlers, the cleanest production shape is:
244
+
245
+ - parse the request in `app/api/.../route.ts`
246
+ - use `preset: 'shadowFirst'` or a route-specific preset like `agentPlanner` or `documentReview`
247
+ - attach `route`, `feature`, and `tenantId` metadata
248
+ - wrap the Gemini SDK call with `createGeminiAdapter()` plus `protectWithAdapter()`
249
+ - ship `report.telemetry` and `onTelemetry` into a route-level log sink
250
+
251
+ That keeps request guarding, output review, and operator reporting in one path without scattering policy logic across the route.
252
+
204
253
  ### Route and domain examples
205
254
 
206
255
  For RAG:
@@ -231,12 +280,47 @@ const toolFirewall = new ToolPermissionFirewall({
231
280
  });
232
281
  ```
233
282
 
283
+ For document review and verification:
284
+
285
+ ```js
286
+ const shield = new BlackwallShield({
287
+ preset: 'documentReview',
288
+ routePolicies: [
289
+ {
290
+ route: '/api/verify',
291
+ options: {
292
+ shadowMode: true,
293
+ outputFirewallDefaults: { requiredSchema: { verdict: 'string' } },
294
+ },
295
+ },
296
+ ],
297
+ });
298
+ ```
299
+
300
+ ### Choose your integration path
301
+
302
+ - Request-only guard: `guardModelRequest()`
303
+ - Request + output review: `protectModelCall()`
304
+ - Strict JSON planner/document workflows: `protectJsonModelCall()`
305
+ - Full provider wrapper: `protectWithAdapter()`
306
+ - Tool firewall + RAG sanitizer: `ToolPermissionFirewall` + `RetrievalSanitizer`
307
+
308
+ ### False-positive tuning
309
+
310
+ - Start with route-level `shadowMode: true`
311
+ - Add `suppressPromptRules` only per route, not globally, so the reason for each suppression stays obvious
312
+ - Log `report.promptInjection.matches` and `report.telemetry.promptInjectionRuleHits` to explain why a request was flagged
313
+ - Review `summary.noisiestRoutes`, `summary.byFeature`, and `summary.weeklyBlockEstimate` before raising enforcement
314
+
234
315
  ### Operational telemetry summaries
235
316
 
236
317
  ```js
237
318
  const { summarizeOperationalTelemetry } = require('@vpdeva/blackwall-llm-shield-js');
238
319
  const summary = summarizeOperationalTelemetry(events);
239
320
  console.log(summary.byRoute);
321
+ console.log(summary.byFeature);
322
+ console.log(summary.noisiestRoutes);
323
+ console.log(summary.weeklyBlockEstimate);
240
324
  console.log(summary.highestSeverity);
241
325
  ```
242
326
 
@@ -281,6 +365,14 @@ console.log(tools.inspectCall({ tool: 'lookupCustomer', args: { id: 'cus_123' }
281
365
 
282
366
  For Next.js, the most production-real patterns are App Router route handlers, server actions for trusted internal mutations, and streaming endpoints that apply output review to assembled or final chunks instead of raw intermediate tokens.
283
367
 
368
+ For Gemini-heavy apps, the bundled adapter now preserves system instructions plus mixed text/image/file parts so App Router handlers can wrap direct `@google/generative-ai` calls with less translation glue.
369
+
370
+ ## Enterprise Adoption Notes
371
+
372
+ - A controlled pilot is a good fit today when you want shadow-mode prompt and output protection without forcing hard blocking on every route immediately.
373
+ - If you prefer not to depend on Blackwall directly everywhere, wrap it behind your own internal model-security abstraction and expose only the contract your app teams need.
374
+ - For broader approval, focus rollout reviews on false-positive rates, noisiest routes, and latency budgets alongside jailbreak coverage.
375
+
284
376
  ## Release Commands
285
377
 
286
378
  - `npm run release:check` runs the JS test suite before release
package/index.d.ts CHANGED
@@ -32,6 +32,16 @@ export interface ReviewResult {
32
32
  [key: string]: unknown;
33
33
  }
34
34
 
35
/**
 * Result returned by `BlackwallShield.protectJsonModelCall()`.
 *
 * It carries every field of the underlying `protectModelCall()` result (hence
 * the open `Record` base) plus the outcome of the JSON parsing / schema step.
 */
export interface JsonProtectionResult extends Record<string, unknown> {
  /** `false` once the request guard, output review, or JSON checks reject the call. */
  allowed: boolean;
  /** `true` when the call was rejected; the JSON checks set this together with `allowed: false`. */
  blocked: boolean;
  /** Pipeline stage that produced the verdict; the JSON checks report `'output'`. */
  stage?: string;
  /** Human-readable explanation set when the JSON checks block the output. */
  reason?: string;
  /** JSON outcome; absent when the call was blocked before the JSON checks ran. */
  json?: {
    /** Parsed value, or `null` when the output could not be parsed. */
    parsed: unknown;
    /** Whether `parsed` satisfied the caller's `requiredSchema`. */
    schemaValid: boolean;
    /** Message of the error raised while parsing/validating, when one occurred. */
    parseError?: string;
  };
}
44
+
35
45
  export interface ProviderAdapter {
36
46
  provider: string;
37
47
  invoke(payload: { messages: ShieldMessage[]; metadata?: Record<string, unknown>; guard?: GuardResult }): Promise<unknown> | unknown;
@@ -54,6 +64,7 @@ export class BlackwallShield {
54
64
  guardModelRequest(input?: { messages?: ShieldMessage[]; metadata?: Record<string, unknown>; allowSystemMessages?: boolean; comparePolicyPacks?: string[] }): Promise<GuardResult>;
55
65
  reviewModelResponse(input?: { output: unknown; metadata?: Record<string, unknown>; outputFirewall?: OutputFirewall | null; firewallOptions?: Record<string, unknown> }): Promise<ReviewResult>;
56
66
  protectModelCall(input: Record<string, unknown>): Promise<Record<string, unknown>>;
67
+ protectJsonModelCall(input: Record<string, unknown>): Promise<JsonProtectionResult>;
57
68
  protectWithAdapter(input: { adapter: ProviderAdapter; messages?: ShieldMessage[]; metadata?: Record<string, unknown>; allowSystemMessages?: boolean; comparePolicyPacks?: string[]; outputFirewall?: OutputFirewall | null; firewallOptions?: Record<string, unknown> }): Promise<Record<string, unknown>>;
58
69
  }
59
70
 
@@ -85,6 +96,7 @@ export const POLICY_PACKS: Record<string, Record<string, unknown>>;
85
96
 
86
97
  export function buildShieldOptions(options?: Record<string, unknown>): Record<string, unknown>;
87
98
  export function summarizeOperationalTelemetry(events?: Array<Record<string, unknown>>): Record<string, unknown>;
99
+ export function parseJsonOutput(output: unknown): unknown;
88
100
 
89
101
  export function createOpenAIAdapter(input: Record<string, unknown>): ProviderAdapter;
90
102
  export function createAnthropicAdapter(input: Record<string, unknown>): ProviderAdapter;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@vpdeva/blackwall-llm-shield-js",
3
- "version": "0.1.6",
3
+ "version": "0.1.8",
4
4
  "description": "Open-source JavaScript enterprise LLM protection toolkit for Node.js and Next.js",
5
5
  "license": "Apache-2.0",
6
6
  "author": "Vish <hello@vish.au> (https://vish.au)",
package/src/index.js CHANGED
@@ -145,6 +145,34 @@ const SHIELD_PRESETS = {
145
145
  notifyOnRiskLevel: 'medium',
146
146
  shadowMode: false,
147
147
  },
148
+ agentPlanner: {
149
+ blockOnPromptInjection: true,
150
+ promptInjectionThreshold: 'medium',
151
+ notifyOnRiskLevel: 'medium',
152
+ shadowMode: true,
153
+ shadowPolicyPacks: ['government'],
154
+ },
155
+ documentReview: {
156
+ blockOnPromptInjection: true,
157
+ promptInjectionThreshold: 'high',
158
+ notifyOnRiskLevel: 'medium',
159
+ shadowMode: true,
160
+ policyPack: 'healthcare',
161
+ },
162
+ ragSearch: {
163
+ blockOnPromptInjection: true,
164
+ promptInjectionThreshold: 'medium',
165
+ notifyOnRiskLevel: 'medium',
166
+ shadowMode: true,
167
+ shadowPolicyPacks: ['government'],
168
+ },
169
+ toolCalling: {
170
+ blockOnPromptInjection: true,
171
+ promptInjectionThreshold: 'medium',
172
+ notifyOnRiskLevel: 'medium',
173
+ shadowMode: false,
174
+ policyPack: 'finance',
175
+ },
148
176
  };
149
177
 
150
178
  const CORE_INTERFACE_VERSION = '1.0';
@@ -152,6 +180,7 @@ const CORE_INTERFACES = Object.freeze({
152
180
  guardModelRequest: CORE_INTERFACE_VERSION,
153
181
  reviewModelResponse: CORE_INTERFACE_VERSION,
154
182
  protectModelCall: CORE_INTERFACE_VERSION,
183
+ protectJsonModelCall: CORE_INTERFACE_VERSION,
155
184
  toolPermissionFirewall: CORE_INTERFACE_VERSION,
156
185
  retrievalSanitizer: CORE_INTERFACE_VERSION,
157
186
  });
@@ -350,6 +379,7 @@ function summarizeOperationalTelemetry(events = []) {
350
379
  shadowModeEvents: 0,
351
380
  byType: {},
352
381
  byRoute: {},
382
+ byFeature: {},
353
383
  byTenant: {},
354
384
  byModel: {},
355
385
  byPolicyOutcome: {
@@ -359,11 +389,14 @@ function summarizeOperationalTelemetry(events = []) {
359
389
  },
360
390
  topRules: {},
361
391
  highestSeverity: 'low',
392
+ noisiestRoutes: [],
393
+ weeklyBlockEstimate: 0,
362
394
  };
363
395
  for (const event of Array.isArray(events) ? events : []) {
364
396
  const type = event && event.type ? event.type : 'unknown';
365
397
  const metadata = event && event.metadata ? event.metadata : {};
366
398
  const route = metadata.route || metadata.path || 'unknown';
399
+ const feature = metadata.feature || metadata.capability || route;
367
400
  const tenant = metadata.tenantId || metadata.tenant_id || 'unknown';
368
401
  const model = metadata.model || metadata.modelName || 'unknown';
369
402
  const severity = event && event.report && event.report.outputReview
@@ -372,6 +405,7 @@ function summarizeOperationalTelemetry(events = []) {
372
405
  summary.totalEvents += 1;
373
406
  summary.byType[type] = (summary.byType[type] || 0) + 1;
374
407
  summary.byRoute[route] = (summary.byRoute[route] || 0) + 1;
408
+ summary.byFeature[feature] = (summary.byFeature[feature] || 0) + 1;
375
409
  summary.byTenant[tenant] = (summary.byTenant[tenant] || 0) + 1;
376
410
  summary.byModel[model] = (summary.byModel[model] || 0) + 1;
377
411
  if (event && event.blocked) summary.blockedEvents += 1;
@@ -390,9 +424,19 @@ function summarizeOperationalTelemetry(events = []) {
390
424
  summary.topRules = Object.fromEntries(
391
425
  Object.entries(summary.topRules).sort((a, b) => b[1] - a[1]).slice(0, 10)
392
426
  );
427
+ summary.noisiestRoutes = Object.entries(summary.byRoute)
428
+ .sort((a, b) => b[1] - a[1])
429
+ .slice(0, 5)
430
+ .map(([route, count]) => ({ route, count }));
431
+ summary.weeklyBlockEstimate = summary.byPolicyOutcome.blocked + summary.byPolicyOutcome.shadowBlocked;
393
432
  return summary;
394
433
  }
395
434
 
435
/**
 * Turn a model output into a JSON value.
 *
 * Non-string outputs are returned unchanged (they are treated as already
 * structured). Strings are parsed with `JSON.parse`. Because chat models often
 * wrap JSON answers in a Markdown code fence (```json ... ```), a single fence
 * surrounding the whole string is unwrapped before parsing; a string that
 * starts with a fence is never valid JSON on its own, so this cannot change
 * the result for input that already parsed.
 *
 * @param {unknown} output - Raw or already-structured model output.
 * @returns {unknown} The parsed JSON value, or `output` itself when it is not a string.
 * @throws {SyntaxError} When a string output is not valid JSON.
 */
function parseJsonOutput(output) {
  if (typeof output !== 'string') return output;
  // Opening fence with optional language tag, body, closing fence; nothing else around it.
  const fenced = /^\s*```[a-zA-Z0-9_-]*[ \t]*\r?\n([\s\S]*?)\r?\n?[ \t]*```\s*$/.exec(output);
  return JSON.parse(fenced ? fenced[1] : output);
}
439
+
396
440
  function resolveShieldPreset(name) {
397
441
  if (!name) return {};
398
442
  return SHIELD_PRESETS[name] ? { ...SHIELD_PRESETS[name] } : {};
@@ -1352,6 +1396,69 @@ class BlackwallShield {
1352
1396
  },
1353
1397
  });
1354
1398
  }
1399
+
1400
+ async protectJsonModelCall({
1401
+ messages = [],
1402
+ metadata = {},
1403
+ allowSystemMessages = this.options.allowSystemMessages,
1404
+ comparePolicyPacks = [],
1405
+ callModel,
1406
+ mapMessages = null,
1407
+ mapOutput = null,
1408
+ outputFirewall = null,
1409
+ firewallOptions = {},
1410
+ requiredSchema = null,
1411
+ } = {}) {
1412
+ const result = await this.protectModelCall({
1413
+ messages,
1414
+ metadata,
1415
+ allowSystemMessages,
1416
+ comparePolicyPacks,
1417
+ callModel,
1418
+ mapMessages,
1419
+ mapOutput,
1420
+ outputFirewall,
1421
+ firewallOptions,
1422
+ });
1423
+ if (result.blocked) return result;
1424
+ try {
1425
+ const parsed = parseJsonOutput(result.review.maskedOutput != null ? result.review.maskedOutput : result.response);
1426
+ const schemaValid = validateRequiredSchema(parsed, requiredSchema);
1427
+ if (!schemaValid) {
1428
+ return {
1429
+ ...result,
1430
+ allowed: false,
1431
+ blocked: true,
1432
+ stage: 'output',
1433
+ reason: 'Model output failed JSON schema validation',
1434
+ json: {
1435
+ parsed,
1436
+ schemaValid: false,
1437
+ },
1438
+ };
1439
+ }
1440
+ return {
1441
+ ...result,
1442
+ json: {
1443
+ parsed,
1444
+ schemaValid: true,
1445
+ },
1446
+ };
1447
+ } catch (error) {
1448
+ return {
1449
+ ...result,
1450
+ allowed: false,
1451
+ blocked: true,
1452
+ stage: 'output',
1453
+ reason: 'Model output is not valid JSON',
1454
+ json: {
1455
+ parsed: null,
1456
+ schemaValid: false,
1457
+ parseError: error.message,
1458
+ },
1459
+ };
1460
+ }
1461
+ }
1355
1462
  }
1356
1463
 
1357
1464
  function validateGrounding(text, documents = [], options = {}) {
@@ -2060,6 +2167,7 @@ module.exports = {
2060
2167
  runRedTeamSuite,
2061
2168
  buildShieldOptions,
2062
2169
  summarizeOperationalTelemetry,
2170
+ parseJsonOutput,
2063
2171
  createOpenAIAdapter,
2064
2172
  createAnthropicAdapter,
2065
2173
  createGeminiAdapter,
package/src/providers.js CHANGED
@@ -12,6 +12,38 @@ function stringifyContent(content) {
12
12
  return String(content || '');
13
13
  }
14
14
 
15
/**
 * Translate one message content item into a Gemini `Part` object.
 *
 * Recognised shapes:
 *   - a plain string, or `{ type: 'text' | 'input_text', text }` -> `{ text }`
 *   - `{ type: 'image_url', image_url }` where `image_url` is either a URL
 *     string or an object with a `url` string. Base64 `data:` URIs become
 *     `{ inlineData: { mimeType, data } }`; any other URL becomes
 *     `{ fileData: { fileUri } }`.
 *   - `{ type: 'file' }` with `file_data` (object, passed through as
 *     `inlineData`), `file_uri`, or `file_id` (both mapped to `fileData.fileUri`)
 *   - `{ type: 'json', value }` with a string value -> `{ text: value }`
 *   - any other object carrying a string `text` -> `{ text }`
 *
 * @param {unknown} item - One element of a message's content.
 * @returns {object|null} The Gemini part, or `null` when the item is not recognised.
 */
function toGeminiPart(item) {
  if (typeof item === 'string') return { text: item };
  if (!item || typeof item !== 'object') return null;
  if ((item.type === 'text' || item.type === 'input_text') && typeof item.text === 'string') {
    return { text: item.text };
  }
  if (item.type === 'image_url') {
    // Accept both the flat string form and the nested `{ url }` form.
    const imageUrl = item.image_url && typeof item.image_url === 'object'
      ? item.image_url.url
      : item.image_url;
    if (typeof imageUrl === 'string') {
      // A data URI already contains the bytes, so send it inline rather than as a file reference.
      const dataUri = /^data:([^;,]+)(?:;[^;,]+)*;base64,(.+)$/.exec(imageUrl);
      if (dataUri) return { inlineData: { mimeType: dataUri[1], data: dataUri[2] } };
      return { fileData: { fileUri: imageUrl } };
    }
  }
  if (item.type === 'file') {
    if (item.file_data && typeof item.file_data === 'object') return { inlineData: item.file_data };
    if (typeof item.file_uri === 'string') return { fileData: { fileUri: item.file_uri } };
    if (typeof item.file_id === 'string') return { fileData: { fileUri: item.file_id } };
  }
  if (item.type === 'json' && typeof item.value === 'string') {
    return { text: item.value };
  }
  // Last resort: any object that still carries readable text.
  if (typeof item.text === 'string') return { text: item.text };
  return null;
}
35
+
36
/**
 * Build the Gemini `parts` array for one message's content.
 *
 *   - string                  -> a single text part
 *   - array                   -> each item converted with `toGeminiPart`,
 *                                unrecognised items dropped
 *   - object with `parts`     -> its `parts` array, converted recursively
 *   - any other object        -> converted with `toGeminiPart`, else stringified
 *   - anything else           -> a single text part of `String(content || '')`
 *
 * A non-empty array in which no item is recognised falls back to one text
 * part built with `stringifyContent`, mirroring the object branch, so the
 * message is not sent with an empty `parts` list.
 *
 * @param {unknown} content - The `content` field of a shield message.
 * @returns {object[]} Gemini parts for that message.
 */
function toGeminiParts(content) {
  if (typeof content === 'string') return [{ text: content }];
  if (Array.isArray(content)) {
    const parts = content.map((item) => toGeminiPart(item)).filter(Boolean);
    // Keep an explicitly empty array empty; only rescue content that was entirely unrecognised.
    if (parts.length > 0 || content.length === 0) return parts;
    return [{ text: stringifyContent(content) }];
  }
  if (content && typeof content === 'object') {
    if (Array.isArray(content.parts)) return toGeminiParts(content.parts);
    const part = toGeminiPart(content);
    return part ? [part] : [{ text: stringifyContent(content) }];
  }
  return [{ text: String(content || '') }];
}
46
+
15
47
  function toOpenAIInput(messages = []) {
16
48
  return messages.map((message) => ({
17
49
  role: message.role,
@@ -101,19 +133,30 @@ function createGeminiAdapter({ client, model, request = {}, extractOutput = null
101
133
  return {
102
134
  provider: 'gemini',
103
135
  async invoke({ messages }) {
136
+ const systemInstruction = extractSystemPrompt(messages);
104
137
  const response = await client.models.generateContent({
105
138
  model,
106
- contents: messages.map((message) => ({
107
- role: message.role === 'assistant' ? 'model' : 'user',
108
- parts: [{ text: stringifyContent(message.content) }],
109
- })),
139
+ contents: messages
140
+ .filter((message) => message.role !== 'system')
141
+ .map((message) => ({
142
+ role: message.role === 'assistant' ? 'model' : 'user',
143
+ parts: toGeminiParts(message.content),
144
+ })),
145
+ ...(systemInstruction ? { systemInstruction: { parts: [{ text: systemInstruction }] } } : {}),
110
146
  ...request,
111
147
  });
112
- return defaultAdapterResult(response, response && typeof response.text === 'string' ? response.text : '');
148
+ return defaultAdapterResult(response, this.extractOutput(response));
113
149
  },
114
150
  extractOutput(response) {
115
151
  if (typeof extractOutput === 'function') return extractOutput(response);
116
152
  if (response && typeof response.text === 'string') return response.text;
153
+ if (response && Array.isArray(response.candidates)) {
154
+ return response.candidates
155
+ .flatMap((candidate) => (((candidate || {}).content || {}).parts || []))
156
+ .map((part) => (part && typeof part.text === 'string' ? part.text : ''))
157
+ .filter(Boolean)
158
+ .join('\n');
159
+ }
117
160
  if (typeof response === 'string') return response;
118
161
  return '';
119
162
  },