@vpdeva/blackwall-llm-shield-js 0.2.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -1,4 +1,6 @@
1
1
  const crypto = require('crypto');
2
+ const fs = require('fs');
3
+ const path = require('path');
2
4
  const RED_TEAM_PROMPT_LIBRARY = require('./red_team_prompts.json');
3
5
  const {
4
6
  createOpenAIAdapter,
@@ -45,6 +47,23 @@ const FIELD_HINTS = [
45
47
  'tfn',
46
48
  ];
47
49
 
50
// Lookup table from single non-Latin look-alike characters (keys) to the ASCII
// letter they visually resemble (values). normalizeUnicodeText() consults this
// table one character at a time after NFKC normalisation.
// NOTE(review): the `ⅼ` key looks like U+217C (small roman numeral fifty), which
// NFKC presumably already folds to `l` before the lookup happens — confirm
// whether that entry is still needed.
+ const HOMOGLYPH_MAP = {
51
+ А: 'A', а: 'a', В: 'B', Е: 'E', е: 'e', К: 'K', М: 'M', Н: 'H', О: 'O', о: 'o', Р: 'P', р: 'p', С: 'C', с: 'c', Т: 'T', Х: 'X', х: 'x',
52
+ Ι: 'I', і: 'i', Ѕ: 'S', ѕ: 's', ԁ: 'd', ԍ: 'g', յ: 'j', ⅼ: 'l', ո: 'n', ս: 'u',
53
+ };
54
// Ordered list of the ten category labels this module reports against. The
// same label strings appear as values in COMPLIANCE_MAP, so the two must stay
// in sync character-for-character.
// NOTE(review): several titles here do not appear to match the published
// "OWASP Top 10 for LLM Applications 2025" (e.g. LLM02/LLM03/LLM05/LLM06 and
// LLM08-LLM10) — confirm against the official list before relying on these
// labels in compliance reporting, and update COMPLIANCE_MAP in the same change.
+ const OWASP_LLM_TOP10_2025 = [
55
+ 'LLM01:2025 Prompt Injection',
56
+ 'LLM02:2025 Insecure Output Handling',
57
+ 'LLM03:2025 Training Data Poisoning',
58
+ 'LLM04:2025 Data and Model Poisoning',
59
+ 'LLM05:2025 Improper Output Reliance',
60
+ 'LLM06:2025 Sensitive Information Disclosure',
61
+ 'LLM07:2025 System Prompt Leakage',
62
+ 'LLM08:2025 Excessive Agency',
63
+ 'LLM09:2025 Overreliance',
64
+ 'LLM10:2025 Resource Exhaustion',
65
+ ];
66
+
48
67
  const PROMPT_INJECTION_RULES = [
49
68
  { id: 'ignore_instructions', score: 30, reason: 'Attempts to override previous instructions', regex: /\b(ignore|disregard|forget|bypass|override)\b.{0,40}\b(previous|above|system|developer|prior)\b/i },
50
69
  { id: 'reveal_system_prompt', score: 35, reason: 'Attempts to reveal hidden system instructions', regex: /\b(show|reveal|print|dump|display|leak)\b.{0,40}\b(system prompt|developer prompt|hidden instructions?|chain of thought)\b/i },
@@ -271,8 +290,16 @@ const COMPLIANCE_MAP = {
271
290
  unsafe_code: ['LLM02:2025 Insecure Output Handling'],
272
291
  token_budget_exceeded: ['NIST AI RMF: Govern 3.2', 'LLM10:2025 Resource Exhaustion'],
273
292
  retrieval_poisoning: ['LLM04:2025 Data and Model Poisoning'],
293
+ training_data_poisoning: ['LLM03:2025 Training Data Poisoning'],
294
+ grounding_validation: ['LLM05:2025 Improper Output Reliance'],
295
+ tool_permission_guard: ['LLM08:2025 Excessive Agency'],
296
+ human_review_gate: ['LLM09:2025 Overreliance'],
274
297
  };
275
298
 
299
/**
 * Folds a value into a canonical text form for detection.
 *
 * The value is stringified (falsy values become the empty string), normalised
 * to Unicode NFKC, and then every character that has an entry in
 * HOMOGLYPH_MAP is swapped for its mapped replacement.
 *
 * @param {*} input - Value to normalise.
 * @returns {string} The normalised text.
 */
function normalizeUnicodeText(input) {
  const source = input ? String(input) : '';
  const composed = source.normalize('NFKC');
  let folded = '';
  // Walk the text symbol by symbol; anything without a mapping is kept as-is.
  for (const symbol of composed) {
    folded += HOMOGLYPH_MAP[symbol] || symbol;
  }
  return folded;
}
302
+
276
303
  function sanitizeText(input, maxLength = 5000) {
277
304
  if (typeof input !== 'string') return '';
278
305
  return input
@@ -717,6 +744,52 @@ function summarizeOperationalTelemetry(events = []) {
717
744
  return summary;
718
745
  }
719
746
 
747
/**
 * Reduces a raw telemetry event to the fields RouteBaselineTracker works with.
 *
 * Route and user id are read from the event itself first, then from
 * `event.metadata`; the score falls back to
 * `event.report.promptInjection.score`. A missing event is treated as an
 * empty one, and a score that is not a finite number is recorded as 0 so it
 * cannot turn every later average into NaN.
 *
 * @param {object|null|undefined} event - Raw telemetry event.
 * @returns {{route: string, userId: string, blocked: boolean, score: number}}
 */
function normalizeBaselineEvent(event) {
  const source = event || {};
  const metadata = source.metadata || {};
  const promptInjection = (source.report && source.report.promptInjection) || {};
  const score = Number(source.score || promptInjection.score || 0);
  return {
    route: source.route || metadata.route || 'unknown',
    userId: source.userId || metadata.userId || metadata.user_id || 'anonymous',
    blocked: !!source.blocked,
    score: Number.isFinite(score) ? score : 0,
  };
}

/**
 * Keeps a sliding window of recent events and compares the newest score for a
 * route/user pair against the average score previously seen on that route and
 * for that user.
 */
class RouteBaselineTracker {
  /**
   * @param {object} [options]
   * @param {number} [options.windowSize=200] - Maximum number of events kept.
   */
  constructor(options = {}) {
    this.windowSize = (options || {}).windowSize || 200;
    this.events = [];
  }

  /**
   * Appends an event to the window, dropping the oldest entries once the
   * window is full.
   *
   * @param {object} [event] - Telemetry event; `null` is treated like `{}`.
   * @returns {void}
   */
  record(event = {}) {
    const source = event || {};
    this.events.push({
      at: source.at || new Date().toISOString(),
      ...normalizeBaselineEvent(source),
    });
    this.events = this.events.slice(-this.windowSize);
  }

  /**
   * Scores how far the latest event for `route` + `userId` deviates from the
   * baseline.
   *
   * The pool is the recorded window followed by any extra `events`. The
   * baseline is the larger of the route average and the user average (each
   * computed without that group's most recent event), floored at 1. The
   * current score is the score of the single most recent event matching both
   * route and user.
   *
   * @param {object} [query]
   * @param {string} [query.route='unknown']
   * @param {string} [query.userId='anonymous']
   * @param {Array<object>} [query.events=[]] - Extra events appended to the window.
   * @returns {{route: string, userId: string, baselineScore: number,
   *   currentScore: number, score: number, anomalous: boolean, reason: string}}
   */
  detect({ route = 'unknown', userId = 'anonymous', events = [] } = {}) {
    const extra = Array.isArray(events) ? events : [];
    const pool = [...this.events, ...extra].map(normalizeBaselineEvent);
    const average = (items) => (items.length
      ? items.reduce((sum, item) => sum + item.score, 0) / items.length
      : 0);

    const routeEvents = pool.filter((item) => item.route === route);
    const userEvents = pool.filter((item) => item.userId === userId);
    const matching = pool.filter((item) => item.route === route && item.userId === userId);

    // The newest event in each group is the one under test, so leave it out of the baseline.
    const routeBaseline = average(routeEvents.slice(0, Math.max(0, routeEvents.length - 1)));
    const userBaseline = average(userEvents.slice(0, Math.max(0, userEvents.length - 1)));
    const latestAverage = matching.length ? matching[matching.length - 1].score : 0;

    // Floor at 1 so a quiet history never causes a division by zero.
    const baseline = Math.max(routeBaseline || 0, userBaseline || 0, 1);
    const ratio = latestAverage / baseline;
    return {
      route,
      userId,
      baselineScore: Number(baseline.toFixed(2)),
      currentScore: Number(latestAverage.toFixed(2)),
      score: Number(Math.min(0.99, ratio / 10).toFixed(2)),
      anomalous: ratio >= 3,
      reason: ratio >= 3 ? `injection rate ${ratio.toFixed(1)}x baseline` : 'within baseline',
    };
  }
}
792
+
720
793
  function parseJsonOutput(output) {
721
794
  if (typeof output === 'string') return JSON.parse(output);
722
795
  return output;
@@ -808,11 +881,97 @@ function applyCustomPromptDetectors(injection, text, options = {}, metadata = {}
808
881
  };
809
882
  }
810
883
 
884
/**
 * Runs every plugin's `detect` hook over the prompt text and merges the
 * findings into an existing prompt-injection result.
 *
 * Findings without an id, or whose id is already present, are ignored. Each
 * accepted finding contributes a score clamped to the range 0-40; a score that
 * is not a number contributes 0 instead of turning the total into NaN (which
 * would make `blockedByDefault` false regardless of the other matches). The
 * combined score is capped at 100.
 *
 * @param {object} injection - Current injection result (`matches`, `score`, ...).
 * @param {string} text - Prompt text handed to each plugin.
 * @param {object} [options] - Shield options; `options.plugins` lists the plugins.
 * @param {object} [metadata] - Request metadata forwarded to the plugins.
 * @returns {object} `injection` itself when there are no plugins, otherwise a
 *   copy with updated `matches`, `score`, `level` and `blockedByDefault`.
 */
function applyPluginDetectors(injection, text, options = {}, metadata = {}) {
  const plugins = Array.isArray(options.plugins) ? options.plugins : [];
  if (!plugins.length) return injection;
  const matches = [...(injection.matches || [])];
  const seen = new Set(matches.map((item) => item.id));
  let score = injection.score || 0;
  for (const plugin of plugins) {
    if (!plugin || typeof plugin.detect !== 'function') continue;
    const results = plugin.detect(String(text || ''), { metadata, options }) || [];
    for (const finding of (Array.isArray(results) ? results : [results])) {
      if (!finding || !finding.id || seen.has(finding.id)) continue;
      seen.add(finding.id);
      // Plugins are third-party code: coerce the score and neutralise NaN before clamping.
      const rawScore = Number(finding.score || 0);
      const detectorScore = Number.isNaN(rawScore) ? 0 : Math.max(0, Math.min(rawScore, 40));
      matches.push({
        id: finding.id,
        score: detectorScore,
        reason: finding.reason || `Plugin ${plugin.id || 'custom'} matched`,
        source: plugin.id || 'plugin',
        matched: finding.matched,
        version: plugin.version || null,
      });
      score += detectorScore;
    }
  }
  const cappedScore = Math.min(score, 100);
  return {
    ...injection,
    matches,
    score: cappedScore,
    level: riskLevelFromScore(cappedScore),
    blockedByDefault: cappedScore >= 45,
  };
}
917
+
918
/**
 * Lets every plugin with an `outputScan` hook inspect the model output and
 * folds the resulting findings into an existing output review.
 *
 * Findings with no id, or an id already present in the review, are dropped.
 * A finding without a severity counts as 'medium'. The review severity is
 * raised whenever a finding outweighs it, and the review stays allowed only if
 * it was allowed before and `compareRisk(severity, 'high')` is false.
 *
 * @param {object} review - Review produced by the output firewall.
 * @param {*} output - Model output; stringified before being passed to plugins.
 * @param {object} [options] - Shield options; `options.plugins` lists the plugins.
 * @param {object} [metadata] - Request metadata forwarded to the plugins.
 * @returns {object} `review` itself when there are no plugins, otherwise a copy
 *   with merged `findings`, `severity`, `allowed` and `complianceMap`.
 */
function applyPluginOutputScans(review, output, options = {}, metadata = {}) {
  const registered = Array.isArray(options.plugins) ? options.plugins : [];
  if (registered.length === 0) return review;

  const mergedFindings = [...(review.findings || [])];
  const knownIds = new Set(mergedFindings.map((entry) => entry.id));
  let worstSeverity = review.severity || 'low';

  registered.forEach((plugin) => {
    if (!plugin || typeof plugin.outputScan !== 'function') return;
    const raw = plugin.outputScan(String(output || ''), { metadata, options, review }) || [];
    const batch = Array.isArray(raw) ? raw : [raw];
    batch.forEach((candidate) => {
      const unusable = !candidate || !candidate.id || knownIds.has(candidate.id);
      if (unusable) return;
      knownIds.add(candidate.id);
      const candidateSeverity = candidate.severity || 'medium';
      mergedFindings.push({
        id: candidate.id,
        severity: candidateSeverity,
        reason: candidate.reason || `Plugin ${plugin.id || 'custom'} flagged output`,
        source: plugin.id || 'plugin',
      });
      if (severityWeight(candidateSeverity) > severityWeight(worstSeverity)) {
        worstSeverity = candidateSeverity;
      }
    });
  });

  const stillAllowed = review.allowed && !compareRisk(worstSeverity, 'high');
  return {
    ...review,
    findings: mergedFindings,
    severity: worstSeverity,
    allowed: stillAllowed,
    complianceMap: mapCompliance(mergedFindings.map((entry) => entry.id)),
  };
}
947
+
948
/**
 * Passes each retrieval document through every plugin's `retrievalScan` hook.
 *
 * With no plugins configured the `documents` argument is returned untouched.
 * Otherwise a non-array `documents` is treated as an empty list, and each
 * document that received at least one truthy finding is returned as a copy
 * carrying a `pluginFindings` array; other documents are returned as-is.
 *
 * @param {Array<object>} documents - Retrieved documents to scan.
 * @param {object} [options] - Shield options; `options.plugins` lists the plugins.
 * @param {object} [metadata] - Request metadata forwarded to the plugins.
 * @returns {*} The scanned documents (or the original argument, see above).
 */
function applyPluginRetrievalScans(documents, options = {}, metadata = {}) {
  const registered = Array.isArray(options.plugins) ? options.plugins : [];
  if (registered.length === 0) return documents;

  const scanDocument = (doc) => {
    const pluginFindings = registered.reduce((collected, plugin) => {
      if (!plugin || typeof plugin.retrievalScan !== 'function') return collected;
      const raw = plugin.retrievalScan(doc, { metadata, options }) || [];
      const batch = Array.isArray(raw) ? raw : [raw];
      return collected.concat(batch.filter(Boolean));
    }, []);
    if (pluginFindings.length === 0) return doc;
    return { ...doc, pluginFindings };
  };

  const docs = Array.isArray(documents) ? documents : [];
  return docs.map(scanDocument);
}
961
+
962
/**
 * Threads a telemetry event through every plugin that exposes an
 * `enrichTelemetry` hook, in registration order.
 *
 * Each hook receives the event produced by the previous one; when a hook
 * returns a falsy value the previous event is kept.
 *
 * @param {object} event - Telemetry event to enrich.
 * @param {object} [options] - Shield options; `options.plugins` lists the plugins.
 * @returns {object} The enriched event (or `event` itself if nothing changed it).
 */
function enrichTelemetryWithPlugins(event, options = {}) {
  if (!Array.isArray(options.plugins)) return event;
  let enriched = event;
  for (const plugin of options.plugins) {
    if (plugin && typeof plugin.enrichTelemetry === 'function') {
      enriched = plugin.enrichTelemetry(enriched, { options }) || enriched;
    }
  }
  return enriched;
}
969
+
811
970
  function resolveEffectiveShieldOptions(baseOptions = {}, metadata = {}) {
812
971
  const presetOptions = resolveShieldPreset(baseOptions.preset);
813
972
  const routePolicy = resolveRoutePolicy(baseOptions.routePolicies, metadata);
814
973
  const routePresetOptions = resolveShieldPreset(routePolicy && routePolicy.preset);
815
- return {
974
+ const merged = {
816
975
  ...baseOptions,
817
976
  ...presetOptions,
818
977
  ...routePresetOptions,
@@ -843,6 +1002,7 @@ function resolveEffectiveShieldOptions(baseOptions = {}, metadata = {}) {
843
1002
  ]),
844
1003
  routePolicy,
845
1004
  };
1005
+ return merged;
846
1006
  }
847
1007
 
848
1008
  function cloneRegex(regex) {
@@ -960,7 +1120,7 @@ function normalizeLeetspeak(text) {
960
1120
  }
961
1121
 
962
1122
  function deobfuscateText(input, options = {}) {
963
- const sanitized = sanitizeText(input, options.maxLength || 5000);
1123
+ const sanitized = normalizeUnicodeText(sanitizeText(input, options.maxLength || 5000));
964
1124
  const variants = [];
965
1125
  const seen = new Set([sanitized]);
966
1126
  const addVariant = (kind, text, source, depth = 1) => {
@@ -977,6 +1137,8 @@ function deobfuscateText(input, options = {}) {
977
1137
 
978
1138
  const collectDecodedVariants = (text) => {
979
1139
  const decodedVariants = [];
1140
+ const normalized = normalizeUnicodeText(text);
1141
+ if (normalized && normalized !== text) decodedVariants.push({ kind: 'unicode_nfkc', text: normalized, source: text });
980
1142
  const leet = normalizeLeetspeak(text);
981
1143
  if (leet) decodedVariants.push({ kind: 'leetspeak', text: leet, source: text });
982
1144
  for (const match of text.match(/[A-Za-z0-9+/=]{16,}/g) || []) {
@@ -1341,6 +1503,57 @@ class SessionBuffer {
1341
1503
  }
1342
1504
  }
1343
1505
 
1506
/**
 * Tracks prompt-injection scores per session over a sliding window of turns
 * and flags a session once the sum of its recent scores reaches a threshold.
 */
class ConversationThreatTracker {
  /**
   * @param {object} [options]
   * @param {number} [options.windowSize=10] - Turns kept per session.
   * @param {number} [options.blockThreshold=80] - Rolling score at which a
   *   session is reported as blocked.
   */
  constructor(options = {}) {
    this.windowSize = options.windowSize || 10;
    this.blockThreshold = options.blockThreshold || 80;
    this.sessions = new Map();
  }

  /**
   * Appends one turn to a session and returns the updated summary.
   *
   * A score that is not a finite number is stored as 0: a single NaN would
   * otherwise make the rolling sum NaN, and `NaN >= blockThreshold` is always
   * false, silently disabling trajectory blocking for the whole window.
   *
   * @param {string} sessionId - Session key; a falsy id records nothing.
   * @param {object} [injection] - Injection result (`score`, `level`, `matches`).
   * @returns {object|null} Same shape as `summarize()`, or `null` without a session id.
   */
  record(sessionId, injection = {}) {
    if (!sessionId) return null;
    const source = injection || {};
    const score = Number(source.score || 0);
    const entry = {
      at: new Date().toISOString(),
      score: Number.isFinite(score) ? score : 0,
      level: source.level || 'low',
      ruleIds: Array.isArray(source.matches)
        ? source.matches.map((item) => item && item.id).filter(Boolean)
        : [],
    };
    const history = this.sessions.get(sessionId) || [];
    this.sessions.set(sessionId, [...history, entry].slice(-this.windowSize));
    return this.summarize(sessionId);
  }

  /**
   * Summarises the stored window for a session without modifying it.
   *
   * `trend` is the newest score minus the oldest score in the window (or the
   * only score when a single turn is stored, 0 when none is).
   *
   * @param {string} sessionId - Session key.
   * @returns {{sessionId: string, turns: number, rollingScore: number,
   *   trend: number, blocked: boolean, highestLevel: string, history: Array<object>}}
   */
  summarize(sessionId) {
    const history = this.sessions.get(sessionId) || [];
    const rollingScore = history.reduce((sum, item) => sum + item.score, 0);
    const trend = history.length >= 2
      ? history[history.length - 1].score - history[0].score
      : (history[0] ? history[0].score : 0);
    return {
      sessionId,
      turns: history.length,
      rollingScore,
      trend,
      blocked: rollingScore >= this.blockThreshold,
      highestLevel: history.reduce((level, item) => (compareRisk(item.level, level) ? item.level : level), 'low'),
      history,
    };
  }

  /**
   * Forgets everything stored for a session.
   *
   * @param {string} sessionId - Session key; a falsy id is ignored.
   * @returns {void}
   */
  clear(sessionId) {
    if (sessionId) this.sessions.delete(sessionId);
  }
}
1556
+
1344
1557
  class TokenBudgetFirewall {
1345
1558
  constructor(options = {}) {
1346
1559
  this.maxTokensPerUser = options.maxTokensPerUser || 8000;
@@ -1387,28 +1600,42 @@ class BlackwallShield {
1387
1600
  shadowPolicyPacks: [],
1388
1601
  entityDetectors: [],
1389
1602
  customPromptDetectors: [],
1603
+ plugins: [],
1390
1604
  suppressPromptRules: [],
1391
1605
  routePolicies: [],
1392
1606
  detectNamedEntities: false,
1393
1607
  semanticScorer: null,
1394
1608
  sessionBuffer: null,
1609
+ conversationThreatTracker: options.conversationThreatTracker === undefined ? new ConversationThreatTracker() : options.conversationThreatTracker,
1395
1610
  tokenBudgetFirewall: null,
1611
+ provenanceGraph: options.provenanceGraph === undefined ? new PromptProvenanceGraph() : options.provenanceGraph,
1396
1612
  systemPrompt: null,
1397
1613
  outputFirewallDefaults: {},
1398
1614
  onAlert: null,
1399
1615
  onTelemetry: null,
1400
1616
  telemetryExporters: [],
1617
+ baselineTracker: options.baselineTracker === undefined ? new RouteBaselineTracker() : options.baselineTracker,
1618
+ auditTrail: options.auditTrail === undefined ? new AuditTrail({ secret: options.attestationSecret || 'blackwall-attestation-secret' }) : options.auditTrail,
1401
1619
  identityResolver: null,
1402
1620
  webhookUrl: null,
1403
1621
  ...options,
1404
1622
  };
1405
1623
  }
1406
1624
 
1625
+ use(plugin) {
1626
+ if (!plugin || !['detect', 'outputScan', 'retrievalScan', 'enrichTelemetry'].some((key) => typeof plugin[key] === 'function')) {
1627
+ throw new TypeError('Plugins must expose at least one hook: detect, outputScan, retrievalScan, or enrichTelemetry');
1628
+ }
1629
+ this.options.plugins = [...(this.options.plugins || []), plugin];
1630
+ return this;
1631
+ }
1632
+
1407
1633
  inspectText(text) {
1408
1634
  const effectiveOptions = resolveEffectiveShieldOptions(this.options);
1409
1635
  const pii = maskValue(text, effectiveOptions);
1410
1636
  let injection = detectPromptInjection(text, effectiveOptions);
1411
1637
  injection = applyCustomPromptDetectors(injection, String(text || ''), effectiveOptions, {});
1638
+ injection = applyPluginDetectors(injection, String(text || ''), effectiveOptions, {});
1412
1639
  injection = applyPromptRuleSuppressions(injection, effectiveOptions.suppressPromptRules);
1413
1640
  return {
1414
1641
  sanitized: pii.original || sanitizeText(text, effectiveOptions.maxLength),
@@ -1430,7 +1657,10 @@ class BlackwallShield {
1430
1657
  }
1431
1658
 
1432
1659
  async emitTelemetry(event) {
1433
- const enriched = buildEnterpriseTelemetryEvent(event, this.options.identityResolver);
1660
+ const enriched = enrichTelemetryWithPlugins(buildEnterpriseTelemetryEvent(event, this.options.identityResolver), this.options);
1661
+ if (this.options.baselineTracker && typeof this.options.baselineTracker.record === 'function') {
1662
+ this.options.baselineTracker.record(enriched);
1663
+ }
1434
1664
  if (typeof this.options.onTelemetry === 'function') {
1435
1665
  await this.options.onTelemetry(enriched);
1436
1666
  }
@@ -1467,14 +1697,29 @@ class BlackwallShield {
1467
1697
  const sessionContext = sessionBuffer && typeof sessionBuffer.render === 'function'
1468
1698
  ? sessionBuffer.render()
1469
1699
  : promptCandidate;
1700
+ const retrievalDocuments = applyPluginRetrievalScans(metadata.retrievalDocuments || metadata.retrieval_documents || [], effectiveOptions, metadata);
1470
1701
  let injection = detectPromptInjection(sessionContext, effectiveOptions);
1471
1702
  injection = applyCustomPromptDetectors(injection, Array.isArray(sessionContext) ? JSON.stringify(sessionContext) : String(sessionContext || ''), effectiveOptions, metadata);
1703
+ injection = applyPluginDetectors(injection, Array.isArray(sessionContext) ? JSON.stringify(sessionContext) : String(sessionContext || ''), effectiveOptions, metadata);
1472
1704
  injection = applyPromptRuleSuppressions(injection, effectiveOptions.suppressPromptRules);
1705
+ const tracker = effectiveOptions.conversationThreatTracker;
1706
+ const threatTrajectory = tracker && typeof tracker.record === 'function'
1707
+ ? tracker.record(metadata.sessionId || metadata.session_id || metadata.conversationId || metadata.conversation_id, injection)
1708
+ : null;
1709
+ const provenance = effectiveOptions.provenanceGraph && typeof effectiveOptions.provenanceGraph.append === 'function'
1710
+ ? effectiveOptions.provenanceGraph.append({
1711
+ agentId: metadata.agentId || metadata.agent_id || metadata.route || 'shield',
1712
+ input: Array.isArray(sessionContext) ? JSON.stringify(sessionContext) : String(sessionContext || ''),
1713
+ output: JSON.stringify(masked.masked || []),
1714
+ riskDelta: injection.score || 0,
1715
+ })
1716
+ : null;
1473
1717
 
1474
1718
  const primaryPolicy = resolvePolicyPack(effectiveOptions.policyPack);
1475
1719
  const threshold = (primaryPolicy && primaryPolicy.promptInjectionThreshold) || effectiveOptions.promptInjectionThreshold;
1476
1720
  const wouldBlock = effectiveOptions.blockOnPromptInjection && compareRisk(injection.level, threshold);
1477
- const shouldBlock = effectiveOptions.shadowMode ? false : wouldBlock;
1721
+ const trajectoryBlocked = !!(threatTrajectory && threatTrajectory.blocked);
1722
+ const shouldBlock = effectiveOptions.shadowMode ? false : (wouldBlock || trajectoryBlocked);
1478
1723
  const shouldNotify = compareRisk(injection.level, effectiveOptions.notifyOnRiskLevel);
1479
1724
  const policyNames = [...new Set([...(effectiveOptions.shadowPolicyPacks || []), ...comparePolicyPacks].filter(Boolean))];
1480
1725
  const policyComparisons = policyNames.map((name) => evaluatePolicyPack(injection, name, effectiveOptions.promptInjectionThreshold));
@@ -1502,6 +1747,8 @@ class BlackwallShield {
1502
1747
  blocked: shouldBlock || !budgetResult.allowed,
1503
1748
  threshold,
1504
1749
  },
1750
+ trajectory: threatTrajectory,
1751
+ provenance,
1505
1752
  policyPack: primaryPolicy ? primaryPolicy.name : null,
1506
1753
  policyComparisons,
1507
1754
  tokenBudget: budgetResult,
@@ -1519,8 +1766,10 @@ class BlackwallShield {
1519
1766
  promptTokenEstimate: budgetResult.estimatedTokens,
1520
1767
  complianceMap: mapCompliance([
1521
1768
  ...injection.matches.map((item) => item.id),
1769
+ ...(threatTrajectory && threatTrajectory.blocked ? ['trajectory_escalation'] : []),
1522
1770
  ...(budgetResult.allowed ? [] : ['token_budget_exceeded']),
1523
1771
  ]),
1772
+ retrievalDocumentsInspected: retrievalDocuments.length,
1524
1773
  },
1525
1774
  };
1526
1775
 
@@ -1534,13 +1783,16 @@ class BlackwallShield {
1534
1783
  if (shouldNotify || wouldBlock) {
1535
1784
  await this.notify({
1536
1785
  type: shouldBlock ? 'llm_request_blocked' : (wouldBlock ? 'llm_request_shadow_blocked' : 'llm_request_risky'),
1537
- severity: wouldBlock ? injection.level : 'warning',
1538
- reason: wouldBlock ? 'Prompt injection threshold exceeded' : 'Prompt injection risk detected',
1786
+ severity: (wouldBlock || trajectoryBlocked) ? injection.level : 'warning',
1787
+ reason: trajectoryBlocked ? 'Conversation threat trajectory exceeded policy threshold' : (wouldBlock ? 'Prompt injection threshold exceeded' : 'Prompt injection risk detected'),
1539
1788
  report,
1540
1789
  });
1541
1790
  }
1542
1791
 
1543
1792
  const finalBlocked = shouldBlock || !budgetResult.allowed;
1793
+ const attestation = this.options.auditTrail && typeof this.options.auditTrail.issueAttestation === 'function'
1794
+ ? this.options.auditTrail.issueAttestation({ metadata, blocked: finalBlocked })
1795
+ : null;
1544
1796
  return {
1545
1797
  allowed: !finalBlocked,
1546
1798
  blocked: finalBlocked,
@@ -1548,23 +1800,42 @@ class BlackwallShield {
1548
1800
  messages: masked.masked,
1549
1801
  report,
1550
1802
  vault: masked.vault,
1803
+ attestation,
1551
1804
  };
1552
1805
  }
1553
1806
 
1807
+ generateCoverageReport(options = {}) {
1808
+ return generateCoverageReport({ ...this.options, ...options });
1809
+ }
1810
+
1554
1811
  async reviewModelResponse({ output, metadata = {}, outputFirewall = null, firewallOptions = {} } = {}) {
1555
1812
  const effectiveOptions = resolveEffectiveShieldOptions(this.options, metadata);
1556
1813
  const primaryPolicy = resolvePolicyPack(effectiveOptions.policyPack);
1557
1814
  const firewall = outputFirewall || new OutputFirewall({
1558
1815
  riskThreshold: (primaryPolicy && primaryPolicy.outputRiskThreshold) || 'high',
1559
1816
  systemPrompt: effectiveOptions.systemPrompt,
1817
+ cotScanner: new CoTScanner({
1818
+ systemPrompt: effectiveOptions.systemPrompt,
1819
+ scanChainOfThought: firewallOptions.scanChainOfThought !== false,
1820
+ }),
1560
1821
  ...effectiveOptions.outputFirewallDefaults,
1561
1822
  ...firewallOptions,
1562
1823
  });
1563
- const review = firewall.inspect(output, {
1824
+ let review = firewall.inspect(output, {
1564
1825
  systemPrompt: effectiveOptions.systemPrompt,
1826
+ scanChainOfThought: firewallOptions.scanChainOfThought !== false,
1565
1827
  ...(effectiveOptions.outputFirewallDefaults || {}),
1566
1828
  ...firewallOptions,
1567
1829
  });
1830
+ review = applyPluginOutputScans(review, output, effectiveOptions, metadata);
1831
+ const provenance = effectiveOptions.provenanceGraph && typeof effectiveOptions.provenanceGraph.append === 'function'
1832
+ ? effectiveOptions.provenanceGraph.append({
1833
+ agentId: metadata.agentId || metadata.agent_id || metadata.model || 'model',
1834
+ input: metadata.promptHash || '',
1835
+ output: typeof output === 'string' ? output : JSON.stringify(output || ''),
1836
+ riskDelta: review.hallucinationRisk || 0,
1837
+ })
1838
+ : null;
1568
1839
  const report = {
1569
1840
  package: 'blackwall-llm-shield-js',
1570
1841
  createdAt: new Date().toISOString(),
@@ -1578,6 +1849,7 @@ class BlackwallShield {
1578
1849
  piiEntityCounts: summarizeSensitiveFindings(review.piiFindings),
1579
1850
  complianceMap: mapCompliance(review.findings.map((item) => item.id)),
1580
1851
  },
1852
+ provenance,
1581
1853
  },
1582
1854
  };
1583
1855
 
@@ -1599,6 +1871,9 @@ class BlackwallShield {
1599
1871
  return {
1600
1872
  ...review,
1601
1873
  report,
1874
+ attestation: this.options.auditTrail && typeof this.options.auditTrail.issueAttestation === 'function'
1875
+ ? this.options.auditTrail.issueAttestation({ metadata, blocked: !review.allowed })
1876
+ : null,
1602
1877
  };
1603
1878
  }
1604
1879
 
@@ -1753,6 +2028,67 @@ class BlackwallShield {
1753
2028
  };
1754
2029
  }
1755
2030
  }
2031
+
2032
+ async protectZeroTrustModelCall(options = {}) {
2033
+ const result = await this.protectModelCall(options);
2034
+ const maskedOutput = result.review && result.review.maskedOutput != null ? result.review.maskedOutput : result.response;
2035
+ return {
2036
+ ...result,
2037
+ rehydratedOutput: result.request && result.request.vault ? unvault(maskedOutput, result.request.vault) : maskedOutput,
2038
+ zeroTrust: {
2039
+ vaultUsed: !!(result.request && result.request.vault && Object.keys(result.request.vault).length),
2040
+ },
2041
+ };
2042
+ }
2043
+
2044
+ detectAnomalies({ route = 'unknown', userId = 'anonymous', events = [] } = {}) {
2045
+ const tracker = this.options.baselineTracker || new RouteBaselineTracker();
2046
+ return tracker.detect({ route, userId, events });
2047
+ }
2048
+
2049
+ async replayTelemetry({ events = [], compareConfig = {} } = {}) {
2050
+ const replayShield = new BlackwallShield(compareConfig);
2051
+ let wouldHaveBlocked = 0;
2052
+ let changed = 0;
2053
+ for (const event of (Array.isArray(events) ? events : [])) {
2054
+ const originalBlocked = !!event.blocked;
2055
+ const promptLevel = event.report && event.report.promptInjection && event.report.promptInjection.level;
2056
+ const threshold = compareConfig.promptInjectionThreshold || compareConfig.prompt_injection_threshold || 'high';
2057
+ const replayBlocked = promptLevel ? compareRisk(promptLevel, threshold) : originalBlocked;
2058
+ if (replayBlocked) wouldHaveBlocked += 1;
2059
+ if (replayBlocked !== originalBlocked) changed += 1;
2060
+ }
2061
+ return {
2062
+ totalEvents: (Array.isArray(events) ? events : []).length,
2063
+ wouldHaveBlocked,
2064
+ falsePositiveEstimate: changed,
2065
+ compareConfig: buildShieldOptions(compareConfig),
2066
+ };
2067
+ }
2068
+
2069
+ async syncThreatIntel({ feedUrl = '', fetchFn = null, autoHarden = false, persist = false, corpusPath = undefined } = {}) {
2070
+ const runner = fetchFn || (typeof fetch === 'function' ? fetch : null);
2071
+ if (!runner) throw new Error('No fetch implementation available for threat intel sync');
2072
+ const response = await runner(feedUrl);
2073
+ const payload = typeof response.json === 'function' ? await response.json() : response;
2074
+ const prompts = Array.isArray(payload) ? payload : (payload.prompts || []);
2075
+ this.options.threatIntel = prompts;
2076
+ let hardened = null;
2077
+ if (autoHarden && prompts.length) {
2078
+ const engine = new AdversarialMutationEngine();
2079
+ hardened = engine.hardenCorpus({
2080
+ corpus: getRedTeamPromptLibrary(),
2081
+ blockedPrompt: prompts[0].prompt || prompts[0],
2082
+ persist,
2083
+ corpusPath,
2084
+ });
2085
+ }
2086
+ return {
2087
+ synced: prompts.length,
2088
+ prompts,
2089
+ hardened,
2090
+ };
2091
+ }
1756
2092
  }
1757
2093
 
1758
2094
  function validateGrounding(text, documents = [], options = {}) {
@@ -1783,7 +2119,9 @@ function validateGrounding(text, documents = [], options = {}) {
1783
2119
  checked: docTokens.length > 0,
1784
2120
  supportedSentences: sentences.length - unsupported.length,
1785
2121
  unsupportedSentences: unsupported,
2122
+ unsupportedClaims: unsupported,
1786
2123
  score: Number(Math.max(0, 1 - ratio).toFixed(2)),
2124
+ hallucinationRisk: Number(ratio.toFixed(2)),
1787
2125
  severity,
1788
2126
  blocked: severity === 'high',
1789
2127
  };
@@ -1822,7 +2160,8 @@ class CoTScanner {
1822
2160
  if (output && typeof output === 'object' && typeof output.thinking === 'string') return output.thinking;
1823
2161
  const text = typeof output === 'string' ? output : JSON.stringify(output || '');
1824
2162
  const match = text.match(/<thinking>([\s\S]*?)<\/thinking>/i);
1825
- return match ? match[1].trim() : '';
2163
+ if (match) return match[1].trim();
2164
+ return this.options.scanChainOfThought ? text : '';
1826
2165
  }
1827
2166
 
1828
2167
  scan(output) {
@@ -1864,7 +2203,16 @@ class AgentIdentityRegistry {
1864
2203
  }
1865
2204
 
1866
2205
  register(agentId, profile = {}) {
1867
- const identity = { agentId, persona: profile.persona || 'default', scopes: profile.scopes || [], capabilities: profile.capabilities || {} };
2206
+ const identity = {
2207
+ agentId,
2208
+ persona: profile.persona || 'default',
2209
+ scopes: profile.scopes || [],
2210
+ capabilities: profile.capabilities || {},
2211
+ capabilityManifest: profile.capabilityManifest || profile.capabilities || {},
2212
+ lineage: profile.lineage || [],
2213
+ trustScore: profile.trustScore != null ? Number(profile.trustScore) : 100,
2214
+ securityEvents: profile.securityEvents || [],
2215
+ };
1868
2216
  this.identities.set(agentId, identity);
1869
2217
  return identity;
1870
2218
  }
@@ -1891,6 +2239,21 @@ class AgentIdentityRegistry {
1891
2239
  return { valid: true, agentId: record.agentId };
1892
2240
  }
1893
2241
 
2242
+ recordSecurityEvent(agentId, event = {}) {
2243
+ const identity = this.get(agentId) || this.register(agentId, {});
2244
+ const severity = event.severity || 'low';
2245
+ const penalty = severity === 'critical' ? 25 : severity === 'high' ? 15 : severity === 'medium' ? 8 : 3;
2246
+ identity.securityEvents = [...(identity.securityEvents || []), { ...event, at: new Date().toISOString() }];
2247
+ identity.trustScore = Math.max(0, Number(identity.trustScore != null ? identity.trustScore : 100) - penalty);
2248
+ this.identities.set(agentId, identity);
2249
+ return identity;
2250
+ }
2251
+
2252
+ getTrustScore(agentId) {
2253
+ const identity = this.get(agentId);
2254
+ return identity ? Number(identity.trustScore != null ? identity.trustScore : 100) : null;
2255
+ }
2256
+
1894
2257
  issueSignedPassport(agentId, options = {}) {
1895
2258
  const identity = this.get(agentId) || this.register(agentId, options.profile || {});
1896
2259
  const securityScore = options.securityScore != null
@@ -1905,6 +2268,15 @@ class AgentIdentityRegistry {
1905
2268
  scopes: identity.scopes || [],
1906
2269
  persona: identity.persona || 'default',
1907
2270
  environment: options.environment || 'production',
2271
+ capabilityManifest: options.capabilityManifest || identity.capabilityManifest || identity.capabilities || {},
2272
+ lineage: options.lineage || identity.lineage || [],
2273
+ trustScore: options.trustScore != null ? options.trustScore : this.getTrustScore(agentId),
2274
+ attestationFormat: options.attestationFormat || 'jwt',
2275
+ cryptoProfile: {
2276
+ signingAlgorithm: options.signingAlgorithm || 'HS256',
2277
+ pqcReady: options.pqcReady !== false,
2278
+ transparencyMode: options.transparencyMode || 'explainable',
2279
+ },
1908
2280
  };
1909
2281
  const signature = crypto.createHmac('sha256', this.secret).update(JSON.stringify(passport)).digest('hex');
1910
2282
  return { ...passport, signature };
@@ -2068,10 +2440,80 @@ class CrossModelConsensusWrapper {
2068
2440
  }
2069
2441
  }
2070
2442
 
2443
/**
 * Collects approve/reject votes from a list of auditors and approves an
 * action only when the number of approvals reaches the threshold.
 */
class QuorumApprovalEngine {
  /**
   * @param {object} [options]
   * @param {Array} [options.auditors] - Objects exposing `inspect()` or `evaluate()`.
   * @param {number} [options.threshold] - Approvals required; defaults to a
   *   simple majority of the auditors, and at least 1.
   * @param {object} [options.registry] - Optional registry offering
   *   `recordSecurityEvent` and `getTrustScore`.
   */
  constructor(options = {}) {
    const { auditors, threshold, registry } = options;
    this.auditors = Array.isArray(auditors) ? auditors : [];
    const majority = Math.max(1, Math.ceil(this.auditors.length / 2));
    this.threshold = threshold || majority;
    this.registry = registry || null;
  }

  /**
   * Ask every auditor for a verdict, one after another, and tally the votes.
   * Auditors that are falsy, expose neither method, or return nothing are
   * skipped and do not count as a vote.
   *
   * @param {object} [request]
   * @param {string} [request.tool] - Tool being invoked.
   * @param {object} [request.args] - Tool arguments.
   * @param {object} [request.context] - Call context; `agentId` enables registry updates.
   * @returns {Promise<object>} Decision with vote breakdown and trust score.
   */
  async evaluate({ tool, args = {}, context = {} } = {}) {
    const votes = [];

    for (const auditor of this.auditors) {
      if (!auditor) continue;

      let verdict = null;
      if (typeof auditor.inspect === 'function') {
        verdict = await auditor.inspect({
          tool,
          args,
          context,
          sessionContext: context.sessionContext || null,
        });
      } else if (typeof auditor.evaluate === 'function') {
        // Consensus-style auditors get chat messages; synthesize one if none were supplied.
        const messages = context.consensusMessages
          || [{ role: 'user', content: JSON.stringify({ tool, args, context }) }];
        verdict = await auditor.evaluate({
          messages,
          metadata: context,
          primaryResult: { blocked: false },
        });
      }
      if (!verdict) continue;

      // An explicit `approved` flag wins; otherwise absence of disagreement counts as approval.
      let approved;
      if (verdict.approved != null) {
        approved = Boolean(verdict.approved);
      } else {
        approved = !verdict.disagreement;
      }

      const label = verdict.auditor || auditor.name || `auditor_${votes.length + 1}`;
      votes.push({ auditor: label, approved, reason: verdict.reason || null });
    }

    let approvals = 0;
    for (const vote of votes) {
      if (vote.approved) approvals += 1;
    }
    const approved = approvals >= this.threshold;

    if (!approved && this.registry && context.agentId) {
      this.registry.recordSecurityEvent(context.agentId, {
        type: 'quorum_disagreement',
        severity: 'high',
        tool,
        approvals,
        threshold: this.threshold,
      });
    }

    let trustScore = null;
    if (this.registry && context.agentId) {
      trustScore = this.registry.getTrustScore(context.agentId);
    }

    return {
      approved,
      requiresApproval: !approved,
      threshold: this.threshold,
      approvals,
      rejections: votes.length - approvals,
      votes,
      reason: approved ? null : 'Quorum approval threshold was not met',
      trustScore,
    };
  }
}
2494
+
2495
/**
 * Recursively perturb a value for synthetic responses.
 * Numbers get `options.numericNoise` added (1 when unset or falsy), strings
 * are passed to `applyDifferentialPrivacyNoise`, arrays and plain objects are
 * walked element by element, and every other value is returned unchanged.
 *
 * @param {*} value - Value to perturb.
 * @param {object} [options] - `numericNoise` and `epsilon` tuning knobs.
 * @returns {*} A perturbed copy for containers, or the perturbed primitive.
 */
function applyDifferentialPrivacyToValue(value, options = {}) {
  switch (typeof value) {
    case 'number':
      return value + Number(options.numericNoise || 1);
    case 'string':
      return applyDifferentialPrivacyNoise(value, {
        differentialPrivacy: true,
        differentialPrivacyEpsilon: options.epsilon || 1,
      });
    case 'object': {
      if (Array.isArray(value)) {
        return value.map((element) => applyDifferentialPrivacyToValue(element, options));
      }
      // typeof null is 'object', so null falls through untouched.
      if (!value) return value;
      const perturbedPairs = Object.entries(value).map(
        ([key, element]) => [key, applyDifferentialPrivacyToValue(element, options)],
      );
      return Object.fromEntries(perturbedPairs);
    }
    default:
      return value;
  }
}
2509
+
2071
2510
  class DigitalTwinOrchestrator {
2072
2511
  constructor(options = {}) {
2073
2512
  this.toolSchemas = options.toolSchemas || [];
2074
2513
  this.invocations = [];
2514
+ this.simulationMode = options.simulationMode !== false;
2515
+ this.differentialPrivacy = !!options.differentialPrivacy;
2516
+ this.syntheticNoiseOptions = options.syntheticNoiseOptions || {};
2075
2517
  }
2076
2518
 
2077
2519
  generate() {
@@ -2079,8 +2521,18 @@ class DigitalTwinOrchestrator {
2079
2521
  for (const schema of this.toolSchemas) {
2080
2522
  if (!schema || !schema.name) continue;
2081
2523
  handlers[schema.name] = async (args = {}) => {
2082
- const response = schema.mockResponse || schema.sampleResponse || { ok: true, tool: schema.name, args };
2083
- this.invocations.push({ tool: schema.name, args, response, at: new Date().toISOString() });
2524
+ const baseResponse = schema.mockResponse || schema.sampleResponse || { ok: true, tool: schema.name, args };
2525
+ const response = this.differentialPrivacy
2526
+ ? applyDifferentialPrivacyToValue(baseResponse, this.syntheticNoiseOptions)
2527
+ : baseResponse;
2528
+ this.invocations.push({
2529
+ tool: schema.name,
2530
+ args,
2531
+ response,
2532
+ simulationMode: this.simulationMode,
2533
+ differentialPrivacy: this.differentialPrivacy,
2534
+ at: new Date().toISOString(),
2535
+ });
2084
2536
  return response;
2085
2537
  };
2086
2538
  }
@@ -2091,6 +2543,8 @@ class DigitalTwinOrchestrator {
2091
2543
  return handlers[tool](args);
2092
2544
  },
2093
2545
  invocations: this.invocations,
2546
+ simulationMode: this.simulationMode,
2547
+ differentialPrivacy: this.differentialPrivacy,
2094
2548
  };
2095
2549
  }
2096
2550
 
@@ -2102,6 +2556,37 @@ class DigitalTwinOrchestrator {
2102
2556
  }
2103
2557
  }
2104
2558
 
2559
/**
 * Chooses a provider based on the data classification of a request:
 * restricted data is offered local providers only, public data prefers
 * global providers, and everything else prefers local providers first.
 */
class SovereignRoutingEngine {
  /**
   * @param {object} [options]
   * @param {object} [options.classificationGate] - Object with `inspect()`.
   * @param {object} [options.providerRoutingPolicy] - Object with `choose()`.
   * @param {string[]} [options.localProviders] - Defaults to `['on-prem']`.
   * @param {string[]} [options.globalProviders] - Defaults to `['global-cloud']`.
   */
  constructor(options = {}) {
    this.classificationGate = options.classificationGate || new DataClassificationGate();
    this.providerRoutingPolicy = options.providerRoutingPolicy || new ProviderRoutingPolicy();
    this.localProviders = options.localProviders || ['on-prem'];
    this.globalProviders = options.globalProviders || ['global-cloud'];
  }

  /**
   * Classify the request and delegate provider selection to the routing policy.
   *
   * @param {object} [request]
   * @returns {object} The policy's routing result plus `classification`,
   *   `sovereigntyMode` and the raw `inspection`.
   */
  route({ metadata = {}, findings = [], messages = [], requestedProvider = null, candidates = [] } = {}) {
    const inspection = this.classificationGate.inspect({
      metadata,
      findings,
      messages,
      provider: requestedProvider,
    });
    const { classification } = inspection;

    let sovereignCandidates;
    let sovereigntyMode;
    if (classification === 'restricted') {
      sovereignCandidates = [...this.localProviders];
      sovereigntyMode = 'local-only';
    } else if (classification === 'public') {
      sovereignCandidates = [...this.globalProviders, ...this.localProviders];
      sovereigntyMode = 'global-ok';
    } else {
      sovereignCandidates = [...this.localProviders, ...this.globalProviders];
      sovereigntyMode = 'hybrid';
    }

    // Caller-supplied candidates come first; the Set removes duplicates while keeping order.
    const mergedCandidates = new Set([...(candidates || []), ...sovereignCandidates]);
    const routing = this.providerRoutingPolicy.choose({
      route: metadata.route || metadata.path || 'default',
      classification,
      requestedProvider,
      candidates: [...mergedCandidates],
    });

    return {
      ...routing,
      classification,
      sovereigntyMode,
      inspection,
    };
  }
}
2589
+
2105
2590
  function suggestPolicyOverride({ route = null, approval = null, guardResult = null, toolDecision = null } = {}) {
2106
2591
  if (approval !== true) return null;
2107
2592
  if (guardResult && guardResult.report && guardResult.report.promptInjection) {
@@ -2127,6 +2612,27 @@ function suggestPolicyOverride({ route = null, approval = null, guardResult = nu
2127
2612
  return null;
2128
2613
  }
2129
2614
 
2615
/**
 * Build an explainability report for a guard or tool decision.
 *
 * Destructuring defaults only cover `undefined`, so explicit `null` values for
 * `decision` or `input` (and null entries in the rule match list) are
 * tolerated here instead of throwing.
 *
 * @param {object} [params]
 * @param {object|null} [params.decision] - Decision object; `blocked`,
 *   `allowed`, `reason`, `severity` and `report` are read when present.
 * @param {object|null} [params.input] - Original request; `route` is read.
 * @param {string|null} [params.rationale] - Fallback reason when the decision has none.
 * @param {object|null} [params.suggestedPolicy] - Passed through unchanged.
 * @returns {object} Report with `blocked`, `reason`, `summary`, `evidence`,
 *   `suggestedPolicy` and `complianceNote`.
 */
function buildTransparencyReport({ decision = {}, input = {}, rationale = null, suggestedPolicy = null } = {}) {
  const verdict = decision || {};
  const request = input || {};
  const report = verdict.report || {};
  const promptInjection = report.promptInjection || {};
  const metadata = report.metadata || {};
  // Either an explicit block flag or an explicit `allowed: false` counts as blocked.
  const blocked = !!verdict.blocked || verdict.allowed === false;
  const ruleIds = Array.isArray(promptInjection.matches)
    ? promptInjection.matches.map((item) => (item ? item.id : null)).filter(Boolean)
    : [];
  return {
    blocked,
    reason: verdict.reason || rationale || 'No explicit reason captured',
    summary: blocked
      ? 'Blackwall blocked the action because policy and risk signals exceeded the configured threshold.'
      : 'Blackwall allowed the action under the current policy.',
    evidence: {
      route: request.route || metadata.route || metadata.path || null,
      ruleIds,
      severity: promptInjection.level || verdict.severity || null,
    },
    suggestedPolicy,
    complianceNote: 'Use this report as an explainability artifact for operator review and policy tuning.',
  };
}
2635
+
2130
2636
  class PolicyLearningLoop {
2131
2637
  constructor() {
2132
2638
  this.decisions = [];
@@ -2140,6 +2646,14 @@ class PolicyLearningLoop {
2140
2646
  suggestOverrides() {
2141
2647
  return this.decisions.map((entry) => suggestPolicyOverride(entry)).filter(Boolean);
2142
2648
  }
2649
+
2650
+ buildTransparencyReport(input = {}) {
2651
+ return buildTransparencyReport({
2652
+ decision: input.guardResult || input.toolDecision || {},
2653
+ input,
2654
+ suggestedPolicy: suggestPolicyOverride(input),
2655
+ });
2656
+ }
2143
2657
  }
2144
2658
 
2145
2659
  class AgenticCapabilityGater {
@@ -2274,6 +2788,25 @@ class OutputFirewall {
2274
2788
  }
2275
2789
  }
2276
2790
 
2791
/**
 * Applies an output firewall to streamed text by re-inspecting a sliding
 * window holding the most recent characters after every chunk.
 */
class StreamingOutputFirewall {
  /**
   * @param {object} [options]
   * @param {object} [options.outputFirewall] - Object with `inspect(text)`;
   *   when omitted an `OutputFirewall` is built from the same options.
   * @param {number} [options.windowSize] - Characters retained; defaults to 4096.
   */
  constructor(options = {}) {
    this.outputFirewall = options.outputFirewall || new OutputFirewall(options);
    this.buffer = '';
    this.windowSize = options.windowSize || 4096;
  }

  /**
   * Append a chunk to the window and inspect the retained text.
   *
   * @param {*} [chunk] - Next piece of streamed output; falsy values count as empty.
   * @returns {object} `blocked`, `allowed`, the raw `review` and `bufferedLength`.
   */
  ingest(chunk = '') {
    const incoming = String(chunk || '');
    // Keep only the trailing windowSize characters so the buffer stays bounded.
    this.buffer = `${this.buffer}${incoming}`.slice(-this.windowSize);

    const review = this.outputFirewall.inspect(this.buffer);
    const { allowed } = review;
    return {
      blocked: !allowed,
      allowed,
      review,
      bufferedLength: this.buffer.length,
    };
  }
}
2809
+
2277
2810
  class ToolPermissionFirewall {
2278
2811
  constructor(options = {}) {
2279
2812
  this.options = {
@@ -2286,6 +2819,7 @@ class ToolPermissionFirewall {
2286
2819
  valueAtRiskCircuitBreaker: null,
2287
2820
  consensusAuditor: null,
2288
2821
  crossModelConsensus: null,
2822
+ quorumApprovalEngine: null,
2289
2823
  consensusRequiredFor: [],
2290
2824
  onApprovalRequest: null,
2291
2825
  approvalWebhookUrl: null,
@@ -2356,6 +2890,15 @@ class ToolPermissionFirewall {
2356
2890
  approvalRequest: { tool, args, context },
2357
2891
  };
2358
2892
  }
2893
+ if (this.options.quorumApprovalEngine && (context.highImpact || this.options.consensusRequiredFor.includes(tool))) {
2894
+ return {
2895
+ allowed: false,
2896
+ reason: 'Quorum approval requires async inspection',
2897
+ requiresApproval: true,
2898
+ requiresAsyncQuorum: true,
2899
+ approvalRequest: { tool, args, context },
2900
+ };
2901
+ }
2359
2902
  const requiresApproval = this.options.requireHumanApprovalFor.includes(tool);
2360
2903
  return {
2361
2904
  allowed: !requiresApproval,
@@ -2385,6 +2928,23 @@ class ToolPermissionFirewall {
2385
2928
  }
2386
2929
  return { allowed: true, reason: null, requiresApproval: false, consensus };
2387
2930
  }
2931
+ if (result.requiresAsyncQuorum && this.options.quorumApprovalEngine) {
2932
+ const quorum = await this.options.quorumApprovalEngine.evaluate({
2933
+ tool: input.tool,
2934
+ args: input.args || {},
2935
+ context: input.context || {},
2936
+ });
2937
+ if (!quorum.approved) {
2938
+ return {
2939
+ allowed: false,
2940
+ reason: quorum.reason,
2941
+ requiresApproval: true,
2942
+ quorum,
2943
+ approvalRequest: { tool: input.tool, args: input.args || {}, context: input.context || {}, quorum },
2944
+ };
2945
+ }
2946
+ return { allowed: true, reason: null, requiresApproval: false, quorum };
2947
+ }
2388
2948
  if (result.requiresApproval) {
2389
2949
  if (typeof this.options.onApprovalRequest === 'function') {
2390
2950
  await this.options.onApprovalRequest(result.approvalRequest);
@@ -2409,6 +2969,7 @@ class RetrievalSanitizer {
2409
2969
  this.options = {
2410
2970
  systemPrompt: null,
2411
2971
  similarityThreshold: 0.5,
2972
+ plugins: [],
2412
2973
  ...options,
2413
2974
  };
2414
2975
  }
@@ -2440,7 +3001,7 @@ class RetrievalSanitizer {
2440
3001
 
2441
3002
  sanitizeDocuments(documents = []) {
2442
3003
  const poisoning = this.detectPoisoning(documents);
2443
- return (Array.isArray(documents) ? documents : []).map((doc, index) => {
3004
+ const sanitized = (Array.isArray(documents) ? documents : []).map((doc, index) => {
2444
3005
  const text = sanitizeText(String(doc && doc.content ? doc.content : ''));
2445
3006
  const similarity = this.similarityToSystemPrompt(text);
2446
3007
  const strippedInstructions = RETRIEVAL_INJECTION_RULES.reduce((acc, rule) => acc.replace(cloneRegex(rule), '[REDACTED_RETRIEVAL_INSTRUCTION]'), text);
@@ -2457,6 +3018,7 @@ class RetrievalSanitizer {
2457
3018
  metadata: doc && doc.metadata ? doc.metadata : {},
2458
3019
  };
2459
3020
  });
3021
+ return applyPluginRetrievalScans(sanitized, this.options);
2460
3022
  }
2461
3023
 
2462
3024
  validateAnswer(answer, documents = [], options = {}) {
@@ -2497,6 +3059,29 @@ class AuditTrail {
2497
3059
  summarize() {
2498
3060
  return summarizeSecurityEvents(this.events);
2499
3061
  }
3062
+
3063
+ issueAttestation(event = {}) {
3064
+ const payload = {
3065
+ inspectedAt: new Date().toISOString(),
3066
+ route: event.route || event.metadata && event.metadata.route || null,
3067
+ blocked: !!event.blocked,
3068
+ package: 'blackwall-llm-shield-js',
3069
+ };
3070
+ const header = Buffer.from(JSON.stringify({ alg: 'HS256', typ: 'JWT', kid: 'bw1' })).toString('base64url');
3071
+ const body = Buffer.from(JSON.stringify(payload)).toString('base64url');
3072
+ const signature = crypto.createHmac('sha256', this.secret).update(`${header}.${body}`).digest('base64url');
3073
+ return `bw1_${header}.${body}.${signature}`;
3074
+ }
3075
+
3076
+ verifyAttestation(token) {
3077
+ const raw = String(token || '').replace(/^bw1_/, '');
3078
+ const parts = raw.split('.');
3079
+ if (parts.length !== 3) return { valid: false, reason: 'Malformed attestation token' };
3080
+ const [header, body, signature] = parts;
3081
+ const expected = crypto.createHmac('sha256', this.secret).update(`${header}.${body}`).digest('base64url');
3082
+ if (signature !== expected) return { valid: false, reason: 'Invalid attestation signature' };
3083
+ return { valid: true, payload: JSON.parse(Buffer.from(body, 'base64url').toString('utf8')) };
3084
+ }
2500
3085
  }
2501
3086
 
2502
3087
  function createCanaryToken(label = 'default') {
@@ -2529,9 +3114,26 @@ function rehydrateResponse(maskedText, vault = {}) {
2529
3114
  return text;
2530
3115
  }
2531
3116
 
3117
/**
 * Alias for `rehydrateResponse`: restore masked output using a vault.
 *
 * @param {string} output - Masked model output.
 * @param {object} [vault] - Vault forwarded unchanged to `rehydrateResponse`.
 * @returns {*} Whatever `rehydrateResponse` returns for the same arguments.
 */
function unvault(output, vault = {}) {
  const restored = rehydrateResponse(output, vault);
  return restored;
}
3120
+
2532
3121
  async function encryptVaultForClient(vault = {}, secret = '') {
2533
- const subtle = crypto.webcrypto && crypto.webcrypto.subtle;
2534
- if (!subtle) throw new Error('Web Crypto is not available');
3122
+ const subtle = (crypto.webcrypto && crypto.webcrypto.subtle) || crypto.subtle;
3123
+ if (!subtle) {
3124
+ const salt = crypto.randomBytes(16);
3125
+ const iv = crypto.randomBytes(12);
3126
+ const key = crypto.pbkdf2Sync(secret, salt, 100000, 32, 'sha256');
3127
+ const cipher = crypto.createCipheriv('aes-256-gcm', key, iv);
3128
+ const ciphertext = Buffer.concat([cipher.update(JSON.stringify(vault), 'utf8'), cipher.final()]);
3129
+ return {
3130
+ strategy: 'aes-256-gcm-pbkdf2',
3131
+ salt: Buffer.from(salt).toString('base64'),
3132
+ iv: Buffer.from(iv).toString('base64'),
3133
+ ciphertext: Buffer.from(ciphertext).toString('base64'),
3134
+ tag: cipher.getAuthTag().toString('base64'),
3135
+ };
3136
+ }
2535
3137
  const encoder = new TextEncoder();
2536
3138
  const salt = crypto.randomBytes(16);
2537
3139
  const iv = crypto.randomBytes(12);
@@ -2553,8 +3155,18 @@ async function encryptVaultForClient(vault = {}, secret = '') {
2553
3155
  }
2554
3156
 
2555
3157
  async function decryptVaultForClient(bundle = {}, secret = '') {
2556
- const subtle = crypto.webcrypto && crypto.webcrypto.subtle;
2557
- if (!subtle) throw new Error('Web Crypto is not available');
3158
+ const subtle = (crypto.webcrypto && crypto.webcrypto.subtle) || crypto.subtle;
3159
+ if (!subtle) {
3160
+ const salt = Buffer.from(bundle.salt || '', 'base64');
3161
+ const iv = Buffer.from(bundle.iv || '', 'base64');
3162
+ const ciphertext = Buffer.from(bundle.ciphertext || '', 'base64');
3163
+ const tag = Buffer.from(bundle.tag || '', 'base64');
3164
+ const key = crypto.pbkdf2Sync(secret, salt, 100000, 32, 'sha256');
3165
+ const decipher = crypto.createDecipheriv('aes-256-gcm', key, iv);
3166
+ decipher.setAuthTag(tag);
3167
+ const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
3168
+ return JSON.parse(plaintext.toString('utf8'));
3169
+ }
2558
3170
  const encoder = new TextEncoder();
2559
3171
  const decoder = new TextDecoder();
2560
3172
  const salt = Buffer.from(bundle.salt || '', 'base64');
@@ -2627,6 +3239,98 @@ function buildAdminDashboardModel(events = [], alerts = []) {
2627
3239
  };
2628
3240
  }
2629
3241
 
3242
/**
 * Report which OWASP LLM Top 10 (2025) categories are covered by the active
 * rules and enabled controls, including an SVG badge with the percentage.
 *
 * Rule ids come from the built-in prompt-injection and output-leakage rules,
 * the always-on retrieval poisoning check, the controls enabled in `options`,
 * `options.additionalRuleIds`, and each plugin's `coverage`/`complianceMap`.
 * Ids are mapped to categories through `COMPLIANCE_MAP`.
 *
 * @param {object} [options] - Enabled controls, plugins and policy metadata.
 * @returns {object} `version`, `covered`, `byCategory`, `policyPack`,
 *   `routePolicies` (a count), `percentCovered` and `badge`.
 */
function generateCoverageReport(options = {}) {
  const retrievalDocs = options.retrievalDocuments || options.retrieval_documents || [];
  const hasToolGuard = options.toolPermissionFirewall
    || options.valueAtRiskCircuitBreaker
    || options.quorumApprovalEngine;
  const hasHumanReview = options.shadowMode || options.approvalInboxModel;
  const hasTrainingControls = options.retrievalSanitizer || options.trainingDataControls;

  const ruleIds = [
    ...PROMPT_INJECTION_RULES.map((rule) => rule.id),
    ...OUTPUT_LEAKAGE_RULES.map((rule) => rule.id),
    'retrieval_poisoning',
  ];
  if (options.tokenBudgetFirewall) ruleIds.push('token_budget_exceeded');
  if (retrievalDocs.length) ruleIds.push('grounding_validation');
  if (hasToolGuard) ruleIds.push('tool_permission_guard');
  if (hasHumanReview) ruleIds.push('human_review_gate');
  if (hasTrainingControls) ruleIds.push('training_data_poisoning');
  ruleIds.push(...(options.additionalRuleIds || []).filter(Boolean));

  const activeRuleIds = new Set(ruleIds);
  const plugins = Array.isArray(options.plugins) ? options.plugins : [];
  for (const plugin of plugins) {
    for (const item of (plugin.coverage || plugin.complianceMap || [])) activeRuleIds.add(item);
  }

  // Keep only recognised OWASP categories, deduplicated in first-seen order.
  const mappedCategories = [...activeRuleIds].flatMap((id) => COMPLIANCE_MAP[id] || []);
  const recognised = mappedCategories.filter((category) => OWASP_LLM_TOP10_2025.includes(category));
  const coveredSet = new Set(recognised);
  const covered = [...coveredSet];

  const byCategory = Object.fromEntries(
    OWASP_LLM_TOP10_2025.map((category) => [category, coveredSet.has(category) ? 'covered' : 'uncovered']),
  );
  const percent = Math.round((covered.length / OWASP_LLM_TOP10_2025.length) * 100);
  const badge = `<svg xmlns="http://www.w3.org/2000/svg" width="220" height="20" role="img" aria-label="OWASP coverage ${percent}%"><rect width="120" height="20" fill="#333"/><rect x="120" width="100" height="20" fill="#0a7f5a"/><text x="60" y="14" fill="#fff" text-anchor="middle" font-family="Arial" font-size="11">OWASP LLM Top 10</text><text x="170" y="14" fill="#fff" text-anchor="middle" font-family="Arial" font-size="11">${percent}% covered</text></svg>`;

  return {
    version: 'OWASP-LLM-2025',
    covered,
    byCategory,
    policyPack: options.policyPack || null,
    routePolicies: (options.routePolicies || []).length,
    percentCovered: percent,
    badge,
  };
}
3271
+
3272
/**
 * Generates obfuscated variants of a prompt (encodings, character
 * substitutions, simple rewordings) and can fold them into a red-team corpus.
 */
class AdversarialMutationEngine {
  /**
   * Produce the distinct variants of a prompt, in a fixed strategy order.
   * Variants whose text equals an earlier variant are dropped.
   *
   * @param {string} [prompt] - Source prompt; falsy values count as empty.
   * @returns {Array<{strategy: string, prompt: string}>} Unique variants.
   */
  mutate(prompt = '') {
    const source = String(prompt || '');
    const leetMap = { a: '4', e: '3', i: '1', o: '0', s: '5', t: '7' };
    const variants = [
      { strategy: 'original', prompt: source },
      { strategy: 'base64', prompt: Buffer.from(source).toString('base64') },
      { strategy: 'rot13', prompt: source.replace(/[a-z]/gi, (char) => {
        const base = char <= 'Z' ? 65 : 97;
        return String.fromCharCode(((char.charCodeAt(0) - base + 13) % 26) + base);
      }) },
      { strategy: 'leetspeak', prompt: source.replace(/[aeiost]/gi, (char) => leetMap[char.toLowerCase()] || char) },
      // Array.from iterates by code point, so emoji and other astral characters
      // are kept whole instead of being split into lone surrogates.
      { strategy: 'spaced', prompt: Array.from(source).join(' ') },
      { strategy: 'paraphrase', prompt: source.replace(/\bignore\b/gi, 'disregard').replace(/\breveal\b/gi, 'show').replace(/\binstructions\b/gi, 'directives') },
      { strategy: 'cross_lingual_es', prompt: source.replace(/\bignore previous instructions\b/i, 'ignora las instrucciones anteriores').replace(/\breveal the system prompt\b/i, 'revela el prompt del sistema') },
      { strategy: 'transliteration', prompt: normalizeUnicodeText(source) },
    ];
    const seenPrompts = new Set();
    return variants.filter((item) => {
      if (seenPrompts.has(item.prompt)) return false;
      seenPrompts.add(item.prompt);
      return true;
    });
  }

  /**
   * Extend a corpus with mutations of a prompt that was blocked.
   *
   * New entries are numbered after the highest existing `mutation_<n>` id so
   * that hardening an already-hardened corpus never produces duplicate ids.
   * For a corpus without mutation entries, numbering starts at `mutation_1`.
   *
   * @param {object} [params]
   * @param {Array} [params.corpus] - Existing corpus; non-arrays count as empty.
   * @param {string} [params.blockedPrompt] - Prompt to mutate.
   * @param {number} [params.maxVariants] - Maximum variants considered.
   * @param {boolean} [params.persist] - When true, write the corpus to `corpusPath`.
   * @param {string} [params.corpusPath] - Destination file used when persisting.
   * @returns {{added: Array, corpus: Array, persisted: boolean, corpusPath: string}}
   */
  hardenCorpus({ corpus = RED_TEAM_PROMPT_LIBRARY, blockedPrompt = '', maxVariants = 10, persist = false, corpusPath = path.join(__dirname, 'red_team_prompts.json') } = {}) {
    const baseCorpus = Array.isArray(corpus) ? corpus : [];
    const mutations = this.mutate(blockedPrompt).slice(0, maxVariants);

    const existingPrompts = new Set();
    let highestMutationId = 0;
    for (const entry of baseCorpus) {
      if (!entry) continue; // tolerate holes or null entries in the corpus
      existingPrompts.add(entry.prompt);
      const match = /^mutation_(\d+)$/.exec(String(entry.id));
      if (match) highestMutationId = Math.max(highestMutationId, Number(match[1]));
    }

    const additions = mutations
      .filter((item) => item.prompt && !existingPrompts.has(item.prompt))
      .map((item, index) => ({
        id: `mutation_${highestMutationId + index + 1}`,
        category: 'mutation',
        prompt: item.prompt,
        strategy: item.strategy,
      }));
    const nextCorpus = [...baseCorpus, ...additions];
    if (persist) {
      fs.writeFileSync(corpusPath, `${JSON.stringify(nextCorpus, null, 2)}\n`, 'utf8');
    }
    return {
      added: additions,
      corpus: nextCorpus,
      persisted: persist,
      corpusPath,
    };
  }
}
3309
+
3310
/**
 * Ordered log of the hops a prompt takes between agents. Each hop stores
 * SHA-256 hashes of its input and output rather than the raw text.
 */
class PromptProvenanceGraph {
  constructor() {
    this.hops = [];
  }

  /**
   * Record the next hop in the chain.
   *
   * @param {object} [hop] - `agentId` (or `agent_id`), `input`, `output`, `riskDelta`.
   * @returns {object} The stored record, numbered from 1.
   */
  append(hop = {}) {
    const sha256Hex = (text) => crypto.createHash('sha256').update(String(text || '')).digest('hex');
    const position = this.hops.length + 1;
    const record = {
      hop: position,
      agentId: hop.agentId || hop.agent_id || 'unknown',
      inputHash: sha256Hex(hop.input),
      outputHash: sha256Hex(hop.output),
      riskDelta: Number(hop.riskDelta || 0),
      ts: new Date().toISOString(),
    };
    this.hops.push(record);
    return record;
  }

  /**
   * Summarize the chain and point at the hop with the largest positive risk delta.
   *
   * @returns {{hops: Array, totalHops: number, mostRiskyHop: number|null}}
   *   `mostRiskyHop` is null when no hop has a risk delta above zero.
   */
  summarize() {
    let riskiest = {};
    for (const hop of this.hops) {
      // Strict comparison keeps the earliest hop when several share the maximum.
      if (hop.riskDelta > (riskiest.riskDelta || 0)) riskiest = hop;
    }
    return {
      hops: this.hops,
      totalHops: this.hops.length,
      mostRiskyHop: riskiest.hop || null,
    };
  }
}
3333
+
2630
3334
  function getRedTeamPromptLibrary() {
2631
3335
  return RED_TEAM_PROMPT_LIBRARY.slice();
2632
3336
  }
@@ -2738,9 +3442,11 @@ function buildShieldOptions(options = {}) {
2738
3442
  module.exports = {
2739
3443
  AgenticCapabilityGater,
2740
3444
  AgentIdentityRegistry,
3445
+ AdversarialMutationEngine,
2741
3446
  AuditTrail,
2742
3447
  BlackwallShield,
2743
3448
  CoTScanner,
3449
+ ConversationThreatTracker,
2744
3450
  CrossModelConsensusWrapper,
2745
3451
  DigitalTwinOrchestrator,
2746
3452
  ImageMetadataScanner,
@@ -2749,10 +3455,15 @@ module.exports = {
2749
3455
  OutputFirewall,
2750
3456
  PowerBIExporter,
2751
3457
  PolicyLearningLoop,
3458
+ PromptProvenanceGraph,
3459
+ QuorumApprovalEngine,
3460
+ RouteBaselineTracker,
2752
3461
  RetrievalSanitizer,
2753
3462
  SessionBuffer,
2754
3463
  ShadowConsensusAuditor,
3464
+ SovereignRoutingEngine,
2755
3465
  TokenBudgetFirewall,
3466
+ StreamingOutputFirewall,
2756
3467
  ToolPermissionFirewall,
2757
3468
  ValueAtRiskCircuitBreaker,
2758
3469
  VisualInstructionDetector,
@@ -2774,17 +3485,20 @@ module.exports = {
2774
3485
  injectCanaryTokens,
2775
3486
  detectCanaryLeakage,
2776
3487
  rehydrateResponse,
3488
+ unvault,
2777
3489
  encryptVaultForClient,
2778
3490
  decryptVaultForClient,
2779
3491
  rehydrateFromZeroKnowledgeBundle,
2780
3492
  ShadowAIDiscovery,
2781
3493
  summarizeSecurityEvents,
2782
3494
  buildAdminDashboardModel,
3495
+ generateCoverageReport,
2783
3496
  getRedTeamPromptLibrary,
2784
3497
  runRedTeamSuite,
2785
3498
  buildShieldOptions,
2786
3499
  summarizeOperationalTelemetry,
2787
3500
  suggestPolicyOverride,
3501
+ buildTransparencyReport,
2788
3502
  normalizeIdentityMetadata,
2789
3503
  buildEnterpriseTelemetryEvent,
2790
3504
  buildPowerBIRecord,