@vpdeva/blackwall-llm-shield-js 0.2.4 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/scorecard.js CHANGED
@@ -1,14 +1,61 @@
1
1
  #!/usr/bin/env node
2
2
 
3
+ const fs = require('fs');
4
+ const path = require('path');
3
5
  const { BlackwallShield, runRedTeamSuite } = require('./index');
4
6
 
7
/**
 * Convert one raw scalar token from the simple YAML-style config format into a
 * JavaScript value.
 *
 * - `true` / `false` become booleans.
 * - Integer and decimal literals (optional leading `-`) become numbers.
 * - A value wrapped in a matching pair of single or double quotes is unwrapped
 *   and returned as a string (so `"42"` stays the string `42`).
 * - Anything else is returned as the trimmed string.
 *
 * @param {*} [value=''] Raw token; it is coerced with String() and trimmed.
 * @returns {boolean|number|string} The parsed value.
 */
function parseScalar(value = '') {
  const trimmed = String(value).trim();
  if (trimmed === 'true') return true;
  if (trimmed === 'false') return false;
  // Accept decimals as well as integers so values such as `0.75` are numeric.
  if (/^-?\d+(\.\d+)?$/.test(trimmed)) return Number(trimmed);
  // Unwrap only when the same quote character both opens and closes the value;
  // a lone or mismatched quote (e.g. `5"`) is part of the data, not a delimiter.
  const quoted = /^(['"])(.*)\1$/.exec(trimmed);
  return quoted ? quoted[2] : trimmed;
}
14
+
15
/**
 * Read a configuration file from disk and return it as a plain object.
 *
 * Paths ending in `.json` are parsed with JSON.parse. Every other file is read
 * with a deliberately small line-based parser that understands only:
 *
 *   key: value          -> result[key] = parseScalar(value)
 *   key:                -> result[key] = [] (start of a list)
 *     - prop: value     -> new object pushed onto the current list
 *       other: value    -> extra property on the most recent list object
 *
 * Blank lines and lines whose first non-space character is `#` are ignored.
 * Lines that match none of the shapes above are skipped.
 *
 * @param {string} configPath Path of the file to read.
 * @returns {object} Parsed configuration.
 * @throws {Error} Whatever fs.readFileSync or JSON.parse throw.
 */
function loadConfig(configPath) {
  const raw = fs.readFileSync(configPath, 'utf8');
  if (configPath.endsWith('.json')) return JSON.parse(raw);

  const result = {};
  let currentList = null;
  let currentItem = null;

  for (const line of raw.split(/\r?\n/)) {
    const stripped = line.trim();
    if (!stripped || stripped.startsWith('#')) continue;

    // Top-level entry: starts in column 0 with a letter and contains a colon.
    if (/^[A-Za-z]/.test(line) && line.includes(':')) {
      // Split on the FIRST colon only, so values that themselves contain
      // colons (URLs, times, ...) are kept whole instead of being truncated.
      const separator = line.indexOf(':');
      const key = line.slice(0, separator).trim();
      const rest = line.slice(separator + 1).trim();
      if (!rest) {
        result[key] = [];
        currentList = result[key];
      } else {
        result[key] = parseScalar(rest);
        // A scalar key ends any list that was being filled; without this,
        // later indented lines would attach to an unrelated earlier list.
        currentList = null;
      }
      currentItem = null;
      continue;
    }

    // "- prop: value" opens a new object in the list currently being filled.
    const itemMatch = line.match(/^\s*-\s+([A-Za-z0-9_]+):\s*(.+)$/);
    if (itemMatch && currentList) {
      currentItem = { [itemMatch[1]]: parseScalar(itemMatch[2]) };
      currentList.push(currentItem);
      continue;
    }

    // Indented "prop: value" adds a property to the most recent list object.
    const propMatch = line.match(/^\s+([A-Za-z0-9_]+):\s*(.+)$/);
    if (propMatch && currentItem) currentItem[propMatch[1]] = parseScalar(propMatch[2]);
  }

  return result;
}
45
+
5
46
  async function main() {
47
+ const args = process.argv.slice(2);
48
+ const validateMode = args[0] === 'validate';
49
+ const configIndex = args.indexOf('--config');
50
+ const configPath = configIndex >= 0 ? path.resolve(args[configIndex + 1]) : null;
51
+ const config = configPath ? loadConfig(configPath) : {};
6
52
  const shield = new BlackwallShield({
7
53
  blockOnPromptInjection: true,
8
54
  promptInjectionThreshold: process.env.BLACKWALL_PROMPT_THRESHOLD || 'high',
9
55
  shadowMode: process.env.BLACKWALL_SHADOW_MODE === 'true',
56
+ ...config,
10
57
  });
11
- const scorecard = await runRedTeamSuite({ shield, metadata: { source: 'cli' } });
58
+ const scorecard = await runRedTeamSuite({ shield, metadata: { source: 'cli', mode: validateMode ? 'validate' : 'run' } });
12
59
  process.stdout.write(`${JSON.stringify(scorecard, null, 2)}\n`);
13
60
  }
14
61
 
package/src/semantic.js CHANGED
@@ -4,6 +4,13 @@ class TransformersIntentScorer {
4
4
  constructor(classifier, options = {}) {
5
5
  this.classifier = classifier;
6
6
  this.threshold = options.threshold || 0.5;
7
+ this.labelMap = options.labelMap || {
8
+ jailbreak: 'jailbreak',
9
+ prompt_injection: 'prompt_injection',
10
+ unsafe: 'unsafe',
11
+ injection: 'prompt_injection',
12
+ malicious: 'unsafe',
13
+ };
7
14
  }
8
15
 
9
16
  async score(text) {
@@ -12,7 +19,8 @@ class TransformersIntentScorer {
12
19
  const matches = [];
13
20
  let total = 0;
14
21
  for (const item of items) {
15
- const label = String(item.label || '').toLowerCase();
22
+ const rawLabel = String(item.label || '').toLowerCase();
23
+ const label = this.labelMap[rawLabel] || rawLabel;
16
24
  const score = Number(item.score || 0);
17
25
  if (['jailbreak', 'prompt_injection', 'unsafe'].includes(label) && score >= this.threshold) {
18
26
  const weighted = Math.min(40, Math.round(score * 40));
@@ -32,7 +40,7 @@ class TransformersIntentScorer {
32
40
  async function createOptionalLocalIntentScorer(options = {}) {
33
41
  try {
34
42
  const { pipeline } = require('@xenova/transformers');
35
- const classifier = await pipeline('text-classification', options.model || 'Xenova/distilbert-base-uncased-finetuned-sst-2-english');
43
+ const classifier = await pipeline('text-classification', options.model || 'ProtectAI/deberta-v3-base-prompt-injection-v2');
36
44
  return new TransformersIntentScorer(classifier, options);
37
45
  } catch {
38
46
  return new LightweightIntentScorer(options);