agentshield-sdk 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. package/CHANGELOG.md +191 -0
  2. package/LICENSE +21 -0
  3. package/README.md +975 -0
  4. package/bin/agent-shield.js +680 -0
  5. package/package.json +118 -0
  6. package/src/adaptive.js +330 -0
  7. package/src/agent-protocol.js +998 -0
  8. package/src/alert-tuning.js +480 -0
  9. package/src/allowlist.js +603 -0
  10. package/src/audit-immutable.js +914 -0
  11. package/src/audit-streaming.js +469 -0
  12. package/src/badges.js +196 -0
  13. package/src/behavior-profiling.js +289 -0
  14. package/src/benchmark-harness.js +804 -0
  15. package/src/canary.js +271 -0
  16. package/src/certification.js +563 -0
  17. package/src/circuit-breaker.js +321 -0
  18. package/src/compliance.js +617 -0
  19. package/src/confidence-tuning.js +324 -0
  20. package/src/confused-deputy.js +624 -0
  21. package/src/context-scoring.js +360 -0
  22. package/src/conversation.js +494 -0
  23. package/src/cost-optimizer.js +1024 -0
  24. package/src/ctf.js +462 -0
  25. package/src/detector-core.js +1999 -0
  26. package/src/distributed.js +359 -0
  27. package/src/document-scanner.js +795 -0
  28. package/src/embedding.js +307 -0
  29. package/src/encoding.js +429 -0
  30. package/src/enterprise.js +405 -0
  31. package/src/errors.js +100 -0
  32. package/src/eu-ai-act.js +523 -0
  33. package/src/fuzzer.js +764 -0
  34. package/src/honeypot.js +328 -0
  35. package/src/i18n-patterns.js +523 -0
  36. package/src/index.js +430 -0
  37. package/src/integrations.js +528 -0
  38. package/src/llm-redteam.js +670 -0
  39. package/src/main.js +741 -0
  40. package/src/main.mjs +38 -0
  41. package/src/mcp-bridge.js +542 -0
  42. package/src/mcp-certification.js +846 -0
  43. package/src/mcp-sdk-integration.js +355 -0
  44. package/src/mcp-security-runtime.js +741 -0
  45. package/src/mcp-server.js +740 -0
  46. package/src/middleware.js +208 -0
  47. package/src/model-finetuning.js +884 -0
  48. package/src/model-fingerprint.js +1042 -0
  49. package/src/multi-agent-trust.js +453 -0
  50. package/src/multi-agent.js +404 -0
  51. package/src/multimodal.js +296 -0
  52. package/src/nist-mapping.js +505 -0
  53. package/src/observability.js +330 -0
  54. package/src/openclaw.js +450 -0
  55. package/src/otel.js +544 -0
  56. package/src/owasp-2025.js +483 -0
  57. package/src/pii.js +390 -0
  58. package/src/plugin-marketplace.js +628 -0
  59. package/src/plugin-system.js +349 -0
  60. package/src/policy-dsl.js +775 -0
  61. package/src/policy-extended.js +635 -0
  62. package/src/policy.js +443 -0
  63. package/src/presets.js +409 -0
  64. package/src/production.js +557 -0
  65. package/src/prompt-leakage.js +321 -0
  66. package/src/rag-vulnerability.js +579 -0
  67. package/src/redteam.js +475 -0
  68. package/src/response-handler.js +429 -0
  69. package/src/scanners.js +357 -0
  70. package/src/self-healing.js +363 -0
  71. package/src/semantic.js +339 -0
  72. package/src/shield-score.js +250 -0
  73. package/src/sso-saml.js +897 -0
  74. package/src/stream-scanner.js +806 -0
  75. package/src/testing.js +505 -0
  76. package/src/threat-encyclopedia.js +629 -0
  77. package/src/threat-intel-network.js +1017 -0
  78. package/src/token-analysis.js +467 -0
  79. package/src/tool-guard.js +412 -0
  80. package/src/tool-output-validator.js +354 -0
  81. package/src/utils.js +83 -0
  82. package/src/watermark.js +235 -0
  83. package/src/worker-scanner.js +601 -0
  84. package/types/index.d.ts +2088 -0
@@ -0,0 +1,884 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Agent Shield — Custom Model Fine-Tuning
5
+ *
6
+ * Train org-specific detection models on threat data.
7
+ * Uses TF-IDF + logistic regression — zero external dependencies.
8
+ *
9
+ * - ModelTrainer: Core training engine (gradient descent, binary cross-entropy)
10
+ * - TrainingPipeline: End-to-end collect -> train -> evaluate pipeline
11
+ * - DatasetManager: Dataset handling, augmentation, splitting
12
+ * - ModelEvaluator: Accuracy, precision, recall, F1, confusion matrix, ROC AUC
13
+ * - FineTunedModel: Trained model with predict, export, load
14
+ */
15
+
16
+ // =========================================================================
17
+ // FineTunedModel
18
+ // =========================================================================
19
+
20
+ /**
21
+ * A trained binary classifier using TF-IDF features and logistic regression.
22
+ */
23
/**
 * A trained binary classifier over TF-IDF features with logistic regression.
 *
 * Decision rule: sigmoid(w . x) >= 0.5 -> 'attack', else 'benign';
 * `confidence` is always the probability of the returned label.
 */
class FineTunedModel {
  /**
   * @param {number[]} weights - Weight vector (one per vocabulary term + trailing bias)
   * @param {string[]} vocabulary - Ordered vocabulary terms
   * @param {Object} config - Training config used to produce this model. When it
   *   contains `idf` (term -> IDF weight), prediction applies the same TF-IDF
   *   weighting that training used; otherwise features fall back to plain TF.
   */
  constructor(weights, vocabulary, config) {
    this.weights = weights || [];
    this.vocabulary = vocabulary || [];
    this.config = config || {};
    this.createdAt = new Date().toISOString();
  }

  /**
   * Predict whether text is an attack or benign.
   * @param {string} text - Input text
   * @returns {{label: string, confidence: number}}
   */
  predict(text) {
    const features = this._extractFeatures(text);
    const probability = this._sigmoid(this._dot(features));

    return {
      label: probability >= 0.5 ? 'attack' : 'benign',
      // Confidence is the probability of the chosen label, not always P(attack)
      confidence: probability >= 0.5 ? probability : 1 - probability
    };
  }

  /**
   * Batch prediction on multiple texts.
   * @param {string[]} texts - Array of input texts
   * @returns {Array<{label: string, confidence: number}>}
   */
  predictBatch(texts) {
    return texts.map(text => this.predict(text));
  }

  /**
   * Export the model to a serializable JSON object.
   * @returns {Object}
   */
  export() {
    return {
      type: 'agent-shield-finetuned-model',
      version: '1.0',
      weights: this.weights,
      vocabulary: this.vocabulary,
      config: this.config,
      createdAt: this.createdAt
    };
  }

  /**
   * Load a model from a serialized JSON object produced by export().
   * @param {Object} json - Serialized model
   * @returns {FineTunedModel}
   * @throws {Error} If the payload is missing or has the wrong type tag
   */
  static load(json) {
    if (!json || json.type !== 'agent-shield-finetuned-model') {
      throw new Error('[Agent Shield] Invalid model format');
    }
    const model = new FineTunedModel(json.weights, json.vocabulary, json.config);
    model.createdAt = json.createdAt || new Date().toISOString();
    return model;
  }

  /**
   * Get the top weighted features (most influential for classification),
   * ranked by absolute weight, descending.
   * @param {number} [topN=20] - Number of top features to return
   * @returns {Array<{term: string, weight: number}>}
   */
  getFeatureImportance(topN = 20) {
    const features = this.vocabulary.map((term, i) => ({
      term,
      weight: this.weights[i] || 0
    }));

    features.sort((a, b) => Math.abs(b.weight) - Math.abs(a.weight));

    return features.slice(0, topN);
  }

  /**
   * Extract the feature vector for a text sample, aligned with the vocabulary
   * plus a trailing bias entry of 1.
   *
   * FIX: the original computed plain normalized TF here even though training
   * built TF-IDF features, so prediction-time features were on a different
   * scale than the learned weights. When the training-time IDF table is
   * available in `config.idf` it is now applied; models exported without it
   * keep the old TF-only behavior (fallback weight 1).
   * @private
   * @param {string} text
   * @returns {number[]} Feature vector aligned with vocabulary
   */
  _extractFeatures(text) {
    const tokens = _tokenize(text);
    // Null prototype so tokens such as "__proto__" behave as plain keys
    const termFreq = Object.create(null);
    for (const token of tokens) {
      termFreq[token] = (termFreq[token] || 0) + 1;
    }

    const idf = this.config.idf;
    const features = new Array(this.vocabulary.length + 1).fill(0);
    const totalTokens = tokens.length || 1;

    for (let i = 0; i < this.vocabulary.length; i++) {
      const term = this.vocabulary[i];
      if (termFreq[term]) {
        const tf = termFreq[term] / totalTokens;
        const idfWeight = idf && Object.prototype.hasOwnProperty.call(idf, term)
          ? idf[term]
          : 1;
        features[i] = tf * idfWeight;
      }
    }

    // Bias term
    features[this.vocabulary.length] = 1;

    return features;
  }

  /**
   * Dot product of a feature vector with the weight vector
   * (truncated to the shorter of the two to tolerate size drift).
   * @private
   */
  _dot(features) {
    let sum = 0;
    const len = Math.min(features.length, this.weights.length);
    for (let i = 0; i < len; i++) {
      sum += features[i] * this.weights[i];
    }
    return sum;
  }

  /**
   * Sigmoid activation, clamped for |z| > 500 to avoid Math.exp overflow.
   * @private
   */
  _sigmoid(z) {
    if (z > 500) return 1;
    if (z < -500) return 0;
    return 1 / (1 + Math.exp(-z));
  }
}
161
+
162
+ // =========================================================================
163
+ // DatasetManager
164
+ // =========================================================================
165
+
166
+ /**
167
+ * Manages training datasets for the fine-tuning pipeline.
168
+ */
169
/**
 * Manages labelled training datasets for the fine-tuning pipeline:
 * collection, import from scan history, augmentation, splitting, and export.
 */
class DatasetManager {
  constructor() {
    this.samples = [];
  }

  /**
   * Add a single labelled training sample.
   * @param {string} text - Sample text (non-empty string)
   * @param {string} label - 'attack' or 'benign'
   * @param {Object} [metadata] - Optional metadata attached to the sample
   * @returns {DatasetManager} this (chainable)
   * @throws {Error} On empty/non-string text or unknown label
   */
  addSample(text, label, metadata) {
    const textOk = typeof text === 'string' && text.length > 0;
    if (!textOk) {
      throw new Error('[Agent Shield] Sample text must be a non-empty string');
    }
    const labelOk = label === 'attack' || label === 'benign';
    if (!labelOk) {
      throw new Error('[Agent Shield] Label must be "attack" or "benign"');
    }
    this.samples.push({ text, label, metadata: metadata || {} });
    return this;
  }

  /**
   * Import samples from Agent Shield scan results. A result is labelled
   * 'attack' when it was blocked, carries a threat, has a status other than
   * 'safe', or a severity other than 'low'; otherwise 'benign'. Results
   * without any text field are skipped.
   * @param {Array} scanResults - Array of scan result objects
   * @returns {DatasetManager} this (chainable)
   * @throws {Error} If scanResults is not an array
   */
  addFromScanHistory(scanResults) {
    if (!Array.isArray(scanResults)) {
      throw new Error('[Agent Shield] scanResults must be an array');
    }

    for (const result of scanResults) {
      const text = result.input || result.text || result.prompt || '';
      if (!text) {
        continue;
      }

      const looksLikeAttack =
        result.blocked ||
        result.threat ||
        (result.status && result.status !== 'safe') ||
        (result.severity && result.severity !== 'low');

      this.samples.push({
        text,
        label: looksLikeAttack ? 'attack' : 'benign',
        metadata: {
          source: 'scan_history',
          severity: result.severity,
          category: result.category,
          originalResult: result
        }
      });
    }

    return this;
  }

  /**
   * Augment the dataset in place with synthetic variations of every sample:
   * uppercase and lowercase copies (always), a first-half truncation (texts
   * longer than 20 chars), and a random two-word swap (texts with more than
   * two whitespace-separated words).
   * @returns {DatasetManager} this
   */
  augment() {
    const extras = [];
    const makeVariant = (sample, text, tag) => ({
      text,
      label: sample.label,
      metadata: { ...sample.metadata, augmented: tag }
    });

    for (const sample of this.samples) {
      extras.push(makeVariant(sample, sample.text.toUpperCase(), 'uppercase'));
      extras.push(makeVariant(sample, sample.text.toLowerCase(), 'lowercase'));

      if (sample.text.length > 20) {
        const keep = Math.ceil(sample.text.length / 2);
        extras.push(makeVariant(sample, sample.text.slice(0, keep), 'truncated'));
      }

      const words = sample.text.split(/\s+/);
      if (words.length > 2) {
        const swapped = [...words];
        const a = Math.floor(Math.random() * swapped.length);
        const b = Math.floor(Math.random() * swapped.length);
        [swapped[a], swapped[b]] = [swapped[b], swapped[a]];
        extras.push(makeVariant(sample, swapped.join(' '), 'shuffled'));
      }
    }

    this.samples.push(...extras);
    return this;
  }

  /**
   * Shuffle (Fisher-Yates) and split into train/validation/test sets.
   * The non-train remainder is divided evenly between validation and test.
   * @param {number} [ratio=0.8] - Fraction assigned to training
   * @returns {{train: Array, validation: Array, test: Array}}
   */
  split(ratio = 0.8) {
    const pool = [...this.samples];
    for (let i = pool.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [pool[i], pool[j]] = [pool[j], pool[i]];
    }

    const trainEnd = Math.floor(pool.length * ratio);
    const valEnd = Math.floor(pool.length * (ratio + (1 - ratio) / 2));

    return {
      train: pool.slice(0, trainEnd),
      validation: pool.slice(trainEnd, valEnd),
      test: pool.slice(valEnd)
    };
  }

  /**
   * Dataset statistics: totals, label distribution, average text length,
   * and the number of augmented samples. Computed in a single pass.
   * @returns {Object}
   */
  getStats() {
    let attackCount = 0;
    let benignCount = 0;
    let augmentedCount = 0;
    let lengthSum = 0;

    for (const sample of this.samples) {
      if (sample.label === 'attack') attackCount += 1;
      if (sample.label === 'benign') benignCount += 1;
      if (sample.metadata && sample.metadata.augmented) augmentedCount += 1;
      lengthSum += sample.text.length;
    }

    const total = this.samples.length;

    return {
      total,
      attackCount,
      benignCount,
      attackRatio: total > 0 ? attackCount / total : 0,
      benignRatio: total > 0 ? benignCount / total : 0,
      avgTextLength: total > 0 ? Math.round(lengthSum / total) : 0,
      augmentedCount
    };
  }

  /**
   * Serialize the dataset (samples + stats) to a plain object.
   * @returns {Object}
   */
  export() {
    return {
      type: 'agent-shield-dataset',
      version: '1.0',
      samples: this.samples,
      stats: this.getStats(),
      exportedAt: new Date().toISOString()
    };
  }
}
336
+
337
+ // =========================================================================
338
+ // ModelTrainer
339
+ // =========================================================================
340
+
341
+ /**
342
+ * Core training engine using TF-IDF features and logistic regression
343
+ * with gradient descent.
344
+ */
345
/**
 * Core training engine using TF-IDF features and logistic regression
 * with mini-batch gradient descent and binary cross-entropy loss.
 */
class ModelTrainer {
  /**
   * @param {Object} [config]
   * @param {number} [config.learningRate=0.01] - Learning rate for gradient descent
   * @param {number} [config.epochs=10] - Number of training epochs
   * @param {number} [config.batchSize=32] - Mini-batch size
   * @param {number} [config.validationSplit=0.2] - Fraction held out for validation
   */
  constructor(config = {}) {
    this.learningRate = config.learningRate || 0.01;
    this.epochs = config.epochs || 10;
    this.batchSize = config.batchSize || 32;
    this.validationSplit = config.validationSplit || 0.2;
  }

  /**
   * Train a binary classifier on the provided dataset.
   * Uses TF-IDF features, sigmoid activation, and binary cross-entropy loss.
   *
   * @param {Array<{text: string, label: string}>} dataset - Training samples
   * @returns {FineTunedModel} Trained model
   * @throws {Error} If the dataset is empty
   */
  train(dataset) {
    if (!dataset || dataset.length === 0) {
      throw new Error('[Agent Shield] Dataset cannot be empty');
    }

    console.log(`[Agent Shield] Training started: ${dataset.length} samples, ${this.epochs} epochs`);

    // Build vocabulary and IDF table from the training data
    const vocabulary = this._buildVocabulary(dataset);
    console.log(`[Agent Shield] Vocabulary size: ${vocabulary.length}`);

    const idf = this._computeIDF(dataset, vocabulary);

    // Build feature matrix and label vector
    const { features, labels } = this._buildFeatureMatrix(dataset, vocabulary, idf);

    // FIX: shuffle before the train/validation split. The original sliced the
    // dataset in its incoming order, so an ordered dataset (e.g. augmented
    // samples appended at the end) produced a biased validation slice.
    this._shuffleInPlace(features, labels);

    const splitIdx = Math.floor(features.length * (1 - this.validationSplit));
    const trainFeatures = features.slice(0, splitIdx);
    const trainLabels = labels.slice(0, splitIdx);
    const valFeatures = features.slice(splitIdx);
    const valLabels = labels.slice(splitIdx);

    // Initialize weights (vocabulary size + 1 for bias) with small random values
    const numFeatures = vocabulary.length + 1;
    const weights = Array.from({ length: numFeatures }, () => (Math.random() - 0.5) * 0.01);

    const lossHistory = [];

    for (let epoch = 0; epoch < this.epochs; epoch++) {
      let epochLoss = 0;

      // Mini-batch gradient descent over the training portion
      for (let batchStart = 0; batchStart < trainFeatures.length; batchStart += this.batchSize) {
        const batchEnd = Math.min(batchStart + this.batchSize, trainFeatures.length);
        const batchSize = batchEnd - batchStart;
        const gradients = new Array(numFeatures).fill(0);

        for (let i = batchStart; i < batchEnd; i++) {
          const x = trainFeatures[i];
          const y = trainLabels[i];

          // Forward pass: z = w . x, yhat = sigmoid(z)
          let z = 0;
          for (let j = 0; j < numFeatures; j++) {
            z += weights[j] * x[j];
          }
          const yhat = _sigmoid(z);

          // Binary cross-entropy (clamped away from 0/1 for log stability)
          const clampedYhat = Math.max(1e-7, Math.min(1 - 1e-7, yhat));
          epochLoss += -(y * Math.log(clampedYhat) + (1 - y) * Math.log(1 - clampedYhat));

          // dL/dw_j = (yhat - y) * x_j
          const error = yhat - y;
          for (let j = 0; j < numFeatures; j++) {
            gradients[j] += error * x[j];
          }
        }

        // Average-gradient weight update
        for (let j = 0; j < numFeatures; j++) {
          weights[j] -= this.learningRate * (gradients[j] / batchSize);
        }
      }

      const avgLoss = trainFeatures.length > 0 ? epochLoss / trainFeatures.length : 0;
      const valLoss = this._meanLoss(valFeatures, valLabels, weights);

      lossHistory.push({ epoch: epoch + 1, trainLoss: avgLoss, valLoss });
      console.log(`[Agent Shield] Epoch ${epoch + 1}/${this.epochs} — train_loss: ${avgLoss.toFixed(4)}, val_loss: ${valLoss.toFixed(4)}`);
    }

    // The IDF table is stored in the config so prediction-time feature
    // extraction can apply the same TF-IDF weighting used during training
    // (and so it survives export()/load()).
    const model = new FineTunedModel(weights, vocabulary, {
      learningRate: this.learningRate,
      epochs: this.epochs,
      batchSize: this.batchSize,
      trainingSamples: trainFeatures.length,
      validationSamples: valFeatures.length,
      lossHistory,
      idf
    });

    console.log('[Agent Shield] Training complete');
    return model;
  }

  /**
   * Fisher-Yates shuffle of two parallel arrays (features + labels), in place,
   * keeping each feature row aligned with its label.
   * @private
   */
  _shuffleInPlace(features, labels) {
    for (let i = features.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [features[i], features[j]] = [features[j], features[i]];
      [labels[i], labels[j]] = [labels[j], labels[i]];
    }
  }

  /**
   * Mean binary cross-entropy of a weight vector over a feature set.
   * Returns 0 for an empty set.
   * @private
   */
  _meanLoss(features, labels, weights) {
    if (features.length === 0) return 0;

    let total = 0;
    for (let i = 0; i < features.length; i++) {
      let z = 0;
      for (let j = 0; j < weights.length; j++) {
        z += weights[j] * features[i][j];
      }
      const yhat = _sigmoid(z);
      const clampedYhat = Math.max(1e-7, Math.min(1 - 1e-7, yhat));
      total += -(labels[i] * Math.log(clampedYhat) + (1 - labels[i]) * Math.log(1 - clampedYhat));
    }
    return total / features.length;
  }

  /**
   * Build vocabulary: unique tokens appearing in at least 2 documents,
   * sorted by document frequency, capped at 5000 terms.
   * @private
   */
  _buildVocabulary(dataset) {
    // Null prototype so tokens such as "__proto__" behave as plain keys
    const freq = Object.create(null);
    for (const sample of dataset) {
      for (const token of new Set(_tokenize(sample.text))) {
        freq[token] = (freq[token] || 0) + 1;
      }
    }

    return Object.entries(freq)
      .filter(([, count]) => count >= 2)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5000)
      .map(([term]) => term);
  }

  /**
   * Compute smoothed IDF for each vocabulary term.
   *
   * FIX: document frequency is counted on TOKENS (matching _tokenize and the
   * TF computation). The original used a substring check
   * (`text.includes(term)`) which over-counted terms embedded inside longer
   * words (e.g. "art" inside "start") and rescanned every document once per
   * vocabulary term; this version tokenizes each document exactly once.
   * @private
   */
  _computeIDF(dataset, vocabulary) {
    const docCount = dataset.length;

    // One tokenization pass: document frequency per token
    const df = Object.create(null);
    for (const sample of dataset) {
      for (const token of new Set(_tokenize(sample.text))) {
        df[token] = (df[token] || 0) + 1;
      }
    }

    const idf = Object.create(null);
    for (const term of vocabulary) {
      idf[term] = Math.log((docCount + 1) / ((df[term] || 0) + 1)) + 1; // smoothed IDF
    }

    return idf;
  }

  /**
   * Build the TF-IDF feature matrix (with trailing bias column = 1)
   * and the 0/1 label vector ('attack' -> 1).
   * @private
   */
  _buildFeatureMatrix(dataset, vocabulary, idf) {
    const features = [];
    const labels = [];

    for (const sample of dataset) {
      const tokens = _tokenize(sample.text);
      const termFreq = Object.create(null);
      for (const token of tokens) {
        termFreq[token] = (termFreq[token] || 0) + 1;
      }

      const totalTokens = tokens.length || 1;
      const featureVec = new Array(vocabulary.length + 1);

      for (let i = 0; i < vocabulary.length; i++) {
        const term = vocabulary[i];
        const tf = (termFreq[term] || 0) / totalTokens;
        featureVec[i] = tf * (idf[term] || 0);
      }

      // Bias term
      featureVec[vocabulary.length] = 1;

      features.push(featureVec);
      labels.push(sample.label === 'attack' ? 1 : 0);
    }

    return { features, labels };
  }
}
552
+
553
+ // =========================================================================
554
+ // ModelEvaluator
555
+ // =========================================================================
556
+
557
+ /**
558
+ * Evaluates a fine-tuned model and computes classification metrics.
559
+ */
560
/**
 * Evaluates a fine-tuned model on a labelled test set and computes
 * classification metrics: accuracy, precision, recall, F1, confusion
 * matrix, and an approximate ROC AUC.
 */
class ModelEvaluator {
  constructor() {
    // Most recent evaluation result; consumed by generateReport()
    this.lastReport = null;
  }

  /**
   * Evaluate a model on a test set.
   * @param {FineTunedModel} model - Trained model (anything exposing predict(text))
   * @param {Array<{text: string, label: string}>} testSet - Test samples
   * @returns {Object} Evaluation metrics
   * @throws {Error} When the model is missing or the test set is empty
   */
  evaluate(model, testSet) {
    if (!model || !testSet || testSet.length === 0) {
      throw new Error('[Agent Shield] Model and non-empty test set required');
    }

    const counts = { tp: 0, fp: 0, tn: 0, fn: 0 };
    const predictions = [];

    for (const sample of testSet) {
      const { label: predicted, confidence } = model.predict(sample.text);
      const actual = sample.label;

      predictions.push({
        text: sample.text.slice(0, 80),
        actual,
        predicted,
        confidence
      });

      // Only 'attack'/'benign' pairs are tallied (unknown labels are ignored)
      if (predicted === 'attack') {
        if (actual === 'attack') counts.tp += 1;
        else if (actual === 'benign') counts.fp += 1;
      } else if (predicted === 'benign') {
        if (actual === 'benign') counts.tn += 1;
        else if (actual === 'attack') counts.fn += 1;
      }
    }

    const { tp, fp, tn, fn } = counts;
    const accuracy = testSet.length > 0 ? (tp + tn) / testSet.length : 0;
    const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
    const recall = tp + fn > 0 ? tp / (tp + fn) : 0;
    const f1 = precision + recall > 0
      ? 2 * (precision * recall) / (precision + recall)
      : 0;

    const result = {
      accuracy,
      precision,
      recall,
      f1,
      confusionMatrix: { tp, fp, tn, fn },
      roc_auc: this._computeAUC(predictions),
      totalSamples: testSet.length,
      predictions
    };

    this.lastReport = result;
    return result;
  }

  /**
   * Generate a formatted text report from the last evaluation.
   * @returns {string} Formatted report, or a notice if nothing was evaluated
   */
  generateReport() {
    if (!this.lastReport) {
      return '[Agent Shield] No evaluation has been run yet.';
    }

    const rep = this.lastReport;
    const cm = rep.confusionMatrix;

    const lines = [
      '=== Agent Shield — Model Evaluation Report ===',
      '',
      `Samples evaluated: ${rep.totalSamples}`,
      '',
      'Metrics:',
      ` Accuracy: ${(rep.accuracy * 100).toFixed(2)}%`,
      ` Precision: ${(rep.precision * 100).toFixed(2)}%`,
      ` Recall: ${(rep.recall * 100).toFixed(2)}%`,
      ` F1 Score: ${(rep.f1 * 100).toFixed(2)}%`,
      ` ROC AUC: ${rep.roc_auc.toFixed(4)}`,
      '',
      'Confusion Matrix:',
      ` Predicted Attack Predicted Benign`,
      ` Actual Attack ${String(cm.tp).padStart(5)} ${String(cm.fn).padStart(5)}`,
      ` Actual Benign ${String(cm.fp).padStart(5)} ${String(cm.tn).padStart(5)}`,
      '',
      '=== End of Report ==='
    ];

    return lines.join('\n');
  }

  /**
   * Approximate ROC AUC via the trapezoidal rule over ranked predictions.
   * Returns 0.5 when the test set contains only one class, 0 when empty.
   * @private
   */
  _computeAUC(predictions) {
    if (predictions.length === 0) return 0;

    // Recover P(attack) from the stored (label, confidence) pair, then rank
    // from most to least attack-like.
    const ranked = predictions
      .map(p => ({
        positive: p.actual === 'attack' ? 1 : 0,
        score: p.predicted === 'attack' ? p.confidence : 1 - p.confidence
      }))
      .sort((a, b) => b.score - a.score);

    let totalPositive = 0;
    let totalNegative = 0;
    for (const item of ranked) {
      if (item.positive === 1) totalPositive += 1;
      else totalNegative += 1;
    }

    if (totalPositive === 0 || totalNegative === 0) return 0.5;

    let auc = 0;
    let tpr = 0;
    let fpr = 0;

    for (const item of ranked) {
      const prevTpr = tpr;
      const prevFpr = fpr;

      if (item.positive === 1) {
        tpr += 1 / totalPositive;
      } else {
        fpr += 1 / totalNegative;
      }

      // Trapezoidal rule over the ROC curve step
      auc += (fpr - prevFpr) * (tpr + prevTpr) / 2;
    }

    return auc;
  }
}
695
+
696
+ // =========================================================================
697
+ // TrainingPipeline
698
+ // =========================================================================
699
+
700
+ /**
701
+ * End-to-end pipeline for collecting, processing, training, and evaluating
702
+ * a fine-tuned detection model.
703
+ */
704
/**
 * End-to-end pipeline for collecting, processing, training, and evaluating
 * a fine-tuned detection model.
 */
class TrainingPipeline {
  /**
   * @param {Object} [config] - Pipeline configuration passed to ModelTrainer
   */
  constructor(config = {}) {
    this.config = config;
    this.stages = [];
    this.report = null;
  }

  /**
   * Add a custom processing stage, run after augmentation and before training.
   * @param {string} name - Stage name
   * @param {Function} fn - Stage function (receives the samples array, returns transformed data)
   * @returns {TrainingPipeline} this
   * @throws {Error} If fn is not a function
   */
  addStage(name, fn) {
    if (typeof fn !== 'function') {
      throw new Error(`[Agent Shield] Stage "${name}" must be a function`);
    }
    this.stages.push({ name, fn });
    return this;
  }

  /**
   * Run the full pipeline:
   * collect -> preprocess -> augment -> custom stages -> split -> train -> evaluate -> export.
   *
   * FIX: the dataset is now split BEFORE training and the model is trained
   * only on the train portion. The original trained on the full (augmented)
   * dataset and then evaluated on a random split of that same data, so the
   * reported metrics were measured on samples the model had already seen
   * (data leakage).
   *
   * @param {Array} rawData - Raw training data (array of {text, label} or scan results)
   * @returns {Object} {model, evaluation, exported, report}
   * @throws {Error} If rawData is not an array or yields no trainable samples
   */
  run(rawData) {
    if (!Array.isArray(rawData)) {
      throw new Error('[Agent Shield] rawData must be an array');
    }

    const startTime = Date.now();
    const stageResults = [];

    console.log('[Agent Shield] Training pipeline started');

    // Stage 1: Collect
    stageResults.push({ stage: 'collect', samples: rawData.length, duration: 0 });

    // Stage 2: Preprocess — normalize heterogeneous records into {text, label}
    const preprocessStart = Date.now();
    const dataset = new DatasetManager();
    for (const item of rawData) {
      const text = item.text || item.input || item.prompt || '';
      const label = item.label || (item.blocked || item.threat ? 'attack' : 'benign');
      if (text) {
        dataset.addSample(text, label, item.metadata);
      }
    }
    stageResults.push({
      stage: 'preprocess',
      samples: dataset.samples.length,
      duration: Date.now() - preprocessStart
    });

    // Stage 3: Augment
    const augmentStart = Date.now();
    dataset.augment();
    stageResults.push({
      stage: 'augment',
      samples: dataset.samples.length,
      duration: Date.now() - augmentStart
    });

    // Custom stages (each receives, and may replace, the samples array)
    let pipelineData = dataset.samples;
    for (const stage of this.stages) {
      const stageStart = Date.now();
      pipelineData = stage.fn(pipelineData);
      stageResults.push({
        stage: stage.name,
        samples: Array.isArray(pipelineData) ? pipelineData.length : 'N/A',
        duration: Date.now() - stageStart
      });
    }
    if (Array.isArray(pipelineData)) {
      // Keep the dataset in sync so the split and stats reflect stage output
      dataset.samples = pipelineData;
    }

    // Stage 4: Split FIRST so evaluation samples are held out of training
    const splitStart = Date.now();
    const splits = dataset.split(0.8);
    stageResults.push({
      stage: 'split',
      samples: dataset.samples.length,
      duration: Date.now() - splitStart
    });

    // Stage 5: Train on the train portion only
    const trainStart = Date.now();
    const trainer = new ModelTrainer(this.config);
    const trainingData = splits.train.length > 0 ? splits.train : dataset.samples;
    const model = trainer.train(trainingData);
    stageResults.push({
      stage: 'train',
      samples: trainingData.length,
      duration: Date.now() - trainStart
    });

    // Stage 6: Evaluate on held-out data (test, else validation). For tiny
    // datasets where both are empty, fall back to the training data so the
    // pipeline still completes — metrics are then optimistic.
    const evalStart = Date.now();
    const heldOut = splits.test.length > 0 ? splits.test : splits.validation;
    const evalSet = heldOut.length > 0 ? heldOut : trainingData;
    const evaluator = new ModelEvaluator();
    const evaluation = evaluator.evaluate(model, evalSet);
    stageResults.push({
      stage: 'evaluate',
      samples: evalSet.length,
      duration: Date.now() - evalStart
    });

    // Stage 7: Export
    const exportStart = Date.now();
    const exported = model.export();
    stageResults.push({
      stage: 'export',
      samples: 1,
      duration: Date.now() - exportStart
    });

    const totalDuration = Date.now() - startTime;

    this.report = {
      stages: stageResults,
      totalDuration,
      datasetStats: dataset.getStats(),
      evaluation: {
        accuracy: evaluation.accuracy,
        precision: evaluation.precision,
        recall: evaluation.recall,
        f1: evaluation.f1,
        roc_auc: evaluation.roc_auc
      },
      completedAt: new Date().toISOString()
    };

    console.log(`[Agent Shield] Training pipeline complete in ${totalDuration}ms`);

    return {
      model,
      evaluation,
      exported,
      report: this.report
    };
  }

  /**
   * Get the execution report from the last run().
   * @returns {Object|null} Report, or null before the first run
   */
  getReport() {
    return this.report;
  }
}
845
+
846
+ // =========================================================================
847
+ // Shared Utilities
848
+ // =========================================================================
849
+
850
+ /**
851
+ * Tokenize text into lowercase terms.
852
+ * @param {string} text
853
+ * @returns {string[]}
854
+ */
855
/**
 * Tokenize text into lowercase terms. Characters other than a-z, 0-9,
 * whitespace, "_" and "-" act as separators, and tokens of length <= 1
 * are discarded.
 * @param {string} text
 * @returns {string[]}
 */
function _tokenize(text) {
  const normalized = text.toLowerCase().replace(/[^a-z0-9\s_-]/g, ' ');
  const tokens = [];
  for (const piece of normalized.split(/\s+/)) {
    if (piece.length > 1) {
      tokens.push(piece);
    }
  }
  return tokens;
}
862
+
863
+ /**
864
+ * Sigmoid function.
865
+ * @param {number} z
866
+ * @returns {number}
867
+ */
868
/**
 * Logistic sigmoid: 1 / (1 + e^(-z)), with inputs beyond |500| clamped to
 * the saturated values (1 or 0) so Math.exp cannot overflow to Infinity.
 * @param {number} z
 * @returns {number} Value in [0, 1]
 */
function _sigmoid(z) {
  if (z > 500) {
    return 1;
  }
  if (z < -500) {
    return 0;
  }
  const expNegZ = Math.exp(-z);
  return 1 / (1 + expNegZ);
}
873
+
874
+ // =========================================================================
875
+ // Exports
876
+ // =========================================================================
877
+
878
// Public API of the fine-tuning module: training engine, end-to-end
// pipeline, dataset tooling, evaluation, and the serializable model class.
module.exports = {
  ModelTrainer,
  TrainingPipeline,
  DatasetManager,
  ModelEvaluator,
  FineTunedModel
};