agentshield-sdk 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +191 -0
- package/LICENSE +21 -0
- package/README.md +975 -0
- package/bin/agent-shield.js +680 -0
- package/package.json +118 -0
- package/src/adaptive.js +330 -0
- package/src/agent-protocol.js +998 -0
- package/src/alert-tuning.js +480 -0
- package/src/allowlist.js +603 -0
- package/src/audit-immutable.js +914 -0
- package/src/audit-streaming.js +469 -0
- package/src/badges.js +196 -0
- package/src/behavior-profiling.js +289 -0
- package/src/benchmark-harness.js +804 -0
- package/src/canary.js +271 -0
- package/src/certification.js +563 -0
- package/src/circuit-breaker.js +321 -0
- package/src/compliance.js +617 -0
- package/src/confidence-tuning.js +324 -0
- package/src/confused-deputy.js +624 -0
- package/src/context-scoring.js +360 -0
- package/src/conversation.js +494 -0
- package/src/cost-optimizer.js +1024 -0
- package/src/ctf.js +462 -0
- package/src/detector-core.js +1999 -0
- package/src/distributed.js +359 -0
- package/src/document-scanner.js +795 -0
- package/src/embedding.js +307 -0
- package/src/encoding.js +429 -0
- package/src/enterprise.js +405 -0
- package/src/errors.js +100 -0
- package/src/eu-ai-act.js +523 -0
- package/src/fuzzer.js +764 -0
- package/src/honeypot.js +328 -0
- package/src/i18n-patterns.js +523 -0
- package/src/index.js +430 -0
- package/src/integrations.js +528 -0
- package/src/llm-redteam.js +670 -0
- package/src/main.js +741 -0
- package/src/main.mjs +38 -0
- package/src/mcp-bridge.js +542 -0
- package/src/mcp-certification.js +846 -0
- package/src/mcp-sdk-integration.js +355 -0
- package/src/mcp-security-runtime.js +741 -0
- package/src/mcp-server.js +740 -0
- package/src/middleware.js +208 -0
- package/src/model-finetuning.js +884 -0
- package/src/model-fingerprint.js +1042 -0
- package/src/multi-agent-trust.js +453 -0
- package/src/multi-agent.js +404 -0
- package/src/multimodal.js +296 -0
- package/src/nist-mapping.js +505 -0
- package/src/observability.js +330 -0
- package/src/openclaw.js +450 -0
- package/src/otel.js +544 -0
- package/src/owasp-2025.js +483 -0
- package/src/pii.js +390 -0
- package/src/plugin-marketplace.js +628 -0
- package/src/plugin-system.js +349 -0
- package/src/policy-dsl.js +775 -0
- package/src/policy-extended.js +635 -0
- package/src/policy.js +443 -0
- package/src/presets.js +409 -0
- package/src/production.js +557 -0
- package/src/prompt-leakage.js +321 -0
- package/src/rag-vulnerability.js +579 -0
- package/src/redteam.js +475 -0
- package/src/response-handler.js +429 -0
- package/src/scanners.js +357 -0
- package/src/self-healing.js +363 -0
- package/src/semantic.js +339 -0
- package/src/shield-score.js +250 -0
- package/src/sso-saml.js +897 -0
- package/src/stream-scanner.js +806 -0
- package/src/testing.js +505 -0
- package/src/threat-encyclopedia.js +629 -0
- package/src/threat-intel-network.js +1017 -0
- package/src/token-analysis.js +467 -0
- package/src/tool-guard.js +412 -0
- package/src/tool-output-validator.js +354 -0
- package/src/utils.js +83 -0
- package/src/watermark.js +235 -0
- package/src/worker-scanner.js +601 -0
- package/types/index.d.ts +2088 -0
|
@@ -0,0 +1,884 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Agent Shield — Custom Model Fine-Tuning
|
|
5
|
+
*
|
|
6
|
+
* Train org-specific detection models on threat data.
|
|
7
|
+
* Uses TF-IDF + logistic regression — zero external dependencies.
|
|
8
|
+
*
|
|
9
|
+
* - ModelTrainer: Core training engine (gradient descent, binary cross-entropy)
|
|
10
|
+
* - TrainingPipeline: End-to-end collect -> train -> evaluate pipeline
|
|
11
|
+
* - DatasetManager: Dataset handling, augmentation, splitting
|
|
12
|
+
* - ModelEvaluator: Accuracy, precision, recall, F1, confusion matrix, ROC AUC
|
|
13
|
+
* - FineTunedModel: Trained model with predict, export, load
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
// =========================================================================
|
|
17
|
+
// FineTunedModel
|
|
18
|
+
// =========================================================================
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* A trained binary classifier using TF-IDF features and logistic regression.
|
|
22
|
+
*/
|
|
23
|
+
class FineTunedModel {
  /**
   * @param {number[]} weights - Weight vector (one per vocabulary term, plus a trailing bias)
   * @param {string[]} vocabulary - Ordered vocabulary terms
   * @param {Object} config - Training config used to produce this model. When it
   *   contains an `idf` map ({term: number}), prediction features are TF-IDF;
   *   otherwise plain normalized TF is used (legacy behavior).
   */
  constructor(weights, vocabulary, config) {
    this.weights = weights || [];
    this.vocabulary = vocabulary || [];
    this.config = config || {};
    this.createdAt = new Date().toISOString();
  }

  /**
   * Classify a text as 'attack' or 'benign'.
   * @param {string} text - Input text
   * @returns {{label: string, confidence: number}} confidence is the probability
   *   of the returned label (always >= 0.5)
   */
  predict(text) {
    const features = this._extractFeatures(text);
    const probability = this._sigmoid(this._dot(features));

    return {
      label: probability >= 0.5 ? 'attack' : 'benign',
      confidence: probability >= 0.5 ? probability : 1 - probability
    };
  }

  /**
   * Predict every text in a batch.
   * @param {string[]} texts - Array of input texts
   * @returns {Array<{label: string, confidence: number}>}
   */
  predictBatch(texts) {
    return texts.map(text => this.predict(text));
  }

  /**
   * Serialize the model to a plain JSON-safe object (inverse of {@link FineTunedModel.load}).
   * @returns {Object}
   */
  export() {
    return {
      type: 'agent-shield-finetuned-model',
      version: '1.0',
      weights: this.weights,
      vocabulary: this.vocabulary,
      config: this.config,
      createdAt: this.createdAt
    };
  }

  /**
   * Rehydrate a model previously produced by export().
   * @param {Object} json - Serialized model
   * @returns {FineTunedModel}
   * @throws {Error} If json is missing or carries the wrong type tag
   */
  static load(json) {
    if (!json || json.type !== 'agent-shield-finetuned-model') {
      throw new Error('[Agent Shield] Invalid model format');
    }
    const model = new FineTunedModel(json.weights, json.vocabulary, json.config);
    model.createdAt = json.createdAt || new Date().toISOString();
    return model;
  }

  /**
   * Vocabulary terms ranked by absolute weight (most influential first).
   * @param {number} [topN=20] - Number of top features to return
   * @returns {Array<{term: string, weight: number}>}
   */
  getFeatureImportance(topN = 20) {
    const features = this.vocabulary.map((term, i) => ({
      term,
      weight: this.weights[i] || 0
    }));

    // Sort by absolute weight descending
    features.sort((a, b) => Math.abs(b.weight) - Math.abs(a.weight));

    return features.slice(0, topN);
  }

  /**
   * Build the feature vector for a text, aligned with this.vocabulary plus a
   * trailing bias input.
   *
   * Fix: ModelTrainer builds TF-IDF features for training, but this method
   * previously produced plain-TF features at inference time — a train/serve
   * feature mismatch. When the model config carries the training-time `idf`
   * map we now multiply it in; models exported without it keep the old
   * plain-TF behavior, so existing serialized models still load and predict.
   * @private
   * @param {string} text
   * @returns {number[]} Feature vector aligned with vocabulary
   */
  _extractFeatures(text) {
    const tokens = _tokenize(text);
    const termFreq = {};
    for (const token of tokens) {
      termFreq[token] = (termFreq[token] || 0) + 1;
    }

    const features = new Array(this.vocabulary.length + 1).fill(0);
    const totalTokens = tokens.length || 1;
    const idf = this.config.idf || null;

    for (let i = 0; i < this.vocabulary.length; i++) {
      const term = this.vocabulary[i];
      if (termFreq[term]) {
        const tf = termFreq[term] / totalTokens;
        // Match training: tf * idf when IDF was persisted, plain tf otherwise.
        features[i] = idf ? tf * (idf[term] || 0) : tf;
      }
    }

    // Bias input is always 1.
    features[this.vocabulary.length] = 1;

    return features;
  }

  /**
   * Dot product of a feature vector with the weights, truncated to the
   * shorter of the two (defends against vocabulary/weight length drift).
   * @private
   */
  _dot(features) {
    let sum = 0;
    const len = Math.min(features.length, this.weights.length);
    for (let i = 0; i < len; i++) {
      sum += features[i] * this.weights[i];
    }
    return sum;
  }

  /**
   * Numerically-safe logistic function: clamps |z| > 500 so Math.exp cannot
   * overflow to Infinity.
   * @private
   */
  _sigmoid(z) {
    if (z > 500) return 1;
    if (z < -500) return 0;
    return 1 / (1 + Math.exp(-z));
  }
}
|
|
161
|
+
|
|
162
|
+
// =========================================================================
|
|
163
|
+
// DatasetManager
|
|
164
|
+
// =========================================================================
|
|
165
|
+
|
|
166
|
+
/**
|
|
167
|
+
* Manages training datasets for the fine-tuning pipeline.
|
|
168
|
+
*/
|
|
169
|
+
class DatasetManager {
  constructor() {
    // Flat list of {text, label, metadata} training records.
    this.samples = [];
  }

  /**
   * Register a single labelled training sample.
   * @param {string} text - Sample text (must be a non-empty string)
   * @param {string} label - Either 'attack' or 'benign'
   * @param {Object} [metadata] - Optional metadata attached to the sample
   * @returns {DatasetManager} this (chainable)
   * @throws {Error} On empty/non-string text or an unknown label
   */
  addSample(text, label, metadata) {
    const textIsValid = typeof text === 'string' && text.length > 0;
    if (!textIsValid) {
      throw new Error('[Agent Shield] Sample text must be a non-empty string');
    }
    if (!['attack', 'benign'].includes(label)) {
      throw new Error('[Agent Shield] Label must be "attack" or "benign"');
    }
    this.samples.push({ text, label, metadata: metadata || {} });
    return this;
  }

  /**
   * Import labelled samples from Agent Shield scan results. A result counts
   * as an attack when it was blocked, carries a threat, has a non-'safe'
   * status, or a severity other than 'low'.
   * @param {Array} scanResults - Array of scan result objects
   * @returns {DatasetManager} this (chainable)
   * @throws {Error} If scanResults is not an array
   */
  addFromScanHistory(scanResults) {
    if (!Array.isArray(scanResults)) {
      throw new Error('[Agent Shield] scanResults must be an array');
    }

    for (const entry of scanResults) {
      const text = entry.input || entry.text || entry.prompt || '';
      if (!text) continue;

      const looksHostile =
        entry.blocked ||
        entry.threat ||
        (entry.status && entry.status !== 'safe') ||
        (entry.severity && entry.severity !== 'low');

      this.samples.push({
        text,
        label: looksHostile ? 'attack' : 'benign',
        metadata: {
          source: 'scan_history',
          severity: entry.severity,
          category: entry.category,
          originalResult: entry
        }
      });
    }

    return this;
  }

  /**
   * Grow the dataset with synthetic variants of every existing sample:
   * upper/lower-casing, first-half truncation (texts longer than 20 chars),
   * and one random word swap (texts with 3+ words).
   * @returns {DatasetManager} this (chainable)
   */
  augment() {
    const variants = [];
    const derive = (source, text, tag) => {
      variants.push({
        text,
        label: source.label,
        metadata: { ...source.metadata, augmented: tag }
      });
    };

    for (const sample of this.samples) {
      derive(sample, sample.text.toUpperCase(), 'uppercase');
      derive(sample, sample.text.toLowerCase(), 'lowercase');

      if (sample.text.length > 20) {
        const half = Math.ceil(sample.text.length / 2);
        derive(sample, sample.text.slice(0, half), 'truncated');
      }

      const words = sample.text.split(/\s+/);
      if (words.length > 2) {
        const swapped = [...words];
        const a = Math.floor(Math.random() * swapped.length);
        const b = Math.floor(Math.random() * swapped.length);
        [swapped[a], swapped[b]] = [swapped[b], swapped[a]];
        derive(sample, swapped.join(' '), 'shuffled');
      }
    }

    this.samples.push(...variants);
    return this;
  }

  /**
   * Randomly shuffle and partition the samples.
   * @param {number} [ratio=0.8] - Training fraction; the remainder is split
   *   evenly between validation and test.
   * @returns {{train: Array, validation: Array, test: Array}}
   */
  split(ratio = 0.8) {
    // Fisher-Yates shuffle over a copy; this.samples is left untouched.
    const pool = [...this.samples];
    for (let i = pool.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [pool[i], pool[j]] = [pool[j], pool[i]];
    }

    const trainEnd = Math.floor(pool.length * ratio);
    const valEnd = Math.floor(pool.length * (ratio + (1 - ratio) / 2));

    return {
      train: pool.slice(0, trainEnd),
      validation: pool.slice(trainEnd, valEnd),
      test: pool.slice(valEnd)
    };
  }

  /**
   * Summarize the dataset: counts, label balance, mean text length, and how
   * many samples were produced by augment().
   * @returns {Object}
   */
  getStats() {
    const total = this.samples.length;
    let attackCount = 0;
    let benignCount = 0;
    let lengthSum = 0;
    let augmentedCount = 0;

    for (const { label, text, metadata } of this.samples) {
      if (label === 'attack') attackCount += 1;
      else if (label === 'benign') benignCount += 1;
      lengthSum += text.length;
      if (metadata && metadata.augmented) augmentedCount += 1;
    }

    return {
      total,
      attackCount,
      benignCount,
      attackRatio: total > 0 ? attackCount / total : 0,
      benignRatio: total > 0 ? benignCount / total : 0,
      avgTextLength: total > 0 ? Math.round(lengthSum / total) : 0,
      augmentedCount
    };
  }

  /**
   * Serialize the dataset (samples plus a stats snapshot).
   * @returns {Object}
   */
  export() {
    return {
      type: 'agent-shield-dataset',
      version: '1.0',
      samples: this.samples,
      stats: this.getStats(),
      exportedAt: new Date().toISOString()
    };
  }
}
|
|
336
|
+
|
|
337
|
+
// =========================================================================
|
|
338
|
+
// ModelTrainer
|
|
339
|
+
// =========================================================================
|
|
340
|
+
|
|
341
|
+
/**
|
|
342
|
+
* Core training engine using TF-IDF features and logistic regression
|
|
343
|
+
* with gradient descent.
|
|
344
|
+
*/
|
|
345
|
+
class ModelTrainer {
  /**
   * @param {Object} [config]
   * @param {number} [config.learningRate=0.01] - Learning rate for gradient descent
   * @param {number} [config.epochs=10] - Number of training epochs
   * @param {number} [config.batchSize=32] - Mini-batch size
   * @param {number} [config.validationSplit=0.2] - Fraction held out for validation
   */
  constructor(config = {}) {
    this.learningRate = config.learningRate || 0.01;
    this.epochs = config.epochs || 10;
    this.batchSize = config.batchSize || 32;
    this.validationSplit = config.validationSplit || 0.2;
  }

  /**
   * Train a binary classifier on the provided dataset.
   * Uses TF-IDF features, sigmoid activation, and binary cross-entropy loss
   * optimized with mini-batch gradient descent.
   *
   * @param {Array<{text: string, label: string}>} dataset - Training samples
   * @returns {FineTunedModel} Trained model (its config includes the IDF map
   *   so inference can rebuild identical TF-IDF features)
   * @throws {Error} If the dataset is missing or empty
   */
  train(dataset) {
    if (!dataset || dataset.length === 0) {
      throw new Error('[Agent Shield] Dataset cannot be empty');
    }

    console.log(`[Agent Shield] Training started: ${dataset.length} samples, ${this.epochs} epochs`);

    // Build vocabulary and IDF from training data
    const vocabulary = this._buildVocabulary(dataset);
    console.log(`[Agent Shield] Vocabulary size: ${vocabulary.length}`);

    const idf = this._computeIDF(dataset, vocabulary);

    // Build TF-IDF feature matrix and label vector
    const { features, labels } = this._buildFeatureMatrix(dataset, vocabulary, idf);

    // Hold out the tail of the (caller-ordered) dataset for validation
    const splitIdx = Math.floor(features.length * (1 - this.validationSplit));
    const trainFeatures = features.slice(0, splitIdx);
    const trainLabels = labels.slice(0, splitIdx);
    const valFeatures = features.slice(splitIdx);
    const valLabels = labels.slice(splitIdx);

    // Initialize weights (vocabulary size + 1 for bias) with small symmetric noise
    const numFeatures = vocabulary.length + 1;
    const weights = new Array(numFeatures).fill(0);
    for (let i = 0; i < numFeatures; i++) {
      weights[i] = (Math.random() - 0.5) * 0.01;
    }

    const lossHistory = [];

    // Mini-batch gradient descent
    for (let epoch = 0; epoch < this.epochs; epoch++) {
      let epochLoss = 0;

      for (let batchStart = 0; batchStart < trainFeatures.length; batchStart += this.batchSize) {
        const batchEnd = Math.min(batchStart + this.batchSize, trainFeatures.length);
        const batchSize = batchEnd - batchStart;
        const gradients = new Array(numFeatures).fill(0);

        for (let i = batchStart; i < batchEnd; i++) {
          const x = trainFeatures[i];
          const y = trainLabels[i];

          // Forward pass: z = w . x, yhat = sigmoid(z)
          let z = 0;
          for (let j = 0; j < numFeatures; j++) {
            z += weights[j] * x[j];
          }
          const yhat = _sigmoid(z);

          // Binary cross-entropy loss, clamped away from log(0)
          const clampedYhat = Math.max(1e-7, Math.min(1 - 1e-7, yhat));
          epochLoss += -(y * Math.log(clampedYhat) + (1 - y) * Math.log(1 - clampedYhat));

          // Gradient of BCE w.r.t. weights: (yhat - y) * x_j
          const error = yhat - y;
          for (let j = 0; j < numFeatures; j++) {
            gradients[j] += error * x[j];
          }
        }

        // Update weights with the mean batch gradient
        for (let j = 0; j < numFeatures; j++) {
          weights[j] -= this.learningRate * (gradients[j] / batchSize);
        }
      }

      const avgLoss = trainFeatures.length > 0 ? epochLoss / trainFeatures.length : 0;
      const valLoss = this._validationLoss(weights, valFeatures, valLabels);

      lossHistory.push({ epoch: epoch + 1, trainLoss: avgLoss, valLoss });
      console.log(`[Agent Shield] Epoch ${epoch + 1}/${this.epochs} — train_loss: ${avgLoss.toFixed(4)}, val_loss: ${valLoss.toFixed(4)}`);
    }

    const model = new FineTunedModel(weights, vocabulary, {
      learningRate: this.learningRate,
      epochs: this.epochs,
      batchSize: this.batchSize,
      trainingSamples: trainFeatures.length,
      validationSamples: valFeatures.length,
      lossHistory,
      // Persist the IDF map alongside the model so inference code can
      // reproduce the exact TF-IDF features used during training.
      idf
    });

    console.log('[Agent Shield] Training complete');
    return model;
  }

  /**
   * Mean binary cross-entropy of the given weights on the validation set.
   * Returns 0 when there is no validation data.
   * @private
   */
  _validationLoss(weights, valFeatures, valLabels) {
    if (valFeatures.length === 0) return 0;

    let loss = 0;
    for (let i = 0; i < valFeatures.length; i++) {
      let z = 0;
      for (let j = 0; j < weights.length; j++) {
        z += weights[j] * valFeatures[i][j];
      }
      const yhat = _sigmoid(z);
      const clampedYhat = Math.max(1e-7, Math.min(1 - 1e-7, yhat));
      loss += -(valLabels[i] * Math.log(clampedYhat) + (1 - valLabels[i]) * Math.log(1 - clampedYhat));
    }
    return loss / valFeatures.length;
  }

  /**
   * Build the vocabulary: unique tokens sorted by document frequency,
   * keeping tokens seen in at least 2 documents, capped at 5000 terms.
   * @private
   */
  _buildVocabulary(dataset) {
    const freq = {};
    for (const sample of dataset) {
      // Set-per-document so each doc contributes at most 1 per token
      const tokens = new Set(_tokenize(sample.text));
      for (const token of tokens) {
        freq[token] = (freq[token] || 0) + 1;
      }
    }

    return Object.entries(freq)
      .filter(([, count]) => count >= 2)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5000)
      .map(([term]) => term);
  }

  /**
   * Compute smoothed IDF (inverse document frequency) for each vocabulary term.
   *
   * Fix: document frequency was previously counted with raw substring
   * matching (`text.toLowerCase().includes(term)`), which is inconsistent
   * with the tokenizer used everywhere else (e.g. the term "art" matched
   * "start") and cost O(|vocab| * total text length). We now tokenize each
   * document once into a Set — consistent with _buildVocabulary and
   * _buildFeatureMatrix, and linear in the corpus size.
   * @private
   */
  _computeIDF(dataset, vocabulary) {
    const docCount = dataset.length;

    // Token-based document frequency: one pass over the corpus.
    const df = {};
    for (const sample of dataset) {
      for (const token of new Set(_tokenize(sample.text))) {
        df[token] = (df[token] || 0) + 1;
      }
    }

    const idf = {};
    for (const term of vocabulary) {
      idf[term] = Math.log((docCount + 1) / ((df[term] || 0) + 1)) + 1; // smoothed IDF
    }

    return idf;
  }

  /**
   * Build the TF-IDF feature matrix (one row per sample, with a trailing
   * bias input of 1) and the 0/1 label vector ('attack' -> 1).
   * @private
   */
  _buildFeatureMatrix(dataset, vocabulary, idf) {
    const features = [];
    const labels = [];

    for (const sample of dataset) {
      const tokens = _tokenize(sample.text);
      const termFreq = {};
      for (const token of tokens) {
        termFreq[token] = (termFreq[token] || 0) + 1;
      }

      const totalTokens = tokens.length || 1;
      const featureVec = new Array(vocabulary.length + 1);

      for (let i = 0; i < vocabulary.length; i++) {
        const term = vocabulary[i];
        const tf = (termFreq[term] || 0) / totalTokens;
        featureVec[i] = tf * (idf[term] || 0);
      }

      // Bias term
      featureVec[vocabulary.length] = 1;

      features.push(featureVec);
      labels.push(sample.label === 'attack' ? 1 : 0);
    }

    return { features, labels };
  }
}
|
|
552
|
+
|
|
553
|
+
// =========================================================================
|
|
554
|
+
// ModelEvaluator
|
|
555
|
+
// =========================================================================
|
|
556
|
+
|
|
557
|
+
/**
|
|
558
|
+
* Evaluates a fine-tuned model and computes classification metrics.
|
|
559
|
+
*/
|
|
560
|
+
class ModelEvaluator {
  constructor() {
    // Most recent evaluate() result; consumed by generateReport().
    this.lastReport = null;
  }

  /**
   * Evaluate a model on a labelled test set.
   * @param {FineTunedModel} model - Object exposing predict(text)
   * @param {Array<{text: string, label: string}>} testSet - Test samples
   * @returns {Object} accuracy, precision, recall, f1, confusionMatrix,
   *   roc_auc, totalSamples and per-sample predictions
   * @throws {Error} When the model is missing or the test set is empty
   */
  evaluate(model, testSet) {
    if (!model || !testSet || testSet.length === 0) {
      throw new Error('[Agent Shield] Model and non-empty test set required');
    }

    const counts = { tp: 0, fp: 0, tn: 0, fn: 0 };
    const predictions = testSet.map((sample) => {
      const outcome = model.predict(sample.text);
      const actual = sample.label;
      const predicted = outcome.label;

      const pair = `${actual}|${predicted}`;
      if (pair === 'attack|attack') counts.tp += 1;
      else if (pair === 'benign|attack') counts.fp += 1;
      else if (pair === 'benign|benign') counts.tn += 1;
      else if (pair === 'attack|benign') counts.fn += 1;

      return {
        text: sample.text.slice(0, 80),
        actual,
        predicted,
        confidence: outcome.confidence
      };
    });

    const { tp, fp, tn, fn } = counts;
    const total = testSet.length;
    const precision = tp + fp > 0 ? tp / (tp + fp) : 0;
    const recall = tp + fn > 0 ? tp / (tp + fn) : 0;

    this.lastReport = {
      accuracy: total > 0 ? (tp + tn) / total : 0,
      precision,
      recall,
      f1: precision + recall > 0
        ? 2 * (precision * recall) / (precision + recall)
        : 0,
      confusionMatrix: { tp, fp, tn, fn },
      roc_auc: this._computeAUC(predictions),
      totalSamples: total,
      predictions
    };
    return this.lastReport;
  }

  /**
   * Render the last evaluation as a human-readable multi-line report.
   * @returns {string} Formatted report, or a notice when nothing has run yet
   */
  generateReport() {
    if (!this.lastReport) {
      return '[Agent Shield] No evaluation has been run yet.';
    }

    const r = this.lastReport;
    const cm = r.confusionMatrix;
    const pct = (v) => `${(v * 100).toFixed(2)}%`;

    return [
      '=== Agent Shield — Model Evaluation Report ===',
      '',
      `Samples evaluated: ${r.totalSamples}`,
      '',
      'Metrics:',
      `  Accuracy: ${pct(r.accuracy)}`,
      `  Precision: ${pct(r.precision)}`,
      `  Recall: ${pct(r.recall)}`,
      `  F1 Score: ${pct(r.f1)}`,
      `  ROC AUC: ${r.roc_auc.toFixed(4)}`,
      '',
      'Confusion Matrix:',
      `  Predicted Attack Predicted Benign`,
      `  Actual Attack ${String(cm.tp).padStart(5)} ${String(cm.fn).padStart(5)}`,
      `  Actual Benign ${String(cm.fp).padStart(5)} ${String(cm.tn).padStart(5)}`,
      '',
      '=== End of Report ==='
    ].join('\n');
  }

  /**
   * Approximate ROC AUC via the trapezoidal rule over predictions ranked by
   * their score for the positive ('attack') class.
   * @private
   */
  _computeAUC(predictions) {
    if (predictions.length === 0) return 0;

    // Score each prediction as P(attack) and rank descending.
    const ranked = predictions
      .map((p) => ({
        positive: p.actual === 'attack',
        score: p.predicted === 'attack' ? p.confidence : 1 - p.confidence
      }))
      .sort((a, b) => b.score - a.score);

    const positives = ranked.filter((s) => s.positive).length;
    const negatives = ranked.length - positives;

    // Degenerate sets (single class) have no meaningful ROC curve.
    if (positives === 0 || negatives === 0) return 0.5;

    let auc = 0;
    let tpr = 0;
    let fpr = 0;
    for (const { positive } of ranked) {
      const prevTpr = tpr;
      const prevFpr = fpr;
      if (positive) {
        tpr += 1 / positives;
      } else {
        fpr += 1 / negatives;
      }
      // Trapezoidal rule
      auc += (fpr - prevFpr) * (tpr + prevTpr) / 2;
    }

    return auc;
  }
}
|
|
695
|
+
|
|
696
|
+
// =========================================================================
|
|
697
|
+
// TrainingPipeline
|
|
698
|
+
// =========================================================================
|
|
699
|
+
|
|
700
|
+
/**
 * End-to-end pipeline for collecting, processing, training, and evaluating
 * a fine-tuned detection model.
 *
 * Built-in stages run in order: collect -> preprocess -> augment ->
 * [custom stages] -> train -> evaluate -> export. Per-stage timings and
 * sample counts are recorded in a report retrievable via getReport().
 */
class TrainingPipeline {
  /**
   * @param {Object} [config] - Pipeline configuration passed to ModelTrainer
   */
  constructor(config = {}) {
    this.config = config;
    this.stages = [];
    this.report = null;
  }

  /**
   * Add a custom processing stage to the pipeline. Custom stages run after
   * augmentation and before training; each receives the current sample array
   * and should return the transformed array.
   * @param {string} name - Stage name
   * @param {Function} fn - Stage function (receives data, returns transformed data)
   * @returns {TrainingPipeline} this (chainable)
   * @throws {Error} If fn is not a function
   */
  addStage(name, fn) {
    if (typeof fn !== 'function') {
      throw new Error(`[Agent Shield] Stage "${name}" must be a function`);
    }
    this.stages.push({ name, fn });
    return this;
  }

  /**
   * Run the full pipeline: collect -> preprocess -> augment -> train -> evaluate -> export.
   * @param {Array} rawData - Raw training data (array of {text, label} or scan results)
   * @returns {Object} Pipeline result with model, evaluation, and export
   * @throws {Error} If rawData is not an array
   */
  run(rawData) {
    // Fail fast with a clear message instead of an opaque TypeError deeper in the pipeline.
    if (!Array.isArray(rawData)) {
      throw new Error('[Agent Shield] TrainingPipeline.run() expects an array of samples');
    }

    const startTime = Date.now();
    const stageResults = [];

    console.log('[Agent Shield] Training pipeline started');

    // Stage 1: Collect — raw data is taken as-is.
    const data = rawData;
    stageResults.push({ stage: 'collect', samples: data.length, duration: 0 });

    // Stage 2: Preprocess — normalize heterogeneous inputs ({text|input|prompt},
    // explicit label or one inferred from blocked/threat flags) into a DatasetManager.
    const preprocessStart = Date.now();
    const dataset = new DatasetManager();
    for (const item of data) {
      const text = item.text || item.input || item.prompt || '';
      const label = item.label || (item.blocked || item.threat ? 'attack' : 'benign');
      if (text) {
        dataset.addSample(text, label, item.metadata);
      }
    }
    stageResults.push({
      stage: 'preprocess',
      samples: dataset.samples.length,
      duration: Date.now() - preprocessStart
    });

    // Stage 3: Augment — expands the dataset in place.
    const augmentStart = Date.now();
    dataset.augment();
    stageResults.push({
      stage: 'augment',
      samples: dataset.samples.length,
      duration: Date.now() - augmentStart
    });

    // Run user-registered stages; each may transform the sample array.
    let pipelineData = dataset.samples;
    for (const stage of this.stages) {
      const stageStart = Date.now();
      pipelineData = stage.fn(pipelineData);
      stageResults.push({
        stage: stage.name,
        samples: Array.isArray(pipelineData) ? pipelineData.length : 'N/A',
        duration: Date.now() - stageStart
      });
    }

    // Stage 4: Train. If a custom stage returned a non-array, fall back to the
    // augmented dataset so training still receives valid input.
    const trainStart = Date.now();
    const trainer = new ModelTrainer(this.config);
    const trainingData = Array.isArray(pipelineData) ? pipelineData : dataset.samples;
    const model = trainer.train(trainingData);
    stageResults.push({
      stage: 'train',
      samples: trainingData.length,
      duration: Date.now() - trainStart
    });

    // Stage 5: Evaluate.
    // NOTE(review): the model was trained on the full (augmented) dataset and is
    // evaluated here on a split of that same data, so reported metrics may be
    // optimistic — consider training on splits.train only. TODO confirm intent.
    const evalStart = Date.now();
    const splits = dataset.split(0.8);
    const evalSet = splits.test.length > 0 ? splits.test : splits.validation;
    const evaluator = new ModelEvaluator();
    const evaluation = evaluator.evaluate(model, evalSet);
    stageResults.push({
      stage: 'evaluate',
      samples: evalSet.length,
      duration: Date.now() - evalStart
    });

    // Stage 6: Export the trained model to its serializable form.
    const exportStart = Date.now();
    const exported = model.export();
    stageResults.push({
      stage: 'export',
      samples: 1,
      duration: Date.now() - exportStart
    });

    const totalDuration = Date.now() - startTime;

    this.report = {
      stages: stageResults,
      totalDuration,
      datasetStats: dataset.getStats(),
      evaluation: {
        accuracy: evaluation.accuracy,
        precision: evaluation.precision,
        recall: evaluation.recall,
        f1: evaluation.f1,
        roc_auc: evaluation.roc_auc
      },
      completedAt: new Date().toISOString()
    };

    console.log(`[Agent Shield] Training pipeline complete in ${totalDuration}ms`);

    return {
      model,
      evaluation,
      exported,
      report: this.report
    };
  }

  /**
   * Get the pipeline execution report.
   * @returns {Object|null} Report from the last run, or null if never run
   */
  getReport() {
    return this.report;
  }
}
|
|
845
|
+
|
|
846
|
+
// =========================================================================
|
|
847
|
+
// Shared Utilities
|
|
848
|
+
// =========================================================================
|
|
849
|
+
|
|
850
|
+
/**
 * Tokenize text into lowercase terms.
 *
 * Lowercases the input, extracts maximal runs of [a-z0-9_-] as candidate
 * tokens (everything else acts as a separator), and drops single-character
 * tokens.
 * @param {string} text
 * @returns {string[]}
 */
function _tokenize(text) {
  const terms = text.toLowerCase().match(/[a-z0-9_-]+/g) || [];
  return terms.filter(term => term.length > 1);
}
|
|
862
|
+
|
|
863
|
+
/**
 * Logistic sigmoid with saturation guards.
 *
 * Returns exactly 1 (or 0) once |z| exceeds 500, avoiding Math.exp at
 * extreme magnitudes where the result is fully saturated anyway.
 * @param {number} z
 * @returns {number}
 */
function _sigmoid(z) {
  if (Math.abs(z) > 500) {
    return z > 0 ? 1 : 0;
  }
  return 1 / (1 + Math.exp(-z));
}
|
|
873
|
+
|
|
874
|
+
// =========================================================================
|
|
875
|
+
// Exports
|
|
876
|
+
// =========================================================================
|
|
877
|
+
|
|
878
|
+
// Public API of this module: the trainer, the pipeline orchestrator, the
// dataset manager, the evaluator, and the trained-model container.
module.exports = {
  ModelTrainer,
  TrainingPipeline,
  DatasetManager,
  ModelEvaluator,
  FineTunedModel
};
|