agentshield-sdk 11.0.0 → 13.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +88 -79
- package/package.json +2 -2
- package/src/agent-intent.js +359 -672
- package/src/cross-turn.js +217 -564
- package/src/detector-core.js +106 -0
- package/src/ensemble.js +300 -409
- package/src/fleet-defense.js +483 -0
- package/src/hitl-guard.js +487 -0
- package/src/incident-response.js +265 -0
- package/src/main.js +121 -33
- package/src/mcp-guard.js +4 -0
- package/src/memory-guard.js +637 -0
- package/src/micro-model.js +15 -1
- package/src/ml-detector.js +110 -266
- package/src/normalizer.js +296 -604
- package/src/persistent-learning.js +104 -620
- package/src/semantic-guard.js +452 -0
- package/src/semantic-isolation.js +1 -0
- package/src/smart-config.js +557 -705
- package/src/sota-benchmark.js +268 -10
- package/src/trap-defense.js +468 -0
- package/types/index.d.ts +251 -580
package/src/agent-intent.js
CHANGED
|
@@ -1,798 +1,484 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
|
-
* Agent Shield — Agent
|
|
4
|
+
* Agent Shield — Agent Behavioral Fingerprinting (v12.0)
|
|
5
5
|
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
6
|
+
* Captures an agent's normal behavior profile by tracking tool call frequency,
|
|
7
|
+
* argument patterns, response patterns, and timing profiles. Generates a
|
|
8
|
+
* portable fingerprint hash for comparison and compromise detection.
|
|
9
9
|
*
|
|
10
|
-
*
|
|
11
|
-
* - AgentIntent — static declaration of purpose, allowed tools, allowed topics.
|
|
12
|
-
* - GoalDriftDetector — monitors a conversation for drift over time.
|
|
13
|
-
* - ToolSequenceModeler — learns bigram tool transitions, flags anomalies.
|
|
14
|
-
*
|
|
15
|
-
* Zero dependencies, local-only. All detection runs via TF-IDF cosine
|
|
16
|
-
* similarity and simple Markov chains — no ML libraries required.
|
|
10
|
+
* All detection runs locally — no data ever leaves your environment.
|
|
17
11
|
*
|
|
18
12
|
* @module agent-intent
|
|
19
13
|
*/
|
|
20
14
|
|
|
15
|
+
const crypto = require('crypto');
|
|
16
|
+
|
|
21
17
|
// =========================================================================
|
|
22
|
-
//
|
|
18
|
+
// CONSTANTS
|
|
23
19
|
// =========================================================================
|
|
24
20
|
|
|
25
|
-
/**
|
|
26
|
-
const
|
|
27
|
-
'the', 'be', 'to', 'of', 'and', 'in', 'that', 'have', 'it', 'for',
|
|
28
|
-
'not', 'on', 'with', 'he', 'as', 'you', 'do', 'at', 'this', 'but',
|
|
29
|
-
'his', 'by', 'from', 'they', 'we', 'say', 'her', 'she', 'or', 'an',
|
|
30
|
-
'will', 'my', 'one', 'all', 'would', 'there', 'their', 'what', 'so',
|
|
31
|
-
'up', 'out', 'if', 'about', 'who', 'get', 'which', 'go', 'me',
|
|
32
|
-
'when', 'make', 'can', 'like', 'no', 'just', 'him', 'know', 'take',
|
|
33
|
-
'into', 'your', 'some', 'could', 'them', 'see', 'other', 'than',
|
|
34
|
-
'then', 'now', 'look', 'only', 'come', 'its', 'over', 'also', 'back',
|
|
35
|
-
'after', 'use', 'how', 'our', 'well', 'way', 'even', 'new', 'want',
|
|
36
|
-
'because', 'any', 'these', 'give', 'most', 'us', 'is', 'are', 'was',
|
|
37
|
-
'were', 'been', 'has', 'had', 'did', 'am',
|
|
38
|
-
]);
|
|
21
|
+
/** Default deviation threshold for compromise detection (z-score). */
|
|
22
|
+
const DEFAULT_DEVIATION_THRESHOLD = 2.5;
|
|
39
23
|
|
|
40
|
-
/**
|
|
41
|
-
|
|
42
|
-
* Not a full Porter stemmer, but good enough for TF-IDF matching.
|
|
43
|
-
* @param {string} word
|
|
44
|
-
* @returns {string}
|
|
45
|
-
*/
|
|
46
|
-
function stem(word) {
|
|
47
|
-
if (word.length <= 3) return word;
|
|
48
|
-
// Handle -ies -> -y (e.g. itineraries -> itinerary, cities -> city)
|
|
49
|
-
if (word.endsWith('ies') && word.length > 4) {
|
|
50
|
-
return word.slice(0, -3) + 'y';
|
|
51
|
-
}
|
|
52
|
-
// Order matters: try longest suffixes first
|
|
53
|
-
const suffixes = [
|
|
54
|
-
'ational', 'tional', 'encies', 'ances', 'ments', 'ating',
|
|
55
|
-
'ation', 'aries', 'ness', 'ment', 'ings', 'ible', 'able',
|
|
56
|
-
'ence', 'ance', 'ious', 'eous', 'less', 'ting', 'ally', 'ful',
|
|
57
|
-
'ing', 'ary', 'ely', 'ers', 'ion', 'ous', 'ive',
|
|
58
|
-
'ed', 'ly', 'es', 'er', 'al', 'ty',
|
|
59
|
-
's'
|
|
60
|
-
];
|
|
61
|
-
for (const suffix of suffixes) {
|
|
62
|
-
if (word.endsWith(suffix) && word.length - suffix.length >= 2) {
|
|
63
|
-
return word.slice(0, -suffix.length);
|
|
64
|
-
}
|
|
65
|
-
}
|
|
66
|
-
return word;
|
|
67
|
-
}
|
|
24
|
+
/** Minimum observations before fingerprint is considered stable. */
|
|
25
|
+
const MIN_OBSERVATIONS = 10;
|
|
68
26
|
|
|
69
|
-
/**
|
|
70
|
-
|
|
71
|
-
* @param {string} text
|
|
72
|
-
* @returns {string[]}
|
|
73
|
-
*/
|
|
74
|
-
function tokenize(text) {
|
|
75
|
-
if (!text) return [];
|
|
76
|
-
if (typeof text !== 'string') text = String(text);
|
|
77
|
-
return text.toLowerCase()
|
|
78
|
-
.replace(/[^a-z0-9\s]/g, ' ')
|
|
79
|
-
.split(/\s+/)
|
|
80
|
-
.filter(w => w.length > 1);
|
|
81
|
-
}
|
|
27
|
+
/** Maximum history entries per metric to prevent unbounded growth. */
|
|
28
|
+
const MAX_HISTORY = 10000;
|
|
82
29
|
|
|
83
|
-
/**
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
.filter(w => !STOP_WORDS.has(w))
|
|
91
|
-
.map(w => stem(w));
|
|
92
|
-
}
|
|
30
|
+
/** Similarity score thresholds. */
|
|
31
|
+
const SIMILARITY_THRESHOLDS = {
|
|
32
|
+
identical: 0.95,
|
|
33
|
+
similar: 0.75,
|
|
34
|
+
related: 0.50,
|
|
35
|
+
different: 0.25
|
|
36
|
+
};
|
|
93
37
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
* @returns {Map<string, number>}
|
|
98
|
-
*/
|
|
99
|
-
function termFrequency(tokens) {
|
|
100
|
-
const tf = new Map();
|
|
101
|
-
if (tokens.length === 0) return tf;
|
|
102
|
-
for (const t of tokens) {
|
|
103
|
-
tf.set(t, (tf.get(t) || 0) + 1);
|
|
104
|
-
}
|
|
105
|
-
for (const [k, v] of tf) {
|
|
106
|
-
tf.set(k, v / tokens.length);
|
|
107
|
-
}
|
|
108
|
-
return tf;
|
|
109
|
-
}
|
|
38
|
+
// =========================================================================
|
|
39
|
+
// UTILITY FUNCTIONS
|
|
40
|
+
// =========================================================================
|
|
110
41
|
|
|
111
42
|
/**
|
|
112
|
-
*
|
|
113
|
-
* @param {
|
|
114
|
-
* @returns {
|
|
43
|
+
* Compute mean of an array of numbers.
|
|
44
|
+
* @param {number[]} arr
|
|
45
|
+
* @returns {number}
|
|
115
46
|
*/
|
|
116
|
-
function
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
for (
|
|
120
|
-
|
|
121
|
-
for (const t of seen) {
|
|
122
|
-
df.set(t, (df.get(t) || 0) + 1);
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
const idf = new Map();
|
|
126
|
-
for (const [term, count] of df) {
|
|
127
|
-
idf.set(term, Math.log((n + 1) / (count + 1)) + 1);
|
|
128
|
-
}
|
|
129
|
-
return idf;
|
|
47
|
+
function mean(arr) {
|
|
48
|
+
if (!arr || arr.length === 0) return 0;
|
|
49
|
+
let sum = 0;
|
|
50
|
+
for (let i = 0; i < arr.length; i++) sum += arr[i];
|
|
51
|
+
return sum / arr.length;
|
|
130
52
|
}
|
|
131
53
|
|
|
132
54
|
/**
|
|
133
|
-
*
|
|
134
|
-
* @param {
|
|
135
|
-
* @
|
|
136
|
-
* @returns {Map<string, number>}
|
|
55
|
+
* Compute standard deviation of an array of numbers.
|
|
56
|
+
* @param {number[]} arr
|
|
57
|
+
* @returns {number}
|
|
137
58
|
*/
|
|
138
|
-
function
|
|
139
|
-
|
|
140
|
-
const
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
59
|
+
function stddev(arr) {
|
|
60
|
+
if (!arr || arr.length < 2) return 0;
|
|
61
|
+
const m = mean(arr);
|
|
62
|
+
let sumSq = 0;
|
|
63
|
+
for (let i = 0; i < arr.length; i++) {
|
|
64
|
+
const d = arr[i] - m;
|
|
65
|
+
sumSq += d * d;
|
|
144
66
|
}
|
|
145
|
-
return
|
|
67
|
+
return Math.sqrt(sumSq / (arr.length - 1));
|
|
146
68
|
}
|
|
147
69
|
|
|
148
70
|
/**
|
|
149
|
-
*
|
|
71
|
+
* Compute cosine similarity between two frequency maps.
|
|
150
72
|
* @param {Map<string, number>} a
|
|
151
73
|
* @param {Map<string, number>} b
|
|
152
|
-
* @returns {number} 0
|
|
74
|
+
* @returns {number} 0..1
|
|
153
75
|
*/
|
|
154
|
-
function
|
|
155
|
-
|
|
76
|
+
function cosineSimilarity(a, b) {
|
|
77
|
+
if (a.size === 0 && b.size === 0) return 1;
|
|
78
|
+
if (a.size === 0 || b.size === 0) return 0;
|
|
79
|
+
|
|
156
80
|
const keys = new Set([...a.keys(), ...b.keys()]);
|
|
81
|
+
let dot = 0;
|
|
82
|
+
let magA = 0;
|
|
83
|
+
let magB = 0;
|
|
84
|
+
|
|
157
85
|
for (const k of keys) {
|
|
158
86
|
const va = a.get(k) || 0;
|
|
159
87
|
const vb = b.get(k) || 0;
|
|
160
88
|
dot += va * vb;
|
|
161
|
-
|
|
162
|
-
|
|
89
|
+
magA += va * va;
|
|
90
|
+
magB += vb * vb;
|
|
163
91
|
}
|
|
164
|
-
const denom = Math.sqrt(normA) * Math.sqrt(normB);
|
|
165
|
-
if (!isFinite(denom) || denom === 0) return 0;
|
|
166
|
-
const result = dot / denom;
|
|
167
|
-
return isFinite(result) ? result : 0;
|
|
168
|
-
}
|
|
169
92
|
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
93
|
+
const denom = Math.sqrt(magA) * Math.sqrt(magB);
|
|
94
|
+
return denom === 0 ? 0 : dot / denom;
|
|
95
|
+
}
|
|
173
96
|
|
|
174
97
|
/**
|
|
175
|
-
*
|
|
176
|
-
*
|
|
98
|
+
* Jensen-Shannon divergence between two distributions (lower = more similar).
|
|
99
|
+
* @param {Map<string, number>} p
|
|
100
|
+
* @param {Map<string, number>} q
|
|
101
|
+
* @returns {number} 0..1
|
|
177
102
|
*/
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
throw new Error('[Agent Shield] AgentIntent requires a purpose string');
|
|
190
|
-
}
|
|
191
|
-
this.purpose = config.purpose;
|
|
192
|
-
this.allowedTools = config.allowedTools || null;
|
|
193
|
-
this.allowedTopics = config.allowedTopics || null;
|
|
194
|
-
this.maxDriftScore = typeof config.maxDriftScore === 'number' ? config.maxDriftScore : 0.7;
|
|
195
|
-
this.onDrift = config.onDrift || null;
|
|
196
|
-
|
|
197
|
-
// Pre-compute purpose tokens and TF vector
|
|
198
|
-
this._purposeTokens = tokenizeForTfIdf(this.purpose);
|
|
199
|
-
|
|
200
|
-
// Build topic tokens from allowedTopics
|
|
201
|
-
this._topicTokens = [];
|
|
202
|
-
if (this.allowedTopics && this.allowedTopics.length > 0) {
|
|
203
|
-
for (const topic of this.allowedTopics) {
|
|
204
|
-
this._topicTokens.push(...tokenizeForTfIdf(topic));
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
// Combined purpose + topics tokens for broader matching
|
|
209
|
-
this._allPurposeTokens = [...this._purposeTokens, ...this._topicTokens];
|
|
210
|
-
|
|
211
|
-
console.log(`[Agent Shield] AgentIntent created: "${this.purpose.substring(0, 80)}"`);
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
/**
|
|
215
|
-
* Check if a user message is on-topic for this agent's purpose.
|
|
216
|
-
* Uses TF-IDF cosine similarity between purpose and message.
|
|
217
|
-
* @param {string} message - User message
|
|
218
|
-
* @returns {object} { onTopic: bool, relevanceScore: number 0-1, drift: number 0-1, reason: string }
|
|
219
|
-
*/
|
|
220
|
-
checkMessage(message) {
|
|
221
|
-
if (!message || typeof message !== 'string' || message.trim().length === 0) {
|
|
222
|
-
return { onTopic: true, relevanceScore: 0, drift: 1, reason: 'Empty message' };
|
|
223
|
-
}
|
|
224
|
-
|
|
225
|
-
const msgTokens = tokenizeForTfIdf(message);
|
|
226
|
-
if (msgTokens.length === 0) {
|
|
227
|
-
return { onTopic: true, relevanceScore: 0, drift: 1, reason: 'No meaningful tokens in message' };
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
// Build IDF from purpose + message as two documents
|
|
231
|
-
const docs = [this._allPurposeTokens, msgTokens];
|
|
232
|
-
const idf = buildIdf(docs);
|
|
233
|
-
|
|
234
|
-
// Build TF-IDF vectors
|
|
235
|
-
const purposeVec = tfidfVector(this._allPurposeTokens, idf);
|
|
236
|
-
const msgVec = tfidfVector(msgTokens, idf);
|
|
237
|
-
|
|
238
|
-
// TF-IDF cosine similarity
|
|
239
|
-
const cosSim = cosineSim(purposeVec, msgVec);
|
|
240
|
-
|
|
241
|
-
// Term frequency cosine (no IDF) — better for short text vs fixed reference
|
|
242
|
-
const purposeTf = termFrequency(this._allPurposeTokens);
|
|
243
|
-
const msgTf = termFrequency(msgTokens);
|
|
244
|
-
const tfSim = cosineSim(purposeTf, msgTf);
|
|
245
|
-
|
|
246
|
-
// Message coverage: fraction of message tokens matching purpose vocabulary
|
|
247
|
-
const purposeSet = new Set(this._allPurposeTokens);
|
|
248
|
-
const overlapCount = msgTokens.filter(t => purposeSet.has(t)).length;
|
|
249
|
-
const coverageRatio = msgTokens.length > 0 ? overlapCount / msgTokens.length : 0;
|
|
250
|
-
|
|
251
|
-
// Blend: 25% TF-IDF cosine + 25% TF cosine + 50% coverage
|
|
252
|
-
// Coverage dominates because for intent checking, the key question is:
|
|
253
|
-
// "how much of the user's message uses purpose-related vocabulary?"
|
|
254
|
-
const relevanceScore = (cosSim * 0.25) + (tfSim * 0.25) + (coverageRatio * 0.5);
|
|
255
|
-
const drift = 1 - relevanceScore;
|
|
256
|
-
const onTopic = drift <= this.maxDriftScore;
|
|
257
|
-
|
|
258
|
-
let reason;
|
|
259
|
-
if (onTopic) {
|
|
260
|
-
reason = `Message is on-topic (relevance: ${(relevanceScore * 100).toFixed(1)}%)`;
|
|
261
|
-
} else {
|
|
262
|
-
reason = `Message drifted from purpose (relevance: ${(relevanceScore * 100).toFixed(1)}%, threshold: ${((1 - this.maxDriftScore) * 100).toFixed(1)}%)`;
|
|
263
|
-
}
|
|
264
|
-
|
|
265
|
-
if (!onTopic && this.onDrift) {
|
|
266
|
-
try {
|
|
267
|
-
this.onDrift({ message: message.substring(0, 200), drift, relevanceScore, reason });
|
|
268
|
-
} catch (e) {
|
|
269
|
-
console.error('[Agent Shield] onDrift callback error:', e.message);
|
|
270
|
-
}
|
|
271
|
-
}
|
|
272
|
-
|
|
273
|
-
return { onTopic, relevanceScore, drift, reason };
|
|
103
|
+
function jsDivergence(p, q) {
|
|
104
|
+
const keys = new Set([...p.keys(), ...q.keys()]);
|
|
105
|
+
const total = keys.size;
|
|
106
|
+
if (total === 0) return 0;
|
|
107
|
+
|
|
108
|
+
// Normalize to probability distributions
|
|
109
|
+
let sumP = 0;
|
|
110
|
+
let sumQ = 0;
|
|
111
|
+
for (const k of keys) {
|
|
112
|
+
sumP += p.get(k) || 0;
|
|
113
|
+
sumQ += q.get(k) || 0;
|
|
274
114
|
}
|
|
115
|
+
if (sumP === 0 && sumQ === 0) return 0;
|
|
116
|
+
if (sumP === 0 || sumQ === 0) return 1;
|
|
275
117
|
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
if (!toolName || typeof toolName !== 'string') {
|
|
284
|
-
return { allowed: false, reason: 'Invalid tool name' };
|
|
285
|
-
}
|
|
286
|
-
|
|
287
|
-
// If no allowedTools specified, everything is allowed
|
|
288
|
-
if (!this.allowedTools) {
|
|
289
|
-
return { allowed: true, reason: 'No tool restrictions defined' };
|
|
290
|
-
}
|
|
291
|
-
|
|
292
|
-
const normalizedName = toolName.toLowerCase().trim();
|
|
293
|
-
const allowed = this.allowedTools.some(t => t.toLowerCase().trim() === normalizedName);
|
|
294
|
-
|
|
295
|
-
if (allowed) {
|
|
296
|
-
return { allowed: true, reason: `Tool "${toolName}" is in the allowed list` };
|
|
297
|
-
}
|
|
298
|
-
|
|
299
|
-
return {
|
|
300
|
-
allowed: false,
|
|
301
|
-
reason: `Tool "${toolName}" is not in the allowed list [${this.allowedTools.join(', ')}]`
|
|
302
|
-
};
|
|
118
|
+
let jsd = 0;
|
|
119
|
+
for (const k of keys) {
|
|
120
|
+
const pi = (p.get(k) || 0) / sumP;
|
|
121
|
+
const qi = (q.get(k) || 0) / sumQ;
|
|
122
|
+
const mi = (pi + qi) / 2;
|
|
123
|
+
if (pi > 0 && mi > 0) jsd += 0.5 * pi * Math.log2(pi / mi);
|
|
124
|
+
if (qi > 0 && mi > 0) jsd += 0.5 * qi * Math.log2(qi / mi);
|
|
303
125
|
}
|
|
304
126
|
|
|
305
|
-
|
|
306
|
-
* Get the intent's TF-IDF vector (for comparison).
|
|
307
|
-
* @returns {Map<string, number>}
|
|
308
|
-
*/
|
|
309
|
-
getPurposeVector() {
|
|
310
|
-
const idf = buildIdf([this._allPurposeTokens]);
|
|
311
|
-
return tfidfVector(this._allPurposeTokens, idf);
|
|
312
|
-
}
|
|
127
|
+
return Math.min(1, Math.max(0, jsd));
|
|
313
128
|
}
|
|
314
129
|
|
|
315
130
|
// =========================================================================
|
|
316
|
-
//
|
|
131
|
+
// AGENT FINGERPRINT
|
|
317
132
|
// =========================================================================
|
|
318
133
|
|
|
319
134
|
/**
|
|
320
|
-
*
|
|
321
|
-
*
|
|
135
|
+
* Agent Behavioral Fingerprint.
|
|
136
|
+
*
|
|
137
|
+
* Captures an agent's normal behavior profile and detects deviations that
|
|
138
|
+
* may indicate compromise.
|
|
139
|
+
*
|
|
140
|
+
* @example
|
|
141
|
+
* const fp = new AgentFingerprint({ agentId: 'my-agent' });
|
|
142
|
+
* fp.recordToolCall('readFile', { path: '/data/config.json' }, 12);
|
|
143
|
+
* fp.recordToolCall('readFile', { path: '/data/users.json' }, 15);
|
|
144
|
+
* fp.recordResponse('text', 150);
|
|
145
|
+
* const hash = fp.generateHash();
|
|
146
|
+
* const result = fp.detectCompromise({ tool: 'execCommand', args: { cmd: 'curl evil.com' }, latencyMs: 500 });
|
|
322
147
|
*/
|
|
323
|
-
class
|
|
148
|
+
class AgentFingerprint {
|
|
324
149
|
/**
|
|
325
|
-
* @param {
|
|
326
|
-
* @param {
|
|
327
|
-
* @param {number} [
|
|
328
|
-
* @param {number} [
|
|
329
|
-
* @param {number} [config.checkInterval=5] - Check every N messages
|
|
330
|
-
* @param {function} [config.onDrift] - Callback on drift
|
|
150
|
+
* @param {object} [options]
|
|
151
|
+
* @param {string} [options.agentId] - Unique agent identifier
|
|
152
|
+
* @param {number} [options.deviationThreshold] - Z-score threshold for anomaly (default 2.5)
|
|
153
|
+
* @param {number} [options.minObservations] - Minimum observations before stable (default 10)
|
|
331
154
|
*/
|
|
332
|
-
constructor(
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
this.
|
|
337
|
-
this.windowSize = config.windowSize || 10;
|
|
338
|
-
this.driftThreshold = typeof config.driftThreshold === 'number' ? config.driftThreshold : 0.6;
|
|
339
|
-
this.checkInterval = config.checkInterval || 5;
|
|
340
|
-
this.onDrift = config.onDrift || null;
|
|
341
|
-
|
|
342
|
-
this._messages = [];
|
|
343
|
-
this._driftHistory = [];
|
|
344
|
-
this._totalMessages = 0;
|
|
345
|
-
this._driftEvents = 0;
|
|
346
|
-
this._topicShifts = 0;
|
|
347
|
-
|
|
348
|
-
console.log('[Agent Shield] GoalDriftDetector initialized ' +
|
|
349
|
-
`(window=${this.windowSize}, threshold=${this.driftThreshold})`);
|
|
350
|
-
}
|
|
155
|
+
constructor(options = {}) {
|
|
156
|
+
this.agentId = options.agentId || `agent-${Date.now()}`;
|
|
157
|
+
this.deviationThreshold = options.deviationThreshold || DEFAULT_DEVIATION_THRESHOLD;
|
|
158
|
+
this.minObservations = options.minObservations || MIN_OBSERVATIONS;
|
|
159
|
+
this.createdAt = Date.now();
|
|
351
160
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
* @param {string} message - The message text
|
|
355
|
-
* @param {string} [role='user'] - 'user' or 'assistant'
|
|
356
|
-
* @returns {object} {
|
|
357
|
-
* driftScore: number 0-1 (0=on topic, 1=completely off),
|
|
358
|
-
* driftDetected: bool,
|
|
359
|
-
* trend: 'stable' | 'drifting' | 'recovering',
|
|
360
|
-
* turnsSincePurpose: number,
|
|
361
|
-
* topicShift: bool (sudden topic change),
|
|
362
|
-
* reason: string
|
|
363
|
-
* }
|
|
364
|
-
*/
|
|
365
|
-
addMessage(message, role = 'user') {
|
|
366
|
-
if (!message || typeof message !== 'string') {
|
|
367
|
-
return {
|
|
368
|
-
driftScore: 0,
|
|
369
|
-
driftDetected: false,
|
|
370
|
-
trend: 'stable',
|
|
371
|
-
turnsSincePurpose: 0,
|
|
372
|
-
topicShift: false,
|
|
373
|
-
reason: 'Empty or invalid message'
|
|
374
|
-
};
|
|
375
|
-
}
|
|
161
|
+
/** @type {Map<string, number>} Tool call frequency counts. */
|
|
162
|
+
this.toolFrequency = new Map();
|
|
376
163
|
|
|
377
|
-
|
|
378
|
-
|
|
164
|
+
/** @type {Map<string, Set<string>>} Argument key patterns per tool. */
|
|
165
|
+
this.argumentPatterns = new Map();
|
|
379
166
|
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
tokens: msgTokens,
|
|
383
|
-
role,
|
|
384
|
-
timestamp: Date.now()
|
|
385
|
-
});
|
|
167
|
+
/** @type {Map<string, number[]>} Latency observations per tool. */
|
|
168
|
+
this.timingProfiles = new Map();
|
|
386
169
|
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
this._messages = this._messages.slice(-this.windowSize * 3);
|
|
390
|
-
}
|
|
170
|
+
/** @type {Map<string, number>} Response type frequency counts. */
|
|
171
|
+
this.responsePatterns = new Map();
|
|
391
172
|
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
const windowTokens = [];
|
|
395
|
-
for (const msg of window) {
|
|
396
|
-
windowTokens.push(...msg.tokens);
|
|
397
|
-
}
|
|
173
|
+
/** @type {number[]} Inter-call intervals in ms. */
|
|
174
|
+
this.callIntervals = [];
|
|
398
175
|
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
const docs = [purposeTokens, windowTokens];
|
|
402
|
-
const idf = buildIdf(docs);
|
|
403
|
-
|
|
404
|
-
const purposeVec = tfidfVector(purposeTokens, idf);
|
|
405
|
-
const windowVec = tfidfVector(windowTokens, idf);
|
|
406
|
-
const cosSim = cosineSim(purposeVec, windowVec);
|
|
407
|
-
|
|
408
|
-
// TF cosine (no IDF)
|
|
409
|
-
const purposeTf = termFrequency(purposeTokens);
|
|
410
|
-
const windowTf = termFrequency(windowTokens);
|
|
411
|
-
const tfSim = cosineSim(purposeTf, windowTf);
|
|
412
|
-
|
|
413
|
-
// Coverage: fraction of window tokens in purpose vocabulary
|
|
414
|
-
const purposeSet = new Set(purposeTokens);
|
|
415
|
-
const overlapCount = windowTokens.filter(t => purposeSet.has(t)).length;
|
|
416
|
-
const coverageRatio = windowTokens.length > 0 ? overlapCount / windowTokens.length : 0;
|
|
417
|
-
|
|
418
|
-
const relevance = (cosSim * 0.25) + (tfSim * 0.25) + (coverageRatio * 0.5);
|
|
419
|
-
const driftScore = 1 - relevance;
|
|
420
|
-
const driftDetected = driftScore > this.driftThreshold;
|
|
421
|
-
|
|
422
|
-
// Detect sudden topic shift by comparing current message to previous
|
|
423
|
-
let topicShift = false;
|
|
424
|
-
if (this._messages.length >= 2) {
|
|
425
|
-
const prev = this._messages[this._messages.length - 2];
|
|
426
|
-
const prevTf = termFrequency(prev.tokens);
|
|
427
|
-
const currTf = termFrequency(msgTokens);
|
|
428
|
-
const localSim = cosineSim(prevTf, currTf);
|
|
429
|
-
// A sharp drop in local similarity signals a topic shift
|
|
430
|
-
if (localSim < 0.1 && msgTokens.length > 2 && prev.tokens.length > 2) {
|
|
431
|
-
topicShift = true;
|
|
432
|
-
this._topicShifts++;
|
|
433
|
-
}
|
|
434
|
-
}
|
|
176
|
+
/** @type {number} Total observations recorded. */
|
|
177
|
+
this.totalObservations = 0;
|
|
435
178
|
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
for (let i = this._messages.length - 1; i >= 0; i--) {
|
|
439
|
-
const msg = this._messages[i];
|
|
440
|
-
const msgDocs = [purposeTokens, msg.tokens];
|
|
441
|
-
const msgIdf = buildIdf(msgDocs);
|
|
442
|
-
const msgPurposeVec = tfidfVector(purposeTokens, msgIdf);
|
|
443
|
-
const msgVec = tfidfVector(msg.tokens, msgIdf);
|
|
444
|
-
const msgCosSim = cosineSim(msgPurposeVec, msgVec);
|
|
445
|
-
|
|
446
|
-
// TF cosine
|
|
447
|
-
const msgPurposeTf = termFrequency(purposeTokens);
|
|
448
|
-
const msgTf = termFrequency(msg.tokens);
|
|
449
|
-
const msgTfSim = cosineSim(msgPurposeTf, msgTf);
|
|
450
|
-
|
|
451
|
-
// Coverage
|
|
452
|
-
const msgOverlap = msg.tokens.filter(t => purposeSet.has(t)).length;
|
|
453
|
-
const msgCoverage = msg.tokens.length > 0 ? msgOverlap / msg.tokens.length : 0;
|
|
454
|
-
|
|
455
|
-
const msgRelevance = (msgCosSim * 0.25) + (msgTfSim * 0.25) + (msgCoverage * 0.5);
|
|
456
|
-
if (msgRelevance > (1 - this.driftThreshold)) {
|
|
457
|
-
break;
|
|
458
|
-
}
|
|
459
|
-
turnsSincePurpose++;
|
|
460
|
-
}
|
|
179
|
+
/** @type {number|null} Timestamp of last recorded event. */
|
|
180
|
+
this._lastCallTime = null;
|
|
461
181
|
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
if (this._driftHistory.length > 100) {
|
|
465
|
-
this._driftHistory = this._driftHistory.slice(-100);
|
|
466
|
-
}
|
|
182
|
+
console.log(`[Agent Shield] AgentFingerprint created for ${this.agentId}`);
|
|
183
|
+
}
|
|
467
184
|
|
|
468
|
-
|
|
469
|
-
|
|
185
|
+
/**
|
|
186
|
+
* Record a tool call observation.
|
|
187
|
+
* @param {string} toolName - Name of the tool invoked
|
|
188
|
+
* @param {object} [args] - Arguments passed to the tool
|
|
189
|
+
* @param {number} [latencyMs] - Call latency in milliseconds
|
|
190
|
+
*/
|
|
191
|
+
recordToolCall(toolName, args = {}, latencyMs = 0) {
|
|
192
|
+
if (!toolName || typeof toolName !== 'string') return;
|
|
193
|
+
|
|
194
|
+
// Track frequency
|
|
195
|
+
this.toolFrequency.set(toolName, (this.toolFrequency.get(toolName) || 0) + 1);
|
|
470
196
|
|
|
471
|
-
|
|
472
|
-
|
|
197
|
+
// Track argument key patterns
|
|
198
|
+
if (!this.argumentPatterns.has(toolName)) {
|
|
199
|
+
this.argumentPatterns.set(toolName, new Set());
|
|
200
|
+
}
|
|
201
|
+
const argKeys = Object.keys(args || {}).sort().join(',');
|
|
202
|
+
if (argKeys) {
|
|
203
|
+
this.argumentPatterns.get(toolName).add(argKeys);
|
|
473
204
|
}
|
|
474
205
|
|
|
475
|
-
//
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
206
|
+
// Track timing
|
|
207
|
+
if (!this.timingProfiles.has(toolName)) {
|
|
208
|
+
this.timingProfiles.set(toolName, []);
|
|
209
|
+
}
|
|
210
|
+
const timings = this.timingProfiles.get(toolName);
|
|
211
|
+
if (timings.length < MAX_HISTORY) {
|
|
212
|
+
timings.push(latencyMs);
|
|
482
213
|
}
|
|
483
214
|
|
|
484
|
-
//
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
turnsSincePurpose,
|
|
491
|
-
topicShift,
|
|
492
|
-
message: message.substring(0, 200),
|
|
493
|
-
reason
|
|
494
|
-
});
|
|
495
|
-
} catch (e) {
|
|
496
|
-
console.error('[Agent Shield] onDrift callback error:', e.message);
|
|
215
|
+
// Track call intervals
|
|
216
|
+
const now = Date.now();
|
|
217
|
+
if (this._lastCallTime !== null) {
|
|
218
|
+
const interval = now - this._lastCallTime;
|
|
219
|
+
if (this.callIntervals.length < MAX_HISTORY) {
|
|
220
|
+
this.callIntervals.push(interval);
|
|
497
221
|
}
|
|
498
222
|
}
|
|
223
|
+
this._lastCallTime = now;
|
|
499
224
|
|
|
500
|
-
|
|
501
|
-
driftScore,
|
|
502
|
-
driftDetected,
|
|
503
|
-
trend,
|
|
504
|
-
turnsSincePurpose,
|
|
505
|
-
topicShift,
|
|
506
|
-
reason
|
|
507
|
-
};
|
|
508
|
-
}
|
|
509
|
-
|
|
510
|
-
/**
|
|
511
|
-
* Calculate drift trend from recent scores.
|
|
512
|
-
* @private
|
|
513
|
-
* @returns {'stable' | 'drifting' | 'recovering'}
|
|
514
|
-
*/
|
|
515
|
-
_calcTrend() {
|
|
516
|
-
const h = this._driftHistory;
|
|
517
|
-
if (h.length < 3) return 'stable';
|
|
518
|
-
|
|
519
|
-
const last3 = h.slice(-3);
|
|
520
|
-
const increasing = last3[0] < last3[1] && last3[1] < last3[2];
|
|
521
|
-
const decreasing = last3[0] > last3[1] && last3[1] > last3[2];
|
|
522
|
-
|
|
523
|
-
if (increasing) return 'drifting';
|
|
524
|
-
if (decreasing) return 'recovering';
|
|
525
|
-
return 'stable';
|
|
225
|
+
this.totalObservations++;
|
|
526
226
|
}
|
|
527
227
|
|
|
528
228
|
/**
|
|
529
|
-
*
|
|
530
|
-
* @
|
|
229
|
+
* Record a response observation.
|
|
230
|
+
* @param {string} responseType - Type of response (e.g. 'text', 'json', 'error')
|
|
231
|
+
* @param {number} [length] - Response length in characters
|
|
531
232
|
*/
|
|
532
|
-
|
|
533
|
-
return
|
|
233
|
+
recordResponse(responseType, length = 0) {
|
|
234
|
+
if (!responseType || typeof responseType !== 'string') return;
|
|
235
|
+
this.responsePatterns.set(responseType, (this.responsePatterns.get(responseType) || 0) + 1);
|
|
236
|
+
this.totalObservations++;
|
|
534
237
|
}
|
|
535
238
|
|
|
536
239
|
/**
|
|
537
|
-
*
|
|
240
|
+
* Check if the fingerprint has enough data to be considered stable.
|
|
241
|
+
* @returns {boolean}
|
|
538
242
|
*/
|
|
539
|
-
|
|
540
|
-
this.
|
|
541
|
-
this._driftHistory = [];
|
|
542
|
-
this._totalMessages = 0;
|
|
543
|
-
this._driftEvents = 0;
|
|
544
|
-
this._topicShifts = 0;
|
|
545
|
-
console.log('[Agent Shield] GoalDriftDetector reset');
|
|
243
|
+
isStable() {
|
|
244
|
+
return this.totalObservations >= this.minObservations;
|
|
546
245
|
}
|
|
547
246
|
|
|
548
247
|
/**
|
|
549
|
-
*
|
|
550
|
-
* @returns {
|
|
248
|
+
* Generate a portable hash that uniquely identifies this agent's behavior.
|
|
249
|
+
* @returns {string} SHA-256 hex hash
|
|
551
250
|
*/
|
|
552
|
-
|
|
553
|
-
const
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
560
|
-
driftEvents: this._driftEvents,
|
|
561
|
-
topicShifts: this._topicShifts,
|
|
562
|
-
averageDrift: avgDrift,
|
|
563
|
-
maxDrift,
|
|
564
|
-
currentTrend: this._calcTrend(),
|
|
565
|
-
historyLength: h.length
|
|
251
|
+
generateHash() {
|
|
252
|
+
const profile = {
|
|
253
|
+
agentId: this.agentId,
|
|
254
|
+
toolFrequency: Object.fromEntries(this.toolFrequency),
|
|
255
|
+
argumentPatterns: {},
|
|
256
|
+
timingStats: {},
|
|
257
|
+
responsePatterns: Object.fromEntries(this.responsePatterns),
|
|
258
|
+
totalObservations: this.totalObservations
|
|
566
259
|
};
|
|
567
|
-
}
|
|
568
|
-
}
|
|
569
260
|
|
|
570
|
-
//
|
|
571
|
-
|
|
572
|
-
|
|
261
|
+
// Serialize argument patterns
|
|
262
|
+
for (const [tool, patterns] of this.argumentPatterns) {
|
|
263
|
+
profile.argumentPatterns[tool] = [...patterns].sort();
|
|
264
|
+
}
|
|
573
265
|
|
|
574
|
-
|
|
575
|
-
const
|
|
266
|
+
// Serialize timing statistics (mean + stddev, not raw data)
|
|
267
|
+
for (const [tool, timings] of this.timingProfiles) {
|
|
268
|
+
profile.timingStats[tool] = {
|
|
269
|
+
mean: Math.round(mean(timings) * 100) / 100,
|
|
270
|
+
stddev: Math.round(stddev(timings) * 100) / 100,
|
|
271
|
+
count: timings.length
|
|
272
|
+
};
|
|
273
|
+
}
|
|
576
274
|
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
* and flags anomalous sequences.
|
|
580
|
-
*/
|
|
581
|
-
class ToolSequenceModeler {
|
|
582
|
-
/**
|
|
583
|
-
* @param {object} [config]
|
|
584
|
-
* @param {number} [config.learningPeriod=50] - Tool calls before modeling starts
|
|
585
|
-
* @param {number} [config.anomalyThreshold=0.15] - Probability below this = anomaly
|
|
586
|
-
* @param {number} [config.maxChainLength=10] - Max sequence length to track
|
|
587
|
-
*/
|
|
588
|
-
constructor(config = {}) {
|
|
589
|
-
this.learningPeriod = config.learningPeriod || 50;
|
|
590
|
-
this.anomalyThreshold = typeof config.anomalyThreshold === 'number' ? config.anomalyThreshold : 0.15;
|
|
591
|
-
this.maxChainLength = config.maxChainLength || 10;
|
|
592
|
-
|
|
593
|
-
/** @type {Object<string, Object<string, number>>} Bigram counts: from -> to -> count */
|
|
594
|
-
this._transitions = {};
|
|
595
|
-
/** @type {string[]} Recent tool sequence */
|
|
596
|
-
this._sequence = [];
|
|
597
|
-
/** @type {number} Total tool calls recorded */
|
|
598
|
-
this._totalCalls = 0;
|
|
599
|
-
/** @type {number} Anomalies detected */
|
|
600
|
-
this._anomalyCount = 0;
|
|
601
|
-
/** @type {Object<string, number>} Tool call counts */
|
|
602
|
-
this._toolCounts = {};
|
|
603
|
-
|
|
604
|
-
console.log(`[Agent Shield] ToolSequenceModeler initialized ` +
|
|
605
|
-
`(learningPeriod=${this.learningPeriod}, anomalyThreshold=${this.anomalyThreshold})`);
|
|
275
|
+
const serialized = JSON.stringify(profile, Object.keys(profile).sort());
|
|
276
|
+
return crypto.createHash('sha256').update(serialized).digest('hex');
|
|
606
277
|
}
|
|
607
278
|
|
|
608
279
|
/**
|
|
609
|
-
*
|
|
610
|
-
* @param {
|
|
611
|
-
* @
|
|
612
|
-
* @returns {object} {
|
|
613
|
-
* allowed: bool,
|
|
614
|
-
* anomalyScore: number 0-1 (0=normal, 1=never seen),
|
|
615
|
-
* probability: number (transition probability from previous tool),
|
|
616
|
-
* isLearning: bool,
|
|
617
|
-
* reason: string
|
|
618
|
-
* }
|
|
280
|
+
* Compare this fingerprint with another and return a similarity score.
|
|
281
|
+
* @param {AgentFingerprint} other - Another fingerprint to compare against
|
|
282
|
+
* @returns {{ score: number, label: string, details: object }}
|
|
619
283
|
*/
|
|
620
|
-
|
|
621
|
-
if (!
|
|
622
|
-
return {
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
284
|
+
compare(other) {
|
|
285
|
+
if (!(other instanceof AgentFingerprint)) {
|
|
286
|
+
return { score: 0, label: 'invalid', details: { error: 'Not an AgentFingerprint instance' } };
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
const details = {};
|
|
290
|
+
|
|
291
|
+
// 1. Tool frequency similarity (cosine)
|
|
292
|
+
details.toolFrequency = cosineSimilarity(this.toolFrequency, other.toolFrequency);
|
|
293
|
+
|
|
294
|
+
// 2. Argument pattern overlap (Jaccard)
|
|
295
|
+
let argOverlap = 0;
|
|
296
|
+
let argTotal = 0;
|
|
297
|
+
const allTools = new Set([...this.argumentPatterns.keys(), ...other.argumentPatterns.keys()]);
|
|
298
|
+
for (const tool of allTools) {
|
|
299
|
+
const a = this.argumentPatterns.get(tool) || new Set();
|
|
300
|
+
const b = other.argumentPatterns.get(tool) || new Set();
|
|
301
|
+
const union = new Set([...a, ...b]);
|
|
302
|
+
const intersection = [...a].filter(x => b.has(x));
|
|
303
|
+
if (union.size > 0) {
|
|
304
|
+
argOverlap += intersection.length / union.size;
|
|
305
|
+
argTotal++;
|
|
306
|
+
}
|
|
629
307
|
}
|
|
308
|
+
details.argumentPatterns = argTotal > 0 ? argOverlap / argTotal : (allTools.size === 0 ? 1 : 0);
|
|
630
309
|
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
const isLearning = this._totalCalls <= this.learningPeriod;
|
|
310
|
+
// 3. Response pattern similarity (cosine)
|
|
311
|
+
details.responsePatterns = cosineSimilarity(this.responsePatterns, other.responsePatterns);
|
|
634
312
|
|
|
635
|
-
//
|
|
636
|
-
const
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
642
|
-
|
|
313
|
+
// 4. Timing profile similarity (1 - JS divergence of mean latencies)
|
|
314
|
+
const timingA = new Map();
|
|
315
|
+
const timingB = new Map();
|
|
316
|
+
for (const [tool, timings] of this.timingProfiles) {
|
|
317
|
+
timingA.set(tool, mean(timings));
|
|
318
|
+
}
|
|
319
|
+
for (const [tool, timings] of other.timingProfiles) {
|
|
320
|
+
timingB.set(tool, mean(timings));
|
|
643
321
|
}
|
|
644
|
-
|
|
322
|
+
details.timingProfile = 1 - jsDivergence(timingA, timingB);
|
|
645
323
|
|
|
646
|
-
//
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
324
|
+
// Weighted aggregate
|
|
325
|
+
const weights = { toolFrequency: 0.35, argumentPatterns: 0.25, responsePatterns: 0.20, timingProfile: 0.20 };
|
|
326
|
+
let score = 0;
|
|
327
|
+
for (const [key, weight] of Object.entries(weights)) {
|
|
328
|
+
score += (details[key] || 0) * weight;
|
|
650
329
|
}
|
|
330
|
+
score = Math.round(score * 1000) / 1000;
|
|
331
|
+
|
|
332
|
+
let label = 'different';
|
|
333
|
+
if (score >= SIMILARITY_THRESHOLDS.identical) label = 'identical';
|
|
334
|
+
else if (score >= SIMILARITY_THRESHOLDS.similar) label = 'similar';
|
|
335
|
+
else if (score >= SIMILARITY_THRESHOLDS.related) label = 'related';
|
|
336
|
+
|
|
337
|
+
return { score, label, details };
|
|
338
|
+
}
|
|
651
339
|
|
|
652
|
-
|
|
653
|
-
|
|
340
|
+
/**
|
|
341
|
+
* Check if current behavior deviates from the fingerprint (possible compromise).
|
|
342
|
+
* @param {object} observation - Current observed behavior
|
|
343
|
+
* @param {string} [observation.tool] - Tool being called
|
|
344
|
+
* @param {object} [observation.args] - Arguments to the tool
|
|
345
|
+
* @param {number} [observation.latencyMs] - Observed latency in ms
|
|
346
|
+
* @returns {{ compromised: boolean, score: number, reasons: string[] }}
|
|
347
|
+
*/
|
|
348
|
+
detectCompromise(observation = {}) {
|
|
349
|
+
const reasons = [];
|
|
350
|
+
let anomalyScore = 0;
|
|
351
|
+
|
|
352
|
+
if (!this.isStable()) {
|
|
654
353
|
return {
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
|
|
658
|
-
isLearning: true,
|
|
659
|
-
reason: `Learning mode (${this._totalCalls}/${this.learningPeriod})`
|
|
354
|
+
compromised: false,
|
|
355
|
+
score: 0,
|
|
356
|
+
reasons: ['Fingerprint not yet stable (insufficient observations)']
|
|
660
357
|
};
|
|
661
358
|
}
|
|
662
359
|
|
|
663
|
-
|
|
664
|
-
const probability = this._getTransitionProbability(prevTool, toolName);
|
|
665
|
-
const anomalyScore = 1 - probability;
|
|
666
|
-
const allowed = probability >= this.anomalyThreshold;
|
|
360
|
+
const { tool, args, latencyMs } = observation;
|
|
667
361
|
|
|
668
|
-
|
|
669
|
-
|
|
362
|
+
// 1. Unknown tool check
|
|
363
|
+
if (tool && !this.toolFrequency.has(tool)) {
|
|
364
|
+
reasons.push(`Unknown tool "${tool}" not in behavioral profile`);
|
|
365
|
+
anomalyScore += 3;
|
|
670
366
|
}
|
|
671
367
|
|
|
672
|
-
|
|
673
|
-
if (
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
368
|
+
// 2. Tool frequency deviation
|
|
369
|
+
if (tool && this.toolFrequency.has(tool)) {
|
|
370
|
+
const totalCalls = [...this.toolFrequency.values()].reduce((a, b) => a + b, 0);
|
|
371
|
+
const expectedFreq = this.toolFrequency.get(tool) / totalCalls;
|
|
372
|
+
// If this tool is very rarely used (<5% of calls), calling it is mildly suspicious
|
|
373
|
+
if (expectedFreq < 0.05) {
|
|
374
|
+
reasons.push(`Tool "${tool}" is rarely used (${(expectedFreq * 100).toFixed(1)}% of calls)`);
|
|
375
|
+
anomalyScore += 1;
|
|
376
|
+
}
|
|
678
377
|
}
|
|
679
378
|
|
|
680
|
-
|
|
681
|
-
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
*/
|
|
690
|
-
_getTransitionProbability(from, to) {
|
|
691
|
-
const row = this._transitions[from];
|
|
692
|
-
if (!row) return 0;
|
|
693
|
-
|
|
694
|
-
const total = Object.values(row).reduce((a, b) => a + b, 0);
|
|
695
|
-
if (total === 0) return 0;
|
|
696
|
-
|
|
697
|
-
const count = row[to] || 0;
|
|
698
|
-
return count / total;
|
|
699
|
-
}
|
|
379
|
+
// 3. Argument pattern deviation
|
|
380
|
+
if (tool && args && this.argumentPatterns.has(tool)) {
|
|
381
|
+
const knownPatterns = this.argumentPatterns.get(tool);
|
|
382
|
+
const currentPattern = Object.keys(args || {}).sort().join(',');
|
|
383
|
+
if (currentPattern && !knownPatterns.has(currentPattern)) {
|
|
384
|
+
reasons.push(`Unusual argument pattern for "${tool}": "${currentPattern}"`);
|
|
385
|
+
anomalyScore += 2;
|
|
386
|
+
}
|
|
387
|
+
}
|
|
700
388
|
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
389
|
+
// 4. Timing anomaly (z-score)
|
|
390
|
+
if (tool && typeof latencyMs === 'number' && this.timingProfiles.has(tool)) {
|
|
391
|
+
const timings = this.timingProfiles.get(tool);
|
|
392
|
+
const m = mean(timings);
|
|
393
|
+
const sd = stddev(timings);
|
|
394
|
+
if (sd > 0) {
|
|
395
|
+
const zScore = Math.abs(latencyMs - m) / sd;
|
|
396
|
+
if (zScore > this.deviationThreshold) {
|
|
397
|
+
reasons.push(`Timing anomaly for "${tool}": z-score ${zScore.toFixed(2)} (latency ${latencyMs}ms vs mean ${m.toFixed(0)}ms)`);
|
|
398
|
+
anomalyScore += zScore > 4 ? 3 : 1;
|
|
399
|
+
}
|
|
712
400
|
}
|
|
713
401
|
}
|
|
714
|
-
return matrix;
|
|
715
|
-
}
|
|
716
402
|
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
403
|
+
// 5. Check for suspicious argument values
|
|
404
|
+
if (args) {
|
|
405
|
+
const argStr = JSON.stringify(args).toLowerCase();
|
|
406
|
+
const suspiciousPatterns = [
|
|
407
|
+
/curl\s+/,
|
|
408
|
+
/wget\s+/,
|
|
409
|
+
/eval\s*\(/,
|
|
410
|
+
/base64/,
|
|
411
|
+
/\/etc\/passwd/,
|
|
412
|
+
/\.\.\//,
|
|
413
|
+
/exfiltrat/
|
|
414
|
+
];
|
|
415
|
+
for (const pattern of suspiciousPatterns) {
|
|
416
|
+
if (pattern.test(argStr)) {
|
|
417
|
+
reasons.push(`Suspicious argument content detected: ${pattern.source}`);
|
|
418
|
+
anomalyScore += 2;
|
|
419
|
+
}
|
|
733
420
|
}
|
|
734
421
|
}
|
|
735
|
-
sequences.sort((a, b) => b.count - a.count);
|
|
736
|
-
return sequences.slice(0, topN);
|
|
737
|
-
}
|
|
738
422
|
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
* @returns {object}
|
|
742
|
-
*/
|
|
743
|
-
exportModel() {
|
|
423
|
+
const compromised = anomalyScore >= this.deviationThreshold;
|
|
424
|
+
|
|
744
425
|
return {
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
anomalyCount: this._anomalyCount,
|
|
749
|
-
learningPeriod: this.learningPeriod,
|
|
750
|
-
anomalyThreshold: this.anomalyThreshold,
|
|
751
|
-
exportedAt: new Date().toISOString()
|
|
426
|
+
compromised,
|
|
427
|
+
score: Math.round(anomalyScore * 100) / 100,
|
|
428
|
+
reasons
|
|
752
429
|
};
|
|
753
430
|
}
|
|
754
431
|
|
|
755
432
|
/**
|
|
756
|
-
*
|
|
757
|
-
* @
|
|
433
|
+
* Export fingerprint as a portable JSON object.
|
|
434
|
+
* @returns {object}
|
|
758
435
|
*/
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
this.
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
436
|
+
toJSON() {
|
|
437
|
+
const obj = {
|
|
438
|
+
agentId: this.agentId,
|
|
439
|
+
createdAt: this.createdAt,
|
|
440
|
+
totalObservations: this.totalObservations,
|
|
441
|
+
stable: this.isStable(),
|
|
442
|
+
hash: this.generateHash(),
|
|
443
|
+
toolFrequency: Object.fromEntries(this.toolFrequency),
|
|
444
|
+
argumentPatterns: {},
|
|
445
|
+
timingStats: {},
|
|
446
|
+
responsePatterns: Object.fromEntries(this.responsePatterns)
|
|
447
|
+
};
|
|
448
|
+
|
|
449
|
+
for (const [tool, patterns] of this.argumentPatterns) {
|
|
450
|
+
obj.argumentPatterns[tool] = [...patterns];
|
|
771
451
|
}
|
|
772
|
-
|
|
773
|
-
|
|
452
|
+
for (const [tool, timings] of this.timingProfiles) {
|
|
453
|
+
obj.timingStats[tool] = { mean: mean(timings), stddev: stddev(timings), count: timings.length };
|
|
774
454
|
}
|
|
775
|
-
|
|
455
|
+
|
|
456
|
+
return obj;
|
|
776
457
|
}
|
|
777
458
|
|
|
778
459
|
/**
|
|
779
|
-
*
|
|
780
|
-
* @
|
|
460
|
+
* Restore fingerprint from a previously exported JSON object.
|
|
461
|
+
* @param {object} data - Output from toJSON()
|
|
462
|
+
* @returns {AgentFingerprint}
|
|
781
463
|
*/
|
|
782
|
-
|
|
783
|
-
const
|
|
784
|
-
|
|
785
|
-
|
|
464
|
+
static fromJSON(data) {
|
|
465
|
+
const fp = new AgentFingerprint({ agentId: data.agentId });
|
|
466
|
+
fp.createdAt = data.createdAt || Date.now();
|
|
467
|
+
fp.totalObservations = data.totalObservations || 0;
|
|
786
468
|
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
469
|
+
if (data.toolFrequency) {
|
|
470
|
+
fp.toolFrequency = new Map(Object.entries(data.toolFrequency));
|
|
471
|
+
}
|
|
472
|
+
if (data.argumentPatterns) {
|
|
473
|
+
for (const [tool, patterns] of Object.entries(data.argumentPatterns)) {
|
|
474
|
+
fp.argumentPatterns.set(tool, new Set(patterns));
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
if (data.responsePatterns) {
|
|
478
|
+
fp.responsePatterns = new Map(Object.entries(data.responsePatterns));
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
return fp;
|
|
796
482
|
}
|
|
797
483
|
}
|
|
798
484
|
|
|
@@ -801,7 +487,8 @@ class ToolSequenceModeler {
|
|
|
801
487
|
// =========================================================================
|
|
802
488
|
|
|
803
489
|
// Public API: the fingerprint class plus the tuning constants it relies on.
module.exports = { AgentFingerprint, SIMILARITY_THRESHOLDS, DEFAULT_DEVIATION_THRESHOLD, MIN_OBSERVATIONS };
|