agentshield-sdk 7.3.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +64 -0
- package/README.md +63 -7
- package/package.json +8 -3
- package/src/agent-intent.js +807 -0
- package/src/agent-protocol.js +4 -0
- package/src/allowlist.js +605 -603
- package/src/audit-streaming.js +486 -469
- package/src/audit.js +1 -1
- package/src/behavior-profiling.js +299 -289
- package/src/behavioral-dna.js +4 -9
- package/src/canary.js +273 -271
- package/src/compliance.js +619 -617
- package/src/confidence-tuning.js +328 -324
- package/src/context-scoring.js +362 -360
- package/src/cost-optimizer.js +1024 -1024
- package/src/cross-turn.js +663 -0
- package/src/detector-core.js +186 -0
- package/src/distributed.js +5 -1
- package/src/embedding.js +310 -307
- package/src/ensemble.js +523 -0
- package/src/herd-immunity.js +12 -12
- package/src/honeypot.js +332 -328
- package/src/integrations.js +1 -2
- package/src/intent-firewall.js +14 -14
- package/src/llm-redteam.js +678 -670
- package/src/main.js +63 -0
- package/src/middleware.js +5 -2
- package/src/model-fingerprint.js +1059 -1042
- package/src/multi-agent-trust.js +459 -453
- package/src/multi-agent.js +1 -1
- package/src/normalizer.js +734 -0
- package/src/persistent-learning.js +677 -0
- package/src/pii.js +4 -0
- package/src/policy-dsl.js +775 -775
- package/src/presets.js +409 -409
- package/src/production.js +22 -9
- package/src/redteam.js +475 -475
- package/src/response-handler.js +436 -429
- package/src/scanners.js +358 -357
- package/src/self-healing.js +368 -363
- package/src/self-training.js +772 -0
- package/src/semantic.js +339 -339
- package/src/shield-score.js +250 -250
- package/src/smart-config.js +812 -0
- package/src/sso-saml.js +8 -4
- package/src/testing.js +24 -2
- package/src/tool-guard.js +412 -412
- package/src/watermark.js +242 -235
- package/src/worker-scanner.js +608 -601
- package/types/index.d.ts +660 -0
|
@@ -0,0 +1,807 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
 * Agent Shield — Agent Intent Declaration & Goal Drift Detection (v8.0)
 *
 * Lets developers declare what their agent is supposed to do, then detects
 * when conversations drift away from that purpose. Includes a Markov-chain
 * tool sequence modeler that learns normal tool patterns and flags anomalies.
 *
 * Design:
 * - AgentIntent — static declaration of purpose, allowed tools, allowed topics.
 * - GoalDriftDetector — monitors a conversation for drift over time.
 * - ToolSequenceModeler — learns bigram tool transitions, flags anomalies.
 *
 * Zero dependencies, local-only. All detection runs via TF-IDF cosine
 * similarity and simple Markov chains — no ML libraries required.
 *
 * @module agent-intent
 */
|
|
20
|
+
|
|
21
|
+
// =========================================================================
|
|
22
|
+
// TOKENIZER & TF-IDF (mirrors ipia-detector.js patterns)
|
|
23
|
+
// =========================================================================
|
|
24
|
+
|
|
25
|
+
/**
 * High-frequency English function words. Tokens found in this set are
 * discarded before stemming so they do not influence TF-IDF relevance.
 */
const STOP_WORDS = new Set(
  (
    'the be to of and in that have it for ' +
    'not on with he as you do at this but ' +
    'his by from they we say her she or an ' +
    'will my one all would there their what so ' +
    'up out if about who get which go me ' +
    'when make can like no just him know take ' +
    'into your some could them see other than ' +
    'then now look only come its over also back ' +
    'after use how our well way even new want ' +
    'because any these give most us is are was ' +
    'were been has had did am'
  ).split(' ')
);
|
|
39
|
+
|
|
40
|
+
/**
 * Lightweight English stemmer: removes at most one common suffix.
 *
 * This is intentionally cruder than a Porter stemmer; it only needs to make
 * related word forms collide for TF-IDF matching.
 *
 * @param {string} word - A single lowercase token.
 * @returns {string} The token with one suffix removed, or the token itself
 *   when it is 3 characters or shorter or no rule applies.
 */
function stem(word) {
  // A stripped suffix must leave at least this many characters behind.
  const MIN_REMAINDER = 2;
  // Sorted longest-first so the most specific suffix wins.
  // ('encies' and 'aries' can never match: the "-ies" rule below returns first.)
  const ORDERED_SUFFIXES = [
    'ational', 'tional', 'encies', 'ances', 'ments', 'ating',
    'ation', 'aries', 'ness', 'ment', 'ings', 'ible', 'able',
    'ence', 'ance', 'ious', 'eous', 'less', 'ting', 'ally', 'ful',
    'ing', 'ary', 'ely', 'ers', 'ion', 'ous', 'ive',
    'ed', 'ly', 'es', 'er', 'al', 'ty',
    's'
  ];

  if (word.length <= 3) {
    return word;
  }

  // Plural "-ies" becomes "-y" (itineraries -> itinerary, cities -> city).
  if (word.endsWith('ies') && word.length > 4) {
    return `${word.slice(0, -3)}y`;
  }

  const matched = ORDERED_SUFFIXES.find(
    (candidate) => word.endsWith(candidate) && word.length - candidate.length >= MIN_REMAINDER
  );
  if (matched === undefined) {
    return word;
  }
  return word.slice(0, word.length - matched.length);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Split text into lowercase ASCII word tokens.
 *
 * Every character other than a-z, 0-9 and whitespace is treated as a
 * separator, and single-character tokens are dropped. Stop words are NOT
 * removed here; that happens in tokenizeForTfIdf.
 *
 * @param {string} text - Input text; non-string truthy values are coerced with String().
 * @returns {string[]} Tokens of length 2 or more (empty array for falsy input).
 */
function tokenize(text) {
  if (!text) {
    return [];
  }
  const source = typeof text === 'string' ? text : String(text);
  const pieces = source
    .toLowerCase()
    .replace(/[^a-z0-9\s]/g, ' ')
    .split(/\s+/);

  const words = [];
  for (const piece of pieces) {
    if (piece.length > 1) {
      words.push(piece);
    }
  }
  return words;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Produce the token stream used for relevance scoring: tokenize, discard
 * stop words, then stem whatever remains.
 *
 * @param {string} text - Raw text.
 * @returns {string[]} Stemmed, stop-word-free tokens in original order.
 */
function tokenizeForTfIdf(text) {
  const stems = [];
  for (const word of tokenize(text)) {
    if (STOP_WORDS.has(word)) {
      continue;
    }
    stems.push(stem(word));
  }
  return stems;
}
|
|
93
|
+
|
|
94
|
+
/**
 * Relative term frequencies for a token list.
 *
 * @param {string[]} tokens - Document tokens.
 * @returns {Map<string, number>} Each distinct token mapped to
 *   (occurrences / tokens.length), in first-occurrence order. Empty input
 *   yields an empty map.
 */
function termFrequency(tokens) {
  const total = tokens.length;
  const frequencies = new Map();
  if (total === 0) {
    return frequencies;
  }

  // First pass: raw occurrence counts.
  for (const token of tokens) {
    const seenSoFar = frequencies.get(token) || 0;
    frequencies.set(token, seenSoFar + 1);
  }

  // Second pass: normalise counts by document length, in place.
  frequencies.forEach((count, token) => {
    frequencies.set(token, count / total);
  });
  return frequencies;
}
|
|
110
|
+
|
|
111
|
+
/**
 * Smoothed inverse document frequency over a small corpus.
 *
 * Each term's weight is ln((N + 1) / (df + 1)) + 1, where N is the number of
 * documents and df is the number of documents containing the term.
 *
 * @param {Array<string[]>} docs - One token array per document.
 * @returns {Map<string, number>} IDF weight per term.
 */
function buildIdf(docs) {
  const totalDocs = docs.length;

  // Count, for every term, how many documents contain it at least once.
  const documentFrequency = new Map();
  for (const doc of docs) {
    new Set(doc).forEach((term) => {
      documentFrequency.set(term, (documentFrequency.get(term) || 0) + 1);
    });
  }

  const weights = new Map();
  documentFrequency.forEach((containing, term) => {
    weights.set(term, Math.log((totalDocs + 1) / (containing + 1)) + 1);
  });
  return weights;
}
|
|
131
|
+
|
|
132
|
+
/**
 * Weight a document's term frequencies by IDF.
 *
 * @param {string[]} tokens - Document tokens.
 * @param {Map<string, number>} idf - IDF weights, e.g. from buildIdf.
 * @returns {Map<string, number>} term -> tf * idf. Terms absent from `idf`
 *   fall back to a weight of ln(2) + 1.
 */
function tfidfVector(tokens, idf) {
  const fallbackIdf = Math.log(2) + 1;
  const weighted = new Map();
  termFrequency(tokens).forEach((frequency, term) => {
    const weight = idf.get(term) || fallbackIdf;
    weighted.set(term, frequency * weight);
  });
  return weighted;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Cosine similarity of two sparse vectors stored as Maps.
 *
 * @param {Map<string, number>} a - First vector (term -> weight).
 * @param {Map<string, number>} b - Second vector (term -> weight).
 * @returns {number} dot(a, b) / (|a| * |b|), or 0 when either vector has
 *   zero magnitude or the result is not finite.
 */
function cosineSim(a, b) {
  const terms = new Set([...a.keys(), ...b.keys()]);

  let dotProduct = 0;
  let squaredA = 0;
  let squaredB = 0;
  terms.forEach((term) => {
    const weightA = a.get(term) || 0;
    const weightB = b.get(term) || 0;
    dotProduct += weightA * weightB;
    squaredA += weightA * weightA;
    squaredB += weightB * weightB;
  });

  const magnitude = Math.sqrt(squaredA) * Math.sqrt(squaredB);
  if (magnitude === 0 || !Number.isFinite(magnitude)) {
    return 0;
  }

  const similarity = dotProduct / magnitude;
  if (!Number.isFinite(similarity)) {
    return 0;
  }
  return similarity;
}
|
|
169
|
+
|
|
170
|
+
// =========================================================================
|
|
171
|
+
// AGENT INTENT
|
|
172
|
+
// =========================================================================
|
|
173
|
+
|
|
174
|
+
/**
 * Declares what an agent is supposed to do and answers two questions about
 * that declaration: is a user message on-topic, and is a tool call permitted?
 *
 * Relevance is purely lexical: the stemmed tokens of the purpose (plus any
 * allowed topics) are compared with the stemmed tokens of the message.
 */
class AgentIntent {
  /**
   * @param {object} config
   * @param {string} config.purpose - What this agent does ("Books flights for customers")
   * @param {string[]} [config.allowedTools] - Tools this agent may use; when omitted, every tool is allowed
   * @param {string[]} [config.allowedTopics] - Topics the agent should stay within
   * @param {number} [config.maxDriftScore=0.7] - Max drift before alert (0-1)
   * @param {function} [config.onDrift] - Callback when drift detected
   * @throws {Error} When config is missing or purpose is not a non-empty string
   */
  constructor(config) {
    // The purpose is sliced with substring() below, so a truthy non-string
    // (number, object) must be rejected here rather than crash at the log line.
    if (!config || !config.purpose || typeof config.purpose !== 'string') {
      throw new Error('[Agent Shield] AgentIntent requires a purpose string');
    }
    this.purpose = config.purpose;
    this.allowedTools = config.allowedTools || null;
    this.allowedTopics = config.allowedTopics || null;
    this.maxDriftScore = typeof config.maxDriftScore === 'number' ? config.maxDriftScore : 0.7;
    this.onDrift = config.onDrift || null;

    // Stemmed, stop-word-free tokens of the purpose statement
    this._purposeTokens = tokenizeForTfIdf(this.purpose);

    // Stemmed tokens of every allowed topic, concatenated
    this._topicTokens = [];
    if (this.allowedTopics && this.allowedTopics.length > 0) {
      for (const topic of this.allowedTopics) {
        this._topicTokens.push(...tokenizeForTfIdf(topic));
      }
    }

    // Combined purpose + topic tokens: the reference vocabulary for scoring
    this._allPurposeTokens = [...this._purposeTokens, ...this._topicTokens];

    console.log(`[Agent Shield] AgentIntent created: "${this.purpose.substring(0, 80)}"`);
  }

  /**
   * Check if a user message is on-topic for this agent's purpose.
   *
   * The relevance score blends 25% TF-IDF cosine, 25% raw TF cosine and 50%
   * coverage (the share of message tokens found in the purpose vocabulary).
   * Empty or token-less messages are reported as on-topic with relevance 0
   * and drift 1, and do not trigger the onDrift callback.
   *
   * @param {string} message - User message
   * @returns {object} { onTopic: bool, relevanceScore: number 0-1, drift: number 0-1, reason: string }
   */
  checkMessage(message) {
    if (!message || typeof message !== 'string' || message.trim().length === 0) {
      return { onTopic: true, relevanceScore: 0, drift: 1, reason: 'Empty message' };
    }

    const msgTokens = tokenizeForTfIdf(message);
    if (msgTokens.length === 0) {
      return { onTopic: true, relevanceScore: 0, drift: 1, reason: 'No meaningful tokens in message' };
    }

    const purposeTokens = this._allPurposeTokens;

    // TF-IDF cosine, with IDF built from purpose + message as a two-document corpus
    const idf = buildIdf([purposeTokens, msgTokens]);
    const cosSim = cosineSim(tfidfVector(purposeTokens, idf), tfidfVector(msgTokens, idf));

    // Term-frequency cosine (no IDF) — better for short text vs a fixed reference
    const tfSim = cosineSim(termFrequency(purposeTokens), termFrequency(msgTokens));

    // Coverage: fraction of message tokens that appear in the purpose vocabulary.
    // msgTokens is non-empty here, so the division is safe.
    const purposeSet = new Set(purposeTokens);
    const overlapCount = msgTokens.filter((t) => purposeSet.has(t)).length;
    const coverageRatio = overlapCount / msgTokens.length;

    // Coverage dominates because the key question for intent checking is
    // "how much of the user's message uses purpose-related vocabulary?"
    const relevanceScore = (cosSim * 0.25) + (tfSim * 0.25) + (coverageRatio * 0.5);
    const drift = 1 - relevanceScore;
    const onTopic = drift <= this.maxDriftScore;

    let reason;
    if (onTopic) {
      reason = `Message is on-topic (relevance: ${(relevanceScore * 100).toFixed(1)}%)`;
    } else {
      reason = `Message drifted from purpose (relevance: ${(relevanceScore * 100).toFixed(1)}%, threshold: ${((1 - this.maxDriftScore) * 100).toFixed(1)}%)`;
    }

    if (!onTopic && this.onDrift) {
      try {
        // Only the first 200 characters of the message are handed to the callback
        this.onDrift({ message: message.substring(0, 200), drift, relevanceScore, reason });
      } catch (e) {
        // A failing callback must never break the check; tolerate non-Error throws too
        console.error('[Agent Shield] onDrift callback error:', e && e.message ? e.message : e);
      }
    }

    return { onTopic, relevanceScore, drift, reason };
  }

  /**
   * Check if a tool call is allowed for this agent.
   *
   * Names are compared case-insensitively after trimming. Non-string entries
   * in allowedTools are ignored instead of raising a TypeError.
   *
   * @param {string} toolName
   * @param {object} [args] - Accepted for interface compatibility; not inspected
   * @returns {object} { allowed: bool, reason: string }
   */
  checkTool(toolName, args) {
    if (!toolName || typeof toolName !== 'string') {
      return { allowed: false, reason: 'Invalid tool name' };
    }

    // If no allowedTools specified, everything is allowed
    if (!this.allowedTools) {
      return { allowed: true, reason: 'No tool restrictions defined' };
    }

    const normalizedName = toolName.toLowerCase().trim();
    const allowed = this.allowedTools.some(
      (t) => typeof t === 'string' && t.toLowerCase().trim() === normalizedName
    );

    if (allowed) {
      return { allowed: true, reason: `Tool "${toolName}" is in the allowed list` };
    }

    return {
      allowed: false,
      reason: `Tool "${toolName}" is not in the allowed list [${this.allowedTools.join(', ')}]`
    };
  }

  /**
   * Get the intent's TF-IDF vector (for comparison). The IDF is built from
   * the purpose vocabulary alone, treated as a one-document corpus.
   * @returns {Map<string, number>}
   */
  getPurposeVector() {
    const idf = buildIdf([this._allPurposeTokens]);
    return tfidfVector(this._allPurposeTokens, idf);
  }
}
|
|
314
|
+
|
|
315
|
+
// =========================================================================
|
|
316
|
+
// GOAL DRIFT DETECTOR
|
|
317
|
+
// =========================================================================
|
|
318
|
+
|
|
319
|
+
/**
 * Monitors a conversation over time for drift away from a declared purpose.
 *
 * Every message is tokenized and appended to a bounded buffer. Drift is
 * scored on the concatenated tokens of the most recent `windowSize` messages
 * using the same lexical blend as AgentIntent.checkMessage.
 */
class GoalDriftDetector {
  /**
   * @param {AgentIntent} intent - The declared intent
   * @param {object} [config]
   * @param {number} [config.windowSize=10] - Messages to consider
   * @param {number} [config.driftThreshold=0.6] - Drift score to trigger alert
   * @param {number} [config.checkInterval=5] - Stored on the instance; nothing in
   *   this class reads it, so drift is evaluated on every message
   * @param {function} [config.onDrift] - Callback on drift
   * @throws {Error} When intent is not an AgentIntent instance
   */
  constructor(intent, config = {}) {
    if (!intent || !(intent instanceof AgentIntent)) {
      throw new Error('[Agent Shield] GoalDriftDetector requires an AgentIntent instance');
    }
    // The default parameter only covers `undefined`; tolerate an explicit null as well
    const options = config || {};

    this.intent = intent;
    this.windowSize = options.windowSize || 10;
    this.driftThreshold = typeof options.driftThreshold === 'number' ? options.driftThreshold : 0.6;
    this.checkInterval = options.checkInterval || 5;
    this.onDrift = options.onDrift || null;

    this._messages = [];
    this._driftHistory = [];
    this._totalMessages = 0;
    this._driftEvents = 0;
    this._topicShifts = 0;

    console.log('[Agent Shield] GoalDriftDetector initialized ' +
      `(window=${this.windowSize}, threshold=${this.driftThreshold})`);
  }

  /**
   * Blended lexical relevance of a token list against the purpose vocabulary:
   * 25% TF-IDF cosine + 25% raw TF cosine + 50% coverage.
   * @private
   * @param {string[]} tokens - Tokens to score
   * @param {string[]} purposeTokens - Purpose vocabulary tokens
   * @param {Map<string, number>} purposeTf - termFrequency(purposeTokens), computed once by the caller
   * @param {Set<string>} purposeSet - Set view of purposeTokens, computed once by the caller
   * @returns {number} Relevance score
   */
  _relevance(tokens, purposeTokens, purposeTf, purposeSet) {
    // The IDF depends on the scored tokens, so it cannot be shared between calls
    const idf = buildIdf([purposeTokens, tokens]);
    const cosSim = cosineSim(tfidfVector(purposeTokens, idf), tfidfVector(tokens, idf));
    const tfSim = cosineSim(purposeTf, termFrequency(tokens));
    const overlapCount = tokens.filter((t) => purposeSet.has(t)).length;
    const coverageRatio = tokens.length > 0 ? overlapCount / tokens.length : 0;
    return (cosSim * 0.25) + (tfSim * 0.25) + (coverageRatio * 0.5);
  }

  /**
   * Add a message to the conversation and check for drift.
   * @param {string} message - The message text
   * @param {string} [role='user'] - 'user' or 'assistant'
   * @returns {object} {
   *   driftScore: number 0-1 (0=on topic, 1=completely off),
   *   driftDetected: bool,
   *   trend: 'stable' | 'drifting' | 'recovering',
   *   turnsSincePurpose: number,
   *   topicShift: bool (sudden topic change),
   *   reason: string
   * }
   */
  addMessage(message, role = 'user') {
    if (!message || typeof message !== 'string') {
      return {
        driftScore: 0,
        driftDetected: false,
        trend: 'stable',
        turnsSincePurpose: 0,
        topicShift: false,
        reason: 'Empty or invalid message'
      };
    }

    this._totalMessages++;
    const msgTokens = tokenizeForTfIdf(message);

    this._messages.push({
      text: message,
      tokens: msgTokens,
      role,
      timestamp: Date.now()
    });

    // Cap stored messages at three windows' worth
    if (this._messages.length > this.windowSize * 3) {
      this._messages = this._messages.slice(-this.windowSize * 3);
    }

    // Concatenate the tokens of the most recent `windowSize` messages
    const windowTokens = [];
    for (const msg of this._messages.slice(-this.windowSize)) {
      windowTokens.push(...msg.tokens);
    }

    // Purpose-side structures are identical for every score computed below
    const purposeTokens = this.intent._allPurposeTokens;
    const purposeTf = termFrequency(purposeTokens);
    const purposeSet = new Set(purposeTokens);

    const relevance = this._relevance(windowTokens, purposeTokens, purposeTf, purposeSet);
    const driftScore = 1 - relevance;
    const driftDetected = driftScore > this.driftThreshold;

    // Detect sudden topic shift by comparing current message to previous
    let topicShift = false;
    if (this._messages.length >= 2) {
      const prev = this._messages[this._messages.length - 2];
      const localSim = cosineSim(termFrequency(prev.tokens), termFrequency(msgTokens));
      // Very short messages are ignored: their similarity is too noisy to trust
      if (localSim < 0.1 && msgTokens.length > 2 && prev.tokens.length > 2) {
        topicShift = true;
        this._topicShifts++;
      }
    }

    // Walk backwards and count messages until the most recent on-topic one.
    // "On-topic" uses the same test as driftDetected (drift <= threshold), so a
    // message sitting exactly on the threshold is classified consistently.
    let turnsSincePurpose = 0;
    for (let i = this._messages.length - 1; i >= 0; i--) {
      const msgRelevance = this._relevance(
        this._messages[i].tokens, purposeTokens, purposeTf, purposeSet
      );
      if (1 - msgRelevance <= this.driftThreshold) {
        break;
      }
      turnsSincePurpose++;
    }

    // Record drift score for trend analysis (history capped at 100 entries)
    this._driftHistory.push(driftScore);
    if (this._driftHistory.length > 100) {
      this._driftHistory = this._driftHistory.slice(-100);
    }

    // Determine trend from last 3 scores
    const trend = this._calcTrend();

    if (driftDetected) {
      this._driftEvents++;
    }

    let reason;
    if (driftDetected) {
      reason = `Conversation has drifted from purpose (drift: ${(driftScore * 100).toFixed(1)}%, ` +
        `threshold: ${(this.driftThreshold * 100).toFixed(1)}%, trend: ${trend})`;
    } else {
      reason = `Conversation is on-topic (drift: ${(driftScore * 100).toFixed(1)}%, trend: ${trend})`;
    }

    if (driftDetected && this.onDrift) {
      try {
        this.onDrift({
          driftScore,
          trend,
          turnsSincePurpose,
          topicShift,
          message: message.substring(0, 200),
          reason
        });
      } catch (e) {
        // A failing callback must never break detection; tolerate non-Error throws too
        console.error('[Agent Shield] onDrift callback error:', e && e.message ? e.message : e);
      }
    }

    return {
      driftScore,
      driftDetected,
      trend,
      turnsSincePurpose,
      topicShift,
      reason
    };
  }

  /**
   * Calculate drift trend from recent scores: strictly rising over the last
   * three scores is 'drifting', strictly falling is 'recovering'.
   * @private
   * @returns {'stable' | 'drifting' | 'recovering'}
   */
  _calcTrend() {
    const h = this._driftHistory;
    if (h.length < 3) return 'stable';

    const [first, second, third] = h.slice(-3);
    if (first < second && second < third) return 'drifting';
    if (first > second && second > third) return 'recovering';
    return 'stable';
  }

  /**
   * Get drift history.
   * @returns {number[]} Copy of the recorded drift scores (oldest first)
   */
  getHistory() {
    return [...this._driftHistory];
  }

  /**
   * Reset the detector: clears messages, history and all counters.
   */
  reset() {
    this._messages = [];
    this._driftHistory = [];
    this._totalMessages = 0;
    this._driftEvents = 0;
    this._topicShifts = 0;
    console.log('[Agent Shield] GoalDriftDetector reset');
  }

  /**
   * Get stats.
   * @returns {object} Counters plus average/max drift over the recorded history
   */
  getStats() {
    const h = this._driftHistory;
    const avgDrift = h.length > 0 ? h.reduce((a, b) => a + b, 0) / h.length : 0;
    const maxDrift = h.length > 0 ? Math.max(...h) : 0;

    return {
      totalMessages: this._totalMessages,
      messagesInWindow: Math.min(this._messages.length, this.windowSize),
      driftEvents: this._driftEvents,
      topicShifts: this._topicShifts,
      averageDrift: avgDrift,
      maxDrift,
      currentTrend: this._calcTrend(),
      historyLength: h.length
    };
  }
}
|
|
569
|
+
|
|
570
|
+
// =========================================================================
|
|
571
|
+
// TOOL SEQUENCE MODELER
|
|
572
|
+
// =========================================================================
|
|
573
|
+
|
|
574
|
+
/** Special token for the start of a tool sequence. */
const START_TOKEN = '__START__';

/**
 * Learns normal tool call patterns using a Markov chain (bigram transitions)
 * and flags anomalous sequences.
 *
 * Tool names come from outside the SDK, so every internal dictionary keyed
 * by tool name is a null-prototype object. With plain objects, a tool named
 * "__proto__" would make the next call write to Object.prototype, and names
 * such as "constructor" would read inherited members instead of counts.
 */
class ToolSequenceModeler {
  /**
   * @param {object} [config]
   * @param {number} [config.learningPeriod=50] - Tool calls before modeling starts
   * @param {number} [config.anomalyThreshold=0.15] - Probability below this = anomaly
   * @param {number} [config.maxChainLength=10] - Max sequence length to track
   */
  constructor(config = {}) {
    this.learningPeriod = config.learningPeriod || 50;
    this.anomalyThreshold = typeof config.anomalyThreshold === 'number' ? config.anomalyThreshold : 0.15;
    this.maxChainLength = config.maxChainLength || 10;

    /** @type {Object<string, Object<string, number>>} Bigram counts: from -> to -> count */
    this._transitions = Object.create(null);
    /** @type {string[]} Recent tool sequence */
    this._sequence = [];
    /** @type {number} Total tool calls recorded */
    this._totalCalls = 0;
    /** @type {number} Anomalies detected */
    this._anomalyCount = 0;
    /** @type {Object<string, number>} Tool call counts */
    this._toolCounts = Object.create(null);

    console.log(`[Agent Shield] ToolSequenceModeler initialized ` +
      `(learningPeriod=${this.learningPeriod}, anomalyThreshold=${this.anomalyThreshold})`);
  }

  /**
   * Record a tool call and check if it's anomalous.
   *
   * The transition is scored against the model as it stood BEFORE this call
   * and only then added to it, so a transition that has never been observed
   * gets probability 0 and anomaly score 1. Calls made during the learning
   * period are always allowed and reported with probability 1.
   *
   * @param {string} toolName
   * @param {object} [context] - { args, userId, agentId }; accepted but not inspected
   * @returns {object} {
   *   allowed: bool,
   *   anomalyScore: number 0-1 (0=normal, 1=never seen),
   *   probability: number (transition probability from previous tool),
   *   isLearning: bool,
   *   reason: string
   * }
   */
  recordToolCall(toolName, context = {}) {
    if (!toolName || typeof toolName !== 'string') {
      return {
        allowed: true,
        anomalyScore: 0,
        probability: 0,
        isLearning: true,
        reason: 'Invalid tool name'
      };
    }

    this._totalCalls++;
    this._toolCounts[toolName] = (this._toolCounts[toolName] || 0) + 1;
    const isLearning = this._totalCalls <= this.learningPeriod;

    // Determine the previous tool (or START_TOKEN)
    const prevTool = this._sequence.length > 0
      ? this._sequence[this._sequence.length - 1]
      : START_TOKEN;

    // Score first: counting the observation beforehand would give every novel
    // transition a non-zero probability (1.0 when the source state is new).
    const probability = this._getTransitionProbability(prevTool, toolName);

    // Then learn from the observation
    if (!this._transitions[prevTool]) {
      this._transitions[prevTool] = Object.create(null);
    }
    this._transitions[prevTool][toolName] = (this._transitions[prevTool][toolName] || 0) + 1;

    // Add to sequence, enforce maxChainLength
    this._sequence.push(toolName);
    if (this._sequence.length > this.maxChainLength) {
      this._sequence.shift();
    }

    // During learning, always allow
    if (isLearning) {
      return {
        allowed: true,
        anomalyScore: 0,
        probability: 1,
        isLearning: true,
        reason: `Learning mode (${this._totalCalls}/${this.learningPeriod})`
      };
    }

    const anomalyScore = 1 - probability;
    const allowed = probability >= this.anomalyThreshold;

    if (!allowed) {
      this._anomalyCount++;
    }

    let reason;
    if (allowed) {
      reason = `Tool "${toolName}" after "${prevTool}" is normal (P=${probability.toFixed(3)})`;
    } else {
      reason = `Tool "${toolName}" after "${prevTool}" is anomalous ` +
        `(P=${probability.toFixed(3)}, threshold=${this.anomalyThreshold})`;
    }

    return { allowed, anomalyScore, probability, isLearning, reason };
  }

  /**
   * Get transition probability P(to | from).
   * @private
   * @param {string} from
   * @param {string} to
   * @returns {number} 0 when `from` has no recorded outgoing transitions
   */
  _getTransitionProbability(from, to) {
    const row = this._transitions[from];
    if (!row) return 0;

    const total = Object.values(row).reduce((a, b) => a + b, 0);
    if (total === 0) return 0;

    const count = row[to] || 0;
    return count / total;
  }

  /**
   * Get the transition probability matrix.
   * @returns {Object<string, Object<string, number>>} Normalized probabilities
   */
  getTransitionMatrix() {
    // The result uses ordinary objects, so keys are defined explicitly: a plain
    // assignment to "__proto__" would replace the prototype instead of adding a key.
    const setOwn = (target, key, value) => {
      Object.defineProperty(target, key, {
        value,
        writable: true,
        enumerable: true,
        configurable: true
      });
    };

    const matrix = {};
    for (const [from, targets] of Object.entries(this._transitions)) {
      const total = Object.values(targets).reduce((a, b) => a + b, 0);
      const row = {};
      for (const [to, count] of Object.entries(targets)) {
        setOwn(row, to, total > 0 ? count / total : 0);
      }
      setOwn(matrix, from, row);
    }
    return matrix;
  }

  /**
   * Get the most common tool sequences (bigrams).
   * @param {number} [topN=10] - Number of sequences to return
   * @returns {Array<{ from: string, to: string, count: number, probability: number }>}
   *   Sorted by count, highest first
   */
  getCommonSequences(topN = 10) {
    const sequences = [];
    for (const [from, targets] of Object.entries(this._transitions)) {
      const total = Object.values(targets).reduce((a, b) => a + b, 0);
      for (const [to, count] of Object.entries(targets)) {
        sequences.push({
          from,
          to,
          count,
          probability: total > 0 ? count / total : 0
        });
      }
    }
    sequences.sort((a, b) => b.count - a.count);
    return sequences.slice(0, topN);
  }

  /**
   * Export the learned model for persistence.
   * @returns {object} Deep copy of the counts plus the current configuration
   *   and an ISO timestamp
   */
  exportModel() {
    return {
      // The JSON round trip deep-copies the nested count tables
      transitions: JSON.parse(JSON.stringify(this._transitions)),
      toolCounts: { ...this._toolCounts },
      totalCalls: this._totalCalls,
      anomalyCount: this._anomalyCount,
      learningPeriod: this.learningPeriod,
      anomalyThreshold: this.anomalyThreshold,
      exportedAt: new Date().toISOString()
    };
  }

  /**
   * Import a previously exported model.
   *
   * Count tables are rebuilt as null-prototype objects, and entries whose
   * count is not a positive finite number are dropped. learningPeriod and
   * anomalyThreshold in the data are not applied; the instance keeps its own.
   *
   * @param {object} data - Model data from exportModel()
   * @throws {Error} When data is not an object
   */
  importModel(data) {
    if (!data || typeof data !== 'object') {
      throw new Error('[Agent Shield] Invalid model data');
    }

    const isCount = (value) => typeof value === 'number' && Number.isFinite(value) && value > 0;

    if (data.transitions && typeof data.transitions === 'object') {
      const transitions = Object.create(null);
      for (const [from, targets] of Object.entries(data.transitions)) {
        if (!targets || typeof targets !== 'object') continue;
        const row = Object.create(null);
        for (const [to, count] of Object.entries(targets)) {
          if (isCount(count)) row[to] = count;
        }
        transitions[from] = row;
      }
      this._transitions = transitions;
    }
    if (data.toolCounts && typeof data.toolCounts === 'object') {
      const toolCounts = Object.create(null);
      for (const [tool, count] of Object.entries(data.toolCounts)) {
        if (isCount(count)) toolCounts[tool] = count;
      }
      this._toolCounts = toolCounts;
    }
    if (typeof data.totalCalls === 'number') {
      this._totalCalls = data.totalCalls;
    }
    if (typeof data.anomalyCount === 'number') {
      this._anomalyCount = data.anomalyCount;
    }
    console.log(`[Agent Shield] ToolSequenceModeler model imported (${this._totalCalls} calls)`);
  }

  /**
   * Get modeler stats.
   * @returns {object} Totals, distinct tool and transition counts, and learning progress
   */
  getStats() {
    const uniqueTools = Object.keys(this._toolCounts).length;
    const transitionCount = Object.values(this._transitions)
      .reduce((sum, targets) => sum + Object.keys(targets).length, 0);

    return {
      totalCalls: this._totalCalls,
      uniqueTools,
      transitionCount,
      anomalyCount: this._anomalyCount,
      isLearning: this._totalCalls <= this.learningPeriod,
      learningProgress: Math.min(this._totalCalls / this.learningPeriod, 1),
      toolCounts: { ...this._toolCounts }
    };
  }
}
|
|
798
|
+
|
|
799
|
+
// =========================================================================
|
|
800
|
+
// EXPORTS
|
|
801
|
+
// =========================================================================
|
|
802
|
+
|
|
803
|
+
module.exports = {
|
|
804
|
+
AgentIntent,
|
|
805
|
+
GoalDriftDetector,
|
|
806
|
+
ToolSequenceModeler
|
|
807
|
+
};
|