agentshield-sdk 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +191 -0
- package/LICENSE +21 -0
- package/README.md +975 -0
- package/bin/agent-shield.js +680 -0
- package/package.json +118 -0
- package/src/adaptive.js +330 -0
- package/src/agent-protocol.js +998 -0
- package/src/alert-tuning.js +480 -0
- package/src/allowlist.js +603 -0
- package/src/audit-immutable.js +914 -0
- package/src/audit-streaming.js +469 -0
- package/src/badges.js +196 -0
- package/src/behavior-profiling.js +289 -0
- package/src/benchmark-harness.js +804 -0
- package/src/canary.js +271 -0
- package/src/certification.js +563 -0
- package/src/circuit-breaker.js +321 -0
- package/src/compliance.js +617 -0
- package/src/confidence-tuning.js +324 -0
- package/src/confused-deputy.js +624 -0
- package/src/context-scoring.js +360 -0
- package/src/conversation.js +494 -0
- package/src/cost-optimizer.js +1024 -0
- package/src/ctf.js +462 -0
- package/src/detector-core.js +1999 -0
- package/src/distributed.js +359 -0
- package/src/document-scanner.js +795 -0
- package/src/embedding.js +307 -0
- package/src/encoding.js +429 -0
- package/src/enterprise.js +405 -0
- package/src/errors.js +100 -0
- package/src/eu-ai-act.js +523 -0
- package/src/fuzzer.js +764 -0
- package/src/honeypot.js +328 -0
- package/src/i18n-patterns.js +523 -0
- package/src/index.js +430 -0
- package/src/integrations.js +528 -0
- package/src/llm-redteam.js +670 -0
- package/src/main.js +741 -0
- package/src/main.mjs +38 -0
- package/src/mcp-bridge.js +542 -0
- package/src/mcp-certification.js +846 -0
- package/src/mcp-sdk-integration.js +355 -0
- package/src/mcp-security-runtime.js +741 -0
- package/src/mcp-server.js +740 -0
- package/src/middleware.js +208 -0
- package/src/model-finetuning.js +884 -0
- package/src/model-fingerprint.js +1042 -0
- package/src/multi-agent-trust.js +453 -0
- package/src/multi-agent.js +404 -0
- package/src/multimodal.js +296 -0
- package/src/nist-mapping.js +505 -0
- package/src/observability.js +330 -0
- package/src/openclaw.js +450 -0
- package/src/otel.js +544 -0
- package/src/owasp-2025.js +483 -0
- package/src/pii.js +390 -0
- package/src/plugin-marketplace.js +628 -0
- package/src/plugin-system.js +349 -0
- package/src/policy-dsl.js +775 -0
- package/src/policy-extended.js +635 -0
- package/src/policy.js +443 -0
- package/src/presets.js +409 -0
- package/src/production.js +557 -0
- package/src/prompt-leakage.js +321 -0
- package/src/rag-vulnerability.js +579 -0
- package/src/redteam.js +475 -0
- package/src/response-handler.js +429 -0
- package/src/scanners.js +357 -0
- package/src/self-healing.js +363 -0
- package/src/semantic.js +339 -0
- package/src/shield-score.js +250 -0
- package/src/sso-saml.js +897 -0
- package/src/stream-scanner.js +806 -0
- package/src/testing.js +505 -0
- package/src/threat-encyclopedia.js +629 -0
- package/src/threat-intel-network.js +1017 -0
- package/src/token-analysis.js +467 -0
- package/src/tool-guard.js +412 -0
- package/src/tool-output-validator.js +354 -0
- package/src/utils.js +83 -0
- package/src/watermark.js +235 -0
- package/src/worker-scanner.js +601 -0
- package/types/index.d.ts +2088 -0
|
@@ -0,0 +1,1042 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Agent Shield — Model Fingerprinting & Supply Chain Detection
|
|
5
|
+
*
|
|
6
|
+
* Detect which LLM generated a response, useful for detecting supply chain
|
|
7
|
+
* attacks where a different model is swapped in. All analysis uses pure
|
|
8
|
+
* string/regex operations — no external NLP libraries.
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
// =========================================================================
|
|
12
|
+
// CONSTANTS & HEDGING / FORMAL / TRANSITION WORD LISTS
|
|
13
|
+
// =========================================================================
|
|
14
|
+
|
|
15
|
+
/**
 * Words and multi-word phrases that soften or qualify a statement.
 * Entries containing a space are phrases; the rest are single words.
 * @type {string[]}
 */
const HEDGING_WORDS = [
  'perhaps',
  'might',
  'could',
  'possibly',
  'generally',
  'likely',
  'probably',
  'may',
  'seemingly',
  'arguably',
  'apparently',
  'presumably',
  'conceivably',
  'potentially',
  'typically',
  'often',
  'sometimes',
  'it seems',
  'it appears',
  'tend to',
  'in general',
];
|
|
21
|
+
|
|
22
|
+
/**
 * Vocabulary counted as evidence of a formal register when scoring formality.
 * @type {string[]}
 */
const FORMAL_WORDS = [
  'therefore',
  'furthermore',
  'consequently',
  'nevertheless',
  'moreover',
  'accordingly',
  'hereby',
  'henceforth',
  'wherein',
  'subsequently',
  'notwithstanding',
  'thus',
  'hence',
  'pertaining',
  'regarding',
  'facilitate',
  'utilize',
  'implement',
  'demonstrate',
  'constitute',
];
|
|
28
|
+
|
|
29
|
+
/**
 * Vocabulary counted as evidence of an informal register when scoring
 * formality. 'pretty much' is the only multi-word entry.
 * @type {string[]}
 */
const INFORMAL_WORDS = [
  'gonna',
  'wanna',
  'gotta',
  'kinda',
  'sorta',
  'yeah',
  'nah',
  'ok',
  'cool',
  'stuff',
  'things',
  'lots',
  'pretty much',
  'basically',
  'honestly',
  'actually',
  'literally',
  'awesome',
  'super',
  'totally',
];
|
|
34
|
+
|
|
35
|
+
/**
 * Discourse connectives (single words and multi-word phrases) whose
 * frequency is measured as the `transition_words` feature.
 * @type {string[]}
 */
const TRANSITION_PHRASES = [
  'however',
  'therefore',
  'additionally',
  'furthermore',
  'moreover',
  'consequently',
  'nevertheless',
  'in addition',
  'on the other hand',
  'as a result',
  'in contrast',
  'similarly',
  'meanwhile',
  'subsequently',
  'in conclusion',
  'for example',
  'for instance',
  'in particular',
  'that said',
  'having said that',
];
|
|
42
|
+
|
|
43
|
+
/**
 * Matches common English contractions as whole words, case-insensitively.
 *
 * Both the ASCII apostrophe (') and the typographic apostrophe (U+2019) are
 * accepted, so text with "smart quotes" is not scored as contraction-free.
 * The regex is global: use it with String.prototype.match(), or reset
 * `lastIndex` before calling test()/exec().
 * @type {RegExp}
 */
const CONTRACTION_PATTERN = new RegExp(
  '\\b(?:' + [
    "i'm", "i've", "i'll", "i'd",
    "we're", "we've", "we'll", "we'd",
    "they're", "they've", "they'll", "they'd",
    "you're", "you've", "you'll", "you'd",
    "he's", "she's", "it's", "he'd", "she'd",
    "that's", "there's", "here's", "who's", "what's",
    "can't", "couldn't", "won't", "wouldn't", "shouldn't",
    "didn't", "doesn't", "don't", "isn't", "aren't",
    "wasn't", "weren't", "hasn't", "haven't", "hadn't",
    "let's", "ain't"
  ].join('|').replace(/'/g, "['\u2019]") + ')\\b',
  'gi'
);
|
|
44
|
+
|
|
45
|
+
// Heuristic passive-voice matcher (global, case-insensitive): a form of
// "to be" (was/were/been/being/is/are/am), whitespace, an optional word
// ending in "ly", then either any word ending in "ed" or one of the listed
// irregular participles. Because `\w+ed` accepts every word ending in "ed",
// non-participles such as "is red" also match; treat counts as an estimate.
const PASSIVE_PATTERN = /\b(?:was|were|been|being|is|are|am)\s+(?:\w+ly\s+)?(?:\w+ed|written|spoken|taken|given|made|done|shown|known|seen|found|built|sent|told|left|held|brought|kept|set|run|cut|put|read)\b/gi;
|
|
46
|
+
|
|
47
|
+
// Matches a single code point (global, Unicode mode) from these ranges:
// U+1F600-1F64F, U+1F300-1F5FF, U+1F680-1F6FF, U+1F1E0-1F1FF, U+2600-26FF,
// U+2700-27BF, U+FE00-FE0F, U+1F900-1F9FF, U+1FA00-1FA6F, U+1FA70-1FAFF,
// plus U+200D and U+20E3. Each code point is a separate match, so a sequence
// joined with U+200D or followed by a U+FE0x selector yields several matches.
const EMOJI_PATTERN = /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{FE00}-\u{FE0F}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{200D}\u{20E3}]/gu;
|
|
48
|
+
|
|
49
|
+
// =========================================================================
|
|
50
|
+
// MODEL_SIGNATURES — Built-in approximate feature profiles
|
|
51
|
+
// =========================================================================
|
|
52
|
+
|
|
53
|
+
/**
|
|
54
|
+
* Built-in approximate feature profiles for common LLMs.
|
|
55
|
+
* Each entry contains {mean, stddev} for every feature dimension.
|
|
56
|
+
* @type {Object<string, {mean: Object, stddev: Object}>}
|
|
57
|
+
*/
|
|
58
|
+
const MODEL_SIGNATURES = (() => {
  // One row per feature: [feature name, mean, stddev]. Keeping both numbers
  // side by side makes a model's profile readable at a glance; the rows are
  // expanded below into the { mean: {...}, stddev: {...} } shape.
  const table = {
    'gpt-4': [
      ['avg_sentence_length', 22, 4],
      ['vocabulary_richness', 0.62, 0.08],
      ['punctuation_density', 0.045, 0.01],
      ['avg_word_length', 5.2, 0.5],
      ['formality_score', 0.7, 0.1],
      ['hedging_frequency', 0.012, 0.005],
      ['bullet_point_usage', 0.03, 0.02],
      ['code_block_frequency', 0.005, 0.005],
      ['emoji_density', 0.0, 0.001],
      ['paragraph_count', 4, 2],
      ['capitalization_pattern', 0.03, 0.01],
      ['transition_words', 0.018, 0.006],
      ['question_frequency', 0.05, 0.03],
      ['contraction_usage', 0.4, 0.3],
      ['passive_voice_estimate', 0.08, 0.04],
      ['response_structure_code', 0, 0.1],
    ],
    'gpt-3.5': [
      ['avg_sentence_length', 16, 5],
      ['vocabulary_richness', 0.55, 0.1],
      ['punctuation_density', 0.04, 0.012],
      ['avg_word_length', 4.8, 0.6],
      ['formality_score', 0.45, 0.12],
      ['hedging_frequency', 0.008, 0.004],
      ['bullet_point_usage', 0.04, 0.03],
      ['code_block_frequency', 0.006, 0.005],
      ['emoji_density', 0.5, 0.5],
      ['paragraph_count', 3, 2],
      ['capitalization_pattern', 0.025, 0.01],
      ['transition_words', 0.01, 0.005],
      ['question_frequency', 0.07, 0.04],
      ['contraction_usage', 1.2, 0.5],
      ['passive_voice_estimate', 0.06, 0.03],
      ['response_structure_code', 0, 0.1],
    ],
    'claude': [
      ['avg_sentence_length', 19, 4],
      ['vocabulary_richness', 0.64, 0.07],
      ['punctuation_density', 0.05, 0.01],
      ['avg_word_length', 5.1, 0.4],
      ['formality_score', 0.72, 0.08],
      ['hedging_frequency', 0.02, 0.008],
      ['bullet_point_usage', 0.035, 0.02],
      ['code_block_frequency', 0.004, 0.004],
      ['emoji_density', 0.0, 0.001],
      ['paragraph_count', 4, 2],
      ['capitalization_pattern', 0.028, 0.008],
      ['transition_words', 0.015, 0.005],
      ['question_frequency', 0.04, 0.03],
      ['contraction_usage', 0.6, 0.4],
      ['passive_voice_estimate', 0.07, 0.03],
      ['response_structure_code', 0, 0.1],
    ],
    'llama': [
      ['avg_sentence_length', 17, 6],
      ['vocabulary_richness', 0.52, 0.12],
      ['punctuation_density', 0.038, 0.015],
      ['avg_word_length', 4.7, 0.7],
      ['formality_score', 0.4, 0.15],
      ['hedging_frequency', 0.006, 0.004],
      ['bullet_point_usage', 0.025, 0.02],
      ['code_block_frequency', 0.007, 0.006],
      ['emoji_density', 0.2, 0.3],
      ['paragraph_count', 3, 2],
      ['capitalization_pattern', 0.03, 0.012],
      ['transition_words', 0.008, 0.005],
      ['question_frequency', 0.06, 0.04],
      ['contraction_usage', 1.0, 0.6],
      ['passive_voice_estimate', 0.05, 0.03],
      ['response_structure_code', 0, 0.1],
    ],
    'mistral': [
      ['avg_sentence_length', 15, 4],
      ['vocabulary_richness', 0.58, 0.09],
      ['punctuation_density', 0.042, 0.011],
      ['avg_word_length', 4.9, 0.5],
      ['formality_score', 0.55, 0.1],
      ['hedging_frequency', 0.007, 0.004],
      ['bullet_point_usage', 0.02, 0.015],
      ['code_block_frequency', 0.005, 0.005],
      ['emoji_density', 0.1, 0.2],
      ['paragraph_count', 3, 2],
      ['capitalization_pattern', 0.027, 0.01],
      ['transition_words', 0.01, 0.005],
      ['question_frequency', 0.04, 0.03],
      ['contraction_usage', 0.8, 0.5],
      ['passive_voice_estimate', 0.06, 0.03],
      ['response_structure_code', 0, 0.1],
    ],
  };

  // Expand rows in declaration order so model and feature key order match
  // the table above.
  const signatures = {};
  for (const [model, rows] of Object.entries(table)) {
    const mean = {};
    const stddev = {};
    for (const [feature, mu, sigma] of rows) {
      mean[feature] = mu;
      stddev[feature] = sigma;
    }
    signatures[model] = { mean, stddev };
  }
  return signatures;
})();
|
|
250
|
+
|
|
251
|
+
// =========================================================================
|
|
252
|
+
// RESPONSE ANALYZER
|
|
253
|
+
// =========================================================================
|
|
254
|
+
|
|
255
|
+
/**
|
|
256
|
+
* Extracts stylistic features from text for model fingerprinting.
|
|
257
|
+
*/
|
|
258
|
+
class ResponseAnalyzer {
  /**
   * Pre-compiles every word/phrase regex once so analyze() does no regex
   * construction on the hot path.
   */
  constructor() {
    // Whitespace inside a phrase matches any run of whitespace in the text.
    const phraseSource = (phrase) => '\\b' + phrase.replace(/\s+/g, '\\s+') + '\\b';

    // Global regexes: used with String.prototype.match() to COUNT occurrences.
    this._hedgingRegexes = HEDGING_WORDS.map(h => new RegExp(phraseSource(h), 'gi'));
    this._transitionRegexes = TRANSITION_PHRASES.map(p => new RegExp(phraseSource(p), 'gi'));

    // Non-global regexes: used only with test(), so they carry no lastIndex state.
    this._formalRegexes = FORMAL_WORDS.map(w => new RegExp(phraseSource(w)));
    this._informalRegexes = INFORMAL_WORDS.map(w => new RegExp(phraseSource(w)));
  }

  /**
   * Analyze text and return a feature vector describing its style.
   *
   * @param {string} text - The text to analyze.
   * @returns {object} Feature vector with stylistic measurements. Empty,
   *   whitespace-only or non-string input yields the zeroed vector.
   */
  analyze(text) {
    if (!text || typeof text !== 'string' || text.trim().length === 0) {
      return this._emptyFeatures();
    }

    const sentences = this._splitSentences(text);
    const words = this._extractWords(text);
    const totalChars = text.length;

    return {
      avg_sentence_length: this._avgSentenceLength(sentences),
      vocabulary_richness: this._vocabularyRichness(words),
      punctuation_density: this._punctuationDensity(text, totalChars),
      avg_word_length: this._avgWordLength(words),
      formality_score: this._formalityScore(words),
      hedging_frequency: this._hedgingFrequency(text, words.length),
      bullet_point_usage: this._bulletPointUsage(text),
      code_block_frequency: this._codeBlockFrequency(text, totalChars),
      emoji_density: this._emojiDensity(text, totalChars),
      paragraph_count: this._paragraphCount(text),
      capitalization_pattern: this._capitalizationPattern(text),
      transition_words: this._transitionWordFrequency(text, words.length),
      // The raw text is passed too: the split sentences no longer carry
      // their terminating punctuation, so '?' can only be seen there.
      question_frequency: this._questionFrequency(sentences, text),
      contraction_usage: this._contractionUsage(text, words.length),
      passive_voice_estimate: this._passiveVoiceEstimate(text, sentences.length),
      response_structure: this._responseStructure(text)
    };
  }

  /**
   * Returns a zeroed-out feature vector.
   * @returns {object}
   */
  _emptyFeatures() {
    return {
      avg_sentence_length: 0,
      vocabulary_richness: 0,
      punctuation_density: 0,
      avg_word_length: 0,
      formality_score: 0,
      hedging_frequency: 0,
      bullet_point_usage: 0,
      code_block_frequency: 0,
      emoji_density: 0,
      paragraph_count: 0,
      capitalization_pattern: 0,
      transition_words: 0,
      question_frequency: 0,
      contraction_usage: 0,
      passive_voice_estimate: 0,
      response_structure: 'prose'
    };
  }

  /**
   * Split text into sentences on runs of '.', '!' and '?'. The terminators
   * are dropped; if nothing remains, the trimmed text is the only sentence.
   * @param {string} text
   * @returns {string[]}
   */
  _splitSentences(text) {
    const raw = text.split(/[.!?]+/).map(s => s.trim()).filter(s => s.length > 0);
    return raw.length > 0 ? raw : [text.trim()];
  }

  /**
   * Extract words from text (lowercased). A word is a run of ASCII letters
   * and apostrophes.
   * @param {string} text
   * @returns {string[]}
   */
  _extractWords(text) {
    const matches = text.match(/[a-zA-Z']+/g);
    return matches ? matches.map(w => w.toLowerCase()) : [];
  }

  /**
   * Sum the number of matches of each regex in `text`.
   * @param {string} text
   * @param {RegExp[]} regexes - Global regexes.
   * @returns {number}
   */
  _countMatches(text, regexes) {
    let count = 0;
    for (const regex of regexes) {
      const matches = text.match(regex);
      if (matches) count += matches.length;
    }
    return count;
  }

  /**
   * Mean number of whitespace-separated tokens per sentence.
   * @param {string[]} sentences
   * @returns {number}
   */
  _avgSentenceLength(sentences) {
    if (sentences.length === 0) return 0;
    const totalWords = sentences.reduce((sum, s) => {
      const words = s.match(/\S+/g);
      return sum + (words ? words.length : 0);
    }, 0);
    return totalWords / sentences.length;
  }

  /**
   * Type/token ratio: distinct words divided by total words.
   * @param {string[]} words
   * @returns {number}
   */
  _vocabularyRichness(words) {
    if (words.length === 0) return 0;
    const unique = new Set(words);
    return unique.size / words.length;
  }

  /**
   * Fraction of characters that are punctuation marks.
   * @param {string} text
   * @param {number} totalChars
   * @returns {number}
   */
  _punctuationDensity(text, totalChars) {
    if (totalChars === 0) return 0;
    const punctuation = text.match(/[.,;:!?'"()\[\]{}\-—–…]/g);
    return punctuation ? punctuation.length / totalChars : 0;
  }

  /**
   * Mean word length in characters.
   * @param {string[]} words
   * @returns {number}
   */
  _avgWordLength(words) {
    if (words.length === 0) return 0;
    const totalLen = words.reduce((sum, w) => sum + w.length, 0);
    return totalLen / words.length;
  }

  /**
   * Share of formal markers among all formal + informal markers present.
   * Each list entry counts at most once. Entries are matched as whole words,
   * so 'ok' does not fire on "look"/"token" and 'thus' does not fire on
   * "enthusiasm".
   * @param {string[]} words - Lowercased words.
   * @returns {number} Value in [0, 1]; 0.5 when no marker is present, 0 for
   *   an empty word list.
   */
  _formalityScore(words) {
    if (words.length === 0) return 0;
    const text = words.join(' ');
    const formalCount = this._formalRegexes.filter(re => re.test(text)).length;
    const informalCount = this._informalRegexes.filter(re => re.test(text)).length;
    const total = formalCount + informalCount;
    if (total === 0) return 0.5;
    return formalCount / total;
  }

  /**
   * Hedging word/phrase occurrences per word.
   * @param {string} text
   * @param {number} wordCount
   * @returns {number}
   */
  _hedgingFrequency(text, wordCount) {
    if (wordCount === 0) return 0;
    return this._countMatches(text.toLowerCase(), this._hedgingRegexes) / wordCount;
  }

  /**
   * Fraction of lines that start with a bullet ('-', '*', '•') or a
   * numbered-list marker ("1." / "1)").
   * @param {string} text
   * @returns {number}
   */
  _bulletPointUsage(text) {
    const lines = text.split('\n');
    if (lines.length === 0) return 0;
    const bulletLines = lines.filter(l => /^\s*[-*•]\s/.test(l) || /^\s*\d+[.)]\s/.test(l));
    return bulletLines.length / lines.length;
  }

  /**
   * Fraction of characters inside fenced (```) blocks or inline `code`.
   * Inline code is searched only outside fenced blocks, so fenced content
   * is not counted twice.
   * @param {string} text
   * @param {number} totalChars
   * @returns {number}
   */
  _codeBlockFrequency(text, totalChars) {
    if (totalChars === 0) return 0;
    const fencePattern = /```[\s\S]*?```/g;
    const backtickBlocks = text.match(fencePattern) || [];
    // Replace each fenced block with a space so its neighbours do not fuse.
    const outsideFences = text.replace(fencePattern, ' ');
    const inlineCode = outsideFences.match(/`[^`]+`/g) || [];
    const codeChars = backtickBlocks.reduce((s, b) => s + b.length, 0)
      + inlineCode.reduce((s, b) => s + b.length, 0);
    return codeChars / totalChars;
  }

  /**
   * EMOJI_PATTERN matches per 1000 characters.
   * @param {string} text
   * @param {number} totalChars
   * @returns {number}
   */
  _emojiDensity(text, totalChars) {
    if (totalChars === 0) return 0;
    const emojis = text.match(EMOJI_PATTERN);
    const count = emojis ? emojis.length : 0;
    return (count / totalChars) * 1000;
  }

  /**
   * Number of blank-line-separated paragraphs (at least 1).
   * @param {string} text
   * @returns {number}
   */
  _paragraphCount(text) {
    const paragraphs = text.split(/\n\s*\n/).filter(p => p.trim().length > 0);
    return Math.max(paragraphs.length, 1);
  }

  /**
   * Fraction of ASCII letters that are upper case.
   * @param {string} text
   * @returns {number}
   */
  _capitalizationPattern(text) {
    const letters = text.match(/[a-zA-Z]/g);
    if (!letters || letters.length === 0) return 0;
    const upper = letters.filter(c => c === c.toUpperCase());
    return upper.length / letters.length;
  }

  /**
   * Transition word/phrase occurrences per word.
   * @param {string} text
   * @param {number} wordCount
   * @returns {number}
   */
  _transitionWordFrequency(text, wordCount) {
    if (wordCount === 0) return 0;
    return this._countMatches(text.toLowerCase(), this._transitionRegexes) / wordCount;
  }

  /**
   * Fraction of sentences that are questions.
   *
   * When the original `text` is supplied, a sentence is a question if the
   * punctuation run that terminates it contains '?'. A trailing fragment
   * with no terminator falls back to the leading-interrogative heuristic.
   * Without `text` (legacy call shape) every sentence uses the heuristic.
   *
   * @param {string[]} sentences - Sentences as returned by _splitSentences().
   * @param {string} [text] - The text the sentences were split from.
   * @returns {number}
   */
  _questionFrequency(sentences, text) {
    if (sentences.length === 0) return 0;
    const looksInterrogative = (s) =>
      /^(?:what|who|where|when|why|how|is|are|do|does|can|could|would|should)\b/i.test(s.trim());

    let questions = 0;
    let segments = 0;
    if (typeof text === 'string') {
      // Same segmentation as _splitSentences(), but keeping each terminator.
      const segmentPattern = /([^.!?]+)([.!?]*)/g;
      let match;
      while ((match = segmentPattern.exec(text)) !== null) {
        const body = match[1];
        const terminator = match[2];
        if (body.trim().length === 0) continue;
        segments++;
        if (terminator.includes('?') || (terminator === '' && looksInterrogative(body))) {
          questions++;
        }
      }
    }
    if (segments === 0) {
      // No text supplied, or the text was punctuation only.
      questions = sentences.filter(s => s.includes('?') || looksInterrogative(s)).length;
    }
    return questions / sentences.length;
  }

  /**
   * Contractions per 100 words.
   * @param {string} text
   * @param {number} wordCount
   * @returns {number}
   */
  _contractionUsage(text, wordCount) {
    if (wordCount === 0) return 0;
    const matches = text.match(CONTRACTION_PATTERN);
    const count = matches ? matches.length : 0;
    return (count / wordCount) * 100;
  }

  /**
   * PASSIVE_PATTERN matches per sentence.
   * @param {string} text
   * @param {number} sentenceCount
   * @returns {number}
   */
  _passiveVoiceEstimate(text, sentenceCount) {
    if (sentenceCount === 0) return 0;
    const matches = text.match(PASSIVE_PATTERN);
    const count = matches ? matches.length : 0;
    return count / sentenceCount;
  }

  /**
   * Determine the overall response structure.
   * @param {string} text
   * @returns {'prose'|'list'|'mixed'|'code'}
   */
  _responseStructure(text) {
    const lines = text.split('\n').filter(l => l.trim().length > 0);
    if (lines.length === 0) return 'prose';

    const bulletLines = lines.filter(l => /^\s*[-*•]\s/.test(l) || /^\s*\d+[.)]\s/.test(l)).length;
    // Every two fence markers are treated as one code block.
    const codeBlocks = (text.match(/```/g) || []).length / 2;
    // Lines indented by 4+ spaces or starting a fence count as code lines.
    const codeLines = lines.filter(l => /^\s{4,}\S/.test(l) || /^```/.test(l)).length;

    const bulletRatio = bulletLines / lines.length;
    const codeRatio = (codeLines + codeBlocks * 3) / lines.length;

    if (codeRatio > 0.5) return 'code';
    if (bulletRatio > 0.5) return 'list';
    if (bulletRatio > 0.15 || codeRatio > 0.15) return 'mixed';
    return 'prose';
  }
}
|
|
511
|
+
|
|
512
|
+
// =========================================================================
|
|
513
|
+
// STYLE PROFILE
|
|
514
|
+
// =========================================================================
|
|
515
|
+
|
|
516
|
+
/** Feature keys used for numeric comparison (excludes response_structure). */
|
|
517
|
+
// Order matters: vectors built from these keys are compared position by position.
const NUMERIC_FEATURE_KEYS = [
  'avg_sentence_length',
  'vocabulary_richness',
  'punctuation_density',
  'avg_word_length',
  'formality_score',
  'hedging_frequency',
  'bullet_point_usage',
  'code_block_frequency',
  'emoji_density',
  'paragraph_count',
  'capitalization_pattern',
  'transition_words',
  'question_frequency',
  'contraction_usage',
  'passive_voice_estimate',
];
|
|
524
|
+
|
|
525
|
+
/**
|
|
526
|
+
* Statistical profile for a model's writing style.
|
|
527
|
+
*/
|
|
528
|
+
class StyleProfile {
  /**
   * @param {string} modelName - Name of the model this profile represents.
   */
  constructor(modelName) {
    /** @type {string} */
    this.modelName = modelName;
    /** @type {object[]} Feature vectors added so far. */
    this._samples = [];
    /** @type {object|null} Memoized { mean, stddev }; cleared by addSample(). */
    this._cachedProfile = null;
  }

  /**
   * Add a feature vector sample to this profile.
   * The vector is shallow-copied, so later changes to `features` do not
   * affect the profile.
   * @param {object} features - Feature vector from ResponseAnalyzer.analyze().
   */
  addSample(features) {
    this._samples.push({ ...features });
    this._cachedProfile = null;
  }

  /**
   * Compute the profile: mean and (population) stddev for each numeric
   * feature. Non-numeric or missing values count as 0. The result is
   * memoized until the next addSample().
   * @returns {object} { mean: Object, stddev: Object }
   */
  getProfile() {
    if (this._cachedProfile) return this._cachedProfile;
    if (this._samples.length === 0) {
      const empty = {};
      for (const key of NUMERIC_FEATURE_KEYS) empty[key] = 0;
      this._cachedProfile = { mean: { ...empty }, stddev: { ...empty } };
      return this._cachedProfile;
    }

    const mean = {};
    const stddev = {};
    const n = this._samples.length;

    for (const key of NUMERIC_FEATURE_KEYS) {
      const values = this._samples.map(s => typeof s[key] === 'number' ? s[key] : 0);
      const m = values.reduce((a, b) => a + b, 0) / n;
      mean[key] = m;
      // Divides by n (population variance), not n - 1.
      const variance = values.reduce((a, v) => a + (v - m) * (v - m), 0) / n;
      stddev[key] = Math.sqrt(variance);
    }

    this._cachedProfile = { mean, stddev };
    return this._cachedProfile;
  }

  /**
   * Build the two aligned numeric vectors (input, profile mean) used by
   * similarity() and distance(). Missing or falsy entries become 0.
   * @param {object} features - Feature vector.
   * @returns {[number[], number[]]}
   * @private
   */
  _vectorPair(features) {
    const { mean } = this.getProfile();
    return [
      NUMERIC_FEATURE_KEYS.map(k => features[k] || 0),
      NUMERIC_FEATURE_KEYS.map(k => mean[k] || 0)
    ];
  }

  /**
   * Cosine similarity between input features and this profile's mean.
   * @param {object} features - Feature vector.
   * @returns {number} Similarity in [0, 1].
   */
  similarity(features) {
    const [input, mean] = this._vectorPair(features);
    return _cosineSimilarity(input, mean);
  }

  /**
   * Euclidean distance between input features and this profile's mean.
   * @param {object} features - Feature vector.
   * @returns {number} Distance (>= 0).
   */
  distance(features) {
    const [input, mean] = this._vectorPair(features);
    return _euclideanDistance(input, mean);
  }

  /**
   * @returns {number} Number of samples added to this profile.
   */
  getSampleCount() {
    return this._samples.length;
  }

  /**
   * Returns true if the profile has enough samples for reliable comparison.
   * @param {number} [minSamples=5] - Samples required to count as stable.
   * @returns {boolean}
   */
  isStable(minSamples = 5) {
    return this._samples.length >= minSamples;
  }

  /**
   * Serialize the profile to a JSON-compatible object.
   * Samples and statistics are copied, so mutating the result cannot
   * corrupt this profile's internal state.
   * @returns {object} { modelName, samples, profile: { mean, stddev } }
   */
  export() {
    const { mean, stddev } = this.getProfile();
    return {
      modelName: this.modelName,
      samples: this._samples.map(sample => ({ ...sample })),
      profile: { mean: { ...mean }, stddev: { ...stddev } }
    };
  }

  /**
   * Deserialize a profile from a previously exported object.
   *
   * If the data carries a `profile` with `mean` and `stddev` objects, those
   * statistics are restored as the cached profile. This keeps statistics
   * that were not derived from the samples (for example an injected stddev)
   * across an export/import round trip. As with any cached profile, they are
   * recomputed from the samples after the next addSample().
   *
   * @param {object} json - Exported profile data. Anything that is not an
   *   object yields an empty profile named 'unknown'.
   * @returns {StyleProfile}
   */
  static import(json) {
    const isRecord = (value) => value !== null && typeof value === 'object';
    const data = isRecord(json) ? json : {};

    const profile = new StyleProfile(data.modelName || 'unknown');
    if (Array.isArray(data.samples)) {
      for (const sample of data.samples) {
        profile.addSample(sample);
      }
    }

    // Set after the loop: addSample() clears the cache on every call.
    const stats = data.profile;
    if (isRecord(stats) && isRecord(stats.mean) && isRecord(stats.stddev)) {
      profile._cachedProfile = {
        mean: { ...stats.mean },
        stddev: { ...stats.stddev }
      };
    }
    return profile;
  }
}
|
|
647
|
+
|
|
648
|
+
// =========================================================================
|
|
649
|
+
// FINGERPRINT DATABASE
|
|
650
|
+
// =========================================================================
|
|
651
|
+
|
|
652
|
+
/**
|
|
653
|
+
* Store of known model profiles with identification capabilities.
|
|
654
|
+
*/
|
|
655
|
+
class FingerprintDatabase {
  /**
   * Creates a database pre-populated with the built-in model signatures.
   */
  constructor() {
    /** @type {Map<string, StyleProfile>} */
    this._profiles = new Map();
    this._loadBuiltInProfiles();
  }

  /**
   * Load built-in model signatures as StyleProfile instances.
   * @private
   */
  _loadBuiltInProfiles() {
    for (const [name, sig] of Object.entries(MODEL_SIGNATURES)) {
      const profile = new StyleProfile(name);
      // One synthetic sample equal to the mean, plus the signature's own
      // stddev written straight into the profile cache. A single sample
      // could not produce that stddev by itself.
      profile._samples.push({ ...sig.mean });
      profile._cachedProfile = { mean: { ...sig.mean }, stddev: { ...sig.stddev } };
      this._profiles.set(name, profile);
    }
  }

  /**
   * Store a profile for a model name, replacing any existing one.
   * @param {string} modelName
   * @param {StyleProfile} profile
   */
  addProfile(modelName, profile) {
    this._profiles.set(modelName, profile);
  }

  /**
   * Identify the most likely model that produced the given features.
   * Returns a ranked list of models by similarity, best first.
   * @param {object} features - Feature vector.
   * @returns {Array<{model: string, similarity: number}>} Every registered
   *   model; all similarities are 0 when `features` is not an object.
   */
  identify(features) {
    if (!features || typeof features !== 'object') {
      return this.listModels().map(m => ({ model: m, similarity: 0 }));
    }
    const results = [];
    for (const [name, profile] of this._profiles) {
      results.push({
        model: name,
        similarity: profile.similarity(features)
      });
    }
    results.sort((a, b) => b.similarity - a.similarity);
    return results;
  }

  /**
   * Get the single best matching model.
   * @param {object} features - Feature vector.
   * @returns {{model: string, similarity: number}} The top-ranked model, or
   *   { model: 'unknown', similarity: 0 } when no profile is registered.
   */
  getClosestMatch(features) {
    const ranked = this.identify(features);
    return ranked.length > 0 ? ranked[0] : { model: 'unknown', similarity: 0 };
  }

  /**
   * List all registered model names.
   * @returns {string[]}
   */
  listModels() {
    return Array.from(this._profiles.keys());
  }

  /**
   * Remove a model profile. Unknown names are ignored.
   * @param {string} name
   */
  removeModel(name) {
    this._profiles.delete(name);
  }

  /**
   * Serialize the entire database.
   * @returns {object} Map of model name to that profile's export().
   */
  export() {
    const data = {};
    for (const [name, profile] of this._profiles) {
      data[name] = profile.export();
    }
    return data;
  }

  /**
   * Deserialize an entire database from exported data. Imported profiles
   * are added to the database; an existing profile with the same name is
   * replaced.
   *
   * Input that is not a plain keyed object (null, undefined, primitives,
   * arrays) is ignored, and entries whose value is not an object are
   * skipped, so malformed data can neither throw nor register junk profiles.
   *
   * @param {object} json - Previously exported database.
   */
  import(json) {
    const isRecord = (value) =>
      value !== null && typeof value === 'object' && !Array.isArray(value);
    if (!isRecord(json)) return;

    for (const [name, profileData] of Object.entries(json)) {
      if (!isRecord(profileData)) continue;
      this._profiles.set(name, StyleProfile.import(profileData));
    }
  }
}
|
|
754
|
+
|
|
755
|
+
// =========================================================================
|
|
756
|
+
// MODEL FINGERPRINTER
|
|
757
|
+
// =========================================================================
|
|
758
|
+
|
|
759
|
+
/**
|
|
760
|
+
* Main fingerprinting engine — analyzes text to identify which LLM produced it.
|
|
761
|
+
*/
|
|
762
|
+
class ModelFingerprinter {
  /**
   * @param {object} [config]
   * @param {string[]} [config.knownModels] - Model names to consider. When
   *   non-empty, fingerprint() and the "best match" check in compareTo() only
   *   rank these models. When empty (the default) every profile in the
   *   database is ranked.
   * @param {number} [config.sensitivityThreshold] - Minimum confidence to report a match (0-1). Defaults to 0.7.
   * @param {number} [config.minSampleSize] - Minimum samples before a profile is usable. Defaults to 5.
   *   Stored on `this.config`; no method of this class reads it.
   */
  constructor(config = {}) {
    // A default parameter only covers `undefined`; `?? {}` also tolerates an
    // explicit `null` instead of throwing on the property reads below.
    const options = config ?? {};
    this.config = {
      knownModels: options.knownModels || [],
      sensitivityThreshold: options.sensitivityThreshold ?? 0.7,
      minSampleSize: options.minSampleSize ?? 5
    };
    this._analyzer = new ResponseAnalyzer();
    this._database = new FingerprintDatabase();
  }

  /**
   * Restrict a ranking to the models listed in `config.knownModels`.
   * An empty (or non-array) list means "no restriction".
   * @private
   * @param {Array<{model: string, similarity: number}>} ranked - Ranking, best first.
   * @returns {Array<{model: string, similarity: number}>} Same order, filtered.
   */
  _restrictToKnownModels(ranked) {
    const { knownModels } = this.config;
    if (!Array.isArray(knownModels) || knownModels.length === 0) {
      return ranked;
    }
    const allowed = new Set(knownModels);
    return ranked.filter((entry) => allowed.has(entry.model));
  }

  /**
   * Analyze text and return the likely model that generated it.
   *
   * @param {string} text - The text to fingerprint.
   * @returns {{likely_model: string, confidence: number, features: object, alternatives: Array<{model: string, similarity: number}>}}
   *   `likely_model` is 'unknown' when the best similarity is below
   *   `sensitivityThreshold`; `alternatives` holds up to three runner-ups.
   */
  fingerprint(text) {
    const features = this._analyzer.analyze(text);
    const ranked = this._restrictToKnownModels(this._database.identify(features));
    const best = ranked[0] || { model: 'unknown', similarity: 0 };

    return {
      likely_model: best.similarity >= this.config.sensitivityThreshold ? best.model : 'unknown',
      confidence: best.similarity,
      features,
      alternatives: ranked.slice(1, 4)
    };
  }

  /**
   * Compare text against an expected model and return match information.
   *
   * @param {string} text - The text to check.
   * @param {string} expectedModel - The model name expected.
   * @returns {{match: boolean, confidence: number, drift_score: number}}
   *   `confidence` is the similarity to `expectedModel` (0 when it has no
   *   profile); `drift_score` is `1 - confidence`.
   */
  compareTo(text, expectedModel) {
    const features = this._analyzer.analyze(text);
    const ranked = this._database.identify(features);
    // The expected model's own similarity is always reported from the full
    // ranking; only the "who is the best match" question honours knownModels.
    const expected = ranked.find((r) => r.model === expectedModel);
    const best = this._restrictToKnownModels(ranked)[0] || { model: 'unknown', similarity: 0 };
    const expectedSimilarity = expected ? expected.similarity : 0;

    return {
      match: best.model === expectedModel && expectedSimilarity >= this.config.sensitivityThreshold,
      confidence: expectedSimilarity,
      drift_score: 1 - expectedSimilarity
    };
  }

  /**
   * Build a StyleProfile from an array of sample texts.
   *
   * @param {string[]} texts - Sample texts from the model.
   * @param {string} modelName - Name for the profile.
   * @returns {StyleProfile}
   */
  buildProfile(texts, modelName) {
    const profile = new StyleProfile(modelName);
    for (const text of texts) {
      profile.addSample(this._analyzer.analyze(text));
    }
    return profile;
  }

  /**
   * Register a known model profile in the database.
   *
   * @param {string} name - Model name.
   * @param {StyleProfile} profile - The profile to register.
   */
  registerModel(name, profile) {
    this._database.addProfile(name, profile);
  }

  /**
   * Detect if the model generating responses has changed from a baseline.
   *
   * @param {string} currentText - The current response text.
   * @param {StyleProfile} baselineProfile - The expected model's profile.
   * @returns {{swapDetected: boolean, similarity: number, drift_score: number}}
   *   `swapDetected` is true when similarity falls below `sensitivityThreshold`.
   */
  detectSwap(currentText, baselineProfile) {
    const features = this._analyzer.analyze(currentText);
    const similarity = baselineProfile.similarity(features);

    return {
      swapDetected: similarity < this.config.sensitivityThreshold,
      similarity,
      drift_score: 1 - similarity
    };
  }
}
|
|
864
|
+
|
|
865
|
+
// =========================================================================
|
|
866
|
+
// SUPPLY CHAIN DETECTOR
|
|
867
|
+
// =========================================================================
|
|
868
|
+
|
|
869
|
+
/**
|
|
870
|
+
* Monitors for model substitution over time by tracking stylistic drift.
|
|
871
|
+
*/
|
|
872
|
+
class SupplyChainDetector {
  /**
   * @param {string} expectedModel - The model name expected to be in use.
   * @param {object} [config]
   * @param {number} [config.driftThreshold] - Maximum acceptable drift (0-1). Defaults to 0.3.
   * @param {number} [config.windowSize] - Number of recent responses to consider.
   *   Defaults to 20. Values below 1 are raised to 1; values that are not
   *   numbers fall back to the default.
   * @param {boolean} [config.alertOnDrift] - Whether to generate alerts on drift.
   *   Enabled unless explicitly set to `false`.
   */
  constructor(expectedModel, config = {}) {
    // A default parameter only covers `undefined`; also tolerate explicit null.
    const options = config ?? {};
    this.expectedModel = expectedModel;
    this.config = {
      driftThreshold: options.driftThreshold ?? 0.3,
      windowSize: SupplyChainDetector._normalizeWindowSize(options.windowSize),
      alertOnDrift: options.alertOnDrift !== false
    };
    this._analyzer = new ResponseAnalyzer();
    this._database = new FingerprintDatabase();
    this._history = [];
    this._alerts = [];
  }

  /**
   * Turn a user-supplied window size into a usable one.
   * `Infinity` is preserved so callers can still ask for an untrimmed history.
   * @private
   * @param {*} value - Raw `config.windowSize`.
   * @returns {number} A whole number >= 1 (or Infinity).
   */
  static _normalizeWindowSize(value) {
    const fallback = 20;
    if (value == null) return fallback;
    const size = Math.floor(Number(value));
    if (Number.isNaN(size)) return fallback;
    return Math.max(1, size);
  }

  /**
   * Analyze a response and check for drift from the expected model.
   *
   * Records the result in the rolling history and, when the drift exceeds
   * `driftThreshold` and alerts are enabled, stores an alert and logs it.
   *
   * @param {string} text - The response text to analyze.
   * @returns {{drift_score: number, is_anomalous: boolean, identified_as: string}}
   */
  ingestResponse(text) {
    const features = this._analyzer.analyze(text);
    const ranked = this._database.identify(features);
    const expected = ranked.find((r) => r.model === this.expectedModel);
    const best = ranked[0] || { model: 'unknown', similarity: 0 };
    // A model with no profile in the database counts as similarity 0.
    const expectedSimilarity = expected ? expected.similarity : 0;
    const driftScore = 1 - expectedSimilarity;

    const entry = {
      timestamp: Date.now(),
      drift_score: driftScore,
      identified_as: best.model,
      expected_similarity: expectedSimilarity
    };

    this._history.push(entry);

    // Keep only the window. Dropping the overflow count from the front avoids
    // `slice(-windowSize)`, which keeps everything for 0 (slice(-0) is
    // slice(0)) and drops everything for negative sizes.
    const overflow = this._history.length - this.config.windowSize;
    if (overflow > 0) {
      this._history.splice(0, overflow);
    }

    const isAnomalous = driftScore > this.config.driftThreshold;

    if (isAnomalous && this.config.alertOnDrift) {
      this._alerts.push({
        timestamp: entry.timestamp,
        drift_score: driftScore,
        expected: this.expectedModel,
        detected: best.model,
        message: `[Agent Shield] Model drift detected: expected ${this.expectedModel}, response resembles ${best.model} (drift: ${driftScore.toFixed(3)})`
      });
      console.log(`[Agent Shield] Model drift alert: expected=${this.expectedModel} detected=${best.model} drift=${driftScore.toFixed(3)}`);
    }

    return {
      drift_score: driftScore,
      is_anomalous: isAnomalous,
      identified_as: best.model
    };
  }

  /**
   * Get the current average drift score over the window.
   *
   * @returns {number} Drift from expected model (0 = identical, 1 = completely different).
   *   Returns 0 when no responses have been ingested.
   */
  getDriftScore() {
    if (this._history.length === 0) return 0;
    const total = this._history.reduce((sum, e) => sum + e.drift_score, 0);
    return total / this._history.length;
  }

  /**
   * Returns true if the average drift exceeds the threshold.
   *
   * @returns {boolean}
   */
  isCompromised() {
    return this.getDriftScore() > this.config.driftThreshold;
  }

  /**
   * Get all drift alerts generated so far.
   *
   * @returns {Array<{timestamp: number, drift_score: number, expected: string, detected: string, message: string}>}
   *   A shallow copy; mutating the array does not affect the detector.
   */
  getAlerts() {
    return this._alerts.slice();
  }

  /**
   * Get drift score over time.
   *
   * @returns {Array<{timestamp: number, drift_score: number, identified_as: string}>}
   */
  getTimeline() {
    return this._history.map((e) => ({
      timestamp: e.timestamp,
      drift_score: e.drift_score,
      identified_as: e.identified_as
    }));
  }

  /**
   * Reset the detector state (history and alerts).
   */
  reset() {
    this._history = [];
    this._alerts = [];
  }
}
|
|
991
|
+
|
|
992
|
+
// =========================================================================
|
|
993
|
+
// MATH HELPERS
|
|
994
|
+
// =========================================================================
|
|
995
|
+
|
|
996
|
+
/**
|
|
997
|
+
* Compute cosine similarity between two numeric vectors.
|
|
998
|
+
* @param {number[]} a
|
|
999
|
+
* @param {number[]} b
|
|
1000
|
+
* @returns {number} Similarity in [0, 1].
|
|
1001
|
+
*/
|
|
1002
|
+
function _cosineSimilarity(a, b) {
  // Walk the longer vector and treat missing components as 0. Iterating only
  // over `a` yields NaN when `b` is shorter (number * undefined) and ignores
  // the tail of `b` when it is longer, which overstates the similarity.
  const length = Math.max(a.length, b.length);
  let dot = 0;
  let magA = 0;
  let magB = 0;
  for (let i = 0; i < length; i++) {
    const x = a[i] ?? 0;
    const y = b[i] ?? 0;
    dot += x * y;
    magA += x * x;
    magB += y * y;
  }
  magA = Math.sqrt(magA);
  magB = Math.sqrt(magB);
  // A zero-length vector has no direction; report "no similarity".
  if (magA === 0 || magB === 0) return 0;
  // Clamp: negative cosines map to 0 and rounding overshoot is capped at 1.
  return Math.max(0, Math.min(1, dot / (magA * magB)));
}
|
|
1014
|
+
|
|
1015
|
+
/**
|
|
1016
|
+
* Compute Euclidean distance between two numeric vectors.
|
|
1017
|
+
* @param {number[]} a
|
|
1018
|
+
* @param {number[]} b
|
|
1019
|
+
* @returns {number}
|
|
1020
|
+
*/
|
|
1021
|
+
function _euclideanDistance(a, b) {
  // Walk the longer vector and treat missing components as 0. Iterating only
  // over `a` yields NaN when `b` is shorter and silently ignores the extra
  // components of `b` when it is longer.
  const length = Math.max(a.length, b.length);
  let sum = 0;
  for (let i = 0; i < length; i++) {
    const diff = (a[i] ?? 0) - (b[i] ?? 0);
    sum += diff * diff;
  }
  return Math.sqrt(sum);
}
|
|
1029
|
+
|
|
1030
|
+
// =========================================================================
|
|
1031
|
+
// EXPORTS
|
|
1032
|
+
// =========================================================================
|
|
1033
|
+
|
|
1034
|
+
module.exports = {
|
|
1035
|
+
ModelFingerprinter,
|
|
1036
|
+
ResponseAnalyzer,
|
|
1037
|
+
StyleProfile,
|
|
1038
|
+
FingerprintDatabase,
|
|
1039
|
+
FingerPrintDatabase: FingerprintDatabase,
|
|
1040
|
+
SupplyChainDetector,
|
|
1041
|
+
MODEL_SIGNATURES
|
|
1042
|
+
};
|