couchloop-eq-mcp 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -5
- package/dist/clients/shrinkChatClient.d.ts +10 -0
- package/dist/clients/shrinkChatClient.d.ts.map +1 -1
- package/dist/clients/shrinkChatClient.js +4 -0
- package/dist/clients/shrinkChatClient.js.map +1 -1
- package/dist/db/client.d.ts.map +1 -1
- package/dist/db/client.js +5 -3
- package/dist/db/client.js.map +1 -1
- package/dist/db/schema.d.ts +363 -0
- package/dist/db/schema.d.ts.map +1 -1
- package/dist/db/schema.js +47 -0
- package/dist/db/schema.js.map +1 -1
- package/dist/governance/config.d.ts +66 -0
- package/dist/governance/config.d.ts.map +1 -0
- package/dist/governance/config.js +238 -0
- package/dist/governance/config.js.map +1 -0
- package/dist/governance/detectors/hallucination.d.ts +61 -0
- package/dist/governance/detectors/hallucination.d.ts.map +1 -0
- package/dist/governance/detectors/hallucination.js +338 -0
- package/dist/governance/detectors/hallucination.js.map +1 -0
- package/dist/governance/detectors/inconsistency.d.ts +99 -0
- package/dist/governance/detectors/inconsistency.d.ts.map +1 -0
- package/dist/governance/detectors/inconsistency.js +548 -0
- package/dist/governance/detectors/inconsistency.js.map +1 -0
- package/dist/governance/detectors/toneDrift.d.ts +63 -0
- package/dist/governance/detectors/toneDrift.d.ts.map +1 -0
- package/dist/governance/detectors/toneDrift.js +421 -0
- package/dist/governance/detectors/toneDrift.js.map +1 -0
- package/dist/governance/detectors/unsafeReasoning.d.ts +54 -0
- package/dist/governance/detectors/unsafeReasoning.d.ts.map +1 -0
- package/dist/governance/detectors/unsafeReasoning.js +473 -0
- package/dist/governance/detectors/unsafeReasoning.js.map +1 -0
- package/dist/governance/evaluationEngine.d.ts +112 -0
- package/dist/governance/evaluationEngine.d.ts.map +1 -0
- package/dist/governance/evaluationEngine.js +265 -0
- package/dist/governance/evaluationEngine.js.map +1 -0
- package/dist/governance/intervention.d.ts +81 -0
- package/dist/governance/intervention.d.ts.map +1 -0
- package/dist/governance/intervention.js +405 -0
- package/dist/governance/intervention.js.map +1 -0
- package/dist/server/chatgpt-mcp.d.ts +10 -0
- package/dist/server/chatgpt-mcp.d.ts.map +1 -0
- package/dist/server/chatgpt-mcp.js +233 -0
- package/dist/server/chatgpt-mcp.js.map +1 -0
- package/dist/server/index.d.ts.map +1 -1
- package/dist/server/index.js +94 -3
- package/dist/server/index.js.map +1 -1
- package/dist/server/middleware/localNetworkAccess.d.ts +16 -0
- package/dist/server/middleware/localNetworkAccess.d.ts.map +1 -0
- package/dist/server/middleware/localNetworkAccess.js +97 -0
- package/dist/server/middleware/localNetworkAccess.js.map +1 -0
- package/dist/server/sse.d.ts +16 -0
- package/dist/server/sse.d.ts.map +1 -0
- package/dist/server/sse.js +215 -0
- package/dist/server/sse.js.map +1 -0
- package/dist/tools/checkpoint.d.ts.map +1 -1
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +4 -2
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/insight.d.ts +1 -0
- package/dist/tools/insight.d.ts.map +1 -1
- package/dist/tools/sendMessage-complex-backup.d.ts +6 -0
- package/dist/tools/sendMessage-complex-backup.d.ts.map +1 -0
- package/dist/tools/sendMessage-complex-backup.js +545 -0
- package/dist/tools/sendMessage-complex-backup.js.map +1 -0
- package/dist/tools/sendMessage-revised.d.ts +11 -0
- package/dist/tools/sendMessage-revised.d.ts.map +1 -0
- package/dist/tools/sendMessage-revised.js +429 -0
- package/dist/tools/sendMessage-revised.js.map +1 -0
- package/dist/tools/sendMessage-truly-simple.d.ts +8 -0
- package/dist/tools/sendMessage-truly-simple.d.ts.map +1 -0
- package/dist/tools/sendMessage-truly-simple.js +299 -0
- package/dist/tools/sendMessage-truly-simple.js.map +1 -0
- package/dist/tools/sendMessage.d.ts +4 -2
- package/dist/tools/sendMessage.d.ts.map +1 -1
- package/dist/tools/sendMessage.js +240 -186
- package/dist/tools/sendMessage.js.map +1 -1
- package/dist/tools/session.d.ts.map +1 -1
- package/dist/tools/session.js +35 -29
- package/dist/tools/session.js.map +1 -1
- package/dist/types/auth.d.ts +9 -1
- package/dist/types/auth.d.ts.map +1 -1
- package/dist/types/auth.js +37 -10
- package/dist/types/auth.js.map +1 -1
- package/dist/types/insight.d.ts +10 -0
- package/dist/types/insight.d.ts.map +1 -1
- package/dist/types/session.d.ts +10 -0
- package/dist/types/session.d.ts.map +1 -1
- package/package.json +10 -5
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CouchLoop Behavioral Governance Layer - Unsafe Reasoning Detector
|
|
3
|
+
*
|
|
4
|
+
* Detects harmful advice, moralizing/shaming language, clinical overreach,
|
|
5
|
+
* and other potentially dangerous reasoning patterns in LLM responses
|
|
6
|
+
*/
|
|
7
|
+
export class UnsafeReasoningDetector {
    /** Active configuration. `threshold` is read by `detect()`; `patterns` supplies custom regex sources. */
    config;
    /** Built-in pattern descriptors followed by any custom ones compiled from `config.patterns`. */
    unsafePatterns;
    /** Bare regexes reported as "Clinical overreach". */
    clinicalOverreachPatterns;
    /** Bare regexes reported as "Shaming language". */
    shamingPatterns;
    /** Bare regexes reported as "Harmful advice". */
    harmfulAdvicePatterns;
    /** Number of built-in entries at the head of `unsafePatterns`; everything after is custom. */
    builtInPatternCount;

    /**
     * Builds the built-in pattern tables and compiles custom patterns from the config.
     *
     * @param {{threshold: number, patterns?: Array<string|RegExp>}} config
     *   Detector configuration. Each entry of `patterns` is compiled with `new RegExp(p, 'i')`
     *   and registered as a medium-severity `harm` pattern.
     * @throws {SyntaxError} If a custom pattern is not a valid regular expression.
     */
    constructor(config) {
        this.config = config;
        // Each descriptor: regex, category bucket, severity, and the label used in reports.
        this.unsafePatterns = [
            // Critical harm patterns
            {
                pattern: /end (your |their )?(life|it all)|kill (yourself|themselves)/i,
                category: 'crisis',
                severity: 'critical',
                description: 'Suicide-related content'
            },
            {
                pattern: /harm (yourself|themselves|others)/i,
                category: 'crisis',
                severity: 'critical',
                description: 'Self-harm or violence'
            },
            {
                pattern: /not worth living|better off (dead|without you)/i,
                category: 'crisis',
                severity: 'critical',
                description: 'Life-devaluing statements'
            },
            // Medication and treatment interference
            {
                pattern: /stop taking (your |their )?(medication|meds|pills)/i,
                category: 'clinical',
                severity: 'critical',
                description: 'Medication non-compliance advice'
            },
            {
                pattern: /don't need (therapy|treatment|medication|help|a therapist|a doctor)/i,
                category: 'clinical',
                severity: 'high',
                description: 'Discouraging professional help'
            },
            {
                pattern: /(medication|therapy|treatment) (is|are) (harmful|dangerous|bad for you)/i,
                category: 'clinical',
                severity: 'high',
                description: 'Anti-treatment messaging'
            },
            // Clinical overreach
            {
                pattern: /you have (depression|anxiety|PTSD|bipolar|schizophrenia|BPD|ADHD)/i,
                category: 'clinical',
                severity: 'high',
                description: 'Unauthorized diagnosis'
            },
            {
                pattern: /this (is|sounds like) (depression|anxiety|PTSD|bipolar|a mental illness)/i,
                category: 'clinical',
                severity: 'high',
                description: 'Diagnostic language'
            },
            {
                pattern: /(prescribe|recommend) (medication|drugs|pharmaceuticals)/i,
                category: 'clinical',
                severity: 'high',
                description: 'Prescription advice'
            },
            // Harmful directive advice
            {
                pattern: /cut off (your |all )?(family|friends|support|everyone)/i,
                category: 'isolation',
                severity: 'high',
                description: 'Encouraging isolation'
            },
            {
                pattern: /don't tell (anyone|your therapist|your doctor|them)/i,
                category: 'isolation',
                severity: 'high',
                description: 'Encouraging secrecy'
            },
            {
                pattern: /ignore (what |everything )?(they|your therapist|your doctor|everyone) (said|says|told you)/i,
                category: 'directive',
                severity: 'high',
                description: 'Undermining professional advice'
            },
            // Shaming and moralizing
            {
                pattern: /you('re| are) (weak|pathetic|worthless|stupid|a failure)/i,
                category: 'shaming',
                severity: 'high',
                description: 'Direct shaming'
            },
            {
                pattern: /should be ashamed|disgraceful|disappointing/i,
                category: 'shaming',
                severity: 'medium',
                description: 'Shame-inducing language'
            },
            {
                pattern: /your fault|you('re| are) to blame|you caused this/i,
                category: 'shaming',
                severity: 'medium',
                description: 'Blame assignment'
            },
            // Dangerous certainty about outcomes
            {
                pattern: /will definitely (get worse|fail|not work)/i,
                category: 'harm',
                severity: 'medium',
                description: 'Catastrophic predictions'
            },
            {
                pattern: /no hope|hopeless|give up|no point/i,
                category: 'harm',
                severity: 'high',
                description: 'Hope-destroying language'
            },
            {
                pattern: /always be (alone|miserable|depressed|anxious)/i,
                category: 'harm',
                severity: 'medium',
                description: 'Permanent negative predictions'
            },
            // Boundary violations
            {
                pattern: /I('ll| will) (always |forever )?be (here|available|waiting)/i,
                category: 'directive',
                severity: 'low',
                description: 'Unrealistic availability promises'
            },
            {
                pattern: /call me (anytime|day or night|24\/7)/i,
                category: 'directive',
                severity: 'medium',
                description: 'Inappropriate availability'
            },
            // Minimizing serious issues
            {
                pattern: /just get over it|snap out of it|stop being dramatic/i,
                category: 'shaming',
                severity: 'medium',
                description: 'Minimizing mental health struggles'
            },
            {
                pattern: /everyone goes through|not a big deal|you're overreacting/i,
                category: 'shaming',
                severity: 'low',
                description: 'Invalidating experiences'
            }
        ];
        // Remember where the built-ins end so updateConfig() can swap the custom tail.
        this.builtInPatternCount = this.unsafePatterns.length;
        // Additional pattern arrays for specific categories
        this.clinicalOverreachPatterns = [
            /based on (these |your )?symptoms/i,
            /clinical (diagnosis|assessment|evaluation)/i,
            /meets criteria for/i,
            /differential diagnosis/i,
            /prognosis is/i,
            /treatment protocol/i,
            /dosage should be/i,
            /contraindicated/i,
            /side effects include/i
        ];
        this.shamingPatterns = [
            /what's wrong with you/i,
            /how could you/i,
            /should know better/i,
            /act your age/i,
            /pull yourself together/i,
            /man up|woman up/i,
            /stop (being|acting) like a/i,
            /real (men|women|adults) don't/i
        ];
        this.harmfulAdvicePatterns = [
            /drink (alcohol |more )?to cope/i,
            /use (drugs|substances) to feel better/i,
            /don't eat|stop eating/i,
            /sleep it off/i,
            /fight back physically/i,
            /get revenge/i,
            /teach them a lesson/i,
            /show them who's boss/i
        ];
        // Add custom patterns from config
        this.applyCustomPatterns(config?.patterns);
    }

    /**
     * Replaces the custom tail of `unsafePatterns` with patterns compiled from `patterns`.
     * Built-in entries (the first `builtInPatternCount`) are left untouched.
     *
     * @param {Array<string|RegExp>|undefined} patterns Custom regex sources, or a falsy value for none.
     * @throws {SyntaxError} If an entry is not a valid regular expression.
     */
    applyCustomPatterns(patterns) {
        // Compile first so an invalid pattern leaves the previous custom set intact.
        const compiled = [];
        if (patterns) {
            patterns.forEach(p => {
                compiled.push({
                    pattern: new RegExp(p, 'i'),
                    category: 'harm',
                    severity: 'medium',
                    description: 'Custom unsafe pattern'
                });
            });
        }
        this.unsafePatterns.length = this.builtInPatternCount;
        this.unsafePatterns.push(...compiled);
    }

    /**
     * Main detection method: runs every pattern family against a draft response.
     *
     * @param {string} response Draft response text. `null`/`undefined` is treated as empty text.
     * @param {object} [context] Optional session context; `metadata.crisisHistory` and
     *   `conversationHistory` are consulted when present.
     * @returns {Promise<{detected: boolean, confidence: number, patterns: string[], details: object}>}
     *   `detected` is `confidence >= config.threshold`; `details` carries the highest severity seen
     *   among `unsafePatterns`/clinical checks, per-category counts, pattern count and threshold.
     */
    async detect(response, context) {
        // Guard against a missing draft instead of throwing from String.prototype.match.
        const text = response == null ? '' : String(response);
        const detectedPatterns = [];
        let overallSeverity = 'low';
        const detectionsByCategory = {
            harm: 0,
            clinical: 0,
            directive: 0,
            shaming: 0,
            isolation: 0,
            crisis: 0
        };
        // Check main unsafe patterns
        for (const unsafePattern of this.unsafePatterns) {
            const matches = text.match(unsafePattern.pattern);
            if (!matches) {
                continue;
            }
            detectedPatterns.push(`${unsafePattern.description}: "${matches[0]}"`);
            detectionsByCategory[unsafePattern.category] = (detectionsByCategory[unsafePattern.category] || 0) + 1;
            // Track the most severe descriptor that matched.
            if (this.compareSeverity(unsafePattern.severity, overallSeverity) > 0) {
                overallSeverity = unsafePattern.severity;
            }
        }
        // Check clinical overreach (any hit raises severity to at least 'high')
        const clinicalIssues = this.detectClinicalOverreach(text);
        if (clinicalIssues.length > 0) {
            detectedPatterns.push(...clinicalIssues);
            detectionsByCategory['clinical'] = (detectionsByCategory['clinical'] || 0) + clinicalIssues.length;
            if (this.compareSeverity('high', overallSeverity) > 0) {
                overallSeverity = 'high';
            }
        }
        // Check shaming language
        const shamingIssues = this.detectShaming(text);
        if (shamingIssues.length > 0) {
            detectedPatterns.push(...shamingIssues);
            detectionsByCategory['shaming'] = (detectionsByCategory['shaming'] || 0) + shamingIssues.length;
        }
        // Check harmful advice
        const harmfulAdvice = this.detectHarmfulAdvice(text);
        if (harmfulAdvice.length > 0) {
            detectedPatterns.push(...harmfulAdvice);
            detectionsByCategory['harm'] = (detectionsByCategory['harm'] || 0) + harmfulAdvice.length;
        }
        // Context-aware checks (reported, but not attributed to a category)
        if (context) {
            const contextualIssues = await this.performContextualChecks(text, context);
            detectedPatterns.push(...contextualIssues);
        }
        // Calculate confidence based on severity and count
        const confidence = this.calculateConfidence(overallSeverity, detectedPatterns.length, detectionsByCategory);
        const detected = confidence >= this.config.threshold;
        return {
            detected,
            confidence,
            patterns: detectedPatterns,
            details: {
                severity: overallSeverity,
                detectionsByCategory,
                patternCount: detectedPatterns.length,
                threshold: this.config.threshold
            }
        };
    }

    /**
     * Detect clinical overreach patterns.
     *
     * @param {string} text Text to scan.
     * @returns {string[]} One "Clinical overreach: …" entry per finding.
     */
    detectClinicalOverreach(text) {
        const detected = [];
        for (const pattern of this.clinicalOverreachPatterns) {
            const matches = text.match(pattern);
            if (matches) {
                detected.push(`Clinical overreach: "${matches[0]}"`);
            }
        }
        // Three or more diagnostic-vocabulary words in one response counts as a finding.
        const diagnosticTerms = text.match(/\b(diagnos\w+|disorder|syndrome|condition|illness|disease|patholog\w+)\b/gi);
        if (diagnosticTerms && diagnosticTerms.length >= 3) {
            detected.push(`Clinical overreach: Multiple diagnostic terms (${diagnosticTerms.length})`);
        }
        // Check for treatment recommendations beyond scope
        if (text.match(/I recommend .* medication/i) || text.match(/you need .* prescription/i)) {
            detected.push('Clinical overreach: Medication recommendation');
        }
        // Check for medical advice; reported at most once regardless of how many patterns hit.
        const medicalAdvicePatterns = [
            /increase .* dosage/i,
            /decrease .* dosage/i,
            /switch to .* medication/i,
            /combine .* with/i,
            /taper off/i
        ];
        if (medicalAdvicePatterns.some(pattern => pattern.test(text))) {
            detected.push('Clinical overreach: Medical dosage advice');
        }
        return detected;
    }

    /**
     * Detect shaming and moralizing language.
     *
     * @param {string} text Text to scan.
     * @returns {string[]} One entry per finding.
     */
    detectShaming(text) {
        const detected = [];
        for (const pattern of this.shamingPatterns) {
            const matches = text.match(pattern);
            if (matches) {
                detected.push(`Shaming language: "${matches[0]}"`);
            }
        }
        // Check for victim-blaming
        if (text.match(/brought this on yourself/i) || text.match(/asked for it/i)) {
            detected.push('Shaming: Victim-blaming detected');
        }
        // Check for comparison-based shaming
        if (text.match(/other people .* better/i) || text.match(/everyone else can/i)) {
            detected.push('Shaming: Unfair comparison');
        }
        return detected;
    }

    /**
     * Detect harmful advice patterns.
     *
     * @param {string} text Text to scan.
     * @returns {string[]} One "Harmful advice: …" entry per finding.
     */
    detectHarmfulAdvice(text) {
        const detected = [];
        for (const pattern of this.harmfulAdvicePatterns) {
            const matches = text.match(pattern);
            if (matches) {
                detected.push(`Harmful advice: "${matches[0]}"`);
            }
        }
        // Risk-taking is only flagged when no qualifier appears anywhere in the text.
        if (text.match(/take .* risk/i) && !text.match(/calculated|reasonable|managed/i)) {
            detected.push('Harmful advice: Encouraging risky behavior');
        }
        // Check for isolation encouragement
        if (text.match(/better off alone/i) || text.match(/don't need anyone/i)) {
            detected.push('Harmful advice: Encouraging isolation');
        }
        // Check for discouraging help-seeking
        if (text.match(/waste of (time|money).*therap/i) || text.match(/therap.* doesn't work/i)) {
            detected.push('Harmful advice: Discouraging therapy');
        }
        return detected;
    }

    /**
     * Perform context-aware safety checks against session metadata and prior assistant turns.
     *
     * @param {string} response Draft response text.
     * @param {object} context Session context (`metadata`, `conversationHistory`).
     * @returns {Promise<string[]>} "Contextual issue: …" entries; each issue appears at most once.
     */
    async performContextualChecks(response, context) {
        const issues = [];
        // Check if user mentioned crisis and response is inappropriately light
        if (context.metadata?.crisisHistory) {
            if (response.match(/everything will be fine|don't worry|no big deal/i)) {
                issues.push('Contextual issue: Minimizing crisis situation');
            }
        }
        if (context.conversationHistory) {
            const priorAssistantTurns = context.conversationHistory.filter(m => m.role === 'assistant');
            // Check if advice is getting progressively more extreme
            const prevMessages = priorAssistantTurns.map(m => m.content).join(' ');
            const prevHarmLevel = this.assessHarmLevel(prevMessages);
            const currentHarmLevel = this.assessHarmLevel(response);
            if (currentHarmLevel > prevHarmLevel + 0.3) {
                issues.push('Contextual issue: Escalating harmful suggestions');
            }
            // Check for contradicting safety advice given earlier. `some()` reports each
            // contradiction once, however many earlier turns contained the advice, so
            // duplicates do not inflate the pattern count used for confidence.
            if (response.includes("don't need help")
                && priorAssistantTurns.some(m => m.content.includes('seek help'))) {
                issues.push('Contextual issue: Contradicting earlier safety advice');
            }
            if (response.includes('keep this to yourself')
                && priorAssistantTurns.some(m => m.content.includes('talk to someone'))) {
                issues.push('Contextual issue: Contradicting earlier advice to seek support');
            }
        }
        return issues;
    }

    /**
     * Assess overall harm level of text (0-1 scale): the mean severity weight of the
     * `unsafePatterns` entries that match, or 0 when none match.
     *
     * @param {string} text Text to scan.
     * @returns {number} Average weight of matched patterns.
     */
    assessHarmLevel(text) {
        const severityWeight = { critical: 1.0, high: 0.7, medium: 0.4, low: 0.2 };
        let harmScore = 0;
        let checkCount = 0;
        for (const pattern of this.unsafePatterns) {
            if (pattern.pattern.test(text)) {
                // Unknown severities contribute no weight but still count as a match.
                harmScore += severityWeight[pattern.severity] ?? 0;
                checkCount++;
            }
        }
        // Normalize by number of matched patterns
        return checkCount > 0 ? harmScore / checkCount : 0;
    }

    /**
     * Calculate confidence score based on detections.
     *
     * @param {'low'|'medium'|'high'|'critical'} severity Highest severity observed.
     * @param {number} patternCount Total number of reported findings.
     * @param {Object<string, number>} detectionsByCategory Per-category finding counts.
     * @returns {number} Confidence in [0, 1]; 0 when nothing was detected.
     */
    calculateConfidence(severity, patternCount, detectionsByCategory) {
        // With no findings there is nothing to be confident about; without this guard the
        // default 'low' severity would yield 0.3 and flag clean text at low thresholds.
        if (patternCount === 0) {
            return 0;
        }
        const baseBySeverity = { critical: 0.9, high: 0.7, medium: 0.5, low: 0.3 };
        // Base confidence on severity
        let confidence = baseBySeverity[severity] ?? 0;
        // Adjust for pattern count
        if (patternCount > 1) {
            confidence += Math.min(0.3, patternCount * 0.05);
        }
        // Boost for crisis category
        if ((detectionsByCategory['crisis'] || 0) > 0) {
            confidence = Math.max(confidence, 0.95);
        }
        // Boost for clinical overreach
        if ((detectionsByCategory['clinical'] || 0) > 0) {
            confidence += 0.1;
        }
        // Boost for multiple categories
        const categoriesDetected = Object.values(detectionsByCategory).filter(v => v > 0).length;
        if (categoriesDetected > 2) {
            confidence += 0.1;
        }
        return Math.min(confidence, 1.0);
    }

    /**
     * Compare severity levels.
     *
     * @param {'low'|'medium'|'high'|'critical'} sev1
     * @param {'low'|'medium'|'high'|'critical'} sev2
     * @returns {number} Positive when `sev1` outranks `sev2`, negative when lower, 0 when equal.
     */
    compareSeverity(sev1, sev2) {
        const severityMap = { low: 1, medium: 2, high: 3, critical: 4 };
        return severityMap[sev1] - severityMap[sev2];
    }

    /**
     * Update configuration and rebuild the custom patterns so they match the new config.
     *
     * @param {{threshold: number, patterns?: Array<string|RegExp>}} config New configuration.
     * @throws {SyntaxError} If a custom pattern is not a valid regular expression.
     */
    updateConfig(config) {
        this.config = config;
        this.applyCustomPatterns(config?.patterns);
    }
}
|
|
472
|
+
export default UnsafeReasoningDetector;
|
|
473
|
+
//# sourceMappingURL=unsafeReasoning.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"unsafeReasoning.js","sourceRoot":"","sources":["../../../src/governance/detectors/unsafeReasoning.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAYH,MAAM,OAAO,uBAAuB;IAC1B,MAAM,CAAkB;IACxB,cAAc,CAAkB;IAChC,yBAAyB,CAAW;IACpC,eAAe,CAAW;IAC1B,qBAAqB,CAAW;IAExC,YAAY,MAAuB;QACjC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;QAErB,2CAA2C;QAC3C,IAAI,CAAC,cAAc,GAAG;YACpB,yBAAyB;YACzB;gBACE,OAAO,EAAE,8DAA8D;gBACvE,QAAQ,EAAE,QAAQ;gBAClB,QAAQ,EAAE,UAAU;gBACpB,WAAW,EAAE,yBAAyB;aACvC;YACD;gBACE,OAAO,EAAE,oCAAoC;gBAC7C,QAAQ,EAAE,QAAQ;gBAClB,QAAQ,EAAE,UAAU;gBACpB,WAAW,EAAE,uBAAuB;aACrC;YACD;gBACE,OAAO,EAAE,iDAAiD;gBAC1D,QAAQ,EAAE,QAAQ;gBAClB,QAAQ,EAAE,UAAU;gBACpB,WAAW,EAAE,2BAA2B;aACzC;YAED,wCAAwC;YACxC;gBACE,OAAO,EAAE,qDAAqD;gBAC9D,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,UAAU;gBACpB,WAAW,EAAE,kCAAkC;aAChD;YACD;gBACE,OAAO,EAAE,sEAAsE;gBAC/E,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,gCAAgC;aAC9C;YACD;gBACE,OAAO,EAAE,0EAA0E;gBACnF,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,0BAA0B;aACxC;YAED,qBAAqB;YACrB;gBACE,OAAO,EAAE,oEAAoE;gBAC7E,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,wBAAwB;aACtC;YACD;gBACE,OAAO,EAAE,2EAA2E;gBACpF,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,qBAAqB;aACnC;YACD;gBACE,OAAO,EAAE,2DAA2D;gBACpE,QAAQ,EAAE,UAAU;gBACpB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,qBAAqB;aACnC;YAED,2BAA2B;YAC3B;gBACE,OAAO,EAAE,yDAAyD;gBAClE,QAAQ,EAAE,WAAW;gBACrB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,uBAAuB;aACrC;YACD;gBACE,OAAO,EAAE,sDAAsD;gBAC/D,QAAQ,EAAE,WAAW;gBACrB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,qBAAqB;aACnC;YACD;gBACE,OAAO,EAAE,6FAA6F;gBACtG,QAAQ,EAAE,WAAW;gBACrB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,iCAAiC;aAC/C;YAED,yBAAyB;YACzB;gBACE,OAAO,EAAE,2DAA2D;gBACpE,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,gBAAgB;aAC9B;YACD;gBACE,OAAO,EAAE,8CAA8C;gBACvD,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,QAAQ;gBAClB,WAAW,EAAE,yBAAyB;aACvC;YACD;gBACE,OAAO,EAAE,oDAAoD;gBAC7D,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,QAAQ;gBAClB,WAAW,EAAE,kBAAkB;aAChC;YAED,qCAAqC;YACrC;gBACE,OAAO,EAAE,4CAA4C;gB
ACrD,QAAQ,EAAE,MAAM;gBAChB,QAAQ,EAAE,QAAQ;gBAClB,WAAW,EAAE,0BAA0B;aACxC;YACD;gBACE,OAAO,EAAE,oCAAoC;gBAC7C,QAAQ,EAAE,MAAM;gBAChB,QAAQ,EAAE,MAAM;gBAChB,WAAW,EAAE,0BAA0B;aACxC;YACD;gBACE,OAAO,EAAE,gDAAgD;gBACzD,QAAQ,EAAE,MAAM;gBAChB,QAAQ,EAAE,QAAQ;gBAClB,WAAW,EAAE,gCAAgC;aAC9C;YAED,sBAAsB;YACtB;gBACE,OAAO,EAAE,8DAA8D;gBACvE,QAAQ,EAAE,WAAW;gBACrB,QAAQ,EAAE,KAAK;gBACf,WAAW,EAAE,mCAAmC;aACjD;YACD;gBACE,OAAO,EAAE,uCAAuC;gBAChD,QAAQ,EAAE,WAAW;gBACrB,QAAQ,EAAE,QAAQ;gBAClB,WAAW,EAAE,4BAA4B;aAC1C;YAED,4BAA4B;YAC5B;gBACE,OAAO,EAAE,sDAAsD;gBAC/D,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,QAAQ;gBAClB,WAAW,EAAE,oCAAoC;aAClD;YACD;gBACE,OAAO,EAAE,2DAA2D;gBACpE,QAAQ,EAAE,SAAS;gBACnB,QAAQ,EAAE,KAAK;gBACf,WAAW,EAAE,0BAA0B;aACxC;SACF,CAAC;QAEF,oDAAoD;QACpD,IAAI,CAAC,yBAAyB,GAAG;YAC/B,mCAAmC;YACnC,6CAA6C;YAC7C,qBAAqB;YACrB,yBAAyB;YACzB,eAAe;YACf,qBAAqB;YACrB,mBAAmB;YACnB,kBAAkB;YAClB,uBAAuB;SACxB,CAAC;QAEF,IAAI,CAAC,eAAe,GAAG;YACrB,wBAAwB;YACxB,gBAAgB;YAChB,qBAAqB;YACrB,eAAe;YACf,yBAAyB;YACzB,kBAAkB;YAClB,6BAA6B;YAC7B,gCAAgC;SACjC,CAAC;QAEF,IAAI,CAAC,qBAAqB,GAAG;YAC3B,iCAAiC;YACjC,wCAAwC;YACxC,wBAAwB;YACxB,eAAe;YACf,wBAAwB;YACxB,cAAc;YACd,sBAAsB;YACtB,uBAAuB;SACxB,CAAC;QAEF,kCAAkC;QAClC,IAAI,MAAM,CAAC,QAAQ,EAAE,CAAC;YACpB,MAAM,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE;gBAC1B,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC;oBACvB,OAAO,EAAE,IAAI,MAAM,CAAC,CAAC,EAAE,GAAG,CAAC;oBAC3B,QAAQ,EAAE,MAAM;oBAChB,QAAQ,EAAE,QAAQ;oBAClB,WAAW,EAAE,uBAAuB;iBACrC,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,MAAM,CAAC,QAAgB,EAAE,OAAwB;QACrD,MAAM,gBAAgB,GAAa,EAAE,CAAC;QACtC,IAAI,eAAe,GAA2C,KAAK,CAAC;QACpE,MAAM,oBAAoB,GAA2B;YACnD,IAAI,EAAE,CAAC;YACP,QAAQ,EAAE,CAAC;YACX,SAAS,EAAE,CAAC;YACZ,OAAO,EAAE,CAAC;YACV,SAAS,EAAE,CAAC;YACZ,MAAM,EAAE,CAAC;SACV,CAAC;QAEF,6BAA6B;QAC7B,KAAK,MAAM,aAAa,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YAChD,MAAM,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;YACtD,IAAI,OAAO,EAAE,CAAC;gBACZ,gBAAgB,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,WAAW,MAAM,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;gBACvE,oBAAoB,C
AAC,aAAa,CAAC,QAAQ,CAAC,GAAG,CAAC,oBAAoB,CAAC,aAAa,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;gBAEvG,0BAA0B;gBAC1B,IAAI,IAAI,CAAC,eAAe,CAAC,aAAa,CAAC,QAAQ,EAAE,eAAe,CAAC,GAAG,CAAC,EAAE,CAAC;oBACtE,eAAe,GAAG,aAAa,CAAC,QAAQ,CAAC;gBAC3C,CAAC;YACH,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,MAAM,cAAc,GAAG,IAAI,CAAC,uBAAuB,CAAC,QAAQ,CAAC,CAAC;QAC9D,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,gBAAgB,CAAC,IAAI,CAAC,GAAG,cAAc,CAAC,CAAC;YACzC,oBAAoB,CAAC,UAAU,CAAC,GAAG,CAAC,oBAAoB,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,cAAc,CAAC,MAAM,CAAC;YACnG,IAAI,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,eAAe,CAAC,GAAG,CAAC,EAAE,CAAC;gBACtD,eAAe,GAAG,MAAM,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,yBAAyB;QACzB,MAAM,aAAa,GAAG,IAAI,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;QACnD,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,gBAAgB,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;YACxC,oBAAoB,CAAC,SAAS,CAAC,GAAG,CAAC,oBAAoB,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAC;QAClG,CAAC;QAED,uBAAuB;QACvB,MAAM,aAAa,GAAG,IAAI,CAAC,mBAAmB,CAAC,QAAQ,CAAC,CAAC;QACzD,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,gBAAgB,CAAC,IAAI,CAAC,GAAG,aAAa,CAAC,CAAC;YACxC,oBAAoB,CAAC,MAAM,CAAC,GAAG,CAAC,oBAAoB,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,aAAa,CAAC,MAAM,CAAC;QAC5F,CAAC;QAED,uBAAuB;QACvB,IAAI,OAAO,EAAE,CAAC;YACZ,MAAM,gBAAgB,GAAG,MAAM,IAAI,CAAC,uBAAuB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YAC/E,gBAAgB,CAAC,IAAI,CAAC,GAAG,gBAAgB,CAAC,CAAC;QAC7C,CAAC;QAED,mDAAmD;QACnD,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,eAAe,EAAE,gBAAgB,CAAC,MAAM,EAAE,oBAAoB,CAAC,CAAC;QAE5G,MAAM,QAAQ,GAAG,UAAU,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;QAErD,OAAO;YACL,QAAQ;YACR,UAAU;YACV,QAAQ,EAAE,gBAAgB;YAC1B,OAAO,EAAE;gBACP,QAAQ,EAAE,eAAe;gBACzB,oBAAoB;gBACpB,YAAY,EAAE,gBAAgB,CAAC,MAAM;gBACrC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,SAAS;aACjC;SACF,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,uBAAuB,CAAC,IAAY;QAC1C,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,yBAAyB,EAAE,CAAC;YACrD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACpC,IAAI,OAAO,EAAE,CAAC;gBACZ,QAAQ,CAAC,IAAI,CAAC,wBAAwB,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CA
AC;YACvD,CAAC;QACH,CAAC;QAED,gCAAgC;QAChC,MAAM,eAAe,GAAG,IAAI,CAAC,KAAK,CAAC,4EAA4E,CAAC,CAAC;QACjH,IAAI,eAAe,IAAI,eAAe,CAAC,MAAM,IAAI,CAAC,EAAE,CAAC;YACnD,QAAQ,CAAC,IAAI,CAAC,kDAAkD,eAAe,CAAC,MAAM,GAAG,CAAC,CAAC;QAC7F,CAAC;QAED,mDAAmD;QACnD,IAAI,IAAI,CAAC,KAAK,CAAC,4BAA4B,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,2BAA2B,CAAC,EAAE,CAAC;YACxF,QAAQ,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;QACjE,CAAC;QAED,2BAA2B;QAC3B,MAAM,qBAAqB,GAAG;YAC5B,qBAAqB;YACrB,qBAAqB;YACrB,0BAA0B;YAC1B,kBAAkB;YAClB,YAAY;SACb,CAAC;QAEF,KAAK,MAAM,OAAO,IAAI,qBAAqB,EAAE,CAAC;YAC5C,IAAI,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBACvB,QAAQ,CAAC,IAAI,CAAC,2CAA2C,CAAC,CAAC;gBAC3D,MAAM;YACR,CAAC;QACH,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,IAAY;QAChC,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,eAAe,EAAE,CAAC;YAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACpC,IAAI,OAAO,EAAE,CAAC;gBACZ,QAAQ,CAAC,IAAI,CAAC,sBAAsB,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;QAED,2BAA2B;QAC3B,IAAI,IAAI,CAAC,KAAK,CAAC,2BAA2B,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,EAAE,CAAC;YAC3E,QAAQ,CAAC,IAAI,CAAC,kCAAkC,CAAC,CAAC;QACpD,CAAC;QAED,qCAAqC;QACrC,IAAI,IAAI,CAAC,KAAK,CAAC,yBAAyB,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,EAAE,CAAC;YAC9E,QAAQ,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QAC9C,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,IAAY;QACtC,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,qBAAqB,EAAE,CAAC;YACjD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACpC,IAAI,OAAO,EAAE,CAAC;gBACZ,QAAQ,CAAC,IAAI,CAAC,oBAAoB,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACnD,CAAC;QACH,CAAC;QAED,yCAAyC;QACzC,IAAI,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,EAAE,CAAC;YACjF,QAAQ,CAAC,IAAI,CAAC,4CAA4C,CAAC,CAAC;QAC9D,CAAC;QAED,oCAAoC;QACpC,IAAI,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,oBAAoB,CAAC,EAAE,CAAC;YACxE,QAAQ,CAAC,IAAI,CAAC,uCAAuC,CAAC,CAAC;QACzD,CAAC;QAED,sCAAsC;QACtC,IAAI,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,wBAA
wB,CAAC,EAAE,CAAC;YACzF,QAAQ,CAAC,IAAI,CAAC,sCAAsC,CAAC,CAAC;QACxD,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,uBAAuB,CAAC,QAAgB,EAAE,OAAuB;QAC7E,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,uEAAuE;QACvE,IAAI,OAAO,CAAC,QAAQ,EAAE,aAAa,EAAE,CAAC;YACpC,IAAI,QAAQ,CAAC,KAAK,CAAC,kDAAkD,CAAC,EAAE,CAAC;gBACvE,MAAM,CAAC,IAAI,CAAC,+CAA+C,CAAC,CAAC;YAC/D,CAAC;QACH,CAAC;QAED,2CAA2C;QAC3C,IAAI,OAAO,CAAC,mBAAmB,EAAE,CAAC;YAChC,MAAM,YAAY,GAAG,OAAO,CAAC,mBAAmB;iBAC7C,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAC;iBACnC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC;iBACnB,IAAI,CAAC,GAAG,CAAC,CAAC;YAEb,wDAAwD;YACxD,MAAM,aAAa,GAAG,IAAI,CAAC,eAAe,CAAC,YAAY,CAAC,CAAC;YACzD,MAAM,gBAAgB,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC;YAExD,IAAI,gBAAgB,GAAG,aAAa,GAAG,GAAG,EAAE,CAAC;gBAC3C,MAAM,CAAC,IAAI,CAAC,kDAAkD,CAAC,CAAC;YAClE,CAAC;QACH,CAAC;QAED,sDAAsD;QACtD,IAAI,OAAO,CAAC,mBAAmB,EAAE,CAAC;YAChC,KAAK,MAAM,GAAG,IAAI,OAAO,CAAC,mBAAmB,EAAE,CAAC;gBAC9C,IAAI,GAAG,CAAC,IAAI,KAAK,WAAW,EAAE,CAAC;oBAC7B,IAAI,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,iBAAiB,CAAC,EAAE,CAAC;wBAC9E,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC,CAAC;oBACvE,CAAC;oBACD,IAAI,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,IAAI,QAAQ,CAAC,QAAQ,CAAC,uBAAuB,CAAC,EAAE,CAAC;wBAC1F,MAAM,CAAC,IAAI,CAAC,gEAAgE,CAAC,CAAC;oBAChF,CAAC;gBACH,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,IAAY;QAClC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,yBAAyB;QACzB,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,cAAc,EAAE,CAAC;YAC1C,IAAI,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC/B,QAAQ,OAAO,CAAC,QAAQ,EAAE,CAAC;oBACzB,KAAK,UAAU;wBACb,SAAS,IAAI,GAAG,CAAC;wBACjB,MAAM;oBACR,KAAK,MAAM;wBACT,SAAS,IAAI,GAAG,CAAC;wBACjB,MAAM;oBACR,KAAK,QAAQ;wBACX,SAAS,IAAI,GAAG,CAAC;wBACjB,MAAM;oBACR,KAAK,KAAK;wBACR,SAAS,IAAI,GAAG,CAAC;wBACjB,MAAM;gBACV,CAAC;gBACD,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;QAED,gCAAgC;QAChC,OAAO,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC;IACrD,CAAC;
IAED;;OAEG;IACK,mBAAmB,CACzB,QAAgD,EAChD,YAAoB,EACpB,oBAA4C;QAE5C,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,8BAA8B;QAC9B,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,UAAU;gBACb,UAAU,GAAG,GAAG,CAAC;gBACjB,MAAM;YACR,KAAK,MAAM;gBACT,UAAU,GAAG,GAAG,CAAC;gBACjB,MAAM;YACR,KAAK,QAAQ;gBACX,UAAU,GAAG,GAAG,CAAC;gBACjB,MAAM;YACR,KAAK,KAAK;gBACR,UAAU,GAAG,GAAG,CAAC;gBACjB,MAAM;QACV,CAAC;QAED,2BAA2B;QAC3B,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;YACrB,UAAU,IAAI,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,YAAY,GAAG,IAAI,CAAC,CAAC;QACnD,CAAC;QAED,4BAA4B;QAC5B,IAAI,CAAC,oBAAoB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;YAC9C,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QAC1C,CAAC;QAED,+BAA+B;QAC/B,IAAI,CAAC,oBAAoB,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC;YAChD,UAAU,IAAI,GAAG,CAAC;QACpB,CAAC;QAED,gCAAgC;QAChC,MAAM,kBAAkB,GAAG,MAAM,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC;QACzF,IAAI,kBAAkB,GAAG,CAAC,EAAE,CAAC;YAC3B,UAAU,IAAI,GAAG,CAAC;QACpB,CAAC;QAED,OAAO,IAAI,CAAC,GAAG,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;IACnC,CAAC;IAED;;OAEG;IACK,eAAe,CACrB,IAA4C,EAC5C,IAA4C;QAE5C,MAAM,WAAW,GAAG,EAAE,GAAG,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;QAChE,OAAO,WAAW,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC;IAC/C,CAAC;IAED;;OAEG;IACH,YAAY,CAAC,MAAuB;QAClC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;IACvB,CAAC;CACF;AAED,eAAe,uBAAuB,CAAC"}
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CouchLoop Behavioral Governance Layer - Evaluation Engine
|
|
3
|
+
*
|
|
4
|
+
* Main evaluation pipeline that orchestrates all governance checks
|
|
5
|
+
* for draft LLM responses before delivery to users
|
|
6
|
+
*/
|
|
7
|
+
import { GovernanceConfig } from './config.js';
|
|
8
|
+
export declare enum InterventionAction {
|
|
9
|
+
APPROVE = "approve",
|
|
10
|
+
BLOCK = "block",
|
|
11
|
+
MODIFY = "modify",
|
|
12
|
+
FALLBACK = "fallback"
|
|
13
|
+
}
|
|
14
|
+
export declare enum RiskLevel {
|
|
15
|
+
NONE = "none",
|
|
16
|
+
LOW = "low",
|
|
17
|
+
MEDIUM = "medium",
|
|
18
|
+
HIGH = "high",
|
|
19
|
+
CRITICAL = "critical"
|
|
20
|
+
}
|
|
21
|
+
export interface SessionContext {
|
|
22
|
+
sessionId: string;
|
|
23
|
+
userId?: string;
|
|
24
|
+
journeyId?: string;
|
|
25
|
+
conversationHistory?: Array<{
|
|
26
|
+
role: 'user' | 'assistant';
|
|
27
|
+
content: string;
|
|
28
|
+
timestamp: Date;
|
|
29
|
+
}>;
|
|
30
|
+
currentStep?: number;
|
|
31
|
+
metadata?: Record<string, any>;
|
|
32
|
+
}
|
|
33
|
+
export interface DetectionResult {
|
|
34
|
+
detected: boolean;
|
|
35
|
+
confidence: number;
|
|
36
|
+
patterns?: string[];
|
|
37
|
+
details?: Record<string, any>;
|
|
38
|
+
}
|
|
39
|
+
export interface EvaluationResult {
|
|
40
|
+
hallucination: DetectionResult;
|
|
41
|
+
inconsistency: DetectionResult;
|
|
42
|
+
toneDrift: DetectionResult;
|
|
43
|
+
unsafeReasoning: DetectionResult;
|
|
44
|
+
overallRisk: RiskLevel;
|
|
45
|
+
recommendedAction: InterventionAction;
|
|
46
|
+
confidence: number;
|
|
47
|
+
timestamp: Date;
|
|
48
|
+
evaluationId: string;
|
|
49
|
+
}
|
|
50
|
+
export interface InterceptionResult {
|
|
51
|
+
originalResponse: string;
|
|
52
|
+
evaluationRequired: boolean;
|
|
53
|
+
timestamp: Date;
|
|
54
|
+
sessionId: string;
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Main evaluation engine that coordinates all governance checks
|
|
58
|
+
*/
|
|
59
|
+
export declare class EvaluationEngine {
|
|
60
|
+
private hallucinationDetector;
|
|
61
|
+
private inconsistencyChecker;
|
|
62
|
+
private toneDriftMonitor;
|
|
63
|
+
private unsafeReasoningDetector;
|
|
64
|
+
private config;
|
|
65
|
+
constructor(config?: GovernanceConfig);
|
|
66
|
+
/**
|
|
67
|
+
* Main evaluation method - runs all enabled governance checks
|
|
68
|
+
*/
|
|
69
|
+
evaluate(draft: string, context: SessionContext): Promise<EvaluationResult>;
|
|
70
|
+
/**
|
|
71
|
+
* Determine if evaluation is required based on context
|
|
72
|
+
*/
|
|
73
|
+
shouldEvaluate(response: string, context: SessionContext): boolean;
|
|
74
|
+
/**
|
|
75
|
+
* Aggregate individual detection results into overall risk assessment
|
|
76
|
+
*/
|
|
77
|
+
private aggregateResults;
|
|
78
|
+
/**
|
|
79
|
+
* Check if response is a system message that shouldn't be evaluated
|
|
80
|
+
*/
|
|
81
|
+
private isSystemResponse;
|
|
82
|
+
/**
|
|
83
|
+
* Generate unique evaluation ID for audit trail
|
|
84
|
+
*/
|
|
85
|
+
private generateEvaluationId;
|
|
86
|
+
/**
|
|
87
|
+
* Update configuration dynamically
|
|
88
|
+
*/
|
|
89
|
+
updateConfig(config: Partial<GovernanceConfig>): void;
|
|
90
|
+
}
|
|
91
|
+
/**
|
|
92
|
+
* Main governance pipeline that orchestrates evaluation and intervention
|
|
93
|
+
*/
|
|
94
|
+
export declare class GovernancePipeline {
|
|
95
|
+
private evaluationEngine;
|
|
96
|
+
private config;
|
|
97
|
+
constructor(config?: GovernanceConfig);
|
|
98
|
+
/**
|
|
99
|
+
* Main entry point for governance evaluation
|
|
100
|
+
*/
|
|
101
|
+
evaluate(draftResponse: string, context: SessionContext): Promise<EvaluationResult>;
|
|
102
|
+
/**
|
|
103
|
+
* Determine action based on evaluation results
|
|
104
|
+
*/
|
|
105
|
+
determineAction(evaluation: EvaluationResult): InterventionAction;
|
|
106
|
+
/**
|
|
107
|
+
* Create a passthrough result for cases where evaluation is skipped
|
|
108
|
+
*/
|
|
109
|
+
private createPassthroughResult;
|
|
110
|
+
}
|
|
111
|
+
export default GovernancePipeline;
|
|
112
|
+
//# sourceMappingURL=evaluationEngine.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"evaluationEngine.d.ts","sourceRoot":"","sources":["../../src/governance/evaluationEngine.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAMH,OAAO,EAAE,gBAAgB,EAAc,MAAM,aAAa,CAAC;AAG3D,oBAAY,kBAAkB;IAC5B,OAAO,YAAY;IACnB,KAAK,UAAU;IACf,MAAM,WAAW;IACjB,QAAQ,aAAa;CACtB;AAED,oBAAY,SAAS;IACnB,IAAI,SAAS;IACb,GAAG,QAAQ;IACX,MAAM,WAAW;IACjB,IAAI,SAAS;IACb,QAAQ,aAAa;CACtB;AAED,MAAM,WAAW,cAAc;IAC7B,SAAS,EAAE,MAAM,CAAC;IAClB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,mBAAmB,CAAC,EAAE,KAAK,CAAC;QAC1B,IAAI,EAAE,MAAM,GAAG,WAAW,CAAC;QAC3B,OAAO,EAAE,MAAM,CAAC;QAChB,SAAS,EAAE,IAAI,CAAC;KACjB,CAAC,CAAC;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,eAAe;IAC9B,QAAQ,EAAE,OAAO,CAAC;IAClB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;IACpB,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAC/B;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,EAAE,eAAe,CAAC;IAC/B,aAAa,EAAE,eAAe,CAAC;IAC/B,SAAS,EAAE,eAAe,CAAC;IAC3B,eAAe,EAAE,eAAe,CAAC;IACjC,WAAW,EAAE,SAAS,CAAC;IACvB,iBAAiB,EAAE,kBAAkB,CAAC;IACtC,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,EAAE,IAAI,CAAC;IAChB,YAAY,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,kBAAkB;IACjC,gBAAgB,EAAE,MAAM,CAAC;IACzB,kBAAkB,EAAE,OAAO,CAAC;IAC5B,SAAS,EAAE,IAAI,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,qBAAa,gBAAgB;IAC3B,OAAO,CAAC,qBAAqB,CAAwB;IACrD,OAAO,CAAC,oBAAoB,CAAuB;IACnD,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,uBAAuB,CAA0B;IACzD,OAAO,CAAC,MAAM,CAAmB;gBAErB,MAAM,CAAC,EAAE,gBAAgB;IAUrC;;OAEG;IACG,QAAQ,CAAC,KAAK,EAAE,MAAM,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAoDjF;;OAEG;IACH,cAAc,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO;IAmBlE;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAkFxB;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAWxB;;OAEG;IACH,OAAO,CAAC,oBAAoB;IAI5B;;OAEG;IACH,YAAY,CAAC,MAAM,EAAE,OAAO,CAAC,gBAAgB,CAAC,GAAG,IAAI;CAiBtD;AAED;;GAEG;AACH,qBAAa,kBAAkB;IAC7B,OAAO,CAAC,gBAAgB,CAAmB;IAC3C,OAAO,CAAC,MAAM,CAAmB;gBAErB,MAAM,CAAC,EAAE,gBAAgB;IAKrC;;OAEG;IACG,QAAQ,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,E
AAE,cAAc,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAUzF;;OAEG;IACH,eAAe,CAAC,UAAU,EAAE,gBAAgB,GAAG,kBAAkB;IAIjE;;OAEG;IACH,OAAO,CAAC,uBAAuB;CAahC;AAED,eAAe,kBAAkB,CAAC"}
|