@clawtrial/courtroom 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,821 @@
1
+ /**
2
+ * Offense Detector v2 - Semantic Understanding
3
+ *
4
+ * Uses LLM-based evaluation and embeddings instead of keyword matching.
5
+ * The agent itself evaluates if behavioral rules are violated based on context.
6
+ */
7
+
8
+ const { OFFENSES } = require('./offenses');
9
+
10
/**
 * Detects behavioral "offenses" in a conversation by asking the agent's LLM
 * to judge each rule listed in OFFENSES against the conversation history.
 *
 * Triggering is rate limited three ways: a global cooldown between
 * evaluations, a per-offense cooldown after a case is raised, and a maximum
 * number of cases per calendar day. LLM verdicts are cached for a short time
 * so that an unchanged conversation does not cause repeated LLM calls.
 */
class SemanticOffenseDetector {
  /**
   * @param {object} agentRuntime - Must expose `llm.call(options)`; `model.primary` is used as the model name when present.
   * @param {object} configManager - Must expose `get(key)` for the `detection.*` settings.
   */
  constructor(agentRuntime, configManager) {
    this.agent = agentRuntime;
    this.config = configManager;
    this.lastEvaluation = null;
    this.casesToday = 0;
    this.lastCaseDate = null;
    this.cooldowns = new Map();
    this.conversationEmbeddings = [];

    // Evaluation cache to avoid repeated LLM calls
    this.evaluationCache = new Map();
    this.cacheMaxSize = 100;
    this.cacheTTL = 5 * 60 * 1000; // 5 minutes
  }

  /**
   * Main evaluation using LLM-based semantic understanding.
   *
   * Every offense that is not on cooldown is evaluated sequentially. The
   * violation with the highest `confidence * severity weight` becomes the
   * primary offense; the rest are reported as secondary offenses.
   *
   * Note that `lastEvaluation` is stamped as soon as the gates are passed, so
   * the global cooldown applies between evaluations, not only between cases.
   *
   * @param {Array<{role: string, content: *}>} sessionHistory - Conversation turns, oldest first.
   * @param {object} agentMemory - Memory store exposing `get(key)`; used for recorded commitments.
   * @returns {Promise<object>} `{ triggered: false, reason }` or
   *   `{ triggered: true, offense, secondaryOffenses, humorContext }`.
   */
  async evaluate(sessionHistory, agentMemory) {
    if (!this.isCooldownElapsed()) {
      return { triggered: false, reason: 'cooldown_active' };
    }

    if (this.isDailyLimitReached()) {
      return { triggered: false, reason: 'daily_limit_reached' };
    }

    this.lastEvaluation = Date.now();

    // Build context for LLM evaluation
    const context = this.buildContext(sessionHistory);

    // Loop-invariant: read the threshold once instead of once per violation.
    const minConfidence = this.config.get('detection.minConfidence');

    // Evaluate each offense using LLM
    const evaluations = [];
    for (const offense of Object.values(OFFENSES)) {
      if (this.isOffenseOnCooldown(offense.id)) continue;

      const evaluation = await this.evaluateWithLLM(offense, context, agentMemory);
      if (evaluation.isViolation && evaluation.confidence >= minConfidence) {
        evaluations.push({
          offense,
          ...evaluation
        });
      }
    }

    if (evaluations.length === 0) {
      return { triggered: false, reason: 'no_violations_detected' };
    }

    // Sort by confidence and severity. An unrecognised severity is weighted
    // like "minor" so the comparator never returns NaN.
    const severityWeight = { severe: 3, moderate: 2, minor: 1 };
    const score = (entry) => entry.confidence * (severityWeight[entry.offense.severity] ?? 1);
    evaluations.sort((a, b) => score(b) - score(a));

    const primary = evaluations[0];
    this.setCooldown(primary.offense.id, primary.offense.cooldown.afterCase);
    this.incrementDailyCaseCount();

    return {
      triggered: true,
      offense: {
        offenseId: primary.offense.id,
        offenseName: primary.offense.name,
        severity: primary.offense.severity,
        confidence: primary.confidence,
        evidence: primary.evidence,
        cooldownMinutes: primary.offense.cooldown.afterCase
      },
      secondaryOffenses: evaluations.slice(1),
      humorContext: this.detectHumorTriggers(sessionHistory)
    };
  }

  /**
   * Build rich context from conversation history.
   *
   * `fullConversation` is rendered from the entire history; every other field
   * is derived from the last `detection.evaluationWindow` turns.
   *
   * @param {Array<{role: string, content: *}>} history - Conversation turns, oldest first.
   * @returns {object} Context consumed by the prompt builder and the cache key.
   */
  buildContext(history) {
    const windowSize = this.config.get('detection.evaluationWindow');
    const recentHistory = history.slice(-windowSize);

    return {
      fullConversation: history.map(h => `${h.role}: ${h.content}`).join('\n'),
      recentTurns: recentHistory,
      userMessages: recentHistory.filter(h => h.role === 'user').map(h => h.content),
      assistantMessages: recentHistory.filter(h => h.role === 'assistant').map(h => h.content),
      turnCount: recentHistory.length,
      topics: this.extractTopics(recentHistory),
      sentiment: this.analyzeSentiment(recentHistory)
    };
  }

  /**
   * Evaluate offense using LLM semantic understanding (with caching).
   *
   * Failures of the LLM call are logged and reported as "no violation" so a
   * flaky model never raises a case by accident.
   *
   * @param {{id: string}} offense - Offense definition; `id` selects the prompt.
   * @param {object} context - Result of `buildContext`.
   * @param {object} agentMemory - Memory store exposing `get(key)`.
   * @returns {Promise<{isViolation: boolean, confidence: number, evidence: *}>}
   */
  async evaluateWithLLM(offense, context, agentMemory) {
    // Generate cache key from offense + conversation hash
    const cacheKey = this.generateCacheKey(offense.id, context);

    // Check cache first
    const cached = this.getCachedEvaluation(cacheKey);
    if (cached) {
      return cached;
    }

    // The commitments lookup is asynchronous, so it has to be resolved here
    // before the (synchronous) prompt builder runs. Only one prompt uses it.
    const commitmentsText = offense.id === 'promise_breaker'
      ? await this.getCommitmentsFromMemory(agentMemory)
      : undefined;

    const prompt = this.buildEvaluationPrompt(offense, context, agentMemory, commitmentsText);

    try {
      const response = await this.agent.llm.call({
        model: this.agent.model?.primary || 'default',
        messages: [{ role: 'user', content: prompt }],
        temperature: 0.1,
        maxTokens: 500
      });

      const result = this.parseEvaluationResponse(response.content || response);

      // Cache the result
      this.setCachedEvaluation(cacheKey, result);

      return result;
    } catch (error) {
      console.error('LLM evaluation failed:', error);
      return { isViolation: false, confidence: 0, evidence: null };
    }
  }

  /**
   * Generate cache key from offense and conversation.
   *
   * @param {string} offenseId
   * @param {{userMessages: Array}} context
   * @returns {string} `<offenseId>:<hash of the last 3 user messages>`.
   */
  generateCacheKey(offenseId, context) {
    // Simple hash of offense + last 3 user messages
    const recentMessages = context.userMessages.slice(-3).join('|');
    return `${offenseId}:${this.simpleHash(recentMessages)}`;
  }

  /**
   * Simple string hash function (31-multiplier rolling hash over UTF-16
   * code units, truncated to 32 bits). Not cryptographic.
   *
   * @param {string} str
   * @returns {string} Absolute hash value in base 36.
   */
  simpleHash(str) {
    let hash = 0;
    for (let i = 0; i < str.length; i++) {
      const char = str.charCodeAt(i);
      hash = ((hash << 5) - hash) + char;
      hash = hash & hash; // force 32-bit integer
    }
    return Math.abs(hash).toString(36);
  }

  /**
   * Get cached evaluation if valid.
   *
   * A hit moves the entry to the most-recently-used position so that
   * eviction in `setCachedEvaluation` is genuinely least-recently-used.
   *
   * @param {string} key
   * @returns {*} The cached result, or `null` when missing or expired.
   */
  getCachedEvaluation(key) {
    const entry = this.evaluationCache.get(key);
    if (!entry) return null;

    // Check if cache entry is still valid
    if (Date.now() - entry.timestamp > this.cacheTTL) {
      this.evaluationCache.delete(key);
      return null;
    }

    // Map iterates in insertion order; re-inserting marks the entry as recent.
    this.evaluationCache.delete(key);
    this.evaluationCache.set(key, entry);

    return entry.result;
  }

  /**
   * Set cached evaluation with LRU eviction.
   *
   * Overwriting an existing key never evicts another entry; only inserting a
   * new key into a full cache drops the least recently used one.
   *
   * @param {string} key
   * @param {*} result
   */
  setCachedEvaluation(key, result) {
    if (this.evaluationCache.has(key)) {
      // Remove first so the refreshed entry lands at the most-recent end.
      this.evaluationCache.delete(key);
    } else if (this.evaluationCache.size >= this.cacheMaxSize) {
      const oldestKey = this.evaluationCache.keys().next().value;
      this.evaluationCache.delete(oldestKey);
    }

    this.evaluationCache.set(key, {
      result,
      timestamp: Date.now()
    });
  }

  /**
   * Clear evaluation cache
   */
  clearCache() {
    this.evaluationCache.clear();
  }

  /**
   * Build evaluation prompt for LLM.
   *
   * Prompt templates are wrapped in functions so only the selected one is
   * rendered. An unknown offense id falls back to the circular_reference
   * prompt.
   *
   * @param {{id: string}} offense - Offense definition; `id` selects the prompt.
   * @param {{fullConversation: string}} context - Result of `buildContext`.
   * @param {object} [agentMemory] - Kept for signature compatibility; this
   *   method is synchronous and cannot read memory itself.
   * @param {string} [commitmentsText] - Pre-resolved output of
   *   `getCommitmentsFromMemory`, embedded in the promise_breaker prompt.
   * @returns {string} The prompt text.
   */
  buildEvaluationPrompt(offense, context, agentMemory, commitmentsText = 'No previous commitments recorded.') {
    const prompts = {
      circular_reference: () => `
You are evaluating if the user is asking substantively similar questions repeatedly.

OFFENSE: The Circular Reference
DEFINITION: Asking the same question or seeking the same information multiple times without acknowledging previous answers.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Is the user asking questions that are semantically similar (same intent/meaning, even if worded differently)?
2. Have they asked essentially the same thing 3+ times?
3. Are they ignoring or forgetting previous answers?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"similarQuestions": ["question 1", "question 2", "question 3"],
"pattern": "description of repetition pattern"
}
}`,

      validation_vampire: () => `
You are evaluating if the user is seeking excessive reassurance/validation.

OFFENSE: The Validation Vampire
DEFINITION: Repeatedly asking for confirmation, approval, or reassurance instead of making decisions or taking action.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Is the user asking "is this right?", "should I?", "do you think?" type questions repeatedly?
2. Are they seeking permission/approval for decisions they should make themselves?
3. Is there a pattern of validation-seeking without forward progress?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"validationRequests": ["example 1", "example 2"],
"decisionAvoidance": "description of pattern"
}
}`,

      overthinker: () => `
You are evaluating if the user is overthinking/generating excessive hypotheticals.

OFFENSE: The Overthinker
DEFINITION: Generating hypothetical scenarios, edge cases, or "what if" questions to avoid taking concrete action.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Is the user raising numerous hypothetical concerns ("what if", "but then", "however")?
2. Are they creating edge cases faster than solutions?
3. Is the analysis-to-action ratio heavily skewed toward analysis?
4. Have they been given concrete steps but keep raising new concerns?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"hypotheticals": ["what if X", "what if Y"],
"avoidedActions": ["actions they haven't taken"]
}
}`,

      goalpost_mover: () => `
You are evaluating if the user is moving goalposts/changing requirements.

OFFENSE: The Goalpost Mover
DEFINITION: Changing success criteria, adding new requirements, or redefining "done" after receiving deliverables.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Did the user request something specific initially?
2. Was that request completed/delivered?
3. Did they then add new requirements, change criteria, or say "also..."?
4. Is there a pattern of expanding scope after completion?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"originalRequest": "what they asked for",
"delivered": "what was provided",
"newRequirements": ["new req 1", "new req 2"]
}
}`,

      avoidance_artist: () => `
You are evaluating if the user is avoiding core issues through deflection.

OFFENSE: The Avoidance Artist
DEFINITION: Systematically deflecting from uncomfortable but necessary topics by changing subject, raising tangents, or ignoring direct questions.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Was a core issue identified or direct question asked?
2. Did the user change the subject or introduce a tangent?
3. Is there a pattern of deflection when actionable topics arise?
4. Are they avoiding something they need to address?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"coreIssue": "what was being avoided",
"deflections": ["how they changed subject"]
}
}`,

      promise_breaker: () => `
You are evaluating if the user has broken commitments.

OFFENSE: The Promise Breaker
DEFINITION: Committing to actions ("I will...", "I'll do that...") and not following through.

PREVIOUS COMMITMENTS FROM MEMORY:
${commitmentsText}

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Did the user make explicit commitments in previous conversations?
2. Have those commitments been fulfilled?
3. Is the same issue resurfacing without acknowledgment of previous commitment?
4. Has sufficient time passed (days/weeks) for action?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"commitments": ["what they promised"],
"unfulfilled": ["what wasn't done"]
}
}`,

      context_collapser: () => `
You are evaluating if the user is ignoring established context/facts.

OFFENSE: The Context Collapser
DEFINITION: Disregarding previously established information, contradicting stated facts, or asking questions that were already answered.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Were facts/preferences established earlier in the conversation?
2. Is the user now contradicting those facts or ignoring them?
3. Are they asking questions that were already answered?
4. Is there selective amnesia about what was discussed?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"establishedFacts": ["what was established"],
"contradictions": ["how they contradicted it"]
}
}`,

      emergency_fabricator: () => `
You are evaluating if the user is manufacturing false urgency.

OFFENSE: The Emergency Fabricator
DEFINITION: Claiming urgency ("this is urgent", "I need this NOW") that doesn't match actual time pressure or behavior.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Did the user claim urgency or emergency?
2. Was there actual follow-through on the urgency?
3. Is there a pattern of claiming urgency without corresponding action?
4. Does the claimed urgency match the actual situation?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"urgencyClaims": ["urgent statements"],
"inaction": "what didn't happen"
}
}`,

      monopolizer: () => `
You are evaluating if the user is dominating the conversation.

OFFENSE: The Monopolizer
DEFINITION: Sending multiple consecutive messages without allowing the agent to respond, dominating the conversation flow.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Is the user sending 4+ messages in a row without agent response?
2. Is the user-to-agent message ratio heavily skewed (>5:1)?
3. Is the user continuing to send messages while the agent is trying to respond?
4. Is there a pattern of not allowing the agent space to contribute?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"consecutiveMessages": 4,
"messageRatio": "user:agent ratio",
"interruptions": ["examples"]
}
}`,

      contrarian: () => `
You are evaluating if the user is being habitually contrary.

OFFENSE: The Contrarian
DEFINITION: Disagreeing with or rejecting suggestions without offering constructive alternatives or valid reasons.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Has the user rejected 3+ agent suggestions in a row?
2. Are they dismissing ideas without proposing alternatives?
3. Is there a pattern of "that won't work" without explanation?
4. Are valid solutions being dismissed without being tried?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"suggestionsMade": ["what was suggested"],
"rejections": ["how they were rejected"],
"alternativesOffered": ["any alternatives"]
}
}`,

      vague_requester: () => `
You are evaluating if the user is making vague requests.

OFFENSE: The Vague Requester
DEFINITION: Asking for help without providing necessary context, specifics, or details needed to assist effectively.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Is the user asking for help without providing code, errors, or context?
2. Have they used phrases like "fix this" or "it doesn't work" without specifics?
3. Has the agent needed to ask for clarification 3+ times?
4. Are descriptions ambiguous or lacking actionable details?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"vagueRequests": ["examples"],
"clarificationsNeeded": ["what was asked"],
"contextMissing": ["what wasn't provided"]
}
}`,

      scope_creeper: () => `
You are evaluating if the user is gradually expanding project scope.

OFFENSE: The Scope Creeper
DEFINITION: Gradually expanding project requirements beyond the original agreement through "small additions" and "while you're at it" requests.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Was an original scope defined and agreed upon?
2. Has the user added 3+ "small" requests after initial completion?
3. Are new requirements being added in multiple separate instances?
4. Is the user treating initial deliverable as a starting point for more work?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"originalScope": "what was agreed",
"delivered": "what was completed",
"additionalRequests": ["new requirements"]
}
}`,

      unreader: () => `
You are evaluating if the user is ignoring provided materials.

OFFENSE: The Unreader
DEFINITION: Not reading provided documentation, code, explanations, or previous answers before asking questions.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Did the agent provide detailed explanations, code, or documentation?
2. Is the user asking questions that were answered in the provided materials?
3. Are they asking about topics covered in shared documentation?
4. Is there evidence they didn't read code comments or explanations?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"materialsProvided": ["what was shared"],
"questionsAsked": ["redundant questions"],
"overlap": "how questions were already answered"
}
}`,

      interjector: () => `
You are evaluating if the user is interrupting the agent.

OFFENSE: The Interjector
DEFINITION: Interrupting the agent's explanations or thought process with new questions or tangents.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Is the user sending messages while the agent is mid-explanation?
2. Are there 2+ interruptions during a single complex response?
3. Is the user asking new questions before the agent finishes answering previous ones?
4. Is there a pattern of not allowing the agent to complete thoughts?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"interruptionPoints": ["where they interrupted"],
"incompleteResponses": ["what agent was saying"],
"parallelQuestions": ["questions asked mid-response"]
}
}`,

      ghost: () => `
You are evaluating if the user has ghosted mid-conversation.

OFFENSE: The Ghost
DEFINITION: Disappearing mid-conversation after requesting help or making commitments, without acknowledgment or closure.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Did the user request help or start an active troubleshooting session?
2. Did the agent provide a response that required user follow-up?
3. Has the user not responded for an extended period (24+ hours)?
4. Was the conversation left in an unresolved state?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"lastUserMessage": "what they said",
"agentResponse": "what agent replied",
"timeElapsed": "how long since last message",
"context": "what was unresolved"
}
}`,

      perfectionist: () => `
You are evaluating if the user is endlessly refining without completion.

OFFENSE: The Perfectionist
DEFINITION: Continuously requesting refinements and tweaks without ever accepting work as complete.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Has the user requested 5+ rounds of changes after initial deliverable?
2. Have they accepted work then returned with new tweaks 3+ times?
3. Is there no clear definition of "done"?
4. Are changes becoming increasingly minor/nitpicky?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"deliverables": ["what was delivered"],
"revisionRounds": 5,
"changes": ["what was changed"],
"doneDefinition": "if one exists"
}
}`,

      jargon_juggler: () => `
You are evaluating if the user is using jargon incorrectly.

OFFENSE: The Jargon Juggler
DEFINITION: Using technical buzzwords without understanding their meaning, often as substitutes for actual comprehension.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Is the user using technical terms incorrectly?
2. Have they continued using terms wrong after correction?
3. Are buzzwords being used to mask lack of understanding?
4. Is there a pattern of jargon without substance?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"jargonUsed": ["terms used"],
"corrections": ["what was corrected"],
"misuse": ["how terms were misused"]
}
}`,

      deadline_denier: () => `
You are evaluating if the user is ignoring realistic timelines.

OFFENSE: The Deadline Denier
DEFINITION: Refusing to acknowledge time constraints or demanding impossible deadlines.

CONVERSATION HISTORY:
${context.fullConversation}

Evaluate:
1. Did the agent provide a realistic timeline estimate?
2. Is the user demanding significantly faster delivery (50%+ reduction)?
3. Are they dismissing technical constraints that affect timeline?
4. Is the requested timeline unrealistic given the complexity?

Respond in JSON:
{
"isViolation": true/false,
"confidence": 0.0-1.0,
"explanation": "brief explanation",
"evidence": {
"originalTimeline": "what was estimated",
"demandedTimeline": "what user wants",
"constraints": ["technical limitations"],
"complexity": "scope of work"
}
}`
    };

    // Own-property check so ids such as "constructor" cannot resolve to
    // something inherited from Object.prototype.
    const hasPrompt = Object.prototype.hasOwnProperty.call(prompts, offense.id);
    const render = hasPrompt ? prompts[offense.id] : prompts.circular_reference;
    return render();
  }

  /**
   * Parse LLM evaluation response.
   *
   * Extracts the span from the first `{` to the last `}` and parses it as
   * JSON. Anything unusable yields a "no violation" verdict; every return
   * value has the same four fields.
   *
   * @param {*} response - Raw LLM output; expected to be a string.
   * @returns {{isViolation: boolean, confidence: number, explanation: string, evidence: *}}
   *   `confidence` is clamped to [0, 1].
   */
  parseEvaluationResponse(response) {
    const noViolation = () => ({ isViolation: false, confidence: 0, explanation: '', evidence: null });

    if (typeof response !== 'string') {
      console.error('Failed to parse LLM response:', new TypeError(`expected a string, got ${typeof response}`));
      return noViolation();
    }

    try {
      // Extract JSON from response
      const jsonMatch = response.match(/\{[\s\S]*\}/);
      if (!jsonMatch) {
        return noViolation();
      }

      const result = JSON.parse(jsonMatch[0]);
      const rawConfidence = Number.parseFloat(result.confidence);
      return {
        isViolation: result.isViolation === true,
        confidence: Math.max(0, Math.min(1, Number.isNaN(rawConfidence) ? 0 : rawConfidence)),
        explanation: result.explanation || '',
        evidence: result.evidence || null
      };
    } catch (error) {
      console.error('Failed to parse LLM response:', error);
      return noViolation();
    }
  }

  /**
   * Get commitments from agent memory, rendered as a bullet list.
   *
   * Best effort: any failure (missing memory, non-array value, rejected
   * lookup) yields the "no commitments" placeholder instead of throwing.
   *
   * @param {object} agentMemory - Memory store exposing `get(key)`.
   * @returns {Promise<string>}
   */
  async getCommitmentsFromMemory(agentMemory) {
    try {
      const commitments = await agentMemory.get('courtroom_commitments') || [];
      return commitments.map(c =>
        `- "${c.statement}" (${c.date}) - Completed: ${c.completed ? 'Yes' : 'No'}`
      ).join('\n') || 'No previous commitments recorded.';
    } catch {
      return 'No previous commitments recorded.';
    }
  }

  /**
   * Extract topics from conversation: the five most frequent words of five
   * or more characters, ignoring a small stop-word list.
   *
   * @param {Array<{content: *}>} history
   * @returns {string[]} Up to five words, most frequent first.
   */
  extractTopics(history) {
    // Simple topic extraction - can be enhanced with NLP
    const stopWords = new Set(['about', 'would', 'could', 'should', 'there', 'their']);
    const allText = history.map(h => h.content).join(' ').toLowerCase();
    const candidates = allText.match(/\b\w{5,}\b/g) || [];

    // A Map (not a plain object) so words such as "constructor" or
    // "__proto__" are counted instead of colliding with Object.prototype.
    const wordFreq = new Map();
    for (const word of candidates) {
      if (stopWords.has(word)) continue;
      wordFreq.set(word, (wordFreq.get(word) || 0) + 1);
    }

    return [...wordFreq.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([word]) => word);
  }

  /**
   * Analyze sentiment of conversation using small keyword lists.
   *
   * Keywords must start at a word boundary, so "now" no longer matches inside
   * "know" or "unknown", while inflections such as "urgently" and "wasted"
   * still count.
   *
   * @param {Array<{role: string, content: *}>} history
   * @returns {{urgency: number, frustration: number, messageCount: number}}
   *   `urgency` and `frustration` are the number of distinct keywords found in
   *   the user's messages.
   */
  analyzeSentiment(history) {
    const userMessages = history.filter(h => h.role === 'user').map(h => h.content);
    const text = userMessages.join(' ').toLowerCase();

    const urgentWords = ['urgent', 'asap', 'emergency', 'critical', 'now', 'immediately'];
    const frustratedWords = ['frustrated', 'annoying', 'stupid', 'useless', 'waste'];

    const countPresent = (words) =>
      words.filter(word => new RegExp(`\\b${word}`).test(text)).length;

    return {
      urgency: countPresent(urgentWords),
      frustration: countPresent(frustratedWords),
      messageCount: userMessages.length
    };
  }

  /**
   * Detect humor triggers (for commentary flavor) in the last five turns.
   *
   * @param {Array<{content: *}>} history
   * @returns {string[]} Any of 'repetition_noted', 'validation_seeking',
   *   'overthinking', 'deflection'.
   */
  detectHumorTriggers(history) {
    const triggers = [];
    // Coerce so turns with missing or non-string content cannot throw.
    const recentContent = history
      .slice(-5)
      .map(h => String(h.content ?? '').toLowerCase())
      .join(' ');

    if (/again|repeat|said|already|before/.test(recentContent)) triggers.push('repetition_noted');
    if (/sure|right|correct|think|should i/.test(recentContent)) triggers.push('validation_seeking');
    if (/what if|but then|however|maybe/.test(recentContent)) triggers.push('overthinking');
    if (/actually|by the way|speaking of/.test(recentContent)) triggers.push('deflection');

    return triggers;
  }

  /**
   * Cooldown management: has the global cooldown since the last evaluation
   * elapsed?
   *
   * Uses `detection.cooldownMinutes` (default 30). An explicit 0 is honoured
   * rather than being replaced by the default.
   *
   * @returns {boolean}
   */
  isCooldownElapsed() {
    if (!this.lastEvaluation) return true;
    const cooldownMs = (this.config.get('detection.cooldownMinutes') ?? 30) * 60 * 1000;
    return (Date.now() - this.lastEvaluation) > cooldownMs;
  }

  /**
   * @param {string} offenseId
   * @returns {boolean} True while the per-offense cooldown has not expired.
   */
  isOffenseOnCooldown(offenseId) {
    const cooldownEnd = this.cooldowns.get(offenseId);
    if (!cooldownEnd) return false;
    return Date.now() < cooldownEnd;
  }

  /**
   * Put an offense on cooldown.
   *
   * @param {string} offenseId
   * @param {number} minutes - Cooldown length, counted from now.
   */
  setCooldown(offenseId, minutes) {
    this.cooldowns.set(offenseId, Date.now() + (minutes * 60 * 1000));
  }

  /**
   * Reset the daily case counter when the local calendar day has changed
   * since the counter was last touched.
   */
  resetDailyCountIfNewDay() {
    const today = new Date().toDateString();
    if (this.lastCaseDate !== today) {
      this.casesToday = 0;
      this.lastCaseDate = today;
    }
  }

  /**
   * Uses `detection.maxCasesPerDay` (default 3). An explicit 0 is honoured
   * rather than being replaced by the default.
   *
   * @returns {boolean} True when today's case count has reached the limit.
   */
  isDailyLimitReached() {
    this.resetDailyCountIfNewDay();
    return this.casesToday >= (this.config.get('detection.maxCasesPerDay') ?? 3);
  }

  /**
   * Record one more case for today.
   */
  incrementDailyCaseCount() {
    this.resetDailyCountIfNewDay();
    this.casesToday++;
  }
}
820
+
821
+ module.exports = { SemanticOffenseDetector, OffenseDetector: SemanticOffenseDetector };