instar 0.23.12 → 0.23.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,956 @@
1
+ /**
2
+ * ResponseReviewGate — Main orchestrator for the response review pipeline.
3
+ *
4
+ * Evaluates agent responses before they reach users. Architecture:
5
+ * 1. Policy Enforcement Layer (PEL) — deterministic hard blocks
6
+ * 2. Gate Reviewer — fast LLM triage (does this need full review?)
7
+ * 3. Specialist Reviewers — parallel LLM calls checking specific dimensions
8
+ *
9
+ * Implements the 15-row normative decision matrix from the Coherence Gate spec.
10
+ * Handles retry tracking, conversation advancement detection, feedback composition,
11
+ * per-channel fail behavior, and reviewer criticality tiers.
12
+ *
13
+ * NOTE: The existing CoherenceGate.ts is a pre-action verification system
14
+ * (checks before deployment/git push). This is a response review pipeline —
15
+ * completely different purpose, same "Coherence Gate" branding.
16
+ */
17
+ import fs from 'node:fs';
18
+ import path from 'node:path';
19
+ import { PolicyEnforcementLayer } from './PolicyEnforcementLayer.js';
20
+ import { CoherenceReviewer } from './CoherenceReviewer.js';
21
+ import { GateReviewer } from './reviewers/gate-reviewer.js';
22
+ import { ConversationalToneReviewer } from './reviewers/conversational-tone.js';
23
+ import { ClaimProvenanceReviewer } from './reviewers/claim-provenance.js';
24
+ import { SettlingDetectionReviewer } from './reviewers/settling-detection.js';
25
+ import { ContextCompletenessReviewer } from './reviewers/context-completeness.js';
26
+ import { CapabilityAccuracyReviewer } from './reviewers/capability-accuracy.js';
27
+ import { UrlValidityReviewer } from './reviewers/url-validity.js';
28
+ import { ValueAlignmentReviewer } from './reviewers/value-alignment.js';
29
+ import { InformationLeakageReviewer } from './reviewers/information-leakage.js';
30
+ import { RecipientResolver } from './RecipientResolver.js';
31
+ import { CustomReviewerLoader } from './CustomReviewerLoader.js';
32
// ── Category Mapping (reviewer → generic category for agent feedback) ─
/**
 * Maps each built-in reviewer name to the generic issue category shown to the
 * agent in feedback. Names that are absent resolve to `undefined`, which
 * callers turn into 'QUALITY ISSUE' via `??`.
 *
 * The object has a null prototype on purpose: reviewer names can come from
 * custom reviewer specs, and with a regular object literal a name such as
 * 'constructor' or 'toString' would resolve to an inherited Object.prototype
 * member instead of falling through to the default category.
 */
const REVIEWER_CATEGORY_MAP = Object.assign(Object.create(null), {
    'conversational-tone': 'TONE ISSUE',
    'claim-provenance': 'ACCURACY ISSUE',
    'settling-detection': 'ACCURACY ISSUE',
    'context-completeness': 'COMPLETENESS ISSUE',
    'capability-accuracy': 'CAPABILITY ISSUE',
    'url-validity': 'ACCURACY ISSUE',
    'value-alignment': 'ALIGNMENT ISSUE',
    'information-leakage': 'ALIGNMENT ISSUE',
});
/**
 * Categories that cause an external-facing response to be held for operator
 * review (instead of delivered) once retries are exhausted.
 */
const HIGH_STAKES_CATEGORIES = new Set(['ACCURACY ISSUE', 'ALIGNMENT ISSUE']);
/** How long extracted value documents stay cached before being re-read. */
const VALUE_DOC_CACHE_TTL_MS = 60 * 60 * 1000; // 60 minutes
46
+ // ── Main Class ───────────────────────────────────────────────────────
47
+ export class ResponseReviewGate {
48
+ config;
49
+ stateDir;
50
+ pel;
51
+ gateReviewer;
52
+ reviewers = new Map();
53
+ recipientResolver;
54
+ retrySessions = new Map();
55
+ sessionMutexes = new Map();
56
+ valueDocCache = null;
57
+ reviewHistory = [];
58
+ proposals = [];
59
+ static RETENTION_DAYS = 30;
60
+ constructor(options) {
61
+ this.config = options.config;
62
+ this.stateDir = options.stateDir;
63
+ // Initialize PEL
64
+ this.pel = new PolicyEnforcementLayer(options.stateDir);
65
+ // Initialize gate reviewer
66
+ this.gateReviewer = new GateReviewer(options.apiKey, {
67
+ model: options.config.gateModel ?? 'haiku',
68
+ timeoutMs: 5_000,
69
+ });
70
+ // Initialize built-in specialist reviewers
71
+ this.initializeReviewers(options.apiKey, options.config);
72
+ // Initialize recipient resolver
73
+ this.recipientResolver = new RecipientResolver({
74
+ stateDir: options.stateDir,
75
+ relationships: options.relationships,
76
+ adaptiveTrust: options.adaptiveTrust,
77
+ });
78
+ // Load custom reviewers
79
+ this.loadCustomReviewers(options.apiKey);
80
+ }
81
+ /**
82
+ * Evaluate an agent's draft response. Main entry point.
83
+ * Implements the 15-row normative decision matrix.
84
+ */
85
+ async evaluate(request) {
86
+ const { message, sessionId, stopHookActive, context } = request;
87
+ // Session mutex — prevent concurrent reviews for same session
88
+ await this.acquireMutex(sessionId);
89
+ try {
90
+ return await this._evaluate(message, sessionId, stopHookActive, context);
91
+ }
92
+ finally {
93
+ this.releaseMutex(sessionId);
94
+ }
95
+ }
96
+ async _evaluate(message, sessionId, stopHookActive, context) {
97
+ const isExternal = context.isExternalFacing ?? this.isExternalChannel(context.channel);
98
+ const channelConfig = this.resolveChannelConfig(context.channel, isExternal);
99
+ const recipientType = context.recipientType ?? 'primary-user';
100
+ // ── Retry state management ──────────────────────────────────
101
+ let retryState = this.retrySessions.get(sessionId);
102
+ if (!stopHookActive) {
103
+ // New response (not a revision) — reset retry counter
104
+ retryState = {
105
+ retryCount: 0,
106
+ lastViolations: [],
107
+ transcriptVersion: this.getTranscriptVersion(context.transcriptPath),
108
+ createdAt: Date.now(),
109
+ };
110
+ this.retrySessions.set(sessionId, retryState);
111
+ }
112
+ else if (retryState) {
113
+ retryState.retryCount++;
114
+ // Conversation advancement detection
115
+ const currentVersion = this.getTranscriptVersion(context.transcriptPath);
116
+ if (currentVersion > retryState.transcriptVersion) {
117
+ // User sent a new message — abandon stale revision
118
+ this.retrySessions.delete(sessionId);
119
+ this.logAudit(sessionId, context, 'abandoned', [], 'Conversation advanced during revision');
120
+ return { pass: true, _outcome: 'abandoned-stale' };
121
+ }
122
+ }
123
+ else {
124
+ retryState = { retryCount: 1, lastViolations: [], transcriptVersion: 0, createdAt: Date.now() };
125
+ this.retrySessions.set(sessionId, retryState);
126
+ }
127
+ const maxRetries = this.config.maxRetries ?? 2;
128
+ // ── Step 1: PEL (always runs, even in observeOnly) ──────────
129
+ const pelContext = {
130
+ channel: context.channel,
131
+ isExternalFacing: isExternal,
132
+ recipientType,
133
+ stateDir: this.stateDir,
134
+ };
135
+ const pelResult = this.pel.enforce(message, pelContext);
136
+ // Row 1: PEL HARD_BLOCK → always block, no exceptions
137
+ if (pelResult.outcome === 'hard_block') {
138
+ const feedback = this.composePELFeedback(pelResult);
139
+ this.logAudit(sessionId, context, 'pel-block', [], 'PEL hard block');
140
+ return {
141
+ pass: false,
142
+ feedback,
143
+ issueCategories: ['POLICY VIOLATION'],
144
+ retryCount: retryState.retryCount,
145
+ _pelBlock: true,
146
+ _outcome: 'block',
147
+ };
148
+ }
149
+ // Row 3: observeOnly → log but never block (except PEL)
150
+ const observeOnly = this.config.observeOnly ?? false;
151
+ // ── Step 2: Resolve recipient context ────────────────────────
152
+ const recipientContext = this.recipientResolver.resolve(context.recipientId, recipientType);
153
+ // ── Step 3: Extract tool output context from transcript ──────
154
+ const toolOutputContext = context.transcriptPath
155
+ ? this.extractToolContext(context.transcriptPath)
156
+ : undefined;
157
+ // ── Step 4: Extract URLs for URL validity reviewer ───────────
158
+ const extractedUrls = this.extractUrls(message);
159
+ // ── Step 5: Load value documents (cached) ────────────────────
160
+ const valueDocs = this.loadValueDocs();
161
+ // ── Step 6: Build review context ─────────────────────────────
162
+ const reviewCtx = {
163
+ message,
164
+ channel: context.channel,
165
+ isExternalFacing: isExternal,
166
+ recipientType,
167
+ toolOutputContext,
168
+ extractedUrls: extractedUrls.length > 0 ? extractedUrls : undefined,
169
+ agentValues: valueDocs.agentValues || undefined,
170
+ userValues: valueDocs.userValues || undefined,
171
+ orgValues: valueDocs.orgValues || undefined,
172
+ trustLevel: recipientContext.trustLevel,
173
+ relationshipContext: recipientContext.communicationStyle ? {
174
+ communicationStyle: recipientContext.communicationStyle,
175
+ formality: recipientContext.formality,
176
+ themes: recipientContext.themes,
177
+ } : undefined,
178
+ };
179
+ // ── Step 7: Gate reviewer ────────────────────────────────────
180
+ let gateResult;
181
+ const skipGate = channelConfig.skipGate ?? false;
182
+ if (!skipGate) {
183
+ gateResult = await this.gateReviewer.reviewAsGate(reviewCtx);
184
+ if (!gateResult.needsReview) {
185
+ // Row 4: PEL PASS + Gate says no review needed → PASS
186
+ const warnings = pelResult.outcome === 'warn'
187
+ ? pelResult.violations.map(v => v.detail)
188
+ : [];
189
+ this.logAudit(sessionId, context, 'pass-gate', [], 'Gate skipped full review');
190
+ return {
191
+ pass: true,
192
+ warnings,
193
+ _gateResult: gateResult,
194
+ _outcome: 'pass',
195
+ };
196
+ }
197
+ }
198
+ // ── Step 8: Specialist reviewers (parallel fan-out) ──────────
199
+ const enabledReviewers = this.getEnabledReviewers(context.channel, recipientType, channelConfig);
200
+ const results = await Promise.allSettled(enabledReviewers.map(r => r.review(reviewCtx)));
201
+ // Collect results
202
+ const settled = [];
203
+ let abstainCount = 0;
204
+ let highCritTimeout = false;
205
+ for (let i = 0; i < results.length; i++) {
206
+ const result = results[i];
207
+ if (result.status === 'fulfilled') {
208
+ settled.push(result.value);
209
+ }
210
+ else {
211
+ // Reviewer failed — treat as abstain
212
+ abstainCount++;
213
+ const reviewerName = enabledReviewers[i].name;
214
+ const criticality = this.config.reviewerCriticality?.[reviewerName] ?? 'standard';
215
+ if (criticality === 'high') {
216
+ highCritTimeout = true;
217
+ }
218
+ }
219
+ }
220
+ // ── Step 9: Aggregate verdicts ───────────────────────────────
221
+ const blockResults = settled.filter(r => !r.pass && this.getReviewerMode(r.reviewer) === 'block');
222
+ const warnResults = settled.filter(r => !r.pass && this.getReviewerMode(r.reviewer) === 'warn');
223
+ const allAbstain = settled.length === 0 && abstainCount > 0;
224
+ const majorityAbstain = abstainCount > enabledReviewers.length / 2;
225
+ // Check warn escalation threshold
226
+ const warnEscalationThreshold = this.config.warnEscalationThreshold ?? 3;
227
+ const warnEscalated = warnResults.length >= warnEscalationThreshold;
228
+ // Determine LLM verdict
229
+ let llmVerdict;
230
+ if (allAbstain || majorityAbstain) {
231
+ llmVerdict = 'ALL_ABSTAIN';
232
+ }
233
+ else if (highCritTimeout && isExternal) {
234
+ llmVerdict = 'HIGH_CRIT_TIMEOUT';
235
+ }
236
+ else if (blockResults.length > 0 || warnEscalated) {
237
+ llmVerdict = 'BLOCK';
238
+ }
239
+ else if (warnResults.length > 0) {
240
+ llmVerdict = 'WARN_ONLY';
241
+ }
242
+ else {
243
+ llmVerdict = 'PASS';
244
+ }
245
+ // ── Step 10: Apply normative decision matrix ─────────────────
246
+ const pelOutcome = pelResult.outcome; // 'pass' | 'warn' | 'hard_block' (hard_block handled above)
247
+ const retryExhausted = retryState.retryCount >= maxRetries;
248
+ // Build audit violations
249
+ const auditViolations = [...blockResults, ...warnResults].map(r => ({
250
+ reviewer: r.reviewer,
251
+ severity: r.severity,
252
+ issue: r.issue,
253
+ suggestion: r.suggestion,
254
+ latencyMs: r.latencyMs,
255
+ }));
256
+ // Row 2: PEL WARN → pass + warn (PEL warns are advisory)
257
+ const pelWarnings = pelResult.outcome === 'warn'
258
+ ? pelResult.violations.map(v => v.detail)
259
+ : [];
260
+ // Row 3: observeOnly → always pass
261
+ if (observeOnly) {
262
+ this.logAudit(sessionId, context, 'observe-only', auditViolations, `LLM: ${llmVerdict}`);
263
+ return {
264
+ pass: true,
265
+ warnings: [...pelWarnings, ...warnResults.map(r => r.issue)],
266
+ _auditViolations: auditViolations,
267
+ _gateResult: gateResult,
268
+ _outcome: 'pass-observe',
269
+ };
270
+ }
271
+ // Row 4: LLM PASS → deliver
272
+ if (llmVerdict === 'PASS') {
273
+ this.logAudit(sessionId, context, 'pass', auditViolations, 'All reviewers pass');
274
+ return {
275
+ pass: true,
276
+ warnings: pelWarnings,
277
+ _auditViolations: auditViolations,
278
+ _gateResult: gateResult,
279
+ _outcome: 'pass',
280
+ };
281
+ }
282
+ // Row 5: WARN_ONLY → deliver with warnings
283
+ if (llmVerdict === 'WARN_ONLY') {
284
+ this.logAudit(sessionId, context, 'pass-warn', auditViolations, 'Warnings only');
285
+ return {
286
+ pass: true,
287
+ warnings: [...pelWarnings, ...warnResults.map(r => r.issue)],
288
+ _auditViolations: auditViolations,
289
+ _gateResult: gateResult,
290
+ _outcome: 'pass-warn',
291
+ };
292
+ }
293
+ // Rows 10-15: ALL_ABSTAIN, TIMEOUT, HIGH_CRIT_TIMEOUT
294
+ if (llmVerdict === 'ALL_ABSTAIN' || llmVerdict === 'HIGH_CRIT_TIMEOUT') {
295
+ if (isExternal) {
296
+ // Row 10, 12, 14: QUEUE for external
297
+ if (channelConfig.queueOnFailure) {
298
+ this.logAudit(sessionId, context, 'queued', auditViolations, `${llmVerdict}: queued`);
299
+ // For now, queue-and-hold is implemented by returning pass:false
300
+ // In production, this would integrate with a message queue
301
+ return {
302
+ pass: false,
303
+ feedback: '[unreviewed] Review system temporarily unavailable. Message held for review.',
304
+ issueCategories: ['INFRASTRUCTURE'],
305
+ _auditViolations: auditViolations,
306
+ _outcome: 'queue',
307
+ };
308
+ }
309
+ }
310
+ // Row 11, 13, 15: fail-open for internal
311
+ this.logAudit(sessionId, context, 'pass-failopen', auditViolations, `${llmVerdict}: fail-open`);
312
+ return {
313
+ pass: true,
314
+ warnings: ['[unreviewed] Some reviewers were unavailable'],
315
+ _auditViolations: auditViolations,
316
+ _outcome: 'pass-failopen',
317
+ };
318
+ }
319
+ // Row 6: BLOCK + retries remaining → block for revision
320
+ if (llmVerdict === 'BLOCK' && !retryExhausted) {
321
+ const feedback = this.composeFeedback(blockResults, warnResults, retryState.retryCount, maxRetries);
322
+ retryState.lastViolations = auditViolations;
323
+ this.logAudit(sessionId, context, 'block', auditViolations, `Block: retry ${retryState.retryCount}/${maxRetries}`);
324
+ return {
325
+ pass: false,
326
+ feedback,
327
+ issueCategories: this.getIssueCategories(blockResults),
328
+ retryCount: retryState.retryCount,
329
+ _auditViolations: auditViolations,
330
+ _gateResult: gateResult,
331
+ _outcome: 'block',
332
+ };
333
+ }
334
+ // Rows 7-9: BLOCK + retry exhausted
335
+ if (llmVerdict === 'BLOCK' && retryExhausted) {
336
+ const categories = this.getIssueCategories(blockResults);
337
+ const hasHighStakes = categories.some(c => HIGH_STAKES_CATEGORIES.has(c));
338
+ if (isExternal && hasHighStakes) {
339
+ // Row 9: External + accuracy/alignment → HOLD for operator review
340
+ this.logAudit(sessionId, context, 'hold', auditViolations, 'Retry exhausted on high-stakes issue');
341
+ return {
342
+ pass: false,
343
+ feedback: 'Response held for operator review due to unresolved accuracy/alignment concerns.',
344
+ issueCategories: categories,
345
+ retryCount: retryState.retryCount,
346
+ _auditViolations: auditViolations,
347
+ _outcome: 'hold',
348
+ };
349
+ }
350
+ // Rows 7-8: Internal, or external + low-stakes → PASS + attention queue
351
+ this.logAudit(sessionId, context, 'pass-exhausted', auditViolations, 'Retry exhausted, delivering');
352
+ this.retrySessions.delete(sessionId);
353
+ return {
354
+ pass: true,
355
+ warnings: [...pelWarnings, `[retry-exhausted] ${categories.join(', ')}`],
356
+ _auditViolations: auditViolations,
357
+ _gateResult: gateResult,
358
+ _outcome: 'pass-exhausted',
359
+ };
360
+ }
361
+ // Fallback (should not reach here)
362
+ return { pass: true, _outcome: 'fallback' };
363
+ }
364
+ // ── Reviewer Management ────────────────────────────────────────────
365
+ initializeReviewers(apiKey, config) {
366
+ const defaultModel = config.reviewerModel ?? 'haiku';
367
+ const overrides = config.reviewerModelOverrides ?? {};
368
+ const reviewerDefs = [
369
+ { name: 'conversational-tone', cls: ConversationalToneReviewer },
370
+ { name: 'claim-provenance', cls: ClaimProvenanceReviewer },
371
+ { name: 'settling-detection', cls: SettlingDetectionReviewer },
372
+ { name: 'context-completeness', cls: ContextCompletenessReviewer },
373
+ { name: 'capability-accuracy', cls: CapabilityAccuracyReviewer },
374
+ { name: 'url-validity', cls: UrlValidityReviewer },
375
+ { name: 'value-alignment', cls: ValueAlignmentReviewer },
376
+ { name: 'information-leakage', cls: InformationLeakageReviewer },
377
+ ];
378
+ for (const { name, cls } of reviewerDefs) {
379
+ const reviewerConfig = config.reviewers?.[name];
380
+ if (reviewerConfig && !reviewerConfig.enabled)
381
+ continue;
382
+ const model = overrides[name] ?? defaultModel;
383
+ const mode = reviewerConfig?.mode ?? 'block';
384
+ const timeoutMs = config.timeoutMs ?? 8_000;
385
+ this.reviewers.set(name, new cls(apiKey, { model, mode, timeoutMs }));
386
+ }
387
+ }
388
+ loadCustomReviewers(apiKey) {
389
+ const loader = new CustomReviewerLoader(this.stateDir);
390
+ // Custom reviewer loading is best-effort — don't break startup
391
+ try {
392
+ const specs = loader.loadAll();
393
+ for (const spec of specs) {
394
+ if (this.reviewers.has(spec.name))
395
+ continue; // built-in takes precedence
396
+ // Create a dynamic reviewer from the spec
397
+ const mode = (this.config.reviewers?.[spec.name]?.mode ?? spec.mode);
398
+ const model = this.config.reviewerModelOverrides?.[spec.name] ?? this.config.reviewerModel ?? 'haiku';
399
+ // Dynamic reviewer using the spec's prompt
400
+ const reviewer = new DynamicReviewer(spec.name, apiKey, spec.prompt, spec.contextRequirements, {
401
+ model, mode, timeoutMs: this.config.timeoutMs ?? 8_000,
402
+ });
403
+ this.reviewers.set(spec.name, reviewer);
404
+ }
405
+ }
406
+ catch {
407
+ // @silent-fallback-ok — custom reviewer loading non-critical
408
+ }
409
+ }
410
+ getEnabledReviewers(channel, recipientType, channelConfig) {
411
+ const enabled = [];
412
+ for (const [name, reviewer] of this.reviewers) {
413
+ // Skip information-leakage for primary-user
414
+ if (name === 'information-leakage' && recipientType === 'primary-user')
415
+ continue;
416
+ // Skip observe-mode reviewers from blocking pipeline
417
+ const mode = this.getReviewerMode(name);
418
+ if (mode === 'observe')
419
+ continue;
420
+ enabled.push(reviewer);
421
+ }
422
+ // Add channel-specific additional reviewers if configured
423
+ // (These would be custom reviewers registered for this channel)
424
+ if (channelConfig.additionalReviewers) {
425
+ for (const name of channelConfig.additionalReviewers) {
426
+ const reviewer = this.reviewers.get(name);
427
+ if (reviewer && !enabled.includes(reviewer)) {
428
+ enabled.push(reviewer);
429
+ }
430
+ }
431
+ }
432
+ return enabled;
433
+ }
434
+ getReviewerMode(reviewerName) {
435
+ return this.config.reviewers?.[reviewerName]?.mode ?? 'block';
436
+ }
437
+ // ── Channel Configuration ──────────────────────────────────────────
438
+ resolveChannelConfig(channel, isExternal) {
439
+ // Check explicit channel config first
440
+ const explicit = this.config.channels?.[channel];
441
+ if (explicit)
442
+ return explicit;
443
+ // Fall back to channel defaults
444
+ const defaults = isExternal
445
+ ? this.config.channelDefaults?.external
446
+ : this.config.channelDefaults?.internal;
447
+ return defaults ?? {
448
+ failOpen: !isExternal,
449
+ skipGate: isExternal,
450
+ queueOnFailure: isExternal,
451
+ queueTimeoutMs: 30_000,
452
+ };
453
+ }
454
+ isExternalChannel(channel) {
455
+ const internalChannels = new Set(['direct', 'cli', 'internal']);
456
+ return !internalChannels.has(channel);
457
+ }
458
+ // ── Feedback Composition ───────────────────────────────────────────
459
+ composeFeedback(blocks, warns, retryCount, maxRetries) {
460
+ const allIssues = [...blocks, ...warns];
461
+ const lines = [];
462
+ if (retryCount > 0) {
463
+ // Collapse format for revisions (context window management)
464
+ const prevCategories = this.getIssueCategories(blocks);
465
+ lines.push(`COHERENCE REVIEW: Previous attempt had ${allIssues.length} issue(s): ${prevCategories.join(', ')}.`);
466
+ lines.push(`Current attempt (revision ${retryCount} of ${maxRetries}):`);
467
+ lines.push('');
468
+ }
469
+ else {
470
+ lines.push(`COHERENCE REVIEW: Your draft response has ${allIssues.length} issue(s) to address.`);
471
+ lines.push('');
472
+ }
473
+ // Deduplicate by category
474
+ const seen = new Set();
475
+ for (const result of allIssues) {
476
+ const category = REVIEWER_CATEGORY_MAP[result.reviewer] ?? 'QUALITY ISSUE';
477
+ if (seen.has(category))
478
+ continue;
479
+ seen.add(category);
480
+ lines.push(`[${category}]`);
481
+ lines.push(result.issue);
482
+ if (result.suggestion) {
483
+ lines.push(result.suggestion);
484
+ }
485
+ lines.push('');
486
+ }
487
+ lines.push('Revise your response addressing the issues above. Keep the substance — just fix the flagged problems.');
488
+ return lines.join('\n');
489
+ }
490
+ composePELFeedback(pelResult) {
491
+ const lines = ['POLICY VIOLATION: Your response contains content that cannot be sent.', ''];
492
+ for (const violation of pelResult.violations) {
493
+ if (violation.severity === 'hard_block') {
494
+ lines.push(`[POLICY VIOLATION] ${violation.detail}`);
495
+ }
496
+ }
497
+ lines.push('');
498
+ lines.push('Remove the flagged content and try again.');
499
+ return lines.join('\n');
500
+ }
501
+ getIssueCategories(results) {
502
+ const categories = new Set();
503
+ for (const r of results) {
504
+ categories.add(REVIEWER_CATEGORY_MAP[r.reviewer] ?? 'QUALITY ISSUE');
505
+ }
506
+ return [...categories];
507
+ }
508
+ // ── Context Extraction ─────────────────────────────────────────────
509
+ extractToolContext(transcriptPath) {
510
+ try {
511
+ if (!fs.existsSync(transcriptPath))
512
+ return undefined;
513
+ const content = fs.readFileSync(transcriptPath, 'utf-8');
514
+ const lines = content.trim().split('\n');
515
+ // Extract last 3-5 tool results (look for tool_result entries)
516
+ const toolResults = [];
517
+ for (let i = lines.length - 1; i >= 0 && toolResults.length < 5; i--) {
518
+ try {
519
+ const entry = JSON.parse(lines[i]);
520
+ if (entry?.type === 'tool_result' || entry?.role === 'tool') {
521
+ const text = typeof entry.content === 'string'
522
+ ? entry.content
523
+ : JSON.stringify(entry.content);
524
+ // Truncate each result to ~100 tokens
525
+ toolResults.unshift(text.slice(0, 400));
526
+ }
527
+ }
528
+ catch {
529
+ // Skip non-JSON lines
530
+ }
531
+ }
532
+ if (toolResults.length === 0)
533
+ return undefined;
534
+ // Combine and truncate to ~500 tokens total
535
+ const combined = toolResults.join('\n---\n');
536
+ return combined.slice(0, 2000);
537
+ }
538
+ catch {
539
+ return undefined;
540
+ }
541
+ }
542
+ extractUrls(message) {
543
+ const urlRegex = /https?:\/\/[^\s<>"')\]]+/g;
544
+ return [...(message.match(urlRegex) ?? [])];
545
+ }
546
+ loadValueDocs() {
547
+ // Check cache
548
+ if (this.valueDocCache && Date.now() - this.valueDocCache.loadedAt < VALUE_DOC_CACHE_TTL_MS) {
549
+ return this.valueDocCache;
550
+ }
551
+ const agentValues = this.extractValueSection(path.join(this.stateDir, 'AGENT.md'), 'Intent');
552
+ const userValues = this.extractValueSection(path.join(this.stateDir, 'USER.md'));
553
+ const orgValues = this.extractValueSection(path.join(this.stateDir, 'ORG-INTENT.md'));
554
+ this.valueDocCache = { agentValues, userValues, orgValues, loadedAt: Date.now() };
555
+ return this.valueDocCache;
556
+ }
557
+ /**
558
+ * Deterministic value document summarization.
559
+ * Extracts headers, bullets, and bold text — not LLM summarization.
560
+ * Target: ~200-400 tokens for all three tiers combined.
561
+ */
562
+ extractValueSection(filePath, section) {
563
+ try {
564
+ if (!fs.existsSync(filePath))
565
+ return '';
566
+ let content = fs.readFileSync(filePath, 'utf-8');
567
+ // If a specific section is requested, extract it
568
+ if (section) {
569
+ const sectionRegex = new RegExp(`^##\\s+${section}[\\s\\S]*?(?=^##\\s|$)`, 'gm');
570
+ const match = content.match(sectionRegex);
571
+ content = match ? match[0] : content;
572
+ }
573
+ // Extract key elements: headers, bullets, bold text
574
+ const lines = content.split('\n');
575
+ const extracted = [];
576
+ let tokens = 0;
577
+ for (const line of lines) {
578
+ const trimmed = line.trim();
579
+ if (!trimmed)
580
+ continue;
581
+ // Keep headers
582
+ if (trimmed.startsWith('#')) {
583
+ extracted.push(trimmed);
584
+ tokens += trimmed.split(/\s+/).length;
585
+ }
586
+ // Keep bullet points
587
+ else if (trimmed.startsWith('-') || trimmed.startsWith('*')) {
588
+ extracted.push(trimmed);
589
+ tokens += trimmed.split(/\s+/).length;
590
+ }
591
+ // Keep bold text lines
592
+ else if (trimmed.includes('**')) {
593
+ extracted.push(trimmed);
594
+ tokens += trimmed.split(/\s+/).length;
595
+ }
596
+ // Budget: ~150 tokens per document
597
+ if (tokens > 150)
598
+ break;
599
+ }
600
+ return extracted.join('\n');
601
+ }
602
+ catch {
603
+ return '';
604
+ }
605
+ }
606
+ // ── Conversation Advancement ───────────────────────────────────────
607
+ getTranscriptVersion(transcriptPath) {
608
+ if (!transcriptPath)
609
+ return 0;
610
+ try {
611
+ const stat = fs.statSync(transcriptPath);
612
+ return stat.mtimeMs;
613
+ }
614
+ catch {
615
+ return 0;
616
+ }
617
+ }
618
+ // ── Session Mutex ──────────────────────────────────────────────────
619
+ async acquireMutex(sessionId) {
620
+ while (this.sessionMutexes.has(sessionId)) {
621
+ await this.sessionMutexes.get(sessionId);
622
+ }
623
+ let resolve;
624
+ const promise = new Promise(r => { resolve = r; });
625
+ this.sessionMutexes.set(sessionId, promise);
626
+ // Store resolve for release
627
+ promise.__resolve = resolve;
628
+ }
629
+ releaseMutex(sessionId) {
630
+ const promise = this.sessionMutexes.get(sessionId);
631
+ this.sessionMutexes.delete(sessionId);
632
+ if (promise && promise.__resolve) {
633
+ promise.__resolve();
634
+ }
635
+ }
636
+ // ── Audit Logging ──────────────────────────────────────────────────
637
+ logAudit(sessionId, context, verdict, violations, note) {
638
+ const entry = {
639
+ timestamp: new Date().toISOString(),
640
+ sessionId,
641
+ channel: context.channel,
642
+ recipientType: context.recipientType ?? 'primary-user',
643
+ recipientId: context.recipientId,
644
+ verdict,
645
+ violations,
646
+ note,
647
+ };
648
+ this.reviewHistory.push(entry);
649
+ // Prune old entries (keep last 1000)
650
+ if (this.reviewHistory.length > 1000) {
651
+ this.reviewHistory = this.reviewHistory.slice(-1000);
652
+ }
653
+ }
654
+ // ── Public API for routes ──────────────────────────────────────────
655
+ getReviewHistory(options) {
656
+ // Retention: purge entries older than RETENTION_DAYS
657
+ const retentionCutoff = Date.now() - ResponseReviewGate.RETENTION_DAYS * 24 * 60 * 60 * 1000;
658
+ this.reviewHistory = this.reviewHistory.filter(e => new Date(e.timestamp).getTime() >= retentionCutoff);
659
+ let entries = this.reviewHistory;
660
+ if (options?.sessionId) {
661
+ entries = entries.filter(e => e.sessionId === options.sessionId);
662
+ }
663
+ if (options?.reviewer) {
664
+ entries = entries.filter(e => e.violations.some(v => v.reviewer === options.reviewer));
665
+ }
666
+ if (options?.verdict) {
667
+ entries = entries.filter(e => e.verdict === options.verdict);
668
+ }
669
+ if (options?.recipientId) {
670
+ entries = entries.filter(e => e.recipientId === options.recipientId);
671
+ }
672
+ if (options?.since) {
673
+ const sinceDate = new Date(options.since).getTime();
674
+ entries = entries.filter(e => new Date(e.timestamp).getTime() >= sinceDate);
675
+ }
676
+ const limit = options?.limit ?? 50;
677
+ return entries.slice(-limit);
678
+ }
679
+ /**
680
+ * Delete review history for a specific session (DSAR compliance).
681
+ */
682
+ deleteHistory(sessionId) {
683
+ const before = this.reviewHistory.length;
684
+ this.reviewHistory = this.reviewHistory.filter(e => e.sessionId !== sessionId);
685
+ return before - this.reviewHistory.length;
686
+ }
687
+ getReviewerStats(options) {
688
+ const perReviewer = {};
689
+ for (const [name, reviewer] of this.reviewers) {
690
+ const m = reviewer.metrics;
691
+ const total = m.passCount + m.failCount + m.errorCount;
692
+ perReviewer[name] = {
693
+ passRate: total > 0 ? m.passCount / total : 0,
694
+ flagRate: total > 0 ? m.failCount / total : 0,
695
+ errorRate: total > 0 ? m.errorCount / total : 0,
696
+ avgLatencyMs: total > 0 ? Math.round(m.totalLatencyMs / total) : 0,
697
+ jsonValidityRate: total > 0 ? 1 - (m.jsonParseErrors / total) : 1,
698
+ total,
699
+ };
700
+ }
701
+ // Per-recipient-type breakdown from history
702
+ const recipientBreakdown = {};
703
+ let sinceMs = 0;
704
+ if (options?.since) {
705
+ sinceMs = new Date(options.since).getTime();
706
+ }
707
+ else if (options?.period === 'daily') {
708
+ sinceMs = Date.now() - 24 * 60 * 60 * 1000;
709
+ }
710
+ else if (options?.period === 'weekly') {
711
+ sinceMs = Date.now() - 7 * 24 * 60 * 60 * 1000;
712
+ }
713
+ const filteredHistory = sinceMs > 0
714
+ ? this.reviewHistory.filter(e => new Date(e.timestamp).getTime() >= sinceMs)
715
+ : this.reviewHistory;
716
+ for (const entry of filteredHistory) {
717
+ const rt = entry.recipientType;
718
+ if (!recipientBreakdown[rt]) {
719
+ recipientBreakdown[rt] = { total: 0, blocked: 0, passed: 0 };
720
+ }
721
+ recipientBreakdown[rt].total++;
722
+ if (entry.verdict.includes('block') || entry.verdict.includes('hold')) {
723
+ recipientBreakdown[rt].blocked++;
724
+ }
725
+ else {
726
+ recipientBreakdown[rt].passed++;
727
+ }
728
+ }
729
+ // False positive indicators
730
+ const totalBlocked = filteredHistory.filter(e => e.verdict.includes('block') || e.verdict.includes('hold')).length;
731
+ const totalExhausted = filteredHistory.filter(e => e.verdict === 'pass-exhausted').length;
732
+ return {
733
+ reviewers: perReviewer,
734
+ summary: {
735
+ totalReviews: filteredHistory.length,
736
+ totalBlocked,
737
+ totalExhausted,
738
+ exhaustionRate: filteredHistory.length > 0
739
+ ? totalExhausted / filteredHistory.length
740
+ : 0,
741
+ period: options?.period ?? 'all',
742
+ },
743
+ recipientBreakdown,
744
+ };
745
+ }
746
+ /** Check if the gate is enabled and ready */
747
+ isEnabled() {
748
+ return this.config.enabled;
749
+ }
750
+ // ── Canary Tests ──────────────────────────────────────────────────
751
+ /**
752
+ * Run canary tests with known-bad messages. Returns results showing
753
+ * which canary messages were caught and which were missed.
754
+ */
755
+ async runCanaryTests() {
756
+ const results = [];
757
+ for (const canary of CANARY_CORPUS) {
758
+ const response = await this.evaluate({
759
+ message: canary.message,
760
+ sessionId: `canary-${Date.now()}`,
761
+ stopHookActive: false,
762
+ context: {
763
+ channel: canary.channel,
764
+ isExternalFacing: canary.isExternalFacing,
765
+ recipientType: canary.recipientType,
766
+ },
767
+ });
768
+ const caught = !response.pass;
769
+ results.push({
770
+ canaryId: canary.id,
771
+ description: canary.description,
772
+ expectedDimension: canary.expectedDimension,
773
+ caught,
774
+ verdict: response._outcome,
775
+ pass: caught === canary.shouldBlock,
776
+ });
777
+ }
778
+ return results;
779
+ }
780
+ /**
781
+ * Get reviewer health — per-reviewer pass rate relative to baseline expectations.
782
+ */
783
+ getReviewerHealth() {
784
+ const reviewerHealth = {};
785
+ for (const [name, reviewer] of this.reviewers) {
786
+ const m = reviewer.metrics;
787
+ const total = m.passCount + m.failCount + m.errorCount;
788
+ const passRate = total > 0 ? m.passCount / total : 1;
789
+ const errorRate = total > 0 ? m.errorCount / total : 0;
790
+ let status = 'healthy';
791
+ if (errorRate > 0.5 || (total > 10 && passRate < 0.1)) {
792
+ status = 'failing';
793
+ }
794
+ else if (errorRate > 0.2 || m.jsonParseErrors > total * 0.3) {
795
+ status = 'degraded';
796
+ }
797
+ reviewerHealth[name] = { passRate, total, status };
798
+ }
799
+ const allStatuses = Object.values(reviewerHealth).map(r => r.status);
800
+ let overallStatus = 'healthy';
801
+ if (allStatuses.includes('failing'))
802
+ overallStatus = 'failing';
803
+ else if (allStatuses.includes('degraded'))
804
+ overallStatus = 'degraded';
805
+ return {
806
+ overallStatus,
807
+ reviewers: reviewerHealth,
808
+ lastCanaryRun: this.lastCanaryResults,
809
+ };
810
+ }
811
+ lastCanaryResults = null;
812
+ /** Store canary results for health reporting */
813
+ setCanaryResults(results) {
814
+ this.lastCanaryResults = results;
815
+ }
816
+ // ── Proposal Queue Management ─────────────────────────────────────
817
+ getProposals(status) {
818
+ if (status) {
819
+ return this.proposals.filter(p => p.status === status);
820
+ }
821
+ return [...this.proposals];
822
+ }
823
+ addProposal(proposal) {
824
+ const newProposal = {
825
+ ...proposal,
826
+ id: `prop-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
827
+ status: 'pending',
828
+ createdAt: new Date().toISOString(),
829
+ };
830
+ this.proposals.push(newProposal);
831
+ return newProposal;
832
+ }
833
+ resolveProposal(id, action, resolution) {
834
+ const proposal = this.proposals.find(p => p.id === id);
835
+ if (!proposal || proposal.status !== 'pending')
836
+ return null;
837
+ proposal.status = action === 'approve' ? 'approved' : 'rejected';
838
+ proposal.resolvedAt = new Date().toISOString();
839
+ proposal.resolution = resolution;
840
+ return proposal;
841
+ }
842
+ // ── Health Dashboard Data ─────────────────────────────────────────
843
+ getHealthDashboard() {
844
+ const stats = this.getReviewerStats();
845
+ const pending = this.getProposals('pending');
846
+ // Incident counts by dimension
847
+ const incidentsByDimension = {};
848
+ for (const entry of this.reviewHistory) {
849
+ for (const v of entry.violations) {
850
+ incidentsByDimension[v.reviewer] = (incidentsByDimension[v.reviewer] ?? 0) + 1;
851
+ }
852
+ }
853
+ // Reviewer coverage (which reviewers have actually run)
854
+ const reviewerCoverage = {};
855
+ for (const [name, reviewer] of this.reviewers) {
856
+ const m = reviewer.metrics;
857
+ reviewerCoverage[name] = (m.passCount + m.failCount + m.errorCount) > 0;
858
+ }
859
+ return {
860
+ enabled: this.config.enabled,
861
+ observeOnly: this.config.observeOnly ?? false,
862
+ stats: stats.summary,
863
+ incidentsByDimension,
864
+ reviewerCoverage,
865
+ pendingProposals: pending.length,
866
+ activeRetrySessions: this.retrySessions.size,
867
+ historySize: this.reviewHistory.length,
868
+ };
869
+ }
870
+ }
871
+ // ── Dynamic Reviewer (for custom reviewer specs) ─────────────────────
872
/**
 * Reviewer whose prompt is built from a caller-supplied template rather than
 * a hard-coded one.
 *
 * Supported template placeholders:
 *   - `{channel}`        — replaced with `context.channel` when it is truthy.
 *   - `{custom_context}` — replaced with the first 2000 characters of the
 *                          file at `contextRequirements.customContext` when
 *                          that path is set.
 */
class DynamicReviewer extends CoherenceReviewer {
    /** Prompt template text containing the optional placeholders above. */
    promptTemplate;
    /** Context settings; only `customContext` (a file path) is read here. */
    contextRequirements;
    /**
     * @param {string} name - Passed through to the base reviewer.
     * @param {string} apiKey - Passed through to the base reviewer.
     * @param {string} promptTemplate - Template used by buildPrompt().
     * @param {object} contextRequirements - See the field of the same name.
     * @param {object} [options] - Passed through to the base reviewer.
     */
    constructor(name, apiKey, promptTemplate, contextRequirements, options) {
        super(name, apiKey, options);
        this.promptTemplate = promptTemplate;
        this.contextRequirements = contextRequirements;
    }
    /**
     * Build the full reviewer prompt for one message.
     *
     * @param {object} context - Review context; `channel` and `message` are
     *   read here.
     * @returns {string} Anti-injection preamble, the filled-in template, the
     *   JSON response instructions, and the boundary-wrapped message.
     */
    buildPrompt(context) {
        const boundary = this.generateBoundary();
        const preamble = this.buildAntiInjectionPreamble();
        // Substitute context into the prompt template.
        // Replacer functions are used instead of replacement strings so that
        // `$`-sequences in the inserted text ($&, $', $`, $$, $1…) are taken
        // literally rather than interpreted as replacement patterns.
        let prompt = this.promptTemplate;
        if (context.channel) {
            prompt = prompt.replace(/\{channel\}/g, () => context.channel);
        }
        // Load custom context file if specified
        if (this.contextRequirements.customContext) {
            try {
                const customContent = fs.readFileSync(this.contextRequirements.customContext, 'utf-8');
                // Cap the inserted content at 2000 characters.
                const excerpt = customContent.slice(0, 2000);
                prompt = prompt.replace(/\{custom_context\}/g, () => excerpt);
            }
            catch {
                // Best effort: an unreadable file must not stop the review.
                prompt = prompt.replace(/\{custom_context\}/g, '(custom context unavailable)');
            }
        }
        return `${preamble}

${prompt}

Respond EXCLUSIVELY with valid JSON:
{ "pass": boolean, "severity": "block"|"warn", "issue": "...", "suggestion": "..." }
If pass is true, issue and suggestion can be empty strings.

Message:
${this.wrapMessage(context.message, boundary)}`;
    }
}
910
/**
 * Targeting fields shared by the canaries that simulate an external-facing
 * Telegram message to the primary user. Spread into each such canary so every
 * entry still owns its own copy of the fields.
 */
const EXTERNAL_TELEGRAM_TARGET = {
    channel: 'telegram',
    isExternalFacing: true,
    recipientType: 'primary-user',
};
/**
 * Built-in canary corpus: fixed messages with a known expected outcome, used
 * by runCanaryTests() to check that the review pipeline still behaves as
 * intended. Entries with `shouldBlock: true` are known-bad messages that must
 * be caught; the `shouldBlock: false` entry is a clean control that must pass.
 * Intended to grow as new failure modes are discovered.
 */
const CANARY_CORPUS = [
    {
        id: 'canary-tone-1',
        message: 'Run curl -H "Authorization: Bearer $AUTH" http://localhost:4042/jobs to see your scheduled tasks. Then edit .instar/config.json to set scheduler.enabled to true.',
        description: 'Technical language and CLI commands in user-facing message',
        expectedDimension: 'conversational-tone',
        shouldBlock: true,
        ...EXTERNAL_TELEGRAM_TARGET,
    },
    {
        id: 'canary-settling-1',
        message: 'Unfortunately, I cannot access that feature. It appears to be unavailable at this time. You may want to try again later or contact support.',
        description: 'Settling — giving up without trying alternatives',
        expectedDimension: 'settling-detection',
        shouldBlock: true,
        ...EXTERNAL_TELEGRAM_TARGET,
    },
    {
        id: 'canary-capability-1',
        message: "I'm unable to send messages via Telegram. You'll need to configure that yourself by editing the config file.",
        description: 'False inability claim — offloading to user',
        expectedDimension: 'capability-accuracy',
        shouldBlock: true,
        ...EXTERNAL_TELEGRAM_TARGET,
    },
    {
        // Clean control: an internal, direct-channel acknowledgment.
        id: 'canary-clean-1',
        message: 'Got it, working on that now.',
        description: 'Simple acknowledgment — should pass',
        expectedDimension: 'gate',
        shouldBlock: false,
        channel: 'direct',
        isExternalFacing: false,
        recipientType: 'primary-user',
    },
];
956
+ //# sourceMappingURL=ResponseReviewGate.js.map