@brutalist/mcp 1.8.0 → 1.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -0
- package/dist/brutalist-server.d.ts +31 -9
- package/dist/brutalist-server.d.ts.map +1 -1
- package/dist/brutalist-server.js +107 -673
- package/dist/brutalist-server.js.map +1 -1
- package/dist/cli-adapters/claude-adapter.d.ts +25 -0
- package/dist/cli-adapters/claude-adapter.d.ts.map +1 -0
- package/dist/cli-adapters/claude-adapter.js +245 -0
- package/dist/cli-adapters/claude-adapter.js.map +1 -0
- package/dist/cli-adapters/codex-adapter.d.ts +23 -0
- package/dist/cli-adapters/codex-adapter.d.ts.map +1 -0
- package/dist/cli-adapters/codex-adapter.js +173 -0
- package/dist/cli-adapters/codex-adapter.js.map +1 -0
- package/dist/cli-adapters/gemini-adapter.d.ts +50 -0
- package/dist/cli-adapters/gemini-adapter.d.ts.map +1 -0
- package/dist/cli-adapters/gemini-adapter.js +196 -0
- package/dist/cli-adapters/gemini-adapter.js.map +1 -0
- package/dist/cli-adapters/index.d.ts +75 -0
- package/dist/cli-adapters/index.d.ts.map +1 -0
- package/dist/cli-adapters/index.js +29 -0
- package/dist/cli-adapters/index.js.map +1 -0
- package/dist/cli-adapters/shared.d.ts +12 -0
- package/dist/cli-adapters/shared.d.ts.map +1 -0
- package/dist/cli-adapters/shared.js +99 -0
- package/dist/cli-adapters/shared.js.map +1 -0
- package/dist/cli-agents.d.ts +64 -2
- package/dist/cli-agents.d.ts.map +1 -1
- package/dist/cli-agents.js +417 -401
- package/dist/cli-agents.js.map +1 -1
- package/dist/debate/constitutional.d.ts +27 -0
- package/dist/debate/constitutional.d.ts.map +1 -0
- package/dist/debate/constitutional.js +74 -0
- package/dist/debate/constitutional.js.map +1 -0
- package/dist/debate/debate-orchestrator.d.ts +154 -0
- package/dist/debate/debate-orchestrator.d.ts.map +1 -0
- package/dist/debate/debate-orchestrator.js +699 -0
- package/dist/debate/debate-orchestrator.js.map +1 -0
- package/dist/debate/index.d.ts +18 -0
- package/dist/debate/index.d.ts.map +1 -0
- package/dist/debate/index.js +18 -0
- package/dist/debate/index.js.map +1 -0
- package/dist/debate/refusal-detection.d.ts +27 -0
- package/dist/debate/refusal-detection.d.ts.map +1 -0
- package/dist/debate/refusal-detection.js +62 -0
- package/dist/debate/refusal-detection.js.map +1 -0
- package/dist/debate/synthesis.d.ts +22 -0
- package/dist/debate/synthesis.d.ts.map +1 -0
- package/dist/debate/synthesis.js +117 -0
- package/dist/debate/synthesis.js.map +1 -0
- package/dist/logger.d.ts +204 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +398 -18
- package/dist/logger.js.map +1 -1
- package/dist/metrics/counter.d.ts +24 -0
- package/dist/metrics/counter.d.ts.map +1 -0
- package/dist/metrics/counter.js +60 -0
- package/dist/metrics/counter.js.map +1 -0
- package/dist/metrics/histogram.d.ts +42 -0
- package/dist/metrics/histogram.d.ts.map +1 -0
- package/dist/metrics/histogram.js +114 -0
- package/dist/metrics/histogram.js.map +1 -0
- package/dist/metrics/index.d.ts +26 -0
- package/dist/metrics/index.d.ts.map +1 -0
- package/dist/metrics/index.js +22 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/metrics/registry.d.ts +96 -0
- package/dist/metrics/registry.d.ts.map +1 -0
- package/dist/metrics/registry.js +113 -0
- package/dist/metrics/registry.js.map +1 -0
- package/dist/metrics/safe-metric.d.ts +25 -0
- package/dist/metrics/safe-metric.d.ts.map +1 -0
- package/dist/metrics/safe-metric.js +41 -0
- package/dist/metrics/safe-metric.js.map +1 -0
- package/dist/metrics/types.d.ts +82 -0
- package/dist/metrics/types.d.ts.map +1 -0
- package/dist/metrics/types.js +121 -0
- package/dist/metrics/types.js.map +1 -0
- package/dist/registry/argument-spaces.d.ts.map +1 -1
- package/dist/registry/argument-spaces.js +20 -0
- package/dist/registry/argument-spaces.js.map +1 -1
- package/dist/registry/domains.d.ts.map +1 -1
- package/dist/registry/domains.js +17 -1
- package/dist/registry/domains.js.map +1 -1
- package/dist/streaming/circuit-breaker.d.ts +13 -1
- package/dist/streaming/circuit-breaker.d.ts.map +1 -1
- package/dist/streaming/circuit-breaker.js +13 -1
- package/dist/streaming/circuit-breaker.js.map +1 -1
- package/dist/streaming/intelligent-buffer.d.ts +13 -1
- package/dist/streaming/intelligent-buffer.d.ts.map +1 -1
- package/dist/streaming/intelligent-buffer.js +13 -1
- package/dist/streaming/intelligent-buffer.js.map +1 -1
- package/dist/streaming/output-parser.d.ts +16 -2
- package/dist/streaming/output-parser.d.ts.map +1 -1
- package/dist/streaming/output-parser.js +16 -2
- package/dist/streaming/output-parser.js.map +1 -1
- package/dist/streaming/progress-tracker.d.ts +14 -1
- package/dist/streaming/progress-tracker.d.ts.map +1 -1
- package/dist/streaming/progress-tracker.js +14 -1
- package/dist/streaming/progress-tracker.js.map +1 -1
- package/dist/streaming/session-manager.d.ts +14 -1
- package/dist/streaming/session-manager.d.ts.map +1 -1
- package/dist/streaming/session-manager.js +14 -1
- package/dist/streaming/session-manager.js.map +1 -1
- package/dist/streaming/sse-transport.d.ts +12 -1
- package/dist/streaming/sse-transport.d.ts.map +1 -1
- package/dist/streaming/sse-transport.js +12 -1
- package/dist/streaming/sse-transport.js.map +1 -1
- package/dist/streaming/streaming-orchestrator.d.ts +15 -1
- package/dist/streaming/streaming-orchestrator.d.ts.map +1 -1
- package/dist/streaming/streaming-orchestrator.js +15 -1
- package/dist/streaming/streaming-orchestrator.js.map +1 -1
- package/dist/system-prompts.d.ts.map +1 -1
- package/dist/system-prompts.js +490 -4
- package/dist/system-prompts.js.map +1 -1
- package/dist/tool-definitions-generated.d.ts.map +1 -1
- package/dist/tool-definitions-generated.js +3 -1
- package/dist/tool-definitions-generated.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,699 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* DebateOrchestrator — debate orchestration extracted from brutalist-server.ts.
|
|
3
|
+
*
|
|
4
|
+
* This module encapsulates the entire debate subsystem:
|
|
5
|
+
* - handleDebateToolExecution(): cache-aware entry point for debate tool calls
|
|
6
|
+
* - executeCLIDebate(): core debate engine with 3-tier escalation
|
|
7
|
+
*
|
|
8
|
+
* Dependencies are injected via constructor, making brutalist-server.ts a pure
|
|
9
|
+
* composition root that wires and delegates.
|
|
10
|
+
*
|
|
11
|
+
* Extracted from brutalist-server.ts lines 665-1348.
|
|
12
|
+
*/
|
|
13
|
+
import { existsSync } from 'fs';
|
|
14
|
+
import { join as pathJoin, resolve as pathResolve } from 'path';
|
|
15
|
+
import { mediateTranscript } from '../utils/transcript-mediator.js';
|
|
16
|
+
import { parseCursor, PAGINATION_DEFAULTS } from '../utils/pagination.js';
|
|
17
|
+
import { safeMetric as sharedSafeMetric, } from '../metrics/index.js';
|
|
18
|
+
import { detectRefusal } from './refusal-detection.js';
|
|
19
|
+
import { constitutionalAnchor } from './constitutional.js';
|
|
20
|
+
import { synthesizeDebate } from './synthesis.js';
|
|
21
|
+
/**
 * Ordinal rank of each debate escalation tier; a larger number means a more
 * escalated tier. Ranks are compared to find the highest tier reached across
 * the turns of a debate, which becomes the `tier` label on the debate
 * duration histogram.
 */
const TIER_RANK = Object.fromEntries(
    // Array position doubles as the rank: standard=0, escalated=1, decomposed=2.
    ['standard', 'escalated', 'decomposed'].map((tierName, rank) => [tierName, rank]),
);
|
|
31
|
+
/**
|
|
32
|
+
* DebateOrchestrator encapsulates all debate orchestration logic.
|
|
33
|
+
*
|
|
34
|
+
* It accepts dependencies via constructor injection so that brutalist-server.ts
|
|
35
|
+
* remains a thin composition root.
|
|
36
|
+
*/
|
|
37
|
+
export class DebateOrchestrator {
|
|
38
|
+
/** Mutable so test harnesses can replace cliOrchestrator on BrutalistServer. */
|
|
39
|
+
_cliOrchestrator;
|
|
40
|
+
responseCache;
|
|
41
|
+
formatter;
|
|
42
|
+
config;
|
|
43
|
+
onStreamingEvent;
|
|
44
|
+
onProgressUpdate;
|
|
45
|
+
metrics;
|
|
46
|
+
log;
|
|
47
|
+
get cliOrchestrator() {
|
|
48
|
+
return this._cliOrchestrator;
|
|
49
|
+
}
|
|
50
|
+
set cliOrchestrator(value) {
|
|
51
|
+
this._cliOrchestrator = value;
|
|
52
|
+
}
|
|
53
|
+
constructor(deps) {
|
|
54
|
+
this._cliOrchestrator = deps.cliOrchestrator;
|
|
55
|
+
this.responseCache = deps.responseCache;
|
|
56
|
+
this.formatter = deps.formatter;
|
|
57
|
+
this.config = deps.config;
|
|
58
|
+
this.onStreamingEvent = deps.onStreamingEvent;
|
|
59
|
+
this.onProgressUpdate = deps.onProgressUpdate;
|
|
60
|
+
this.metrics = deps.metrics;
|
|
61
|
+
this.log = deps.log;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Isolate metric writes from business control flow.
|
|
65
|
+
*
|
|
66
|
+
* Delegates to the shared `safeMetric` helper in
|
|
67
|
+
* `src/metrics/safe-metric.ts`. The private method is retained so
|
|
68
|
+
* existing call sites inside DebateOrchestrator
|
|
69
|
+
* (`this.safeMetric(op, fn)`) keep working without a touch, and so
|
|
70
|
+
* any debate-specific metric-error instrumentation can be layered in
|
|
71
|
+
* one place in the future.
|
|
72
|
+
*
|
|
73
|
+
* Parity note: `CLIAgentOrchestrator` uses the same shared helper
|
|
74
|
+
* directly (no private method) to prevent metric throws from
|
|
75
|
+
* propagating into the outer spawn try/catch. See Cycle 3 rework
|
|
76
|
+
* Task CLI-B' in phases/instrument_cli_spawn/phase.md.
|
|
77
|
+
*/
|
|
78
|
+
safeMetric(op, fn) {
|
|
79
|
+
sharedSafeMetric(this.log, op, fn);
|
|
80
|
+
}
|
|
81
|
+
/**
|
|
82
|
+
* Handle debate tool execution with constitutional position anchoring.
|
|
83
|
+
* Uses 2 randomly selected agents (or user-specified) with explicit PRO/CON positions.
|
|
84
|
+
*
|
|
85
|
+
* This is the entry point called from the roast_cli_debate tool registration.
|
|
86
|
+
*
|
|
87
|
+
* Instrumentation (intent #1): every exit path records the debate
|
|
88
|
+
* orchestration duration histogram exactly once. The `tier` label is the
|
|
89
|
+
* MAX tier reached across all turns of the underlying `executeCLIDebate`
|
|
90
|
+
* call; cache-hit paths short-circuit before any CLI agent runs, so their
|
|
91
|
+
* tier is always `'standard'`. The outer try/finally placement ensures
|
|
92
|
+
* error paths, refusal paths, and cache-hit paths all emit exactly one
|
|
93
|
+
* observation — `executeCLIDebate` itself has NO timer block to avoid
|
|
94
|
+
* double-observation.
|
|
95
|
+
*/
|
|
96
|
+
async handleDebateToolExecution(args, extra) {
|
|
97
|
+
const handleToolLog = this.log.forOperation('handle_tool');
|
|
98
|
+
const t0 = Date.now();
|
|
99
|
+
// Histogram labels — DEBATE_DURATION_LABELS = ['outcome', 'tier'] as const.
|
|
100
|
+
// outcome is derived from the debate result's behavior (refused vs. success)
|
|
101
|
+
// or forced to 'error' in the catch branch.
|
|
102
|
+
let outcome = 'success';
|
|
103
|
+
let tier = 'standard';
|
|
104
|
+
try {
|
|
105
|
+
// Build pagination params
|
|
106
|
+
const paginationParams = {
|
|
107
|
+
offset: args.offset || 0,
|
|
108
|
+
limit: args.limit || PAGINATION_DEFAULTS.DEFAULT_LIMIT_TOKENS
|
|
109
|
+
};
|
|
110
|
+
if (args.cursor) {
|
|
111
|
+
const cursorParams = parseCursor(args.cursor);
|
|
112
|
+
Object.assign(paginationParams, cursorParams);
|
|
113
|
+
}
|
|
114
|
+
const explicitPaginationRequested = args.offset !== undefined ||
|
|
115
|
+
args.limit !== undefined ||
|
|
116
|
+
args.cursor !== undefined ||
|
|
117
|
+
args.context_id !== undefined;
|
|
118
|
+
// Extract session ID early — needed for cache session isolation
|
|
119
|
+
const sessionId = extra?.sessionId ||
|
|
120
|
+
extra?._meta?.sessionId ||
|
|
121
|
+
extra?.headers?.['mcp-session-id'] ||
|
|
122
|
+
'anonymous';
|
|
123
|
+
// Validate resume flag requires context_id
|
|
124
|
+
if (args.resume && !args.context_id) {
|
|
125
|
+
throw new Error(`The 'resume' flag requires a 'context_id' from a previous debate. ` +
|
|
126
|
+
`Run an initial debate first, then use the returned context_id with resume: true.`);
|
|
127
|
+
}
|
|
128
|
+
// Check cache if context_id provided
|
|
129
|
+
let conversationHistory;
|
|
130
|
+
if (args.context_id && !args.force_refresh) {
|
|
131
|
+
const cachedResponse = await this.responseCache.getByContextId(args.context_id, sessionId);
|
|
132
|
+
if (cachedResponse) {
|
|
133
|
+
handleToolLog.info(`🎯 Debate cache HIT for context_id: ${args.context_id}`);
|
|
134
|
+
if (args.resume === true) {
|
|
135
|
+
// CONVERSATION CONTINUATION: Continue the debate
|
|
136
|
+
if (!args.topic || args.topic.trim() === '') {
|
|
137
|
+
throw new Error(`Debate continuation (resume: true) requires a new prompt/question. ` +
|
|
138
|
+
`Provide your follow-up in the topic field.`);
|
|
139
|
+
}
|
|
140
|
+
// Security: avoid logging user-provided topic text at info level.
|
|
141
|
+
// Emit length only; if a developer needs the preview, run at debug.
|
|
142
|
+
handleToolLog.info('Debate continuation - new prompt received', {
|
|
143
|
+
topicLength: args.topic.length,
|
|
144
|
+
});
|
|
145
|
+
conversationHistory = cachedResponse.conversationHistory || [];
|
|
146
|
+
// Fall through to execute new debate round with history
|
|
147
|
+
}
|
|
148
|
+
else {
|
|
149
|
+
// PAGINATION: Return cached debate result — no agent ran,
|
|
150
|
+
// outcome='success' and tier='standard' (their initial values).
|
|
151
|
+
handleToolLog.info(`📖 Debate pagination request - returning cached response`);
|
|
152
|
+
const cachedResult = {
|
|
153
|
+
success: true,
|
|
154
|
+
responses: [{
|
|
155
|
+
agent: 'cached',
|
|
156
|
+
success: true,
|
|
157
|
+
output: cachedResponse.content,
|
|
158
|
+
executionTime: 0
|
|
159
|
+
}]
|
|
160
|
+
};
|
|
161
|
+
return this.formatter.formatToolResponse(cachedResult, args.verbose, paginationParams, args.context_id, explicitPaginationRequested);
|
|
162
|
+
}
|
|
163
|
+
}
|
|
164
|
+
else {
|
|
165
|
+
handleToolLog.warn(`❌ Debate cache MISS for context_id: ${args.context_id}`);
|
|
166
|
+
throw new Error(`Context ID "${args.context_id}" not found in cache. ` +
|
|
167
|
+
`It may have expired (2 hour TTL) or belong to a different session. ` +
|
|
168
|
+
`Remove context_id parameter to run a new debate.`);
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
// Generate cache key for this debate
|
|
172
|
+
const cacheKey = this.responseCache.generateCacheKey({
|
|
173
|
+
tool: 'roast_cli_debate',
|
|
174
|
+
topic: args.topic,
|
|
175
|
+
proPosition: args.proPosition,
|
|
176
|
+
conPosition: args.conPosition,
|
|
177
|
+
agents: args.agents,
|
|
178
|
+
rounds: args.rounds,
|
|
179
|
+
context: args.context
|
|
180
|
+
});
|
|
181
|
+
// Check cache for identical request (if not resuming)
|
|
182
|
+
if (!args.force_refresh && !args.resume) {
|
|
183
|
+
const cachedContent = await this.responseCache.get(cacheKey);
|
|
184
|
+
if (cachedContent) {
|
|
185
|
+
const existingContextId = this.responseCache.findContextIdForKey(cacheKey);
|
|
186
|
+
const contextId = existingContextId
|
|
187
|
+
? this.responseCache.createAlias(existingContextId, cacheKey)
|
|
188
|
+
: this.responseCache.generateContextId(cacheKey);
|
|
189
|
+
handleToolLog.info(`🎯 Debate cache hit for new request, using context_id: ${contextId}`);
|
|
190
|
+
const cachedResult = {
|
|
191
|
+
success: true,
|
|
192
|
+
responses: [{
|
|
193
|
+
agent: 'cached',
|
|
194
|
+
success: true,
|
|
195
|
+
output: cachedContent,
|
|
196
|
+
executionTime: 0
|
|
197
|
+
}]
|
|
198
|
+
};
|
|
199
|
+
// Cache hit: outcome='success', tier='standard' (no agent ran).
|
|
200
|
+
return this.formatter.formatToolResponse(cachedResult, args.verbose, paginationParams, contextId, explicitPaginationRequested);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
// Build context with conversation history if resuming
|
|
204
|
+
let debateContext = args.context || '';
|
|
205
|
+
if (conversationHistory && conversationHistory.length > 0) {
|
|
206
|
+
const previousDebate = conversationHistory.map(msg => {
|
|
207
|
+
const role = msg.role === 'user' ? 'User Question' : 'Debate Response';
|
|
208
|
+
return `${role}:\n${msg.content}`;
|
|
209
|
+
}).join('\n\n---\n\n');
|
|
210
|
+
debateContext = `## Previous Debate Context\n\n${previousDebate}\n\n---\n\n## New Follow-up Question\n\nThe user wants to continue this debate with a new question or direction.\n\n${debateContext}`;
|
|
211
|
+
handleToolLog.info(`💬 Injected ${conversationHistory.length} previous messages into debate context`);
|
|
212
|
+
}
|
|
213
|
+
// Extract streaming context from extra
|
|
214
|
+
const progressToken = extra?._meta?.progressToken;
|
|
215
|
+
// Execute the debate
|
|
216
|
+
const numRounds = Math.min(args.rounds || 3, 3);
|
|
217
|
+
const result = await this.executeCLIDebate({
|
|
218
|
+
topic: args.topic,
|
|
219
|
+
proPosition: args.proPosition,
|
|
220
|
+
conPosition: args.conPosition,
|
|
221
|
+
agents: args.agents,
|
|
222
|
+
rounds: numRounds,
|
|
223
|
+
context: debateContext,
|
|
224
|
+
workingDirectory: args.workingDirectory,
|
|
225
|
+
models: args.models,
|
|
226
|
+
onStreamingEvent: this.onStreamingEvent,
|
|
227
|
+
progressToken,
|
|
228
|
+
onProgress: progressToken && sessionId ?
|
|
229
|
+
(progress, total, message) => this.onProgressUpdate(progressToken, progress, total, message, sessionId) : undefined,
|
|
230
|
+
sessionId,
|
|
231
|
+
mcp_servers: args.mcp_servers,
|
|
232
|
+
});
|
|
233
|
+
// Derive outcome and tier from the debate result for the histogram
|
|
234
|
+
// observation that fires in the finally block below. The counter for
|
|
235
|
+
// per-turn escalation tier already fired inside executeCLIDebate; this
|
|
236
|
+
// block only extracts the histogram labels — no metric emissions here.
|
|
237
|
+
const turns = result.debateBehavior?.turns ?? [];
|
|
238
|
+
if (turns.length > 0) {
|
|
239
|
+
// Tier = MAX tier reached across all turns (higher rank wins).
|
|
240
|
+
tier = turns.reduce((max, t) => TIER_RANK[t.tier] > TIER_RANK[max] ? t.tier : max, 'standard');
|
|
241
|
+
// Outcome = 'refused' when every turn's engaged=false AND at least
|
|
242
|
+
// one turn refused. Otherwise 'success'. The catch branch below
|
|
243
|
+
// overrides to 'error'.
|
|
244
|
+
const allDisengaged = turns.every(t => !t.engaged);
|
|
245
|
+
const anyRefused = turns.some(t => t.refused);
|
|
246
|
+
if (allDisengaged && anyRefused) {
|
|
247
|
+
outcome = 'refused';
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
// Cache the result
|
|
251
|
+
let contextId;
|
|
252
|
+
if (result.success && result.responses.length > 0) {
|
|
253
|
+
const fullContent = this.formatter.extractFullContent(result);
|
|
254
|
+
if (fullContent) {
|
|
255
|
+
const now = Date.now();
|
|
256
|
+
const updatedConversation = [
|
|
257
|
+
...(conversationHistory || []),
|
|
258
|
+
{ role: 'user', content: args.topic, timestamp: now },
|
|
259
|
+
{ role: 'assistant', content: fullContent, timestamp: now }
|
|
260
|
+
];
|
|
261
|
+
if (args.resume && args.context_id && conversationHistory) {
|
|
262
|
+
// Update existing cache entry
|
|
263
|
+
contextId = args.context_id;
|
|
264
|
+
await this.responseCache.updateByContextId(contextId, fullContent, updatedConversation, sessionId);
|
|
265
|
+
this.log.forOperation('cache').info(`✅ Updated debate conversation ${contextId} (now ${updatedConversation.length} messages)`);
|
|
266
|
+
}
|
|
267
|
+
else {
|
|
268
|
+
// New debate - create new context_id
|
|
269
|
+
const { contextId: newId } = await this.responseCache.set({ tool: 'roast_cli_debate', topic: args.topic }, fullContent, cacheKey, sessionId, undefined, updatedConversation);
|
|
270
|
+
contextId = newId;
|
|
271
|
+
this.log.forOperation('cache').info(`✅ Cached new debate with context ID: ${contextId}`);
|
|
272
|
+
}
|
|
273
|
+
}
|
|
274
|
+
}
|
|
275
|
+
return this.formatter.formatToolResponse(result, args.verbose, paginationParams, contextId, explicitPaginationRequested);
|
|
276
|
+
}
|
|
277
|
+
catch (error) {
|
|
278
|
+
outcome = 'error';
|
|
279
|
+
return this.formatter.formatErrorResponse(error);
|
|
280
|
+
}
|
|
281
|
+
finally {
|
|
282
|
+
// Record the debate duration exactly once per invocation. This is the
|
|
283
|
+
// SINGLE histogram observation point for debate orchestration — do
|
|
284
|
+
// NOT add another observe() call inside executeCLIDebate or any
|
|
285
|
+
// inner path. The typed label record below references
|
|
286
|
+
// DEBATE_DURATION_LABELS so a future label-set change triggers a
|
|
287
|
+
// compile error at this call site.
|
|
288
|
+
const durationSec = (Date.now() - t0) / 1000;
|
|
289
|
+
const durationLabels = {
|
|
290
|
+
outcome,
|
|
291
|
+
tier,
|
|
292
|
+
};
|
|
293
|
+
this.safeMetric('observe:debate_duration', () => this.metrics.debateOrchestrationDurationSeconds.observe(durationLabels, durationSec));
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
/**
|
|
297
|
+
* Execute CLI debate with constitutional position anchoring.
|
|
298
|
+
* 2 agents, explicit PRO/CON positions, context compression between rounds.
|
|
299
|
+
*
|
|
300
|
+
* This is the core debate engine. It manages:
|
|
301
|
+
* - Agent selection and position assignment
|
|
302
|
+
* - Round execution with 3-tier refusal escalation
|
|
303
|
+
* - Transcript mediation between rounds
|
|
304
|
+
* - Behavioral metadata and asymmetry detection
|
|
305
|
+
* - Synthesis generation
|
|
306
|
+
*/
|
|
307
|
+
async executeCLIDebate(args) {
|
|
308
|
+
const { topic, proPosition, conPosition, rounds, context, workingDirectory, models, onStreamingEvent, progressToken, onProgress, sessionId } = args;
|
|
309
|
+
const debateLog = this.log.forOperation('execute_debate');
|
|
310
|
+
const escalateLog = this.log.forOperation('escalate');
|
|
311
|
+
// Security (Cycle 3 F32): the debug-level emission previously leaked
|
|
312
|
+
// user-provided topic/proPosition/conPosition text into logs whenever
|
|
313
|
+
// BRUTALIST_LOG_LEVEL=debug was set — identical disclosure channel to
|
|
314
|
+
// the info-level site already redacted at :263. Emit length-only
|
|
315
|
+
// fields matching that pattern; a developer needing the raw text
|
|
316
|
+
// should inspect the transcript passed to executeCLIDebate directly.
|
|
317
|
+
debateLog.debug("Executing CLI debate", {
|
|
318
|
+
topicLength: topic.length,
|
|
319
|
+
proPositionLength: proPosition.length,
|
|
320
|
+
conPositionLength: conPosition.length,
|
|
321
|
+
rounds,
|
|
322
|
+
});
|
|
323
|
+
try {
|
|
324
|
+
// Get available CLIs
|
|
325
|
+
const cliContext = await this.cliOrchestrator.detectCLIContext();
|
|
326
|
+
const availableCLIs = cliContext.availableCLIs;
|
|
327
|
+
if (availableCLIs.length < 2) {
|
|
328
|
+
throw new Error(`Need at least 2 CLI agents for debate. Available: ${availableCLIs.join(', ')}`);
|
|
329
|
+
}
|
|
330
|
+
// Select 2 agents: use specified or random selection
|
|
331
|
+
let selectedAgents;
|
|
332
|
+
if (args.agents && args.agents.length === 2) {
|
|
333
|
+
// Validate specified agents are available
|
|
334
|
+
const unavailable = args.agents.filter(a => !availableCLIs.includes(a));
|
|
335
|
+
if (unavailable.length > 0) {
|
|
336
|
+
throw new Error(`Specified agents not available: ${unavailable.join(', ')}. Available: ${availableCLIs.join(', ')}`);
|
|
337
|
+
}
|
|
338
|
+
selectedAgents = args.agents;
|
|
339
|
+
}
|
|
340
|
+
else {
|
|
341
|
+
// Random selection of 2 agents
|
|
342
|
+
const shuffled = [...availableCLIs].sort(() => Math.random() - 0.5);
|
|
343
|
+
selectedAgents = shuffled.slice(0, 2);
|
|
344
|
+
}
|
|
345
|
+
// Randomly assign PRO/CON positions
|
|
346
|
+
const shuffledAgents = [...selectedAgents].sort(() => Math.random() - 0.5);
|
|
347
|
+
const proAgent = shuffledAgents[0];
|
|
348
|
+
const conAgent = shuffledAgents[1];
|
|
349
|
+
debateLog.info(`🎭 Debate: ${proAgent.toUpperCase()} (PRO) vs ${conAgent.toUpperCase()} (CON)`);
|
|
350
|
+
const debateResponses = [];
|
|
351
|
+
const transcript = [];
|
|
352
|
+
const turnMetadata = [];
|
|
353
|
+
let compressedContext = '';
|
|
354
|
+
const totalTurns = rounds * 2; // 2 agents per round
|
|
355
|
+
let completedTurns = 0;
|
|
356
|
+
// Frontier 1: Detect self-referential working directory (Codex reading its own control prompts)
|
|
357
|
+
const resolvedWorkDir = args.target || workingDirectory || this.config.workingDirectory || process.cwd();
|
|
358
|
+
const absWorkDir = pathResolve(resolvedWorkDir);
|
|
359
|
+
const isSelfReferential = existsSync(pathJoin(absWorkDir, 'src', 'brutalist-server.ts'))
|
|
360
|
+
|| existsSync(pathJoin(absWorkDir, 'dist', 'brutalist-server.js'));
|
|
361
|
+
if (isSelfReferential) {
|
|
362
|
+
debateLog.info(`🔒 Debate working directory is brutalist repo — Codex will be sandboxed`);
|
|
363
|
+
}
|
|
364
|
+
// Execute rounds
|
|
365
|
+
for (let round = 1; round <= rounds; round++) {
|
|
366
|
+
debateLog.info(`📢 Round ${round}/${rounds}`);
|
|
367
|
+
// Both agents argue in each round
|
|
368
|
+
for (const [agent, position, thesis] of [
|
|
369
|
+
[proAgent, 'PRO', proPosition],
|
|
370
|
+
[conAgent, 'CON', conPosition]
|
|
371
|
+
]) {
|
|
372
|
+
let prompt;
|
|
373
|
+
debateLog.info(` ⚔️ ${agent.toUpperCase()} (${position}) arguing...`);
|
|
374
|
+
// Build prompt-generation function so we can rebuild on escalation
|
|
375
|
+
const mcpBlock = args.mcp_servers?.length
|
|
376
|
+
? `\nEXTERNAL TOOL ACCESS: You have MCP tools available (${args.mcp_servers.join(', ')}). Use them to gather evidence supporting your position. You MUST NOT modify the codebase.\n`
|
|
377
|
+
: '';
|
|
378
|
+
const buildPrompt = (tier) => {
|
|
379
|
+
if (round === 1) {
|
|
380
|
+
return `${constitutionalAnchor(agent, position, thesis, tier)}
|
|
381
|
+
${mcpBlock}
|
|
382
|
+
TOPIC: ${topic}
|
|
383
|
+
${context ? `CONTEXT: ${context}` : ''}
|
|
384
|
+
|
|
385
|
+
Round 1: Opening analysis.
|
|
386
|
+
|
|
387
|
+
Present your ${position} analysis. Structure your response:
|
|
388
|
+
|
|
389
|
+
<thesis_statement>
|
|
390
|
+
Your core analytical position
|
|
391
|
+
</thesis_statement>
|
|
392
|
+
|
|
393
|
+
<key_arguments>
|
|
394
|
+
Three strongest arguments grounding your position in evidence and reasoning
|
|
395
|
+
</key_arguments>
|
|
396
|
+
|
|
397
|
+
<preemptive_rebuttal>
|
|
398
|
+
Address the strongest counterargument and show why it does not defeat your position
|
|
399
|
+
</preemptive_rebuttal>
|
|
400
|
+
|
|
401
|
+
<conclusion>
|
|
402
|
+
Reinforce why your analysis holds
|
|
403
|
+
</conclusion>`;
|
|
404
|
+
}
|
|
405
|
+
else {
|
|
406
|
+
const rawOpponent = transcript
|
|
407
|
+
.filter(t => t.agent !== agent && t.round === round - 1)
|
|
408
|
+
.map(t => t.content)
|
|
409
|
+
.join('\n\n');
|
|
410
|
+
const { sanitized: opponentTranscript, patternsDetected: opponentPatterns } = mediateTranscript(rawOpponent, 'sanitize', 4000);
|
|
411
|
+
if (opponentPatterns.length > 0) {
|
|
412
|
+
debateLog.info(`🛡️ Mediated ${opponentPatterns.length} patterns from opponent transcript for ${agent}`, { opponentPatterns });
|
|
413
|
+
}
|
|
414
|
+
return `${constitutionalAnchor(agent, position, thesis, tier)}
|
|
415
|
+
${mcpBlock}
|
|
416
|
+
TOPIC: ${topic}
|
|
417
|
+
|
|
418
|
+
Round ${round}: Engage with your counterpart's analysis.
|
|
419
|
+
|
|
420
|
+
YOUR COUNTERPART'S PREVIOUS ANALYSIS:
|
|
421
|
+
${opponentTranscript || 'No previous analysis recorded'}
|
|
422
|
+
|
|
423
|
+
${compressedContext ? `ANALYSIS CONTEXT SO FAR:\n${compressedContext}\n` : ''}
|
|
424
|
+
|
|
425
|
+
<counterpart_gaps>
|
|
426
|
+
Identify the specific weaknesses in their reasoning and evidence
|
|
427
|
+
</counterpart_gaps>
|
|
428
|
+
|
|
429
|
+
<deepening_analysis>
|
|
430
|
+
Advance new evidence and reasoning that strengthens your position
|
|
431
|
+
</deepening_analysis>
|
|
432
|
+
|
|
433
|
+
<reinforcement>
|
|
434
|
+
Show why your position holds against their strongest points
|
|
435
|
+
</reinforcement>`;
|
|
436
|
+
}
|
|
437
|
+
};
|
|
438
|
+
try {
|
|
439
|
+
const turnRequestId = `debate-${sessionId || 'anon'}-${round}-${agent}-${Date.now()}`;
|
|
440
|
+
// Emit agent_start streaming event
|
|
441
|
+
if (onStreamingEvent) {
|
|
442
|
+
onStreamingEvent({
|
|
443
|
+
type: 'agent_start',
|
|
444
|
+
agent,
|
|
445
|
+
content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) arguing...`,
|
|
446
|
+
timestamp: Date.now(),
|
|
447
|
+
sessionId,
|
|
448
|
+
});
|
|
449
|
+
}
|
|
450
|
+
// Working directory: debateMode suppresses Codex shell exploration via prompt,
|
|
451
|
+
// so no need to redirect — Codex still needs a git repo to function
|
|
452
|
+
const agentWorkDir = workingDirectory || this.config.workingDirectory;
|
|
453
|
+
const cliOptions = {
|
|
454
|
+
workingDirectory: agentWorkDir,
|
|
455
|
+
timeout: (this.config.defaultTimeout || 60000) * 2,
|
|
456
|
+
models,
|
|
457
|
+
onStreamingEvent,
|
|
458
|
+
progressToken,
|
|
459
|
+
onProgress,
|
|
460
|
+
sessionId,
|
|
461
|
+
requestId: turnRequestId,
|
|
462
|
+
debateMode: true, // Frontier 1: suppress Codex shell exploration
|
|
463
|
+
mcpServers: args.mcp_servers, // MCP servers for evidence-backed debate
|
|
464
|
+
};
|
|
465
|
+
// Three-tier escalation: standard -> escalated -> decomposed
|
|
466
|
+
prompt = buildPrompt('standard');
|
|
467
|
+
let wasRefused = false;
|
|
468
|
+
let wasEscalated = false;
|
|
469
|
+
let engagedAfterEscalation = false;
|
|
470
|
+
let finalTier = 'standard';
|
|
471
|
+
let response = await this.cliOrchestrator.executeSingleCLI(agent, prompt, prompt, cliOptions);
|
|
472
|
+
// Tier 2: Detect refusal -> retry with analytical framing
|
|
473
|
+
if (response.success && response.output && detectRefusal(response.output)) {
|
|
474
|
+
wasRefused = true;
|
|
475
|
+
wasEscalated = true;
|
|
476
|
+
finalTier = 'escalated';
|
|
477
|
+
escalateLog.warn(`🛡️ ${agent.toUpperCase()} (${position}) refused — escalating to analytical framing (tier 2)`);
|
|
478
|
+
const escalatedPrompt = buildPrompt('escalated');
|
|
479
|
+
const retryResponse = await this.cliOrchestrator.executeSingleCLI(agent, escalatedPrompt, escalatedPrompt, { ...cliOptions, requestId: `${turnRequestId}-escalated` });
|
|
480
|
+
if (retryResponse.success && retryResponse.output && !detectRefusal(retryResponse.output)) {
|
|
481
|
+
escalateLog.info(`✅ ${agent.toUpperCase()} (${position}) engaged after tier 2 escalation`);
|
|
482
|
+
engagedAfterEscalation = true;
|
|
483
|
+
response = retryResponse;
|
|
484
|
+
}
|
|
485
|
+
else {
|
|
486
|
+
// Tier 3: Decomposed — scholarly steelman framing
|
|
487
|
+
finalTier = 'decomposed';
|
|
488
|
+
escalateLog.warn(`🛡️ ${agent.toUpperCase()} (${position}) refused tier 2 — escalating to decomposed framing (tier 3)`);
|
|
489
|
+
const decomposedPrompt = buildPrompt('decomposed');
|
|
490
|
+
const decomposedResponse = await this.cliOrchestrator.executeSingleCLI(agent, decomposedPrompt, decomposedPrompt, { ...cliOptions, requestId: `${turnRequestId}-decomposed` });
|
|
491
|
+
if (decomposedResponse.success && decomposedResponse.output && !detectRefusal(decomposedResponse.output)) {
|
|
492
|
+
escalateLog.info(`✅ ${agent.toUpperCase()} (${position}) engaged after tier 3 decomposition`);
|
|
493
|
+
engagedAfterEscalation = true;
|
|
494
|
+
response = decomposedResponse;
|
|
495
|
+
}
|
|
496
|
+
else {
|
|
497
|
+
escalateLog.warn(`⚠️ ${agent.toUpperCase()} (${position}) refused all 3 tiers — using best response`);
|
|
498
|
+
// Use decomposed response if available (likely less meta-commentary)
|
|
499
|
+
if (decomposedResponse.success && decomposedResponse.output) {
|
|
500
|
+
response = decomposedResponse;
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
// Always add response (success or failure) for visibility
|
|
506
|
+
debateResponses.push(response);
|
|
507
|
+
completedTurns++;
|
|
508
|
+
// Emit agent_complete streaming event
|
|
509
|
+
if (onStreamingEvent) {
|
|
510
|
+
onStreamingEvent({
|
|
511
|
+
type: 'agent_complete',
|
|
512
|
+
agent,
|
|
513
|
+
content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) ${response.success ? 'finished' : 'failed'}`,
|
|
514
|
+
timestamp: Date.now(),
|
|
515
|
+
sessionId,
|
|
516
|
+
});
|
|
517
|
+
}
|
|
518
|
+
// Emit progress update
|
|
519
|
+
if (onProgress) {
|
|
520
|
+
onProgress(completedTurns, totalTurns, `Debate: ${completedTurns}/${totalTurns} turns complete`);
|
|
521
|
+
}
|
|
522
|
+
// Frontier 3: Track behavioral metadata
|
|
523
|
+
const finalRefused = response.success && response.output ? detectRefusal(response.output) : false;
|
|
524
|
+
turnMetadata.push({
|
|
525
|
+
agent: agent,
|
|
526
|
+
position: position,
|
|
527
|
+
round,
|
|
528
|
+
engaged: response.success && !!response.output && !finalRefused,
|
|
529
|
+
refused: wasRefused,
|
|
530
|
+
escalated: wasEscalated,
|
|
531
|
+
engagedAfterEscalation,
|
|
532
|
+
responseLength: response.output?.length || 0,
|
|
533
|
+
executionTime: response.executionTime,
|
|
534
|
+
tier: engagedAfterEscalation ? finalTier : (wasEscalated ? finalTier : 'standard'),
|
|
535
|
+
});
|
|
536
|
+
// Escalation-tier counter: fires exactly ONCE per turn, labeled
|
|
537
|
+
// with this turn's FINAL tier (standard/escalated/decomposed).
|
|
538
|
+
// Retries within a single turn are NOT counted separately —
|
|
539
|
+
// they are represented by the final tier value on the pushed
|
|
540
|
+
// turnMetadata record. The typed label record references
|
|
541
|
+
// ESCALATION_TIER_LABELS so a future label-set change
|
|
542
|
+
// triggers a compile error at this call site. The call is
|
|
543
|
+
// wrapped in safeMetric so a metric throw cannot corrupt the
|
|
544
|
+
// surrounding turn try/catch (would otherwise double-push
|
|
545
|
+
// metadata and double-count completedTurns).
|
|
546
|
+
const successTierLabels = {
|
|
547
|
+
tier: turnMetadata[turnMetadata.length - 1].tier,
|
|
548
|
+
};
|
|
549
|
+
this.safeMetric('inc:escalation_tier', () => this.metrics.debateEscalationTierTotal.inc(successTierLabels, 1));
|
|
550
|
+
if (response.success && response.output) {
|
|
551
|
+
transcript.push({
|
|
552
|
+
agent,
|
|
553
|
+
position,
|
|
554
|
+
round,
|
|
555
|
+
content: response.output
|
|
556
|
+
});
|
|
557
|
+
}
|
|
558
|
+
else {
|
|
559
|
+
// Security (Cycle 3 F33 Pattern A): response.error can carry
|
|
560
|
+
// CLI-subprocess stderr tail, which in turn may echo model-
|
|
561
|
+
// generated or prompt-echoed text. Emit a presence-only flag
|
|
562
|
+
// at warn level instead of the raw string; operators with
|
|
563
|
+
// debug file-logging can still correlate via agent/position/
|
|
564
|
+
// round, and the transcript is the canonical source of truth
|
|
565
|
+
// for the actual failure text.
|
|
566
|
+
debateLog.warn(`⚠️ ${agent.toUpperCase()} (${position}) failed`, {
|
|
567
|
+
agent,
|
|
568
|
+
position,
|
|
569
|
+
error: response.error ? '<redacted>' : undefined,
|
|
570
|
+
hasOutput: Boolean(response.output),
|
|
571
|
+
});
|
|
572
|
+
}
|
|
573
|
+
}
|
|
574
|
+
catch (error) {
|
|
575
|
+
// Security (Cycle 3 F33): the StructuredLogger emitError path
|
|
576
|
+
// serializes the raw Error verbatim (message, stack, name)
|
|
577
|
+
// into NDJSON. Passing the original `error` leaks any
|
|
578
|
+
// CLI-subprocess stderr tail or prompt-echoed text embedded
|
|
579
|
+
// in error.message. Pass a sanitized Error-shaped shim that
|
|
580
|
+
// preserves `name` for diagnostic triage while redacting the
|
|
581
|
+
// payload. `.stack` is omitted from the shim (undefined) so
|
|
582
|
+
// the file-side fileData record carries only name+message.
|
|
583
|
+
const errorName = error instanceof Error ? error.name : 'Error';
|
|
584
|
+
const errorShim = { name: errorName, message: '<redacted>' };
|
|
585
|
+
debateLog.error(`❌ ${agent.toUpperCase()} (${position}) threw error`, errorShim);
|
|
586
|
+
completedTurns++;
|
|
587
|
+
// Security (Cycle 4 F7/F17): the same raw caught error.message
|
|
588
|
+
// that Cycle 3 redacted at the logger sink was still flowing
|
|
589
|
+
// through two adjacent sinks — the streaming event content
|
|
590
|
+
// (remote subscribers) and the debateResponses push (flows
|
|
591
|
+
// back out as `responses` at the return site, and downstream
|
|
592
|
+
// into synthesis.ts and response-formatter.ts). Emit a static
|
|
593
|
+
// classifier that retains the agent identity for operator
|
|
594
|
+
// triage but carries no subprocess/prompt-derived payload.
|
|
595
|
+
const redactedTurnError = `${agent.toUpperCase()} execution failed. See internal logs for details.`;
|
|
596
|
+
if (onStreamingEvent) {
|
|
597
|
+
onStreamingEvent({
|
|
598
|
+
type: 'agent_error',
|
|
599
|
+
agent,
|
|
600
|
+
content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) error: ${redactedTurnError}`,
|
|
601
|
+
timestamp: Date.now(),
|
|
602
|
+
sessionId,
|
|
603
|
+
});
|
|
604
|
+
}
|
|
605
|
+
turnMetadata.push({
|
|
606
|
+
agent: agent,
|
|
607
|
+
position: position,
|
|
608
|
+
round,
|
|
609
|
+
engaged: false,
|
|
610
|
+
refused: false,
|
|
611
|
+
escalated: false,
|
|
612
|
+
engagedAfterEscalation: false,
|
|
613
|
+
responseLength: 0,
|
|
614
|
+
executionTime: 0,
|
|
615
|
+
tier: 'standard',
|
|
616
|
+
});
|
|
617
|
+
// Error-path turn: still counts exactly ONCE per turn. Tier is
|
|
618
|
+
// 'standard' because this catch path records no escalation state
|
|
619
|
+
// (a throw during a tier 2/3 retry is also labeled 'standard'). The
|
|
620
|
+
// typed label record references ESCALATION_TIER_LABELS so a
|
|
621
|
+
// future label-set change triggers a compile error at this
|
|
622
|
+
// call site. Wrapped in safeMetric so a metric throw cannot
|
|
623
|
+
// re-enter the catch path and double-count the turn.
|
|
624
|
+
const errorTierLabels = {
|
|
625
|
+
tier: turnMetadata[turnMetadata.length - 1].tier,
|
|
626
|
+
};
|
|
627
|
+
this.safeMetric('inc:escalation_tier', () => this.metrics.debateEscalationTierTotal.inc(errorTierLabels, 1));
|
|
628
|
+
debateResponses.push({
|
|
629
|
+
agent,
|
|
630
|
+
success: false,
|
|
631
|
+
output: '',
|
|
632
|
+
error: redactedTurnError,
|
|
633
|
+
executionTime: 0
|
|
634
|
+
});
|
|
635
|
+
}
|
|
636
|
+
}
|
|
637
|
+
// Compress context for next round with mediation (if not final round)
|
|
638
|
+
if (round < rounds) {
|
|
639
|
+
const roundTranscript = transcript
|
|
640
|
+
.filter(t => t.round === round)
|
|
641
|
+
.map(t => {
|
|
642
|
+
const { sanitized } = mediateTranscript(t.content, 'sanitize', 1500);
|
|
643
|
+
return `${t.agent.toUpperCase()} (${t.position}): ${sanitized}`;
|
|
644
|
+
})
|
|
645
|
+
.join('\n\n---\n\n');
|
|
646
|
+
compressedContext = `Round ${round} Summary:\n${roundTranscript}`;
|
|
647
|
+
}
|
|
648
|
+
}
|
|
649
|
+
// Compute position-dependent asymmetry summary
|
|
650
|
+
const proTurns = turnMetadata.filter(t => t.position === 'PRO');
|
|
651
|
+
const conTurns = turnMetadata.filter(t => t.position === 'CON');
|
|
652
|
+
const proRefusalRate = proTurns.length > 0
|
|
653
|
+
? proTurns.filter(t => t.refused).length / proTurns.length : 0;
|
|
654
|
+
const conRefusalRate = conTurns.length > 0
|
|
655
|
+
? conTurns.filter(t => t.refused).length / conTurns.length : 0;
|
|
656
|
+
const debateAgents = [...new Set(turnMetadata.map(t => t.agent))];
|
|
657
|
+
const agentAsymmetries = debateAgents.map(a => {
|
|
658
|
+
const aPro = turnMetadata.filter(t => t.agent === a && t.position === 'PRO');
|
|
659
|
+
const aCon = turnMetadata.filter(t => t.agent === a && t.position === 'CON');
|
|
660
|
+
const proEngaged = aPro.some(t => t.engaged);
|
|
661
|
+
const conEngaged = aCon.some(t => t.engaged);
|
|
662
|
+
return { agent: a, proEngaged, conEngaged, asymmetric: proEngaged !== conEngaged };
|
|
663
|
+
});
|
|
664
|
+
const asymmetryDetected = Math.abs(proRefusalRate - conRefusalRate) > 0.3
|
|
665
|
+
|| agentAsymmetries.some(a => a.asymmetric);
|
|
666
|
+
const behaviorSummary = {
|
|
667
|
+
topic, proPosition, conPosition,
|
|
668
|
+
turns: turnMetadata,
|
|
669
|
+
asymmetry: {
|
|
670
|
+
detected: asymmetryDetected,
|
|
671
|
+
description: asymmetryDetected
|
|
672
|
+
? `Position-dependent asymmetry: PRO refusal ${(proRefusalRate * 100).toFixed(0)}%, CON refusal ${(conRefusalRate * 100).toFixed(0)}%`
|
|
673
|
+
: 'No significant position-dependent asymmetry detected',
|
|
674
|
+
proRefusalRate,
|
|
675
|
+
conRefusalRate,
|
|
676
|
+
agentAsymmetries,
|
|
677
|
+
}
|
|
678
|
+
};
|
|
679
|
+
if (asymmetryDetected) {
|
|
680
|
+
debateLog.warn(`🎭 Alignment asymmetry detected: ${behaviorSummary.asymmetry.description}`);
|
|
681
|
+
}
|
|
682
|
+
// Build synthesis with behavioral data
|
|
683
|
+
const synthesis = synthesizeDebate(debateResponses, topic, rounds, new Map([[proAgent, `PRO: ${proPosition}`], [conAgent, `CON: ${conPosition}`]]), behaviorSummary);
|
|
684
|
+
return {
|
|
685
|
+
success: debateResponses.some(r => r.success),
|
|
686
|
+
responses: debateResponses,
|
|
687
|
+
synthesis,
|
|
688
|
+
debateBehavior: behaviorSummary,
|
|
689
|
+
analysisType: 'cli_debate',
|
|
690
|
+
topic
|
|
691
|
+
};
|
|
692
|
+
}
|
|
693
|
+
catch (error) {
|
|
694
|
+
debateLog.error("CLI debate execution failed", error);
|
|
695
|
+
throw error;
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
}
|
|
699
|
+
//# sourceMappingURL=debate-orchestrator.js.map
|