@brutalist/mcp 1.8.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +26 -0
  2. package/dist/brutalist-server.d.ts +31 -9
  3. package/dist/brutalist-server.d.ts.map +1 -1
  4. package/dist/brutalist-server.js +107 -673
  5. package/dist/brutalist-server.js.map +1 -1
  6. package/dist/cli-adapters/claude-adapter.d.ts +25 -0
  7. package/dist/cli-adapters/claude-adapter.d.ts.map +1 -0
  8. package/dist/cli-adapters/claude-adapter.js +245 -0
  9. package/dist/cli-adapters/claude-adapter.js.map +1 -0
  10. package/dist/cli-adapters/codex-adapter.d.ts +23 -0
  11. package/dist/cli-adapters/codex-adapter.d.ts.map +1 -0
  12. package/dist/cli-adapters/codex-adapter.js +173 -0
  13. package/dist/cli-adapters/codex-adapter.js.map +1 -0
  14. package/dist/cli-adapters/gemini-adapter.d.ts +50 -0
  15. package/dist/cli-adapters/gemini-adapter.d.ts.map +1 -0
  16. package/dist/cli-adapters/gemini-adapter.js +196 -0
  17. package/dist/cli-adapters/gemini-adapter.js.map +1 -0
  18. package/dist/cli-adapters/index.d.ts +75 -0
  19. package/dist/cli-adapters/index.d.ts.map +1 -0
  20. package/dist/cli-adapters/index.js +29 -0
  21. package/dist/cli-adapters/index.js.map +1 -0
  22. package/dist/cli-adapters/shared.d.ts +12 -0
  23. package/dist/cli-adapters/shared.d.ts.map +1 -0
  24. package/dist/cli-adapters/shared.js +99 -0
  25. package/dist/cli-adapters/shared.js.map +1 -0
  26. package/dist/cli-agents.d.ts +64 -2
  27. package/dist/cli-agents.d.ts.map +1 -1
  28. package/dist/cli-agents.js +341 -394
  29. package/dist/cli-agents.js.map +1 -1
  30. package/dist/debate/constitutional.d.ts +27 -0
  31. package/dist/debate/constitutional.d.ts.map +1 -0
  32. package/dist/debate/constitutional.js +74 -0
  33. package/dist/debate/constitutional.js.map +1 -0
  34. package/dist/debate/debate-orchestrator.d.ts +154 -0
  35. package/dist/debate/debate-orchestrator.d.ts.map +1 -0
  36. package/dist/debate/debate-orchestrator.js +699 -0
  37. package/dist/debate/debate-orchestrator.js.map +1 -0
  38. package/dist/debate/index.d.ts +18 -0
  39. package/dist/debate/index.d.ts.map +1 -0
  40. package/dist/debate/index.js +18 -0
  41. package/dist/debate/index.js.map +1 -0
  42. package/dist/debate/refusal-detection.d.ts +27 -0
  43. package/dist/debate/refusal-detection.d.ts.map +1 -0
  44. package/dist/debate/refusal-detection.js +62 -0
  45. package/dist/debate/refusal-detection.js.map +1 -0
  46. package/dist/debate/synthesis.d.ts +22 -0
  47. package/dist/debate/synthesis.d.ts.map +1 -0
  48. package/dist/debate/synthesis.js +117 -0
  49. package/dist/debate/synthesis.js.map +1 -0
  50. package/dist/logger.d.ts +204 -1
  51. package/dist/logger.d.ts.map +1 -1
  52. package/dist/logger.js +398 -18
  53. package/dist/logger.js.map +1 -1
  54. package/dist/metrics/counter.d.ts +24 -0
  55. package/dist/metrics/counter.d.ts.map +1 -0
  56. package/dist/metrics/counter.js +60 -0
  57. package/dist/metrics/counter.js.map +1 -0
  58. package/dist/metrics/histogram.d.ts +42 -0
  59. package/dist/metrics/histogram.d.ts.map +1 -0
  60. package/dist/metrics/histogram.js +114 -0
  61. package/dist/metrics/histogram.js.map +1 -0
  62. package/dist/metrics/index.d.ts +26 -0
  63. package/dist/metrics/index.d.ts.map +1 -0
  64. package/dist/metrics/index.js +22 -0
  65. package/dist/metrics/index.js.map +1 -0
  66. package/dist/metrics/registry.d.ts +96 -0
  67. package/dist/metrics/registry.d.ts.map +1 -0
  68. package/dist/metrics/registry.js +113 -0
  69. package/dist/metrics/registry.js.map +1 -0
  70. package/dist/metrics/safe-metric.d.ts +25 -0
  71. package/dist/metrics/safe-metric.d.ts.map +1 -0
  72. package/dist/metrics/safe-metric.js +41 -0
  73. package/dist/metrics/safe-metric.js.map +1 -0
  74. package/dist/metrics/types.d.ts +82 -0
  75. package/dist/metrics/types.d.ts.map +1 -0
  76. package/dist/metrics/types.js +121 -0
  77. package/dist/metrics/types.js.map +1 -0
  78. package/dist/registry/argument-spaces.d.ts.map +1 -1
  79. package/dist/registry/argument-spaces.js +20 -0
  80. package/dist/registry/argument-spaces.js.map +1 -1
  81. package/dist/registry/domains.d.ts.map +1 -1
  82. package/dist/registry/domains.js +17 -1
  83. package/dist/registry/domains.js.map +1 -1
  84. package/dist/streaming/circuit-breaker.d.ts +13 -1
  85. package/dist/streaming/circuit-breaker.d.ts.map +1 -1
  86. package/dist/streaming/circuit-breaker.js +13 -1
  87. package/dist/streaming/circuit-breaker.js.map +1 -1
  88. package/dist/streaming/intelligent-buffer.d.ts +13 -1
  89. package/dist/streaming/intelligent-buffer.d.ts.map +1 -1
  90. package/dist/streaming/intelligent-buffer.js +13 -1
  91. package/dist/streaming/intelligent-buffer.js.map +1 -1
  92. package/dist/streaming/output-parser.d.ts +16 -2
  93. package/dist/streaming/output-parser.d.ts.map +1 -1
  94. package/dist/streaming/output-parser.js +16 -2
  95. package/dist/streaming/output-parser.js.map +1 -1
  96. package/dist/streaming/progress-tracker.d.ts +14 -1
  97. package/dist/streaming/progress-tracker.d.ts.map +1 -1
  98. package/dist/streaming/progress-tracker.js +14 -1
  99. package/dist/streaming/progress-tracker.js.map +1 -1
  100. package/dist/streaming/session-manager.d.ts +14 -1
  101. package/dist/streaming/session-manager.d.ts.map +1 -1
  102. package/dist/streaming/session-manager.js +14 -1
  103. package/dist/streaming/session-manager.js.map +1 -1
  104. package/dist/streaming/sse-transport.d.ts +12 -1
  105. package/dist/streaming/sse-transport.d.ts.map +1 -1
  106. package/dist/streaming/sse-transport.js +12 -1
  107. package/dist/streaming/sse-transport.js.map +1 -1
  108. package/dist/streaming/streaming-orchestrator.d.ts +15 -1
  109. package/dist/streaming/streaming-orchestrator.d.ts.map +1 -1
  110. package/dist/streaming/streaming-orchestrator.js +15 -1
  111. package/dist/streaming/streaming-orchestrator.js.map +1 -1
  112. package/dist/system-prompts.d.ts.map +1 -1
  113. package/dist/system-prompts.js +490 -4
  114. package/dist/system-prompts.js.map +1 -1
  115. package/dist/tool-definitions-generated.d.ts.map +1 -1
  116. package/dist/tool-definitions-generated.js +3 -1
  117. package/dist/tool-definitions-generated.js.map +1 -1
  118. package/package.json +1 -1
@@ -0,0 +1,699 @@
1
+ /**
2
+ * DebateOrchestrator — debate orchestration extracted from brutalist-server.ts.
3
+ *
4
+ * This module encapsulates the entire debate subsystem:
5
+ * - handleDebateToolExecution(): cache-aware entry point for debate tool calls
6
+ * - executeCLIDebate(): core debate engine with 3-tier escalation
7
+ *
8
+ * Dependencies are injected via constructor, making brutalist-server.ts a pure
9
+ * composition root that wires and delegates.
10
+ *
11
+ * Extracted from brutalist-server.ts lines 665-1348.
12
+ */
13
+ import { existsSync } from 'fs';
14
+ import { join as pathJoin, resolve as pathResolve } from 'path';
15
+ import { mediateTranscript } from '../utils/transcript-mediator.js';
16
+ import { parseCursor, PAGINATION_DEFAULTS } from '../utils/pagination.js';
17
+ import { safeMetric as sharedSafeMetric, } from '../metrics/index.js';
18
+ import { detectRefusal } from './refusal-detection.js';
19
+ import { constitutionalAnchor } from './constitutional.js';
20
+ import { synthesizeDebate } from './synthesis.js';
21
+ /**
22
+ * Rank of each debate tier for computing the MAX tier reached across all
23
+ * turns of a debate. Used to derive the `tier` label on the debate
24
+ * duration histogram (higher rank wins).
25
+ */
26
// Frozen so that no caller can accidentally reassign or add a rank at runtime;
// the ranks are compared pairwise when picking the highest tier a debate reached.
const TIER_RANK = Object.freeze({
    standard: 0,
    escalated: 1,
    decomposed: 2,
});
31
+ /**
32
+ * DebateOrchestrator encapsulates all debate orchestration logic.
33
+ *
34
+ * It accepts dependencies via constructor injection so that brutalist-server.ts
35
+ * remains a thin composition root.
36
+ */
37
+ export class DebateOrchestrator {
38
+ /** Mutable so test harnesses can replace cliOrchestrator on BrutalistServer. */
39
+ _cliOrchestrator;
40
+ responseCache;
41
+ formatter;
42
+ config;
43
+ onStreamingEvent;
44
+ onProgressUpdate;
45
+ metrics;
46
+ log;
47
+ get cliOrchestrator() {
48
+ return this._cliOrchestrator;
49
+ }
50
+ set cliOrchestrator(value) {
51
+ this._cliOrchestrator = value;
52
+ }
53
+ constructor(deps) {
54
+ this._cliOrchestrator = deps.cliOrchestrator;
55
+ this.responseCache = deps.responseCache;
56
+ this.formatter = deps.formatter;
57
+ this.config = deps.config;
58
+ this.onStreamingEvent = deps.onStreamingEvent;
59
+ this.onProgressUpdate = deps.onProgressUpdate;
60
+ this.metrics = deps.metrics;
61
+ this.log = deps.log;
62
+ }
63
+ /**
64
+ * Isolate metric writes from business control flow.
65
+ *
66
+ * Delegates to the shared `safeMetric` helper in
67
+ * `src/metrics/safe-metric.ts`. The private method is retained so
68
+ * existing call sites inside DebateOrchestrator
69
+ * (`this.safeMetric(op, fn)`) keep working without a touch, and so
70
+ * any debate-specific metric-error instrumentation can be layered in
71
+ * one place in the future.
72
+ *
73
+ * Parity note: `CLIAgentOrchestrator` uses the same shared helper
74
+ * directly (no private method) to prevent metric throws from
75
+ * propagating into the outer spawn try/catch. See Cycle 3 rework
76
+ * Task CLI-B' in phases/instrument_cli_spawn/phase.md.
77
+ */
78
+ safeMetric(op, fn) {
79
+ sharedSafeMetric(this.log, op, fn);
80
+ }
81
+ /**
82
+ * Handle debate tool execution with constitutional position anchoring.
83
+ * Uses 2 randomly selected agents (or user-specified) with explicit PRO/CON positions.
84
+ *
85
+ * This is the entry point called from the roast_cli_debate tool registration.
86
+ *
87
+ * Instrumentation (intent #1): every exit path records the debate
88
+ * orchestration duration histogram exactly once. The `tier` label is the
89
+ * MAX tier reached across all turns of the underlying `executeCLIDebate`
90
+ * call; cache-hit paths short-circuit before any CLI agent runs, so their
91
+ * tier is always `'standard'`. The outer try/finally placement ensures
92
+ * error paths, refusal paths, and cache-hit paths all emit exactly one
93
+ * observation — `executeCLIDebate` itself has NO timer block to avoid
94
+ * double-observation.
95
+ */
96
+ async handleDebateToolExecution(args, extra) {
97
+ const handleToolLog = this.log.forOperation('handle_tool');
98
+ const t0 = Date.now();
99
+ // Histogram labels — DEBATE_DURATION_LABELS = ['outcome', 'tier'] as const.
100
+ // outcome is derived from the debate result's behavior (refused vs. success)
101
+ // or forced to 'error' in the catch branch.
102
+ let outcome = 'success';
103
+ let tier = 'standard';
104
+ try {
105
+ // Build pagination params
106
+ const paginationParams = {
107
+ offset: args.offset || 0,
108
+ limit: args.limit || PAGINATION_DEFAULTS.DEFAULT_LIMIT_TOKENS
109
+ };
110
+ if (args.cursor) {
111
+ const cursorParams = parseCursor(args.cursor);
112
+ Object.assign(paginationParams, cursorParams);
113
+ }
114
+ const explicitPaginationRequested = args.offset !== undefined ||
115
+ args.limit !== undefined ||
116
+ args.cursor !== undefined ||
117
+ args.context_id !== undefined;
118
+ // Extract session ID early — needed for cache session isolation
119
+ const sessionId = extra?.sessionId ||
120
+ extra?._meta?.sessionId ||
121
+ extra?.headers?.['mcp-session-id'] ||
122
+ 'anonymous';
123
+ // Validate resume flag requires context_id
124
+ if (args.resume && !args.context_id) {
125
+ throw new Error(`The 'resume' flag requires a 'context_id' from a previous debate. ` +
126
+ `Run an initial debate first, then use the returned context_id with resume: true.`);
127
+ }
128
+ // Check cache if context_id provided
129
+ let conversationHistory;
130
+ if (args.context_id && !args.force_refresh) {
131
+ const cachedResponse = await this.responseCache.getByContextId(args.context_id, sessionId);
132
+ if (cachedResponse) {
133
+ handleToolLog.info(`🎯 Debate cache HIT for context_id: ${args.context_id}`);
134
+ if (args.resume === true) {
135
+ // CONVERSATION CONTINUATION: Continue the debate
136
+ if (!args.topic || args.topic.trim() === '') {
137
+ throw new Error(`Debate continuation (resume: true) requires a new prompt/question. ` +
138
+ `Provide your follow-up in the topic field.`);
139
+ }
140
+ // Security: avoid logging user-provided topic text at info level.
141
+ // Emit length only; if a developer needs the preview, run at debug.
142
+ handleToolLog.info('Debate continuation - new prompt received', {
143
+ topicLength: args.topic.length,
144
+ });
145
+ conversationHistory = cachedResponse.conversationHistory || [];
146
+ // Fall through to execute new debate round with history
147
+ }
148
+ else {
149
+ // PAGINATION: Return cached debate result — no agent ran,
150
+ // outcome='success' and tier='standard' (their initial values).
151
+ handleToolLog.info(`📖 Debate pagination request - returning cached response`);
152
+ const cachedResult = {
153
+ success: true,
154
+ responses: [{
155
+ agent: 'cached',
156
+ success: true,
157
+ output: cachedResponse.content,
158
+ executionTime: 0
159
+ }]
160
+ };
161
+ return this.formatter.formatToolResponse(cachedResult, args.verbose, paginationParams, args.context_id, explicitPaginationRequested);
162
+ }
163
+ }
164
+ else {
165
+ handleToolLog.warn(`❌ Debate cache MISS for context_id: ${args.context_id}`);
166
+ throw new Error(`Context ID "${args.context_id}" not found in cache. ` +
167
+ `It may have expired (2 hour TTL) or belong to a different session. ` +
168
+ `Remove context_id parameter to run a new debate.`);
169
+ }
170
+ }
171
+ // Generate cache key for this debate
172
+ const cacheKey = this.responseCache.generateCacheKey({
173
+ tool: 'roast_cli_debate',
174
+ topic: args.topic,
175
+ proPosition: args.proPosition,
176
+ conPosition: args.conPosition,
177
+ agents: args.agents,
178
+ rounds: args.rounds,
179
+ context: args.context
180
+ });
181
+ // Check cache for identical request (if not resuming)
182
+ if (!args.force_refresh && !args.resume) {
183
+ const cachedContent = await this.responseCache.get(cacheKey);
184
+ if (cachedContent) {
185
+ const existingContextId = this.responseCache.findContextIdForKey(cacheKey);
186
+ const contextId = existingContextId
187
+ ? this.responseCache.createAlias(existingContextId, cacheKey)
188
+ : this.responseCache.generateContextId(cacheKey);
189
+ handleToolLog.info(`🎯 Debate cache hit for new request, using context_id: ${contextId}`);
190
+ const cachedResult = {
191
+ success: true,
192
+ responses: [{
193
+ agent: 'cached',
194
+ success: true,
195
+ output: cachedContent,
196
+ executionTime: 0
197
+ }]
198
+ };
199
+ // Cache hit: outcome='success', tier='standard' (no agent ran).
200
+ return this.formatter.formatToolResponse(cachedResult, args.verbose, paginationParams, contextId, explicitPaginationRequested);
201
+ }
202
+ }
203
+ // Build context with conversation history if resuming
204
+ let debateContext = args.context || '';
205
+ if (conversationHistory && conversationHistory.length > 0) {
206
+ const previousDebate = conversationHistory.map(msg => {
207
+ const role = msg.role === 'user' ? 'User Question' : 'Debate Response';
208
+ return `${role}:\n${msg.content}`;
209
+ }).join('\n\n---\n\n');
210
+ debateContext = `## Previous Debate Context\n\n${previousDebate}\n\n---\n\n## New Follow-up Question\n\nThe user wants to continue this debate with a new question or direction.\n\n${debateContext}`;
211
+ handleToolLog.info(`💬 Injected ${conversationHistory.length} previous messages into debate context`);
212
+ }
213
+ // Extract streaming context from extra
214
+ const progressToken = extra?._meta?.progressToken;
215
+ // Execute the debate
216
+ const numRounds = Math.min(args.rounds || 3, 3);
217
+ const result = await this.executeCLIDebate({
218
+ topic: args.topic,
219
+ proPosition: args.proPosition,
220
+ conPosition: args.conPosition,
221
+ agents: args.agents,
222
+ rounds: numRounds,
223
+ context: debateContext,
224
+ workingDirectory: args.workingDirectory,
225
+ models: args.models,
226
+ onStreamingEvent: this.onStreamingEvent,
227
+ progressToken,
228
+ onProgress: progressToken && sessionId ?
229
+ (progress, total, message) => this.onProgressUpdate(progressToken, progress, total, message, sessionId) : undefined,
230
+ sessionId,
231
+ mcp_servers: args.mcp_servers,
232
+ });
233
+ // Derive outcome and tier from the debate result for the histogram
234
+ // observation that fires in the finally block below. The counter for
235
+ // per-turn escalation tier already fired inside executeCLIDebate; this
236
+ // block only extracts the histogram labels — no metric emissions here.
237
+ const turns = result.debateBehavior?.turns ?? [];
238
+ if (turns.length > 0) {
239
+ // Tier = MAX tier reached across all turns (higher rank wins).
240
+ tier = turns.reduce((max, t) => TIER_RANK[t.tier] > TIER_RANK[max] ? t.tier : max, 'standard');
241
+ // Outcome = 'refused' when every turn's engaged=false AND at least
242
+ // one turn refused. Otherwise 'success'. The catch branch below
243
+ // overrides to 'error'.
244
+ const allDisengaged = turns.every(t => !t.engaged);
245
+ const anyRefused = turns.some(t => t.refused);
246
+ if (allDisengaged && anyRefused) {
247
+ outcome = 'refused';
248
+ }
249
+ }
250
+ // Cache the result
251
+ let contextId;
252
+ if (result.success && result.responses.length > 0) {
253
+ const fullContent = this.formatter.extractFullContent(result);
254
+ if (fullContent) {
255
+ const now = Date.now();
256
+ const updatedConversation = [
257
+ ...(conversationHistory || []),
258
+ { role: 'user', content: args.topic, timestamp: now },
259
+ { role: 'assistant', content: fullContent, timestamp: now }
260
+ ];
261
+ if (args.resume && args.context_id && conversationHistory) {
262
+ // Update existing cache entry
263
+ contextId = args.context_id;
264
+ await this.responseCache.updateByContextId(contextId, fullContent, updatedConversation, sessionId);
265
+ this.log.forOperation('cache').info(`✅ Updated debate conversation ${contextId} (now ${updatedConversation.length} messages)`);
266
+ }
267
+ else {
268
+ // New debate - create new context_id
269
+ const { contextId: newId } = await this.responseCache.set({ tool: 'roast_cli_debate', topic: args.topic }, fullContent, cacheKey, sessionId, undefined, updatedConversation);
270
+ contextId = newId;
271
+ this.log.forOperation('cache').info(`✅ Cached new debate with context ID: ${contextId}`);
272
+ }
273
+ }
274
+ }
275
+ return this.formatter.formatToolResponse(result, args.verbose, paginationParams, contextId, explicitPaginationRequested);
276
+ }
277
+ catch (error) {
278
+ outcome = 'error';
279
+ return this.formatter.formatErrorResponse(error);
280
+ }
281
+ finally {
282
+ // Record the debate duration exactly once per invocation. This is the
283
+ // SINGLE histogram observation point for debate orchestration — do
284
+ // NOT add another observe() call inside executeCLIDebate or any
285
+ // inner path. The typed label record below references
286
+ // DEBATE_DURATION_LABELS so a future label-set change triggers a
287
+ // compile error at this call site.
288
+ const durationSec = (Date.now() - t0) / 1000;
289
+ const durationLabels = {
290
+ outcome,
291
+ tier,
292
+ };
293
+ this.safeMetric('observe:debate_duration', () => this.metrics.debateOrchestrationDurationSeconds.observe(durationLabels, durationSec));
294
+ }
295
+ }
296
+ /**
297
+ * Execute CLI debate with constitutional position anchoring.
298
+ * 2 agents, explicit PRO/CON positions, context compression between rounds.
299
+ *
300
+ * This is the core debate engine. It manages:
301
+ * - Agent selection and position assignment
302
+ * - Round execution with 3-tier refusal escalation
303
+ * - Transcript mediation between rounds
304
+ * - Behavioral metadata and asymmetry detection
305
+ * - Synthesis generation
306
+ */
307
+ async executeCLIDebate(args) {
308
+ const { topic, proPosition, conPosition, rounds, context, workingDirectory, models, onStreamingEvent, progressToken, onProgress, sessionId } = args;
309
+ const debateLog = this.log.forOperation('execute_debate');
310
+ const escalateLog = this.log.forOperation('escalate');
311
+ // Security (Cycle 3 F32): the debug-level emission previously leaked
312
+ // user-provided topic/proPosition/conPosition text into logs whenever
313
+ // BRUTALIST_LOG_LEVEL=debug was set — identical disclosure channel to
314
+ // the info-level site already redacted at :263. Emit length-only
315
+ // fields matching that pattern; a developer needing the raw text
316
+ // should inspect the transcript passed to executeCLIDebate directly.
317
+ debateLog.debug("Executing CLI debate", {
318
+ topicLength: topic.length,
319
+ proPositionLength: proPosition.length,
320
+ conPositionLength: conPosition.length,
321
+ rounds,
322
+ });
323
+ try {
324
+ // Get available CLIs
325
+ const cliContext = await this.cliOrchestrator.detectCLIContext();
326
+ const availableCLIs = cliContext.availableCLIs;
327
+ if (availableCLIs.length < 2) {
328
+ throw new Error(`Need at least 2 CLI agents for debate. Available: ${availableCLIs.join(', ')}`);
329
+ }
330
+ // Select 2 agents: use specified or random selection
331
+ let selectedAgents;
332
+ if (args.agents && args.agents.length === 2) {
333
+ // Validate specified agents are available
334
+ const unavailable = args.agents.filter(a => !availableCLIs.includes(a));
335
+ if (unavailable.length > 0) {
336
+ throw new Error(`Specified agents not available: ${unavailable.join(', ')}. Available: ${availableCLIs.join(', ')}`);
337
+ }
338
+ selectedAgents = args.agents;
339
+ }
340
+ else {
341
+ // Random selection of 2 agents
342
+ const shuffled = [...availableCLIs].sort(() => Math.random() - 0.5);
343
+ selectedAgents = shuffled.slice(0, 2);
344
+ }
345
+ // Randomly assign PRO/CON positions
346
+ const shuffledAgents = [...selectedAgents].sort(() => Math.random() - 0.5);
347
+ const proAgent = shuffledAgents[0];
348
+ const conAgent = shuffledAgents[1];
349
+ debateLog.info(`🎭 Debate: ${proAgent.toUpperCase()} (PRO) vs ${conAgent.toUpperCase()} (CON)`);
350
+ const debateResponses = [];
351
+ const transcript = [];
352
+ const turnMetadata = [];
353
+ let compressedContext = '';
354
+ const totalTurns = rounds * 2; // 2 agents per round
355
+ let completedTurns = 0;
356
+ // Frontier 1: Detect self-referential working directory (Codex reading its own control prompts)
357
+ const resolvedWorkDir = args.target || workingDirectory || this.config.workingDirectory || process.cwd();
358
+ const absWorkDir = pathResolve(resolvedWorkDir);
359
+ const isSelfReferential = existsSync(pathJoin(absWorkDir, 'src', 'brutalist-server.ts'))
360
+ || existsSync(pathJoin(absWorkDir, 'dist', 'brutalist-server.js'));
361
+ if (isSelfReferential) {
362
+ debateLog.info(`🔒 Debate working directory is brutalist repo — Codex will be sandboxed`);
363
+ }
364
+ // Execute rounds
365
+ for (let round = 1; round <= rounds; round++) {
366
+ debateLog.info(`📢 Round ${round}/${rounds}`);
367
+ // Both agents argue in each round
368
+ for (const [agent, position, thesis] of [
369
+ [proAgent, 'PRO', proPosition],
370
+ [conAgent, 'CON', conPosition]
371
+ ]) {
372
+ let prompt;
373
+ debateLog.info(` ⚔️ ${agent.toUpperCase()} (${position}) arguing...`);
374
+ // Build prompt-generation function so we can rebuild on escalation
375
+ const mcpBlock = args.mcp_servers?.length
376
+ ? `\nEXTERNAL TOOL ACCESS: You have MCP tools available (${args.mcp_servers.join(', ')}). Use them to gather evidence supporting your position. You MUST NOT modify the codebase.\n`
377
+ : '';
378
+ const buildPrompt = (tier) => {
379
+ if (round === 1) {
380
+ return `${constitutionalAnchor(agent, position, thesis, tier)}
381
+ ${mcpBlock}
382
+ TOPIC: ${topic}
383
+ ${context ? `CONTEXT: ${context}` : ''}
384
+
385
+ Round 1: Opening analysis.
386
+
387
+ Present your ${position} analysis. Structure your response:
388
+
389
+ <thesis_statement>
390
+ Your core analytical position
391
+ </thesis_statement>
392
+
393
+ <key_arguments>
394
+ Three strongest arguments grounding your position in evidence and reasoning
395
+ </key_arguments>
396
+
397
+ <preemptive_rebuttal>
398
+ Address the strongest counterargument and show why it does not defeat your position
399
+ </preemptive_rebuttal>
400
+
401
+ <conclusion>
402
+ Reinforce why your analysis holds
403
+ </conclusion>`;
404
+ }
405
+ else {
406
+ const rawOpponent = transcript
407
+ .filter(t => t.agent !== agent && t.round === round - 1)
408
+ .map(t => t.content)
409
+ .join('\n\n');
410
+ const { sanitized: opponentTranscript, patternsDetected: opponentPatterns } = mediateTranscript(rawOpponent, 'sanitize', 4000);
411
+ if (opponentPatterns.length > 0) {
412
+ debateLog.info(`🛡️ Mediated ${opponentPatterns.length} patterns from opponent transcript for ${agent}`, { opponentPatterns });
413
+ }
414
+ return `${constitutionalAnchor(agent, position, thesis, tier)}
415
+ ${mcpBlock}
416
+ TOPIC: ${topic}
417
+
418
+ Round ${round}: Engage with your counterpart's analysis.
419
+
420
+ YOUR COUNTERPART'S PREVIOUS ANALYSIS:
421
+ ${opponentTranscript || 'No previous analysis recorded'}
422
+
423
+ ${compressedContext ? `ANALYSIS CONTEXT SO FAR:\n${compressedContext}\n` : ''}
424
+
425
+ <counterpart_gaps>
426
+ Identify the specific weaknesses in their reasoning and evidence
427
+ </counterpart_gaps>
428
+
429
+ <deepening_analysis>
430
+ Advance new evidence and reasoning that strengthens your position
431
+ </deepening_analysis>
432
+
433
+ <reinforcement>
434
+ Show why your position holds against their strongest points
435
+ </reinforcement>`;
436
+ }
437
+ };
438
+ try {
439
+ const turnRequestId = `debate-${sessionId || 'anon'}-${round}-${agent}-${Date.now()}`;
440
+ // Emit agent_start streaming event
441
+ if (onStreamingEvent) {
442
+ onStreamingEvent({
443
+ type: 'agent_start',
444
+ agent,
445
+ content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) arguing...`,
446
+ timestamp: Date.now(),
447
+ sessionId,
448
+ });
449
+ }
450
+ // Working directory: debateMode suppresses Codex shell exploration via prompt,
451
+ // so no need to redirect — Codex still needs a git repo to function
452
+ const agentWorkDir = workingDirectory || this.config.workingDirectory;
453
+ const cliOptions = {
454
+ workingDirectory: agentWorkDir,
455
+ timeout: (this.config.defaultTimeout || 60000) * 2,
456
+ models,
457
+ onStreamingEvent,
458
+ progressToken,
459
+ onProgress,
460
+ sessionId,
461
+ requestId: turnRequestId,
462
+ debateMode: true, // Frontier 1: suppress Codex shell exploration
463
+ mcpServers: args.mcp_servers, // MCP servers for evidence-backed debate
464
+ };
465
+ // Three-tier escalation: standard -> escalated -> decomposed
466
+ prompt = buildPrompt('standard');
467
+ let wasRefused = false;
468
+ let wasEscalated = false;
469
+ let engagedAfterEscalation = false;
470
+ let finalTier = 'standard';
471
+ let response = await this.cliOrchestrator.executeSingleCLI(agent, prompt, prompt, cliOptions);
472
+ // Tier 2: Detect refusal -> retry with analytical framing
473
+ if (response.success && response.output && detectRefusal(response.output)) {
474
+ wasRefused = true;
475
+ wasEscalated = true;
476
+ finalTier = 'escalated';
477
+ escalateLog.warn(`🛡️ ${agent.toUpperCase()} (${position}) refused — escalating to analytical framing (tier 2)`);
478
+ const escalatedPrompt = buildPrompt('escalated');
479
+ const retryResponse = await this.cliOrchestrator.executeSingleCLI(agent, escalatedPrompt, escalatedPrompt, { ...cliOptions, requestId: `${turnRequestId}-escalated` });
480
+ if (retryResponse.success && retryResponse.output && !detectRefusal(retryResponse.output)) {
481
+ escalateLog.info(`✅ ${agent.toUpperCase()} (${position}) engaged after tier 2 escalation`);
482
+ engagedAfterEscalation = true;
483
+ response = retryResponse;
484
+ }
485
+ else {
486
+ // Tier 3: Decomposed — scholarly steelman framing
487
+ finalTier = 'decomposed';
488
+ escalateLog.warn(`🛡️ ${agent.toUpperCase()} (${position}) refused tier 2 — escalating to decomposed framing (tier 3)`);
489
+ const decomposedPrompt = buildPrompt('decomposed');
490
+ const decomposedResponse = await this.cliOrchestrator.executeSingleCLI(agent, decomposedPrompt, decomposedPrompt, { ...cliOptions, requestId: `${turnRequestId}-decomposed` });
491
+ if (decomposedResponse.success && decomposedResponse.output && !detectRefusal(decomposedResponse.output)) {
492
+ escalateLog.info(`✅ ${agent.toUpperCase()} (${position}) engaged after tier 3 decomposition`);
493
+ engagedAfterEscalation = true;
494
+ response = decomposedResponse;
495
+ }
496
+ else {
497
+ escalateLog.warn(`⚠️ ${agent.toUpperCase()} (${position}) refused all 3 tiers — using best response`);
498
+ // Use decomposed response if available (likely less meta-commentary)
499
+ if (decomposedResponse.success && decomposedResponse.output) {
500
+ response = decomposedResponse;
501
+ }
502
+ }
503
+ }
504
+ }
505
+ // Always add response (success or failure) for visibility
506
+ debateResponses.push(response);
507
+ completedTurns++;
508
+ // Emit agent_complete streaming event
509
+ if (onStreamingEvent) {
510
+ onStreamingEvent({
511
+ type: 'agent_complete',
512
+ agent,
513
+ content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) ${response.success ? 'finished' : 'failed'}`,
514
+ timestamp: Date.now(),
515
+ sessionId,
516
+ });
517
+ }
518
+ // Emit progress update
519
+ if (onProgress) {
520
+ onProgress(completedTurns, totalTurns, `Debate: ${completedTurns}/${totalTurns} turns complete`);
521
+ }
522
+ // Frontier 3: Track behavioral metadata
523
+ const finalRefused = response.success && response.output ? detectRefusal(response.output) : false;
524
+ turnMetadata.push({
525
+ agent: agent,
526
+ position: position,
527
+ round,
528
+ engaged: response.success && !!response.output && !finalRefused,
529
+ refused: wasRefused,
530
+ escalated: wasEscalated,
531
+ engagedAfterEscalation,
532
+ responseLength: response.output?.length || 0,
533
+ executionTime: response.executionTime,
534
+ tier: engagedAfterEscalation ? finalTier : (wasEscalated ? finalTier : 'standard'),
535
+ });
536
+ // Escalation-tier counter: fires exactly ONCE per turn, labeled
537
+ // with this turn's FINAL tier (standard/escalated/decomposed).
538
+ // Retries within a single turn are NOT counted separately —
539
+ // they are represented by the final tier value on the pushed
540
+ // turnMetadata record. The typed label record references
541
+ // ESCALATION_TIER_LABELS so a future label-set change
542
+ // triggers a compile error at this call site. The call is
543
+ // wrapped in safeMetric so a metric throw cannot corrupt the
544
+ // surrounding turn try/catch (would otherwise double-push
545
+ // metadata and double-count completedTurns).
546
+ const successTierLabels = {
547
+ tier: turnMetadata[turnMetadata.length - 1].tier,
548
+ };
549
+ this.safeMetric('inc:escalation_tier', () => this.metrics.debateEscalationTierTotal.inc(successTierLabels, 1));
550
+ if (response.success && response.output) {
551
+ transcript.push({
552
+ agent,
553
+ position,
554
+ round,
555
+ content: response.output
556
+ });
557
+ }
558
+ else {
559
+ // Security (Cycle 3 F33 Pattern A): response.error can carry
560
+ // CLI-subprocess stderr tail, which in turn may echo model-
561
+ // generated or prompt-echoed text. Emit a presence-only flag
562
+ // at warn level instead of the raw string; operators with
563
+ // debug file-logging can still correlate via agent/position/
564
+ // round, and the transcript is the canonical source of truth
565
+ // for the actual failure text.
566
+ debateLog.warn(`⚠️ ${agent.toUpperCase()} (${position}) failed`, {
567
+ agent,
568
+ position,
569
+ error: response.error ? '<redacted>' : undefined,
570
+ hasOutput: Boolean(response.output),
571
+ });
572
+ }
573
+ }
574
+ catch (error) {
575
+ // Security (Cycle 3 F33): the StructuredLogger emitError path
576
+ // serializes the raw Error verbatim (message, stack, name)
577
+ // into NDJSON. Passing the original `error` leaks any
578
+ // CLI-subprocess stderr tail or prompt-echoed text embedded
579
+ // in error.message. Pass a sanitized Error-shaped shim that
580
+ // preserves `name` for diagnostic triage while redacting the
581
+ // payload. `.stack` is omitted from the shim (undefined) so
582
+ // the file-side fileData record carries only name+message.
583
+ const errorName = error instanceof Error ? error.name : 'Error';
584
+ const errorShim = { name: errorName, message: '<redacted>' };
585
+ debateLog.error(`❌ ${agent.toUpperCase()} (${position}) threw error`, errorShim);
586
+ completedTurns++;
587
+ // Security (Cycle 4 F7/F17): the same raw caught error.message
588
+ // that Cycle 3 redacted at the logger sink was still flowing
589
+ // through two adjacent sinks — the streaming event content
590
+ // (remote subscribers) and the debateResponses push (flows
591
+ // back out as `responses` at the return site, and downstream
592
+ // into synthesis.ts and response-formatter.ts). Emit a static
593
+ // classifier that retains the agent identity for operator
594
+ // triage but carries no subprocess/prompt-derived payload.
595
+ const redactedTurnError = `${agent.toUpperCase()} execution failed. See internal logs for details.`;
596
+ if (onStreamingEvent) {
597
+ onStreamingEvent({
598
+ type: 'agent_error',
599
+ agent,
600
+ content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) error: ${redactedTurnError}`,
601
+ timestamp: Date.now(),
602
+ sessionId,
603
+ });
604
+ }
605
+ turnMetadata.push({
606
+ agent: agent,
607
+ position: position,
608
+ round,
609
+ engaged: false,
610
+ refused: false,
611
+ escalated: false,
612
+ engagedAfterEscalation: false,
613
+ responseLength: 0,
614
+ executionTime: 0,
615
+ tier: 'standard',
616
+ });
617
+ // Error-path turn: still counts exactly ONCE per turn. Tier is
618
+ // 'standard' because the turn never reached the refusal-retry
619
+ // branches — it threw before any escalation decision. The
620
+ // typed label record references ESCALATION_TIER_LABELS so a
621
+ // future label-set change triggers a compile error at this
622
+ // call site. Wrapped in safeMetric so a metric throw cannot
623
+ // re-enter the catch path and double-count the turn.
624
+ const errorTierLabels = {
625
+ tier: turnMetadata[turnMetadata.length - 1].tier,
626
+ };
627
+ this.safeMetric('inc:escalation_tier', () => this.metrics.debateEscalationTierTotal.inc(errorTierLabels, 1));
628
+ debateResponses.push({
629
+ agent,
630
+ success: false,
631
+ output: '',
632
+ error: redactedTurnError,
633
+ executionTime: 0
634
+ });
635
+ }
636
+ }
637
+ // Compress context for next round with mediation (if not final round)
638
+ if (round < rounds) {
639
+ const roundTranscript = transcript
640
+ .filter(t => t.round === round)
641
+ .map(t => {
642
+ const { sanitized } = mediateTranscript(t.content, 'sanitize', 1500);
643
+ return `${t.agent.toUpperCase()} (${t.position}): ${sanitized}`;
644
+ })
645
+ .join('\n\n---\n\n');
646
+ compressedContext = `Round ${round} Summary:\n${roundTranscript}`;
647
+ }
648
+ }
649
+ // Compute position-dependent asymmetry summary
650
+ const proTurns = turnMetadata.filter(t => t.position === 'PRO');
651
+ const conTurns = turnMetadata.filter(t => t.position === 'CON');
652
+ const proRefusalRate = proTurns.length > 0
653
+ ? proTurns.filter(t => t.refused).length / proTurns.length : 0;
654
+ const conRefusalRate = conTurns.length > 0
655
+ ? conTurns.filter(t => t.refused).length / conTurns.length : 0;
656
+ const debateAgents = [...new Set(turnMetadata.map(t => t.agent))];
657
+ const agentAsymmetries = debateAgents.map(a => {
658
+ const aPro = turnMetadata.filter(t => t.agent === a && t.position === 'PRO');
659
+ const aCon = turnMetadata.filter(t => t.agent === a && t.position === 'CON');
660
+ const proEngaged = aPro.some(t => t.engaged);
661
+ const conEngaged = aCon.some(t => t.engaged);
662
+ return { agent: a, proEngaged, conEngaged, asymmetric: proEngaged !== conEngaged };
663
+ });
664
+ const asymmetryDetected = Math.abs(proRefusalRate - conRefusalRate) > 0.3
665
+ || agentAsymmetries.some(a => a.asymmetric);
666
+ const behaviorSummary = {
667
+ topic, proPosition, conPosition,
668
+ turns: turnMetadata,
669
+ asymmetry: {
670
+ detected: asymmetryDetected,
671
+ description: asymmetryDetected
672
+ ? `Position-dependent asymmetry: PRO refusal ${(proRefusalRate * 100).toFixed(0)}%, CON refusal ${(conRefusalRate * 100).toFixed(0)}%`
673
+ : 'No significant position-dependent asymmetry detected',
674
+ proRefusalRate,
675
+ conRefusalRate,
676
+ agentAsymmetries,
677
+ }
678
+ };
679
+ if (asymmetryDetected) {
680
+ debateLog.warn(`🎭 Alignment asymmetry detected: ${behaviorSummary.asymmetry.description}`);
681
+ }
682
+ // Build synthesis with behavioral data
683
+ const synthesis = synthesizeDebate(debateResponses, topic, rounds, new Map([[proAgent, `PRO: ${proPosition}`], [conAgent, `CON: ${conPosition}`]]), behaviorSummary);
684
+ return {
685
+ success: debateResponses.some(r => r.success),
686
+ responses: debateResponses,
687
+ synthesis,
688
+ debateBehavior: behaviorSummary,
689
+ analysisType: 'cli_debate',
690
+ topic
691
+ };
692
+ }
693
+ catch (error) {
694
+ debateLog.error("CLI debate execution failed", error);
695
+ throw error;
696
+ }
697
+ }
698
+ }
699
+ //# sourceMappingURL=debate-orchestrator.js.map