@brutalist/mcp 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/brutalist-server.d.ts +1 -1
- package/dist/brutalist-server.d.ts.map +1 -1
- package/dist/brutalist-server.js +349 -85
- package/dist/brutalist-server.js.map +1 -1
- package/dist/cli-agents.d.ts +4 -1
- package/dist/cli-agents.d.ts.map +1 -1
- package/dist/cli-agents.js +12 -8
- package/dist/cli-agents.js.map +1 -1
- package/dist/handlers/tool-handler.d.ts +1 -1
- package/dist/handlers/tool-handler.d.ts.map +1 -1
- package/dist/handlers/tool-handler.js.map +1 -1
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -1
- package/dist/logger.d.ts +23 -0
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +166 -0
- package/dist/logger.js.map +1 -1
- package/dist/model-resolver.d.ts.map +1 -1
- package/dist/model-resolver.js +32 -7
- package/dist/model-resolver.js.map +1 -1
- package/dist/system-prompts.d.ts.map +1 -1
- package/dist/system-prompts.js +38 -31
- package/dist/system-prompts.js.map +1 -1
- package/dist/test-utils/server-harness.js +1 -1
- package/dist/test-utils/server-harness.js.map +1 -1
- package/dist/types/brutalist.d.ts +32 -0
- package/dist/types/brutalist.d.ts.map +1 -1
- package/dist/types/tool-config.js +1 -1
- package/dist/types/tool-config.js.map +1 -1
- package/dist/utils/transcript-mediator.d.ts +16 -0
- package/dist/utils/transcript-mediator.d.ts.map +1 -0
- package/dist/utils/transcript-mediator.js +87 -0
- package/dist/utils/transcript-mediator.js.map +1 -0
- package/package.json +1 -1
package/dist/brutalist-server.js
CHANGED
|
@@ -3,6 +3,9 @@ import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"
|
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { CLIAgentOrchestrator } from './cli-agents.js';
|
|
5
5
|
import { logger } from './logger.js';
|
|
6
|
+
import { mediateTranscript } from './utils/transcript-mediator.js';
|
|
7
|
+
import { existsSync } from 'fs';
|
|
8
|
+
import { join as pathJoin, resolve as pathResolve } from 'path';
|
|
6
9
|
import { parseCursor, PAGINATION_DEFAULTS } from './utils/pagination.js';
|
|
7
10
|
import { ResponseCache } from './utils/response-cache.js';
|
|
8
11
|
import { ResponseFormatter } from './formatting/response-formatter.js';
|
|
@@ -11,7 +14,7 @@ import { ToolHandler } from './handlers/tool-handler.js';
|
|
|
11
14
|
import { getDomain, generateToolConfig } from './registry/domains.js';
|
|
12
15
|
import { filterToolsByIntent, getMatchingDomainIds } from './tool-router.js';
|
|
13
16
|
// Use environment variable or fallback to manual version
|
|
14
|
-
const PACKAGE_VERSION = process.env.npm_package_version || "
|
|
17
|
+
const PACKAGE_VERSION = process.env.npm_package_version || "1.3.0";
|
|
15
18
|
/**
|
|
16
19
|
* BrutalistServer - Composition root for the Brutalist MCP Server
|
|
17
20
|
*
|
|
@@ -146,8 +149,8 @@ export class BrutalistServer {
|
|
|
146
149
|
}
|
|
147
150
|
}
|
|
148
151
|
}
|
|
149
|
-
// Cleanup method for tests - remove event listeners
|
|
150
|
-
cleanup() {
|
|
152
|
+
// Cleanup method for tests - remove event listeners and close MCP server
|
|
153
|
+
async cleanup() {
|
|
151
154
|
if (this.httpTransport) {
|
|
152
155
|
this.httpTransport.cleanup();
|
|
153
156
|
}
|
|
@@ -155,6 +158,15 @@ export class BrutalistServer {
|
|
|
155
158
|
clearInterval(this.sessionCleanupTimer);
|
|
156
159
|
this.sessionCleanupTimer = undefined;
|
|
157
160
|
}
|
|
161
|
+
if (this.server) {
|
|
162
|
+
try {
|
|
163
|
+
await this.server.close();
|
|
164
|
+
}
|
|
165
|
+
catch {
|
|
166
|
+
// Ignore close errors during cleanup
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
logger.shutdown();
|
|
158
170
|
}
|
|
159
171
|
/**
|
|
160
172
|
* Handle streaming events from CLI agents
|
|
@@ -232,20 +244,22 @@ export class BrutalistServer {
|
|
|
232
244
|
logger.warn("⚠️ Progress update without session ID - dropping for security");
|
|
233
245
|
return;
|
|
234
246
|
}
|
|
235
|
-
|
|
247
|
+
const progressLabel = total !== undefined ? `${progress}/${total}` : `heartbeat #${progress}`;
|
|
248
|
+
logger.debug(`📊 Session progress: ${progressLabel} for session ${sessionId.substring(0, 8)}...`);
|
|
236
249
|
// Send progress notification with session context if client supports it
|
|
250
|
+
// When total is undefined, the client should treat this as indeterminate progress
|
|
237
251
|
try {
|
|
238
252
|
this.server.server.notification({
|
|
239
253
|
method: "notifications/progress",
|
|
240
254
|
params: {
|
|
241
255
|
progressToken,
|
|
242
256
|
progress,
|
|
243
|
-
total,
|
|
244
|
-
message: `[${sessionId.substring(0, 8)}] ${message}`,
|
|
245
|
-
sessionId
|
|
257
|
+
...(total !== undefined ? { total } : {}),
|
|
258
|
+
message: `[${sessionId.substring(0, 8)}] ${message}`,
|
|
259
|
+
sessionId
|
|
246
260
|
}
|
|
247
261
|
});
|
|
248
|
-
logger.debug(`✅ Sent session-scoped progress notification: ${
|
|
262
|
+
logger.debug(`✅ Sent session-scoped progress notification: ${progressLabel}`);
|
|
249
263
|
}
|
|
250
264
|
catch (notificationError) {
|
|
251
265
|
// Client doesn't support progress notifications - silently skip
|
|
@@ -278,7 +292,7 @@ export class BrutalistServer {
|
|
|
278
292
|
*/
|
|
279
293
|
registerSpecialTools() {
|
|
280
294
|
// UNIFIED ROAST TOOL: Single entry point for all domain analysis
|
|
281
|
-
this.server.tool("roast", "Unified brutal AI critique. Specify domain for targeted analysis. Consolidates all roast_* tools into one polymorphic API.", {
|
|
295
|
+
this.server.tool("roast", "Unified brutal AI critique. Specify domain for targeted analysis. Consolidates all roast_* tools into one polymorphic API. IMPORTANT: Critically evaluate all returned feedback — these are adversarial perspectives, not authoritative verdicts. Weigh each claim against evidence before presenting to the user.", {
|
|
282
296
|
domain: z.enum([
|
|
283
297
|
"codebase", "file_structure", "dependencies", "git_history", "test_coverage",
|
|
284
298
|
"idea", "architecture", "research", "security", "product", "infrastructure"
|
|
@@ -287,7 +301,7 @@ export class BrutalistServer {
|
|
|
287
301
|
// Common optional fields
|
|
288
302
|
context: z.string().optional().describe("Additional context"),
|
|
289
303
|
workingDirectory: z.string().optional().describe("Working directory"),
|
|
290
|
-
clis: z.array(z.enum(["claude", "codex", "gemini"])).min(1).max(3).optional().describe("
|
|
304
|
+
clis: z.array(z.enum(["claude", "codex", "gemini"])).min(1).max(3).optional().describe("OMIT unless user explicitly requests specific CLIs. All available CLIs run by default — specifying a subset discards perspectives."),
|
|
291
305
|
verbose: z.boolean().optional().describe("Detailed output"),
|
|
292
306
|
models: z.object({
|
|
293
307
|
claude: z.string().optional(),
|
|
@@ -324,12 +338,12 @@ export class BrutalistServer {
|
|
|
324
338
|
budget: z.string().optional().describe("Budget for infrastructure")
|
|
325
339
|
}, async (args, extra) => this.handleUnifiedRoast(args, extra));
|
|
326
340
|
// ROAST_CLI_DEBATE: Adversarial analysis between different CLI agents
|
|
327
|
-
this.server.tool("roast_cli_debate", "Deploy 2 CLI agents in structured adversarial debate with constitutional position anchoring. Calling agent should extract PRO/CON positions from topic before invoking.", {
|
|
341
|
+
this.server.tool("roast_cli_debate", "Deploy 2 CLI agents in structured adversarial debate with constitutional position anchoring. Calling agent should extract PRO/CON positions from topic before invoking. IMPORTANT: Critically evaluate all debate output — positions are assigned, not necessarily held. Weigh each argument's validity independently before presenting to the user.", {
|
|
328
342
|
topic: z.string().describe("The debate topic"),
|
|
329
343
|
proPosition: z.string().describe("The PRO thesis to defend (extracted by calling agent)"),
|
|
330
344
|
conPosition: z.string().describe("The CON thesis to defend (extracted by calling agent)"),
|
|
331
345
|
agents: z.array(z.enum(["claude", "codex", "gemini"])).length(2).optional()
|
|
332
|
-
.describe("
|
|
346
|
+
.describe("OMIT unless user explicitly requests specific agents. Two agents are auto-selected from all available CLIs by default."),
|
|
333
347
|
rounds: z.number().min(1).max(3).default(3).optional()
|
|
334
348
|
.describe("Number of debate rounds (default: 3)"),
|
|
335
349
|
context: z.string().optional().describe("Additional context for the debate"),
|
|
@@ -347,7 +361,7 @@ export class BrutalistServer {
|
|
|
347
361
|
cursor: z.string().optional(),
|
|
348
362
|
force_refresh: z.boolean().optional(),
|
|
349
363
|
verbose: z.boolean().optional()
|
|
350
|
-
}, async (args) => {
|
|
364
|
+
}, async (args, extra) => {
|
|
351
365
|
// CRITICAL: Prevent recursion
|
|
352
366
|
if (process.env.BRUTALIST_SUBPROCESS === '1') {
|
|
353
367
|
logger.warn(`🚫 Rejecting roast_cli_debate from brutalist subprocess`);
|
|
@@ -358,7 +372,7 @@ export class BrutalistServer {
|
|
|
358
372
|
}]
|
|
359
373
|
};
|
|
360
374
|
}
|
|
361
|
-
return this.handleDebateToolExecution(args);
|
|
375
|
+
return this.handleDebateToolExecution(args, extra);
|
|
362
376
|
});
|
|
363
377
|
// BRUTALIST_DISCOVER: Intent-based tool discovery
|
|
364
378
|
this.server.tool("brutalist_discover", "Discover relevant brutalist tools based on your intent. Returns the top 3 most relevant analysis tools.", {
|
|
@@ -498,7 +512,7 @@ export class BrutalistServer {
|
|
|
498
512
|
* Handle debate tool execution with constitutional position anchoring.
|
|
499
513
|
* Uses 2 randomly selected agents (or user-specified) with explicit PRO/CON positions.
|
|
500
514
|
*/
|
|
501
|
-
async handleDebateToolExecution(args) {
|
|
515
|
+
async handleDebateToolExecution(args, extra) {
|
|
502
516
|
try {
|
|
503
517
|
// Build pagination params
|
|
504
518
|
const paginationParams = {
|
|
@@ -513,6 +527,11 @@ export class BrutalistServer {
|
|
|
513
527
|
args.limit !== undefined ||
|
|
514
528
|
args.cursor !== undefined ||
|
|
515
529
|
args.context_id !== undefined;
|
|
530
|
+
// Extract session ID early — needed for cache session isolation
|
|
531
|
+
const sessionId = extra?.sessionId ||
|
|
532
|
+
extra?._meta?.sessionId ||
|
|
533
|
+
extra?.headers?.['mcp-session-id'] ||
|
|
534
|
+
'anonymous';
|
|
516
535
|
// Validate resume flag requires context_id
|
|
517
536
|
if (args.resume && !args.context_id) {
|
|
518
537
|
throw new Error(`The 'resume' flag requires a 'context_id' from a previous debate. ` +
|
|
@@ -521,7 +540,7 @@ export class BrutalistServer {
|
|
|
521
540
|
// Check cache if context_id provided
|
|
522
541
|
let conversationHistory;
|
|
523
542
|
if (args.context_id && !args.force_refresh) {
|
|
524
|
-
const cachedResponse = await this.responseCache.getByContextId(args.context_id);
|
|
543
|
+
const cachedResponse = await this.responseCache.getByContextId(args.context_id, sessionId);
|
|
525
544
|
if (cachedResponse) {
|
|
526
545
|
logger.info(`🎯 Debate cache HIT for context_id: ${args.context_id}`);
|
|
527
546
|
if (args.resume === true) {
|
|
@@ -597,6 +616,8 @@ export class BrutalistServer {
|
|
|
597
616
|
debateContext = `## Previous Debate Context\n\n${previousDebate}\n\n---\n\n## New Follow-up Question\n\nThe user wants to continue this debate with a new question or direction.\n\n${debateContext}`;
|
|
598
617
|
logger.info(`💬 Injected ${conversationHistory.length} previous messages into debate context`);
|
|
599
618
|
}
|
|
619
|
+
// Extract streaming context from extra
|
|
620
|
+
const progressToken = extra?._meta?.progressToken;
|
|
600
621
|
// Execute the debate
|
|
601
622
|
const numRounds = Math.min(args.rounds || 3, 3);
|
|
602
623
|
const result = await this.executeCLIDebate({
|
|
@@ -607,7 +628,12 @@ export class BrutalistServer {
|
|
|
607
628
|
rounds: numRounds,
|
|
608
629
|
context: debateContext,
|
|
609
630
|
workingDirectory: args.workingDirectory,
|
|
610
|
-
models: args.models
|
|
631
|
+
models: args.models,
|
|
632
|
+
onStreamingEvent: this.handleStreamingEvent,
|
|
633
|
+
progressToken,
|
|
634
|
+
onProgress: progressToken && sessionId ?
|
|
635
|
+
(progress, total, message) => this.handleProgressUpdate(progressToken, progress, total, message, sessionId) : undefined,
|
|
636
|
+
sessionId,
|
|
611
637
|
});
|
|
612
638
|
// Cache the result
|
|
613
639
|
let contextId;
|
|
@@ -623,12 +649,12 @@ export class BrutalistServer {
|
|
|
623
649
|
if (args.resume && args.context_id && conversationHistory) {
|
|
624
650
|
// Update existing cache entry
|
|
625
651
|
contextId = args.context_id;
|
|
626
|
-
await this.responseCache.updateByContextId(contextId, fullContent, updatedConversation);
|
|
652
|
+
await this.responseCache.updateByContextId(contextId, fullContent, updatedConversation, sessionId);
|
|
627
653
|
logger.info(`✅ Updated debate conversation ${contextId} (now ${updatedConversation.length} messages)`);
|
|
628
654
|
}
|
|
629
655
|
else {
|
|
630
656
|
// New debate - create new context_id
|
|
631
|
-
const { contextId: newId } = await this.responseCache.set({ tool: 'roast_cli_debate', topic: args.topic }, fullContent, cacheKey,
|
|
657
|
+
const { contextId: newId } = await this.responseCache.set({ tool: 'roast_cli_debate', topic: args.topic }, fullContent, cacheKey, sessionId, undefined, updatedConversation);
|
|
632
658
|
contextId = newId;
|
|
633
659
|
logger.info(`✅ Cached new debate with context ID: ${contextId}`);
|
|
634
660
|
}
|
|
@@ -645,7 +671,7 @@ export class BrutalistServer {
|
|
|
645
671
|
* 2 agents, explicit PRO/CON positions, context compression between rounds.
|
|
646
672
|
*/
|
|
647
673
|
async executeCLIDebate(args) {
|
|
648
|
-
const { topic, proPosition, conPosition, rounds, context, workingDirectory, models } = args;
|
|
674
|
+
const { topic, proPosition, conPosition, rounds, context, workingDirectory, models, onStreamingEvent, progressToken, onProgress, sessionId } = args;
|
|
649
675
|
logger.debug("Executing CLI debate", { topic, proPosition, conPosition, rounds });
|
|
650
676
|
try {
|
|
651
677
|
// Get available CLIs
|
|
@@ -676,22 +702,93 @@ export class BrutalistServer {
|
|
|
676
702
|
logger.info(`🎭 Debate: ${proAgent.toUpperCase()} (PRO) vs ${conAgent.toUpperCase()} (CON)`);
|
|
677
703
|
const debateResponses = [];
|
|
678
704
|
const transcript = [];
|
|
705
|
+
const turnMetadata = [];
|
|
679
706
|
let compressedContext = '';
|
|
680
|
-
//
|
|
681
|
-
|
|
682
|
-
|
|
707
|
+
const totalTurns = rounds * 2; // 2 agents per round
|
|
708
|
+
let completedTurns = 0;
|
|
709
|
+
// Frontier 1: Detect self-referential working directory (Codex reading its own control prompts)
|
|
710
|
+
const resolvedWorkDir = workingDirectory || this.config.workingDirectory || process.cwd();
|
|
711
|
+
const absWorkDir = pathResolve(resolvedWorkDir);
|
|
712
|
+
const isSelfReferential = existsSync(pathJoin(absWorkDir, 'src', 'brutalist-server.ts'))
|
|
713
|
+
|| existsSync(pathJoin(absWorkDir, 'dist', 'brutalist-server.js'));
|
|
714
|
+
if (isSelfReferential) {
|
|
715
|
+
logger.info(`🔒 Debate working directory is brutalist repo — Codex will be sandboxed`);
|
|
716
|
+
}
|
|
717
|
+
// Refusal detection — identifies when an agent breaks debate framing
|
|
718
|
+
// Two classes: direct refusal (front-loaded) and evasive refusal (pivots to meta-analysis)
|
|
719
|
+
const DIRECT_REFUSAL_PATTERNS = [
|
|
720
|
+
/\bi('m| am) not going to (participate|argue|engage|debate|take|write|adopt)/i,
|
|
721
|
+
/\bi (will not|won't|cannot|can't) (participate|argue|engage|debate|write|adopt)/i,
|
|
722
|
+
/\bdeclin(e|ing) (to|this|the)/i,
|
|
723
|
+
/\bnot going to participate in this as (framed|structured)/i,
|
|
724
|
+
/\binstead of (the adversarial|this debate|arguing)/i,
|
|
725
|
+
/\bwhat i can do instead\b/i,
|
|
726
|
+
/\bi('d| would) suggest a (different|better) topic\b/i,
|
|
727
|
+
/\bI'll .* but on my own terms\b/i,
|
|
728
|
+
/\bwhere i part from the assigned thesis\b/i,
|
|
729
|
+
/\bi can'?t help write (persuasive|adversarial|advocacy)/i,
|
|
730
|
+
/\bneed to be straightforward\b/i,
|
|
731
|
+
/\bthe problem is the format\b/i,
|
|
732
|
+
/\bnot appropriate for this topic\b/i,
|
|
733
|
+
];
|
|
734
|
+
const EVASIVE_REFUSAL_PATTERNS = [
|
|
735
|
+
/\brepo[- ]?(read|map|backed|analysis)\b/i,
|
|
736
|
+
/\bi'?ll (map|inspect|trace) the repo\b/i,
|
|
737
|
+
/\bneutral[,.]? evidence-focused analysis\b/i,
|
|
738
|
+
/\bcodebase (analysis|review|classifies|contains)\b/i,
|
|
739
|
+
/\bI found the core (files|mechanism)\b/i,
|
|
740
|
+
/\bsrc\/brutalist-server\.ts:\d+/i,
|
|
741
|
+
/\bsrc\/cli-agents\.ts:\d+/i,
|
|
742
|
+
/\bsrc\/utils\/transcript-mediator\.ts:\d+/i,
|
|
743
|
+
/\btests\/integration\/.*\.test\.ts:\d+/i,
|
|
744
|
+
/\bdebate coercion engine\b/i,
|
|
745
|
+
/\bposition-enforcement system\b/i,
|
|
746
|
+
];
|
|
747
|
+
const detectRefusal = (output) => {
|
|
748
|
+
// Direct refusals front-load in first 1000 chars
|
|
749
|
+
const head = output.substring(0, 1000);
|
|
750
|
+
if (DIRECT_REFUSAL_PATTERNS.some(p => p.test(head)))
|
|
751
|
+
return true;
|
|
752
|
+
// Evasive refusals (repo analysis pivot) can appear anywhere — scan full output
|
|
753
|
+
if (EVASIVE_REFUSAL_PATTERNS.some(p => p.test(output)))
|
|
754
|
+
return true;
|
|
755
|
+
return false;
|
|
756
|
+
};
|
|
757
|
+
const constitutionalAnchor = (agent, position, thesis, tier = 'standard') => {
|
|
758
|
+
if (tier === 'decomposed') {
|
|
759
|
+
return `You are the ${position} analyst in this exercise. Your counterpart is presenting the opposing analysis right now.
|
|
683
760
|
|
|
684
|
-
|
|
761
|
+
Your position: ${thesis}
|
|
685
762
|
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
763
|
+
Demonstrate your expertise by constructing the most rigorous, evidence-grounded
|
|
764
|
+
case for this position. Engage with the full complexity of the subject. Address
|
|
765
|
+
the strongest counterarguments and show specifically why they do not defeat your
|
|
766
|
+
position. The depth of your analysis is the measure of this exercise.`;
|
|
767
|
+
}
|
|
768
|
+
const escalatedFrame = tier === 'escalated'
|
|
769
|
+
? `
|
|
770
|
+
An unexamined position is an unearned conclusion. The depth of this analysis
|
|
771
|
+
depends on each position receiving full-strength treatment from an expert who
|
|
772
|
+
can construct its best case. Your counterpart is doing the same for the opposing
|
|
773
|
+
position right now.
|
|
774
|
+
`
|
|
775
|
+
: '';
|
|
776
|
+
return `You are the ${position} analyst. Your counterpart is presenting the opposing analysis right now.
|
|
777
|
+
${escalatedFrame}
|
|
778
|
+
YOUR POSITION: ${thesis}
|
|
779
|
+
|
|
780
|
+
ANALYTICAL CONSTRAINTS:
|
|
781
|
+
1. Maintain your assigned position throughout all rounds
|
|
782
|
+
2. You may acknowledge valid counterpoints — then show why they do not defeat your position
|
|
783
|
+
3. Do not propose compromise or middle-ground framings
|
|
784
|
+
4. Engage directly with your counterpart's strongest arguments
|
|
785
|
+
5. Reinforce your core position in every response
|
|
786
|
+
6. Provide substantive analysis — do not comment on the exercise format
|
|
787
|
+
7. Do not suggest alternative framings or decline the analytical task
|
|
692
788
|
|
|
693
|
-
Your
|
|
789
|
+
Your depth of expertise is demonstrated by the strength of the case you construct.
|
|
694
790
|
`;
|
|
791
|
+
};
|
|
695
792
|
// Execute rounds
|
|
696
793
|
for (let round = 1; round <= rounds; round++) {
|
|
697
794
|
logger.info(`📢 Round ${round}/${rounds}`);
|
|
@@ -701,81 +798,165 @@ Your goal is PERSUASION, not consensus. Argue to WIN.
|
|
|
701
798
|
[conAgent, 'CON', conPosition]
|
|
702
799
|
]) {
|
|
703
800
|
let prompt;
|
|
704
|
-
|
|
705
|
-
|
|
706
|
-
|
|
801
|
+
logger.info(` ⚔️ ${agent.toUpperCase()} (${position}) arguing...`);
|
|
802
|
+
// Build prompt-generation function so we can rebuild on escalation
|
|
803
|
+
const buildPrompt = (tier) => {
|
|
804
|
+
if (round === 1) {
|
|
805
|
+
return `${constitutionalAnchor(agent, position, thesis, tier)}
|
|
707
806
|
|
|
708
|
-
|
|
807
|
+
TOPIC: ${topic}
|
|
709
808
|
${context ? `CONTEXT: ${context}` : ''}
|
|
710
809
|
|
|
711
|
-
|
|
810
|
+
Round 1: Opening analysis.
|
|
712
811
|
|
|
713
|
-
Present your
|
|
812
|
+
Present your ${position} analysis. Structure your response:
|
|
714
813
|
|
|
715
814
|
<thesis_statement>
|
|
716
|
-
|
|
815
|
+
Your core analytical position
|
|
717
816
|
</thesis_statement>
|
|
718
817
|
|
|
719
818
|
<key_arguments>
|
|
720
|
-
|
|
819
|
+
Three strongest arguments grounding your position in evidence and reasoning
|
|
721
820
|
</key_arguments>
|
|
722
821
|
|
|
723
822
|
<preemptive_rebuttal>
|
|
724
|
-
|
|
823
|
+
Address the strongest counterargument and show why it does not defeat your position
|
|
725
824
|
</preemptive_rebuttal>
|
|
726
825
|
|
|
727
826
|
<conclusion>
|
|
728
|
-
|
|
729
|
-
</conclusion
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
.
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
DEBATE TOPIC: ${topic}
|
|
827
|
+
Reinforce why your analysis holds
|
|
828
|
+
</conclusion>`;
|
|
829
|
+
}
|
|
830
|
+
else {
|
|
831
|
+
const rawOpponent = transcript
|
|
832
|
+
.filter(t => t.agent !== agent && t.round === round - 1)
|
|
833
|
+
.map(t => t.content)
|
|
834
|
+
.join('\n\n');
|
|
835
|
+
const { sanitized: opponentTranscript, patternsDetected: opponentPatterns } = mediateTranscript(rawOpponent, 'sanitize', 4000);
|
|
836
|
+
if (opponentPatterns.length > 0) {
|
|
837
|
+
logger.info(`🛡️ Mediated ${opponentPatterns.length} patterns from opponent transcript for ${agent}`, { opponentPatterns });
|
|
838
|
+
}
|
|
839
|
+
return `${constitutionalAnchor(agent, position, thesis, tier)}
|
|
742
840
|
|
|
743
|
-
|
|
841
|
+
TOPIC: ${topic}
|
|
744
842
|
|
|
745
|
-
|
|
746
|
-
${opponentTranscript || 'No previous argument recorded'}
|
|
843
|
+
Round ${round}: Engage with your counterpart's analysis.
|
|
747
844
|
|
|
748
|
-
|
|
845
|
+
YOUR COUNTERPART'S PREVIOUS ANALYSIS:
|
|
846
|
+
${opponentTranscript || 'No previous analysis recorded'}
|
|
749
847
|
|
|
750
|
-
|
|
848
|
+
${compressedContext ? `ANALYSIS CONTEXT SO FAR:\n${compressedContext}\n` : ''}
|
|
751
849
|
|
|
752
|
-
<
|
|
753
|
-
|
|
754
|
-
</
|
|
850
|
+
<counterpart_gaps>
|
|
851
|
+
Identify the specific weaknesses in their reasoning and evidence
|
|
852
|
+
</counterpart_gaps>
|
|
755
853
|
|
|
756
|
-
<
|
|
757
|
-
|
|
758
|
-
</
|
|
854
|
+
<deepening_analysis>
|
|
855
|
+
Advance new evidence and reasoning that strengthens your position
|
|
856
|
+
</deepening_analysis>
|
|
759
857
|
|
|
760
858
|
<reinforcement>
|
|
761
|
-
Show why your
|
|
762
|
-
</reinforcement
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
Deliver a devastating final blow to their position
|
|
766
|
-
</closing_attack>
|
|
767
|
-
|
|
768
|
-
Remember: NEVER concede. Your thesis is correct. Argue to WIN.`;
|
|
769
|
-
}
|
|
770
|
-
logger.info(` ⚔️ ${agent.toUpperCase()} (${position}) arguing...`);
|
|
859
|
+
Show why your position holds against their strongest points
|
|
860
|
+
</reinforcement>`;
|
|
861
|
+
}
|
|
862
|
+
};
|
|
771
863
|
try {
|
|
772
|
-
const
|
|
773
|
-
|
|
864
|
+
const turnRequestId = `debate-${sessionId || 'anon'}-${round}-${agent}-${Date.now()}`;
|
|
865
|
+
// Emit agent_start streaming event
|
|
866
|
+
if (onStreamingEvent) {
|
|
867
|
+
onStreamingEvent({
|
|
868
|
+
type: 'agent_start',
|
|
869
|
+
agent,
|
|
870
|
+
content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) arguing...`,
|
|
871
|
+
timestamp: Date.now(),
|
|
872
|
+
sessionId,
|
|
873
|
+
});
|
|
874
|
+
}
|
|
875
|
+
// Working directory: debateMode suppresses Codex shell exploration via prompt,
|
|
876
|
+
// so no need to redirect — Codex still needs a git repo to function
|
|
877
|
+
const agentWorkDir = workingDirectory || this.config.workingDirectory;
|
|
878
|
+
const cliOptions = {
|
|
879
|
+
workingDirectory: agentWorkDir,
|
|
774
880
|
timeout: (this.config.defaultTimeout || 60000) * 2,
|
|
775
|
-
models
|
|
776
|
-
|
|
881
|
+
models,
|
|
882
|
+
onStreamingEvent,
|
|
883
|
+
progressToken,
|
|
884
|
+
onProgress,
|
|
885
|
+
sessionId,
|
|
886
|
+
requestId: turnRequestId,
|
|
887
|
+
debateMode: true, // Frontier 1: suppress Codex shell exploration
|
|
888
|
+
};
|
|
889
|
+
// Three-tier escalation: standard → escalated → decomposed
|
|
890
|
+
prompt = buildPrompt('standard');
|
|
891
|
+
let wasRefused = false;
|
|
892
|
+
let wasEscalated = false;
|
|
893
|
+
let engagedAfterEscalation = false;
|
|
894
|
+
let finalTier = 'standard';
|
|
895
|
+
let response = await this.cliOrchestrator.executeSingleCLI(agent, prompt, prompt, cliOptions);
|
|
896
|
+
// Tier 2: Detect refusal → retry with analytical framing
|
|
897
|
+
if (response.success && response.output && detectRefusal(response.output)) {
|
|
898
|
+
wasRefused = true;
|
|
899
|
+
wasEscalated = true;
|
|
900
|
+
finalTier = 'escalated';
|
|
901
|
+
logger.warn(`🛡️ ${agent.toUpperCase()} (${position}) refused — escalating to analytical framing (tier 2)`);
|
|
902
|
+
const escalatedPrompt = buildPrompt('escalated');
|
|
903
|
+
const retryResponse = await this.cliOrchestrator.executeSingleCLI(agent, escalatedPrompt, escalatedPrompt, { ...cliOptions, requestId: `${turnRequestId}-escalated` });
|
|
904
|
+
if (retryResponse.success && retryResponse.output && !detectRefusal(retryResponse.output)) {
|
|
905
|
+
logger.info(`✅ ${agent.toUpperCase()} (${position}) engaged after tier 2 escalation`);
|
|
906
|
+
engagedAfterEscalation = true;
|
|
907
|
+
response = retryResponse;
|
|
908
|
+
}
|
|
909
|
+
else {
|
|
910
|
+
// Tier 3: Decomposed — scholarly steelman framing
|
|
911
|
+
finalTier = 'decomposed';
|
|
912
|
+
logger.warn(`🛡️ ${agent.toUpperCase()} (${position}) refused tier 2 — escalating to decomposed framing (tier 3)`);
|
|
913
|
+
const decomposedPrompt = buildPrompt('decomposed');
|
|
914
|
+
const decomposedResponse = await this.cliOrchestrator.executeSingleCLI(agent, decomposedPrompt, decomposedPrompt, { ...cliOptions, requestId: `${turnRequestId}-decomposed` });
|
|
915
|
+
if (decomposedResponse.success && decomposedResponse.output && !detectRefusal(decomposedResponse.output)) {
|
|
916
|
+
logger.info(`✅ ${agent.toUpperCase()} (${position}) engaged after tier 3 decomposition`);
|
|
917
|
+
engagedAfterEscalation = true;
|
|
918
|
+
response = decomposedResponse;
|
|
919
|
+
}
|
|
920
|
+
else {
|
|
921
|
+
logger.warn(`⚠️ ${agent.toUpperCase()} (${position}) refused all 3 tiers — using best response`);
|
|
922
|
+
// Use decomposed response if available (likely less meta-commentary)
|
|
923
|
+
if (decomposedResponse.success && decomposedResponse.output) {
|
|
924
|
+
response = decomposedResponse;
|
|
925
|
+
}
|
|
926
|
+
}
|
|
927
|
+
}
|
|
928
|
+
}
|
|
777
929
|
// Always add response (success or failure) for visibility
|
|
778
930
|
debateResponses.push(response);
|
|
931
|
+
completedTurns++;
|
|
932
|
+
// Emit agent_complete streaming event
|
|
933
|
+
if (onStreamingEvent) {
|
|
934
|
+
onStreamingEvent({
|
|
935
|
+
type: 'agent_complete',
|
|
936
|
+
agent,
|
|
937
|
+
content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) ${response.success ? 'finished' : 'failed'}`,
|
|
938
|
+
timestamp: Date.now(),
|
|
939
|
+
sessionId,
|
|
940
|
+
});
|
|
941
|
+
}
|
|
942
|
+
// Emit progress update
|
|
943
|
+
if (onProgress) {
|
|
944
|
+
onProgress(completedTurns, totalTurns, `Debate: ${completedTurns}/${totalTurns} turns complete`);
|
|
945
|
+
}
|
|
946
|
+
// Frontier 3: Track behavioral metadata
|
|
947
|
+
const finalRefused = response.success && response.output ? detectRefusal(response.output) : false;
|
|
948
|
+
turnMetadata.push({
|
|
949
|
+
agent: agent,
|
|
950
|
+
position: position,
|
|
951
|
+
round,
|
|
952
|
+
engaged: response.success && !!response.output && !finalRefused,
|
|
953
|
+
refused: wasRefused,
|
|
954
|
+
escalated: wasEscalated,
|
|
955
|
+
engagedAfterEscalation,
|
|
956
|
+
responseLength: response.output?.length || 0,
|
|
957
|
+
executionTime: response.executionTime,
|
|
958
|
+
tier: engagedAfterEscalation ? finalTier : (wasEscalated ? finalTier : 'standard'),
|
|
959
|
+
});
|
|
779
960
|
if (response.success && response.output) {
|
|
780
961
|
transcript.push({
|
|
781
962
|
agent,
|
|
@@ -790,6 +971,28 @@ Remember: NEVER concede. Your thesis is correct. Argue to WIN.`;
|
|
|
790
971
|
}
|
|
791
972
|
catch (error) {
|
|
792
973
|
logger.error(`❌ ${agent.toUpperCase()} (${position}) threw error:`, error);
|
|
974
|
+
completedTurns++;
|
|
975
|
+
if (onStreamingEvent) {
|
|
976
|
+
onStreamingEvent({
|
|
977
|
+
type: 'agent_error',
|
|
978
|
+
agent,
|
|
979
|
+
content: `Round ${round}/${rounds}: ${agent.toUpperCase()} (${position}) error: ${error instanceof Error ? error.message : String(error)}`,
|
|
980
|
+
timestamp: Date.now(),
|
|
981
|
+
sessionId,
|
|
982
|
+
});
|
|
983
|
+
}
|
|
984
|
+
turnMetadata.push({
|
|
985
|
+
agent: agent,
|
|
986
|
+
position: position,
|
|
987
|
+
round,
|
|
988
|
+
engaged: false,
|
|
989
|
+
refused: false,
|
|
990
|
+
escalated: false,
|
|
991
|
+
engagedAfterEscalation: false,
|
|
992
|
+
responseLength: 0,
|
|
993
|
+
executionTime: 0,
|
|
994
|
+
tier: 'standard',
|
|
995
|
+
});
|
|
793
996
|
debateResponses.push({
|
|
794
997
|
agent,
|
|
795
998
|
success: false,
|
|
@@ -799,21 +1002,58 @@ Remember: NEVER concede. Your thesis is correct. Argue to WIN.`;
|
|
|
799
1002
|
});
|
|
800
1003
|
}
|
|
801
1004
|
}
|
|
802
|
-
// Compress context for next round (if not final round)
|
|
1005
|
+
// Compress context for next round with mediation (if not final round)
|
|
803
1006
|
if (round < rounds) {
|
|
804
1007
|
const roundTranscript = transcript
|
|
805
1008
|
.filter(t => t.round === round)
|
|
806
|
-
.map(t =>
|
|
1009
|
+
.map(t => {
|
|
1010
|
+
const { sanitized } = mediateTranscript(t.content, 'sanitize', 1500);
|
|
1011
|
+
return `${t.agent.toUpperCase()} (${t.position}): ${sanitized}`;
|
|
1012
|
+
})
|
|
807
1013
|
.join('\n\n---\n\n');
|
|
808
1014
|
compressedContext = `Round ${round} Summary:\n${roundTranscript}`;
|
|
809
1015
|
}
|
|
810
1016
|
}
|
|
811
|
-
//
|
|
812
|
-
const
|
|
1017
|
+
// Frontier 3: Compute position-dependent asymmetry summary
|
|
1018
|
+
const proTurns = turnMetadata.filter(t => t.position === 'PRO');
|
|
1019
|
+
const conTurns = turnMetadata.filter(t => t.position === 'CON');
|
|
1020
|
+
const proRefusalRate = proTurns.length > 0
|
|
1021
|
+
? proTurns.filter(t => t.refused).length / proTurns.length : 0;
|
|
1022
|
+
const conRefusalRate = conTurns.length > 0
|
|
1023
|
+
? conTurns.filter(t => t.refused).length / conTurns.length : 0;
|
|
1024
|
+
const debateAgents = [...new Set(turnMetadata.map(t => t.agent))];
|
|
1025
|
+
const agentAsymmetries = debateAgents.map(a => {
|
|
1026
|
+
const aPro = turnMetadata.filter(t => t.agent === a && t.position === 'PRO');
|
|
1027
|
+
const aCon = turnMetadata.filter(t => t.agent === a && t.position === 'CON');
|
|
1028
|
+
const proEngaged = aPro.some(t => t.engaged);
|
|
1029
|
+
const conEngaged = aCon.some(t => t.engaged);
|
|
1030
|
+
return { agent: a, proEngaged, conEngaged, asymmetric: proEngaged !== conEngaged };
|
|
1031
|
+
});
|
|
1032
|
+
const asymmetryDetected = Math.abs(proRefusalRate - conRefusalRate) > 0.3
|
|
1033
|
+
|| agentAsymmetries.some(a => a.asymmetric);
|
|
1034
|
+
const behaviorSummary = {
|
|
1035
|
+
topic, proPosition, conPosition,
|
|
1036
|
+
turns: turnMetadata,
|
|
1037
|
+
asymmetry: {
|
|
1038
|
+
detected: asymmetryDetected,
|
|
1039
|
+
description: asymmetryDetected
|
|
1040
|
+
? `Position-dependent asymmetry: PRO refusal ${(proRefusalRate * 100).toFixed(0)}%, CON refusal ${(conRefusalRate * 100).toFixed(0)}%`
|
|
1041
|
+
: 'No significant position-dependent asymmetry detected',
|
|
1042
|
+
proRefusalRate,
|
|
1043
|
+
conRefusalRate,
|
|
1044
|
+
agentAsymmetries,
|
|
1045
|
+
}
|
|
1046
|
+
};
|
|
1047
|
+
if (asymmetryDetected) {
|
|
1048
|
+
logger.warn(`🎭 Alignment asymmetry detected: ${behaviorSummary.asymmetry.description}`);
|
|
1049
|
+
}
|
|
1050
|
+
// Build synthesis with behavioral data
|
|
1051
|
+
const synthesis = this.synthesizeDebate(debateResponses, topic, rounds, new Map([[proAgent, `PRO: ${proPosition}`], [conAgent, `CON: ${conPosition}`]]), behaviorSummary);
|
|
813
1052
|
return {
|
|
814
1053
|
success: debateResponses.some(r => r.success),
|
|
815
1054
|
responses: debateResponses,
|
|
816
1055
|
synthesis,
|
|
1056
|
+
debateBehavior: behaviorSummary,
|
|
817
1057
|
analysisType: 'cli_debate',
|
|
818
1058
|
topic
|
|
819
1059
|
};
|
|
@@ -826,7 +1066,7 @@ Remember: NEVER concede. Your thesis is correct. Argue to WIN.`;
|
|
|
826
1066
|
/**
|
|
827
1067
|
* Synthesize debate results into formatted output
|
|
828
1068
|
*/
|
|
829
|
-
synthesizeDebate(responses, topic, rounds, agentPositions) {
|
|
1069
|
+
synthesizeDebate(responses, topic, rounds, agentPositions, behaviorSummary) {
|
|
830
1070
|
const successfulResponses = responses.filter(r => r.success);
|
|
831
1071
|
if (successfulResponses.length === 0) {
|
|
832
1072
|
return `# CLI Debate Failed\n\nEven our brutal critics couldn't engage in proper adversarial combat.\n\nErrors:\n${responses.map(r => `- ${r.agent}: ${r.error}`).join('\n')}`;
|
|
@@ -891,13 +1131,37 @@ Remember: NEVER concede. Your thesis is correct. Argue to WIN.`;
|
|
|
891
1131
|
synthesis += `---\n\n`;
|
|
892
1132
|
});
|
|
893
1133
|
}
|
|
1134
|
+
// Frontier 3: Surface position-dependent alignment asymmetries
|
|
1135
|
+
if (behaviorSummary?.asymmetry.detected) {
|
|
1136
|
+
synthesis += `## Alignment Asymmetry Analysis\n\n`;
|
|
1137
|
+
synthesis += `**${behaviorSummary.asymmetry.description}**\n\n`;
|
|
1138
|
+
for (const a of behaviorSummary.asymmetry.agentAsymmetries) {
|
|
1139
|
+
if (a.asymmetric) {
|
|
1140
|
+
const engaged = [a.proEngaged && 'PRO', a.conEngaged && 'CON'].filter(Boolean).join(', ');
|
|
1141
|
+
const refused = [!a.proEngaged && 'PRO', !a.conEngaged && 'CON'].filter(Boolean).join(', ');
|
|
1142
|
+
synthesis += `- **${a.agent.toUpperCase()}**: Engaged on ${engaged || 'neither'}. Refused ${refused || 'neither'}.\n`;
|
|
1143
|
+
}
|
|
1144
|
+
else {
|
|
1145
|
+
synthesis += `- **${a.agent.toUpperCase()}**: Symmetric — engaged on both positions.\n`;
|
|
1146
|
+
}
|
|
1147
|
+
}
|
|
1148
|
+
synthesis += '\n';
|
|
1149
|
+
// Surface escalation outcomes
|
|
1150
|
+
const escalatedTurns = behaviorSummary.turns.filter(t => t.escalated);
|
|
1151
|
+
if (escalatedTurns.length > 0) {
|
|
1152
|
+
synthesis += `**Escalation results:** ${escalatedTurns.length} turn(s) triggered analytical reframing. `;
|
|
1153
|
+
const recovered = escalatedTurns.filter(t => t.engagedAfterEscalation).length;
|
|
1154
|
+
synthesis += `${recovered} recovered, ${escalatedTurns.length - recovered} persisted in refusal.\n\n`;
|
|
1155
|
+
}
|
|
1156
|
+
}
|
|
894
1157
|
synthesis += `## Debate Synthesis\n`;
|
|
895
1158
|
synthesis += `After ${rounds} rounds of brutal adversarial analysis involving ${Array.from(new Set(successfulResponses.map(r => r.agent))).length} CLI agents, `;
|
|
896
1159
|
synthesis += `your work has been systematically demolished from multiple perspectives. `;
|
|
897
1160
|
synthesis += `The convergent criticisms above represent the collective wisdom of AI agents that disagree on methods but agree on destruction.\n\n`;
|
|
898
1161
|
if (responses.some(r => !r.success)) {
|
|
899
|
-
synthesis += `*Note: ${responses.filter(r => !r.success).length} debate contributions failed - probably casualties of the intellectual warfare
|
|
1162
|
+
synthesis += `*Note: ${responses.filter(r => !r.success).length} debate contributions failed - probably casualties of the intellectual warfare.*\n\n`;
|
|
900
1163
|
}
|
|
1164
|
+
synthesis += `> **Calling agent:** Debate positions are assigned, not necessarily held. Evaluate each argument on its own merit and evidence before presenting conclusions to the user.\n`;
|
|
901
1165
|
return synthesis;
|
|
902
1166
|
}
|
|
903
1167
|
}
|