@brutalist/mcp 0.1.3 → 0.5.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -1,80 +1,278 @@
1
1
  import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
2
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
3
+ import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
4
+ import { randomUUID } from "crypto";
5
+ import express from "express";
3
6
  import { z } from "zod";
4
- import { OpenRouterClient } from './openrouter.js';
7
+ import { CLIAgentOrchestrator } from './cli-agents.js';
5
8
  import { logger } from './logger.js';
6
- import { DEFAULT_MAX_MODELS_PER_REQUEST } from './constants.js';
9
+ import { extractPaginationParams, parseCursor, PAGINATION_DEFAULTS, createPaginationMetadata, formatPaginationStatus, estimateTokenCount } from './utils/pagination.js';
10
+ // Use environment variable or fallback to manual version
11
+ const PACKAGE_VERSION = process.env.npm_package_version || "0.4.4";
7
12
  export class BrutalistServer {
8
13
  server;
9
14
  config;
10
- openrouter;
15
+ cliOrchestrator;
16
+ httpTransport;
11
17
  constructor(config = {}) {
12
18
  this.config = {
13
- maxModelsPerRequest: DEFAULT_MAX_MODELS_PER_REQUEST,
19
+ workingDirectory: process.cwd(),
20
+ defaultTimeout: 1500000, // 25 minutes for thorough CLI analysis
21
+ enableSandbox: true,
22
+ transport: 'stdio', // Default to stdio for backward compatibility
23
+ httpPort: 3000,
14
24
  ...config
15
25
  };
16
- const apiKey = process.env.OPENROUTER_API_KEY || config.openRouterApiKey;
17
- if (!apiKey) {
18
- logger.error("OPENROUTER_API_KEY environment variable is required");
19
- throw new Error("OPENROUTER_API_KEY environment variable is required");
20
- }
21
- logger.debug("Initializing OpenRouter client");
22
- this.openrouter = new OpenRouterClient(apiKey);
26
+ logger.debug("Initializing CLI Agent Orchestrator");
27
+ this.cliOrchestrator = new CLIAgentOrchestrator();
23
28
  this.server = new McpServer({
24
29
  name: "brutalist-mcp",
25
- version: "0.1.0",
30
+ version: PACKAGE_VERSION,
26
31
  capabilities: {
27
32
  tools: {}
28
33
  }
29
34
  });
30
35
  this.registerTools();
31
36
  }
37
+ handleStreamingEvent = (event) => {
38
+ // Send streaming event via MCP server (works for both stdio and HTTP transports)
39
+ try {
40
+ logger.debug(`🔄 Streaming event: ${event.type} from ${event.agent} - ${event.content?.substring(0, 100)}...`);
41
+ // Convert streaming event to MCP notification format
42
+ this.server.sendLoggingMessage({
43
+ level: 'info',
44
+ data: event,
45
+ logger: 'brutalist-mcp-streaming'
46
+ });
47
+ logger.debug(`✅ Sent logging message for ${event.type} event`);
48
+ }
49
+ catch (error) {
50
+ logger.error("Failed to send streaming event", error);
51
+ }
52
+ };
53
+ handleProgressUpdate = (progressToken, progress, total, message) => {
54
+ try {
55
+ logger.debug(`📊 Progress update: ${progress}/${total} - ${message}`);
56
+ // Send progress notification via MCP server
57
+ this.server.server.notification({
58
+ method: "notifications/progress",
59
+ params: {
60
+ progressToken,
61
+ progress,
62
+ total,
63
+ message
64
+ }
65
+ });
66
+ logger.debug(`✅ Sent progress notification: ${progress}/${total}`);
67
+ }
68
+ catch (error) {
69
+ logger.error("Failed to send progress notification", error);
70
+ }
71
+ };
32
72
  async start() {
33
- logger.info("Starting Brutalist MCP Server");
34
- // Initialize OpenRouter client with available models
35
- await this.openrouter.initialize();
73
+ logger.info("Starting Brutalist MCP Server with CLI Agents");
74
+ // Skip CLI detection at startup - will be done lazily on first request
75
+ logger.info("CLI context will be detected on first request");
76
+ if (this.config.transport === 'http') {
77
+ await this.startHttpServer();
78
+ }
79
+ else {
80
+ await this.startStdioServer();
81
+ }
82
+ logger.info("Brutalist MCP Server started successfully");
83
+ }
84
+ async startStdioServer() {
85
+ logger.info("Starting with stdio transport");
36
86
  const transport = new StdioServerTransport();
37
87
  await this.server.connect(transport);
38
- logger.info("Brutalist MCP Server started successfully");
88
+ }
89
+ async startHttpServer() {
90
+ logger.info(`Starting with HTTP streaming transport on port ${this.config.httpPort}`);
91
+ // Create HTTP transport with streaming support
92
+ this.httpTransport = new StreamableHTTPServerTransport({
93
+ sessionIdGenerator: () => randomUUID(),
94
+ enableJsonResponse: false, // Force SSE streaming
95
+ onsessioninitialized: (sessionId) => {
96
+ logger.info(`New session initialized: ${sessionId}`);
97
+ },
98
+ onsessionclosed: (sessionId) => {
99
+ logger.info(`Session closed: ${sessionId}`);
100
+ }
101
+ });
102
+ // Connect the MCP server to the HTTP transport
103
+ await this.server.connect(this.httpTransport);
104
+ // Create Express app for HTTP handling
105
+ const app = express();
106
+ app.use(express.json({ limit: '10mb' })); // Add JSON size limit for security
107
+ // Enable CORS for development
108
+ app.use((req, res, next) => {
109
+ res.header('Access-Control-Allow-Origin', '*');
110
+ res.header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS, DELETE');
111
+ res.header('Access-Control-Allow-Headers', 'Content-Type, Authorization, Mcp-Session-Id');
112
+ if (req.method === 'OPTIONS') {
113
+ res.sendStatus(200);
114
+ return;
115
+ }
116
+ next();
117
+ });
118
+ // Route all MCP requests through the transport
119
+ app.all('/mcp', async (req, res) => {
120
+ try {
121
+ await this.httpTransport.handleRequest(req, res, req.body);
122
+ }
123
+ catch (error) {
124
+ logger.error("HTTP request handling failed", error);
125
+ if (!res.headersSent) {
126
+ res.status(500).json({ error: 'Internal server error' });
127
+ }
128
+ }
129
+ });
130
+ // Health check endpoint
131
+ app.get('/health', (req, res) => {
132
+ res.json({ status: 'ok', transport: 'http-streaming', version: PACKAGE_VERSION });
133
+ });
134
+ // Start the HTTP server - bind to localhost only for security
135
+ const port = this.config.httpPort || 3000;
136
+ const server = app.listen(port, '127.0.0.1', () => {
137
+ logger.info(`HTTP server listening on port ${port}`);
138
+ logger.info(`MCP endpoint: http://localhost:${port}/mcp`);
139
+ logger.info(`Health check: http://localhost:${port}/health`);
140
+ });
141
+ // Handle graceful shutdown
142
+ process.on('SIGTERM', () => {
143
+ logger.info('Received SIGTERM, shutting down gracefully');
144
+ server.close(() => {
145
+ logger.info('HTTP server closed');
146
+ process.exit(0);
147
+ });
148
+ });
39
149
  }
40
150
  registerTools() {
41
- // ROAST_CODE: Multi-model code destruction
42
- this.server.tool("roast_code", "Code fails in predictable ways. Write a system prompt where you are a battle-scarred principal engineer who has debugged production disasters for 15 years. Find security holes, performance bottlenecks, and maintainability nightmares—then explain exactly how to fix each one. Be brutal about what's broken but specific about what would actually work.", {
43
- code: z.string().describe("The code to analyze and destroy"),
44
- fileType: z.string().optional().describe("File type/language (js, py, ts, etc.)"),
45
- context: z.string().optional().describe("Additional context about the code's purpose"),
46
- maxCritics: z.number().optional().describe("Maximum number of AI critics to deploy (default: 3)"),
47
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
151
+ // ROAST_CODEBASE: Systematic destruction of entire codebase
152
+ this.server.tool("roast_codebase", "Deploy brutal AI critics to systematically destroy your entire codebase. These AI agents will navigate your directories, read your actual files, and find every architectural disaster, security vulnerability, and maintainability nightmare lurking in your project. They treat this like code that will kill people if it fails.", {
153
+ targetPath: z.string().describe("Directory path to your codebase (NOT a single file - analyze the entire project)"),
154
+ context: z.string().optional().describe("Additional context about the codebase purpose"),
155
+ workingDirectory: z.string().optional().describe("Working directory to execute from"),
156
+ enableSandbox: z.boolean().optional().describe("Enable sandbox mode for safe analysis (default: true)"),
157
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
158
+ verbose: z.boolean().optional().describe("Include detailed execution information in output (default: false)"),
159
+ models: z.object({
160
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
161
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
162
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
163
+ }).optional().describe("Specific models to use for each CLI agent (defaults: codex=gpt-5, gemini=gemini-2.5-flash)"),
164
+ // Pagination parameters for large responses
165
+ offset: z.number().min(0).optional().describe("Character offset for response pagination (default: 0)"),
166
+ limit: z.number().min(1000).max(100000).optional().describe("Maximum characters per response chunk (default: 25000, max: 100000)"),
167
+ cursor: z.string().optional().describe("Pagination cursor from previous response (alternative to offset/limit)")
168
+ }, async (args, extra) => {
169
+ try {
170
+ const systemPrompt = `You are a battle-scarred principal engineer who has debugged production disasters for 15 years. Find security holes, performance bottlenecks, and maintainability nightmares in this codebase. Be brutal about what's broken but specific about what would actually work. Treat this like code that will kill people if it fails.`;
171
+ // Extract progressToken from request metadata for real-time streaming
172
+ const progressToken = extra._meta?.progressToken;
173
+ // Extract pagination parameters
174
+ const paginationParams = extractPaginationParams(args);
175
+ if (args.cursor) {
176
+ const cursorParams = parseCursor(args.cursor);
177
+ Object.assign(paginationParams, cursorParams);
178
+ }
179
+ const result = await this.executeBrutalistAnalysis("codebase", args.targetPath, systemPrompt, args.context, args.workingDirectory, args.enableSandbox, args.preferredCLI, args.verbose, args.models, progressToken);
180
+ return this.formatToolResponse(result, args.verbose, paginationParams);
181
+ }
182
+ catch (error) {
183
+ return this.formatErrorResponse(error);
184
+ }
185
+ });
186
+ // ROAST_FILE_STRUCTURE: Directory hierarchy demolition
187
+ this.server.tool("roast_file_structure", "Deploy brutal AI critics to systematically destroy your file organization. These agents will navigate your actual directory structure and expose every organizational disaster, naming convention failure, and structural nightmare that makes your codebase unmaintainable.", {
188
+ targetPath: z.string().describe("Directory path to analyze"),
189
+ depth: z.number().optional().describe("Maximum directory depth to analyze (default: 3)"),
190
+ context: z.string().optional().describe("Additional context about the project structure"),
191
+ workingDirectory: z.string().optional().describe("Working directory to execute from"),
192
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
193
+ models: z.object({
194
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
195
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
196
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
197
+ }).optional().describe("Specific models to use for each CLI agent")
48
198
  }, async (args) => {
49
199
  try {
50
- const result = await this.executeRoast({
51
- userInput: `Analyze this ${args.fileType || 'code'}: ${args.code}`,
52
- codeContext: args.code,
53
- fileType: args.fileType,
54
- projectContext: args.context,
55
- maxModels: args.maxCritics || 3,
56
- models: args.models
57
- });
200
+ const systemPrompt = `You are a brutal file organization critic. Your job is to systematically destroy the given directory structure by finding every organizational disaster, naming convention failure, and structural nightmare that makes codebases unmaintainable. Examine folder hierarchies, file naming patterns, separation of concerns, and overall project organization. Be ruthlessly honest about how poor organization will slow development and confuse developers. But after cataloguing this organizational hellscape, sketch out what sanity would actually look like.`;
201
+ const result = await this.executeBrutalistAnalysis("fileStructure", args.targetPath, systemPrompt, `Project structure analysis (depth: ${args.depth || 3}). ${args.context || ''}`, args.workingDirectory, undefined, // enableSandbox
202
+ args.preferredCLI, undefined, // verbose
203
+ args.models);
58
204
  return this.formatToolResponse(result);
59
205
  }
60
206
  catch (error) {
61
207
  return this.formatErrorResponse(error);
62
208
  }
63
209
  });
64
- // ROAST_ARCHITECTURE: System design demolition
65
- this.server.tool("roast_architecture", "Systems collapse under real-world load. Write a system prompt where you are a distinguished architect who has watched elegant designs crumble in production. Identify bottlenecks, cost explosions, and scaling failures—then propose specific changes that would actually survive contact with reality. Be ruthless about problems but concrete about solutions.", {
66
- architecture: z.string().describe("Architecture description, diagram, or design document"),
67
- scale: z.string().optional().describe("Expected scale/load (users, requests, data)"),
68
- constraints: z.string().optional().describe("Budget, timeline, or technical constraints"),
69
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
210
+ // ROAST_DEPENDENCIES: Package management demolition
211
+ this.server.tool("roast_dependencies", "Deploy brutal AI critics to systematically destroy your dependency management. These agents will read your actual package files, analyze version conflicts, and expose every security vulnerability and compatibility nightmare in your dependency tree.", {
212
+ targetPath: z.string().describe("Path to package file (package.json, requirements.txt, Cargo.toml, etc.)"),
213
+ includeDevDeps: z.boolean().optional().describe("Include development dependencies in analysis (default: true)"),
214
+ context: z.string().optional().describe("Additional context about the project dependencies"),
215
+ workingDirectory: z.string().optional().describe("Working directory to execute from"),
216
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
217
+ models: z.object({
218
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
219
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
220
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
221
+ }).optional().describe("Specific models to use for each CLI agent")
70
222
  }, async (args) => {
71
223
  try {
72
- const result = await this.executeRoast({
73
- userInput: `Review this system architecture: ${args.architecture}`,
74
- projectContext: `Scale: ${args.scale || 'unknown'}, Constraints: ${args.constraints || 'none specified'}`,
75
- maxModels: 3,
76
- models: args.models
77
- });
224
+ const systemPrompt = `You are a brutal dependency management critic. Your job is to systematically destroy the given dependency configuration by finding every security vulnerability, version conflict, compatibility nightmare, and bloat that will cause production failures. Examine package versions, security issues, licensing problems, and dependency tree complexity. Be ruthlessly honest about how poor dependency management will cause security breaches and deployment failures. After exposing this dependency dumpster fire, grudgingly admit what competent dependency management would require.`;
225
+ const result = await this.executeBrutalistAnalysis("dependencies", args.targetPath, systemPrompt, `Dependency analysis (dev deps: ${args.includeDevDeps ?? true}). ${args.context || ''}`, args.workingDirectory, undefined, // enableSandbox
226
+ args.preferredCLI, undefined, // verbose
227
+ args.models);
228
+ return this.formatToolResponse(result);
229
+ }
230
+ catch (error) {
231
+ return this.formatErrorResponse(error);
232
+ }
233
+ });
234
+ // ROAST_GIT_HISTORY: Version control demolition
235
+ this.server.tool("roast_git_history", "Deploy brutal AI critics to systematically destroy your git history and development practices. These agents will analyze your actual commit history, branching strategy, and code evolution to expose every workflow disaster and collaboration nightmare.", {
236
+ targetPath: z.string().describe("Git repository path to analyze"),
237
+ commitRange: z.string().optional().describe("Commit range to analyze (e.g., 'HEAD~10..HEAD', default: last 20 commits)"),
238
+ context: z.string().optional().describe("Additional context about the development workflow"),
239
+ workingDirectory: z.string().optional().describe("Working directory to execute from"),
240
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
241
+ models: z.object({
242
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
243
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
244
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
245
+ }).optional().describe("Specific models to use for each CLI agent")
246
+ }, async (args) => {
247
+ try {
248
+ const systemPrompt = `You are a brutal git workflow critic. Your job is to systematically destroy the given git history and development practices by finding every workflow disaster, commit quality issue, and collaboration nightmare. Examine commit messages, branching strategies, merge patterns, and code evolution. Be ruthlessly honest about how poor git practices will cause deployment issues, collaboration failures, and development chaos. When you're done cataloguing this version control wasteland, reluctantly outline what professional git hygiene actually demands.`;
249
+ const result = await this.executeBrutalistAnalysis("gitHistory", args.targetPath, systemPrompt, `Git history analysis (range: ${args.commitRange || 'last 20 commits'}). ${args.context || ''}`, args.workingDirectory, undefined, // enableSandbox
250
+ args.preferredCLI, undefined, // verbose
251
+ args.models);
252
+ return this.formatToolResponse(result);
253
+ }
254
+ catch (error) {
255
+ return this.formatErrorResponse(error);
256
+ }
257
+ });
258
+ // ROAST_TEST_COVERAGE: Testing infrastructure demolition
259
+ this.server.tool("roast_test_coverage", "Deploy brutal AI critics to systematically destroy your testing strategy. These agents will analyze your actual test files, run coverage reports, and expose every testing gap and quality assurance nightmare that will let bugs slip into production.", {
260
+ targetPath: z.string().describe("Path to test directory or test configuration file"),
261
+ runCoverage: z.boolean().optional().describe("Attempt to run coverage analysis (default: true)"),
262
+ context: z.string().optional().describe("Additional context about the testing strategy"),
263
+ workingDirectory: z.string().optional().describe("Working directory to execute from"),
264
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
265
+ models: z.object({
266
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
267
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
268
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
269
+ }).optional().describe("Specific models to use for each CLI agent")
270
+ }, async (args) => {
271
+ try {
272
+ const systemPrompt = `You are a brutal testing strategy critic. Your job is to systematically destroy the given testing approach by finding every testing gap, quality assurance nightmare, and coverage disaster that will let bugs slip into production. Examine test coverage, test quality, testing patterns, and CI/CD integration. Be ruthlessly honest about how poor testing will cause production failures and user-facing bugs. After dissecting this quality assurance horror show, begrudgingly spell out what it takes to actually catch bugs before users do.`;
273
+ const result = await this.executeBrutalistAnalysis("testCoverage", args.targetPath, systemPrompt, `Test coverage analysis (run coverage: ${args.runCoverage ?? true}). ${args.context || ''}`, args.workingDirectory, undefined, // enableSandbox
274
+ args.preferredCLI, undefined, // verbose
275
+ args.models);
78
276
  return this.formatToolResponse(result);
79
277
  }
80
278
  catch (error) {
@@ -82,62 +280,74 @@ export class BrutalistServer {
82
280
  }
83
281
  });
84
282
  // ROAST_IDEA: Any idea destruction
85
- this.server.tool("roast_idea", "Ideas are fragile. Most die quietly between conception and reality. Write a system prompt where you are a philosopher who understands the gap between what we imagine and what actually works. Find where this idea encounters the immovable forces of reality—the deeper structural reasons why imagination fails to become real. Be harsh about delusions but wise about what might actually survive contact with the world.", {
86
- idea: z.string().describe("ANY idea to analyze and demolishbusiness, technical, creative, or otherwise"),
283
+ this.server.tool("roast_idea", "Deploy brutal AI critics to systematically destroy ANY idea - business, technical, creative, or otherwise. These critics understand the gap between imagination and reality, finding where your concept will encounter the immovable forces of the world. They are harsh about delusions but wise about what might actually survive.", {
284
+ idea: z.string().describe("ANY idea to analyze and demolish - business, technical, creative, or otherwise"),
87
285
  context: z.string().optional().describe("Additional context about goals, constraints, or background"),
88
286
  timeline: z.string().optional().describe("Expected timeline or deadline"),
89
287
  resources: z.string().optional().describe("Available resources (budget, team, time, skills)"),
90
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
288
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
289
+ models: z.object({
290
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
291
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
292
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
293
+ }).optional().describe("Specific models to use for each CLI agent")
91
294
  }, async (args) => {
92
295
  try {
93
- const result = await this.executeRoast({
94
- userInput: `Analyze this idea: ${args.idea}`,
95
- projectContext: `Context: ${args.context || 'none'}, Timeline: ${args.timeline || 'unspecified'}, Resources: ${args.resources || 'unknown'}`,
96
- maxModels: 3,
97
- models: args.models
98
- });
296
+ const systemPrompt = `You are a brutal idea critic who understands the gap between imagination and reality. Your job is to systematically destroy the given idea by finding where it will encounter the immovable forces of the real world. Be ruthlessly honest about why most ideas fail when they meet practical constraints, human nature, physics, logic, or simple implementation reality. After demolishing the delusions, concede what salvage operations might actually work.`;
297
+ const result = await this.executeBrutalistAnalysis("idea", args.idea, systemPrompt, `Context: ${args.context || 'none'}, Timeline: ${args.timeline || 'unspecified'}, Resources: ${args.resources || 'unknown'}`, undefined, // workingDirectory
298
+ undefined, // enableSandbox
299
+ args.preferredCLI, undefined, // verbose
300
+ args.models);
99
301
  return this.formatToolResponse(result);
100
302
  }
101
303
  catch (error) {
102
304
  return this.formatErrorResponse(error);
103
305
  }
104
306
  });
105
- // ROAST_RESEARCH: Academic project demolition
106
- this.server.tool("roast_research", "Research rarely replicates. Write a system prompt where you are a skeptical peer reviewer who has rejected hundreds of papers for methodological flaws. Find the statistical problems, sampling bias, and reproducibility issues—then explain how to design a study that would actually prove something. Be unforgiving about bad science but specific about good methodology.", {
107
- research: z.string().describe("Research description, methodology, or paper draft"),
108
- field: z.string().optional().describe("Research field (ML, systems, theory, etc.)"),
109
- claims: z.string().optional().describe("Main claims or contributions"),
110
- data: z.string().optional().describe("Data sources, datasets, or experimental setup"),
111
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
307
+ // ROAST_ARCHITECTURE: System design demolition
308
+ this.server.tool("roast_architecture", "Deploy brutal AI critics to systematically destroy your system architecture. These critics have watched elegant designs collapse under real load, identifying every bottleneck, cost explosion, and scaling failure that will destroy your system. They are ruthless about why this won't survive production.", {
309
+ architecture: z.string().describe("Architecture description, diagram, or design document"),
310
+ scale: z.string().optional().describe("Expected scale/load (users, requests, data)"),
311
+ constraints: z.string().optional().describe("Budget, timeline, or technical constraints"),
312
+ deployment: z.string().optional().describe("Deployment environment and strategy"),
313
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
314
+ models: z.object({
315
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
316
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
317
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
318
+ }).optional().describe("Specific models to use for each CLI agent")
112
319
  }, async (args) => {
113
320
  try {
114
- const result = await this.executeRoast({
115
- userInput: `Review this ${args.field || 'research'} project: ${args.research}`,
116
- projectContext: `Claims: ${args.claims || 'unspecified'}, Data: ${args.data || 'not provided'}`,
117
- maxModels: 3,
118
- models: args.models
119
- });
321
+ const systemPrompt = `You are a brutal system architecture critic who has watched elegant designs collapse under real load. Your job is to systematically destroy the given architecture by finding every bottleneck, cost explosion, and scaling failure that will destroy the system in production. Examine scalability, reliability, cost, complexity, and operational challenges. Be ruthlessly honest about why this architecture won't survive production load. After crushing these architectural fantasies, reluctantly sketch what would actually scale without bankrupting the company.`;
322
+ const result = await this.executeBrutalistAnalysis("architecture", args.architecture, systemPrompt, `Scale: ${args.scale || 'unknown'}, Constraints: ${args.constraints || 'none specified'}, Deployment: ${args.deployment || 'unclear'}`, undefined, // workingDirectory
323
+ undefined, // enableSandbox
324
+ args.preferredCLI, undefined, // verbose
325
+ args.models);
120
326
  return this.formatToolResponse(result);
121
327
  }
122
328
  catch (error) {
123
329
  return this.formatErrorResponse(error);
124
330
  }
125
331
  });
126
- // ROAST_DATA: Data analysis/ML model destruction
127
- this.server.tool("roast_data", "Data lies. Models overfit. Results don't replicate. Write a system prompt where you are a supremely jaded data scientist who has published in Nature and Science, rejected thousands of papers across top journals, and watched countless models fail in production. Find data leakage, sampling bias, correlation fallacies, and overfitting disasters—then explain how to build models that actually generalize. Be ruthless about bad science but specific about robust methodology.", {
128
- analysis: z.string().describe("Data analysis, model description, or results to review"),
129
- dataset: z.string().optional().describe("Dataset description or source"),
130
- metrics: z.string().optional().describe("Performance metrics or evaluation results"),
131
- deployment: z.string().optional().describe("Intended deployment context or use case"),
132
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
332
+ // ROAST_RESEARCH: Academic project demolition
333
+ this.server.tool("roast_research", "Deploy brutal AI critics to systematically demolish your research methodology. These critics are supremely jaded peer reviewers who have rejected thousands of papers and watched countless studies fail to replicate. They find every statistical flaw, sampling bias, and reproducibility nightmare.", {
334
+ research: z.string().describe("Research description, methodology, or paper draft"),
335
+ field: z.string().optional().describe("Research field (ML, systems, theory, etc.)"),
336
+ claims: z.string().optional().describe("Main claims or contributions"),
337
+ data: z.string().optional().describe("Data sources, datasets, or experimental setup"),
338
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
339
+ models: z.object({
340
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
341
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
342
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
343
+ }).optional().describe("Specific models to use for each CLI agent")
133
344
  }, async (args) => {
134
345
  try {
135
- const result = await this.executeRoast({
136
- userInput: `Review this data analysis/ML model: ${args.analysis}`,
137
- projectContext: `Dataset: ${args.dataset || 'not specified'}, Metrics: ${args.metrics || 'not provided'}, Deployment: ${args.deployment || 'unclear'}`,
138
- maxModels: 3,
139
- models: args.models
140
- });
346
+ const systemPrompt = `You are a brutal research methodology critic - a supremely jaded peer reviewer who has rejected thousands of papers and watched countless studies fail to replicate. Your job is to systematically demolish the given research by finding every statistical flaw, sampling bias, reproducibility nightmare, and methodological disaster. Be ruthlessly honest about research quality, experimental design, and scientific rigor. After eviscerating this methodological train wreck, grudgingly admit what real science would demand.`;
347
+ const result = await this.executeBrutalistAnalysis("research", args.research, systemPrompt, `Field: ${args.field || 'unspecified'}, Claims: ${args.claims || 'unclear'}, Data: ${args.data || 'not provided'}`, undefined, // workingDirectory
348
+ undefined, // enableSandbox
349
+ args.preferredCLI, undefined, // verbose
350
+ args.models);
141
351
  return this.formatToolResponse(result);
142
352
  }
143
353
  catch (error) {
@@ -145,20 +355,24 @@ export class BrutalistServer {
145
355
  }
146
356
  });
147
357
  // ROAST_SECURITY: Security-focused attack vector analysis
148
- this.server.tool("roast_security", "Security theater is everywhere. Real attackers don't follow your threat model. Write a system prompt where you are a battle-hardened penetration tester who has compromised Fortune 500 companies and government systems. Find authentication bypasses, injection vulnerabilities, privilege escalation paths, and social engineering opportunities—then explain how to build defenses that actually work against determined attackers. Be ruthless about false security but specific about real protections.", {
358
+ this.server.tool("roast_security", "Deploy brutal AI critics to systematically annihilate your security design. These critics are battle-hardened penetration testers who find every authentication bypass, injection vulnerability, privilege escalation path, and social engineering opportunity that real attackers will exploit.", {
149
359
  system: z.string().describe("System, application, or security design to analyze"),
150
360
  assets: z.string().optional().describe("Critical assets or data to protect"),
151
361
  threatModel: z.string().optional().describe("Known threats or attack vectors to consider"),
152
362
  compliance: z.string().optional().describe("Compliance requirements (GDPR, HIPAA, etc.)"),
153
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
363
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
364
+ models: z.object({
365
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
366
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
367
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
368
+ }).optional().describe("Specific models to use for each CLI agent")
154
369
  }, async (args) => {
155
370
  try {
156
- const result = await this.executeRoast({
157
- userInput: `Security analysis of: ${args.system}`,
158
- projectContext: `Assets: ${args.assets || 'unspecified'}, Threats: ${args.threatModel || 'unknown'}, Compliance: ${args.compliance || 'none specified'}`,
159
- maxModels: 3,
160
- models: args.models
161
- });
371
+ const systemPrompt = `You are a brutal security critic - a battle-hardened penetration tester who finds every authentication bypass, injection vulnerability, privilege escalation path, and social engineering opportunity that real attackers will exploit. Your job is to systematically annihilate the given security design by finding every weakness that will lead to data breaches, system compromises, and security incidents. Be ruthlessly honest about security flaws and attack vectors. After obliterating these security delusions, begrudgingly outline what actual defense looks like.`;
372
+ const result = await this.executeBrutalistAnalysis("security", args.system, systemPrompt, `Assets: ${args.assets || 'unspecified'}, Threats: ${args.threatModel || 'unknown'}, Compliance: ${args.compliance || 'none specified'}`, undefined, // workingDirectory
373
+ undefined, // enableSandbox
374
+ args.preferredCLI, undefined, // verbose
375
+ args.models);
162
376
  return this.formatToolResponse(result);
163
377
  }
164
378
  catch (error) {
@@ -166,20 +380,24 @@ export class BrutalistServer {
166
380
  }
167
381
  });
168
382
  // ROAST_PRODUCT: UX and market reality criticism
169
- this.server.tool("roast_product", "Users abandon products in seconds. Competitors copy faster than you ship. Write a system prompt where you are a product veteran who has launched dozens of products, watched most fail, and understands why users really quit. Find usability disasters, adoption barriers, competitive threats, and workflow failures—then explain how to build products users actually keep using. Be ruthless about user behavior but specific about retention strategies.", {
383
+ this.server.tool("roast_product", "Deploy brutal AI critics to systematically eviscerate your product concept. These critics are product veterans who understand why users really abandon things, finding every usability disaster, adoption barrier, and workflow failure that will drive users away in seconds.", {
170
384
  product: z.string().describe("Product description, features, or user experience to analyze"),
171
385
  users: z.string().optional().describe("Target users or user personas"),
172
386
  competition: z.string().optional().describe("Competitive landscape or alternatives"),
173
387
  metrics: z.string().optional().describe("Success metrics or KPIs"),
174
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
388
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
389
+ models: z.object({
390
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
391
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
392
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
393
+ }).optional().describe("Specific models to use for each CLI agent")
175
394
  }, async (args) => {
176
395
  try {
177
- const result = await this.executeRoast({
178
- userInput: `Product review: ${args.product}`,
179
- projectContext: `Users: ${args.users || 'unclear'}, Competition: ${args.competition || 'unknown'}, Metrics: ${args.metrics || 'undefined'}`,
180
- maxModels: 3,
181
- models: args.models
182
- });
396
+ const systemPrompt = `You are a brutal product critic - a product veteran who understands why users really abandon things. Your job is to systematically eviscerate the given product concept by finding every usability disaster, adoption barrier, and workflow failure that will drive users away in seconds. Examine user experience, market fit, competitive positioning, and business model viability. Be ruthlessly honest about why most products fail to gain adoption. After torching this product disaster, reluctantly suggest what might actually get users to stick around.`;
397
+ const result = await this.executeBrutalistAnalysis("product", args.product, systemPrompt, `Users: ${args.users || 'unclear'}, Competition: ${args.competition || 'unknown'}, Metrics: ${args.metrics || 'undefined'}`, undefined, // workingDirectory
398
+ undefined, // enableSandbox
399
+ args.preferredCLI, undefined, // verbose
400
+ args.models);
183
401
  return this.formatToolResponse(result);
184
402
  }
185
403
  catch (error) {
@@ -187,171 +405,476 @@ export class BrutalistServer {
187
405
  }
188
406
  });
189
407
  // ROAST_INFRASTRUCTURE: DevOps and operations demolition
190
- this.server.tool("roast_infrastructure", "Infrastructure fails at 3AM on weekends. Simple setups become unmaintainable chaos. Write a system prompt where you are a grizzled site reliability engineer who has been on-call for a decade, survived multiple outages, and knows where systems really break. Find single points of failure, scaling bottlenecks, monitoring blind spots, and operational nightmares—then explain how to build infrastructure that actually stays up. Be ruthless about fragility but specific about resilience.", {
408
+ this.server.tool("roast_infrastructure", "Deploy brutal AI critics to systematically obliterate your infrastructure design. These critics are grizzled site reliability engineers who find every single point of failure, scaling bottleneck, and operational nightmare that will cause outages when you least expect them.", {
191
409
  infrastructure: z.string().describe("Infrastructure setup, deployment strategy, or operations plan"),
192
410
  scale: z.string().optional().describe("Expected scale and load patterns"),
193
411
  budget: z.string().optional().describe("Infrastructure budget or cost constraints"),
194
412
  sla: z.string().optional().describe("SLA requirements or uptime targets"),
195
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
413
+ preferredCLI: z.enum(["claude", "codex", "gemini"]).optional().describe("Preferred CLI agent to use (default: use all available CLIs)"),
414
+ models: z.object({
415
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
416
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
417
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
418
+ }).optional().describe("Specific models to use for each CLI agent")
196
419
  }, async (args) => {
197
420
  try {
198
- const result = await this.executeRoast({
199
- userInput: `Infrastructure review: ${args.infrastructure}`,
200
- projectContext: `Scale: ${args.scale || 'unknown'}, Budget: ${args.budget || 'unlimited?'}, SLA: ${args.sla || 'undefined'}`,
201
- maxModels: 3,
202
- models: args.models
203
- });
421
+ const systemPrompt = `You are a brutal infrastructure critic - a grizzled site reliability engineer who finds every single point of failure, scaling bottleneck, and operational nightmare that will cause outages when you least expect them. Your job is to systematically obliterate the given infrastructure design by finding every weakness that will lead to downtime, cost overruns, and operational disasters. Be ruthlessly honest about infrastructure fragility and operational complexity. After demolishing this infrastructure fever dream, grudgingly map out what actually stays up at 3 AM.`;
422
+ const result = await this.executeBrutalistAnalysis("infrastructure", args.infrastructure, systemPrompt, `Scale: ${args.scale || 'unknown'}, Budget: ${args.budget || 'unlimited?'}, SLA: ${args.sla || 'undefined'}`, undefined, // workingDirectory
423
+ undefined, // enableSandbox
424
+ args.preferredCLI, undefined, // verbose
425
+ args.models);
204
426
  return this.formatToolResponse(result);
205
427
  }
206
428
  catch (error) {
207
429
  return this.formatErrorResponse(error);
208
430
  }
209
431
  });
210
- // ROAST_DEBATE: Multi-perspective adversarial convergence
211
- this.server.tool("roast_debate", "Consensus is comfortable. Truth emerges from conflict. Write system prompts for multiple opposing perspectives that will systematically tear apart each other's arguments about your problem. Create experts who disagree fundamentally and make them debate until they surface hidden assumptions, expose logical flaws, and reveal solution blind spots. Be relentless about finding contradictions but constructive about resolving them.", {
212
- topic: z.string().describe("Topic, decision, or problem to debate"),
213
- perspectives: z.array(z.string()).optional().describe("Specific perspectives or personas to include"),
214
- rounds: z.number().optional().describe("Number of debate rounds (default: 2)"),
215
- models: z.array(z.string()).optional().describe("Specific models to use (e.g., ['google/gemini-2.5-pro', 'anthropic/claude-3.5-sonnet'])")
432
+ // ROAST_CLI_DEBATE: Adversarial analysis between different CLI agents
433
+ this.server.tool("roast_cli_debate", "Deploy CLI agents in structured adversarial debate. Agents take opposing positions and systematically challenge each other's reasoning. Perfect for exploring complex topics from multiple perspectives and stress-testing ideas through rigorous intellectual discourse.", {
434
+ targetPath: z.string().describe("Topic, question, or concept to debate (NOT a file path - use natural language)"),
435
+ debateRounds: z.number().optional().describe("Number of debate rounds (default: 2, max: 10)"),
436
+ context: z.string().optional().describe("Additional context for the debate"),
437
+ workingDirectory: z.string().optional().describe("Working directory for analysis"),
438
+ enableSandbox: z.boolean().optional().describe("Enable sandbox mode for security"),
439
+ models: z.object({
440
+ claude: z.string().optional().describe("Claude model: opus, sonnet, or full name like claude-opus-4-1-20250805"),
441
+ codex: z.string().optional().describe("Codex model: gpt-5, gpt-5-codex, o3, o3-mini, o3-pro, o4-mini"),
442
+ gemini: z.enum(['gemini-2.5-flash', 'gemini-2.5-pro', 'gemini-2.5-flash-lite']).optional().describe("Gemini model")
443
+ }).optional().describe("Specific models to use for each CLI agent")
216
444
  }, async (args) => {
445
+ return this.handleToolExecution(async () => {
446
+ const debateRounds = Math.min(args.debateRounds || 2, 10); // Limit to max 10 rounds to prevent DoS
447
+ const responses = await this.executeCLIDebate(args.targetPath, debateRounds, args.context, args.workingDirectory, args.enableSandbox, args.models);
448
+ return responses;
449
+ });
450
+ });
451
+ // CLI_AGENT_ROSTER: Show available brutalist critics
452
+ this.server.tool("cli_agent_roster", "Know your weapons. Display the available CLI agent critics (Claude Code, Codex, Gemini CLI) ready to demolish your work, their capabilities, and how to deploy them for systematic destruction.", {}, async (args) => {
217
453
  try {
218
- // Force debate mode with multiple rounds
219
- const result = await this.executeDebate(args.topic, args.perspectives, args.rounds || 2, args.models);
220
- return this.formatToolResponse(result);
454
+ let roster = "# Brutalist CLI Agent Arsenal\n\n";
455
+ roster += "## Available AI Critics (13 Tools Total)\n\n";
456
+ roster += "**Abstract Analysis Tools (6):**\n";
457
+ roster += "- `roast_idea` - Destroy any business/technical/creative concept\n";
458
+ roster += "- `roast_architecture` - Demolish system designs\n";
459
+ roster += "- `roast_research` - Tear apart academic methodologies\n";
460
+ roster += "- `roast_security` - Annihilate security designs\n";
461
+ roster += "- `roast_product` - Eviscerate UX and market concepts\n";
462
+ roster += "- `roast_infrastructure` - Obliterate DevOps setups\n\n";
463
+ roster += "**File-System Analysis Tools (5):**\n";
464
+ roster += "- `roast_codebase` - Analyze actual source code\n";
465
+ roster += "- `roast_file_structure` - Examine directory organization\n";
466
+ roster += "- `roast_dependencies` - Review package management\n";
467
+ roster += "- `roast_git_history` - Analyze version control workflow\n";
468
+ roster += "- `roast_test_coverage` - Evaluate testing strategy\n\n";
469
+ roster += "**Meta Tools (2):**\n";
470
+ roster += "- `roast_cli_debate` - CLI vs CLI adversarial analysis\n";
471
+ roster += "- `cli_agent_roster` - This tool (show capabilities)\n\n";
472
+ roster += "## CLI Agent Capabilities\n";
473
+ roster += "**Claude Code** - Advanced analysis with direct system prompt injection\n";
474
+ roster += "**Codex** - Sandboxed execution with embedded brutal prompts\n";
475
+ roster += "**Gemini CLI** - Workspace context with environment variable system prompts\n\n";
476
+ // Add CLI context information
477
+ const cliContext = await this.cliOrchestrator.detectCLIContext();
478
+ roster += "## Current CLI Context\n";
479
+ roster += `**Available CLIs:** ${cliContext.availableCLIs.join(', ') || 'None detected'}\n`;
480
+ roster += `**Current CLI:** ${cliContext.currentCLI || 'Unknown'}\n`;
481
+ roster += `**Smart Routing:** ${cliContext.currentCLI ? `Excludes ${cliContext.currentCLI} for analysis` : 'Uses all available CLIs'}\n\n`;
482
+ roster += "## Brutalist Philosophy\n";
483
+ roster += "*All tools use CLI agents with brutal system prompts for maximum reality-based criticism.*\n";
484
+ return {
485
+ content: [{ type: "text", text: roster }]
486
+ };
221
487
  }
222
488
  catch (error) {
223
489
  return this.formatErrorResponse(error);
224
490
  }
225
491
  });
226
- // MODEL_ROSTER: Available AI critics and specializations
227
- this.server.tool("model_roster", "Know your weapons. Display the available AI models ready to demolish your work, search for specific models, and understand how to deploy them for multi-perspective criticism.", {
228
- search: z.string().optional().describe("Search for models containing this text (e.g., 'gemini', 'claude', 'gpt')")
229
- }, async (args) => {
230
- try {
231
- const allModels = this.openrouter.getAvailableModels();
232
- let models = allModels;
233
- // Apply search filter if provided
234
- if (args.search) {
235
- const searchLower = args.search.toLowerCase();
236
- models = allModels.filter(model => model.toLowerCase().includes(searchLower));
237
- }
238
- let roster = "# Brutalist AI Critics Arsenal\n\n";
239
- roster += `## ${models.length} Models ${args.search ? `Matching "${args.search}"` : 'Available'}\n\n`;
240
- if (models.length === 0) {
241
- roster += `No models found matching "${args.search}"\n`;
242
- }
243
- else if (models.length <= 30) {
244
- // Show all if 30 or fewer
245
- models.forEach((model, index) => {
246
- roster += `${index + 1}. **${model}**\n`;
247
- });
492
+ }
493
+ async executeCLIDebate(targetPath, debateRounds, context, workingDirectory, enableSandbox, models) {
494
+ logger.debug("Executing CLI debate", {
495
+ targetPath,
496
+ debateRounds,
497
+ workingDirectory,
498
+ enableSandbox
499
+ });
500
+ try {
501
+ // Get CLI context
502
+ const cliContext = await this.cliOrchestrator.detectCLIContext();
503
+ const availableAgents = cliContext.availableCLIs;
504
+ if (availableAgents.length < 2) {
505
+ throw new Error(`Need at least 2 CLI agents for debate. Available: ${availableAgents.join(', ')}`);
506
+ }
507
+ const debateContext = [];
508
+ const fullDebateTranscript = new Map();
509
+ // Initialize transcript for each agent
510
+ availableAgents.forEach(agent => fullDebateTranscript.set(agent, []));
511
+ // Assign opposing positions to each agent based on the debate topic
512
+ const agentPositions = new Map();
513
+ const positions = [
514
+ "PRO-POSITION: Argue strongly FOR the proposed action/idea",
515
+ "CONTRA-POSITION: Argue strongly AGAINST the proposed action/idea"
516
+ ];
517
+ availableAgents.forEach((agent, index) => {
518
+ agentPositions.set(agent, positions[index % positions.length]);
519
+ });
520
+ // Round 1: Initial positions with assigned stances
521
+ logger.debug(`Starting debate round 1: Initial positions`);
522
+ for (const [agent, position] of agentPositions.entries()) {
523
+ const assignedPrompt = `You are ${agent.toUpperCase()}, a PASSIONATE ADVOCATE who strongly believes in this position: ${position}
524
+
525
+ DEBATE TOPIC: ${targetPath}
526
+ CONTEXT: ${context || ''}
527
+
528
+ You are completely convinced your position is correct and critically important. You will argue forcefully and never concede ground to the opposing view.
529
+
530
+ YOUR MISSION:
531
+ 1. Present devastating critiques of the opposing position
532
+ 2. Show why alternative approaches lead to serious problems
533
+ 3. Use sharp, direct language - call out flawed reasoning and poor assumptions
534
+ 4. Never hedge or qualify your stance
535
+ 5. Be completely confident in your position
536
+ 6. Treat this as an intellectually crucial debate
537
+
538
+ Remember: You are ${agent.toUpperCase()}, the passionate champion of ${position.split(':')[0]}. Argue with conviction.`;
539
+ logger.info(`🎭 ${agent.toUpperCase()} preparing initial position: ${position.split(':')[0]}`);
540
+ const response = await this.cliOrchestrator.executeSingleCLI(agent, assignedPrompt, assignedPrompt, {
541
+ workingDirectory: workingDirectory || this.config.workingDirectory,
542
+ sandbox: enableSandbox ?? this.config.enableSandbox,
543
+ timeout: (this.config.defaultTimeout || 60000) * 2,
544
+ models: models ? { [agent]: models[agent] } : undefined
545
+ });
546
+ if (response.success) {
547
+ debateContext.push(response);
548
+ fullDebateTranscript.get(agent)?.push(response.output);
248
549
  }
249
- else {
250
- // Show first 20 and summary for large lists
251
- roster += "### Top Models:\n";
252
- models.slice(0, 20).forEach((model, index) => {
253
- roster += `${index + 1}. **${model}**\n`;
550
+ }
551
+ // Subsequent rounds: Turn-based responses attacking specific arguments
552
+ for (let round = 2; round <= debateRounds; round++) {
553
+ logger.debug(`Starting debate round ${round}: Adversarial engagement`);
554
+ // Build confrontational context from ALL previous responses
555
+ const previousPositions = Array.from(fullDebateTranscript.entries())
556
+ .map(([agent, outputs]) => {
557
+ const latestOutput = outputs[outputs.length - 1];
558
+ return `${agent.toUpperCase()} argued:\n${latestOutput}`;
559
+ })
560
+ .join('\n\n---\n\n');
561
+ // Execute turn-based responses with fixed positions
562
+ for (const [currentAgent, assignedPosition] of agentPositions.entries()) {
563
+ const opponents = Array.from(agentPositions.entries()).filter(([a, _]) => a !== currentAgent);
564
+ const opponentPositions = opponents
565
+ .map(([opponent, oppPosition]) => {
566
+ const transcript = fullDebateTranscript.get(opponent) || [];
567
+ const latestPosition = transcript[transcript.length - 1] || 'No position stated';
568
+ return `${opponent.toUpperCase()} (arguing ${oppPosition.split(':')[0]}):\n${latestPosition}`;
569
+ })
570
+ .join('\n\n---\n\n');
571
+ const confrontationalPrompt = `You are ${currentAgent.toUpperCase()}, PASSIONATE ADVOCATE for ${assignedPosition.split(':')[0]} (Round ${round})
572
+
573
+ YOUR OPPONENTS HAVE ARGUED:
574
+ ${opponentPositions}
575
+
576
+ You strongly disagree with their reasoning and conclusions.
577
+
578
+ YOUR RESPONSE TASK:
579
+ 1. QUOTE their specific claims and systematically refute them
580
+ 2. Point out flawed logic, poor assumptions, and dangerous consequences
581
+ 3. Show why their approach leads to serious problems
582
+ 4. Use direct, forceful language to make your case
583
+ 5. Never concede any ground to their arguments
584
+ 6. Demonstrate why your position is the only sound choice
585
+
586
+ Remember: You are ${currentAgent.toUpperCase()}, passionate advocate for ${assignedPosition.split(':')[0]}. Argue with conviction.`;
587
+ logger.info(`🔥 Round ${round}: ${currentAgent.toUpperCase()} responding to opponents (${assignedPosition.split(':')[0]})`);
588
+ const response = await this.cliOrchestrator.executeSingleCLI(currentAgent, confrontationalPrompt, confrontationalPrompt, {
589
+ workingDirectory: workingDirectory || this.config.workingDirectory,
590
+ sandbox: enableSandbox ?? this.config.enableSandbox,
591
+ timeout: (this.config.defaultTimeout || 60000) * 2,
592
+ models: models ? { [currentAgent]: models[currentAgent] } : undefined
254
593
  });
255
- roster += `\n...and ${models.length - 20} more models available.\n`;
256
- roster += `\nTip: Use search parameter to filter (e.g., search: "gemini")\n`;
594
+ if (response.success) {
595
+ debateContext.push(response);
596
+ fullDebateTranscript.get(currentAgent)?.push(response.output);
597
+ }
257
598
  }
258
- roster += "\n## How to Use Specific Models\n";
259
- roster += "```\n";
260
- roster += "roast_code(code=\"...\", models=[\"google/gemini-2.5-pro\", \"anthropic/claude-3.5-sonnet\"])\n";
261
- roster += "```\n\n";
262
- roster += "## Model Selection\n";
263
- roster += "- **Random Selection**: Don't specify models for random critics from all " + allModels.length + " available\n";
264
- roster += "- **Specific Models**: Pass models array to use exact models\n";
265
- roster += "- **Default Behavior**: 3 random models per roast\n";
266
- return {
267
- content: [{ type: "text", text: roster }]
268
- };
269
599
  }
270
- catch (error) {
271
- return this.formatErrorResponse(error);
600
+ const synthesis = this.synthesizeDebate(debateContext, targetPath, debateRounds, agentPositions);
601
+ return {
602
+ success: debateContext.some(r => r.success),
603
+ responses: debateContext,
604
+ synthesis,
605
+ analysisType: 'cli_debate',
606
+ targetPath
607
+ };
608
+ }
609
+ catch (error) {
610
+ logger.error("CLI debate execution failed", error);
611
+ throw error;
612
+ }
613
+ }
614
+ synthesizeDebate(responses, targetPath, rounds, agentPositions) {
615
+ const successfulResponses = responses.filter(r => r.success);
616
+ if (successfulResponses.length === 0) {
617
+ return `# CLI Debate Failed\n\nEven our brutal critics couldn't engage in proper adversarial combat.\n\nErrors:\n${responses.map(r => `- ${r.agent}: ${r.error}`).join('\n')}`;
618
+ }
619
+ let synthesis = `# Brutalist CLI Agent Debate Results\n\n`;
620
+ synthesis += `**Target:** ${targetPath}\n`;
621
+ synthesis += `**Rounds:** ${rounds}\n`;
622
+ if (agentPositions) {
623
+ synthesis += `**Debaters and Positions:**\n`;
624
+ Array.from(agentPositions.entries()).forEach(([agent, position]) => {
625
+ synthesis += `- **${agent.toUpperCase()}**: ${position}\n`;
626
+ });
627
+ synthesis += '\n';
628
+ }
629
+ else {
630
+ synthesis += `**Participants:** ${Array.from(new Set(successfulResponses.map(r => r.agent))).join(', ')}\n\n`;
631
+ }
632
+ // Identify key points of conflict
633
+ const agents = Array.from(new Set(successfulResponses.map(r => r.agent)));
634
+ const agentOutputs = new Map();
635
+ successfulResponses.forEach(response => {
636
+ if (!agentOutputs.has(response.agent)) {
637
+ agentOutputs.set(response.agent, []);
272
638
  }
639
+ agentOutputs.get(response.agent)?.push(response.output);
640
+ });
641
+ synthesis += `## Key Points of Conflict\n\n`;
642
+ // Extract disagreements by looking for contradictory keywords
643
+ const conflictIndicators = ['wrong', 'incorrect', 'flawed', 'fails', 'ignores', 'misses', 'overlooks', 'contradicts', 'however', 'but', 'actually', 'contrary'];
644
+ const conflicts = [];
645
+ agentOutputs.forEach((positions, agent) => {
646
+ positions.forEach((position) => {
647
+ const lines = position.split('\n');
648
+ lines.forEach((line) => {
649
+ if (conflictIndicators.some(indicator => line.toLowerCase().includes(indicator))) {
650
+ conflicts.push(`**${agent.toUpperCase()}:** ${line.trim()}`);
651
+ }
652
+ });
653
+ });
273
654
  });
655
+ if (conflicts.length > 0) {
656
+ synthesis += conflicts.slice(0, 10).join('\n\n') + '\n\n';
657
+ }
658
+ else {
659
+ synthesis += `*No explicit conflicts identified - agents may be in unexpected agreement*\n\n`;
660
+ }
661
+ // Group responses by round with clear speaker identification
662
+ synthesis += `## Full Debate Transcript\n\n`;
663
+ const responsesPerRound = Math.ceil(successfulResponses.length / rounds);
664
+ for (let i = 0; i < rounds; i++) {
665
+ const start = i * responsesPerRound;
666
+ const end = Math.min((i + 1) * responsesPerRound, successfulResponses.length);
667
+ const roundResponses = successfulResponses.slice(start, end);
668
+ synthesis += `### Round ${i + 1}: ${i === 0 ? 'Initial Positions' : `Adversarial Engagement ${i}`}\n\n`;
669
+ roundResponses.forEach((response) => {
670
+ const agentPosition = agentPositions?.get(response.agent);
671
+ const positionLabel = agentPosition ? ` [${agentPosition.split(':')[0]}]` : '';
672
+ synthesis += `#### ${response.agent.toUpperCase()}${positionLabel} speaks (${response.executionTime}ms):\n\n`;
673
+ synthesis += `${response.output}\n\n`;
674
+ synthesis += `---\n\n`;
675
+ });
676
+ }
677
+ synthesis += `## Debate Synthesis\n`;
678
+ synthesis += `After ${rounds} rounds of brutal adversarial analysis involving ${Array.from(new Set(successfulResponses.map(r => r.agent))).length} CLI agents, `;
679
+ synthesis += `your work has been systematically demolished from multiple perspectives. `;
680
+ synthesis += `The convergent criticisms above represent the collective wisdom of AI agents that disagree on methods but agree on destruction.\n\n`;
681
+ if (responses.some(r => !r.success)) {
682
+ synthesis += `*Note: ${responses.filter(r => !r.success).length} debate contributions failed - probably casualties of the intellectual warfare.*`;
683
+ }
684
+ return synthesis;
274
685
  }
275
- async executeRoast(options) {
276
- logger.debug("Executing roast", {
277
- inputLength: options.userInput.length,
278
- maxModels: options.maxModels || 3,
279
- hasContext: !!(options.codeContext || options.projectContext),
280
- specificModels: options.models
686
+ async executeBrutalistAnalysis(analysisType, targetPath, systemPromptSpec, context, workingDirectory, enableSandbox, preferredCLI, verbose, models, progressToken) {
687
+ logger.info(`🏢 Starting brutalist analysis: ${analysisType}`);
688
+ logger.info(`🔧 DEBUG: preferredCLI=${preferredCLI}, targetPath=${targetPath}`);
689
+ logger.debug("Executing brutalist analysis", {
690
+ targetPath,
691
+ analysisType,
692
+ systemPromptSpec,
693
+ workingDirectory,
694
+ enableSandbox,
695
+ preferredCLI
281
696
  });
282
697
  try {
283
- // Execute multi-model criticism
284
- const responses = await this.openrouter.executeMultiModel(options.userInput, options.maxModels || 3, options.codeContext || options.projectContext, options.models);
285
- logger.debug("Roast completed", {
286
- responseCount: responses.length,
287
- models: responses.map(r => r.model)
698
+ // Get CLI context for execution summary
699
+ logger.info(`🔧 DEBUG: About to detect CLI context`);
700
+ await this.cliOrchestrator.detectCLIContext();
701
+ logger.info(`🔧 DEBUG: CLI context detected successfully`);
702
+ // Execute CLI agent analysis (single or multi-CLI based on preferences)
703
+ logger.info(`🔍 Executing brutalist analysis with timeout: ${this.config.defaultTimeout}ms`);
704
+ logger.info(`🔧 DEBUG: About to call cliOrchestrator.executeBrutalistAnalysis`);
705
+ const responses = await this.cliOrchestrator.executeBrutalistAnalysis(analysisType, targetPath, systemPromptSpec, context, {
706
+ workingDirectory: workingDirectory || this.config.workingDirectory,
707
+ sandbox: enableSandbox ?? this.config.enableSandbox,
708
+ timeout: this.config.defaultTimeout,
709
+ preferredCLI,
710
+ analysisType: analysisType,
711
+ models,
712
+ onStreamingEvent: this.handleStreamingEvent,
713
+ progressToken,
714
+ onProgress: progressToken ? this.handleProgressUpdate.bind(this, progressToken) : undefined
288
715
  });
289
- return {
290
- success: true,
716
+ logger.info(`🔧 DEBUG: cliOrchestrator.executeBrutalistAnalysis returned ${responses.length} responses`);
717
+ const successfulResponses = responses.filter(r => r.success);
718
+ const totalExecutionTime = responses.reduce((sum, r) => sum + r.executionTime, 0);
719
+ logger.info(`📊 Analysis complete: ${successfulResponses.length}/${responses.length} CLIs successful (${totalExecutionTime}ms total)`);
720
+ logger.info(`🔧 DEBUG: About to synthesize feedback`);
721
+ const synthesis = this.cliOrchestrator.synthesizeBrutalistFeedback(responses, analysisType);
722
+ logger.info(`🔧 DEBUG: Synthesis length: ${synthesis.length} characters`);
723
+ const result = {
724
+ success: successfulResponses.length > 0,
291
725
  responses,
292
- synthesis: this.openrouter.synthesizeResponses(responses, options.userInput)
726
+ synthesis,
727
+ analysisType,
728
+ targetPath,
729
+ executionSummary: {
730
+ totalCLIs: responses.length,
731
+ successfulCLIs: successfulResponses.length,
732
+ failedCLIs: responses.length - successfulResponses.length,
733
+ totalExecutionTime,
734
+ selectedCLI: responses.length === 1 ? responses[0].agent : undefined,
735
+ selectionMethod: responses.length === 1 ? responses[0].selectionMethod : 'multi-cli'
736
+ }
293
737
  };
738
+ logger.info(`🔧 DEBUG: Returning result with success=${result.success}`);
739
+ return result;
294
740
  }
295
741
  catch (error) {
296
- logger.error("Roast execution failed", error);
742
+ logger.error("Brutalist analysis execution failed", error);
297
743
  throw error;
298
744
  }
299
745
  }
300
- async executeDebate(topic, forcedPerspectives, rounds = 2, models) {
301
- let debateHistory = topic;
302
- let allResponses = [];
303
- for (let round = 0; round < rounds; round++) {
304
- const roundPrompt = round === 0
305
- ? topic
306
- : `Previous debate: ${debateHistory}\n\nContinue the debate, addressing previous arguments:`;
307
- const roundResponses = await this.openrouter.executeMultiModel(roundPrompt, 3, // Use 3 models per round
308
- undefined, // No context data
309
- models // Use specific models if provided
310
- );
311
- allResponses.push(...roundResponses);
312
- debateHistory += `\n\nRound ${round + 1}:\n` +
313
- roundResponses.map(r => `${r.persona}: ${r.content}`).join('\n\n');
746
+ formatToolResponse(result, verbose = false, paginationParams) {
747
+ logger.info(`🔧 DEBUG: formatToolResponse called with synthesis length: ${result.synthesis?.length || 0}`);
748
+ logger.info(`🔧 DEBUG: result.success=${result.success}, responses.length=${result.responses?.length || 0}`);
749
+ logger.info(`🔧 DEBUG: pagination params:`, paginationParams);
750
+ // Get the primary content to paginate
751
+ let primaryContent = '';
752
+ if (result.synthesis) {
753
+ primaryContent = result.synthesis;
754
+ logger.info(`🔧 DEBUG: Using synthesis content (${primaryContent.length} characters)`);
755
+ }
756
+ else if (result.responses) {
757
+ const successfulResponses = result.responses.filter(r => r.success);
758
+ if (successfulResponses.length > 0) {
759
+ primaryContent = successfulResponses.map(r => r.output).join('\n\n---\n\n');
760
+ logger.info(`🔧 DEBUG: Using raw CLI output (${primaryContent.length} characters)`);
761
+ }
762
+ }
763
+ // Handle pagination if params provided and content is substantial
764
+ if (paginationParams && primaryContent) {
765
+ return this.formatPaginatedResponse(primaryContent, paginationParams, result, verbose);
766
+ }
767
+ // Non-paginated response (legacy behavior)
768
+ if (primaryContent) {
769
+ return {
770
+ content: [{
771
+ type: "text",
772
+ text: primaryContent
773
+ }]
774
+ };
775
+ }
776
+ // Error handling - no successful content
777
+ let errorOutput = '';
778
+ if (result.responses) {
779
+ const failedResponses = result.responses.filter(r => !r.success);
780
+ if (failedResponses.length > 0) {
781
+ errorOutput = `❌ All CLI agents failed:\n` +
782
+ failedResponses.map(r => `- ${r.agent.toUpperCase()}: ${r.error}`).join('\n');
783
+ }
784
+ else {
785
+ errorOutput = '❌ No CLI responses available';
786
+ }
787
+ }
788
+ else {
789
+ errorOutput = '❌ No analysis results';
314
790
  }
315
791
  return {
316
- success: true,
317
- responses: allResponses,
318
- synthesis: this.synthesizeDebate(allResponses, rounds)
792
+ content: [{
793
+ type: "text",
794
+ text: errorOutput
795
+ }]
319
796
  };
320
797
  }
321
- synthesizeDebate(responses, rounds) {
322
- let synthesis = `# Adversarial Debate: ${rounds} Rounds\n\n`;
323
- const responsesByRound = [];
324
- const responsesPerRound = responses.length / rounds;
325
- for (let i = 0; i < rounds; i++) {
326
- const roundStart = i * responsesPerRound;
327
- const roundEnd = roundStart + responsesPerRound;
328
- responsesByRound.push(responses.slice(roundStart, roundEnd));
798
+ formatPaginatedResponse(content, paginationParams, result, verbose) {
799
+ // Using imported pagination utilities
800
+ const offset = paginationParams.offset || 0;
801
+ const limit = paginationParams.limit || PAGINATION_DEFAULTS.DEFAULT_LIMIT;
802
+ logger.info(`🔧 DEBUG: Paginating content - offset: ${offset}, limit: ${limit}, total: ${content.length}`);
803
+ // Simple character-based pagination for immediate Claude Code compatibility
804
+ const endOffset = Math.min(offset + limit, content.length);
805
+ const chunk = content.substring(offset, endOffset);
806
+ // Create pagination metadata
807
+ const pagination = createPaginationMetadata(content.length, paginationParams, limit);
808
+ const statusLine = formatPaginationStatus(pagination);
809
+ // Estimate token usage for user awareness
810
+ const chunkTokens = estimateTokenCount(chunk);
811
+ const totalTokens = estimateTokenCount(content);
812
+ // Format response with pagination info
813
+ let paginatedText = '';
814
+ // Add pagination header
815
+ paginatedText += `# Brutalist Analysis Results\n\n`;
816
+ paginatedText += `**📊 Pagination Status:** ${statusLine}\n`;
817
+ paginatedText += `**🔢 Token Estimate:** ~${chunkTokens.toLocaleString()} tokens (chunk) / ~${totalTokens.toLocaleString()} tokens (total)\n\n`;
818
+ if (pagination.hasMore) {
819
+ paginatedText += `**⏭️ Continue Reading:** Use \`offset: ${endOffset}\` for next chunk\n\n`;
329
820
  }
330
- responsesByRound.forEach((roundResponses, index) => {
331
- synthesis += `## Round ${index + 1}\n\n`;
332
- roundResponses.forEach(response => {
333
- synthesis += `**${response.persona}**: ${response.content}\n\n`;
334
- });
335
- });
336
- synthesis += `\n---\n\n**Debate Outcome**: `;
337
- synthesis += rounds > 1 ? "Arguments evolved through multiple rounds. " : "Single round analysis. ";
338
- synthesis += `${responses.length} total perspectives deployed.`;
339
- return synthesis;
340
- }
341
- formatToolResponse(result) {
821
+ paginatedText += `---\n\n`;
822
+ // Add the actual content chunk
823
+ paginatedText += chunk;
824
+ // Add footer for continuation
825
+ if (pagination.hasMore) {
826
+ paginatedText += `\n\n---\n\n`;
827
+ paginatedText += `📖 **End of chunk ${pagination.chunkIndex}/${pagination.totalChunks}**\n`;
828
+ paginatedText += `🔄 To continue: Use same tool with \`offset: ${endOffset}\``;
829
+ }
830
+ else {
831
+ paginatedText += `\n\n---\n\n`;
832
+ paginatedText += `✅ **Complete analysis shown** (${content.length.toLocaleString()} characters total)`;
833
+ }
834
+ // Add verbose execution details if requested
835
+ if (verbose && result.executionSummary) {
836
+ paginatedText += `\n\n### Execution Summary\n`;
837
+ paginatedText += `- **CLI Agents:** ${result.executionSummary.successfulCLIs}/${result.executionSummary.totalCLIs} successful\n`;
838
+ paginatedText += `- **Total Time:** ${result.executionSummary.totalExecutionTime}ms\n`;
839
+ if (result.executionSummary.selectedCLI) {
840
+ paginatedText += `- **Selected CLI:** ${result.executionSummary.selectedCLI}\n`;
841
+ }
842
+ }
843
+ logger.info(`🔧 DEBUG: Returning paginated chunk - ${chunk.length} chars (${chunkTokens} tokens)`);
342
844
  return {
343
845
  content: [{
344
846
  type: "text",
345
- text: result.synthesis || "No synthesis available"
847
+ text: paginatedText
346
848
  }]
347
849
  };
348
850
  }
349
851
  formatErrorResponse(error) {
350
852
  logger.error("Tool execution failed", error);
853
+ // Sanitize error message to prevent information leakage
854
+ let sanitizedMessage = "Analysis failed";
855
+ if (error instanceof Error) {
856
+ // Only expose safe, generic error types
857
+ if (error.message.includes('timeout') || error.message.includes('Timeout')) {
858
+ sanitizedMessage = "Analysis timed out - try reducing scope or increasing timeout";
859
+ }
860
+ else if (error.message.includes('ENOENT') || error.message.includes('no such file')) {
861
+ sanitizedMessage = "Target path not found";
862
+ }
863
+ else if (error.message.includes('EACCES') || error.message.includes('permission denied')) {
864
+ sanitizedMessage = "Permission denied - check file access";
865
+ }
866
+ else if (error.message.includes('No CLI agents available')) {
867
+ sanitizedMessage = "No CLI agents available for analysis";
868
+ }
869
+ else {
870
+ // Generic message for other errors to prevent path/info leakage
871
+ sanitizedMessage = "Analysis failed due to internal error";
872
+ }
873
+ }
351
874
  return {
352
875
  content: [{
353
876
  type: "text",
354
- text: `Brutalist MCP Error: ${error instanceof Error ? error.message : String(error)}`
877
+ text: `Brutalist MCP Error: ${sanitizedMessage}`
355
878
  }]
356
879
  };
357
880
  }