@brutalist/mcp 1.8.1 → 1.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/README.md +32 -0
  2. package/dist/brutalist-server.d.ts +31 -9
  3. package/dist/brutalist-server.d.ts.map +1 -1
  4. package/dist/brutalist-server.js +107 -673
  5. package/dist/brutalist-server.js.map +1 -1
  6. package/dist/cli-adapters/claude-adapter.d.ts +25 -0
  7. package/dist/cli-adapters/claude-adapter.d.ts.map +1 -0
  8. package/dist/cli-adapters/claude-adapter.js +245 -0
  9. package/dist/cli-adapters/claude-adapter.js.map +1 -0
  10. package/dist/cli-adapters/codex-adapter.d.ts +23 -0
  11. package/dist/cli-adapters/codex-adapter.d.ts.map +1 -0
  12. package/dist/cli-adapters/codex-adapter.js +173 -0
  13. package/dist/cli-adapters/codex-adapter.js.map +1 -0
  14. package/dist/cli-adapters/gemini-adapter.d.ts +50 -0
  15. package/dist/cli-adapters/gemini-adapter.d.ts.map +1 -0
  16. package/dist/cli-adapters/gemini-adapter.js +196 -0
  17. package/dist/cli-adapters/gemini-adapter.js.map +1 -0
  18. package/dist/cli-adapters/index.d.ts +75 -0
  19. package/dist/cli-adapters/index.d.ts.map +1 -0
  20. package/dist/cli-adapters/index.js +29 -0
  21. package/dist/cli-adapters/index.js.map +1 -0
  22. package/dist/cli-adapters/shared.d.ts +12 -0
  23. package/dist/cli-adapters/shared.d.ts.map +1 -0
  24. package/dist/cli-adapters/shared.js +99 -0
  25. package/dist/cli-adapters/shared.js.map +1 -0
  26. package/dist/cli-agents.d.ts +69 -2
  27. package/dist/cli-agents.d.ts.map +1 -1
  28. package/dist/cli-agents.js +358 -394
  29. package/dist/cli-agents.js.map +1 -1
  30. package/dist/debate/constitutional.d.ts +27 -0
  31. package/dist/debate/constitutional.d.ts.map +1 -0
  32. package/dist/debate/constitutional.js +74 -0
  33. package/dist/debate/constitutional.js.map +1 -0
  34. package/dist/debate/debate-orchestrator.d.ts +154 -0
  35. package/dist/debate/debate-orchestrator.d.ts.map +1 -0
  36. package/dist/debate/debate-orchestrator.js +699 -0
  37. package/dist/debate/debate-orchestrator.js.map +1 -0
  38. package/dist/debate/index.d.ts +18 -0
  39. package/dist/debate/index.d.ts.map +1 -0
  40. package/dist/debate/index.js +18 -0
  41. package/dist/debate/index.js.map +1 -0
  42. package/dist/debate/refusal-detection.d.ts +27 -0
  43. package/dist/debate/refusal-detection.d.ts.map +1 -0
  44. package/dist/debate/refusal-detection.js +62 -0
  45. package/dist/debate/refusal-detection.js.map +1 -0
  46. package/dist/debate/synthesis.d.ts +22 -0
  47. package/dist/debate/synthesis.d.ts.map +1 -0
  48. package/dist/debate/synthesis.js +117 -0
  49. package/dist/debate/synthesis.js.map +1 -0
  50. package/dist/logger.d.ts +204 -1
  51. package/dist/logger.d.ts.map +1 -1
  52. package/dist/logger.js +398 -18
  53. package/dist/logger.js.map +1 -1
  54. package/dist/metrics/counter.d.ts +24 -0
  55. package/dist/metrics/counter.d.ts.map +1 -0
  56. package/dist/metrics/counter.js +60 -0
  57. package/dist/metrics/counter.js.map +1 -0
  58. package/dist/metrics/histogram.d.ts +42 -0
  59. package/dist/metrics/histogram.d.ts.map +1 -0
  60. package/dist/metrics/histogram.js +114 -0
  61. package/dist/metrics/histogram.js.map +1 -0
  62. package/dist/metrics/index.d.ts +26 -0
  63. package/dist/metrics/index.d.ts.map +1 -0
  64. package/dist/metrics/index.js +22 -0
  65. package/dist/metrics/index.js.map +1 -0
  66. package/dist/metrics/registry.d.ts +96 -0
  67. package/dist/metrics/registry.d.ts.map +1 -0
  68. package/dist/metrics/registry.js +113 -0
  69. package/dist/metrics/registry.js.map +1 -0
  70. package/dist/metrics/safe-metric.d.ts +25 -0
  71. package/dist/metrics/safe-metric.d.ts.map +1 -0
  72. package/dist/metrics/safe-metric.js +41 -0
  73. package/dist/metrics/safe-metric.js.map +1 -0
  74. package/dist/metrics/types.d.ts +82 -0
  75. package/dist/metrics/types.d.ts.map +1 -0
  76. package/dist/metrics/types.js +121 -0
  77. package/dist/metrics/types.js.map +1 -0
  78. package/dist/registry/argument-spaces.d.ts.map +1 -1
  79. package/dist/registry/argument-spaces.js +20 -0
  80. package/dist/registry/argument-spaces.js.map +1 -1
  81. package/dist/registry/domains.d.ts.map +1 -1
  82. package/dist/registry/domains.js +17 -1
  83. package/dist/registry/domains.js.map +1 -1
  84. package/dist/streaming/circuit-breaker.d.ts +13 -1
  85. package/dist/streaming/circuit-breaker.d.ts.map +1 -1
  86. package/dist/streaming/circuit-breaker.js +13 -1
  87. package/dist/streaming/circuit-breaker.js.map +1 -1
  88. package/dist/streaming/intelligent-buffer.d.ts +13 -1
  89. package/dist/streaming/intelligent-buffer.d.ts.map +1 -1
  90. package/dist/streaming/intelligent-buffer.js +13 -1
  91. package/dist/streaming/intelligent-buffer.js.map +1 -1
  92. package/dist/streaming/output-parser.d.ts +16 -2
  93. package/dist/streaming/output-parser.d.ts.map +1 -1
  94. package/dist/streaming/output-parser.js +16 -2
  95. package/dist/streaming/output-parser.js.map +1 -1
  96. package/dist/streaming/progress-tracker.d.ts +14 -1
  97. package/dist/streaming/progress-tracker.d.ts.map +1 -1
  98. package/dist/streaming/progress-tracker.js +14 -1
  99. package/dist/streaming/progress-tracker.js.map +1 -1
  100. package/dist/streaming/session-manager.d.ts +14 -1
  101. package/dist/streaming/session-manager.d.ts.map +1 -1
  102. package/dist/streaming/session-manager.js +14 -1
  103. package/dist/streaming/session-manager.js.map +1 -1
  104. package/dist/streaming/sse-transport.d.ts +12 -1
  105. package/dist/streaming/sse-transport.d.ts.map +1 -1
  106. package/dist/streaming/sse-transport.js +12 -1
  107. package/dist/streaming/sse-transport.js.map +1 -1
  108. package/dist/streaming/streaming-orchestrator.d.ts +15 -1
  109. package/dist/streaming/streaming-orchestrator.d.ts.map +1 -1
  110. package/dist/streaming/streaming-orchestrator.js +15 -1
  111. package/dist/streaming/streaming-orchestrator.js.map +1 -1
  112. package/dist/system-prompts.d.ts.map +1 -1
  113. package/dist/system-prompts.js +490 -4
  114. package/dist/system-prompts.js.map +1 -1
  115. package/dist/tool-definitions-generated.d.ts.map +1 -1
  116. package/dist/tool-definitions-generated.js +3 -1
  117. package/dist/tool-definitions-generated.js.map +1 -1
  118. package/package.json +1 -1
@@ -3,7 +3,36 @@ import { promises as fs, realpathSync } from 'fs';
3
3
  import { promisify } from 'util';
4
4
  import { logger } from './logger.js';
5
5
  import { ModelResolver } from './model-resolver.js';
6
- import { resolveServers, writeClaudeMCPConfig, cleanupTempConfig, buildCodexMCPOverride, ensureGeminiMCPServers, ensurePlaywrightBrowsers, } from './mcp-registry.js';
6
+ import { cleanupTempConfig, } from './mcp-registry.js';
7
+ import { getProvider, parseNDJSON } from './cli-adapters/index.js';
8
+ import { GEMINI_FRONTIER_CHAIN } from './cli-adapters/gemini-adapter.js';
9
+ import { safeMetric } from './metrics/index.js';
10
+ /**
11
+ * Detect errors where rotating to the next Gemini frontier tier is likely
12
+ * to succeed. Covers two failure families:
13
+ *
14
+ * 1. Capacity saturation on the current tier
15
+ * (429 / "No capacity available" / quota / rate-limit).
16
+ *
17
+ * 2. Access denial on the current tier — the model exists but the
18
+ * user's account lacks preview-tier access. Appears as
19
+ * ModelNotFoundError / "Requested entity was not found" / 403 /
20
+ * "permission denied". In production the frontier chain is
21
+ * probe-tested (not user-typos), so these errors mean "this tier
22
+ * is unavailable to THIS caller" — which is exactly when rotation
23
+ * to the next tier should fire. Dropping down from preview tiers
24
+ * to the universally-available `gemini-2.5-pro` is the entire
25
+ * point of the chain.
26
+ *
27
+ * Not intended to match: auth failures (missing/invalid API key), prompt-safety
28
+ * rejections, or subprocess crashes — these will not differ between frontier
29
+ * tiers. Matching is textual, so a failure worded as 403 / "permission denied" still matches.
30
+ */
31
+ function isGeminiRotatableError(error) {
32
+ if (!error)
33
+ return false;
34
+ return /no capacity available|\b429\b|overloaded|rateLimitExceeded|rate limit|quota|too many requests|ModelNotFoundError|Requested entity was not found|\b403\b|permission denied|access denied/i.test(error);
35
+ }
7
36
  // Configurable timeouts and limits
8
37
  const DEFAULT_TIMEOUT = parseInt(process.env.BRUTALIST_TIMEOUT || '1800000', 10); // 30 minutes default
9
38
  const CLI_CHECK_TIMEOUT = parseInt(process.env.BRUTALIST_CLI_CHECK_TIMEOUT || '5000', 10); // 5 seconds for CLI checks
@@ -239,6 +268,19 @@ async function spawnAsync(command, args, options = {}) {
239
268
  spawnCommand = command;
240
269
  spawnArgs = args;
241
270
  }
271
+ // Fires only after all pre-spawn validators (command, args, cwd)
272
+ // pass. Callers gate their `spawned` flag on this callback so
273
+ // invalid-command / invalid-args / invalid-cwd rejects do NOT count
274
+ // as spawn outcomes in `brutalist_cli_spawn_total`
275
+ // (Cycle 3 Task CLI-C'). Wrapped in try/catch because a throw from
276
+ // the user-supplied callback must not abort the spawn itself.
277
+ try {
278
+ options.onBeforeSpawn?.();
279
+ }
280
+ catch {
281
+ // Swallow — this hook is diagnostic only; failures here must not
282
+ // prevent the spawn from proceeding.
283
+ }
242
284
  const child = spawn(spawnCommand, spawnArgs, {
243
285
  cwd: cwd,
244
286
  stdio: ['pipe', 'pipe', 'pipe'],
@@ -399,58 +441,6 @@ async function spawnAsync(command, args, options = {}) {
399
441
  }
400
442
  });
401
443
  }
402
- const CLI_BUILDER_CONFIGS = {
403
- claude: {
404
- command: 'claude',
405
- defaultArgs: ['--print'],
406
- modelArgName: '--model',
407
- mpcEnvCleanup: ['CLAUDE_MCP_CONFIG', 'MCP_ENABLED', 'CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'],
408
- streamingArgs: () => ['--output-format', 'stream-json', '--verbose'],
409
- mcpSupport: {
410
- configMethod: 'flag-file',
411
- configFlag: '--mcp-config',
412
- strictFlag: '--strict-mcp-config',
413
- writeProtection: {
414
- method: 'disallowed-tools',
415
- flag: '--disallowedTools',
416
- value: 'Edit,Write,NotebookEdit',
417
- },
418
- },
419
- },
420
- codex: {
421
- command: 'codex',
422
- defaultArgs: ['exec', '--sandbox', 'read-only', '--skip-git-repo-check'],
423
- modelArgName: '--model',
424
- jsonFlag: '--json',
425
- mpcEnvCleanup: ['CODEX_MCP_CONFIG', 'MCP_ENABLED'],
426
- promptWrapper: (sys, user) => `${sys}\n\n${user}\n\nUse your shell tools to read files (cat, ls, find, grep, head, etc.) and analyze the codebase. You ARE allowed to run read-only commands. Explore the directory structure, read relevant source files, and provide a comprehensive brutal analysis based on what you find.`,
427
- mcpSupport: {
428
- configMethod: 'config-override',
429
- configOverrideKey: 'mcp_servers',
430
- writeProtection: {
431
- method: 'sandbox',
432
- flag: '--sandbox',
433
- value: 'read-only', // already in defaultArgs
434
- },
435
- },
436
- },
437
- gemini: {
438
- command: 'gemini',
439
- defaultArgs: ['--output-format', 'json'],
440
- modelArgName: '--model',
441
- envExtras: { TERM: 'dumb', NO_COLOR: '1', CI: 'true' },
442
- mpcEnvCleanup: ['GEMINI_MCP_CONFIG', 'MCP_ENABLED'],
443
- mcpSupport: {
444
- configMethod: 'server-whitelist',
445
- whitelistFlag: '--allowed-mcp-server-names',
446
- writeProtection: {
447
- method: 'approval-mode',
448
- flag: '--approval-mode',
449
- value: 'plan',
450
- },
451
- },
452
- },
453
- };
454
444
  export class CLIAgentOrchestrator {
455
445
  defaultTimeout = 1800000; // 30 minutes - complex codebases need time
456
446
  defaultWorkingDir = process.cwd();
@@ -462,217 +452,94 @@ export class CLIAgentOrchestrator {
462
452
  MAX_CONCURRENT_CLIS = MAX_CONCURRENT_CLIS; // Configurable concurrency limit
463
453
  // Runtime model discovery
464
454
  modelResolver;
455
+ // Optional observability deps — injected at the composition root in
456
+ // production; absent (undefined) in test harnesses that construct
457
+ // `new CLIAgentOrchestrator()` with no args. Instrumentation is a no-op
458
+ // when these are undefined, via `this.metrics?.*` and `emitLog()` fallback.
459
+ metrics;
460
+ log;
465
461
  // Streaming throttle properties
466
462
  streamingBuffers = new Map();
467
463
  STREAMING_FLUSH_INTERVAL = 200; // 200ms
468
464
  MAX_CHUNK_SIZE = 2048; // 2KB per event
469
465
  HEARTBEAT_INTERVAL = 5000; // 5s between progress heartbeats
470
466
  lastHeartbeat = 0;
471
- constructor(modelResolver) {
472
- this.modelResolver = modelResolver || new ModelResolver();
473
- // Log configuration at startup
474
- logger.info(`🔧 Brutalist MCP Configuration:`);
475
- logger.info(` - Default timeout: ${DEFAULT_TIMEOUT}ms`);
476
- logger.info(` - CLI check timeout: ${CLI_CHECK_TIMEOUT}ms`);
477
- logger.info(` - Max buffer size: ${MAX_BUFFER_SIZE} bytes`);
478
- logger.info(` - Max concurrent CLIs: ${MAX_CONCURRENT_CLIS}`);
467
+ /**
468
+ * Accepts a deps bag OR a bare `ModelResolver` (legacy positional form)
469
+ * OR nothing (characterization-test harnesses). The `instanceof ModelResolver`
470
+ * branch preserves the pre-observability signature.
471
+ */
472
+ constructor(deps) {
473
+ const bag = deps instanceof ModelResolver
474
+ ? { modelResolver: deps }
475
+ : (deps || {});
476
+ this.modelResolver = bag.modelResolver || new ModelResolver();
477
+ this.metrics = bag.metrics;
478
+ this.log = bag.log;
479
+ // Log configuration at startup (via emitLog — falls back to root logger
480
+ // when no scoped log was injected).
481
+ const bootLog = this.emitLog();
482
+ bootLog.info(`🔧 Brutalist MCP Configuration:`);
483
+ bootLog.info(` - Default timeout: ${DEFAULT_TIMEOUT}ms`);
484
+ bootLog.info(` - CLI check timeout: ${CLI_CHECK_TIMEOUT}ms`);
485
+ bootLog.info(` - Max buffer size: ${MAX_BUFFER_SIZE} bytes`);
486
+ bootLog.info(` - Max concurrent CLIs: ${MAX_CONCURRENT_CLIS}`);
479
487
  // Detect CLI context and discover models at startup
480
488
  Promise.all([
481
489
  this.detectCLIContext(),
482
490
  this.modelResolver.initialize(),
483
491
  ]).catch(error => {
484
- logger.error("Failed startup detection:", error);
492
+ this.emitLog().error("Failed startup detection:", error);
485
493
  });
486
494
  }
487
- // Parse NDJSON with proper JSON boundary detection
488
- // Handles JSON objects that contain embedded newlines without data loss
495
+ /**
496
+ * Return the injected scoped logger if present, otherwise the root
497
+ * logger singleton. Keeps un-injected (test) instances working while
498
+ * scoping production emissions with `module='cli-orchestrator'`.
499
+ */
500
+ emitLog() {
501
+ return this.log ?? logger;
502
+ }
503
+ /**
504
+ * Heuristic for classifying a spawnAsync error as a timeout.
505
+ * Centralized so all outcome paths share the same detection logic.
506
+ *
507
+ * Matches any of:
508
+ * - execError.code === 'ETIMEDOUT' (Node's timeout code on some paths)
509
+ * - execError.killed === true (child_process kill after SIGTERM/SIGKILL
510
+ * escalation when the timeout timer fired — see spawnAsync timer block)
511
+ * - execError.message matching /timed out|timeout/i (spawnAsync rejects
512
+ * with "Command timed out after ..." on timer expiry)
513
+ */
514
+ isTimeoutError(err) {
515
+ if (!err || typeof err !== 'object')
516
+ return false;
517
+ const e = err;
518
+ if (e.code === 'ETIMEDOUT')
519
+ return true;
520
+ if (e.killed === true)
521
+ return true;
522
+ if (typeof e.message === 'string' && /timed out|timeout/i.test(e.message))
523
+ return true;
524
+ return false;
525
+ }
526
+ // Proxy methods for backward compatibility — characterization tests
527
+ // access these via (orchestrator as any).methodName().
528
+ // Implementation lives in src/cli-adapters/.
489
529
  parseNDJSON(input) {
490
- if (!input || !input.trim()) {
491
- return [];
492
- }
493
- const results = [];
494
- let depth = 0;
495
- let inString = false;
496
- let escape = false;
497
- let start = 0;
498
- for (let i = 0; i < input.length; i++) {
499
- const char = input[i];
500
- // Handle escape sequences
501
- if (escape) {
502
- escape = false;
503
- continue;
504
- }
505
- if (char === '\\') {
506
- escape = true;
507
- continue;
508
- }
509
- // Track string boundaries
510
- if (char === '"') {
511
- inString = !inString;
512
- continue;
513
- }
514
- // Only count braces/brackets outside of strings
515
- if (inString)
516
- continue;
517
- // Track depth
518
- if (char === '{' || char === '[') {
519
- depth++;
520
- }
521
- else if (char === '}' || char === ']') {
522
- depth--;
523
- // When depth returns to 0, we've found a complete JSON object
524
- if (depth === 0) {
525
- const jsonStr = input.slice(start, i + 1).trim();
526
- if (jsonStr) {
527
- try {
528
- const parsed = JSON.parse(jsonStr);
529
- results.push(parsed);
530
- }
531
- catch (e) {
532
- // Log unparseable segments (not silent)
533
- logger.warn(`Failed to parse JSON segment at position ${start}-${i + 1}:`, {
534
- preview: jsonStr.substring(0, 100),
535
- error: e instanceof Error ? e.message : String(e)
536
- });
537
- }
538
- }
539
- // Move start pointer past this object and any whitespace
540
- start = i + 1;
541
- while (start < input.length && /\s/.test(input[start])) {
542
- start++;
543
- }
544
- i = start - 1; // Will be incremented by loop
545
- }
546
- }
547
- }
548
- // Warn about incomplete JSON at end of input
549
- if (start < input.length) {
550
- const remaining = input.slice(start).trim();
551
- if (remaining) {
552
- logger.warn(`Incomplete JSON at end of input:`, {
553
- preview: remaining.substring(0, 100)
554
- });
555
- }
556
- }
557
- return results;
530
+ return parseNDJSON(input);
558
531
  }
559
- // Decode Claude's stream-json NDJSON output into plain text.
560
- // Extracts text content blocks from all 'assistant' events across all turns.
561
- // Skips system events, user events (tool results with raw file contents), and
562
- // tool_use content blocks within assistant events.
563
- // Falls back to 'result' event if no assistant text was captured.
564
532
  decodeClaudeStreamJson(ndjsonOutput) {
565
- if (!ndjsonOutput || !ndjsonOutput.trim()) {
566
- logger.warn('decodeClaudeStreamJson: empty input');
567
- return '';
568
- }
569
- const events = this.parseNDJSON(ndjsonOutput);
570
- if (events.length === 0) {
571
- logger.warn('decodeClaudeStreamJson: no valid JSON events found in output');
572
- return '';
573
- }
574
- const textParts = [];
575
- let resultText = '';
576
- let resultError = '';
577
- for (const event of events) {
578
- if (typeof event !== 'object' || event === null)
579
- continue;
580
- const typedEvent = event;
581
- if (typedEvent.type === 'assistant' && typedEvent.message?.content) {
582
- // Extract only text blocks from assistant messages (skip tool_use blocks)
583
- const content = typedEvent.message.content;
584
- if (Array.isArray(content)) {
585
- for (const item of content) {
586
- if (item.type === 'text' && item.text) {
587
- textParts.push(item.text);
588
- }
589
- }
590
- }
591
- }
592
- else if (typedEvent.type === 'result') {
593
- if (typedEvent.subtype === 'error' || typedEvent.is_error) {
594
- resultError = typedEvent.error || typedEvent.result || 'Unknown error';
595
- }
596
- else if (typedEvent.result) {
597
- resultText = typedEvent.result;
598
- }
599
- }
600
- // Skip: system, user (tool_result with raw file contents), hooks
601
- }
602
- // Handle error
603
- if (resultError) {
604
- logger.error('decodeClaudeStreamJson: Claude returned error result', { error: resultError });
605
- return `[Claude Error] ${resultError}`;
606
- }
607
- // Use accumulated assistant text if available, fall back to result event
608
- if (textParts.length > 0) {
609
- return textParts.join('\n\n');
610
- }
611
- if (resultText) {
612
- return resultText;
613
- }
614
- logger.warn('decodeClaudeStreamJson: no text content found in stream-json output', {
615
- eventCount: events.length,
616
- eventTypes: events.map(e => e.type).filter(Boolean)
617
- });
618
- return '';
533
+ const provider = getProvider('claude');
534
+ return provider.decodeOutput(ndjsonOutput, ['--output-format', 'stream-json']);
619
535
  }
620
- // Extract only the agent messages from Codex JSON output (no thinking, no file reads, no commands)
621
536
  extractCodexAgentMessage(jsonOutput) {
622
- if (!jsonOutput || !jsonOutput.trim()) {
623
- logger.debug('extractCodexAgentMessage: empty input');
624
- return '';
625
- }
626
- const agentMessages = [];
627
- const events = this.parseNDJSON(jsonOutput);
628
- logger.debug(`extractCodexAgentMessage: processing ${events.length} JSON events`);
629
- for (const event of events) {
630
- if (typeof event !== 'object' || event === null)
631
- continue;
632
- const typedEvent = event;
633
- logger.debug(`extractCodexAgentMessage: parsed event type=${typedEvent.type}, item.type=${typedEvent.item?.type}`);
634
- // Codex --json outputs events with structure: {"type":"item.completed","item":{...}}
635
- // Only extract agent_message type - this is the actual response
636
- if (typedEvent.type === 'item.completed' && typedEvent.item) {
637
- if (typedEvent.item.type === 'agent_message' && typedEvent.item.text) {
638
- // Agent's actual response text
639
- logger.info(`✅ extractCodexAgentMessage: found agent_message with ${typedEvent.item.text.length} chars`);
640
- agentMessages.push(typedEvent.item.text);
641
- }
642
- // Skip all other types:
643
- // - reasoning: internal thinking steps
644
- // - command_execution: file reads, bash commands
645
- // - error: will be in stderr
646
- }
647
- }
648
- const result = agentMessages.join('\n\n').trim();
649
- logger.info(`extractCodexAgentMessage: extracted ${agentMessages.length} messages, total ${result.length} chars`);
650
- return result;
537
+ const provider = getProvider('codex');
538
+ return provider.decodeOutput(jsonOutput, ['--json']);
651
539
  }
652
- // Extract response text from Gemini --output-format json output
653
540
  extractGeminiResponse(jsonOutput) {
654
- if (!jsonOutput || !jsonOutput.trim()) {
655
- logger.debug('extractGeminiResponse: empty input');
656
- return '';
657
- }
658
- try {
659
- const parsed = JSON.parse(jsonOutput);
660
- if (parsed.response && typeof parsed.response === 'string') {
661
- logger.info(`✅ extractGeminiResponse: extracted response with ${parsed.response.length} chars`);
662
- return parsed.response;
663
- }
664
- logger.warn('extractGeminiResponse: no response field in JSON output', {
665
- keys: Object.keys(parsed)
666
- });
667
- return '';
668
- }
669
- catch (e) {
670
- logger.warn('extractGeminiResponse: failed to parse JSON, returning raw output', {
671
- error: e instanceof Error ? e.message : String(e),
672
- preview: jsonOutput.substring(0, 200)
673
- });
674
- return '';
675
- }
541
+ const provider = getProvider('gemini');
542
+ return provider.decodeOutput(jsonOutput, ['--output-format', 'json']);
676
543
  }
677
544
  emitThrottledStreamingEvent(agent, type, content, onStreamingEvent, options) {
678
545
  if (!onStreamingEvent)
@@ -719,98 +586,23 @@ export class CLIAgentOrchestrator {
719
586
  buffer.lastFlush = now;
720
587
  }
721
588
  }
589
+ // Proxy: delegates to per-provider adapter via getProvider()
722
590
  async buildCLICommand(cli, userPrompt, systemPrompt, options) {
723
- const config = CLI_BUILDER_CONFIGS[cli];
724
- const mcpEnabled = options.mcpServers && options.mcpServers.length > 0;
725
- // Build args
726
- const args = [...config.defaultArgs];
727
- const resolvedModel = this.modelResolver.resolveModel(cli, options.models?.[cli]);
728
- if (resolvedModel) {
729
- args.push(config.modelArgName, resolvedModel);
730
- }
731
- if (config.jsonFlag && process.env.CODEX_USE_JSON !== 'false') {
732
- args.push(config.jsonFlag);
733
- }
734
- if (config.streamingArgs) {
735
- args.push(...config.streamingArgs(options));
736
- }
737
- // ── MCP configuration ────────────────────────────────────────────────
738
- let tempMcpConfigPath;
739
- if (mcpEnabled && config.mcpSupport) {
740
- const servers = resolveServers(options.mcpServers);
741
- const serverNames = Object.keys(servers);
742
- // Auto-install Playwright browsers if playwright is requested
743
- if (servers.playwright) {
744
- await ensurePlaywrightBrowsers();
745
- }
746
- if (serverNames.length > 0) {
747
- const mcp = config.mcpSupport;
748
- switch (mcp.configMethod) {
749
- case 'flag-file': {
750
- // Claude: write temp JSON config, pass --mcp-config <path> --strict-mcp-config
751
- const sessionId = options.sessionId || 'default';
752
- tempMcpConfigPath = await writeClaudeMCPConfig(servers, sessionId);
753
- args.push(mcp.configFlag, tempMcpConfigPath);
754
- args.push(mcp.strictFlag);
755
- // Hard deny on write tools
756
- args.push(mcp.writeProtection.flag, mcp.writeProtection.value);
757
- // Non-interactive MCP tool use requires permission bypass
758
- args.push('--permission-mode', 'bypassPermissions');
759
- break;
760
- }
761
- case 'config-override': {
762
- // Codex: -c 'mcp_servers={...}' — replaces all configured servers (excludes brutalist)
763
- const tomlOverride = buildCodexMCPOverride(servers);
764
- args.push('-c', `${mcp.configOverrideKey}=${tomlOverride}`);
765
- // Write protection already in defaultArgs (--sandbox read-only)
766
- break;
767
- }
768
- case 'server-whitelist': {
769
- // Gemini: --allowed-mcp-server-names <names> --approval-mode plan
770
- await ensureGeminiMCPServers(servers);
771
- args.push(mcp.whitelistFlag, ...serverNames);
772
- args.push(mcp.writeProtection.flag, mcp.writeProtection.value);
773
- break;
774
- }
775
- }
776
- logger.info(`🔌 MCP enabled for ${cli}: [${serverNames.join(', ')}]`);
777
- }
778
- }
779
- // Build prompt — skip CLI-specific wrapper in debate mode (prevents Codex
780
- // from exploring the brutalist repo and reading its own control prompts)
781
- const combinedPrompt = (config.promptWrapper && !options.debateMode)
782
- ? config.promptWrapper(systemPrompt, userPrompt)
783
- : `${systemPrompt}\n\n${userPrompt}`;
784
- // Build secure env
591
+ const provider = getProvider(cli);
785
592
  const secureEnv = createSecureEnvironment();
786
- // Add CLI-specific env extras
787
- if (config.envExtras) {
788
- Object.assign(secureEnv, config.envExtras);
789
- }
790
- // Add required API key
791
- const apiKeyMap = {
792
- claude: ['ANTHROPIC_API_KEY'],
793
- codex: ['OPENAI_API_KEY'],
794
- gemini: ['GOOGLE_API_KEY', 'GEMINI_API_KEY']
795
- };
796
- for (const key of apiKeyMap[cli]) {
797
- if (process.env[key])
798
- secureEnv[key] = process.env[key];
799
- }
800
- // Clean up MCP env vars that could cause deadlock — SKIP when MCP is enabled
801
- // (the per-CLI config above already isolates to only the requested servers)
802
- if (!mcpEnabled && config.mpcEnvCleanup) {
803
- for (const envVar of config.mpcEnvCleanup) {
804
- delete secureEnv[envVar];
805
- }
806
- }
807
- secureEnv.BRUTALIST_SUBPROCESS = '1';
808
- return { command: config.command, args, input: combinedPrompt, env: secureEnv, tempMcpConfigPath };
593
+ // Pattern A: thread the scoped logger into the adapter via CLIAgentOptions.log.
594
+ // The adapter reads options.log?.forOperation('<cli>_spawn') and falls back to
595
+ // the root logger import if absent. A caller-supplied options.log wins so a
596
+ // test or an upstream operation can override the per-orchestrator default.
597
+ const perCliOp = `${cli}_spawn`;
598
+ const adapterLog = options.log ?? this.log?.forOperation(perCliOp);
599
+ const optionsWithLog = adapterLog && options.log === undefined ? { ...options, log: adapterLog } : options;
600
+ return provider.buildCommand(userPrompt, systemPrompt, optionsWithLog, this.modelResolver, secureEnv);
809
601
  }
810
602
  async detectCLIContext() {
811
603
  // Return cached context if still valid
812
604
  if (this.cliContextCached && Date.now() - this.cliContextCacheTime < this.CLI_CACHE_TTL) {
813
- logger.debug('Using cached CLI context');
605
+ this.emitLog().debug('Using cached CLI context');
814
606
  return this.cliContext;
815
607
  }
816
608
  const availableCLIs = [];
@@ -820,14 +612,16 @@ export class CLIAgentOrchestrator {
820
612
  { name: 'codex', command: 'codex --version' },
821
613
  { name: 'gemini', command: 'gemini --version' }
822
614
  ];
615
+ // NOTE: These `--version` probes go through spawnAsync but are availability
616
+ // checks, not counted spawn attempts — they must not increment `cliSpawnTotal`. Only _executeCLI counts spawns.
823
617
  const results = await Promise.allSettled(cliChecks.map(async (check) => {
824
618
  try {
825
619
  await spawnAsync(check.name, ['--version'], { timeout: CLI_CHECK_TIMEOUT });
826
- logger.debug(`CLI available: ${check.name}`);
620
+ this.emitLog().debug(`CLI available: ${check.name}`);
827
621
  return check.name;
828
622
  }
829
623
  catch (error) {
830
- logger.debug(`CLI not available: ${check.name}`);
624
+ this.emitLog().debug(`CLI not available: ${check.name}`);
831
625
  return null;
832
626
  }
833
627
  }));
@@ -843,7 +637,7 @@ export class CLIAgentOrchestrator {
843
637
  selectSingleCLI(preferredCLI, analysisType) {
844
638
  // 1. Honor explicit preference if available
845
639
  if (preferredCLI && this.cliContext.availableCLIs.includes(preferredCLI)) {
846
- logger.info(`✅ Using preferred CLI: ${preferredCLI}`);
640
+ this.emitLog().info(`✅ Using preferred CLI: ${preferredCLI}`);
847
641
  return preferredCLI;
848
642
  }
849
643
  // 2. Smart selection based on analysis type
@@ -863,7 +657,7 @@ export class CLIAgentOrchestrator {
863
657
  // 3. Select by priority from available CLIs
864
658
  for (const cli of priority) {
865
659
  if (this.cliContext.availableCLIs.includes(cli)) {
866
- logger.info(`🎯 Auto-selected ${cli} for ${analysisType || 'general'} analysis`);
660
+ this.emitLog().info(`🎯 Auto-selected ${cli} for ${analysisType || 'general'} analysis`);
867
661
  return cli;
868
662
  }
869
663
  }
@@ -871,7 +665,7 @@ export class CLIAgentOrchestrator {
871
665
  if (this.cliContext.availableCLIs.length === 0) {
872
666
  throw new Error('No CLI agents available');
873
667
  }
874
- logger.warn(`⚠️ Using fallback CLI: ${this.cliContext.availableCLIs[0]}`);
668
+ this.emitLog().warn(`⚠️ Using fallback CLI: ${this.cliContext.availableCLIs[0]}`);
875
669
  return this.cliContext.availableCLIs[0];
876
670
  }
877
671
  async _executeCLI(cliName, userPrompt, systemPromptSpec, options = {}, commandBuilder) {
@@ -879,9 +673,22 @@ export class CLIAgentOrchestrator {
879
673
  const workingDir = options.workingDirectory || this.defaultWorkingDir;
880
674
  const timeout = options.timeout || this.defaultTimeout;
881
675
  let tempMcpConfigPath;
676
+ // Provider label for the spawn counter. Derived from cliName so the
677
+ // label set stays in sync with the 'claude' | 'codex' | 'gemini' union
678
+ // instead of reading adapter.name.
679
+ const provider = cliName;
680
+ // Gate for the catch-branch counter emission. Per compose.py:174,
681
+ // pre-spawn paths (commandBuilder throwing before spawnAsync is
682
+ // invoked, or spawnAsync's own pre-spawn validators for
683
+ // command/args/cwd rejecting) do NOT represent a spawn attempt and
684
+ // must not increment the counter. Cycle 3 Task CLI-C' tightened
685
+ // the semantics: `spawned` is now flipped inside spawnAsync via the
686
+ // `onBeforeSpawn` callback, which fires only after all pre-spawn
687
+ // validators pass and immediately before `child_process.spawn()`.
688
+ let spawned = false;
882
689
  try {
883
- logger.info(`🤖 Executing ${cliName.toUpperCase()} CLI`);
884
- logger.debug(`${cliName.toUpperCase()} prompt`, { prompt: userPrompt.substring(0, 100) });
690
+ this.emitLog().info(`🤖 Executing ${cliName.toUpperCase()} CLI`);
691
+ this.emitLog().debug(`${cliName.toUpperCase()} prompt`, { promptLength: userPrompt.length });
885
692
  // Emit agent start event
886
693
  if (options.onStreamingEvent) {
887
694
  options.onStreamingEvent({
@@ -895,33 +702,57 @@ export class CLIAgentOrchestrator {
895
702
  const built = await commandBuilder(userPrompt, systemPromptSpec, options);
896
703
  const { command, args, env, input } = built;
897
704
  tempMcpConfigPath = built.tempMcpConfigPath;
898
- logger.info(`📋 Command: ${command} ${args.join(' ')}`);
899
- logger.info(`📁 Working directory: ${workingDir}`);
900
- logger.info(`⏱️ Timeout: ${timeout}ms`);
705
+ // Cycle 4 Task T18 (F9 — security): do NOT log raw command +
706
+ // joined args. The args array can contain caller-controlled
707
+ // content that crossed the trust boundary (Codex `-c
708
+ // mcp_servers=<TOML>` override content, Claude `--mcp-config
709
+ // <temp-path>`, prompt fragments for CLIs that accept inline
710
+ // prompt). Log only bounded metadata — cliName for provider
711
+ // identification, argCount for diagnostic shape, and
712
+ // hasMcpConfig so operators can correlate MCP-enabled spawns
713
+ // with MCP registry entries.
714
+ const hasMcpConfig = !!(options.mcpServers && options.mcpServers.length > 0);
715
+ this.emitLog().info('CLI spawn preparing', {
716
+ cliName,
717
+ argCount: args.length,
718
+ hasMcpConfig,
719
+ });
720
+ this.emitLog().info(`📁 Working directory: ${workingDir}`);
721
+ this.emitLog().info(`⏱️ Timeout: ${timeout}ms`);
901
722
  if (input) {
902
- logger.info(`📝 Using stdin for prompt (${input.length} characters)`);
723
+ this.emitLog().info(`📝 Using stdin for prompt (${input.length} characters)`);
903
724
  }
725
+ // `spawned` is flipped by spawnAsync's `onBeforeSpawn` callback
726
+ // immediately before `child_process.spawn()`. This means
727
+ // pre-spawn validator rejects inside spawnAsync (invalid command,
728
+ // invalid args, invalid cwd) leave `spawned === false` so the
729
+ // catch-branch counter does NOT fire for those paths
730
+ // (Cycle 3 Task CLI-C').
904
731
  const { stdout, stderr } = await spawnAsync(command, args, {
905
732
  cwd: workingDir,
906
733
  timeout: timeout,
907
734
  maxBuffer: MAX_BUFFER_SIZE, // Configurable buffer for model outputs
908
735
  env: env,
909
736
  input: input,
737
+ onBeforeSpawn: () => { spawned = true; },
910
738
  onProgress: (chunk, type) => {
911
- // Stream output in real-time with agent identification
739
+ // Stream output in real-time with agent identification.
740
+ // Log payloads are length-only at debug level — raw chunk text is
741
+ // NEVER emitted to the logger to avoid leaking prompt / response
742
+ // content through log aggregators. Streaming events are Layer 2.
912
743
  if (type === 'stdout' && chunk.trim()) {
913
- logger.info(`🤖 ${cliName.toUpperCase()}: ${chunk.trim()}`);
744
+ this.emitLog().debug(`${cliName.toUpperCase()} stdout chunk received`, { bytes: chunk.length });
914
745
  // Emit throttled streaming event for real-time updates
915
746
  this.emitThrottledStreamingEvent(cliName, 'agent_progress', chunk.trim(), options.onStreamingEvent, options);
916
747
  }
917
748
  else if (type === 'stderr' && chunk.trim()) {
918
- logger.warn(`⚠️ ${cliName.toUpperCase()} stderr: ${chunk.trim()}`);
749
+ this.emitLog().debug(`${cliName.toUpperCase()} stderr chunk received`, { bytes: chunk.length });
919
750
  // Emit throttled error streaming event
920
751
  this.emitThrottledStreamingEvent(cliName, 'agent_error', chunk.trim(), options.onStreamingEvent, options);
921
752
  }
922
753
  }
923
754
  });
924
- logger.info(`✅ ${cliName.toUpperCase()} completed (${Date.now() - startTime}ms)`);
755
+ this.emitLog().info(`✅ ${cliName.toUpperCase()} completed (${Date.now() - startTime}ms)`);
925
756
  // Emit completion event
926
757
  if (options.onStreamingEvent) {
927
758
  options.onStreamingEvent({
@@ -932,33 +763,20 @@ export class CLIAgentOrchestrator {
932
763
  sessionId: options.sessionId
933
764
  });
934
765
  }
935
- // Post-process CLI output if needed
766
+ // Post-process CLI output via provider adapter. Thread the scoped
767
+ // logger through decodeOutput so adapter warnings/errors carry
768
+ // module=cli-orchestrator + operation=<provider>_spawn context.
936
769
  let finalOutput = stdout;
937
- // If Claude was run with stream-json format, decode the NDJSON to extract text
938
- if (cliName === 'claude' && args.includes('--output-format') && args.includes('stream-json')) {
939
- const decodedText = this.decodeClaudeStreamJson(stdout);
940
- if (decodedText) {
941
- finalOutput = decodedText;
942
- }
943
- }
944
- // If Codex was run with --json flag, extract only the agent messages
945
- if (cliName === 'codex' && args.includes('--json')) {
946
- const decodedText = this.extractCodexAgentMessage(stdout);
947
- if (decodedText) {
948
- finalOutput = decodedText;
949
- }
950
- }
951
- // If Gemini was run with --output-format json, extract the response field
952
- if (cliName === 'gemini' && args.includes('--output-format') && args.includes('json')) {
953
- const decodedText = this.extractGeminiResponse(stdout);
954
- if (decodedText) {
955
- finalOutput = decodedText;
956
- }
770
+ const providerAdapter = getProvider(cliName);
771
+ const decodeLog = this.log?.forOperation(`${cliName}_spawn`);
772
+ const decodedText = providerAdapter.decodeOutput(stdout, args, decodeLog);
773
+ if (decodedText) {
774
+ finalOutput = decodedText;
957
775
  }
958
776
  // Fallback: If stdout is empty but stderr has content and exit was successful,
959
777
  // Claude might have written to stderr (common in non-TTY environments)
960
778
  if (!finalOutput.trim() && stderr && stderr.trim()) {
961
- logger.info(`📝 Using stderr as output for ${cliName} (stdout was empty)`);
779
+ this.emitLog().info(`📝 Using stderr as output for ${cliName} (stdout was empty)`);
962
780
  finalOutput = stderr;
963
781
  }
964
782
  // Detect CLI errors that exit 0 but contain fatal error output
@@ -985,7 +803,7 @@ export class CLIAgentOrchestrator {
985
803
  const resetMatch = combinedOutput.match(/reset(?:s)? (?:in|after) (\d+h\s*\d+m(?:\s*\d+s)?)/i);
986
804
  const resetInfo = resetMatch ? ` (resets in ${resetMatch[1]})` : '';
987
805
  const errorMsg = `${cliName.toUpperCase()} quota exhausted${resetInfo}. The CLI exited 0 but returned a quota error instead of analysis output.`;
988
- logger.warn(`⏱️ ${errorMsg}`);
806
+ this.emitLog().warn(`⏱️ ${errorMsg}`);
989
807
  if (options.onStreamingEvent) {
990
808
  options.onStreamingEvent({
991
809
  type: 'agent_error',
@@ -995,24 +813,64 @@ export class CLIAgentOrchestrator {
995
813
  sessionId: options.sessionId
996
814
  });
997
815
  }
816
+ // Spawn counter: outcome=refused (quota exhaustion — CLI exited 0
817
+ // with a quota error in stdout/stderr). Labels annotated against
818
+ // CLI_SPAWN_LABELS so a future label-set change fails at compile
819
+ // time. Wrapped in `safeMetric` so a label-validation throw or
820
+ // other metric-layer exception cannot propagate into the outer
821
+ // spawn try/catch and be misclassified as a spawn failure
822
+ // (Cycle 3 Task CLI-B' — parity with debate's safeMetric).
823
+ const quotaLabels = {
824
+ provider,
825
+ outcome: 'refused',
826
+ };
827
+ safeMetric(this.emitLog(), 'cliSpawnTotal.inc(refused:quota)', () => {
828
+ this.metrics?.cliSpawnTotal.inc(quotaLabels, 1);
829
+ });
998
830
  return {
999
831
  agent: cliName,
1000
832
  success: false,
1001
833
  output: '',
1002
834
  error: errorMsg,
1003
835
  executionTime: Date.now() - startTime,
1004
- command: `${command} ${args.join(' ')}`,
836
+ // Cycle 4 Task T18 (F9): match the failure-path redaction
837
+ // parity — `command` is a diagnostic display field; the
838
+ // static placeholder preserves the response shape without
839
+ // leaking raw command + args (which may include Codex TOML
840
+ // MCP overrides, Claude temp config paths, or prompt
841
+ // fragments that crossed the trust boundary).
842
+ command: `(redacted command for ${cliName})`,
1005
843
  workingDirectory: workingDir,
1006
844
  exitCode: 0
1007
845
  };
1008
846
  }
847
+ // Spawn counter: outcome=success (normal completion path). Labels
848
+ // annotated against CLI_SPAWN_LABELS so a future label-set change
849
+ // fails at compile time. Wrapped in `safeMetric` so a metric-layer
850
+ // exception cannot propagate into the outer catch branch and be
851
+ // misclassified as a spawn failure (Cycle 3 Task CLI-B').
852
+ const successLabels = {
853
+ provider,
854
+ outcome: 'success',
855
+ };
856
+ safeMetric(this.emitLog(), 'cliSpawnTotal.inc(success)', () => {
857
+ this.metrics?.cliSpawnTotal.inc(successLabels, 1);
858
+ });
1009
859
  return {
1010
860
  agent: cliName,
1011
861
  success: true,
1012
862
  output: finalOutput,
1013
863
  error: stderr || undefined,
1014
864
  executionTime: Date.now() - startTime,
1015
- command: `${command} ${args.join(' ')}`,
865
+ // Cycle 4 Task T18 (F9): same redaction parity as the
866
+ // failure path — `command` is a diagnostic display field,
867
+ // not a machine-readable command reproduction. The raw
868
+ // command + args can contain caller-controlled payloads
869
+ // (Codex TOML MCP overrides at codex-adapter.ts:86/:87,
870
+ // Claude temp config paths at claude-adapter.ts:96, prompt
871
+ // fragments for CLIs that accept inline prompt) that
872
+ // crossed the trust boundary.
873
+ command: `(redacted command for ${cliName})`,
1016
874
  workingDirectory: workingDir,
1017
875
  exitCode: 0
1018
876
  };
@@ -1029,20 +887,66 @@ export class CLIAgentOrchestrator {
1029
887
  ];
1030
888
  const errorText = `${execError.message || ''} ${execError.stderr || ''}`.toLowerCase();
1031
889
  const isRateLimit = rateLimitPatterns.some(p => errorText.includes(p.toLowerCase()));
890
+ // Classify outcome for the spawn counter. Priority: rate-limit > timeout
891
+ // > generic failure. Timeout check uses the centralized heuristic.
892
+ // Classification priority is unchanged; the emission is gated on
893
+ // `spawned` so pre-spawn failures (e.g., commandBuilder throwing)
894
+ // do NOT increment the counter (compose.py:174).
895
+ let outcome;
896
+ if (isRateLimit) {
897
+ outcome = 'refused';
898
+ }
899
+ else if (this.isTimeoutError(execError)) {
900
+ outcome = 'timeout';
901
+ }
902
+ else {
903
+ outcome = 'failure';
904
+ }
905
+ if (spawned) {
906
+ // Wrapped in `safeMetric` so a metric-layer exception cannot
907
+ // re-throw from the catch branch (which would short-circuit
908
+ // the streaming event emission and the final failure-response
909
+ // construction below). Parity with debate's safeMetric pattern
910
+ // (Cycle 3 Task CLI-B').
911
+ const failureLabels = {
912
+ provider,
913
+ outcome,
914
+ };
915
+ safeMetric(this.emitLog(), `cliSpawnTotal.inc(${outcome})`, () => {
916
+ this.metrics?.cliSpawnTotal.inc(failureLabels, 1);
917
+ });
918
+ }
1032
919
  if (isRateLimit) {
1033
- logger.warn(`⏱️ ${cliName.toUpperCase()} CLI hit rate/usage limit (${Date.now() - startTime}ms)`);
920
+ this.emitLog().warn(`⏱️ ${cliName.toUpperCase()} CLI hit rate/usage limit (${Date.now() - startTime}ms)`);
1034
921
  }
1035
922
  else {
1036
- logger.error(`❌ ${cliName.toUpperCase()} execution failed (${Date.now() - startTime}ms)`, {
923
+ this.emitLog().error(`❌ ${cliName.toUpperCase()} execution failed (${Date.now() - startTime}ms)`, {
1037
924
  error: "Redacted: See internal logs for full error details.",
1038
925
  exitCode,
1039
926
  stderr: "Redacted: See internal logs for full stderr output."
1040
927
  });
1041
928
  }
929
+ // Cycle 3 Task D' (security): `errorMsg` is used both as streaming
930
+ // event content (just below) and as `result.error` in the returned
931
+ // CLIAgentResponse. Raw `error.message` from spawnAsync /
932
+ // downstream CLIs can contain CLI stdout/stderr fragments (TOML
933
+ // MCP override content, prompt echoes, tool-output snippets) that
934
+ // must not leak via streaming fan-out or the MCP response payload.
935
+ // We apply the same static-redaction pattern used by the logger
936
+ // emission at the `❌ ... execution failed` call above: map each
937
+ // classification path to a short, content-free string. The
938
+ // timeout branch preserves the millisecond budget (from our own
939
+ // `timeout` variable, not the underlying error) so downstream
940
+ // callers can still distinguish timeout from generic failure.
1042
941
  const errorMsg = isRateLimit
1043
942
  ? `${cliName.toUpperCase()} hit rate/usage limit. Try again later or use a different agent.`
1044
- : (error instanceof Error ? error.message : String(error));
1045
- // Emit error event
943
+ : this.isTimeoutError(execError)
944
+ ? `${cliName.toUpperCase()} execution timed out after ${timeout}ms. See internal logs for details.`
945
+ : `${cliName.toUpperCase()} execution failed. See internal logs for details.`;
946
+ // Emit error event. The content derives from the redacted
947
+ // `errorMsg` above, never from `error.message` directly, so
948
+ // streaming observers (HTTP SSE, MCP notifications) do not
949
+ // receive raw CLI payload fragments.
1046
950
  if (options.onStreamingEvent) {
1047
951
  options.onStreamingEvent({
1048
952
  type: 'agent_error',
@@ -1070,6 +974,8 @@ export class CLIAgentOrchestrator {
1070
974
  }
1071
975
  }
1072
976
  }
977
+ // Per-provider execution methods — thin wrappers via adapter dispatch.
978
+ // Retained for backward compatibility (tests may reference these).
1073
979
  async executeClaudeCode(userPrompt, systemPromptSpec, options = {}) {
1074
980
  return this._executeCLI('claude', userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand('claude', user, sys, opts));
1075
981
  }
@@ -1083,28 +989,86 @@ export class CLIAgentOrchestrator {
1083
989
  // Wait for available slot to prevent resource exhaustion
1084
990
  await this.waitForAvailableSlot();
1085
991
  this.runningCLIs++;
1086
- logger.info(`🎯 Executing ${cli} (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
992
+ this.emitLog().info(`\u{1F3AF} Executing ${cli} (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1087
993
  try {
1088
- switch (cli) {
1089
- case 'claude':
1090
- return await this.executeClaudeCode(userPrompt, systemPromptSpec, options);
1091
- case 'codex':
1092
- return await this.executeCodex(userPrompt, systemPromptSpec, options);
1093
- case 'gemini':
1094
- return await this.executeGemini(userPrompt, systemPromptSpec, options);
1095
- default:
1096
- throw new Error(`Unknown CLI: ${cli}`);
994
+ // Gemini frontier rotation: when using the default frontier chain (no
995
+ // caller-specified model, no env-var override), rotate through the
996
+ // chain on saturation OR access-denied failures. Access-denied
997
+ // rotation is the path most users take — the preview tier
998
+ // (gemini-3.1-pro-preview / gemini-3-pro-preview) isn't granted to
999
+ // every account, so the chain falls through to the universally-
1000
+ // available gemini-2.5-pro. Rotation is disabled when the caller
1001
+ // or operator has explicitly chosen a model.
1002
+ const geminiRotationActive = cli === 'gemini'
1003
+ && !options.models?.gemini
1004
+ && !process.env.BRUTALIST_GEMINI_MODEL;
1005
+ if (geminiRotationActive) {
1006
+ return await this._executeGeminiWithRotation(userPrompt, systemPromptSpec, options);
1097
1007
  }
1008
+ // Dispatch to adapter via buildCLICommand (which delegates to provider)
1009
+ return await this._executeCLI(cli, userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand(cli, user, sys, opts));
1098
1010
  }
1099
1011
  finally {
1100
1012
  this.runningCLIs--;
1101
- logger.info(`✅ Released CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1013
+ this.emitLog().info(`\u2705 Released CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1014
+ }
1015
+ }
1016
+ /**
1017
+ * Gemini frontier rotation - iterate through GEMINI_FRONTIER_CHAIN on
1018
+ * rotatable failures (capacity saturation OR tier access denial).
1019
+ *
1020
+ * Only active when neither caller nor operator has chosen a model. Each
1021
+ * attempt injects the model via options.models.gemini. Per-attempt
1022
+ * failures are classified by isGeminiRotatableError(): capacity errors
1023
+ * (quota/429) AND access errors (ModelNotFoundError / permission denied)
1024
+ * both trigger rotation. On unrelated failures (auth, prompt rejection,
1025
+ * subprocess crashes) rotation stops immediately — a different model
1026
+ * will not fix those. On chain exhaustion, the last failing response
1027
+ * is returned.
1028
+ *
1029
+ * In practice the typical non-preview user trajectory is:
1030
+ * gemini-3.1-pro-preview -> access denied (rotate)
1031
+ * gemini-3-pro-preview -> access denied (rotate)
1032
+ * gemini-2.5-pro -> success (universally available)
1033
+ */
1034
+ async _executeGeminiWithRotation(userPrompt, systemPromptSpec, options) {
1035
+ const chain = GEMINI_FRONTIER_CHAIN;
1036
+ let lastResponse = null;
1037
+ for (let i = 0; i < chain.length; i++) {
1038
+ const model = chain[i];
1039
+ const attemptOptions = {
1040
+ ...options,
1041
+ models: { ...(options.models || {}), gemini: model },
1042
+ };
1043
+ if (i > 0) {
1044
+ this.emitLog().info(`Gemini rotation: attempting tier ${i + 1}/${chain.length} (${model})`);
1045
+ }
1046
+ const response = await this._executeCLI('gemini', userPrompt, systemPromptSpec, attemptOptions, (user, sys, opts) => this.buildCLICommand('gemini', user, sys, opts));
1047
+ if (response.success) {
1048
+ if (i > 0) {
1049
+ this.emitLog().warn(`Gemini served by ${model} after ${i} rotation${i === 1 ? '' : 's'} (tier ${i + 1}/${chain.length})`);
1050
+ }
1051
+ else {
1052
+ this.emitLog().debug(`Gemini served by frontier ${model}`);
1053
+ }
1054
+ return response;
1055
+ }
1056
+ if (!isGeminiRotatableError(response.error)) {
1057
+ this.emitLog().debug(`Gemini ${model} failed with non-rotatable error; aborting rotation`, {
1058
+ errorPreview: response.error?.slice(0, 120),
1059
+ });
1060
+ return response;
1061
+ }
1062
+ this.emitLog().warn(`Gemini ${model} unavailable (capacity or access); rotating to next frontier tier`);
1063
+ lastResponse = response;
1102
1064
  }
1065
+ this.emitLog().error(`Gemini frontier chain exhausted (${chain.length} tiers); no tier available to this account`);
1066
+ return lastResponse;
1103
1067
  }
1104
1068
  async waitForAvailableSlot() {
1105
1069
  let waitTime = 100; // Start with 100ms wait time
1106
1070
  while (this.runningCLIs >= this.MAX_CONCURRENT_CLIS) {
1107
- logger.info(`⏳ Waiting for available CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} in use). Next check in ${waitTime}ms...`);
1071
+ this.emitLog().info(`⏳ Waiting for available CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} in use). Next check in ${waitTime}ms...`);
1108
1072
  await new Promise(resolve => setTimeout(resolve, waitTime));
1109
1073
  waitTime = Math.min(waitTime * 2, 5000); // Exponential backoff, max 5 seconds
1110
1074
  }
@@ -1148,15 +1112,15 @@ export class CLIAgentOrchestrator {
1148
1112
  // Only validate filesystem paths for tools that actually operate on files/directories
1149
1113
  // NOTE: Must match BrutalistPromptType values (camelCase)
1150
1114
  const filesystemTools = ['codebase', 'fileStructure', 'dependencies', 'gitHistory', 'testCoverage'];
1151
- logger.debug(`Validation check: analysisType="${analysisType}", isFilesystemTool=${filesystemTools.includes(analysisType)}`);
1115
+ this.emitLog().debug(`Validation check: analysisType="${analysisType}", isFilesystemTool=${filesystemTools.includes(analysisType)}`);
1152
1116
  try {
1153
1117
  if (filesystemTools.includes(analysisType) && primaryContent && primaryContent.trim() !== '') {
1154
- logger.debug(`Validating path: "${primaryContent}"`);
1118
+ this.emitLog().debug(`Validating path: "${primaryContent}"`);
1155
1119
  await asyncValidatePath(primaryContent, 'targetPath');
1156
1120
  }
1157
1121
  }
1158
1122
  catch (error) {
1159
- logger.error(`Path validation failed: ${error}`);
1123
+ this.emitLog().error(`Path validation failed: ${error}`);
1160
1124
  throw new Error(`Security validation failed: ${error instanceof Error ? error.message : String(error)}`);
1161
1125
  }
1162
1126
  // Validate workingDirectory if provided
@@ -1180,18 +1144,18 @@ export class CLIAgentOrchestrator {
1180
1144
  }
1181
1145
  // Deduplicate
1182
1146
  clisToUse = [...new Set(options.clis)];
1183
- logger.info(`🎯 Using user-specified CLIs: ${clisToUse.join(', ')}`);
1147
+ this.emitLog().info(`🎯 Using user-specified CLIs: ${clisToUse.join(', ')}`);
1184
1148
  }
1185
1149
  else {
1186
1150
  // Default: use all available CLIs
1187
1151
  clisToUse = [...this.cliContext.availableCLIs];
1188
- logger.info(`📋 Using all available CLIs: ${clisToUse.join(', ')}`);
1152
+ this.emitLog().info(`📋 Using all available CLIs: ${clisToUse.join(', ')}`);
1189
1153
  }
1190
1154
  if (clisToUse.length === 0) {
1191
1155
  throw new Error('No CLI agents available for analysis');
1192
1156
  }
1193
1157
  const selectionMethod = options.clis ? 'user-specified' : 'all-available';
1194
- logger.info(`📊 Executing ${clisToUse.length} CLI(s): ${clisToUse.join(', ')} (${selectionMethod})`);
1158
+ this.emitLog().info(`📊 Executing ${clisToUse.length} CLI(s): ${clisToUse.join(', ')} (${selectionMethod})`);
1195
1159
  // Execute selected CLIs in parallel with allSettled for better error handling
1196
1160
  const promises = clisToUse.map(async (cli) => {
1197
1161
  try {
@@ -1203,7 +1167,7 @@ export class CLIAgentOrchestrator {
1203
1167
  };
1204
1168
  }
1205
1169
  catch (error) {
1206
- logger.error(`❌ ${cli} execution failed:`, error);
1170
+ this.emitLog().error(`❌ ${cli} execution failed:`, error);
1207
1171
  return {
1208
1172
  agent: cli,
1209
1173
  success: false,
@@ -1220,7 +1184,7 @@ export class CLIAgentOrchestrator {
1220
1184
  const responses = results
1221
1185
  .filter(result => result.status === 'fulfilled')
1222
1186
  .map(result => result.value);
1223
- logger.info(`✅ CLI analysis complete: ${responses.filter(r => r.success).length}/${responses.length} successful`);
1187
+ this.emitLog().info(`✅ CLI analysis complete: ${responses.filter(r => r.success).length}/${responses.length} successful`);
1224
1188
  return responses;
1225
1189
  }
1226
1190
  synthesizeBrutalistFeedback(responses, analysisType) {