@brutalist/mcp 1.8.1 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. package/README.md +26 -0
  2. package/dist/brutalist-server.d.ts +31 -9
  3. package/dist/brutalist-server.d.ts.map +1 -1
  4. package/dist/brutalist-server.js +107 -673
  5. package/dist/brutalist-server.js.map +1 -1
  6. package/dist/cli-adapters/claude-adapter.d.ts +25 -0
  7. package/dist/cli-adapters/claude-adapter.d.ts.map +1 -0
  8. package/dist/cli-adapters/claude-adapter.js +245 -0
  9. package/dist/cli-adapters/claude-adapter.js.map +1 -0
  10. package/dist/cli-adapters/codex-adapter.d.ts +23 -0
  11. package/dist/cli-adapters/codex-adapter.d.ts.map +1 -0
  12. package/dist/cli-adapters/codex-adapter.js +173 -0
  13. package/dist/cli-adapters/codex-adapter.js.map +1 -0
  14. package/dist/cli-adapters/gemini-adapter.d.ts +50 -0
  15. package/dist/cli-adapters/gemini-adapter.d.ts.map +1 -0
  16. package/dist/cli-adapters/gemini-adapter.js +196 -0
  17. package/dist/cli-adapters/gemini-adapter.js.map +1 -0
  18. package/dist/cli-adapters/index.d.ts +75 -0
  19. package/dist/cli-adapters/index.d.ts.map +1 -0
  20. package/dist/cli-adapters/index.js +29 -0
  21. package/dist/cli-adapters/index.js.map +1 -0
  22. package/dist/cli-adapters/shared.d.ts +12 -0
  23. package/dist/cli-adapters/shared.d.ts.map +1 -0
  24. package/dist/cli-adapters/shared.js +99 -0
  25. package/dist/cli-adapters/shared.js.map +1 -0
  26. package/dist/cli-agents.d.ts +64 -2
  27. package/dist/cli-agents.d.ts.map +1 -1
  28. package/dist/cli-agents.js +341 -394
  29. package/dist/cli-agents.js.map +1 -1
  30. package/dist/debate/constitutional.d.ts +27 -0
  31. package/dist/debate/constitutional.d.ts.map +1 -0
  32. package/dist/debate/constitutional.js +74 -0
  33. package/dist/debate/constitutional.js.map +1 -0
  34. package/dist/debate/debate-orchestrator.d.ts +154 -0
  35. package/dist/debate/debate-orchestrator.d.ts.map +1 -0
  36. package/dist/debate/debate-orchestrator.js +699 -0
  37. package/dist/debate/debate-orchestrator.js.map +1 -0
  38. package/dist/debate/index.d.ts +18 -0
  39. package/dist/debate/index.d.ts.map +1 -0
  40. package/dist/debate/index.js +18 -0
  41. package/dist/debate/index.js.map +1 -0
  42. package/dist/debate/refusal-detection.d.ts +27 -0
  43. package/dist/debate/refusal-detection.d.ts.map +1 -0
  44. package/dist/debate/refusal-detection.js +62 -0
  45. package/dist/debate/refusal-detection.js.map +1 -0
  46. package/dist/debate/synthesis.d.ts +22 -0
  47. package/dist/debate/synthesis.d.ts.map +1 -0
  48. package/dist/debate/synthesis.js +117 -0
  49. package/dist/debate/synthesis.js.map +1 -0
  50. package/dist/logger.d.ts +204 -1
  51. package/dist/logger.d.ts.map +1 -1
  52. package/dist/logger.js +398 -18
  53. package/dist/logger.js.map +1 -1
  54. package/dist/metrics/counter.d.ts +24 -0
  55. package/dist/metrics/counter.d.ts.map +1 -0
  56. package/dist/metrics/counter.js +60 -0
  57. package/dist/metrics/counter.js.map +1 -0
  58. package/dist/metrics/histogram.d.ts +42 -0
  59. package/dist/metrics/histogram.d.ts.map +1 -0
  60. package/dist/metrics/histogram.js +114 -0
  61. package/dist/metrics/histogram.js.map +1 -0
  62. package/dist/metrics/index.d.ts +26 -0
  63. package/dist/metrics/index.d.ts.map +1 -0
  64. package/dist/metrics/index.js +22 -0
  65. package/dist/metrics/index.js.map +1 -0
  66. package/dist/metrics/registry.d.ts +96 -0
  67. package/dist/metrics/registry.d.ts.map +1 -0
  68. package/dist/metrics/registry.js +113 -0
  69. package/dist/metrics/registry.js.map +1 -0
  70. package/dist/metrics/safe-metric.d.ts +25 -0
  71. package/dist/metrics/safe-metric.d.ts.map +1 -0
  72. package/dist/metrics/safe-metric.js +41 -0
  73. package/dist/metrics/safe-metric.js.map +1 -0
  74. package/dist/metrics/types.d.ts +82 -0
  75. package/dist/metrics/types.d.ts.map +1 -0
  76. package/dist/metrics/types.js +121 -0
  77. package/dist/metrics/types.js.map +1 -0
  78. package/dist/registry/argument-spaces.d.ts.map +1 -1
  79. package/dist/registry/argument-spaces.js +20 -0
  80. package/dist/registry/argument-spaces.js.map +1 -1
  81. package/dist/registry/domains.d.ts.map +1 -1
  82. package/dist/registry/domains.js +17 -1
  83. package/dist/registry/domains.js.map +1 -1
  84. package/dist/streaming/circuit-breaker.d.ts +13 -1
  85. package/dist/streaming/circuit-breaker.d.ts.map +1 -1
  86. package/dist/streaming/circuit-breaker.js +13 -1
  87. package/dist/streaming/circuit-breaker.js.map +1 -1
  88. package/dist/streaming/intelligent-buffer.d.ts +13 -1
  89. package/dist/streaming/intelligent-buffer.d.ts.map +1 -1
  90. package/dist/streaming/intelligent-buffer.js +13 -1
  91. package/dist/streaming/intelligent-buffer.js.map +1 -1
  92. package/dist/streaming/output-parser.d.ts +16 -2
  93. package/dist/streaming/output-parser.d.ts.map +1 -1
  94. package/dist/streaming/output-parser.js +16 -2
  95. package/dist/streaming/output-parser.js.map +1 -1
  96. package/dist/streaming/progress-tracker.d.ts +14 -1
  97. package/dist/streaming/progress-tracker.d.ts.map +1 -1
  98. package/dist/streaming/progress-tracker.js +14 -1
  99. package/dist/streaming/progress-tracker.js.map +1 -1
  100. package/dist/streaming/session-manager.d.ts +14 -1
  101. package/dist/streaming/session-manager.d.ts.map +1 -1
  102. package/dist/streaming/session-manager.js +14 -1
  103. package/dist/streaming/session-manager.js.map +1 -1
  104. package/dist/streaming/sse-transport.d.ts +12 -1
  105. package/dist/streaming/sse-transport.d.ts.map +1 -1
  106. package/dist/streaming/sse-transport.js +12 -1
  107. package/dist/streaming/sse-transport.js.map +1 -1
  108. package/dist/streaming/streaming-orchestrator.d.ts +15 -1
  109. package/dist/streaming/streaming-orchestrator.d.ts.map +1 -1
  110. package/dist/streaming/streaming-orchestrator.js +15 -1
  111. package/dist/streaming/streaming-orchestrator.js.map +1 -1
  112. package/dist/system-prompts.d.ts.map +1 -1
  113. package/dist/system-prompts.js +490 -4
  114. package/dist/system-prompts.js.map +1 -1
  115. package/dist/tool-definitions-generated.d.ts.map +1 -1
  116. package/dist/tool-definitions-generated.js +3 -1
  117. package/dist/tool-definitions-generated.js.map +1 -1
  118. package/package.json +1 -1
@@ -3,7 +3,28 @@ import { promises as fs, realpathSync } from 'fs';
3
3
  import { promisify } from 'util';
4
4
  import { logger } from './logger.js';
5
5
  import { ModelResolver } from './model-resolver.js';
6
- import { resolveServers, writeClaudeMCPConfig, cleanupTempConfig, buildCodexMCPOverride, ensureGeminiMCPServers, ensurePlaywrightBrowsers, } from './mcp-registry.js';
6
+ import { cleanupTempConfig, } from './mcp-registry.js';
7
+ import { getProvider, parseNDJSON } from './cli-adapters/index.js';
8
+ import { GEMINI_FRONTIER_CHAIN } from './cli-adapters/gemini-adapter.js';
9
+ import { safeMetric } from './metrics/index.js';
10
+ /**
11
+ * Detect Gemini-specific saturation errors — "No capacity available",
12
+ * status 429, overloaded, quota exhaustion. Used by the frontier-chain
13
+ * rotation logic in `executeSingleCLI` to decide whether to rotate to the
14
+ * next model tier or fail immediately (non-saturation errors don't
15
+ * benefit from a different model).
16
+ *
17
+ * Model-not-found (/ModelNotFoundError/) is NOT saturation — it means
18
+ * the model alias is wrong. Treat as non-saturation so rotation aborts
19
+ * and the caller sees the error rather than silently rotating past it.
20
+ */
21
+ function isGeminiSaturationError(error) {
22
+ if (!error)
23
+ return false;
24
+ if (/ModelNotFoundError/i.test(error))
25
+ return false;
26
+ return /no capacity available|\b429\b|overloaded|rateLimitExceeded|rate limit|quota|too many requests/i.test(error);
27
+ }
7
28
  // Configurable timeouts and limits
8
29
  const DEFAULT_TIMEOUT = parseInt(process.env.BRUTALIST_TIMEOUT || '1800000', 10); // 30 minutes default
9
30
  const CLI_CHECK_TIMEOUT = parseInt(process.env.BRUTALIST_CLI_CHECK_TIMEOUT || '5000', 10); // 5 seconds for CLI checks
@@ -239,6 +260,19 @@ async function spawnAsync(command, args, options = {}) {
239
260
  spawnCommand = command;
240
261
  spawnArgs = args;
241
262
  }
263
+ // Fires only after all pre-spawn validators (command, args, cwd)
264
+ // pass. Callers gate their `spawned` flag on this callback so
265
+ // invalid-command / invalid-args / invalid-cwd rejects do NOT count
266
+ // as spawn outcomes in `brutalist_cli_spawn_total`
267
+ // (Cycle 3 Task CLI-C'). Wrapped in try/catch because a throw from
268
+ // the user-supplied callback must not abort the spawn itself.
269
+ try {
270
+ options.onBeforeSpawn?.();
271
+ }
272
+ catch {
273
+ // Swallow — this hook is diagnostic only; failures here must not
274
+ // prevent the spawn from proceeding.
275
+ }
242
276
  const child = spawn(spawnCommand, spawnArgs, {
243
277
  cwd: cwd,
244
278
  stdio: ['pipe', 'pipe', 'pipe'],
@@ -399,58 +433,6 @@ async function spawnAsync(command, args, options = {}) {
399
433
  }
400
434
  });
401
435
  }
402
- const CLI_BUILDER_CONFIGS = {
403
- claude: {
404
- command: 'claude',
405
- defaultArgs: ['--print'],
406
- modelArgName: '--model',
407
- mpcEnvCleanup: ['CLAUDE_MCP_CONFIG', 'MCP_ENABLED', 'CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'],
408
- streamingArgs: () => ['--output-format', 'stream-json', '--verbose'],
409
- mcpSupport: {
410
- configMethod: 'flag-file',
411
- configFlag: '--mcp-config',
412
- strictFlag: '--strict-mcp-config',
413
- writeProtection: {
414
- method: 'disallowed-tools',
415
- flag: '--disallowedTools',
416
- value: 'Edit,Write,NotebookEdit',
417
- },
418
- },
419
- },
420
- codex: {
421
- command: 'codex',
422
- defaultArgs: ['exec', '--sandbox', 'read-only', '--skip-git-repo-check'],
423
- modelArgName: '--model',
424
- jsonFlag: '--json',
425
- mpcEnvCleanup: ['CODEX_MCP_CONFIG', 'MCP_ENABLED'],
426
- promptWrapper: (sys, user) => `${sys}\n\n${user}\n\nUse your shell tools to read files (cat, ls, find, grep, head, etc.) and analyze the codebase. You ARE allowed to run read-only commands. Explore the directory structure, read relevant source files, and provide a comprehensive brutal analysis based on what you find.`,
427
- mcpSupport: {
428
- configMethod: 'config-override',
429
- configOverrideKey: 'mcp_servers',
430
- writeProtection: {
431
- method: 'sandbox',
432
- flag: '--sandbox',
433
- value: 'read-only', // already in defaultArgs
434
- },
435
- },
436
- },
437
- gemini: {
438
- command: 'gemini',
439
- defaultArgs: ['--output-format', 'json'],
440
- modelArgName: '--model',
441
- envExtras: { TERM: 'dumb', NO_COLOR: '1', CI: 'true' },
442
- mpcEnvCleanup: ['GEMINI_MCP_CONFIG', 'MCP_ENABLED'],
443
- mcpSupport: {
444
- configMethod: 'server-whitelist',
445
- whitelistFlag: '--allowed-mcp-server-names',
446
- writeProtection: {
447
- method: 'approval-mode',
448
- flag: '--approval-mode',
449
- value: 'plan',
450
- },
451
- },
452
- },
453
- };
454
436
  export class CLIAgentOrchestrator {
455
437
  defaultTimeout = 1800000; // 30 minutes - complex codebases need time
456
438
  defaultWorkingDir = process.cwd();
@@ -462,217 +444,94 @@ export class CLIAgentOrchestrator {
462
444
  MAX_CONCURRENT_CLIS = MAX_CONCURRENT_CLIS; // Configurable concurrency limit
463
445
  // Runtime model discovery
464
446
  modelResolver;
447
+ // Optional observability deps — injected at the composition root in
448
+ // production; absent (undefined) in test harnesses that construct
449
+ // `new CLIAgentOrchestrator()` with no args. Instrumentation is a no-op
450
+ // when these are undefined, via `this.metrics?.*` and `emitLog()` fallback.
451
+ metrics;
452
+ log;
465
453
  // Streaming throttle properties
466
454
  streamingBuffers = new Map();
467
455
  STREAMING_FLUSH_INTERVAL = 200; // 200ms
468
456
  MAX_CHUNK_SIZE = 2048; // 2KB per event
469
457
  HEARTBEAT_INTERVAL = 5000; // 5s between progress heartbeats
470
458
  lastHeartbeat = 0;
471
- constructor(modelResolver) {
472
- this.modelResolver = modelResolver || new ModelResolver();
473
- // Log configuration at startup
474
- logger.info(`🔧 Brutalist MCP Configuration:`);
475
- logger.info(` - Default timeout: ${DEFAULT_TIMEOUT}ms`);
476
- logger.info(` - CLI check timeout: ${CLI_CHECK_TIMEOUT}ms`);
477
- logger.info(` - Max buffer size: ${MAX_BUFFER_SIZE} bytes`);
478
- logger.info(` - Max concurrent CLIs: ${MAX_CONCURRENT_CLIS}`);
459
+ /**
460
+ * Accepts a deps bag OR a bare `ModelResolver` (legacy positional form)
461
+ * OR nothing (characterization-test harnesses). The `instanceof ModelResolver`
462
+ * branch preserves the pre-observability signature.
463
+ */
464
+ constructor(deps) {
465
+ const bag = deps instanceof ModelResolver
466
+ ? { modelResolver: deps }
467
+ : (deps || {});
468
+ this.modelResolver = bag.modelResolver || new ModelResolver();
469
+ this.metrics = bag.metrics;
470
+ this.log = bag.log;
471
+ // Log configuration at startup (via emitLog — falls back to root logger
472
+ // when no scoped log was injected).
473
+ const bootLog = this.emitLog();
474
+ bootLog.info(`🔧 Brutalist MCP Configuration:`);
475
+ bootLog.info(` - Default timeout: ${DEFAULT_TIMEOUT}ms`);
476
+ bootLog.info(` - CLI check timeout: ${CLI_CHECK_TIMEOUT}ms`);
477
+ bootLog.info(` - Max buffer size: ${MAX_BUFFER_SIZE} bytes`);
478
+ bootLog.info(` - Max concurrent CLIs: ${MAX_CONCURRENT_CLIS}`);
479
479
  // Detect CLI context and discover models at startup
480
480
  Promise.all([
481
481
  this.detectCLIContext(),
482
482
  this.modelResolver.initialize(),
483
483
  ]).catch(error => {
484
- logger.error("Failed startup detection:", error);
484
+ this.emitLog().error("Failed startup detection:", error);
485
485
  });
486
486
  }
487
- // Parse NDJSON with proper JSON boundary detection
488
- // Handles JSON objects that contain embedded newlines without data loss
487
+ /**
488
+ * Return the injected scoped logger if present, otherwise the root
489
+ * logger singleton. Keeps un-injected (test) instances working while
490
+ * scoping production emissions with `module='cli-orchestrator'`.
491
+ */
492
+ emitLog() {
493
+ return this.log ?? logger;
494
+ }
495
+ /**
496
+ * Heuristic for classifying a spawnAsync error as a timeout.
497
+ * Centralized so all outcome paths share the same detection logic.
498
+ *
499
+ * Matches any of:
500
+ * - execError.code === 'ETIMEDOUT' (Node's timeout code on some paths)
501
+ * - execError.killed === true (child_process kill after SIGTERM/SIGKILL
502
+ * escalation when the timeout timer fired — see spawnAsync timer block)
503
+ * - execError.message matching /timed out|timeout/i (spawnAsync rejects
504
+ * with "Command timed out after ..." on timer expiry)
505
+ */
506
+ isTimeoutError(err) {
507
+ if (!err || typeof err !== 'object')
508
+ return false;
509
+ const e = err;
510
+ if (e.code === 'ETIMEDOUT')
511
+ return true;
512
+ if (e.killed === true)
513
+ return true;
514
+ if (typeof e.message === 'string' && /timed out|timeout/i.test(e.message))
515
+ return true;
516
+ return false;
517
+ }
518
+ // Proxy methods for backward compatibility — characterization tests
519
+ // access these via (orchestrator as any).methodName().
520
+ // Implementation lives in src/cli-adapters/.
489
521
  parseNDJSON(input) {
490
- if (!input || !input.trim()) {
491
- return [];
492
- }
493
- const results = [];
494
- let depth = 0;
495
- let inString = false;
496
- let escape = false;
497
- let start = 0;
498
- for (let i = 0; i < input.length; i++) {
499
- const char = input[i];
500
- // Handle escape sequences
501
- if (escape) {
502
- escape = false;
503
- continue;
504
- }
505
- if (char === '\\') {
506
- escape = true;
507
- continue;
508
- }
509
- // Track string boundaries
510
- if (char === '"') {
511
- inString = !inString;
512
- continue;
513
- }
514
- // Only count braces/brackets outside of strings
515
- if (inString)
516
- continue;
517
- // Track depth
518
- if (char === '{' || char === '[') {
519
- depth++;
520
- }
521
- else if (char === '}' || char === ']') {
522
- depth--;
523
- // When depth returns to 0, we've found a complete JSON object
524
- if (depth === 0) {
525
- const jsonStr = input.slice(start, i + 1).trim();
526
- if (jsonStr) {
527
- try {
528
- const parsed = JSON.parse(jsonStr);
529
- results.push(parsed);
530
- }
531
- catch (e) {
532
- // Log unparseable segments (not silent)
533
- logger.warn(`Failed to parse JSON segment at position ${start}-${i + 1}:`, {
534
- preview: jsonStr.substring(0, 100),
535
- error: e instanceof Error ? e.message : String(e)
536
- });
537
- }
538
- }
539
- // Move start pointer past this object and any whitespace
540
- start = i + 1;
541
- while (start < input.length && /\s/.test(input[start])) {
542
- start++;
543
- }
544
- i = start - 1; // Will be incremented by loop
545
- }
546
- }
547
- }
548
- // Warn about incomplete JSON at end of input
549
- if (start < input.length) {
550
- const remaining = input.slice(start).trim();
551
- if (remaining) {
552
- logger.warn(`Incomplete JSON at end of input:`, {
553
- preview: remaining.substring(0, 100)
554
- });
555
- }
556
- }
557
- return results;
522
+ return parseNDJSON(input);
558
523
  }
559
- // Decode Claude's stream-json NDJSON output into plain text.
560
- // Extracts text content blocks from all 'assistant' events across all turns.
561
- // Skips system events, user events (tool results with raw file contents), and
562
- // tool_use content blocks within assistant events.
563
- // Falls back to 'result' event if no assistant text was captured.
564
524
  decodeClaudeStreamJson(ndjsonOutput) {
565
- if (!ndjsonOutput || !ndjsonOutput.trim()) {
566
- logger.warn('decodeClaudeStreamJson: empty input');
567
- return '';
568
- }
569
- const events = this.parseNDJSON(ndjsonOutput);
570
- if (events.length === 0) {
571
- logger.warn('decodeClaudeStreamJson: no valid JSON events found in output');
572
- return '';
573
- }
574
- const textParts = [];
575
- let resultText = '';
576
- let resultError = '';
577
- for (const event of events) {
578
- if (typeof event !== 'object' || event === null)
579
- continue;
580
- const typedEvent = event;
581
- if (typedEvent.type === 'assistant' && typedEvent.message?.content) {
582
- // Extract only text blocks from assistant messages (skip tool_use blocks)
583
- const content = typedEvent.message.content;
584
- if (Array.isArray(content)) {
585
- for (const item of content) {
586
- if (item.type === 'text' && item.text) {
587
- textParts.push(item.text);
588
- }
589
- }
590
- }
591
- }
592
- else if (typedEvent.type === 'result') {
593
- if (typedEvent.subtype === 'error' || typedEvent.is_error) {
594
- resultError = typedEvent.error || typedEvent.result || 'Unknown error';
595
- }
596
- else if (typedEvent.result) {
597
- resultText = typedEvent.result;
598
- }
599
- }
600
- // Skip: system, user (tool_result with raw file contents), hooks
601
- }
602
- // Handle error
603
- if (resultError) {
604
- logger.error('decodeClaudeStreamJson: Claude returned error result', { error: resultError });
605
- return `[Claude Error] ${resultError}`;
606
- }
607
- // Use accumulated assistant text if available, fall back to result event
608
- if (textParts.length > 0) {
609
- return textParts.join('\n\n');
610
- }
611
- if (resultText) {
612
- return resultText;
613
- }
614
- logger.warn('decodeClaudeStreamJson: no text content found in stream-json output', {
615
- eventCount: events.length,
616
- eventTypes: events.map(e => e.type).filter(Boolean)
617
- });
618
- return '';
525
+ const provider = getProvider('claude');
526
+ return provider.decodeOutput(ndjsonOutput, ['--output-format', 'stream-json']);
619
527
  }
620
- // Extract only the agent messages from Codex JSON output (no thinking, no file reads, no commands)
621
528
  extractCodexAgentMessage(jsonOutput) {
622
- if (!jsonOutput || !jsonOutput.trim()) {
623
- logger.debug('extractCodexAgentMessage: empty input');
624
- return '';
625
- }
626
- const agentMessages = [];
627
- const events = this.parseNDJSON(jsonOutput);
628
- logger.debug(`extractCodexAgentMessage: processing ${events.length} JSON events`);
629
- for (const event of events) {
630
- if (typeof event !== 'object' || event === null)
631
- continue;
632
- const typedEvent = event;
633
- logger.debug(`extractCodexAgentMessage: parsed event type=${typedEvent.type}, item.type=${typedEvent.item?.type}`);
634
- // Codex --json outputs events with structure: {"type":"item.completed","item":{...}}
635
- // Only extract agent_message type - this is the actual response
636
- if (typedEvent.type === 'item.completed' && typedEvent.item) {
637
- if (typedEvent.item.type === 'agent_message' && typedEvent.item.text) {
638
- // Agent's actual response text
639
- logger.info(`✅ extractCodexAgentMessage: found agent_message with ${typedEvent.item.text.length} chars`);
640
- agentMessages.push(typedEvent.item.text);
641
- }
642
- // Skip all other types:
643
- // - reasoning: internal thinking steps
644
- // - command_execution: file reads, bash commands
645
- // - error: will be in stderr
646
- }
647
- }
648
- const result = agentMessages.join('\n\n').trim();
649
- logger.info(`extractCodexAgentMessage: extracted ${agentMessages.length} messages, total ${result.length} chars`);
650
- return result;
529
+ const provider = getProvider('codex');
530
+ return provider.decodeOutput(jsonOutput, ['--json']);
651
531
  }
652
- // Extract response text from Gemini --output-format json output
653
532
  extractGeminiResponse(jsonOutput) {
654
- if (!jsonOutput || !jsonOutput.trim()) {
655
- logger.debug('extractGeminiResponse: empty input');
656
- return '';
657
- }
658
- try {
659
- const parsed = JSON.parse(jsonOutput);
660
- if (parsed.response && typeof parsed.response === 'string') {
661
- logger.info(`✅ extractGeminiResponse: extracted response with ${parsed.response.length} chars`);
662
- return parsed.response;
663
- }
664
- logger.warn('extractGeminiResponse: no response field in JSON output', {
665
- keys: Object.keys(parsed)
666
- });
667
- return '';
668
- }
669
- catch (e) {
670
- logger.warn('extractGeminiResponse: failed to parse JSON, returning raw output', {
671
- error: e instanceof Error ? e.message : String(e),
672
- preview: jsonOutput.substring(0, 200)
673
- });
674
- return '';
675
- }
533
+ const provider = getProvider('gemini');
534
+ return provider.decodeOutput(jsonOutput, ['--output-format', 'json']);
676
535
  }
677
536
  emitThrottledStreamingEvent(agent, type, content, onStreamingEvent, options) {
678
537
  if (!onStreamingEvent)
@@ -719,98 +578,23 @@ export class CLIAgentOrchestrator {
719
578
  buffer.lastFlush = now;
720
579
  }
721
580
  }
581
+ // Proxy: delegates to per-provider adapter via getProvider()
722
582
  async buildCLICommand(cli, userPrompt, systemPrompt, options) {
723
- const config = CLI_BUILDER_CONFIGS[cli];
724
- const mcpEnabled = options.mcpServers && options.mcpServers.length > 0;
725
- // Build args
726
- const args = [...config.defaultArgs];
727
- const resolvedModel = this.modelResolver.resolveModel(cli, options.models?.[cli]);
728
- if (resolvedModel) {
729
- args.push(config.modelArgName, resolvedModel);
730
- }
731
- if (config.jsonFlag && process.env.CODEX_USE_JSON !== 'false') {
732
- args.push(config.jsonFlag);
733
- }
734
- if (config.streamingArgs) {
735
- args.push(...config.streamingArgs(options));
736
- }
737
- // ── MCP configuration ────────────────────────────────────────────────
738
- let tempMcpConfigPath;
739
- if (mcpEnabled && config.mcpSupport) {
740
- const servers = resolveServers(options.mcpServers);
741
- const serverNames = Object.keys(servers);
742
- // Auto-install Playwright browsers if playwright is requested
743
- if (servers.playwright) {
744
- await ensurePlaywrightBrowsers();
745
- }
746
- if (serverNames.length > 0) {
747
- const mcp = config.mcpSupport;
748
- switch (mcp.configMethod) {
749
- case 'flag-file': {
750
- // Claude: write temp JSON config, pass --mcp-config <path> --strict-mcp-config
751
- const sessionId = options.sessionId || 'default';
752
- tempMcpConfigPath = await writeClaudeMCPConfig(servers, sessionId);
753
- args.push(mcp.configFlag, tempMcpConfigPath);
754
- args.push(mcp.strictFlag);
755
- // Hard deny on write tools
756
- args.push(mcp.writeProtection.flag, mcp.writeProtection.value);
757
- // Non-interactive MCP tool use requires permission bypass
758
- args.push('--permission-mode', 'bypassPermissions');
759
- break;
760
- }
761
- case 'config-override': {
762
- // Codex: -c 'mcp_servers={...}' — replaces all configured servers (excludes brutalist)
763
- const tomlOverride = buildCodexMCPOverride(servers);
764
- args.push('-c', `${mcp.configOverrideKey}=${tomlOverride}`);
765
- // Write protection already in defaultArgs (--sandbox read-only)
766
- break;
767
- }
768
- case 'server-whitelist': {
769
- // Gemini: --allowed-mcp-server-names <names> --approval-mode plan
770
- await ensureGeminiMCPServers(servers);
771
- args.push(mcp.whitelistFlag, ...serverNames);
772
- args.push(mcp.writeProtection.flag, mcp.writeProtection.value);
773
- break;
774
- }
775
- }
776
- logger.info(`🔌 MCP enabled for ${cli}: [${serverNames.join(', ')}]`);
777
- }
778
- }
779
- // Build prompt — skip CLI-specific wrapper in debate mode (prevents Codex
780
- // from exploring the brutalist repo and reading its own control prompts)
781
- const combinedPrompt = (config.promptWrapper && !options.debateMode)
782
- ? config.promptWrapper(systemPrompt, userPrompt)
783
- : `${systemPrompt}\n\n${userPrompt}`;
784
- // Build secure env
583
+ const provider = getProvider(cli);
785
584
  const secureEnv = createSecureEnvironment();
786
- // Add CLI-specific env extras
787
- if (config.envExtras) {
788
- Object.assign(secureEnv, config.envExtras);
789
- }
790
- // Add required API key
791
- const apiKeyMap = {
792
- claude: ['ANTHROPIC_API_KEY'],
793
- codex: ['OPENAI_API_KEY'],
794
- gemini: ['GOOGLE_API_KEY', 'GEMINI_API_KEY']
795
- };
796
- for (const key of apiKeyMap[cli]) {
797
- if (process.env[key])
798
- secureEnv[key] = process.env[key];
799
- }
800
- // Clean up MPC env vars that could cause deadlock — SKIP when MCP is enabled
801
- // (the per-CLI config above already isolates to only the requested servers)
802
- if (!mcpEnabled && config.mpcEnvCleanup) {
803
- for (const envVar of config.mpcEnvCleanup) {
804
- delete secureEnv[envVar];
805
- }
806
- }
807
- secureEnv.BRUTALIST_SUBPROCESS = '1';
808
- return { command: config.command, args, input: combinedPrompt, env: secureEnv, tempMcpConfigPath };
585
+ // Pattern A: thread the scoped logger into the adapter via CLIAgentOptions.log.
586
+ // The adapter reads options.log?.forOperation('<cli>_spawn') and falls back to
587
+ // the root logger import if absent. A caller-supplied options.log wins so a
588
+ // test or an upstream operation can override the per-orchestrator default.
589
+ const perCliOp = `${cli}_spawn`;
590
+ const adapterLog = options.log ?? this.log?.forOperation(perCliOp);
591
+ const optionsWithLog = adapterLog && options.log === undefined ? { ...options, log: adapterLog } : options;
592
+ return provider.buildCommand(userPrompt, systemPrompt, optionsWithLog, this.modelResolver, secureEnv);
809
593
  }
810
594
  async detectCLIContext() {
811
595
  // Return cached context if still valid
812
596
  if (this.cliContextCached && Date.now() - this.cliContextCacheTime < this.CLI_CACHE_TTL) {
813
- logger.debug('Using cached CLI context');
597
+ this.emitLog().debug('Using cached CLI context');
814
598
  return this.cliContext;
815
599
  }
816
600
  const availableCLIs = [];
@@ -820,14 +604,16 @@ export class CLIAgentOrchestrator {
820
604
  { name: 'codex', command: 'codex --version' },
821
605
  { name: 'gemini', command: 'gemini --version' }
822
606
  ];
607
+ // NOTE: These `--version` probes are NOT spawn attempts — they must not
608
+ // increment `cliSpawnTotal`. Only _executeCLI counts spawns.
823
609
  const results = await Promise.allSettled(cliChecks.map(async (check) => {
824
610
  try {
825
611
  await spawnAsync(check.name, ['--version'], { timeout: CLI_CHECK_TIMEOUT });
826
- logger.debug(`CLI available: ${check.name}`);
612
+ this.emitLog().debug(`CLI available: ${check.name}`);
827
613
  return check.name;
828
614
  }
829
615
  catch (error) {
830
- logger.debug(`CLI not available: ${check.name}`);
616
+ this.emitLog().debug(`CLI not available: ${check.name}`);
831
617
  return null;
832
618
  }
833
619
  }));
@@ -843,7 +629,7 @@ export class CLIAgentOrchestrator {
843
629
  selectSingleCLI(preferredCLI, analysisType) {
844
630
  // 1. Honor explicit preference if available
845
631
  if (preferredCLI && this.cliContext.availableCLIs.includes(preferredCLI)) {
846
- logger.info(`✅ Using preferred CLI: ${preferredCLI}`);
632
+ this.emitLog().info(`✅ Using preferred CLI: ${preferredCLI}`);
847
633
  return preferredCLI;
848
634
  }
849
635
  // 2. Smart selection based on analysis type
@@ -863,7 +649,7 @@ export class CLIAgentOrchestrator {
863
649
  // 3. Select by priority from available CLIs
864
650
  for (const cli of priority) {
865
651
  if (this.cliContext.availableCLIs.includes(cli)) {
866
- logger.info(`🎯 Auto-selected ${cli} for ${analysisType || 'general'} analysis`);
652
+ this.emitLog().info(`🎯 Auto-selected ${cli} for ${analysisType || 'general'} analysis`);
867
653
  return cli;
868
654
  }
869
655
  }
@@ -871,7 +657,7 @@ export class CLIAgentOrchestrator {
871
657
  if (this.cliContext.availableCLIs.length === 0) {
872
658
  throw new Error('No CLI agents available');
873
659
  }
874
- logger.warn(`⚠️ Using fallback CLI: ${this.cliContext.availableCLIs[0]}`);
660
+ this.emitLog().warn(`⚠️ Using fallback CLI: ${this.cliContext.availableCLIs[0]}`);
875
661
  return this.cliContext.availableCLIs[0];
876
662
  }
877
663
  async _executeCLI(cliName, userPrompt, systemPromptSpec, options = {}, commandBuilder) {
@@ -879,9 +665,22 @@ export class CLIAgentOrchestrator {
879
665
  const workingDir = options.workingDirectory || this.defaultWorkingDir;
880
666
  const timeout = options.timeout || this.defaultTimeout;
881
667
  let tempMcpConfigPath;
668
+ // Provider label for the spawn counter. Derived from cliName so the
669
+ // label set stays in sync with the 'claude' | 'codex' | 'gemini' union
670
+ // instead of reading adapter.name.
671
+ const provider = cliName;
672
+ // Gate for the catch-branch counter emission. Per compose.py:174,
673
+ // pre-spawn paths (commandBuilder throwing before spawnAsync is
674
+ // invoked, or spawnAsync's own pre-spawn validators for
675
+ // command/args/cwd rejecting) do NOT represent a spawn attempt and
676
+ // must not increment the counter. Cycle 3 Task CLI-C' tightened
677
+ // the semantics: `spawned` is now flipped inside spawnAsync via the
678
+ // `onBeforeSpawn` callback, which fires only after all pre-spawn
679
+ // validators pass and immediately before `child_process.spawn()`.
680
+ let spawned = false;
882
681
  try {
883
- logger.info(`🤖 Executing ${cliName.toUpperCase()} CLI`);
884
- logger.debug(`${cliName.toUpperCase()} prompt`, { prompt: userPrompt.substring(0, 100) });
682
+ this.emitLog().info(`🤖 Executing ${cliName.toUpperCase()} CLI`);
683
+ this.emitLog().debug(`${cliName.toUpperCase()} prompt`, { promptLength: userPrompt.length });
885
684
  // Emit agent start event
886
685
  if (options.onStreamingEvent) {
887
686
  options.onStreamingEvent({
@@ -895,33 +694,57 @@ export class CLIAgentOrchestrator {
895
694
  const built = await commandBuilder(userPrompt, systemPromptSpec, options);
896
695
  const { command, args, env, input } = built;
897
696
  tempMcpConfigPath = built.tempMcpConfigPath;
898
- logger.info(`📋 Command: ${command} ${args.join(' ')}`);
899
- logger.info(`📁 Working directory: ${workingDir}`);
900
- logger.info(`⏱️ Timeout: ${timeout}ms`);
697
+ // Cycle 4 Task T18 (F9 — security): do NOT log raw command +
698
+ // joined args. The args array can contain caller-controlled
699
+ // content that crossed the trust boundary (Codex `-c
700
+ // mcp_servers=<TOML>` override content, Claude `--mcp-config
701
+ // <temp-path>`, prompt fragments for CLIs that accept inline
702
+ // prompt). Log only bounded metadata — cliName for provider
703
+ // identification, argCount for diagnostic shape, and
704
+ // hasMcpConfig so operators can correlate MCP-enabled spawns
705
+ // with MCP registry entries.
706
+ const hasMcpConfig = !!(options.mcpServers && options.mcpServers.length > 0);
707
+ this.emitLog().info('CLI spawn preparing', {
708
+ cliName,
709
+ argCount: args.length,
710
+ hasMcpConfig,
711
+ });
712
+ this.emitLog().info(`📁 Working directory: ${workingDir}`);
713
+ this.emitLog().info(`⏱️ Timeout: ${timeout}ms`);
901
714
  if (input) {
902
- logger.info(`📝 Using stdin for prompt (${input.length} characters)`);
715
+ this.emitLog().info(`📝 Using stdin for prompt (${input.length} characters)`);
903
716
  }
717
+ // `spawned` is flipped by spawnAsync's `onBeforeSpawn` callback
718
+ // immediately before `child_process.spawn()`. This means
719
+ // pre-spawn validator rejects inside spawnAsync (invalid command,
720
+ // invalid args, invalid cwd) leave `spawned === false` so the
721
+ // catch-branch counter does NOT fire for those paths
722
+ // (Cycle 3 Task CLI-C').
904
723
  const { stdout, stderr } = await spawnAsync(command, args, {
905
724
  cwd: workingDir,
906
725
  timeout: timeout,
907
726
  maxBuffer: MAX_BUFFER_SIZE, // Configurable buffer for model outputs
908
727
  env: env,
909
728
  input: input,
729
+ onBeforeSpawn: () => { spawned = true; },
910
730
  onProgress: (chunk, type) => {
911
- // Stream output in real-time with agent identification
731
+ // Stream output in real-time with agent identification.
732
+ // Log payloads are length-only at debug level — raw chunk text is
733
+ // NEVER emitted to the logger to avoid leaking prompt / response
734
+ // content through log aggregators. Streaming events are Layer 2.
912
735
  if (type === 'stdout' && chunk.trim()) {
913
- logger.info(`🤖 ${cliName.toUpperCase()}: ${chunk.trim()}`);
736
+ this.emitLog().debug(`${cliName.toUpperCase()} stdout chunk received`, { bytes: chunk.length });
914
737
  // Emit throttled streaming event for real-time updates
915
738
  this.emitThrottledStreamingEvent(cliName, 'agent_progress', chunk.trim(), options.onStreamingEvent, options);
916
739
  }
917
740
  else if (type === 'stderr' && chunk.trim()) {
918
- logger.warn(`⚠️ ${cliName.toUpperCase()} stderr: ${chunk.trim()}`);
741
+ this.emitLog().debug(`${cliName.toUpperCase()} stderr chunk received`, { bytes: chunk.length });
919
742
  // Emit throttled error streaming event
920
743
  this.emitThrottledStreamingEvent(cliName, 'agent_error', chunk.trim(), options.onStreamingEvent, options);
921
744
  }
922
745
  }
923
746
  });
924
- logger.info(`✅ ${cliName.toUpperCase()} completed (${Date.now() - startTime}ms)`);
747
+ this.emitLog().info(`✅ ${cliName.toUpperCase()} completed (${Date.now() - startTime}ms)`);
925
748
  // Emit completion event
926
749
  if (options.onStreamingEvent) {
927
750
  options.onStreamingEvent({
@@ -932,33 +755,20 @@ export class CLIAgentOrchestrator {
932
755
  sessionId: options.sessionId
933
756
  });
934
757
  }
935
- // Post-process CLI output if needed
758
+ // Post-process CLI output via provider adapter. Thread the scoped
759
+ // logger through decodeOutput so adapter warnings/errors carry
760
+ // module=cli-orchestrator + operation=<provider>_spawn context.
936
761
  let finalOutput = stdout;
937
- // If Claude was run with stream-json format, decode the NDJSON to extract text
938
- if (cliName === 'claude' && args.includes('--output-format') && args.includes('stream-json')) {
939
- const decodedText = this.decodeClaudeStreamJson(stdout);
940
- if (decodedText) {
941
- finalOutput = decodedText;
942
- }
943
- }
944
- // If Codex was run with --json flag, extract only the agent messages
945
- if (cliName === 'codex' && args.includes('--json')) {
946
- const decodedText = this.extractCodexAgentMessage(stdout);
947
- if (decodedText) {
948
- finalOutput = decodedText;
949
- }
950
- }
951
- // If Gemini was run with --output-format json, extract the response field
952
- if (cliName === 'gemini' && args.includes('--output-format') && args.includes('json')) {
953
- const decodedText = this.extractGeminiResponse(stdout);
954
- if (decodedText) {
955
- finalOutput = decodedText;
956
- }
762
+ const providerAdapter = getProvider(cliName);
763
+ const decodeLog = this.log?.forOperation(`${cliName}_spawn`);
764
+ const decodedText = providerAdapter.decodeOutput(stdout, args, decodeLog);
765
+ if (decodedText) {
766
+ finalOutput = decodedText;
957
767
  }
958
768
  // Fallback: If stdout is empty but stderr has content and exit was successful,
959
769
  // Claude might have written to stderr (common in non-TTY environments)
960
770
  if (!finalOutput.trim() && stderr && stderr.trim()) {
961
- logger.info(`📝 Using stderr as output for ${cliName} (stdout was empty)`);
771
+ this.emitLog().info(`📝 Using stderr as output for ${cliName} (stdout was empty)`);
962
772
  finalOutput = stderr;
963
773
  }
964
774
  // Detect CLI errors that exit 0 but contain fatal error output
@@ -985,7 +795,7 @@ export class CLIAgentOrchestrator {
985
795
  const resetMatch = combinedOutput.match(/reset(?:s)? (?:in|after) (\d+h\s*\d+m(?:\s*\d+s)?)/i);
986
796
  const resetInfo = resetMatch ? ` (resets in ${resetMatch[1]})` : '';
987
797
  const errorMsg = `${cliName.toUpperCase()} quota exhausted${resetInfo}. The CLI exited 0 but returned a quota error instead of analysis output.`;
988
- logger.warn(`⏱️ ${errorMsg}`);
798
+ this.emitLog().warn(`⏱️ ${errorMsg}`);
989
799
  if (options.onStreamingEvent) {
990
800
  options.onStreamingEvent({
991
801
  type: 'agent_error',
@@ -995,24 +805,64 @@ export class CLIAgentOrchestrator {
995
805
  sessionId: options.sessionId
996
806
  });
997
807
  }
808
+ // Spawn counter: outcome=refused (quota exhaustion — CLI exited 0
809
+ // with a quota error in stdout/stderr). Labels annotated against
810
+ // CLI_SPAWN_LABELS so a future label-set change fails at compile
811
+ // time. Wrapped in `safeMetric` so a label-validation throw or
812
+ // other metric-layer exception cannot propagate into the outer
813
+ // spawn try/catch and be misclassified as a spawn failure
814
+ // (Cycle 3 Task CLI-B' — parity with debate's safeMetric).
815
+ const quotaLabels = {
816
+ provider,
817
+ outcome: 'refused',
818
+ };
819
+ safeMetric(this.emitLog(), 'cliSpawnTotal.inc(refused:quota)', () => {
820
+ this.metrics?.cliSpawnTotal.inc(quotaLabels, 1);
821
+ });
998
822
  return {
999
823
  agent: cliName,
1000
824
  success: false,
1001
825
  output: '',
1002
826
  error: errorMsg,
1003
827
  executionTime: Date.now() - startTime,
1004
- command: `${command} ${args.join(' ')}`,
828
+ // Cycle 4 Task T18 (F9): match the failure-path redaction
829
+ // parity — `command` is a diagnostic display field; the
830
+ // static placeholder preserves the response shape without
831
+ // leaking raw command + args (which may include Codex TOML
832
+ // MCP overrides, Claude temp config paths, or prompt
833
+ // fragments that crossed the trust boundary).
834
+ command: `(redacted command for ${cliName})`,
1005
835
  workingDirectory: workingDir,
1006
836
  exitCode: 0
1007
837
  };
1008
838
  }
839
+ // Spawn counter: outcome=success (normal completion path). Labels
840
+ // annotated against CLI_SPAWN_LABELS so a future label-set change
841
+ // fails at compile time. Wrapped in `safeMetric` so a metric-layer
842
+ // exception cannot propagate into the outer catch branch and be
843
+ // misclassified as a spawn failure (Cycle 3 Task CLI-B').
844
+ const successLabels = {
845
+ provider,
846
+ outcome: 'success',
847
+ };
848
+ safeMetric(this.emitLog(), 'cliSpawnTotal.inc(success)', () => {
849
+ this.metrics?.cliSpawnTotal.inc(successLabels, 1);
850
+ });
1009
851
  return {
1010
852
  agent: cliName,
1011
853
  success: true,
1012
854
  output: finalOutput,
1013
855
  error: stderr || undefined,
1014
856
  executionTime: Date.now() - startTime,
1015
- command: `${command} ${args.join(' ')}`,
857
+ // Cycle 4 Task T18 (F9): same redaction parity as the
858
+ // failure path — `command` is a diagnostic display field,
859
+ // not a machine-readable command reproduction. The raw
860
+ // command + args can contain caller-controlled payloads
861
+ // (Codex TOML MCP overrides at codex-adapter.ts:86/:87,
862
+ // Claude temp config paths at claude-adapter.ts:96, prompt
863
+ // fragments for CLIs that accept inline prompt) that
864
+ // crossed the trust boundary.
865
+ command: `(redacted command for ${cliName})`,
1016
866
  workingDirectory: workingDir,
1017
867
  exitCode: 0
1018
868
  };
@@ -1029,20 +879,66 @@ export class CLIAgentOrchestrator {
1029
879
  ];
1030
880
  const errorText = `${execError.message || ''} ${execError.stderr || ''}`.toLowerCase();
1031
881
  const isRateLimit = rateLimitPatterns.some(p => errorText.includes(p.toLowerCase()));
882
+ // Classify outcome for the spawn counter. Priority: rate-limit > timeout
883
+ // > generic failure. Timeout check uses the centralized heuristic.
884
+ // Classification priority is unchanged; the emission is gated on
885
+ // `spawned` so pre-spawn failures (e.g., commandBuilder throwing)
886
+ // do NOT increment the counter (compose.py:174).
887
+ let outcome;
888
+ if (isRateLimit) {
889
+ outcome = 'refused';
890
+ }
891
+ else if (this.isTimeoutError(execError)) {
892
+ outcome = 'timeout';
893
+ }
894
+ else {
895
+ outcome = 'failure';
896
+ }
897
+ if (spawned) {
898
+ // Wrapped in `safeMetric` so a metric-layer exception cannot
899
+ // re-throw from the catch branch (which would short-circuit
900
+ // the streaming event emission and the final failure-response
901
+ // construction below). Parity with debate's safeMetric pattern
902
+ // (Cycle 3 Task CLI-B').
903
+ const failureLabels = {
904
+ provider,
905
+ outcome,
906
+ };
907
+ safeMetric(this.emitLog(), `cliSpawnTotal.inc(${outcome})`, () => {
908
+ this.metrics?.cliSpawnTotal.inc(failureLabels, 1);
909
+ });
910
+ }
1032
911
  if (isRateLimit) {
1033
- logger.warn(`⏱️ ${cliName.toUpperCase()} CLI hit rate/usage limit (${Date.now() - startTime}ms)`);
912
+ this.emitLog().warn(`⏱️ ${cliName.toUpperCase()} CLI hit rate/usage limit (${Date.now() - startTime}ms)`);
1034
913
  }
1035
914
  else {
1036
- logger.error(`❌ ${cliName.toUpperCase()} execution failed (${Date.now() - startTime}ms)`, {
915
+ this.emitLog().error(`❌ ${cliName.toUpperCase()} execution failed (${Date.now() - startTime}ms)`, {
1037
916
  error: "Redacted: See internal logs for full error details.",
1038
917
  exitCode,
1039
918
  stderr: "Redacted: See internal logs for full stderr output."
1040
919
  });
1041
920
  }
921
+ // Cycle 3 Task D' (security): `errorMsg` is used both as streaming
922
+ // event content (just below) and as `result.error` in the returned
923
+ // CLIAgentResponse. Raw `error.message` from spawnAsync /
924
+ // downstream CLIs can contain CLI stdout/stderr fragments (TOML
925
+ // MCP override content, prompt echoes, tool-output snippets) that
926
+ // must not leak via streaming fan-out or the MCP response payload.
927
+ // We apply the same static-redaction pattern used by the logger
928
+ // emission at the `❌ ... execution failed` call above: map each
929
+ // classification path to a short, content-free string. The
930
+ // timeout branch preserves the millisecond budget (from our own
931
+ // `timeout` variable, not the underlying error) so downstream
932
+ // callers can still distinguish timeout from generic failure.
1042
933
  const errorMsg = isRateLimit
1043
934
  ? `${cliName.toUpperCase()} hit rate/usage limit. Try again later or use a different agent.`
1044
- : (error instanceof Error ? error.message : String(error));
1045
- // Emit error event
935
+ : this.isTimeoutError(execError)
936
+ ? `${cliName.toUpperCase()} execution timed out after ${timeout}ms. See internal logs for details.`
937
+ : `${cliName.toUpperCase()} execution failed. See internal logs for details.`;
938
+ // Emit error event. The content derives from the redacted
939
+ // `errorMsg` above, never from `error.message` directly, so
940
+ // streaming observers (HTTP SSE, MCP notifications) do not
941
+ // receive raw CLI payload fragments.
1046
942
  if (options.onStreamingEvent) {
1047
943
  options.onStreamingEvent({
1048
944
  type: 'agent_error',
@@ -1070,6 +966,8 @@ export class CLIAgentOrchestrator {
1070
966
  }
1071
967
  }
1072
968
  }
969
+ // Per-provider execution methods — thin wrappers via adapter dispatch.
970
+ // Retained for backward compatibility (tests may reference these).
1073
971
  async executeClaudeCode(userPrompt, systemPromptSpec, options = {}) {
1074
972
  return this._executeCLI('claude', userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand('claude', user, sys, opts));
1075
973
  }
@@ -1083,28 +981,77 @@ export class CLIAgentOrchestrator {
1083
981
  // Wait for available slot to prevent resource exhaustion
1084
982
  await this.waitForAvailableSlot();
1085
983
  this.runningCLIs++;
1086
- logger.info(`🎯 Executing ${cli} (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
984
+ this.emitLog().info(`\u{1F3AF} Executing ${cli} (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1087
985
  try {
1088
- switch (cli) {
1089
- case 'claude':
1090
- return await this.executeClaudeCode(userPrompt, systemPromptSpec, options);
1091
- case 'codex':
1092
- return await this.executeCodex(userPrompt, systemPromptSpec, options);
1093
- case 'gemini':
1094
- return await this.executeGemini(userPrompt, systemPromptSpec, options);
1095
- default:
1096
- throw new Error(`Unknown CLI: ${cli}`);
986
+ // Gemini frontier rotation: when using the default frontier chain (no
987
+ // caller-specified model, no env-var override), rotate through the
988
+ // chain on saturation failures. Rotation is disabled when the caller
989
+ // or operator has explicitly chosen a model.
990
+ const geminiRotationActive = cli === 'gemini'
991
+ && !options.models?.gemini
992
+ && !process.env.BRUTALIST_GEMINI_MODEL;
993
+ if (geminiRotationActive) {
994
+ return await this._executeGeminiWithRotation(userPrompt, systemPromptSpec, options);
1097
995
  }
996
+ // Dispatch to adapter via buildCLICommand (which delegates to provider)
997
+ return await this._executeCLI(cli, userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand(cli, user, sys, opts));
1098
998
  }
1099
999
  finally {
1100
1000
  this.runningCLIs--;
1101
- logger.info(`✅ Released CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1001
+ this.emitLog().info(`\u2705 Released CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1002
+ }
1003
+ }
1004
+ /**
1005
+ * Gemini frontier rotation - iterate through GEMINI_FRONTIER_CHAIN on
1006
+ * saturation failures.
1007
+ *
1008
+ * Only active when neither caller nor operator has chosen a model. Each
1009
+ * attempt injects the model via options.models.gemini. Per-attempt
1010
+ * saturation is detected via the existing quota-pattern detection in
1011
+ * _executeCLI - saturation produces success=false with an error matching
1012
+ * /\b429\b/ or quota-family patterns. On non-saturation failure,
1013
+ * rotation stops immediately (a different model will not fix prompt
1014
+ * errors, subprocess crashes, or auth failures). On chain exhaustion,
1015
+ * the last failing response is returned.
1016
+ */
1017
+ async _executeGeminiWithRotation(userPrompt, systemPromptSpec, options) {
1018
+ const chain = GEMINI_FRONTIER_CHAIN;
1019
+ let lastResponse = null;
1020
+ for (let i = 0; i < chain.length; i++) {
1021
+ const model = chain[i];
1022
+ const attemptOptions = {
1023
+ ...options,
1024
+ models: { ...(options.models || {}), gemini: model },
1025
+ };
1026
+ if (i > 0) {
1027
+ this.emitLog().info(`Gemini rotation: attempting tier ${i + 1}/${chain.length} (${model})`);
1028
+ }
1029
+ const response = await this._executeCLI('gemini', userPrompt, systemPromptSpec, attemptOptions, (user, sys, opts) => this.buildCLICommand('gemini', user, sys, opts));
1030
+ if (response.success) {
1031
+ if (i > 0) {
1032
+ this.emitLog().warn(`Gemini served by ${model} after ${i} rotation${i === 1 ? '' : 's'} (tier ${i + 1}/${chain.length})`);
1033
+ }
1034
+ else {
1035
+ this.emitLog().debug(`Gemini served by frontier ${model}`);
1036
+ }
1037
+ return response;
1038
+ }
1039
+ if (!isGeminiSaturationError(response.error)) {
1040
+ this.emitLog().debug(`Gemini ${model} failed non-saturation; rotation aborted`, {
1041
+ errorPreview: response.error?.slice(0, 120),
1042
+ });
1043
+ return response;
1044
+ }
1045
+ this.emitLog().warn(`Gemini ${model} saturated; rotating to next frontier tier`);
1046
+ lastResponse = response;
1102
1047
  }
1048
+ this.emitLog().error(`Gemini frontier chain exhausted (${chain.length} tiers); all saturated`);
1049
+ return lastResponse;
1103
1050
  }
1104
1051
  async waitForAvailableSlot() {
1105
1052
  let waitTime = 100; // Start with 100ms wait time
1106
1053
  while (this.runningCLIs >= this.MAX_CONCURRENT_CLIS) {
1107
- logger.info(`⏳ Waiting for available CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} in use). Next check in ${waitTime}ms...`);
1054
+ this.emitLog().info(`⏳ Waiting for available CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} in use). Next check in ${waitTime}ms...`);
1108
1055
  await new Promise(resolve => setTimeout(resolve, waitTime));
1109
1056
  waitTime = Math.min(waitTime * 2, 5000); // Exponential backoff, max 5 seconds
1110
1057
  }
@@ -1148,15 +1095,15 @@ export class CLIAgentOrchestrator {
1148
1095
  // Only validate filesystem paths for tools that actually operate on files/directories
1149
1096
  // NOTE: Must match BrutalistPromptType values (camelCase)
1150
1097
  const filesystemTools = ['codebase', 'fileStructure', 'dependencies', 'gitHistory', 'testCoverage'];
1151
- logger.debug(`Validation check: analysisType="${analysisType}", isFilesystemTool=${filesystemTools.includes(analysisType)}`);
1098
+ this.emitLog().debug(`Validation check: analysisType="${analysisType}", isFilesystemTool=${filesystemTools.includes(analysisType)}`);
1152
1099
  try {
1153
1100
  if (filesystemTools.includes(analysisType) && primaryContent && primaryContent.trim() !== '') {
1154
- logger.debug(`Validating path: "${primaryContent}"`);
1101
+ this.emitLog().debug(`Validating path: "${primaryContent}"`);
1155
1102
  await asyncValidatePath(primaryContent, 'targetPath');
1156
1103
  }
1157
1104
  }
1158
1105
  catch (error) {
1159
- logger.error(`Path validation failed: ${error}`);
1106
+ this.emitLog().error(`Path validation failed: ${error}`);
1160
1107
  throw new Error(`Security validation failed: ${error instanceof Error ? error.message : String(error)}`);
1161
1108
  }
1162
1109
  // Validate workingDirectory if provided
@@ -1180,18 +1127,18 @@ export class CLIAgentOrchestrator {
1180
1127
  }
1181
1128
  // Deduplicate
1182
1129
  clisToUse = [...new Set(options.clis)];
1183
- logger.info(`🎯 Using user-specified CLIs: ${clisToUse.join(', ')}`);
1130
+ this.emitLog().info(`🎯 Using user-specified CLIs: ${clisToUse.join(', ')}`);
1184
1131
  }
1185
1132
  else {
1186
1133
  // Default: use all available CLIs
1187
1134
  clisToUse = [...this.cliContext.availableCLIs];
1188
- logger.info(`📋 Using all available CLIs: ${clisToUse.join(', ')}`);
1135
+ this.emitLog().info(`📋 Using all available CLIs: ${clisToUse.join(', ')}`);
1189
1136
  }
1190
1137
  if (clisToUse.length === 0) {
1191
1138
  throw new Error('No CLI agents available for analysis');
1192
1139
  }
1193
1140
  const selectionMethod = options.clis ? 'user-specified' : 'all-available';
1194
- logger.info(`📊 Executing ${clisToUse.length} CLI(s): ${clisToUse.join(', ')} (${selectionMethod})`);
1141
+ this.emitLog().info(`📊 Executing ${clisToUse.length} CLI(s): ${clisToUse.join(', ')} (${selectionMethod})`);
1195
1142
  // Execute selected CLIs in parallel with allSettled for better error handling
1196
1143
  const promises = clisToUse.map(async (cli) => {
1197
1144
  try {
@@ -1203,7 +1150,7 @@ export class CLIAgentOrchestrator {
1203
1150
  };
1204
1151
  }
1205
1152
  catch (error) {
1206
- logger.error(`❌ ${cli} execution failed:`, error);
1153
+ this.emitLog().error(`❌ ${cli} execution failed:`, error);
1207
1154
  return {
1208
1155
  agent: cli,
1209
1156
  success: false,
@@ -1220,7 +1167,7 @@ export class CLIAgentOrchestrator {
1220
1167
  const responses = results
1221
1168
  .filter(result => result.status === 'fulfilled')
1222
1169
  .map(result => result.value);
1223
- logger.info(`✅ CLI analysis complete: ${responses.filter(r => r.success).length}/${responses.length} successful`);
1170
+ this.emitLog().info(`✅ CLI analysis complete: ${responses.filter(r => r.success).length}/${responses.length} successful`);
1224
1171
  return responses;
1225
1172
  }
1226
1173
  synthesizeBrutalistFeedback(responses, analysisType) {