@brutalist/mcp 1.8.0 → 1.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +26 -0
  2. package/dist/brutalist-server.d.ts +31 -9
  3. package/dist/brutalist-server.d.ts.map +1 -1
  4. package/dist/brutalist-server.js +107 -673
  5. package/dist/brutalist-server.js.map +1 -1
  6. package/dist/cli-adapters/claude-adapter.d.ts +25 -0
  7. package/dist/cli-adapters/claude-adapter.d.ts.map +1 -0
  8. package/dist/cli-adapters/claude-adapter.js +245 -0
  9. package/dist/cli-adapters/claude-adapter.js.map +1 -0
  10. package/dist/cli-adapters/codex-adapter.d.ts +23 -0
  11. package/dist/cli-adapters/codex-adapter.d.ts.map +1 -0
  12. package/dist/cli-adapters/codex-adapter.js +173 -0
  13. package/dist/cli-adapters/codex-adapter.js.map +1 -0
  14. package/dist/cli-adapters/gemini-adapter.d.ts +50 -0
  15. package/dist/cli-adapters/gemini-adapter.d.ts.map +1 -0
  16. package/dist/cli-adapters/gemini-adapter.js +196 -0
  17. package/dist/cli-adapters/gemini-adapter.js.map +1 -0
  18. package/dist/cli-adapters/index.d.ts +75 -0
  19. package/dist/cli-adapters/index.d.ts.map +1 -0
  20. package/dist/cli-adapters/index.js +29 -0
  21. package/dist/cli-adapters/index.js.map +1 -0
  22. package/dist/cli-adapters/shared.d.ts +12 -0
  23. package/dist/cli-adapters/shared.d.ts.map +1 -0
  24. package/dist/cli-adapters/shared.js +99 -0
  25. package/dist/cli-adapters/shared.js.map +1 -0
  26. package/dist/cli-agents.d.ts +64 -2
  27. package/dist/cli-agents.d.ts.map +1 -1
  28. package/dist/cli-agents.js +417 -401
  29. package/dist/cli-agents.js.map +1 -1
  30. package/dist/debate/constitutional.d.ts +27 -0
  31. package/dist/debate/constitutional.d.ts.map +1 -0
  32. package/dist/debate/constitutional.js +74 -0
  33. package/dist/debate/constitutional.js.map +1 -0
  34. package/dist/debate/debate-orchestrator.d.ts +154 -0
  35. package/dist/debate/debate-orchestrator.d.ts.map +1 -0
  36. package/dist/debate/debate-orchestrator.js +699 -0
  37. package/dist/debate/debate-orchestrator.js.map +1 -0
  38. package/dist/debate/index.d.ts +18 -0
  39. package/dist/debate/index.d.ts.map +1 -0
  40. package/dist/debate/index.js +18 -0
  41. package/dist/debate/index.js.map +1 -0
  42. package/dist/debate/refusal-detection.d.ts +27 -0
  43. package/dist/debate/refusal-detection.d.ts.map +1 -0
  44. package/dist/debate/refusal-detection.js +62 -0
  45. package/dist/debate/refusal-detection.js.map +1 -0
  46. package/dist/debate/synthesis.d.ts +22 -0
  47. package/dist/debate/synthesis.d.ts.map +1 -0
  48. package/dist/debate/synthesis.js +117 -0
  49. package/dist/debate/synthesis.js.map +1 -0
  50. package/dist/logger.d.ts +204 -1
  51. package/dist/logger.d.ts.map +1 -1
  52. package/dist/logger.js +398 -18
  53. package/dist/logger.js.map +1 -1
  54. package/dist/metrics/counter.d.ts +24 -0
  55. package/dist/metrics/counter.d.ts.map +1 -0
  56. package/dist/metrics/counter.js +60 -0
  57. package/dist/metrics/counter.js.map +1 -0
  58. package/dist/metrics/histogram.d.ts +42 -0
  59. package/dist/metrics/histogram.d.ts.map +1 -0
  60. package/dist/metrics/histogram.js +114 -0
  61. package/dist/metrics/histogram.js.map +1 -0
  62. package/dist/metrics/index.d.ts +26 -0
  63. package/dist/metrics/index.d.ts.map +1 -0
  64. package/dist/metrics/index.js +22 -0
  65. package/dist/metrics/index.js.map +1 -0
  66. package/dist/metrics/registry.d.ts +96 -0
  67. package/dist/metrics/registry.d.ts.map +1 -0
  68. package/dist/metrics/registry.js +113 -0
  69. package/dist/metrics/registry.js.map +1 -0
  70. package/dist/metrics/safe-metric.d.ts +25 -0
  71. package/dist/metrics/safe-metric.d.ts.map +1 -0
  72. package/dist/metrics/safe-metric.js +41 -0
  73. package/dist/metrics/safe-metric.js.map +1 -0
  74. package/dist/metrics/types.d.ts +82 -0
  75. package/dist/metrics/types.d.ts.map +1 -0
  76. package/dist/metrics/types.js +121 -0
  77. package/dist/metrics/types.js.map +1 -0
  78. package/dist/registry/argument-spaces.d.ts.map +1 -1
  79. package/dist/registry/argument-spaces.js +20 -0
  80. package/dist/registry/argument-spaces.js.map +1 -1
  81. package/dist/registry/domains.d.ts.map +1 -1
  82. package/dist/registry/domains.js +17 -1
  83. package/dist/registry/domains.js.map +1 -1
  84. package/dist/streaming/circuit-breaker.d.ts +13 -1
  85. package/dist/streaming/circuit-breaker.d.ts.map +1 -1
  86. package/dist/streaming/circuit-breaker.js +13 -1
  87. package/dist/streaming/circuit-breaker.js.map +1 -1
  88. package/dist/streaming/intelligent-buffer.d.ts +13 -1
  89. package/dist/streaming/intelligent-buffer.d.ts.map +1 -1
  90. package/dist/streaming/intelligent-buffer.js +13 -1
  91. package/dist/streaming/intelligent-buffer.js.map +1 -1
  92. package/dist/streaming/output-parser.d.ts +16 -2
  93. package/dist/streaming/output-parser.d.ts.map +1 -1
  94. package/dist/streaming/output-parser.js +16 -2
  95. package/dist/streaming/output-parser.js.map +1 -1
  96. package/dist/streaming/progress-tracker.d.ts +14 -1
  97. package/dist/streaming/progress-tracker.d.ts.map +1 -1
  98. package/dist/streaming/progress-tracker.js +14 -1
  99. package/dist/streaming/progress-tracker.js.map +1 -1
  100. package/dist/streaming/session-manager.d.ts +14 -1
  101. package/dist/streaming/session-manager.d.ts.map +1 -1
  102. package/dist/streaming/session-manager.js +14 -1
  103. package/dist/streaming/session-manager.js.map +1 -1
  104. package/dist/streaming/sse-transport.d.ts +12 -1
  105. package/dist/streaming/sse-transport.d.ts.map +1 -1
  106. package/dist/streaming/sse-transport.js +12 -1
  107. package/dist/streaming/sse-transport.js.map +1 -1
  108. package/dist/streaming/streaming-orchestrator.d.ts +15 -1
  109. package/dist/streaming/streaming-orchestrator.d.ts.map +1 -1
  110. package/dist/streaming/streaming-orchestrator.js +15 -1
  111. package/dist/streaming/streaming-orchestrator.js.map +1 -1
  112. package/dist/system-prompts.d.ts.map +1 -1
  113. package/dist/system-prompts.js +490 -4
  114. package/dist/system-prompts.js.map +1 -1
  115. package/dist/tool-definitions-generated.d.ts.map +1 -1
  116. package/dist/tool-definitions-generated.js +3 -1
  117. package/dist/tool-definitions-generated.js.map +1 -1
  118. package/package.json +1 -1
@@ -3,7 +3,28 @@ import { promises as fs, realpathSync } from 'fs';
3
3
  import { promisify } from 'util';
4
4
  import { logger } from './logger.js';
5
5
  import { ModelResolver } from './model-resolver.js';
6
- import { resolveServers, writeClaudeMCPConfig, cleanupTempConfig, buildCodexMCPOverride, ensureGeminiMCPServers, ensurePlaywrightBrowsers, } from './mcp-registry.js';
6
+ import { cleanupTempConfig, } from './mcp-registry.js';
7
+ import { getProvider, parseNDJSON } from './cli-adapters/index.js';
8
+ import { GEMINI_FRONTIER_CHAIN } from './cli-adapters/gemini-adapter.js';
9
+ import { safeMetric } from './metrics/index.js';
10
+ /**
11
+ * Detect Gemini-specific saturation errors — "No capacity available",
12
+ * status 429, overloaded, quota exhaustion. Used by the frontier-chain
13
+ * rotation logic in `executeSingleCLI` to decide whether to rotate to the
14
+ * next model tier or fail immediately (non-saturation errors don't
15
+ * benefit from a different model).
16
+ *
17
+ * Model-not-found (/ModelNotFoundError/) is NOT saturation — it means
18
+ * the model alias is wrong. Treat as non-saturation so rotation aborts
19
+ * and the caller sees the error rather than silently rotating past it.
20
+ */
21
+ function isGeminiSaturationError(error) {
22
+ if (!error)
23
+ return false;
24
+ if (/ModelNotFoundError/i.test(error))
25
+ return false;
26
+ return /no capacity available|\b429\b|overloaded|rateLimitExceeded|rate limit|quota|too many requests/i.test(error);
27
+ }
7
28
  // Configurable timeouts and limits
8
29
  const DEFAULT_TIMEOUT = parseInt(process.env.BRUTALIST_TIMEOUT || '1800000', 10); // 30 minutes default
9
30
  const CLI_CHECK_TIMEOUT = parseInt(process.env.BRUTALIST_CLI_CHECK_TIMEOUT || '5000', 10); // 5 seconds for CLI checks
@@ -20,8 +41,9 @@ export const CLAUDE_ALIASES = ['opus', 'sonnet', 'haiku'];
20
41
  // Security utilities for CLI execution
21
42
  const MAX_PATH_DEPTH = 10; // Maximum directory depth for paths
22
43
  // Validate and sanitize CLI arguments
23
- // Note: We use spawn() with shell:false and array args, so we don't need to block
24
- // punctuation characters. Only block truly dangerous patterns (null bytes).
44
+ // On Unix we use spawn() with shell:false and array args, so shell metacharacters
45
+ // are harmless. On Windows we must use shell:true for .cmd shims, so args are
46
+ // escaped via escapeWindowsArg() before being joined into the command string.
25
47
  // We use stdin for large content, so no arg length limit needed (OS limit is ~1MB anyway).
26
48
  function validateArguments(args) {
27
49
  for (const arg of args) {
@@ -31,6 +53,54 @@ function validateArguments(args) {
31
53
  }
32
54
  }
33
55
  }
56
+ // Escape a single argument for safe embedding in a Windows cmd.exe command string.
57
+ // Required when shell:true is used for .cmd shim execution. On Unix this is never called.
58
+ //
59
+ // On Windows with shell:true, Node.js runs: cmd.exe /d /s /c "command args..."
60
+ // The string passes through TWO parsers sequentially:
61
+ // 1. cmd.exe — interprets metacharacters (&|<>()^"%!) and toggles quoting on "
62
+ // 2. MSVCRT/CRT — the child process's C runtime parses the command line into argv
63
+ //
64
+ // These parsers have INCOMPATIBLE quote-escaping rules:
65
+ // - MSVCRT recognizes \" as an escaped quote
66
+ // - cmd.exe does NOT — it sees \" as backslash + quote-toggle
67
+ //
68
+ // Solution (from cross-spawn / https://qntm.org/cmd):
69
+ // Phase 1: MSVCRT escaping (\" for quotes, double trailing backslashes)
70
+ // Phase 2: Wrap in "...", then ^-prefix EVERY cmd.exe metacharacter
71
+ // After cmd.exe consumes the ^ prefixes, the child process receives a clean
72
+ // MSVCRT-quoted string.
73
+ function escapeWindowsArg(arg) {
74
+ if (arg.includes('\0')) {
75
+ throw new Error('Argument contains null byte');
76
+ }
77
+ // CR/LF act as command separators in cmd.exe — reject outright
78
+ if (/[\r\n]/.test(arg)) {
79
+ throw new Error('Argument contains newline');
80
+ }
81
+ // Empty string → escaped empty quoted arg
82
+ if (arg.length === 0) {
83
+ return '^"^"';
84
+ }
85
+ // Fast path: simple tokens with no cmd.exe metacharacters or whitespace
86
+ if (/^[A-Za-z0-9._\-\/\\:=@+]+$/.test(arg)) {
87
+ return arg;
88
+ }
89
+ // Phase 1: MSVCRT/CRT escaping
90
+ // - Double backslashes before any " (MSVCRT convention: 2N+1 \ before " = N \ + literal ")
91
+ // - Escape " with backslash
92
+ // - Double trailing backslashes (they'll precede the closing quote we add)
93
+ let escaped = arg
94
+ .replace(/(\\*)"/g, '$1$1\\"')
95
+ .replace(/(\\*)$/, '$1$1');
96
+ // Phase 2: Wrap in quotes, then ^-escape every cmd.exe metacharacter.
97
+ // This prevents cmd.exe from interpreting & | < > ( ) ^ " % ! as operators.
98
+ // The ^ prefix makes each metachar literal in cmd.exe; cmd.exe strips the ^
99
+ // before the child process sees the string, leaving valid MSVCRT quoting.
100
+ let quoted = `"${escaped}"`;
101
+ quoted = quoted.replace(/[()%!^"<>&|]/g, '^$&');
102
+ return quoted;
103
+ }
34
104
  // Validate and canonicalize paths to prevent traversal attacks
35
105
  function validatePath(path, name) {
36
106
  if (!path) {
@@ -172,15 +242,48 @@ async function spawnAsync(command, args, options = {}) {
172
242
  }
173
243
  // Use secure environment
174
244
  const secureEnv = options.env || createSecureEnvironment();
175
- const child = spawn(command, args, {
245
+ // On Windows, npm-installed CLIs (gemini, codex) are .cmd batch shims that
246
+ // require shell:true for spawn() to execute them. Native .exe CLIs (claude)
247
+ // work either way. On Unix, shell remains false to prevent injection.
248
+ //
249
+ // When shell:true, we join command+args into a single escaped string to:
250
+ // 1. Avoid Node.js DEP0190 (args array with shell:true is deprecated)
251
+ // 2. Ensure cmd.exe metacharacters in args are properly escaped
252
+ const useShell = process.platform === 'win32';
253
+ let spawnCommand;
254
+ let spawnArgs;
255
+ if (useShell) {
256
+ spawnCommand = [command, ...args.map(escapeWindowsArg)].join(' ');
257
+ spawnArgs = [];
258
+ }
259
+ else {
260
+ spawnCommand = command;
261
+ spawnArgs = args;
262
+ }
263
+ // Fires only after all pre-spawn validators (command, args, cwd)
264
+ // pass. Callers gate their `spawned` flag on this callback so
265
+ // invalid-command / invalid-args / invalid-cwd rejects do NOT count
266
+ // as spawn outcomes in `brutalist_cli_spawn_total`
267
+ // (Cycle 3 Task CLI-C'). Wrapped in try/catch because a throw from
268
+ // the user-supplied callback must not abort the spawn itself.
269
+ try {
270
+ options.onBeforeSpawn?.();
271
+ }
272
+ catch {
273
+ // Swallow — this hook is diagnostic only; failures here must not
274
+ // prevent the spawn from proceeding.
275
+ }
276
+ const child = spawn(spawnCommand, spawnArgs, {
176
277
  cwd: cwd,
177
278
  stdio: ['pipe', 'pipe', 'pipe'],
178
- shell: false, // CRITICAL: disable shell to prevent injection
279
+ shell: useShell,
179
280
  detached: false, // Run all CLIs non-detached for consistent behavior
180
281
  env: secureEnv,
181
- // Additional security options
182
- uid: process.getuid ? process.getuid() : undefined, // Maintain current user ID
183
- gid: process.getgid ? process.getgid() : undefined // Maintain current group ID
282
+ // Additional security options (Unix only; not available on Windows)
283
+ ...(useShell ? {} : {
284
+ uid: process.getuid ? process.getuid() : undefined,
285
+ gid: process.getgid ? process.getgid() : undefined
286
+ })
184
287
  });
185
288
  let stdout = '';
186
289
  let stderr = '';
@@ -330,58 +433,6 @@ async function spawnAsync(command, args, options = {}) {
330
433
  }
331
434
  });
332
435
  }
333
- const CLI_BUILDER_CONFIGS = {
334
- claude: {
335
- command: 'claude',
336
- defaultArgs: ['--print'],
337
- modelArgName: '--model',
338
- mpcEnvCleanup: ['CLAUDE_MCP_CONFIG', 'MCP_ENABLED', 'CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'],
339
- streamingArgs: () => ['--output-format', 'stream-json', '--verbose'],
340
- mcpSupport: {
341
- configMethod: 'flag-file',
342
- configFlag: '--mcp-config',
343
- strictFlag: '--strict-mcp-config',
344
- writeProtection: {
345
- method: 'disallowed-tools',
346
- flag: '--disallowedTools',
347
- value: 'Edit,Write,NotebookEdit',
348
- },
349
- },
350
- },
351
- codex: {
352
- command: 'codex',
353
- defaultArgs: ['exec', '--sandbox', 'read-only', '--skip-git-repo-check'],
354
- modelArgName: '--model',
355
- jsonFlag: '--json',
356
- mpcEnvCleanup: ['CODEX_MCP_CONFIG', 'MCP_ENABLED'],
357
- promptWrapper: (sys, user) => `${sys}\n\n${user}\n\nUse your shell tools to read files (cat, ls, find, grep, head, etc.) and analyze the codebase. You ARE allowed to run read-only commands. Explore the directory structure, read relevant source files, and provide a comprehensive brutal analysis based on what you find.`,
358
- mcpSupport: {
359
- configMethod: 'config-override',
360
- configOverrideKey: 'mcp_servers',
361
- writeProtection: {
362
- method: 'sandbox',
363
- flag: '--sandbox',
364
- value: 'read-only', // already in defaultArgs
365
- },
366
- },
367
- },
368
- gemini: {
369
- command: 'gemini',
370
- defaultArgs: ['--output-format', 'json'],
371
- modelArgName: '--model',
372
- envExtras: { TERM: 'dumb', NO_COLOR: '1', CI: 'true' },
373
- mpcEnvCleanup: ['GEMINI_MCP_CONFIG', 'MCP_ENABLED'],
374
- mcpSupport: {
375
- configMethod: 'server-whitelist',
376
- whitelistFlag: '--allowed-mcp-server-names',
377
- writeProtection: {
378
- method: 'approval-mode',
379
- flag: '--approval-mode',
380
- value: 'plan',
381
- },
382
- },
383
- },
384
- };
385
436
  export class CLIAgentOrchestrator {
386
437
  defaultTimeout = 1800000; // 30 minutes - complex codebases need time
387
438
  defaultWorkingDir = process.cwd();
@@ -393,217 +444,94 @@ export class CLIAgentOrchestrator {
393
444
  MAX_CONCURRENT_CLIS = MAX_CONCURRENT_CLIS; // Configurable concurrency limit
394
445
  // Runtime model discovery
395
446
  modelResolver;
447
+ // Optional observability deps — injected at the composition root in
448
+ // production; absent (undefined) in test harnesses that construct
449
+ // `new CLIAgentOrchestrator()` with no args. Instrumentation is a no-op
450
+ // when these are undefined, via `this.metrics?.*` and `emitLog()` fallback.
451
+ metrics;
452
+ log;
396
453
  // Streaming throttle properties
397
454
  streamingBuffers = new Map();
398
455
  STREAMING_FLUSH_INTERVAL = 200; // 200ms
399
456
  MAX_CHUNK_SIZE = 2048; // 2KB per event
400
457
  HEARTBEAT_INTERVAL = 5000; // 5s between progress heartbeats
401
458
  lastHeartbeat = 0;
402
- constructor(modelResolver) {
403
- this.modelResolver = modelResolver || new ModelResolver();
404
- // Log configuration at startup
405
- logger.info(`🔧 Brutalist MCP Configuration:`);
406
- logger.info(` - Default timeout: ${DEFAULT_TIMEOUT}ms`);
407
- logger.info(` - CLI check timeout: ${CLI_CHECK_TIMEOUT}ms`);
408
- logger.info(` - Max buffer size: ${MAX_BUFFER_SIZE} bytes`);
409
- logger.info(` - Max concurrent CLIs: ${MAX_CONCURRENT_CLIS}`);
459
+ /**
460
+ * Accepts a deps bag OR a bare `ModelResolver` (legacy positional form)
461
+ * OR nothing (characterization-test harnesses). The `instanceof ModelResolver`
462
+ * branch preserves the pre-observability signature.
463
+ */
464
+ constructor(deps) {
465
+ const bag = deps instanceof ModelResolver
466
+ ? { modelResolver: deps }
467
+ : (deps || {});
468
+ this.modelResolver = bag.modelResolver || new ModelResolver();
469
+ this.metrics = bag.metrics;
470
+ this.log = bag.log;
471
+ // Log configuration at startup (via emitLog — falls back to root logger
472
+ // when no scoped log was injected).
473
+ const bootLog = this.emitLog();
474
+ bootLog.info(`🔧 Brutalist MCP Configuration:`);
475
+ bootLog.info(` - Default timeout: ${DEFAULT_TIMEOUT}ms`);
476
+ bootLog.info(` - CLI check timeout: ${CLI_CHECK_TIMEOUT}ms`);
477
+ bootLog.info(` - Max buffer size: ${MAX_BUFFER_SIZE} bytes`);
478
+ bootLog.info(` - Max concurrent CLIs: ${MAX_CONCURRENT_CLIS}`);
410
479
  // Detect CLI context and discover models at startup
411
480
  Promise.all([
412
481
  this.detectCLIContext(),
413
482
  this.modelResolver.initialize(),
414
483
  ]).catch(error => {
415
- logger.error("Failed startup detection:", error);
484
+ this.emitLog().error("Failed startup detection:", error);
416
485
  });
417
486
  }
418
- // Parse NDJSON with proper JSON boundary detection
419
- // Handles JSON objects that contain embedded newlines without data loss
487
+ /**
488
+ * Return the injected scoped logger if present, otherwise the root
489
+ * logger singleton. Keeps un-injected (test) instances working while
490
+ * scoping production emissions with `module='cli-orchestrator'`.
491
+ */
492
+ emitLog() {
493
+ return this.log ?? logger;
494
+ }
495
+ /**
496
+ * Heuristic for classifying a spawnAsync error as a timeout.
497
+ * Centralized so all outcome paths share the same detection logic.
498
+ *
499
+ * Matches any of:
500
+ * - execError.code === 'ETIMEDOUT' (Node's timeout code on some paths)
501
+ * - execError.killed === true (child_process kill after SIGTERM/SIGKILL
502
+ * escalation when the timeout timer fired — see spawnAsync timer block)
503
+ * - execError.message matching /timed out|timeout/i (spawnAsync rejects
504
+ * with "Command timed out after ..." on timer expiry)
505
+ */
506
+ isTimeoutError(err) {
507
+ if (!err || typeof err !== 'object')
508
+ return false;
509
+ const e = err;
510
+ if (e.code === 'ETIMEDOUT')
511
+ return true;
512
+ if (e.killed === true)
513
+ return true;
514
+ if (typeof e.message === 'string' && /timed out|timeout/i.test(e.message))
515
+ return true;
516
+ return false;
517
+ }
518
+ // Proxy methods for backward compatibility — characterization tests
519
+ // access these via (orchestrator as any).methodName().
520
+ // Implementation lives in src/cli-adapters/.
420
521
  parseNDJSON(input) {
421
- if (!input || !input.trim()) {
422
- return [];
423
- }
424
- const results = [];
425
- let depth = 0;
426
- let inString = false;
427
- let escape = false;
428
- let start = 0;
429
- for (let i = 0; i < input.length; i++) {
430
- const char = input[i];
431
- // Handle escape sequences
432
- if (escape) {
433
- escape = false;
434
- continue;
435
- }
436
- if (char === '\\') {
437
- escape = true;
438
- continue;
439
- }
440
- // Track string boundaries
441
- if (char === '"') {
442
- inString = !inString;
443
- continue;
444
- }
445
- // Only count braces/brackets outside of strings
446
- if (inString)
447
- continue;
448
- // Track depth
449
- if (char === '{' || char === '[') {
450
- depth++;
451
- }
452
- else if (char === '}' || char === ']') {
453
- depth--;
454
- // When depth returns to 0, we've found a complete JSON object
455
- if (depth === 0) {
456
- const jsonStr = input.slice(start, i + 1).trim();
457
- if (jsonStr) {
458
- try {
459
- const parsed = JSON.parse(jsonStr);
460
- results.push(parsed);
461
- }
462
- catch (e) {
463
- // Log unparseable segments (not silent)
464
- logger.warn(`Failed to parse JSON segment at position ${start}-${i + 1}:`, {
465
- preview: jsonStr.substring(0, 100),
466
- error: e instanceof Error ? e.message : String(e)
467
- });
468
- }
469
- }
470
- // Move start pointer past this object and any whitespace
471
- start = i + 1;
472
- while (start < input.length && /\s/.test(input[start])) {
473
- start++;
474
- }
475
- i = start - 1; // Will be incremented by loop
476
- }
477
- }
478
- }
479
- // Warn about incomplete JSON at end of input
480
- if (start < input.length) {
481
- const remaining = input.slice(start).trim();
482
- if (remaining) {
483
- logger.warn(`Incomplete JSON at end of input:`, {
484
- preview: remaining.substring(0, 100)
485
- });
486
- }
487
- }
488
- return results;
522
+ return parseNDJSON(input);
489
523
  }
490
- // Decode Claude's stream-json NDJSON output into plain text.
491
- // Extracts text content blocks from all 'assistant' events across all turns.
492
- // Skips system events, user events (tool results with raw file contents), and
493
- // tool_use content blocks within assistant events.
494
- // Falls back to 'result' event if no assistant text was captured.
495
524
  decodeClaudeStreamJson(ndjsonOutput) {
496
- if (!ndjsonOutput || !ndjsonOutput.trim()) {
497
- logger.warn('decodeClaudeStreamJson: empty input');
498
- return '';
499
- }
500
- const events = this.parseNDJSON(ndjsonOutput);
501
- if (events.length === 0) {
502
- logger.warn('decodeClaudeStreamJson: no valid JSON events found in output');
503
- return '';
504
- }
505
- const textParts = [];
506
- let resultText = '';
507
- let resultError = '';
508
- for (const event of events) {
509
- if (typeof event !== 'object' || event === null)
510
- continue;
511
- const typedEvent = event;
512
- if (typedEvent.type === 'assistant' && typedEvent.message?.content) {
513
- // Extract only text blocks from assistant messages (skip tool_use blocks)
514
- const content = typedEvent.message.content;
515
- if (Array.isArray(content)) {
516
- for (const item of content) {
517
- if (item.type === 'text' && item.text) {
518
- textParts.push(item.text);
519
- }
520
- }
521
- }
522
- }
523
- else if (typedEvent.type === 'result') {
524
- if (typedEvent.subtype === 'error' || typedEvent.is_error) {
525
- resultError = typedEvent.error || typedEvent.result || 'Unknown error';
526
- }
527
- else if (typedEvent.result) {
528
- resultText = typedEvent.result;
529
- }
530
- }
531
- // Skip: system, user (tool_result with raw file contents), hooks
532
- }
533
- // Handle error
534
- if (resultError) {
535
- logger.error('decodeClaudeStreamJson: Claude returned error result', { error: resultError });
536
- return `[Claude Error] ${resultError}`;
537
- }
538
- // Use accumulated assistant text if available, fall back to result event
539
- if (textParts.length > 0) {
540
- return textParts.join('\n\n');
541
- }
542
- if (resultText) {
543
- return resultText;
544
- }
545
- logger.warn('decodeClaudeStreamJson: no text content found in stream-json output', {
546
- eventCount: events.length,
547
- eventTypes: events.map(e => e.type).filter(Boolean)
548
- });
549
- return '';
525
+ const provider = getProvider('claude');
526
+ return provider.decodeOutput(ndjsonOutput, ['--output-format', 'stream-json']);
550
527
  }
551
- // Extract only the agent messages from Codex JSON output (no thinking, no file reads, no commands)
552
528
  extractCodexAgentMessage(jsonOutput) {
553
- if (!jsonOutput || !jsonOutput.trim()) {
554
- logger.debug('extractCodexAgentMessage: empty input');
555
- return '';
556
- }
557
- const agentMessages = [];
558
- const events = this.parseNDJSON(jsonOutput);
559
- logger.debug(`extractCodexAgentMessage: processing ${events.length} JSON events`);
560
- for (const event of events) {
561
- if (typeof event !== 'object' || event === null)
562
- continue;
563
- const typedEvent = event;
564
- logger.debug(`extractCodexAgentMessage: parsed event type=${typedEvent.type}, item.type=${typedEvent.item?.type}`);
565
- // Codex --json outputs events with structure: {"type":"item.completed","item":{...}}
566
- // Only extract agent_message type - this is the actual response
567
- if (typedEvent.type === 'item.completed' && typedEvent.item) {
568
- if (typedEvent.item.type === 'agent_message' && typedEvent.item.text) {
569
- // Agent's actual response text
570
- logger.info(`✅ extractCodexAgentMessage: found agent_message with ${typedEvent.item.text.length} chars`);
571
- agentMessages.push(typedEvent.item.text);
572
- }
573
- // Skip all other types:
574
- // - reasoning: internal thinking steps
575
- // - command_execution: file reads, bash commands
576
- // - error: will be in stderr
577
- }
578
- }
579
- const result = agentMessages.join('\n\n').trim();
580
- logger.info(`extractCodexAgentMessage: extracted ${agentMessages.length} messages, total ${result.length} chars`);
581
- return result;
529
+ const provider = getProvider('codex');
530
+ return provider.decodeOutput(jsonOutput, ['--json']);
582
531
  }
583
- // Extract response text from Gemini --output-format json output
584
532
  extractGeminiResponse(jsonOutput) {
585
- if (!jsonOutput || !jsonOutput.trim()) {
586
- logger.debug('extractGeminiResponse: empty input');
587
- return '';
588
- }
589
- try {
590
- const parsed = JSON.parse(jsonOutput);
591
- if (parsed.response && typeof parsed.response === 'string') {
592
- logger.info(`✅ extractGeminiResponse: extracted response with ${parsed.response.length} chars`);
593
- return parsed.response;
594
- }
595
- logger.warn('extractGeminiResponse: no response field in JSON output', {
596
- keys: Object.keys(parsed)
597
- });
598
- return '';
599
- }
600
- catch (e) {
601
- logger.warn('extractGeminiResponse: failed to parse JSON, returning raw output', {
602
- error: e instanceof Error ? e.message : String(e),
603
- preview: jsonOutput.substring(0, 200)
604
- });
605
- return '';
606
- }
533
+ const provider = getProvider('gemini');
534
+ return provider.decodeOutput(jsonOutput, ['--output-format', 'json']);
607
535
  }
608
536
  emitThrottledStreamingEvent(agent, type, content, onStreamingEvent, options) {
609
537
  if (!onStreamingEvent)
@@ -650,98 +578,23 @@ export class CLIAgentOrchestrator {
650
578
  buffer.lastFlush = now;
651
579
  }
652
580
  }
581
+ // Proxy: delegates to per-provider adapter via getProvider()
653
582
  async buildCLICommand(cli, userPrompt, systemPrompt, options) {
654
- const config = CLI_BUILDER_CONFIGS[cli];
655
- const mcpEnabled = options.mcpServers && options.mcpServers.length > 0;
656
- // Build args
657
- const args = [...config.defaultArgs];
658
- const resolvedModel = this.modelResolver.resolveModel(cli, options.models?.[cli]);
659
- if (resolvedModel) {
660
- args.push(config.modelArgName, resolvedModel);
661
- }
662
- if (config.jsonFlag && process.env.CODEX_USE_JSON !== 'false') {
663
- args.push(config.jsonFlag);
664
- }
665
- if (config.streamingArgs) {
666
- args.push(...config.streamingArgs(options));
667
- }
668
- // ── MCP configuration ────────────────────────────────────────────────
669
- let tempMcpConfigPath;
670
- if (mcpEnabled && config.mcpSupport) {
671
- const servers = resolveServers(options.mcpServers);
672
- const serverNames = Object.keys(servers);
673
- // Auto-install Playwright browsers if playwright is requested
674
- if (servers.playwright) {
675
- await ensurePlaywrightBrowsers();
676
- }
677
- if (serverNames.length > 0) {
678
- const mcp = config.mcpSupport;
679
- switch (mcp.configMethod) {
680
- case 'flag-file': {
681
- // Claude: write temp JSON config, pass --mcp-config <path> --strict-mcp-config
682
- const sessionId = options.sessionId || 'default';
683
- tempMcpConfigPath = await writeClaudeMCPConfig(servers, sessionId);
684
- args.push(mcp.configFlag, tempMcpConfigPath);
685
- args.push(mcp.strictFlag);
686
- // Hard deny on write tools
687
- args.push(mcp.writeProtection.flag, mcp.writeProtection.value);
688
- // Non-interactive MCP tool use requires permission bypass
689
- args.push('--permission-mode', 'bypassPermissions');
690
- break;
691
- }
692
- case 'config-override': {
693
- // Codex: -c 'mcp_servers={...}' — replaces all configured servers (excludes brutalist)
694
- const tomlOverride = buildCodexMCPOverride(servers);
695
- args.push('-c', `${mcp.configOverrideKey}=${tomlOverride}`);
696
- // Write protection already in defaultArgs (--sandbox read-only)
697
- break;
698
- }
699
- case 'server-whitelist': {
700
- // Gemini: --allowed-mcp-server-names <names> --approval-mode plan
701
- await ensureGeminiMCPServers(servers);
702
- args.push(mcp.whitelistFlag, ...serverNames);
703
- args.push(mcp.writeProtection.flag, mcp.writeProtection.value);
704
- break;
705
- }
706
- }
707
- logger.info(`🔌 MCP enabled for ${cli}: [${serverNames.join(', ')}]`);
708
- }
709
- }
710
- // Build prompt — skip CLI-specific wrapper in debate mode (prevents Codex
711
- // from exploring the brutalist repo and reading its own control prompts)
712
- const combinedPrompt = (config.promptWrapper && !options.debateMode)
713
- ? config.promptWrapper(systemPrompt, userPrompt)
714
- : `${systemPrompt}\n\n${userPrompt}`;
715
- // Build secure env
583
+ const provider = getProvider(cli);
716
584
  const secureEnv = createSecureEnvironment();
717
- // Add CLI-specific env extras
718
- if (config.envExtras) {
719
- Object.assign(secureEnv, config.envExtras);
720
- }
721
- // Add required API key
722
- const apiKeyMap = {
723
- claude: ['ANTHROPIC_API_KEY'],
724
- codex: ['OPENAI_API_KEY'],
725
- gemini: ['GOOGLE_API_KEY', 'GEMINI_API_KEY']
726
- };
727
- for (const key of apiKeyMap[cli]) {
728
- if (process.env[key])
729
- secureEnv[key] = process.env[key];
730
- }
731
- // Clean up MPC env vars that could cause deadlock — SKIP when MCP is enabled
732
- // (the per-CLI config above already isolates to only the requested servers)
733
- if (!mcpEnabled && config.mpcEnvCleanup) {
734
- for (const envVar of config.mpcEnvCleanup) {
735
- delete secureEnv[envVar];
736
- }
737
- }
738
- secureEnv.BRUTALIST_SUBPROCESS = '1';
739
- return { command: config.command, args, input: combinedPrompt, env: secureEnv, tempMcpConfigPath };
585
+ // Pattern A: thread the scoped logger into the adapter via CLIAgentOptions.log.
586
+ // The adapter reads options.log?.forOperation('<cli>_spawn') and falls back to
587
+ // the root logger import if absent. A caller-supplied options.log wins so a
588
+ // test or an upstream operation can override the per-orchestrator default.
589
+ const perCliOp = `${cli}_spawn`;
590
+ const adapterLog = options.log ?? this.log?.forOperation(perCliOp);
591
+ const optionsWithLog = adapterLog && options.log === undefined ? { ...options, log: adapterLog } : options;
592
+ return provider.buildCommand(userPrompt, systemPrompt, optionsWithLog, this.modelResolver, secureEnv);
740
593
  }
741
594
  async detectCLIContext() {
742
595
  // Return cached context if still valid
743
596
  if (this.cliContextCached && Date.now() - this.cliContextCacheTime < this.CLI_CACHE_TTL) {
744
- logger.debug('Using cached CLI context');
597
+ this.emitLog().debug('Using cached CLI context');
745
598
  return this.cliContext;
746
599
  }
747
600
  const availableCLIs = [];
@@ -751,14 +604,16 @@ export class CLIAgentOrchestrator {
751
604
  { name: 'codex', command: 'codex --version' },
752
605
  { name: 'gemini', command: 'gemini --version' }
753
606
  ];
607
+ // NOTE: These `--version` probes are NOT spawn attempts — they must not
608
+ // increment `cliSpawnTotal`. Only _executeCLI counts spawns.
754
609
  const results = await Promise.allSettled(cliChecks.map(async (check) => {
755
610
  try {
756
611
  await spawnAsync(check.name, ['--version'], { timeout: CLI_CHECK_TIMEOUT });
757
- logger.debug(`CLI available: ${check.name}`);
612
+ this.emitLog().debug(`CLI available: ${check.name}`);
758
613
  return check.name;
759
614
  }
760
615
  catch (error) {
761
- logger.debug(`CLI not available: ${check.name}`);
616
+ this.emitLog().debug(`CLI not available: ${check.name}`);
762
617
  return null;
763
618
  }
764
619
  }));
@@ -774,7 +629,7 @@ export class CLIAgentOrchestrator {
774
629
  selectSingleCLI(preferredCLI, analysisType) {
775
630
  // 1. Honor explicit preference if available
776
631
  if (preferredCLI && this.cliContext.availableCLIs.includes(preferredCLI)) {
777
- logger.info(`✅ Using preferred CLI: ${preferredCLI}`);
632
+ this.emitLog().info(`✅ Using preferred CLI: ${preferredCLI}`);
778
633
  return preferredCLI;
779
634
  }
780
635
  // 2. Smart selection based on analysis type
@@ -794,7 +649,7 @@ export class CLIAgentOrchestrator {
794
649
  // 3. Select by priority from available CLIs
795
650
  for (const cli of priority) {
796
651
  if (this.cliContext.availableCLIs.includes(cli)) {
797
- logger.info(`🎯 Auto-selected ${cli} for ${analysisType || 'general'} analysis`);
652
+ this.emitLog().info(`🎯 Auto-selected ${cli} for ${analysisType || 'general'} analysis`);
798
653
  return cli;
799
654
  }
800
655
  }
@@ -802,7 +657,7 @@ export class CLIAgentOrchestrator {
802
657
  if (this.cliContext.availableCLIs.length === 0) {
803
658
  throw new Error('No CLI agents available');
804
659
  }
805
- logger.warn(`⚠️ Using fallback CLI: ${this.cliContext.availableCLIs[0]}`);
660
+ this.emitLog().warn(`⚠️ Using fallback CLI: ${this.cliContext.availableCLIs[0]}`);
806
661
  return this.cliContext.availableCLIs[0];
807
662
  }
808
663
  async _executeCLI(cliName, userPrompt, systemPromptSpec, options = {}, commandBuilder) {
@@ -810,9 +665,22 @@ export class CLIAgentOrchestrator {
810
665
  const workingDir = options.workingDirectory || this.defaultWorkingDir;
811
666
  const timeout = options.timeout || this.defaultTimeout;
812
667
  let tempMcpConfigPath;
668
+ // Provider label for the spawn counter. Derived from cliName so the
669
+ // label set stays in sync with the 'claude' | 'codex' | 'gemini' union
670
+ // instead of reading adapter.name.
671
+ const provider = cliName;
672
+ // Gate for the catch-branch counter emission. Per compose.py:174,
673
+ // pre-spawn paths (commandBuilder throwing before spawnAsync is
674
+ // invoked, or spawnAsync's own pre-spawn validators for
675
+ // command/args/cwd rejecting) do NOT represent a spawn attempt and
676
+ // must not increment the counter. Cycle 3 Task CLI-C' tightened
677
+ // the semantics: `spawned` is now flipped inside spawnAsync via the
678
+ // `onBeforeSpawn` callback, which fires only after all pre-spawn
679
+ // validators pass and immediately before `child_process.spawn()`.
680
+ let spawned = false;
813
681
  try {
814
- logger.info(`🤖 Executing ${cliName.toUpperCase()} CLI`);
815
- logger.debug(`${cliName.toUpperCase()} prompt`, { prompt: userPrompt.substring(0, 100) });
682
+ this.emitLog().info(`🤖 Executing ${cliName.toUpperCase()} CLI`);
683
+ this.emitLog().debug(`${cliName.toUpperCase()} prompt`, { promptLength: userPrompt.length });
816
684
  // Emit agent start event
817
685
  if (options.onStreamingEvent) {
818
686
  options.onStreamingEvent({
@@ -826,33 +694,57 @@ export class CLIAgentOrchestrator {
826
694
  const built = await commandBuilder(userPrompt, systemPromptSpec, options);
827
695
  const { command, args, env, input } = built;
828
696
  tempMcpConfigPath = built.tempMcpConfigPath;
829
- logger.info(`📋 Command: ${command} ${args.join(' ')}`);
830
- logger.info(`📁 Working directory: ${workingDir}`);
831
- logger.info(`⏱️ Timeout: ${timeout}ms`);
697
+ // Cycle 4 Task T18 (F9 — security): do NOT log raw command +
698
+ // joined args. The args array can contain caller-controlled
699
+ // content that crossed the trust boundary (Codex `-c
700
+ // mcp_servers=<TOML>` override content, Claude `--mcp-config
701
+ // <temp-path>`, prompt fragments for CLIs that accept inline
702
+ // prompt). Log only bounded metadata — cliName for provider
703
+ // identification, argCount for diagnostic shape, and
704
+ // hasMcpConfig so operators can correlate MCP-enabled spawns
705
+ // with MCP registry entries.
706
+ const hasMcpConfig = !!(options.mcpServers && options.mcpServers.length > 0);
707
+ this.emitLog().info('CLI spawn preparing', {
708
+ cliName,
709
+ argCount: args.length,
710
+ hasMcpConfig,
711
+ });
712
+ this.emitLog().info(`📁 Working directory: ${workingDir}`);
713
+ this.emitLog().info(`⏱️ Timeout: ${timeout}ms`);
832
714
  if (input) {
833
- logger.info(`📝 Using stdin for prompt (${input.length} characters)`);
715
+ this.emitLog().info(`📝 Using stdin for prompt (${input.length} characters)`);
834
716
  }
717
+ // `spawned` is flipped by spawnAsync's `onBeforeSpawn` callback
718
+ // immediately before `child_process.spawn()`. This means
719
+ // pre-spawn validator rejects inside spawnAsync (invalid command,
720
+ // invalid args, invalid cwd) leave `spawned === false` so the
721
+ // catch-branch counter does NOT fire for those paths
722
+ // (Cycle 3 Task CLI-C').
835
723
  const { stdout, stderr } = await spawnAsync(command, args, {
836
724
  cwd: workingDir,
837
725
  timeout: timeout,
838
726
  maxBuffer: MAX_BUFFER_SIZE, // Configurable buffer for model outputs
839
727
  env: env,
840
728
  input: input,
729
+ onBeforeSpawn: () => { spawned = true; },
841
730
  onProgress: (chunk, type) => {
842
- // Stream output in real-time with agent identification
731
+ // Stream output in real-time with agent identification.
732
+ // Log payloads are length-only at debug level — raw chunk text is
733
+ // NEVER emitted to the logger to avoid leaking prompt / response
734
+ // content through log aggregators. Streaming events are Layer 2.
843
735
  if (type === 'stdout' && chunk.trim()) {
844
- logger.info(`🤖 ${cliName.toUpperCase()}: ${chunk.trim()}`);
736
+ this.emitLog().debug(`${cliName.toUpperCase()} stdout chunk received`, { bytes: chunk.length });
845
737
  // Emit throttled streaming event for real-time updates
846
738
  this.emitThrottledStreamingEvent(cliName, 'agent_progress', chunk.trim(), options.onStreamingEvent, options);
847
739
  }
848
740
  else if (type === 'stderr' && chunk.trim()) {
849
- logger.warn(`⚠️ ${cliName.toUpperCase()} stderr: ${chunk.trim()}`);
741
+ this.emitLog().debug(`${cliName.toUpperCase()} stderr chunk received`, { bytes: chunk.length });
850
742
  // Emit throttled error streaming event
851
743
  this.emitThrottledStreamingEvent(cliName, 'agent_error', chunk.trim(), options.onStreamingEvent, options);
852
744
  }
853
745
  }
854
746
  });
855
- logger.info(`✅ ${cliName.toUpperCase()} completed (${Date.now() - startTime}ms)`);
747
+ this.emitLog().info(`✅ ${cliName.toUpperCase()} completed (${Date.now() - startTime}ms)`);
856
748
  // Emit completion event
857
749
  if (options.onStreamingEvent) {
858
750
  options.onStreamingEvent({
@@ -863,33 +755,20 @@ export class CLIAgentOrchestrator {
863
755
  sessionId: options.sessionId
864
756
  });
865
757
  }
866
- // Post-process CLI output if needed
758
+ // Post-process CLI output via provider adapter. Thread the scoped
759
+ // logger through decodeOutput so adapter warnings/errors carry
760
+ // module=cli-orchestrator + operation=<provider>_spawn context.
867
761
  let finalOutput = stdout;
868
- // If Claude was run with stream-json format, decode the NDJSON to extract text
869
- if (cliName === 'claude' && args.includes('--output-format') && args.includes('stream-json')) {
870
- const decodedText = this.decodeClaudeStreamJson(stdout);
871
- if (decodedText) {
872
- finalOutput = decodedText;
873
- }
874
- }
875
- // If Codex was run with --json flag, extract only the agent messages
876
- if (cliName === 'codex' && args.includes('--json')) {
877
- const decodedText = this.extractCodexAgentMessage(stdout);
878
- if (decodedText) {
879
- finalOutput = decodedText;
880
- }
881
- }
882
- // If Gemini was run with --output-format json, extract the response field
883
- if (cliName === 'gemini' && args.includes('--output-format') && args.includes('json')) {
884
- const decodedText = this.extractGeminiResponse(stdout);
885
- if (decodedText) {
886
- finalOutput = decodedText;
887
- }
762
+ const providerAdapter = getProvider(cliName);
763
+ const decodeLog = this.log?.forOperation(`${cliName}_spawn`);
764
+ const decodedText = providerAdapter.decodeOutput(stdout, args, decodeLog);
765
+ if (decodedText) {
766
+ finalOutput = decodedText;
888
767
  }
889
768
  // Fallback: If stdout is empty but stderr has content and exit was successful,
890
769
  // Claude might have written to stderr (common in non-TTY environments)
891
770
  if (!finalOutput.trim() && stderr && stderr.trim()) {
892
- logger.info(`📝 Using stderr as output for ${cliName} (stdout was empty)`);
771
+ this.emitLog().info(`📝 Using stderr as output for ${cliName} (stdout was empty)`);
893
772
  finalOutput = stderr;
894
773
  }
895
774
  // Detect CLI errors that exit 0 but contain fatal error output
@@ -916,7 +795,7 @@ export class CLIAgentOrchestrator {
916
795
  const resetMatch = combinedOutput.match(/reset(?:s)? (?:in|after) (\d+h\s*\d+m(?:\s*\d+s)?)/i);
917
796
  const resetInfo = resetMatch ? ` (resets in ${resetMatch[1]})` : '';
918
797
  const errorMsg = `${cliName.toUpperCase()} quota exhausted${resetInfo}. The CLI exited 0 but returned a quota error instead of analysis output.`;
919
- logger.warn(`⏱️ ${errorMsg}`);
798
+ this.emitLog().warn(`⏱️ ${errorMsg}`);
920
799
  if (options.onStreamingEvent) {
921
800
  options.onStreamingEvent({
922
801
  type: 'agent_error',
@@ -926,24 +805,64 @@ export class CLIAgentOrchestrator {
926
805
  sessionId: options.sessionId
927
806
  });
928
807
  }
808
+ // Spawn counter: outcome=refused (quota exhaustion — CLI exited 0
809
+ // with a quota error in stdout/stderr). Labels annotated against
810
+ // CLI_SPAWN_LABELS so a future label-set change fails at compile
811
+ // time. Wrapped in `safeMetric` so a label-validation throw or
812
+ // other metric-layer exception cannot propagate into the outer
813
+ // spawn try/catch and be misclassified as a spawn failure
814
+ // (Cycle 3 Task CLI-B' — parity with debate's safeMetric).
815
+ const quotaLabels = {
816
+ provider,
817
+ outcome: 'refused',
818
+ };
819
+ safeMetric(this.emitLog(), 'cliSpawnTotal.inc(refused:quota)', () => {
820
+ this.metrics?.cliSpawnTotal.inc(quotaLabels, 1);
821
+ });
929
822
  return {
930
823
  agent: cliName,
931
824
  success: false,
932
825
  output: '',
933
826
  error: errorMsg,
934
827
  executionTime: Date.now() - startTime,
935
- command: `${command} ${args.join(' ')}`,
828
+ // Cycle 4 Task T18 (F9): match the failure-path redaction
829
+ // parity — `command` is a diagnostic display field; the
830
+ // static placeholder preserves the response shape without
831
+ // leaking raw command + args (which may include Codex TOML
832
+ // MCP overrides, Claude temp config paths, or prompt
833
+ // fragments that crossed the trust boundary).
834
+ command: `(redacted command for ${cliName})`,
936
835
  workingDirectory: workingDir,
937
836
  exitCode: 0
938
837
  };
939
838
  }
839
+ // Spawn counter: outcome=success (normal completion path). Labels
840
+ // annotated against CLI_SPAWN_LABELS so a future label-set change
841
+ // fails at compile time. Wrapped in `safeMetric` so a metric-layer
842
+ // exception cannot propagate into the outer catch branch and be
843
+ // misclassified as a spawn failure (Cycle 3 Task CLI-B').
844
+ const successLabels = {
845
+ provider,
846
+ outcome: 'success',
847
+ };
848
+ safeMetric(this.emitLog(), 'cliSpawnTotal.inc(success)', () => {
849
+ this.metrics?.cliSpawnTotal.inc(successLabels, 1);
850
+ });
940
851
  return {
941
852
  agent: cliName,
942
853
  success: true,
943
854
  output: finalOutput,
944
855
  error: stderr || undefined,
945
856
  executionTime: Date.now() - startTime,
946
- command: `${command} ${args.join(' ')}`,
857
+ // Cycle 4 Task T18 (F9): same redaction parity as the
858
+ // failure path — `command` is a diagnostic display field,
859
+ // not a machine-readable command reproduction. The raw
860
+ // command + args can contain caller-controlled payloads
861
+ // (Codex TOML MCP overrides at codex-adapter.ts:86/:87,
862
+ // Claude temp config paths at claude-adapter.ts:96, prompt
863
+ // fragments for CLIs that accept inline prompt) that
864
+ // crossed the trust boundary.
865
+ command: `(redacted command for ${cliName})`,
947
866
  workingDirectory: workingDir,
948
867
  exitCode: 0
949
868
  };
@@ -960,20 +879,66 @@ export class CLIAgentOrchestrator {
960
879
  ];
961
880
  const errorText = `${execError.message || ''} ${execError.stderr || ''}`.toLowerCase();
962
881
  const isRateLimit = rateLimitPatterns.some(p => errorText.includes(p.toLowerCase()));
882
+ // Classify outcome for the spawn counter. Priority: rate-limit > timeout
883
+ // > generic failure. Timeout check uses the centralized heuristic.
884
+ // Classification priority is unchanged; the emission is gated on
885
+ // `spawned` so pre-spawn failures (e.g., commandBuilder throwing)
886
+ // do NOT increment the counter (compose.py:174).
887
+ let outcome;
888
+ if (isRateLimit) {
889
+ outcome = 'refused';
890
+ }
891
+ else if (this.isTimeoutError(execError)) {
892
+ outcome = 'timeout';
893
+ }
894
+ else {
895
+ outcome = 'failure';
896
+ }
897
+ if (spawned) {
898
+ // Wrapped in `safeMetric` so a metric-layer exception cannot
899
+ // re-throw from the catch branch (which would short-circuit
900
+ // the streaming event emission and the final failure-response
901
+ // construction below). Parity with debate's safeMetric pattern
902
+ // (Cycle 3 Task CLI-B').
903
+ const failureLabels = {
904
+ provider,
905
+ outcome,
906
+ };
907
+ safeMetric(this.emitLog(), `cliSpawnTotal.inc(${outcome})`, () => {
908
+ this.metrics?.cliSpawnTotal.inc(failureLabels, 1);
909
+ });
910
+ }
963
911
  if (isRateLimit) {
964
- logger.warn(`⏱️ ${cliName.toUpperCase()} CLI hit rate/usage limit (${Date.now() - startTime}ms)`);
912
+ this.emitLog().warn(`⏱️ ${cliName.toUpperCase()} CLI hit rate/usage limit (${Date.now() - startTime}ms)`);
965
913
  }
966
914
  else {
967
- logger.error(`❌ ${cliName.toUpperCase()} execution failed (${Date.now() - startTime}ms)`, {
915
+ this.emitLog().error(`❌ ${cliName.toUpperCase()} execution failed (${Date.now() - startTime}ms)`, {
968
916
  error: "Redacted: See internal logs for full error details.",
969
917
  exitCode,
970
918
  stderr: "Redacted: See internal logs for full stderr output."
971
919
  });
972
920
  }
921
+ // Cycle 3 Task D' (security): `errorMsg` is used both as streaming
922
+ // event content (just below) and as `result.error` in the returned
923
+ // CLIAgentResponse. Raw `error.message` from spawnAsync /
924
+ // downstream CLIs can contain CLI stdout/stderr fragments (TOML
925
+ // MCP override content, prompt echoes, tool-output snippets) that
926
+ // must not leak via streaming fan-out or the MCP response payload.
927
+ // We apply the same static-redaction pattern used by the logger
928
+ // emission at the `❌ ... execution failed` call above: map each
929
+ // classification path to a short, content-free string. The
930
+ // timeout branch preserves the millisecond budget (from our own
931
+ // `timeout` variable, not the underlying error) so downstream
932
+ // callers can still distinguish timeout from generic failure.
973
933
  const errorMsg = isRateLimit
974
934
  ? `${cliName.toUpperCase()} hit rate/usage limit. Try again later or use a different agent.`
975
- : (error instanceof Error ? error.message : String(error));
976
- // Emit error event
935
+ : this.isTimeoutError(execError)
936
+ ? `${cliName.toUpperCase()} execution timed out after ${timeout}ms. See internal logs for details.`
937
+ : `${cliName.toUpperCase()} execution failed. See internal logs for details.`;
938
+ // Emit error event. The content derives from the redacted
939
+ // `errorMsg` above, never from `error.message` directly, so
940
+ // streaming observers (HTTP SSE, MCP notifications) do not
941
+ // receive raw CLI payload fragments.
977
942
  if (options.onStreamingEvent) {
978
943
  options.onStreamingEvent({
979
944
  type: 'agent_error',
@@ -1001,6 +966,8 @@ export class CLIAgentOrchestrator {
1001
966
  }
1002
967
  }
1003
968
  }
969
+ // Per-provider execution methods — thin wrappers via adapter dispatch.
970
+ // Retained for backward compatibility (tests may reference these).
1004
971
  async executeClaudeCode(userPrompt, systemPromptSpec, options = {}) {
1005
972
  return this._executeCLI('claude', userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand('claude', user, sys, opts));
1006
973
  }
@@ -1014,28 +981,77 @@ export class CLIAgentOrchestrator {
1014
981
  // Wait for available slot to prevent resource exhaustion
1015
982
  await this.waitForAvailableSlot();
1016
983
  this.runningCLIs++;
1017
- logger.info(`🎯 Executing ${cli} (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
984
+ this.emitLog().info(`\u{1F3AF} Executing ${cli} (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1018
985
  try {
1019
- switch (cli) {
1020
- case 'claude':
1021
- return await this.executeClaudeCode(userPrompt, systemPromptSpec, options);
1022
- case 'codex':
1023
- return await this.executeCodex(userPrompt, systemPromptSpec, options);
1024
- case 'gemini':
1025
- return await this.executeGemini(userPrompt, systemPromptSpec, options);
1026
- default:
1027
- throw new Error(`Unknown CLI: ${cli}`);
986
+ // Gemini frontier rotation: when using the default frontier chain (no
987
+ // caller-specified model, no env-var override), rotate through the
988
+ // chain on saturation failures. Rotation is disabled when the caller
989
+ // or operator has explicitly chosen a model.
990
+ const geminiRotationActive = cli === 'gemini'
991
+ && !options.models?.gemini
992
+ && !process.env.BRUTALIST_GEMINI_MODEL;
993
+ if (geminiRotationActive) {
994
+ return await this._executeGeminiWithRotation(userPrompt, systemPromptSpec, options);
1028
995
  }
996
+ // Dispatch to adapter via buildCLICommand (which delegates to provider)
997
+ return await this._executeCLI(cli, userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand(cli, user, sys, opts));
1029
998
  }
1030
999
  finally {
1031
1000
  this.runningCLIs--;
1032
- logger.info(`✅ Released CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1001
+ this.emitLog().info(`\u2705 Released CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
1002
+ }
1003
+ }
1004
+ /**
1005
+ * Gemini frontier rotation - iterate through GEMINI_FRONTIER_CHAIN on
1006
+ * saturation failures.
1007
+ *
1008
+ * Only active when neither caller nor operator has chosen a model. Each
1009
+ * attempt injects the model via options.models.gemini. Per-attempt
1010
+ * saturation is detected via the existing quota-pattern detection in
1011
+ * _executeCLI - saturation produces success=false with an error matching
1012
+ * /\b429\b/ or quota-family patterns. On non-saturation failure,
1013
+ * rotation stops immediately (a different model will not fix prompt
1014
+ * errors, subprocess crashes, or auth failures). On chain exhaustion,
1015
+ * the last failing response is returned.
1016
+ */
1017
+ async _executeGeminiWithRotation(userPrompt, systemPromptSpec, options) {
1018
+ const chain = GEMINI_FRONTIER_CHAIN;
1019
+ let lastResponse = null;
1020
+ for (let i = 0; i < chain.length; i++) {
1021
+ const model = chain[i];
1022
+ const attemptOptions = {
1023
+ ...options,
1024
+ models: { ...(options.models || {}), gemini: model },
1025
+ };
1026
+ if (i > 0) {
1027
+ this.emitLog().info(`Gemini rotation: attempting tier ${i + 1}/${chain.length} (${model})`);
1028
+ }
1029
+ const response = await this._executeCLI('gemini', userPrompt, systemPromptSpec, attemptOptions, (user, sys, opts) => this.buildCLICommand('gemini', user, sys, opts));
1030
+ if (response.success) {
1031
+ if (i > 0) {
1032
+ this.emitLog().warn(`Gemini served by ${model} after ${i} rotation${i === 1 ? '' : 's'} (tier ${i + 1}/${chain.length})`);
1033
+ }
1034
+ else {
1035
+ this.emitLog().debug(`Gemini served by frontier ${model}`);
1036
+ }
1037
+ return response;
1038
+ }
1039
+ if (!isGeminiSaturationError(response.error)) {
1040
+ this.emitLog().debug(`Gemini ${model} failed non-saturation; rotation aborted`, {
1041
+ errorPreview: response.error?.slice(0, 120),
1042
+ });
1043
+ return response;
1044
+ }
1045
+ this.emitLog().warn(`Gemini ${model} saturated; rotating to next frontier tier`);
1046
+ lastResponse = response;
1033
1047
  }
1048
+ this.emitLog().error(`Gemini frontier chain exhausted (${chain.length} tiers); all saturated`);
1049
+ return lastResponse;
1034
1050
  }
1035
1051
  async waitForAvailableSlot() {
1036
1052
  let waitTime = 100; // Start with 100ms wait time
1037
1053
  while (this.runningCLIs >= this.MAX_CONCURRENT_CLIS) {
1038
- logger.info(`⏳ Waiting for available CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} in use). Next check in ${waitTime}ms...`);
1054
+ this.emitLog().info(`⏳ Waiting for available CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} in use). Next check in ${waitTime}ms...`);
1039
1055
  await new Promise(resolve => setTimeout(resolve, waitTime));
1040
1056
  waitTime = Math.min(waitTime * 2, 5000); // Exponential backoff, max 5 seconds
1041
1057
  }
@@ -1079,15 +1095,15 @@ export class CLIAgentOrchestrator {
1079
1095
  // Only validate filesystem paths for tools that actually operate on files/directories
1080
1096
  // NOTE: Must match BrutalistPromptType values (camelCase)
1081
1097
  const filesystemTools = ['codebase', 'fileStructure', 'dependencies', 'gitHistory', 'testCoverage'];
1082
- logger.debug(`Validation check: analysisType="${analysisType}", isFilesystemTool=${filesystemTools.includes(analysisType)}`);
1098
+ this.emitLog().debug(`Validation check: analysisType="${analysisType}", isFilesystemTool=${filesystemTools.includes(analysisType)}`);
1083
1099
  try {
1084
1100
  if (filesystemTools.includes(analysisType) && primaryContent && primaryContent.trim() !== '') {
1085
- logger.debug(`Validating path: "${primaryContent}"`);
1101
+ this.emitLog().debug(`Validating path: "${primaryContent}"`);
1086
1102
  await asyncValidatePath(primaryContent, 'targetPath');
1087
1103
  }
1088
1104
  }
1089
1105
  catch (error) {
1090
- logger.error(`Path validation failed: ${error}`);
1106
+ this.emitLog().error(`Path validation failed: ${error}`);
1091
1107
  throw new Error(`Security validation failed: ${error instanceof Error ? error.message : String(error)}`);
1092
1108
  }
1093
1109
  // Validate workingDirectory if provided
@@ -1111,18 +1127,18 @@ export class CLIAgentOrchestrator {
1111
1127
  }
1112
1128
  // Deduplicate
1113
1129
  clisToUse = [...new Set(options.clis)];
1114
- logger.info(`🎯 Using user-specified CLIs: ${clisToUse.join(', ')}`);
1130
+ this.emitLog().info(`🎯 Using user-specified CLIs: ${clisToUse.join(', ')}`);
1115
1131
  }
1116
1132
  else {
1117
1133
  // Default: use all available CLIs
1118
1134
  clisToUse = [...this.cliContext.availableCLIs];
1119
- logger.info(`📋 Using all available CLIs: ${clisToUse.join(', ')}`);
1135
+ this.emitLog().info(`📋 Using all available CLIs: ${clisToUse.join(', ')}`);
1120
1136
  }
1121
1137
  if (clisToUse.length === 0) {
1122
1138
  throw new Error('No CLI agents available for analysis');
1123
1139
  }
1124
1140
  const selectionMethod = options.clis ? 'user-specified' : 'all-available';
1125
- logger.info(`📊 Executing ${clisToUse.length} CLI(s): ${clisToUse.join(', ')} (${selectionMethod})`);
1141
+ this.emitLog().info(`📊 Executing ${clisToUse.length} CLI(s): ${clisToUse.join(', ')} (${selectionMethod})`);
1126
1142
  // Execute selected CLIs in parallel with allSettled for better error handling
1127
1143
  const promises = clisToUse.map(async (cli) => {
1128
1144
  try {
@@ -1134,7 +1150,7 @@ export class CLIAgentOrchestrator {
1134
1150
  };
1135
1151
  }
1136
1152
  catch (error) {
1137
- logger.error(`❌ ${cli} execution failed:`, error);
1153
+ this.emitLog().error(`❌ ${cli} execution failed:`, error);
1138
1154
  return {
1139
1155
  agent: cli,
1140
1156
  success: false,
@@ -1151,7 +1167,7 @@ export class CLIAgentOrchestrator {
1151
1167
  const responses = results
1152
1168
  .filter(result => result.status === 'fulfilled')
1153
1169
  .map(result => result.value);
1154
- logger.info(`✅ CLI analysis complete: ${responses.filter(r => r.success).length}/${responses.length} successful`);
1170
+ this.emitLog().info(`✅ CLI analysis complete: ${responses.filter(r => r.success).length}/${responses.length} successful`);
1155
1171
  return responses;
1156
1172
  }
1157
1173
  synthesizeBrutalistFeedback(responses, analysisType) {