@brutalist/mcp 1.8.1 → 1.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -0
- package/dist/brutalist-server.d.ts +31 -9
- package/dist/brutalist-server.d.ts.map +1 -1
- package/dist/brutalist-server.js +107 -673
- package/dist/brutalist-server.js.map +1 -1
- package/dist/cli-adapters/claude-adapter.d.ts +25 -0
- package/dist/cli-adapters/claude-adapter.d.ts.map +1 -0
- package/dist/cli-adapters/claude-adapter.js +245 -0
- package/dist/cli-adapters/claude-adapter.js.map +1 -0
- package/dist/cli-adapters/codex-adapter.d.ts +23 -0
- package/dist/cli-adapters/codex-adapter.d.ts.map +1 -0
- package/dist/cli-adapters/codex-adapter.js +173 -0
- package/dist/cli-adapters/codex-adapter.js.map +1 -0
- package/dist/cli-adapters/gemini-adapter.d.ts +50 -0
- package/dist/cli-adapters/gemini-adapter.d.ts.map +1 -0
- package/dist/cli-adapters/gemini-adapter.js +196 -0
- package/dist/cli-adapters/gemini-adapter.js.map +1 -0
- package/dist/cli-adapters/index.d.ts +75 -0
- package/dist/cli-adapters/index.d.ts.map +1 -0
- package/dist/cli-adapters/index.js +29 -0
- package/dist/cli-adapters/index.js.map +1 -0
- package/dist/cli-adapters/shared.d.ts +12 -0
- package/dist/cli-adapters/shared.d.ts.map +1 -0
- package/dist/cli-adapters/shared.js +99 -0
- package/dist/cli-adapters/shared.js.map +1 -0
- package/dist/cli-agents.d.ts +69 -2
- package/dist/cli-agents.d.ts.map +1 -1
- package/dist/cli-agents.js +358 -394
- package/dist/cli-agents.js.map +1 -1
- package/dist/debate/constitutional.d.ts +27 -0
- package/dist/debate/constitutional.d.ts.map +1 -0
- package/dist/debate/constitutional.js +74 -0
- package/dist/debate/constitutional.js.map +1 -0
- package/dist/debate/debate-orchestrator.d.ts +154 -0
- package/dist/debate/debate-orchestrator.d.ts.map +1 -0
- package/dist/debate/debate-orchestrator.js +699 -0
- package/dist/debate/debate-orchestrator.js.map +1 -0
- package/dist/debate/index.d.ts +18 -0
- package/dist/debate/index.d.ts.map +1 -0
- package/dist/debate/index.js +18 -0
- package/dist/debate/index.js.map +1 -0
- package/dist/debate/refusal-detection.d.ts +27 -0
- package/dist/debate/refusal-detection.d.ts.map +1 -0
- package/dist/debate/refusal-detection.js +62 -0
- package/dist/debate/refusal-detection.js.map +1 -0
- package/dist/debate/synthesis.d.ts +22 -0
- package/dist/debate/synthesis.d.ts.map +1 -0
- package/dist/debate/synthesis.js +117 -0
- package/dist/debate/synthesis.js.map +1 -0
- package/dist/logger.d.ts +204 -1
- package/dist/logger.d.ts.map +1 -1
- package/dist/logger.js +398 -18
- package/dist/logger.js.map +1 -1
- package/dist/metrics/counter.d.ts +24 -0
- package/dist/metrics/counter.d.ts.map +1 -0
- package/dist/metrics/counter.js +60 -0
- package/dist/metrics/counter.js.map +1 -0
- package/dist/metrics/histogram.d.ts +42 -0
- package/dist/metrics/histogram.d.ts.map +1 -0
- package/dist/metrics/histogram.js +114 -0
- package/dist/metrics/histogram.js.map +1 -0
- package/dist/metrics/index.d.ts +26 -0
- package/dist/metrics/index.d.ts.map +1 -0
- package/dist/metrics/index.js +22 -0
- package/dist/metrics/index.js.map +1 -0
- package/dist/metrics/registry.d.ts +96 -0
- package/dist/metrics/registry.d.ts.map +1 -0
- package/dist/metrics/registry.js +113 -0
- package/dist/metrics/registry.js.map +1 -0
- package/dist/metrics/safe-metric.d.ts +25 -0
- package/dist/metrics/safe-metric.d.ts.map +1 -0
- package/dist/metrics/safe-metric.js +41 -0
- package/dist/metrics/safe-metric.js.map +1 -0
- package/dist/metrics/types.d.ts +82 -0
- package/dist/metrics/types.d.ts.map +1 -0
- package/dist/metrics/types.js +121 -0
- package/dist/metrics/types.js.map +1 -0
- package/dist/registry/argument-spaces.d.ts.map +1 -1
- package/dist/registry/argument-spaces.js +20 -0
- package/dist/registry/argument-spaces.js.map +1 -1
- package/dist/registry/domains.d.ts.map +1 -1
- package/dist/registry/domains.js +17 -1
- package/dist/registry/domains.js.map +1 -1
- package/dist/streaming/circuit-breaker.d.ts +13 -1
- package/dist/streaming/circuit-breaker.d.ts.map +1 -1
- package/dist/streaming/circuit-breaker.js +13 -1
- package/dist/streaming/circuit-breaker.js.map +1 -1
- package/dist/streaming/intelligent-buffer.d.ts +13 -1
- package/dist/streaming/intelligent-buffer.d.ts.map +1 -1
- package/dist/streaming/intelligent-buffer.js +13 -1
- package/dist/streaming/intelligent-buffer.js.map +1 -1
- package/dist/streaming/output-parser.d.ts +16 -2
- package/dist/streaming/output-parser.d.ts.map +1 -1
- package/dist/streaming/output-parser.js +16 -2
- package/dist/streaming/output-parser.js.map +1 -1
- package/dist/streaming/progress-tracker.d.ts +14 -1
- package/dist/streaming/progress-tracker.d.ts.map +1 -1
- package/dist/streaming/progress-tracker.js +14 -1
- package/dist/streaming/progress-tracker.js.map +1 -1
- package/dist/streaming/session-manager.d.ts +14 -1
- package/dist/streaming/session-manager.d.ts.map +1 -1
- package/dist/streaming/session-manager.js +14 -1
- package/dist/streaming/session-manager.js.map +1 -1
- package/dist/streaming/sse-transport.d.ts +12 -1
- package/dist/streaming/sse-transport.d.ts.map +1 -1
- package/dist/streaming/sse-transport.js +12 -1
- package/dist/streaming/sse-transport.js.map +1 -1
- package/dist/streaming/streaming-orchestrator.d.ts +15 -1
- package/dist/streaming/streaming-orchestrator.d.ts.map +1 -1
- package/dist/streaming/streaming-orchestrator.js +15 -1
- package/dist/streaming/streaming-orchestrator.js.map +1 -1
- package/dist/system-prompts.d.ts.map +1 -1
- package/dist/system-prompts.js +490 -4
- package/dist/system-prompts.js.map +1 -1
- package/dist/tool-definitions-generated.d.ts.map +1 -1
- package/dist/tool-definitions-generated.js +3 -1
- package/dist/tool-definitions-generated.js.map +1 -1
- package/package.json +1 -1
package/dist/cli-agents.js
CHANGED
|
@@ -3,7 +3,36 @@ import { promises as fs, realpathSync } from 'fs';
|
|
|
3
3
|
import { promisify } from 'util';
|
|
4
4
|
import { logger } from './logger.js';
|
|
5
5
|
import { ModelResolver } from './model-resolver.js';
|
|
6
|
-
import {
|
|
6
|
+
import { cleanupTempConfig, } from './mcp-registry.js';
|
|
7
|
+
import { getProvider, parseNDJSON } from './cli-adapters/index.js';
|
|
8
|
+
import { GEMINI_FRONTIER_CHAIN } from './cli-adapters/gemini-adapter.js';
|
|
9
|
+
import { safeMetric } from './metrics/index.js';
|
|
10
|
+
/**
|
|
11
|
+
* Detect errors where rotating to the next Gemini frontier tier is likely
|
|
12
|
+
* to succeed. Covers two failure families:
|
|
13
|
+
*
|
|
14
|
+
* 1. Capacity saturation on the current tier
|
|
15
|
+
* (429 / "No capacity available" / quota / rate-limit).
|
|
16
|
+
*
|
|
17
|
+
* 2. Access denial on the current tier — the model exists but the
|
|
18
|
+
* user's account lacks preview-tier access. Appears as
|
|
19
|
+
* ModelNotFoundError / "Requested entity was not found" / 403 /
|
|
20
|
+
* "permission denied". In production the frontier chain is
|
|
21
|
+
* probe-tested (not user-typos), so these errors mean "this tier
|
|
22
|
+
* is unavailable to THIS caller" — which is exactly when rotation
|
|
23
|
+
* to the next tier should fire. Dropping down from preview tiers
|
|
24
|
+
* to the universally-available `gemini-2.5-pro` is the entire
|
|
25
|
+
* point of the chain.
|
|
26
|
+
*
|
|
27
|
+
* Does NOT match: auth failures (missing/invalid API key), prompt-safety
|
|
28
|
+
* rejections, or subprocess crashes — these will not differ between
|
|
29
|
+
* frontier tiers.
|
|
30
|
+
*/
|
|
31
|
+
function isGeminiRotatableError(error) {
|
|
32
|
+
if (!error)
|
|
33
|
+
return false;
|
|
34
|
+
return /no capacity available|\b429\b|overloaded|rateLimitExceeded|rate limit|quota|too many requests|ModelNotFoundError|Requested entity was not found|\b403\b|permission denied|access denied/i.test(error);
|
|
35
|
+
}
|
|
7
36
|
// Configurable timeouts and limits
|
|
8
37
|
const DEFAULT_TIMEOUT = parseInt(process.env.BRUTALIST_TIMEOUT || '1800000', 10); // 30 minutes default
|
|
9
38
|
const CLI_CHECK_TIMEOUT = parseInt(process.env.BRUTALIST_CLI_CHECK_TIMEOUT || '5000', 10); // 5 seconds for CLI checks
|
|
@@ -239,6 +268,19 @@ async function spawnAsync(command, args, options = {}) {
|
|
|
239
268
|
spawnCommand = command;
|
|
240
269
|
spawnArgs = args;
|
|
241
270
|
}
|
|
271
|
+
// Fires only after all pre-spawn validators (command, args, cwd)
|
|
272
|
+
// pass. Callers gate their `spawned` flag on this callback so
|
|
273
|
+
// invalid-command / invalid-args / invalid-cwd rejects do NOT count
|
|
274
|
+
// as spawn outcomes in `brutalist_cli_spawn_total`
|
|
275
|
+
// (Cycle 3 Task CLI-C'). Wrapped in try/catch because a throw from
|
|
276
|
+
// the user-supplied callback must not abort the spawn itself.
|
|
277
|
+
try {
|
|
278
|
+
options.onBeforeSpawn?.();
|
|
279
|
+
}
|
|
280
|
+
catch {
|
|
281
|
+
// Swallow — this hook is diagnostic only; failures here must not
|
|
282
|
+
// prevent the spawn from proceeding.
|
|
283
|
+
}
|
|
242
284
|
const child = spawn(spawnCommand, spawnArgs, {
|
|
243
285
|
cwd: cwd,
|
|
244
286
|
stdio: ['pipe', 'pipe', 'pipe'],
|
|
@@ -399,58 +441,6 @@ async function spawnAsync(command, args, options = {}) {
|
|
|
399
441
|
}
|
|
400
442
|
});
|
|
401
443
|
}
|
|
402
|
-
const CLI_BUILDER_CONFIGS = {
|
|
403
|
-
claude: {
|
|
404
|
-
command: 'claude',
|
|
405
|
-
defaultArgs: ['--print'],
|
|
406
|
-
modelArgName: '--model',
|
|
407
|
-
mpcEnvCleanup: ['CLAUDE_MCP_CONFIG', 'MCP_ENABLED', 'CLAUDECODE', 'CLAUDE_CODE_ENTRYPOINT'],
|
|
408
|
-
streamingArgs: () => ['--output-format', 'stream-json', '--verbose'],
|
|
409
|
-
mcpSupport: {
|
|
410
|
-
configMethod: 'flag-file',
|
|
411
|
-
configFlag: '--mcp-config',
|
|
412
|
-
strictFlag: '--strict-mcp-config',
|
|
413
|
-
writeProtection: {
|
|
414
|
-
method: 'disallowed-tools',
|
|
415
|
-
flag: '--disallowedTools',
|
|
416
|
-
value: 'Edit,Write,NotebookEdit',
|
|
417
|
-
},
|
|
418
|
-
},
|
|
419
|
-
},
|
|
420
|
-
codex: {
|
|
421
|
-
command: 'codex',
|
|
422
|
-
defaultArgs: ['exec', '--sandbox', 'read-only', '--skip-git-repo-check'],
|
|
423
|
-
modelArgName: '--model',
|
|
424
|
-
jsonFlag: '--json',
|
|
425
|
-
mpcEnvCleanup: ['CODEX_MCP_CONFIG', 'MCP_ENABLED'],
|
|
426
|
-
promptWrapper: (sys, user) => `${sys}\n\n${user}\n\nUse your shell tools to read files (cat, ls, find, grep, head, etc.) and analyze the codebase. You ARE allowed to run read-only commands. Explore the directory structure, read relevant source files, and provide a comprehensive brutal analysis based on what you find.`,
|
|
427
|
-
mcpSupport: {
|
|
428
|
-
configMethod: 'config-override',
|
|
429
|
-
configOverrideKey: 'mcp_servers',
|
|
430
|
-
writeProtection: {
|
|
431
|
-
method: 'sandbox',
|
|
432
|
-
flag: '--sandbox',
|
|
433
|
-
value: 'read-only', // already in defaultArgs
|
|
434
|
-
},
|
|
435
|
-
},
|
|
436
|
-
},
|
|
437
|
-
gemini: {
|
|
438
|
-
command: 'gemini',
|
|
439
|
-
defaultArgs: ['--output-format', 'json'],
|
|
440
|
-
modelArgName: '--model',
|
|
441
|
-
envExtras: { TERM: 'dumb', NO_COLOR: '1', CI: 'true' },
|
|
442
|
-
mpcEnvCleanup: ['GEMINI_MCP_CONFIG', 'MCP_ENABLED'],
|
|
443
|
-
mcpSupport: {
|
|
444
|
-
configMethod: 'server-whitelist',
|
|
445
|
-
whitelistFlag: '--allowed-mcp-server-names',
|
|
446
|
-
writeProtection: {
|
|
447
|
-
method: 'approval-mode',
|
|
448
|
-
flag: '--approval-mode',
|
|
449
|
-
value: 'plan',
|
|
450
|
-
},
|
|
451
|
-
},
|
|
452
|
-
},
|
|
453
|
-
};
|
|
454
444
|
export class CLIAgentOrchestrator {
|
|
455
445
|
defaultTimeout = 1800000; // 30 minutes - complex codebases need time
|
|
456
446
|
defaultWorkingDir = process.cwd();
|
|
@@ -462,217 +452,94 @@ export class CLIAgentOrchestrator {
|
|
|
462
452
|
MAX_CONCURRENT_CLIS = MAX_CONCURRENT_CLIS; // Configurable concurrency limit
|
|
463
453
|
// Runtime model discovery
|
|
464
454
|
modelResolver;
|
|
455
|
+
// Optional observability deps — injected at the composition root in
|
|
456
|
+
// production; absent (undefined) in test harnesses that construct
|
|
457
|
+
// `new CLIAgentOrchestrator()` with no args. Instrumentation is a no-op
|
|
458
|
+
// when these are undefined, via `this.metrics?.*` and `emitLog()` fallback.
|
|
459
|
+
metrics;
|
|
460
|
+
log;
|
|
465
461
|
// Streaming throttle properties
|
|
466
462
|
streamingBuffers = new Map();
|
|
467
463
|
STREAMING_FLUSH_INTERVAL = 200; // 200ms
|
|
468
464
|
MAX_CHUNK_SIZE = 2048; // 2KB per event
|
|
469
465
|
HEARTBEAT_INTERVAL = 5000; // 5s between progress heartbeats
|
|
470
466
|
lastHeartbeat = 0;
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
467
|
+
/**
|
|
468
|
+
* Accepts a deps bag OR a bare `ModelResolver` (legacy positional form)
|
|
469
|
+
* OR nothing (characterization-test harnesses). The `instanceof ModelResolver`
|
|
470
|
+
* branch preserves the pre-observability signature.
|
|
471
|
+
*/
|
|
472
|
+
constructor(deps) {
|
|
473
|
+
const bag = deps instanceof ModelResolver
|
|
474
|
+
? { modelResolver: deps }
|
|
475
|
+
: (deps || {});
|
|
476
|
+
this.modelResolver = bag.modelResolver || new ModelResolver();
|
|
477
|
+
this.metrics = bag.metrics;
|
|
478
|
+
this.log = bag.log;
|
|
479
|
+
// Log configuration at startup (via emitLog — falls back to root logger
|
|
480
|
+
// when no scoped log was injected).
|
|
481
|
+
const bootLog = this.emitLog();
|
|
482
|
+
bootLog.info(`🔧 Brutalist MCP Configuration:`);
|
|
483
|
+
bootLog.info(` - Default timeout: ${DEFAULT_TIMEOUT}ms`);
|
|
484
|
+
bootLog.info(` - CLI check timeout: ${CLI_CHECK_TIMEOUT}ms`);
|
|
485
|
+
bootLog.info(` - Max buffer size: ${MAX_BUFFER_SIZE} bytes`);
|
|
486
|
+
bootLog.info(` - Max concurrent CLIs: ${MAX_CONCURRENT_CLIS}`);
|
|
479
487
|
// Detect CLI context and discover models at startup
|
|
480
488
|
Promise.all([
|
|
481
489
|
this.detectCLIContext(),
|
|
482
490
|
this.modelResolver.initialize(),
|
|
483
491
|
]).catch(error => {
|
|
484
|
-
|
|
492
|
+
this.emitLog().error("Failed startup detection:", error);
|
|
485
493
|
});
|
|
486
494
|
}
|
|
487
|
-
|
|
488
|
-
|
|
495
|
+
/**
|
|
496
|
+
* Return the injected scoped logger if present, otherwise the root
|
|
497
|
+
* logger singleton. Keeps un-injected (test) instances working while
|
|
498
|
+
* scoping production emissions with `module='cli-orchestrator'`.
|
|
499
|
+
*/
|
|
500
|
+
emitLog() {
|
|
501
|
+
return this.log ?? logger;
|
|
502
|
+
}
|
|
503
|
+
/**
|
|
504
|
+
* Heuristic for classifying a spawnAsync error as a timeout.
|
|
505
|
+
* Centralized so all outcome paths share the same detection logic.
|
|
506
|
+
*
|
|
507
|
+
* Matches any of:
|
|
508
|
+
* - execError.code === 'ETIMEDOUT' (Node's timeout code on some paths)
|
|
509
|
+
* - execError.killed === true (child_process kill after SIGTERM/SIGKILL
|
|
510
|
+
* escalation when the timeout timer fired — see spawnAsync timer block)
|
|
511
|
+
* - execError.message matching /timed out|timeout/i (spawnAsync rejects
|
|
512
|
+
* with "Command timed out after ..." on timer expiry)
|
|
513
|
+
*/
|
|
514
|
+
isTimeoutError(err) {
|
|
515
|
+
if (!err || typeof err !== 'object')
|
|
516
|
+
return false;
|
|
517
|
+
const e = err;
|
|
518
|
+
if (e.code === 'ETIMEDOUT')
|
|
519
|
+
return true;
|
|
520
|
+
if (e.killed === true)
|
|
521
|
+
return true;
|
|
522
|
+
if (typeof e.message === 'string' && /timed out|timeout/i.test(e.message))
|
|
523
|
+
return true;
|
|
524
|
+
return false;
|
|
525
|
+
}
|
|
526
|
+
// Proxy methods for backward compatibility — characterization tests
|
|
527
|
+
// access these via (orchestrator as any).methodName().
|
|
528
|
+
// Implementation lives in src/cli-adapters/.
|
|
489
529
|
parseNDJSON(input) {
|
|
490
|
-
|
|
491
|
-
return [];
|
|
492
|
-
}
|
|
493
|
-
const results = [];
|
|
494
|
-
let depth = 0;
|
|
495
|
-
let inString = false;
|
|
496
|
-
let escape = false;
|
|
497
|
-
let start = 0;
|
|
498
|
-
for (let i = 0; i < input.length; i++) {
|
|
499
|
-
const char = input[i];
|
|
500
|
-
// Handle escape sequences
|
|
501
|
-
if (escape) {
|
|
502
|
-
escape = false;
|
|
503
|
-
continue;
|
|
504
|
-
}
|
|
505
|
-
if (char === '\\') {
|
|
506
|
-
escape = true;
|
|
507
|
-
continue;
|
|
508
|
-
}
|
|
509
|
-
// Track string boundaries
|
|
510
|
-
if (char === '"') {
|
|
511
|
-
inString = !inString;
|
|
512
|
-
continue;
|
|
513
|
-
}
|
|
514
|
-
// Only count braces/brackets outside of strings
|
|
515
|
-
if (inString)
|
|
516
|
-
continue;
|
|
517
|
-
// Track depth
|
|
518
|
-
if (char === '{' || char === '[') {
|
|
519
|
-
depth++;
|
|
520
|
-
}
|
|
521
|
-
else if (char === '}' || char === ']') {
|
|
522
|
-
depth--;
|
|
523
|
-
// When depth returns to 0, we've found a complete JSON object
|
|
524
|
-
if (depth === 0) {
|
|
525
|
-
const jsonStr = input.slice(start, i + 1).trim();
|
|
526
|
-
if (jsonStr) {
|
|
527
|
-
try {
|
|
528
|
-
const parsed = JSON.parse(jsonStr);
|
|
529
|
-
results.push(parsed);
|
|
530
|
-
}
|
|
531
|
-
catch (e) {
|
|
532
|
-
// Log unparseable segments (not silent)
|
|
533
|
-
logger.warn(`Failed to parse JSON segment at position ${start}-${i + 1}:`, {
|
|
534
|
-
preview: jsonStr.substring(0, 100),
|
|
535
|
-
error: e instanceof Error ? e.message : String(e)
|
|
536
|
-
});
|
|
537
|
-
}
|
|
538
|
-
}
|
|
539
|
-
// Move start pointer past this object and any whitespace
|
|
540
|
-
start = i + 1;
|
|
541
|
-
while (start < input.length && /\s/.test(input[start])) {
|
|
542
|
-
start++;
|
|
543
|
-
}
|
|
544
|
-
i = start - 1; // Will be incremented by loop
|
|
545
|
-
}
|
|
546
|
-
}
|
|
547
|
-
}
|
|
548
|
-
// Warn about incomplete JSON at end of input
|
|
549
|
-
if (start < input.length) {
|
|
550
|
-
const remaining = input.slice(start).trim();
|
|
551
|
-
if (remaining) {
|
|
552
|
-
logger.warn(`Incomplete JSON at end of input:`, {
|
|
553
|
-
preview: remaining.substring(0, 100)
|
|
554
|
-
});
|
|
555
|
-
}
|
|
556
|
-
}
|
|
557
|
-
return results;
|
|
530
|
+
return parseNDJSON(input);
|
|
558
531
|
}
|
|
559
|
-
// Decode Claude's stream-json NDJSON output into plain text.
|
|
560
|
-
// Extracts text content blocks from all 'assistant' events across all turns.
|
|
561
|
-
// Skips system events, user events (tool results with raw file contents), and
|
|
562
|
-
// tool_use content blocks within assistant events.
|
|
563
|
-
// Falls back to 'result' event if no assistant text was captured.
|
|
564
532
|
decodeClaudeStreamJson(ndjsonOutput) {
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
return '';
|
|
568
|
-
}
|
|
569
|
-
const events = this.parseNDJSON(ndjsonOutput);
|
|
570
|
-
if (events.length === 0) {
|
|
571
|
-
logger.warn('decodeClaudeStreamJson: no valid JSON events found in output');
|
|
572
|
-
return '';
|
|
573
|
-
}
|
|
574
|
-
const textParts = [];
|
|
575
|
-
let resultText = '';
|
|
576
|
-
let resultError = '';
|
|
577
|
-
for (const event of events) {
|
|
578
|
-
if (typeof event !== 'object' || event === null)
|
|
579
|
-
continue;
|
|
580
|
-
const typedEvent = event;
|
|
581
|
-
if (typedEvent.type === 'assistant' && typedEvent.message?.content) {
|
|
582
|
-
// Extract only text blocks from assistant messages (skip tool_use blocks)
|
|
583
|
-
const content = typedEvent.message.content;
|
|
584
|
-
if (Array.isArray(content)) {
|
|
585
|
-
for (const item of content) {
|
|
586
|
-
if (item.type === 'text' && item.text) {
|
|
587
|
-
textParts.push(item.text);
|
|
588
|
-
}
|
|
589
|
-
}
|
|
590
|
-
}
|
|
591
|
-
}
|
|
592
|
-
else if (typedEvent.type === 'result') {
|
|
593
|
-
if (typedEvent.subtype === 'error' || typedEvent.is_error) {
|
|
594
|
-
resultError = typedEvent.error || typedEvent.result || 'Unknown error';
|
|
595
|
-
}
|
|
596
|
-
else if (typedEvent.result) {
|
|
597
|
-
resultText = typedEvent.result;
|
|
598
|
-
}
|
|
599
|
-
}
|
|
600
|
-
// Skip: system, user (tool_result with raw file contents), hooks
|
|
601
|
-
}
|
|
602
|
-
// Handle error
|
|
603
|
-
if (resultError) {
|
|
604
|
-
logger.error('decodeClaudeStreamJson: Claude returned error result', { error: resultError });
|
|
605
|
-
return `[Claude Error] ${resultError}`;
|
|
606
|
-
}
|
|
607
|
-
// Use accumulated assistant text if available, fall back to result event
|
|
608
|
-
if (textParts.length > 0) {
|
|
609
|
-
return textParts.join('\n\n');
|
|
610
|
-
}
|
|
611
|
-
if (resultText) {
|
|
612
|
-
return resultText;
|
|
613
|
-
}
|
|
614
|
-
logger.warn('decodeClaudeStreamJson: no text content found in stream-json output', {
|
|
615
|
-
eventCount: events.length,
|
|
616
|
-
eventTypes: events.map(e => e.type).filter(Boolean)
|
|
617
|
-
});
|
|
618
|
-
return '';
|
|
533
|
+
const provider = getProvider('claude');
|
|
534
|
+
return provider.decodeOutput(ndjsonOutput, ['--output-format', 'stream-json']);
|
|
619
535
|
}
|
|
620
|
-
// Extract only the agent messages from Codex JSON output (no thinking, no file reads, no commands)
|
|
621
536
|
extractCodexAgentMessage(jsonOutput) {
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
return '';
|
|
625
|
-
}
|
|
626
|
-
const agentMessages = [];
|
|
627
|
-
const events = this.parseNDJSON(jsonOutput);
|
|
628
|
-
logger.debug(`extractCodexAgentMessage: processing ${events.length} JSON events`);
|
|
629
|
-
for (const event of events) {
|
|
630
|
-
if (typeof event !== 'object' || event === null)
|
|
631
|
-
continue;
|
|
632
|
-
const typedEvent = event;
|
|
633
|
-
logger.debug(`extractCodexAgentMessage: parsed event type=${typedEvent.type}, item.type=${typedEvent.item?.type}`);
|
|
634
|
-
// Codex --json outputs events with structure: {"type":"item.completed","item":{...}}
|
|
635
|
-
// Only extract agent_message type - this is the actual response
|
|
636
|
-
if (typedEvent.type === 'item.completed' && typedEvent.item) {
|
|
637
|
-
if (typedEvent.item.type === 'agent_message' && typedEvent.item.text) {
|
|
638
|
-
// Agent's actual response text
|
|
639
|
-
logger.info(`✅ extractCodexAgentMessage: found agent_message with ${typedEvent.item.text.length} chars`);
|
|
640
|
-
agentMessages.push(typedEvent.item.text);
|
|
641
|
-
}
|
|
642
|
-
// Skip all other types:
|
|
643
|
-
// - reasoning: internal thinking steps
|
|
644
|
-
// - command_execution: file reads, bash commands
|
|
645
|
-
// - error: will be in stderr
|
|
646
|
-
}
|
|
647
|
-
}
|
|
648
|
-
const result = agentMessages.join('\n\n').trim();
|
|
649
|
-
logger.info(`extractCodexAgentMessage: extracted ${agentMessages.length} messages, total ${result.length} chars`);
|
|
650
|
-
return result;
|
|
537
|
+
const provider = getProvider('codex');
|
|
538
|
+
return provider.decodeOutput(jsonOutput, ['--json']);
|
|
651
539
|
}
|
|
652
|
-
// Extract response text from Gemini --output-format json output
|
|
653
540
|
extractGeminiResponse(jsonOutput) {
|
|
654
|
-
|
|
655
|
-
|
|
656
|
-
return '';
|
|
657
|
-
}
|
|
658
|
-
try {
|
|
659
|
-
const parsed = JSON.parse(jsonOutput);
|
|
660
|
-
if (parsed.response && typeof parsed.response === 'string') {
|
|
661
|
-
logger.info(`✅ extractGeminiResponse: extracted response with ${parsed.response.length} chars`);
|
|
662
|
-
return parsed.response;
|
|
663
|
-
}
|
|
664
|
-
logger.warn('extractGeminiResponse: no response field in JSON output', {
|
|
665
|
-
keys: Object.keys(parsed)
|
|
666
|
-
});
|
|
667
|
-
return '';
|
|
668
|
-
}
|
|
669
|
-
catch (e) {
|
|
670
|
-
logger.warn('extractGeminiResponse: failed to parse JSON, returning raw output', {
|
|
671
|
-
error: e instanceof Error ? e.message : String(e),
|
|
672
|
-
preview: jsonOutput.substring(0, 200)
|
|
673
|
-
});
|
|
674
|
-
return '';
|
|
675
|
-
}
|
|
541
|
+
const provider = getProvider('gemini');
|
|
542
|
+
return provider.decodeOutput(jsonOutput, ['--output-format', 'json']);
|
|
676
543
|
}
|
|
677
544
|
emitThrottledStreamingEvent(agent, type, content, onStreamingEvent, options) {
|
|
678
545
|
if (!onStreamingEvent)
|
|
@@ -719,98 +586,23 @@ export class CLIAgentOrchestrator {
|
|
|
719
586
|
buffer.lastFlush = now;
|
|
720
587
|
}
|
|
721
588
|
}
|
|
589
|
+
// Proxy: delegates to per-provider adapter via getProvider()
|
|
722
590
|
async buildCLICommand(cli, userPrompt, systemPrompt, options) {
|
|
723
|
-
const
|
|
724
|
-
const mcpEnabled = options.mcpServers && options.mcpServers.length > 0;
|
|
725
|
-
// Build args
|
|
726
|
-
const args = [...config.defaultArgs];
|
|
727
|
-
const resolvedModel = this.modelResolver.resolveModel(cli, options.models?.[cli]);
|
|
728
|
-
if (resolvedModel) {
|
|
729
|
-
args.push(config.modelArgName, resolvedModel);
|
|
730
|
-
}
|
|
731
|
-
if (config.jsonFlag && process.env.CODEX_USE_JSON !== 'false') {
|
|
732
|
-
args.push(config.jsonFlag);
|
|
733
|
-
}
|
|
734
|
-
if (config.streamingArgs) {
|
|
735
|
-
args.push(...config.streamingArgs(options));
|
|
736
|
-
}
|
|
737
|
-
// ── MCP configuration ────────────────────────────────────────────────
|
|
738
|
-
let tempMcpConfigPath;
|
|
739
|
-
if (mcpEnabled && config.mcpSupport) {
|
|
740
|
-
const servers = resolveServers(options.mcpServers);
|
|
741
|
-
const serverNames = Object.keys(servers);
|
|
742
|
-
// Auto-install Playwright browsers if playwright is requested
|
|
743
|
-
if (servers.playwright) {
|
|
744
|
-
await ensurePlaywrightBrowsers();
|
|
745
|
-
}
|
|
746
|
-
if (serverNames.length > 0) {
|
|
747
|
-
const mcp = config.mcpSupport;
|
|
748
|
-
switch (mcp.configMethod) {
|
|
749
|
-
case 'flag-file': {
|
|
750
|
-
// Claude: write temp JSON config, pass --mcp-config <path> --strict-mcp-config
|
|
751
|
-
const sessionId = options.sessionId || 'default';
|
|
752
|
-
tempMcpConfigPath = await writeClaudeMCPConfig(servers, sessionId);
|
|
753
|
-
args.push(mcp.configFlag, tempMcpConfigPath);
|
|
754
|
-
args.push(mcp.strictFlag);
|
|
755
|
-
// Hard deny on write tools
|
|
756
|
-
args.push(mcp.writeProtection.flag, mcp.writeProtection.value);
|
|
757
|
-
// Non-interactive MCP tool use requires permission bypass
|
|
758
|
-
args.push('--permission-mode', 'bypassPermissions');
|
|
759
|
-
break;
|
|
760
|
-
}
|
|
761
|
-
case 'config-override': {
|
|
762
|
-
// Codex: -c 'mcp_servers={...}' — replaces all configured servers (excludes brutalist)
|
|
763
|
-
const tomlOverride = buildCodexMCPOverride(servers);
|
|
764
|
-
args.push('-c', `${mcp.configOverrideKey}=${tomlOverride}`);
|
|
765
|
-
// Write protection already in defaultArgs (--sandbox read-only)
|
|
766
|
-
break;
|
|
767
|
-
}
|
|
768
|
-
case 'server-whitelist': {
|
|
769
|
-
// Gemini: --allowed-mcp-server-names <names> --approval-mode plan
|
|
770
|
-
await ensureGeminiMCPServers(servers);
|
|
771
|
-
args.push(mcp.whitelistFlag, ...serverNames);
|
|
772
|
-
args.push(mcp.writeProtection.flag, mcp.writeProtection.value);
|
|
773
|
-
break;
|
|
774
|
-
}
|
|
775
|
-
}
|
|
776
|
-
logger.info(`🔌 MCP enabled for ${cli}: [${serverNames.join(', ')}]`);
|
|
777
|
-
}
|
|
778
|
-
}
|
|
779
|
-
// Build prompt — skip CLI-specific wrapper in debate mode (prevents Codex
|
|
780
|
-
// from exploring the brutalist repo and reading its own control prompts)
|
|
781
|
-
const combinedPrompt = (config.promptWrapper && !options.debateMode)
|
|
782
|
-
? config.promptWrapper(systemPrompt, userPrompt)
|
|
783
|
-
: `${systemPrompt}\n\n${userPrompt}`;
|
|
784
|
-
// Build secure env
|
|
591
|
+
const provider = getProvider(cli);
|
|
785
592
|
const secureEnv = createSecureEnvironment();
|
|
786
|
-
//
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
const
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
gemini: ['GOOGLE_API_KEY', 'GEMINI_API_KEY']
|
|
795
|
-
};
|
|
796
|
-
for (const key of apiKeyMap[cli]) {
|
|
797
|
-
if (process.env[key])
|
|
798
|
-
secureEnv[key] = process.env[key];
|
|
799
|
-
}
|
|
800
|
-
// Clean up MPC env vars that could cause deadlock — SKIP when MCP is enabled
|
|
801
|
-
// (the per-CLI config above already isolates to only the requested servers)
|
|
802
|
-
if (!mcpEnabled && config.mpcEnvCleanup) {
|
|
803
|
-
for (const envVar of config.mpcEnvCleanup) {
|
|
804
|
-
delete secureEnv[envVar];
|
|
805
|
-
}
|
|
806
|
-
}
|
|
807
|
-
secureEnv.BRUTALIST_SUBPROCESS = '1';
|
|
808
|
-
return { command: config.command, args, input: combinedPrompt, env: secureEnv, tempMcpConfigPath };
|
|
593
|
+
// Pattern A: thread the scoped logger into the adapter via CLIAgentOptions.log.
|
|
594
|
+
// The adapter reads options.log?.forOperation('<cli>_spawn') and falls back to
|
|
595
|
+
// the root logger import if absent. A caller-supplied options.log wins so a
|
|
596
|
+
// test or an upstream operation can override the per-orchestrator default.
|
|
597
|
+
const perCliOp = `${cli}_spawn`;
|
|
598
|
+
const adapterLog = options.log ?? this.log?.forOperation(perCliOp);
|
|
599
|
+
const optionsWithLog = adapterLog && options.log === undefined ? { ...options, log: adapterLog } : options;
|
|
600
|
+
return provider.buildCommand(userPrompt, systemPrompt, optionsWithLog, this.modelResolver, secureEnv);
|
|
809
601
|
}
|
|
810
602
|
async detectCLIContext() {
|
|
811
603
|
// Return cached context if still valid
|
|
812
604
|
if (this.cliContextCached && Date.now() - this.cliContextCacheTime < this.CLI_CACHE_TTL) {
|
|
813
|
-
|
|
605
|
+
this.emitLog().debug('Using cached CLI context');
|
|
814
606
|
return this.cliContext;
|
|
815
607
|
}
|
|
816
608
|
const availableCLIs = [];
|
|
@@ -820,14 +612,16 @@ export class CLIAgentOrchestrator {
|
|
|
820
612
|
{ name: 'codex', command: 'codex --version' },
|
|
821
613
|
{ name: 'gemini', command: 'gemini --version' }
|
|
822
614
|
];
|
|
615
|
+
// NOTE: These `--version` probes are NOT spawn attempts — they must not
|
|
616
|
+
// increment `cliSpawnTotal`. Only _executeCLI counts spawns.
|
|
823
617
|
const results = await Promise.allSettled(cliChecks.map(async (check) => {
|
|
824
618
|
try {
|
|
825
619
|
await spawnAsync(check.name, ['--version'], { timeout: CLI_CHECK_TIMEOUT });
|
|
826
|
-
|
|
620
|
+
this.emitLog().debug(`CLI available: ${check.name}`);
|
|
827
621
|
return check.name;
|
|
828
622
|
}
|
|
829
623
|
catch (error) {
|
|
830
|
-
|
|
624
|
+
this.emitLog().debug(`CLI not available: ${check.name}`);
|
|
831
625
|
return null;
|
|
832
626
|
}
|
|
833
627
|
}));
|
|
@@ -843,7 +637,7 @@ export class CLIAgentOrchestrator {
|
|
|
843
637
|
selectSingleCLI(preferredCLI, analysisType) {
|
|
844
638
|
// 1. Honor explicit preference if available
|
|
845
639
|
if (preferredCLI && this.cliContext.availableCLIs.includes(preferredCLI)) {
|
|
846
|
-
|
|
640
|
+
this.emitLog().info(`✅ Using preferred CLI: ${preferredCLI}`);
|
|
847
641
|
return preferredCLI;
|
|
848
642
|
}
|
|
849
643
|
// 2. Smart selection based on analysis type
|
|
@@ -863,7 +657,7 @@ export class CLIAgentOrchestrator {
|
|
|
863
657
|
// 3. Select by priority from available CLIs
|
|
864
658
|
for (const cli of priority) {
|
|
865
659
|
if (this.cliContext.availableCLIs.includes(cli)) {
|
|
866
|
-
|
|
660
|
+
this.emitLog().info(`🎯 Auto-selected ${cli} for ${analysisType || 'general'} analysis`);
|
|
867
661
|
return cli;
|
|
868
662
|
}
|
|
869
663
|
}
|
|
@@ -871,7 +665,7 @@ export class CLIAgentOrchestrator {
|
|
|
871
665
|
if (this.cliContext.availableCLIs.length === 0) {
|
|
872
666
|
throw new Error('No CLI agents available');
|
|
873
667
|
}
|
|
874
|
-
|
|
668
|
+
this.emitLog().warn(`⚠️ Using fallback CLI: ${this.cliContext.availableCLIs[0]}`);
|
|
875
669
|
return this.cliContext.availableCLIs[0];
|
|
876
670
|
}
|
|
877
671
|
async _executeCLI(cliName, userPrompt, systemPromptSpec, options = {}, commandBuilder) {
|
|
@@ -879,9 +673,22 @@ export class CLIAgentOrchestrator {
|
|
|
879
673
|
const workingDir = options.workingDirectory || this.defaultWorkingDir;
|
|
880
674
|
const timeout = options.timeout || this.defaultTimeout;
|
|
881
675
|
let tempMcpConfigPath;
|
|
676
|
+
// Provider label for the spawn counter. Derived from cliName so the
|
|
677
|
+
// label set stays in sync with the 'claude' | 'codex' | 'gemini' union
|
|
678
|
+
// instead of reading adapter.name.
|
|
679
|
+
const provider = cliName;
|
|
680
|
+
// Gate for the catch-branch counter emission. Per compose.py:174,
|
|
681
|
+
// pre-spawn paths (commandBuilder throwing before spawnAsync is
|
|
682
|
+
// invoked, or spawnAsync's own pre-spawn validators for
|
|
683
|
+
// command/args/cwd rejecting) do NOT represent a spawn attempt and
|
|
684
|
+
// must not increment the counter. Cycle 3 Task CLI-C' tightened
|
|
685
|
+
// the semantics: `spawned` is now flipped inside spawnAsync via the
|
|
686
|
+
// `onBeforeSpawn` callback, which fires only after all pre-spawn
|
|
687
|
+
// validators pass and immediately before `child_process.spawn()`.
|
|
688
|
+
let spawned = false;
|
|
882
689
|
try {
|
|
883
|
-
|
|
884
|
-
|
|
690
|
+
this.emitLog().info(`🤖 Executing ${cliName.toUpperCase()} CLI`);
|
|
691
|
+
this.emitLog().debug(`${cliName.toUpperCase()} prompt`, { promptLength: userPrompt.length });
|
|
885
692
|
// Emit agent start event
|
|
886
693
|
if (options.onStreamingEvent) {
|
|
887
694
|
options.onStreamingEvent({
|
|
@@ -895,33 +702,57 @@ export class CLIAgentOrchestrator {
|
|
|
895
702
|
const built = await commandBuilder(userPrompt, systemPromptSpec, options);
|
|
896
703
|
const { command, args, env, input } = built;
|
|
897
704
|
tempMcpConfigPath = built.tempMcpConfigPath;
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
705
|
+
// Cycle 4 Task T18 (F9 — security): do NOT log raw command +
|
|
706
|
+
// joined args. The args array can contain caller-controlled
|
|
707
|
+
// content that crossed the trust boundary (Codex `-c
|
|
708
|
+
// mcp_servers=<TOML>` override content, Claude `--mcp-config
|
|
709
|
+
// <temp-path>`, prompt fragments for CLIs that accept inline
|
|
710
|
+
// prompt). Log only bounded metadata — cliName for provider
|
|
711
|
+
// identification, argCount for diagnostic shape, and
|
|
712
|
+
// hasMcpConfig so operators can correlate MCP-enabled spawns
|
|
713
|
+
// with MCP registry entries.
|
|
714
|
+
const hasMcpConfig = !!(options.mcpServers && options.mcpServers.length > 0);
|
|
715
|
+
this.emitLog().info('CLI spawn preparing', {
|
|
716
|
+
cliName,
|
|
717
|
+
argCount: args.length,
|
|
718
|
+
hasMcpConfig,
|
|
719
|
+
});
|
|
720
|
+
this.emitLog().info(`📁 Working directory: ${workingDir}`);
|
|
721
|
+
this.emitLog().info(`⏱️ Timeout: ${timeout}ms`);
|
|
901
722
|
if (input) {
|
|
902
|
-
|
|
723
|
+
this.emitLog().info(`📝 Using stdin for prompt (${input.length} characters)`);
|
|
903
724
|
}
|
|
725
|
+
// `spawned` is flipped by spawnAsync's `onBeforeSpawn` callback
|
|
726
|
+
// immediately before `child_process.spawn()`. This means
|
|
727
|
+
// pre-spawn validator rejects inside spawnAsync (invalid command,
|
|
728
|
+
// invalid args, invalid cwd) leave `spawned === false` so the
|
|
729
|
+
// catch-branch counter does NOT fire for those paths
|
|
730
|
+
// (Cycle 3 Task CLI-C').
|
|
904
731
|
const { stdout, stderr } = await spawnAsync(command, args, {
|
|
905
732
|
cwd: workingDir,
|
|
906
733
|
timeout: timeout,
|
|
907
734
|
maxBuffer: MAX_BUFFER_SIZE, // Configurable buffer for model outputs
|
|
908
735
|
env: env,
|
|
909
736
|
input: input,
|
|
737
|
+
onBeforeSpawn: () => { spawned = true; },
|
|
910
738
|
onProgress: (chunk, type) => {
|
|
911
|
-
// Stream output in real-time with agent identification
|
|
739
|
+
// Stream output in real-time with agent identification.
|
|
740
|
+
// Log payloads are length-only at debug level — raw chunk text is
|
|
741
|
+
// NEVER emitted to the logger to avoid leaking prompt / response
|
|
742
|
+
// content through log aggregators. Streaming events are Layer 2.
|
|
912
743
|
if (type === 'stdout' && chunk.trim()) {
|
|
913
|
-
|
|
744
|
+
this.emitLog().debug(`${cliName.toUpperCase()} stdout chunk received`, { bytes: chunk.length });
|
|
914
745
|
// Emit throttled streaming event for real-time updates
|
|
915
746
|
this.emitThrottledStreamingEvent(cliName, 'agent_progress', chunk.trim(), options.onStreamingEvent, options);
|
|
916
747
|
}
|
|
917
748
|
else if (type === 'stderr' && chunk.trim()) {
|
|
918
|
-
|
|
749
|
+
this.emitLog().debug(`${cliName.toUpperCase()} stderr chunk received`, { bytes: chunk.length });
|
|
919
750
|
// Emit throttled error streaming event
|
|
920
751
|
this.emitThrottledStreamingEvent(cliName, 'agent_error', chunk.trim(), options.onStreamingEvent, options);
|
|
921
752
|
}
|
|
922
753
|
}
|
|
923
754
|
});
|
|
924
|
-
|
|
755
|
+
this.emitLog().info(`✅ ${cliName.toUpperCase()} completed (${Date.now() - startTime}ms)`);
|
|
925
756
|
// Emit completion event
|
|
926
757
|
if (options.onStreamingEvent) {
|
|
927
758
|
options.onStreamingEvent({
|
|
@@ -932,33 +763,20 @@ export class CLIAgentOrchestrator {
|
|
|
932
763
|
sessionId: options.sessionId
|
|
933
764
|
});
|
|
934
765
|
}
|
|
935
|
-
// Post-process CLI output
|
|
766
|
+
// Post-process CLI output via provider adapter. Thread the scoped
|
|
767
|
+
// logger through decodeOutput so adapter warnings/errors carry
|
|
768
|
+
// module=cli-orchestrator + operation=<provider>_spawn context.
|
|
936
769
|
let finalOutput = stdout;
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
}
|
|
943
|
-
}
|
|
944
|
-
// If Codex was run with --json flag, extract only the agent messages
|
|
945
|
-
if (cliName === 'codex' && args.includes('--json')) {
|
|
946
|
-
const decodedText = this.extractCodexAgentMessage(stdout);
|
|
947
|
-
if (decodedText) {
|
|
948
|
-
finalOutput = decodedText;
|
|
949
|
-
}
|
|
950
|
-
}
|
|
951
|
-
// If Gemini was run with --output-format json, extract the response field
|
|
952
|
-
if (cliName === 'gemini' && args.includes('--output-format') && args.includes('json')) {
|
|
953
|
-
const decodedText = this.extractGeminiResponse(stdout);
|
|
954
|
-
if (decodedText) {
|
|
955
|
-
finalOutput = decodedText;
|
|
956
|
-
}
|
|
770
|
+
const providerAdapter = getProvider(cliName);
|
|
771
|
+
const decodeLog = this.log?.forOperation(`${cliName}_spawn`);
|
|
772
|
+
const decodedText = providerAdapter.decodeOutput(stdout, args, decodeLog);
|
|
773
|
+
if (decodedText) {
|
|
774
|
+
finalOutput = decodedText;
|
|
957
775
|
}
|
|
958
776
|
// Fallback: If stdout is empty but stderr has content and exit was successful,
|
|
959
777
|
// Claude might have written to stderr (common in non-TTY environments)
|
|
960
778
|
if (!finalOutput.trim() && stderr && stderr.trim()) {
|
|
961
|
-
|
|
779
|
+
this.emitLog().info(`📝 Using stderr as output for ${cliName} (stdout was empty)`);
|
|
962
780
|
finalOutput = stderr;
|
|
963
781
|
}
|
|
964
782
|
// Detect CLI errors that exit 0 but contain fatal error output
|
|
@@ -985,7 +803,7 @@ export class CLIAgentOrchestrator {
|
|
|
985
803
|
const resetMatch = combinedOutput.match(/reset(?:s)? (?:in|after) (\d+h\s*\d+m(?:\s*\d+s)?)/i);
|
|
986
804
|
const resetInfo = resetMatch ? ` (resets in ${resetMatch[1]})` : '';
|
|
987
805
|
const errorMsg = `${cliName.toUpperCase()} quota exhausted${resetInfo}. The CLI exited 0 but returned a quota error instead of analysis output.`;
|
|
988
|
-
|
|
806
|
+
this.emitLog().warn(`⏱️ ${errorMsg}`);
|
|
989
807
|
if (options.onStreamingEvent) {
|
|
990
808
|
options.onStreamingEvent({
|
|
991
809
|
type: 'agent_error',
|
|
@@ -995,24 +813,64 @@ export class CLIAgentOrchestrator {
|
|
|
995
813
|
sessionId: options.sessionId
|
|
996
814
|
});
|
|
997
815
|
}
|
|
816
|
+
// Spawn counter: outcome=refused (quota exhaustion — CLI exited 0
|
|
817
|
+
// with a quota error in stdout/stderr). Labels annotated against
|
|
818
|
+
// CLI_SPAWN_LABELS so a future label-set change fails at compile
|
|
819
|
+
// time. Wrapped in `safeMetric` so a label-validation throw or
|
|
820
|
+
// other metric-layer exception cannot propagate into the outer
|
|
821
|
+
// spawn try/catch and be misclassified as a spawn failure
|
|
822
|
+
// (Cycle 3 Task CLI-B' — parity with debate's safeMetric).
|
|
823
|
+
const quotaLabels = {
|
|
824
|
+
provider,
|
|
825
|
+
outcome: 'refused',
|
|
826
|
+
};
|
|
827
|
+
safeMetric(this.emitLog(), 'cliSpawnTotal.inc(refused:quota)', () => {
|
|
828
|
+
this.metrics?.cliSpawnTotal.inc(quotaLabels, 1);
|
|
829
|
+
});
|
|
998
830
|
return {
|
|
999
831
|
agent: cliName,
|
|
1000
832
|
success: false,
|
|
1001
833
|
output: '',
|
|
1002
834
|
error: errorMsg,
|
|
1003
835
|
executionTime: Date.now() - startTime,
|
|
1004
|
-
|
|
836
|
+
// Cycle 4 Task T18 (F9): match the failure-path redaction
|
|
837
|
+
// parity — `command` is a diagnostic display field; the
|
|
838
|
+
// static placeholder preserves the response shape without
|
|
839
|
+
// leaking raw command + args (which may include Codex TOML
|
|
840
|
+
// MCP overrides, Claude temp config paths, or prompt
|
|
841
|
+
// fragments that crossed the trust boundary).
|
|
842
|
+
command: `(redacted command for ${cliName})`,
|
|
1005
843
|
workingDirectory: workingDir,
|
|
1006
844
|
exitCode: 0
|
|
1007
845
|
};
|
|
1008
846
|
}
|
|
847
|
+
// Spawn counter: outcome=success (normal completion path). Labels
|
|
848
|
+
// annotated against CLI_SPAWN_LABELS so a future label-set change
|
|
849
|
+
// fails at compile time. Wrapped in `safeMetric` so a metric-layer
|
|
850
|
+
// exception cannot propagate into the outer catch branch and be
|
|
851
|
+
// misclassified as a spawn failure (Cycle 3 Task CLI-B').
|
|
852
|
+
const successLabels = {
|
|
853
|
+
provider,
|
|
854
|
+
outcome: 'success',
|
|
855
|
+
};
|
|
856
|
+
safeMetric(this.emitLog(), 'cliSpawnTotal.inc(success)', () => {
|
|
857
|
+
this.metrics?.cliSpawnTotal.inc(successLabels, 1);
|
|
858
|
+
});
|
|
1009
859
|
return {
|
|
1010
860
|
agent: cliName,
|
|
1011
861
|
success: true,
|
|
1012
862
|
output: finalOutput,
|
|
1013
863
|
error: stderr || undefined,
|
|
1014
864
|
executionTime: Date.now() - startTime,
|
|
1015
|
-
|
|
865
|
+
// Cycle 4 Task T18 (F9): same redaction parity as the
|
|
866
|
+
// failure path — `command` is a diagnostic display field,
|
|
867
|
+
// not a machine-readable command reproduction. The raw
|
|
868
|
+
// command + args can contain caller-controlled payloads
|
|
869
|
+
// (Codex TOML MCP overrides at codex-adapter.ts:86/:87,
|
|
870
|
+
// Claude temp config paths at claude-adapter.ts:96, prompt
|
|
871
|
+
// fragments for CLIs that accept inline prompt) that
|
|
872
|
+
// crossed the trust boundary.
|
|
873
|
+
command: `(redacted command for ${cliName})`,
|
|
1016
874
|
workingDirectory: workingDir,
|
|
1017
875
|
exitCode: 0
|
|
1018
876
|
};
|
|
@@ -1029,20 +887,66 @@ export class CLIAgentOrchestrator {
|
|
|
1029
887
|
];
|
|
1030
888
|
const errorText = `${execError.message || ''} ${execError.stderr || ''}`.toLowerCase();
|
|
1031
889
|
const isRateLimit = rateLimitPatterns.some(p => errorText.includes(p.toLowerCase()));
|
|
890
|
+
// Classify outcome for the spawn counter. Priority: rate-limit > timeout
|
|
891
|
+
// > generic failure. Timeout check uses the centralized heuristic.
|
|
892
|
+
// Classification priority is unchanged; the emission is gated on
|
|
893
|
+
// `spawned` so pre-spawn failures (e.g., commandBuilder throwing)
|
|
894
|
+
// do NOT increment the counter (compose.py:174).
|
|
895
|
+
let outcome;
|
|
896
|
+
if (isRateLimit) {
|
|
897
|
+
outcome = 'refused';
|
|
898
|
+
}
|
|
899
|
+
else if (this.isTimeoutError(execError)) {
|
|
900
|
+
outcome = 'timeout';
|
|
901
|
+
}
|
|
902
|
+
else {
|
|
903
|
+
outcome = 'failure';
|
|
904
|
+
}
|
|
905
|
+
if (spawned) {
|
|
906
|
+
// Wrapped in `safeMetric` so a metric-layer exception cannot
|
|
907
|
+
// re-throw from the catch branch (which would short-circuit
|
|
908
|
+
// the streaming event emission and the final failure-response
|
|
909
|
+
// construction below). Parity with debate's safeMetric pattern
|
|
910
|
+
// (Cycle 3 Task CLI-B').
|
|
911
|
+
const failureLabels = {
|
|
912
|
+
provider,
|
|
913
|
+
outcome,
|
|
914
|
+
};
|
|
915
|
+
safeMetric(this.emitLog(), `cliSpawnTotal.inc(${outcome})`, () => {
|
|
916
|
+
this.metrics?.cliSpawnTotal.inc(failureLabels, 1);
|
|
917
|
+
});
|
|
918
|
+
}
|
|
1032
919
|
if (isRateLimit) {
|
|
1033
|
-
|
|
920
|
+
this.emitLog().warn(`⏱️ ${cliName.toUpperCase()} CLI hit rate/usage limit (${Date.now() - startTime}ms)`);
|
|
1034
921
|
}
|
|
1035
922
|
else {
|
|
1036
|
-
|
|
923
|
+
this.emitLog().error(`❌ ${cliName.toUpperCase()} execution failed (${Date.now() - startTime}ms)`, {
|
|
1037
924
|
error: "Redacted: See internal logs for full error details.",
|
|
1038
925
|
exitCode,
|
|
1039
926
|
stderr: "Redacted: See internal logs for full stderr output."
|
|
1040
927
|
});
|
|
1041
928
|
}
|
|
929
|
+
// Cycle 3 Task D' (security): `errorMsg` is used both as streaming
|
|
930
|
+
// event content (just below) and as `result.error` in the returned
|
|
931
|
+
// CLIAgentResponse. Raw `error.message` from spawnAsync /
|
|
932
|
+
// downstream CLIs can contain CLI stdout/stderr fragments (TOML
|
|
933
|
+
// MCP override content, prompt echoes, tool-output snippets) that
|
|
934
|
+
// must not leak via streaming fan-out or the MCP response payload.
|
|
935
|
+
// We apply the same static-redaction pattern used by the logger
|
|
936
|
+
// emission at the `❌ ... execution failed` call above: map each
|
|
937
|
+
// classification path to a short, content-free string. The
|
|
938
|
+
// timeout branch preserves the millisecond budget (from our own
|
|
939
|
+
// `timeout` variable, not the underlying error) so downstream
|
|
940
|
+
// callers can still distinguish timeout from generic failure.
|
|
1042
941
|
const errorMsg = isRateLimit
|
|
1043
942
|
? `${cliName.toUpperCase()} hit rate/usage limit. Try again later or use a different agent.`
|
|
1044
|
-
:
|
|
1045
|
-
|
|
943
|
+
: this.isTimeoutError(execError)
|
|
944
|
+
? `${cliName.toUpperCase()} execution timed out after ${timeout}ms. See internal logs for details.`
|
|
945
|
+
: `${cliName.toUpperCase()} execution failed. See internal logs for details.`;
|
|
946
|
+
// Emit error event. The content derives from the redacted
|
|
947
|
+
// `errorMsg` above, never from `error.message` directly, so
|
|
948
|
+
// streaming observers (HTTP SSE, MCP notifications) do not
|
|
949
|
+
// receive raw CLI payload fragments.
|
|
1046
950
|
if (options.onStreamingEvent) {
|
|
1047
951
|
options.onStreamingEvent({
|
|
1048
952
|
type: 'agent_error',
|
|
@@ -1070,6 +974,8 @@ export class CLIAgentOrchestrator {
|
|
|
1070
974
|
}
|
|
1071
975
|
}
|
|
1072
976
|
}
|
|
977
|
+
// Per-provider execution methods — thin wrappers via adapter dispatch.
|
|
978
|
+
// Retained for backward compatibility (tests may reference these).
|
|
1073
979
|
async executeClaudeCode(userPrompt, systemPromptSpec, options = {}) {
|
|
1074
980
|
return this._executeCLI('claude', userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand('claude', user, sys, opts));
|
|
1075
981
|
}
|
|
@@ -1083,28 +989,86 @@ export class CLIAgentOrchestrator {
|
|
|
1083
989
|
// Wait for available slot to prevent resource exhaustion
|
|
1084
990
|
await this.waitForAvailableSlot();
|
|
1085
991
|
this.runningCLIs++;
|
|
1086
|
-
|
|
992
|
+
this.emitLog().info(`\u{1F3AF} Executing ${cli} (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
|
|
1087
993
|
try {
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
994
|
+
// Gemini frontier rotation: when using the default frontier chain (no
|
|
995
|
+
// caller-specified model, no env-var override), rotate through the
|
|
996
|
+
// chain on saturation OR access-denied failures. Access-denied
|
|
997
|
+
// rotation is the path most users take — the preview tier
|
|
998
|
+
// (gemini-3.1-pro-preview / gemini-3-pro-preview) isn't granted to
|
|
999
|
+
// every account, so the chain falls through to the universally-
|
|
1000
|
+
// available gemini-2.5-pro. Rotation is disabled when the caller
|
|
1001
|
+
// or operator has explicitly chosen a model.
|
|
1002
|
+
const geminiRotationActive = cli === 'gemini'
|
|
1003
|
+
&& !options.models?.gemini
|
|
1004
|
+
&& !process.env.BRUTALIST_GEMINI_MODEL;
|
|
1005
|
+
if (geminiRotationActive) {
|
|
1006
|
+
return await this._executeGeminiWithRotation(userPrompt, systemPromptSpec, options);
|
|
1097
1007
|
}
|
|
1008
|
+
// Dispatch to adapter via buildCLICommand (which delegates to provider)
|
|
1009
|
+
return await this._executeCLI(cli, userPrompt, systemPromptSpec, options, (user, sys, opts) => this.buildCLICommand(cli, user, sys, opts));
|
|
1098
1010
|
}
|
|
1099
1011
|
finally {
|
|
1100
1012
|
this.runningCLIs--;
|
|
1101
|
-
|
|
1013
|
+
this.emitLog().info(`\u2705 Released CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} slots used)`);
|
|
1014
|
+
}
|
|
1015
|
+
}
|
|
1016
|
+
/**
|
|
1017
|
+
* Gemini frontier rotation - iterate through GEMINI_FRONTIER_CHAIN on
|
|
1018
|
+
* rotatable failures (capacity saturation OR tier access denial).
|
|
1019
|
+
*
|
|
1020
|
+
* Only active when neither caller nor operator has chosen a model. Each
|
|
1021
|
+
* attempt injects the model via options.models.gemini. Per-attempt
|
|
1022
|
+
* failures are classified by isGeminiRotatableError(): capacity errors
|
|
1023
|
+
* (quota/429) AND access errors (ModelNotFoundError / permission denied)
|
|
1024
|
+
* both trigger rotation. On unrelated failures (auth, prompt rejection,
|
|
1025
|
+
* subprocess crashes) rotation stops immediately — a different model
|
|
1026
|
+
* will not fix those. On chain exhaustion, the last failing response
|
|
1027
|
+
* is returned.
|
|
1028
|
+
*
|
|
1029
|
+
* In practice the typical non-preview user trajectory is:
|
|
1030
|
+
* gemini-3.1-pro-preview -> access denied (rotate)
|
|
1031
|
+
* gemini-3-pro-preview -> access denied (rotate)
|
|
1032
|
+
* gemini-2.5-pro -> success (universally available)
|
|
1033
|
+
*/
|
|
1034
|
+
async _executeGeminiWithRotation(userPrompt, systemPromptSpec, options) {
|
|
1035
|
+
const chain = GEMINI_FRONTIER_CHAIN;
|
|
1036
|
+
let lastResponse = null;
|
|
1037
|
+
for (let i = 0; i < chain.length; i++) {
|
|
1038
|
+
const model = chain[i];
|
|
1039
|
+
const attemptOptions = {
|
|
1040
|
+
...options,
|
|
1041
|
+
models: { ...(options.models || {}), gemini: model },
|
|
1042
|
+
};
|
|
1043
|
+
if (i > 0) {
|
|
1044
|
+
this.emitLog().info(`Gemini rotation: attempting tier ${i + 1}/${chain.length} (${model})`);
|
|
1045
|
+
}
|
|
1046
|
+
const response = await this._executeCLI('gemini', userPrompt, systemPromptSpec, attemptOptions, (user, sys, opts) => this.buildCLICommand('gemini', user, sys, opts));
|
|
1047
|
+
if (response.success) {
|
|
1048
|
+
if (i > 0) {
|
|
1049
|
+
this.emitLog().warn(`Gemini served by ${model} after ${i} rotation${i === 1 ? '' : 's'} (tier ${i + 1}/${chain.length})`);
|
|
1050
|
+
}
|
|
1051
|
+
else {
|
|
1052
|
+
this.emitLog().debug(`Gemini served by frontier ${model}`);
|
|
1053
|
+
}
|
|
1054
|
+
return response;
|
|
1055
|
+
}
|
|
1056
|
+
if (!isGeminiRotatableError(response.error)) {
|
|
1057
|
+
this.emitLog().debug(`Gemini ${model} failed with non-rotatable error; aborting rotation`, {
|
|
1058
|
+
errorPreview: response.error?.slice(0, 120),
|
|
1059
|
+
});
|
|
1060
|
+
return response;
|
|
1061
|
+
}
|
|
1062
|
+
this.emitLog().warn(`Gemini ${model} unavailable (capacity or access); rotating to next frontier tier`);
|
|
1063
|
+
lastResponse = response;
|
|
1102
1064
|
}
|
|
1065
|
+
this.emitLog().error(`Gemini frontier chain exhausted (${chain.length} tiers); no tier available to this account`);
|
|
1066
|
+
return lastResponse;
|
|
1103
1067
|
}
|
|
1104
1068
|
async waitForAvailableSlot() {
|
|
1105
1069
|
let waitTime = 100; // Start with 100ms wait time
|
|
1106
1070
|
while (this.runningCLIs >= this.MAX_CONCURRENT_CLIS) {
|
|
1107
|
-
|
|
1071
|
+
this.emitLog().info(`⏳ Waiting for available CLI slot (${this.runningCLIs}/${this.MAX_CONCURRENT_CLIS} in use). Next check in ${waitTime}ms...`);
|
|
1108
1072
|
await new Promise(resolve => setTimeout(resolve, waitTime));
|
|
1109
1073
|
waitTime = Math.min(waitTime * 2, 5000); // Exponential backoff, max 5 seconds
|
|
1110
1074
|
}
|
|
@@ -1148,15 +1112,15 @@ export class CLIAgentOrchestrator {
|
|
|
1148
1112
|
// Only validate filesystem paths for tools that actually operate on files/directories
|
|
1149
1113
|
// NOTE: Must match BrutalistPromptType values (camelCase)
|
|
1150
1114
|
const filesystemTools = ['codebase', 'fileStructure', 'dependencies', 'gitHistory', 'testCoverage'];
|
|
1151
|
-
|
|
1115
|
+
this.emitLog().debug(`Validation check: analysisType="${analysisType}", isFilesystemTool=${filesystemTools.includes(analysisType)}`);
|
|
1152
1116
|
try {
|
|
1153
1117
|
if (filesystemTools.includes(analysisType) && primaryContent && primaryContent.trim() !== '') {
|
|
1154
|
-
|
|
1118
|
+
this.emitLog().debug(`Validating path: "${primaryContent}"`);
|
|
1155
1119
|
await asyncValidatePath(primaryContent, 'targetPath');
|
|
1156
1120
|
}
|
|
1157
1121
|
}
|
|
1158
1122
|
catch (error) {
|
|
1159
|
-
|
|
1123
|
+
this.emitLog().error(`Path validation failed: ${error}`);
|
|
1160
1124
|
throw new Error(`Security validation failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
1161
1125
|
}
|
|
1162
1126
|
// Validate workingDirectory if provided
|
|
@@ -1180,18 +1144,18 @@ export class CLIAgentOrchestrator {
|
|
|
1180
1144
|
}
|
|
1181
1145
|
// Deduplicate
|
|
1182
1146
|
clisToUse = [...new Set(options.clis)];
|
|
1183
|
-
|
|
1147
|
+
this.emitLog().info(`🎯 Using user-specified CLIs: ${clisToUse.join(', ')}`);
|
|
1184
1148
|
}
|
|
1185
1149
|
else {
|
|
1186
1150
|
// Default: use all available CLIs
|
|
1187
1151
|
clisToUse = [...this.cliContext.availableCLIs];
|
|
1188
|
-
|
|
1152
|
+
this.emitLog().info(`📋 Using all available CLIs: ${clisToUse.join(', ')}`);
|
|
1189
1153
|
}
|
|
1190
1154
|
if (clisToUse.length === 0) {
|
|
1191
1155
|
throw new Error('No CLI agents available for analysis');
|
|
1192
1156
|
}
|
|
1193
1157
|
const selectionMethod = options.clis ? 'user-specified' : 'all-available';
|
|
1194
|
-
|
|
1158
|
+
this.emitLog().info(`📊 Executing ${clisToUse.length} CLI(s): ${clisToUse.join(', ')} (${selectionMethod})`);
|
|
1195
1159
|
// Execute selected CLIs in parallel with allSettled for better error handling
|
|
1196
1160
|
const promises = clisToUse.map(async (cli) => {
|
|
1197
1161
|
try {
|
|
@@ -1203,7 +1167,7 @@ export class CLIAgentOrchestrator {
|
|
|
1203
1167
|
};
|
|
1204
1168
|
}
|
|
1205
1169
|
catch (error) {
|
|
1206
|
-
|
|
1170
|
+
this.emitLog().error(`❌ ${cli} execution failed:`, error);
|
|
1207
1171
|
return {
|
|
1208
1172
|
agent: cli,
|
|
1209
1173
|
success: false,
|
|
@@ -1220,7 +1184,7 @@ export class CLIAgentOrchestrator {
|
|
|
1220
1184
|
const responses = results
|
|
1221
1185
|
.filter(result => result.status === 'fulfilled')
|
|
1222
1186
|
.map(result => result.value);
|
|
1223
|
-
|
|
1187
|
+
this.emitLog().info(`✅ CLI analysis complete: ${responses.filter(r => r.success).length}/${responses.length} successful`);
|
|
1224
1188
|
return responses;
|
|
1225
1189
|
}
|
|
1226
1190
|
synthesizeBrutalistFeedback(responses, analysisType) {
|