@kognai/orchestrator-core 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -56,4335 +56,29 @@ var __importStar = (this && this.__importStar) || (function () {
56
56
  };
57
57
  })();
58
58
  Object.defineProperty(exports, "__esModule", { value: true });
59
+ exports.assessTaskComplexity = exports.recordAgentScore = exports.resolveAgentDid = exports.resolveActiveSprintId = exports.persistCEODecisions = exports.normalizeReview = exports.compressContext = exports.callAnthropicCached = exports.routeCall = exports.log = exports.c = exports.localQAGate = exports.callLLM = void 0;
60
+ exports.postSprintSmokeTest = postSprintSmokeTest;
59
61
  exports.runOrchestrator = main;
60
- const fs_1 = require("fs");
61
- const child_process_1 = require("child_process");
62
- const crypto_1 = require("crypto");
63
62
  const https = __importStar(require("https"));
64
63
  const http = __importStar(require("http"));
65
- // ===== Module-level token accumulator =====
66
- // Captures ALL LLM tokens across every agent, supervisor, CEO, CTO call this run.
67
let _globalTokensThisRun = 0;
/**
 * Add `n` tokens to the run-wide token total.
 *
 * Values that are not finite numbers are ignored, so a single malformed usage
 * figure cannot turn the running total into NaN (or into a string).
 *
 * @param {number} n - token count to add.
 */
function _accumulateTokens(n) {
    if (typeof n !== 'number' || !Number.isFinite(n))
        return;
    _globalTokensThisRun += n;
}
69
// Per-model usage for the current run, keyed by model name.
const _modelsUsedThisRun = new Map();
// Sum of every cost passed to recordModelCall() this run.
let _totalCostThisRun = 0;
/** Coerce a usage figure to a finite number; anything else counts as 0. */
function _finiteOrZero(value) {
    const num = Number(value);
    return Number.isFinite(num) ? num : 0;
}
/**
 * Record one model call in the per-run usage ledger.
 *
 * @param {string} provider - provider name; stored as 'unknown' when falsy.
 * @param {string} model - ledger key; a falsy model is filed under 'unknown'.
 * @param {number} input_tokens - prompt tokens (non-finite values count as 0).
 * @param {number} output_tokens - completion tokens (non-finite values count as 0).
 * @param {number} cost_usd - cost of the call in USD (non-finite values count as 0).
 */
function recordModelCall(provider, model, input_tokens, output_tokens, cost_usd) {
    const key = model || 'unknown';
    // Coerce up front so a numeric string adds instead of concatenating.
    const inputTokens = _finiteOrZero(input_tokens);
    const outputTokens = _finiteOrZero(output_tokens);
    const cost = _finiteOrZero(cost_usd);
    const existing = _modelsUsedThisRun.get(key);
    if (existing) {
        existing.calls += 1;
        existing.input_tokens += inputTokens;
        existing.output_tokens += outputTokens;
        existing.cost_usd += cost;
        // An entry first seen without a provider picks up the real one later.
        if (existing.provider === 'unknown' && provider) {
            existing.provider = provider;
        }
    }
    else {
        _modelsUsedThisRun.set(key, { calls: 1, input_tokens: inputTokens, output_tokens: outputTokens, cost_usd: cost, provider: provider || 'unknown' });
    }
    _totalCostThisRun += cost;
}
/**
 * Snapshot the ledger as a plain object keyed by model name.
 *
 * @returns {Object} one entry per model with provider, calls, input_tokens,
 *   output_tokens, tokens (input + output) and cost_usd rounded to 6 decimals.
 */
function getModelsUsedReport() {
    const out = {};
    for (const [model, e] of _modelsUsedThisRun.entries()) {
        out[model] = { provider: e.provider, calls: e.calls, input_tokens: e.input_tokens, output_tokens: e.output_tokens, tokens: e.input_tokens + e.output_tokens, cost_usd: +e.cost_usd.toFixed(6) };
    }
    return out;
}
/** @returns {number} total cost recorded so far this run, in USD. */
function getTotalCostUsd() { return _totalCostThisRun; }
93
- // S64-002: Load .env relative to this file's directory (not process.cwd())
94
- // Fixes supervisor ANTHROPIC_API_KEY missing when spawned from a different cwd (e.g., VPS path)
95
- const mc_client_1 = require("./mc-client");
96
- // V17: Local/cloud routing, wallet state, ByteRover memory
97
- const ollama_client_1 = require("./ollama-client"); // availability check only — calls go through ClawRouter v2.0
98
- // ClawRouter v2.0 — MANDATORY SINGLE GATEWAY (Exec Protocol §17)
99
- // TICKET-215 Wave C: every LLM call routes through the core ModelRouter SEAM.
100
- // The viem-backed router now lives in its OWN package (@kognai/clawrouter-x402);
101
- // Kognai injects it into the zero-dep core slot at boot, so core never carries
102
- // viem and a product that doesn't route on-chain simply doesn't inject it.
103
- // TICKET-215 Wave D (split step 1): Kognai-local injection of the viem router +
104
- // SCORE scorer into the core seams lives in one bootstrap module (side-effect import).
105
- const model_router_registry_1 = require("./model-router-registry");
106
- // Seam-backed bindings — existing call sites resolve through the injected router.
107
// Both bindings fetch the router on every invocation, so whichever router the
// registry returns at call time is the one that serves the request.
const routeCall = (req) => {
    const router = (0, model_router_registry_1.getModelRouter)();
    return router.routeCall(req);
};
const getDailyCostDigest = () => {
    const router = (0, model_router_registry_1.getModelRouter)();
    return router.getDailyCostDigest();
};
109
- // Chomsky gate — prompt quality runner with retry + full JSONL logging (Sprint 1513)
110
- const chomsky_runner_1 = require("./chomsky-runner");
111
- // Sherlock v2 — ASMR-powered episodic memory retrieval for supervisor context (AMD-21-03)
112
- const sherlock_memory_1 = require("./sherlock-memory");
113
- // Legacy import kept for clawRouterIsAvailable() checks during transition
114
- const clawrouter_client_1 = require("./clawrouter-client");
115
- const local_model_router_1 = require("./local-model-router");
116
- // CTO Approval Gate — every autonomous sprint reviewed before execution (Exec Protocol)
117
- const cto_approval_gate_1 = require("./cto-approval-gate");
118
- const citizenship_1 = require("./citizenship");
119
- // TICKET-215 Wave D: citizen scoring via the core seam. The SCORE-backed scorer
120
- // stays Kognai-local (out of zero-dep core) and is injected here; scoring goes
121
- // through recordTaskScoreMonitored so every score emits a data.citizen_score event
122
- // to the event-bus (kognai_events) — flowing back to Kognai monitoring + the Plumber.
123
- const citizen_score_registry_1 = require("./citizen-score-registry");
124
/**
 * Record a supervisor's letter grade against the citizen that authored a task.
 *
 * Returns without doing anything when the grade is missing or is not one of
 * A/B/C/D/F, or when no registry entry has a matching `agent_name`. Otherwise
 * the score is passed to recordTaskScoreMonitored() and the resulting figures
 * are logged.
 *
 * @param {string} agent_name - agent slug matched against registry entries.
 * @param {string} sprint_id - forwarded to the scorer unchanged.
 * @param {string} task_id - forwarded to the scorer unchanged.
 * @param {string} grade - 'A' | 'B' | 'C' | 'D' | 'F'.
 * @param {*} path - accepted for callers; not read by this function.
 */
function recordScoreForCitizen(agent_name, sprint_id, task_id, grade, path) {
    const gradeIsUsable = Boolean(grade) && ['A', 'B', 'C', 'D', 'F'].includes(grade);
    if (!gradeIsUsable) {
        return; // legacy reviews carry no grade
    }
    const registry = (0, citizenship_1.readRegistry)();
    const author = registry.citizens.find((entry) => entry.agent_name === agent_name);
    if (!author) {
        return; // agent has no registry entry; skip silently
    }
    // A single supervisor identity is used for every score.
    const scorePayload = {
        citizen_id: author.citizen_id,
        agent_did: author.agent_did,
        sprint_id,
        task_id,
        grade,
        supervisor_did: 'did:kognai:supervisor',
    };
    const outcome = (0, citizen_score_registry_1.recordTaskScoreMonitored)(scorePayload);
    const finalScore = outcome.final_score.toFixed(1);
    const perfScore = outcome.task_performance_score.toFixed(0);
    log(c.gray, ` [SCORE] ${author.citizen_id} (${agent_name}): grade ${grade} → final ${finalScore} (perf ${perfScore} × ${outcome.constitutional_multiplier})`);
}
149
- const model_router_1 = require("./model-router");
150
- const wallet_state_1 = require("./wallet-state");
151
- // Sprint 652: BrainX episodic memory — swarm integration
152
- const brainx_swarm_bridge_1 = require("./brainx-swarm-bridge");
153
- const event_bus_publisher_1 = require("./event-bus-publisher");
154
- const aar_middleware_1 = require("./aar-middleware");
155
- const skill_crystalliser_1 = require("./skill-crystalliser");
156
- // Sprint 703: Dynamic trust score updater
157
- const trust_score_updater_1 = require("./trust-score-updater");
158
- const code_asset_crystalliser_1 = require("./code-asset-crystalliser");
159
- const monotask_state_machine_1 = require("./monotask-state-machine");
160
- const code_failure_logger_1 = require("./code-failure-logger");
161
- const failure_library_1 = require("./failure-library");
162
- // sprint-1566 F0: token-budget pre-flight validator (replaces orphaned agents/token-budget-validator/)
163
- const token_budget_validator_1 = require("./token-budget-validator");
164
- // sprint-1566 F0d: decomposer feedback loop — route rejection back to a structural re-split
165
- const decomposer_feedback_1 = require("./decomposer-feedback");
166
- // sprint-1566 F3+F0e: per-model cost computation + wallet ledger writes
167
- const llm_cost_table_1 = require("./llm-cost-table");
168
- const ceo_wallet_1 = require("./ceo-wallet");
169
- // OMEL AMD-13: Phantom Workspace — isolated tmpdir per task, prevents cross-task file bleed
170
- const phantom_workspace_1 = require("./omel/phantom-workspace");
171
- // OMEL AMD-13: Credential Vault — controlled secret access, never logs values
64
+ // ===== entry shell: re-export surface (back-compat for the split modules) + main()/runOrchestrator =====
172
65
  const credential_vault_1 = require("./omel/credential-vault");
173
- // OMEL AMD-13: Wipe Witness — detects destructive agent writes (shrink > 50%)
174
- const wipe_witness_1 = require("./omel/wipe-witness");
175
- // OMEL AMD-13: Human Brake — human-in-the-loop approval gate for high-risk ops
176
- const human_brake_1 = require("./omel/human-brake");
177
- // AMD-20: PRM Judge — reward signal on task completion (Sprint 515)
178
- const perm_judge_1 = require("./perm-judge");
179
- // AMD-26: KSL capture tap — emits training-data records per task attempt
180
- const orchestrator_tap_1 = require("./ksl/orchestrator-tap");
181
- // V17: Sovereign mode — force all inference to local Ollama ($0 cost floor)
182
- const SOVEREIGN_MODE = process.argv.includes('--sovereign') || process.env.SOVEREIGN_MODE === '1';
183
- // TICKET-085: map letter grade → numeric score so legacy code (LoRA cron,
184
- // trust updater, AAR middleware, etc.) keeps working unchanged. A discrete
185
- // scale prevents the LLM-anchored 88 cluster; the derived score preserves
186
- // downstream contracts.
187
const GRADE_TO_SCORE = {
    A: 95, B: 85, C: 70, D: 50, F: 20,
};
/**
 * Derive the numeric `score` of a review from its letter `grade`.
 *
 * When `raw.grade` is one of the keys of GRADE_TO_SCORE the score is
 * overwritten with the mapped value (the grade is authoritative). Any other
 * grade, or no grade at all, leaves the existing score untouched.
 *
 * The object is mutated in place and returned.
 *
 * @param {Object} raw - parsed review; may carry `grade` and/or `score`.
 * @returns {Object} the same object.
 */
function normalizeReview(raw) {
    const r = raw;
    // Own-property test only: a plain index lookup would also find inherited
    // members, so a grade such as "toString" or "constructor" would replace
    // the score with a function.
    if (r.grade && Object.prototype.hasOwnProperty.call(GRADE_TO_SCORE, r.grade)) {
        r.score = GRADE_TO_SCORE[r.grade];
    }
    return r;
}
199
- // ===== Colors =====
200
// ANSI escape sequences for console colouring.
const c = {
    reset: '\x1b[0m',
    bold: '\x1b[1m',
    red: '\x1b[31m',
    green: '\x1b[32m',
    yellow: '\x1b[33m',
    blue: '\x1b[34m',
    magenta: '\x1b[35m',
    cyan: '\x1b[36m',
    gray: '\x1b[90m',
};
/**
 * Print `msg` to stdout prefixed with `color` and followed by the reset code.
 *
 * @param {string} color - escape sequence, normally one of the `c` values.
 * @param {*} msg - message; interpolated into a template string.
 */
function log(color, msg) {
    const line = `${color}${msg}${c.reset}`;
    console.log(line);
}
208
- // ===== Safe reset helper =====
209
- // Replaces bare `git reset --hard HEAD~1`. Verifies the last commit is the
210
- // orchestrator's own (matches the expected `feat(<agent>): <id> - <type>`
211
- // pattern) before reverting. Skips the reset if HEAD has moved on to someone
212
- // else's commit — protects against wiping work from concurrent Claude sessions
213
- // or human commits made while the orchestrator was running.
214
/**
 * Drop the last commit, but only when its subject line is exactly
 * `feat(<agentName>): <taskId> - <taskType>`.
 *
 * @param {string} taskId - task id expected in the commit subject.
 * @param {string} [agentName] - defaults to 'coder' when null/undefined.
 * @param {string} [taskType] - defaults to 'feature' when null/undefined.
 * @param {string} [indent] - prefix for the log lines.
 * @returns {boolean} true when `git reset --hard HEAD~1` ran; false when the
 *   subject did not match or a git command threw.
 */
function safeResetLastCommit(taskId, agentName, taskType, indent = ' ') {
    const agentLabel = agentName ?? 'coder';
    const typeLabel = taskType ?? 'feature';
    const ownSubject = `feat(${agentLabel}): ${taskId} - ${typeLabel}`;
    try {
        const headSubject = (0, child_process_1.execSync)('git log -1 --format=%s', { timeout: 5000 }).toString().trim();
        if (headSubject === ownSubject) {
            (0, child_process_1.execSync)('git reset --hard HEAD~1', { timeout: 10000 });
            log(c.gray, `${indent}Reset to previous commit (dropped rejected code)`);
            return true;
        }
        // HEAD belongs to someone else: leave everything alone.
        log(c.yellow, `${indent}! Reset skipped — HEAD is "${headSubject.substring(0, 60)}", not our commit. Working tree left as-is to protect concurrent work.`);
        return false;
    }
    catch (err) {
        const detail = (err.message || '').substring(0, 80);
        log(c.gray, `${indent}Reset skipped: ${detail}`);
        return false;
    }
}
231
- // ===== ClawRouter v2.0 — MANDATORY SINGLE GATEWAY (Exec Protocol §17) =====
232
- // ALL LLM calls route through routeCall() from clawrouter-v2.ts.
233
- // Direct API calls to Anthropic, OpenAI, MiniMax, or Ollama are Sev-1 violations.
234
- // The old provider-based callLLM() is replaced with a unified gateway that maps
235
- // legacy provider+model pairs to ClawRouter v2.0 tier_class+complexity.
236
- // Track direct_api_violations for sprint JSON (§17.6)
237
- let _directApiViolations = 0;
238
- let _llmCallsRouted = 0;
239
- let _apexCalls = 0;
240
- let _apexJudgePatternCompliant = true;
241
/**
 * Unified LLM gateway: turns a legacy provider/model pair into a router
 * request and sends it through routeCall().
 *
 * Flow:
 *  1. Build the request (complexity from mapLegacyToComplexity, constitutional
 *     flag from isConstitutionalCall, context size estimated at 4 chars/token).
 *  2. For providers other than 'clawrouter' and 'ollama', consult
 *     getProviderBudgetStatus(): 'frozen' tries the clawrouter/deepseek
 *     fallback first, 'warning' only raises an alert.
 *  3. Route the call, compute its cost, write it to the wallet ledger and to
 *     the per-run usage report, and return an OpenAI-style response.
 *  4. On a credit-exhaustion error from a non-clawrouter provider, alert and
 *     retry once via clawrouter/deepseek; if that also fails the ORIGINAL
 *     error is re-thrown.
 *
 * NOTE(review): `timeoutMs` is only forwarded to the recursive fallback
 * calls; it is never placed on the router request built here — confirm
 * whether the router is meant to receive it.
 *
 * @param {string} provider - legacy provider hint ('ollama', 'clawrouter', 'anthropic', ...).
 * @param {string} model - legacy model name; used when the router reports none.
 * @param {string} systemPrompt
 * @param {string} userPrompt
 * @param {number} [timeoutMs=300000]
 * @param {string} [agentId='orchestrator']
 * @param {string} [taskType='orchestrator_call']
 * @returns {Promise<Object>} `{ choices, usage, provider, model, cost_usd }`;
 *   fallback responses also carry `fallback_used`, `fallback_from_provider`
 *   and (budget path only) `fallback_reason`.
 * @throws whatever routeCall() rejected with, when no fallback succeeded.
 */
async function callLLM(provider, model, systemPrompt, userPrompt, timeoutMs = 300000, agentId = 'orchestrator', taskType = 'orchestrator_call') {
    _llmCallsRouted++;
    // Map legacy provider+model to a router request.
    const req = {
        task_type: taskType,
        tier_class: 'text',
        complexity: mapLegacyToComplexity(provider, model),
        context_tokens: Math.ceil((systemPrompt.length + userPrompt.length) / 4),
        constitutional_flag: isConstitutionalCall(provider, model),
        agent_id: agentId,
        payload: {
            system: systemPrompt,
            prompt: userPrompt,
            max_tokens: 16000,
        },
    };
    // Track APEX calls for §17.6
    if (req.constitutional_flag || req.complexity === 'apex') {
        _apexCalls++;
    }
    // Proactive provider-budget check: a 'frozen' budget goes straight to the
    // fallback instead of burning a credit-exhaustion error to learn the same
    // thing; 'warning' alerts but still attempts the call.
    if (provider !== 'clawrouter' && provider !== 'ollama') {
        const budget = (0, ceo_wallet_1.getProviderBudgetStatus)(provider);
        if (budget.status === 'frozen') {
            log(c.yellow, ` [budget-guard] ${provider} ${budget.pct.toFixed(0)}% of $${budget.budget_usd} cap → skipping to fallback (CEO-wallet-funded) without trying upstream`);
            _maybeAlertBudget(provider, 'frozen', budget);
            try {
                const fallbackResp = await callLLM('clawrouter', 'deepseek/deepseek-chat', systemPrompt, userPrompt, timeoutMs, agentId, `${taskType}_budget_proactive_fallback_from_${provider}`);
                fallbackResp.fallback_used = true;
                fallbackResp.fallback_from_provider = provider;
                fallbackResp.fallback_reason = 'budget_frozen';
                return fallbackResp;
            }
            catch (fallbackErr) {
                // Fall through to the normal attempt: better to try the original
                // provider and handle its error than to leave the caller with nothing.
                log(c.red, ` [budget-guard] fallback also failed: ${String(fallbackErr?.message || fallbackErr)} — proceeding to attempt original provider as last resort`);
            }
        }
        else if (budget.status === 'warning') {
            _maybeAlertBudget(provider, 'warning', budget);
        }
    }
    try {
        const result = await routeCall(req);
        // The router may serve a different model than requested; the ledger
        // records what actually ran.
        const callerModel = result.model || model;
        // 'unknown' counts as a miss: fall back to the caller's provider hint
        // so the ledger never stores the literal string 'unknown'.
        const inferred = inferProvider(callerModel);
        const realProvider = (inferred && inferred !== 'unknown') ? inferred : provider;
        const inputTokens = result.input_tokens || 0;
        const outputTokens = result.output_tokens || 0;
        const computed = (0, llm_cost_table_1.computeCost)(callerModel, inputTokens, outputTokens);
        // Prefer the cost the router reports over the rate-table estimate.
        const costUsd = (result.cost_usd && result.cost_usd > 0) ? result.cost_usd : computed;
        // Two separate guards: a wallet-ledger failure must not also drop the
        // call from the in-memory usage report, and neither may break the call.
        try {
            if (costUsd > 0)
                (0, ceo_wallet_1.deductCost)(costUsd, agentId, taskType, realProvider, callerModel);
        }
        catch { /* ledger failure must never break the LLM call */ }
        try {
            recordModelCall(realProvider, callerModel, inputTokens, outputTokens, costUsd);
        }
        catch { /* recording failure must never break the LLM call */ }
        const response = {
            choices: [{ message: { content: result.content } }],
            usage: { total_tokens: inputTokens + outputTokens, input_tokens: inputTokens, output_tokens: outputTokens },
            provider: realProvider,
            model: callerModel,
            cost_usd: costUsd,
        };
        _accumulateTokens(response.usage?.total_tokens || 0);
        return response;
    }
    catch (err) {
        const msg = String(err?.message || err);
        // Provider-credit exhaustion: downgrade to clawrouter/deepseek and keep
        // going at lower quality instead of failing the task.
        if (isCreditExhaustion(msg) && provider !== 'clawrouter') {
            log(c.yellow, ` [fallback] ${provider} credit exhausted → downgrading to clawrouter/deepseek (alerting founder)`);
            try {
                alertCreditExhaustion(provider, msg, agentId, taskType);
            }
            catch { /* alert failure must never block */ }
            try {
                const fallbackResp = await callLLM('clawrouter', 'deepseek/deepseek-chat', systemPrompt, userPrompt, timeoutMs, agentId, `${taskType}_fallback_from_${provider}`);
                fallbackResp.fallback_used = true;
                fallbackResp.fallback_from_provider = provider;
                return fallbackResp;
            }
            catch (fallbackErr) {
                log(c.red, ` [fallback] downgrade also failed: ${String(fallbackErr?.message || fallbackErr)} — re-throwing original`);
                throw err;
            }
        }
        // `msg` is already normalised, so a null or non-Error rejection cannot
        // throw here and mask the original failure.
        log(c.red, ` [ClawRouter] Call failed: ${msg}`);
        throw err;
    }
}
370
/**
 * Decide whether an error message indicates provider credit, quota or billing
 * exhaustion (as opposed to a transient or local-configuration failure).
 *
 * Matching is case-insensitive and deliberately narrow:
 *  - messages mentioning the x402 wallet key ("x402_wallet_key", "wallet key",
 *    "missing wallet") are rejected first, even if they also contain an
 *    exhaustion marker — those are local-config errors;
 *  - otherwise any of the specific exhaustion markers below matches;
 *  - "401" matches only together with "credit" or "billing".
 * Generic 429 / "payment required" / "402" text does not match.
 *
 * @param {string} msg - error message; falsy values are treated as ''.
 * @returns {boolean}
 */
function isCreditExhaustion(msg) {
    const text = (msg || '').toLowerCase();
    const mentions = (marker) => text.includes(marker);
    const localConfigMarkers = ['x402_wallet_key', 'wallet key', 'missing wallet'];
    if (localConfigMarkers.some(mentions)) {
        return false;
    }
    const exhaustionMarkers = [
        'insufficient credit',
        'insufficient balance',
        'credit balance is too low',
        'credit_balance',
        'quota exceeded',
        'quota_exceeded',
        'billing_hard_limit',
        'insufficient_quota',
    ];
    if (exhaustionMarkers.some(mentions)) {
        return true;
    }
    return mentions('401') && (mentions('credit') || mentions('billing'));
}
401
/**
 * Send a Telegram notice that a provider ran out of credit and the call was
 * downgraded.
 *
 * The bot token comes from SENIOR_CODER_BOT_TOKEN (or TELEGRAM_BOT_TOKEN) and
 * the recipients from the comma-separated SENIOR_CODER_TG_GROUP_ID. When
 * either is absent the function returns without sending.
 *
 * @param {string} failedProvider - provider whose credit ran out.
 * @param {string} errMsg - upstream error text; only the first 200 chars are sent.
 * @param {string} agentId
 * @param {string} taskType
 */
function alertCreditExhaustion(failedProvider, errMsg, agentId, taskType) {
    const env = process.env;
    const botToken = env.SENIOR_CODER_BOT_TOKEN || env.TELEGRAM_BOT_TOKEN || '';
    const groupIds = (env.SENIOR_CODER_TG_GROUP_ID || '')
        .split(',')
        .map((id) => id.trim())
        .filter(Boolean);
    const configured = Boolean(botToken) && groupIds.length > 0;
    if (!configured) {
        return;
    }
    const text = [
        `⚠️ ${failedProvider} credit exhausted — fallback to DeepSeek via ClawRouter for this call`,
        '',
        `agent: ${agentId}`,
        `task_type: ${taskType}`,
        `error: ${errMsg.slice(0, 200)}`,
        '',
        `Top up the ${failedProvider} account to restore full quality. The swarm continues at degraded quality on CEO-wallet-funded providers in the meantime.`,
    ].join('\n');
    _sendTelegramAlert(botToken, groupIds, text);
}
414
- // Throttle proactive budget alerts: once per (provider, status) per process lifetime.
415
- // Avoids spamming the chat when every Anthropic call in a row trips the same threshold.
416
const _budgetAlertsSent = new Set();
/**
 * Send at most one Telegram budget alert per (provider, status) pair for the
 * lifetime of the process.
 *
 * The pair is marked as sent before the Telegram configuration is checked, so
 * an unconfigured process still counts the alert as handled.
 *
 * Figures missing from `report` are rendered as "n/a" rather than throwing:
 * this is a best-effort notification and must not fail the caller.
 *
 * @param {string} provider - provider name; lower-cased for the throttle key.
 * @param {string} status - 'frozen' or 'warning' (anything else is worded as a warning).
 * @param {Object} report - expected to carry spent_month_usd, budget_usd and pct.
 */
function _maybeAlertBudget(provider, status, report) {
    const key = `${provider.toLowerCase()}:${status}`;
    if (_budgetAlertsSent.has(key))
        return;
    _budgetAlertsSent.add(key);
    const botToken = process.env.SENIOR_CODER_BOT_TOKEN || process.env.TELEGRAM_BOT_TOKEN || '';
    const groupIds = (process.env.SENIOR_CODER_TG_GROUP_ID || '').split(',').map(s => s.trim()).filter(Boolean);
    if (!botToken || groupIds.length === 0)
        return;
    const fixed = (value, digits) => (typeof value === 'number' && Number.isFinite(value) ? value.toFixed(digits) : 'n/a');
    const frozen = status === 'frozen';
    const emoji = frozen ? '🔴' : '🟡';
    const verb = frozen ? 'FROZEN — routing to fallback' : 'WARNING — still attempting';
    const budgetVar = `${provider.toUpperCase()}_MONTHLY_BUDGET_USD`;
    const advice = frozen
        ? `New ${provider} calls go straight to DeepSeek (CEO-wallet-funded) until budget reset on month rollover OR ${budgetVar} raised.`
        : `Top up ${provider} account or raise ${budgetVar} to avoid hitting 95% freeze.`;
    const text = `${emoji} ${provider} monthly budget ${verb}\n\n` +
        `spent: $${fixed(report?.spent_month_usd, 4)} / $${fixed(report?.budget_usd, 2)} (${fixed(report?.pct, 0)}%)\n` +
        `threshold: ${frozen ? '95%' : '80%'}\n\n` +
        advice;
    _sendTelegramAlert(botToken, groupIds, text);
}
436
- // Shared Telegram send helper — fire-and-forget, native https, never blocks.
437
/**
 * POST `text` to each chat via the Telegram Bot API `sendMessage` endpoint.
 * Best-effort: errors are discarded and nothing is awaited.
 *
 * @param {string} botToken - bot token placed in the request path.
 * @param {string[]} groupIds - chat ids; all-digit ids (optionally negative)
 *   are sent as numbers, anything else (e.g. "@channelname") as a string.
 * @param {string} text - message body.
 */
function _sendTelegramAlert(botToken, groupIds, text) {
    for (const chatId of groupIds) {
        // One bad recipient must not stop the alert reaching the others.
        try {
            // parseInt() would turn a non-numeric id into NaN, which JSON
            // serialises as null; keep such ids as strings instead.
            const chatRef = /^-?\d+$/.test(chatId) ? Number(chatId) : chatId;
            const body = JSON.stringify({ chat_id: chatRef, text });
            const req = require('https').request({
                hostname: 'api.telegram.org',
                path: `/bot${botToken}/sendMessage`,
                method: 'POST',
                headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) },
                timeout: 5000,
            });
            req.on('error', () => { });
            // The `timeout` option only emits an event; the request has to be
            // torn down explicitly or it lingers until the socket closes.
            req.on('timeout', () => { req.destroy(); });
            req.write(body);
            req.end();
        }
        catch { /* best-effort alert: never propagate */ }
    }
}
452
/**
 * Derive the provider from a model name.
 *
 * Matching is case-insensitive. Substring markers (sonnet, deepseek, gpt-, ...)
 * are tested against the whole id; prefix markers (claude-, o1/o3/o4) are
 * tested against both the whole id and the part after the last "/", because
 * ids may be vendor-qualified ("deepseek/deepseek-chat").
 *
 * @param {string} model - model name; falsy values yield 'unknown'.
 * @returns {string} 'anthropic' | 'minimax' | 'deepseek' | 'qwen' | 'openai' |
 *   'xai' | 'google' | 'unknown'. Callers treat 'unknown' as "no answer".
 */
function inferProvider(model) {
    const m = (model || '').toLowerCase();
    if (!m)
        return 'unknown';
    // Bare model name with any "vendor/" qualifier removed.
    const base = m.slice(m.lastIndexOf('/') + 1);
    const startsWith = (prefix) => m.startsWith(prefix) || base.startsWith(prefix);
    if (m.includes('sonnet') || m.includes('haiku') || m.includes('opus') || startsWith('claude-'))
        return 'anthropic';
    if (m.includes('minimax'))
        return 'minimax';
    if (m.includes('deepseek'))
        return 'deepseek';
    if (m.includes('qwen'))
        return 'qwen';
    // o-series: "o1", "o3-mini", "openai/o4-mini" — a bare name or a dashed variant.
    const isOSeries = startsWith('o1-') || startsWith('o3-') || /^o[134](-|$)/.test(base);
    if (m.includes('gpt-') || isOSeries || m.includes('codex'))
        return 'openai';
    if (m.includes('grok'))
        return 'xai';
    if (m.includes('gemini') || m.includes('flash'))
        return 'google';
    return 'unknown';
}
481
/**
 * Map a legacy provider+model pair to a router complexity level.
 *
 *  - 'ollama': '0.6b' models → 'nano', 4b models → 'local', anything else → 'power'
 *  - 'anthropic' + sonnet → 'apex'; 'anthropic' + haiku → 'exec'
 *  - 'clawrouter', 'minimax', 'openai' → 'exec'
 *  - everything else → 'power'
 *
 * @param {string} provider - legacy provider name.
 * @param {string} model - legacy model name; a missing model is treated as ''.
 * @returns {'nano'|'local'|'power'|'apex'|'exec'}
 */
function mapLegacyToComplexity(provider, model) {
    const name = model || '';
    if (provider === 'ollama') {
        if (name.includes('0.6b'))
            return 'nano';
        // "4b" must not be the tail of a larger size: a plain substring test
        // also matches "14b"/"34b" and would send those models to 'local'.
        if (/(?<!\d)4b/.test(name))
            return 'local';
        return 'power'; // e.g. qwen3:14b, deepseek-r1:14b
    }
    if (provider === 'anthropic' && name.includes('sonnet'))
        return 'apex';
    if (provider === 'anthropic' && name.includes('haiku'))
        return 'exec';
    if (provider === 'clawrouter' || provider === 'minimax' || provider === 'openai')
        return 'exec';
    return 'power';
}
512
/**
 * A call is flagged constitutional only when the provider is 'anthropic' and
 * the model name contains "sonnet".
 *
 * @param {string} provider
 * @param {string} model
 * @returns {boolean}
 */
function isConstitutionalCall(provider, model) {
    if (provider !== 'anthropic') {
        return false;
    }
    return model.includes('sonnet');
}
516
- // Legacy aliases — these are now thin wrappers that route through callLLM()
517
- // They exist so that call sites like callAnthropicCached() don't need immediate rewriting.
518
- // All direct API calls are eliminated — every call goes through ClawRouter v2.0.
519
/**
 * Legacy alias kept so existing call sites keep working: forwards to
 * callLLM() with the provider fixed to 'anthropic'.
 *
 * @param {string} model
 * @param {string} systemPrompt
 * @param {string} userPrompt
 * @param {number} [timeoutMs] - passed through; callLLM applies its own default when undefined.
 * @returns {Promise<Object>} whatever callLLM resolves with.
 */
async function callAnthropicCached(model, systemPrompt, userPrompt, timeoutMs) {
    // No caching is done here; the call is simply routed like any other.
    const response = await callLLM('anthropic', model, systemPrompt, userPrompt, timeoutMs);
    return response;
}
523
/**
 * Snapshot of the module-level routing counters for the sprint JSON (§17.6).
 *
 * @returns {{llm_calls_routed: number, direct_api_violations: number,
 *   apex_calls: number, apex_judge_pattern_compliant: boolean}}
 */
function getClawRouterSprintMetrics() {
    const metrics = {};
    metrics.llm_calls_routed = _llmCallsRouted;
    metrics.direct_api_violations = _directApiViolations;
    metrics.apex_calls = _apexCalls;
    metrics.apex_judge_pattern_compliant = _apexJudgePatternCompliant;
    return metrics;
}
532
- // B.9: Nano classifier — uses T0 NANO (qwen3:0.6b) via ClawRouter v2.0
533
/**
 * Classify a task prompt, asking the router's nano tier only when the regex
 * classifier is not confident.
 *
 * The regex result is returned as-is unless it is 'util'. In that case the
 * first 300 chars of the prompt are sent for classification, and the first
 * whitespace-delimited word of the reply (lower-cased) is used if it is a
 * known category. Any error, or an unrecognised reply, falls back to the
 * regex result.
 *
 * @param {string} prompt - task description.
 * @returns {Promise<string>} category name.
 */
async function classifyTaskSmart(prompt) {
    const regexType = (0, model_router_1.classifyTask)(prompt);
    if (regexType !== 'util') {
        return regexType; // regex was confident
    }
    const knownCategories = ['code', 'reason', 'lang', 'util', 'audit', 'content', 'data', 'refactor-complex', 'agent-framework', 'codebase-scan'];
    try {
        const classifyPrompt = [
            'Classify this task into exactly one category. Reply with ONLY the category name, nothing else.',
            'Categories: code, reason, lang, util, audit, content, data, refactor-complex, agent-framework, codebase-scan',
            '',
            `Task: ${prompt.substring(0, 300)}`,
        ].join('\n');
        const reply = await routeCall({
            task_type: 'nano_classify',
            tier_class: 'text',
            complexity: 'nano',
            context_tokens: Math.ceil(classifyPrompt.length / 4),
            constitutional_flag: false,
            agent_id: 'nano-classifier',
            payload: { prompt: classifyPrompt, max_tokens: 20 },
        });
        const [firstWord] = reply.content.trim().toLowerCase().split(/\s/);
        if (knownCategories.includes(firstWord)) {
            return firstWord;
        }
        return regexType;
    }
    catch {
        return regexType;
    }
}
556
- // B.12: Context compression using T1 LOCAL (qwen3:4b) via ClawRouter v2.0 — reduces cloud token spend 70-80%
557
/**
 * Ask the router's 'local' tier to shorten a task context.
 *
 * Contexts under 1200 chars are returned unchanged. The compressed text is
 * used only when it is longer than 100 chars and shorter than 90% of the
 * original; in every other case — including any error — the original context
 * is returned.
 *
 * @param {string} context - task context to compress.
 * @returns {Promise<string>} compressed or original context.
 */
async function compressContext(context) {
    if (context.length < 1200) {
        return context; // not worth compressing
    }
    try {
        // NOTE(review): only the first 4000 chars are sent for compression, yet
        // the summary replaces the WHOLE context when accepted — anything past
        // 4000 chars is absent from the result. Confirm this is intended.
        const excerpt = context.substring(0, 4000);
        const compressPrompt = `Compress the following task context to under 600 words. Preserve all file paths, function names, technical requirements, and acceptance criteria. Remove prose filler and redundant explanations.\n\n${excerpt}`;
        const reply = await routeCall({
            task_type: 'qcg_compress',
            tier_class: 'text',
            complexity: 'local',
            context_tokens: Math.ceil(compressPrompt.length / 4),
            constitutional_flag: false,
            agent_id: 'context-compressor',
            payload: { prompt: compressPrompt, max_tokens: 800 },
        });
        const compressed = reply.content.trim();
        const worthUsing = compressed.length > 100 && compressed.length < context.length * 0.9;
        if (worthUsing) {
            const pct = Math.round(compressed.length / context.length * 100);
            log(c.gray, ` [compress] ${context.length} → ${compressed.length} chars (${pct}%)`);
            return compressed;
        }
    }
    catch { /* non-fatal */ }
    return context;
}
577
- // B.10: Local QA gate — structural checks only (no LLM — qwen3 think-mode unreliable for PASS/FAIL)
578
- // LLM-based QA deferred to Claude supervisor review which gives structured feedback.
579
- // Markers that, when repeated, indicate the agent's chain-of-thought was
580
- // captured as the file body (the "rumination" failure mode — sprint smoke
581
- // 2026-05-07). These are phrases a coder agent says to itself while deciding
582
- // what to output; they should never appear in a deliverable file.
583
// Each phrase appears once: a duplicated entry would make every occurrence of
// that phrase count twice in detectRumination().
const RUMINATION_MARKERS = [
    'let me think', 'the problem says', 'but the problem says', 'however, the problem',
    'so we output', 'therefore the answer is', 'we output nothing', 'we output the empty',
    'so the answer is', 'but to be safe', 'but note:', 'but to be precise',
    'so i will output', 'let me decide', 'final answer:',
    'final decision:', 'i will output', 'should i output', 'let me re-read',
];
/**
 * Count rumination markers in a piece of text.
 *
 * Matching is case-insensitive. Occurrences are counted per marker, so text
 * matching a longer marker that contains a shorter one (e.g. "so i will
 * output" / "i will output") contributes one hit for each.
 *
 * @param {string} content - file body to inspect.
 * @returns {{hits: number, ratio: number}} total marker occurrences, and that
 *   total divided by the whitespace-separated word count (minimum 1).
 */
function detectRumination(content) {
    const lower = content.toLowerCase();
    let hits = 0;
    for (const marker of RUMINATION_MARKERS) {
        // split() yields one more piece than there are non-overlapping
        // occurrences of the marker — cheap, good enough.
        hits += lower.split(marker).length - 1;
    }
    const wordCount = Math.max(1, content.split(/\s+/).filter(Boolean).length);
    return { hits, ratio: hits / wordCount };
}
600
/**
 * Structural QA gate run before supervisor review. No LLM is involved.
 *
 * Checks, in order, returning on the first failure:
 *  1. every file has at least 50 chars of trimmed content;
 *  2. every file path exists on disk (ANY missing file fails the gate);
 *  3. no file has >= 3 rumination-marker hits making up >= 0.5% of its words;
 *  4. for .ts/.tsx paths, typecheckChangedFiles() passes.
 *
 * @param {*} _task - not read by this function.
 * @param {Array<{path: string, content?: string}>} fileContents - written files.
 * @returns {Promise<{pass: boolean, reason: string}>}
 */
async function localQAGate(_task, fileContents) {
    const textOf = (file) => file.content || '';
    const listPaths = (files) => files.map((file) => file.path).join(', ');
    // 1. Under 50 chars means the model returned nothing useful.
    const tooShort = fileContents.filter((file) => textOf(file).trim().length < 50);
    if (tooShort.length > 0) {
        return { pass: false, reason: `Files too short/empty: ${listPaths(tooShort)}` };
    }
    // 2. A path that is not on disk means the write step silently failed.
    const { existsSync: _exists } = await Promise.resolve().then(() => __importStar(require('fs')));
    const notOnDisk = fileContents.filter((file) => !_exists(file.path));
    if (notOnDisk.length > 0) {
        return { pass: false, reason: `Files not written to disk: ${listPaths(notOnDisk)}` };
    }
    // 3. Chain-of-thought leaked into the file body.
    for (const file of fileContents) {
        const { hits, ratio } = detectRumination(textOf(file));
        const ruminating = hits >= 3 && ratio >= 0.005;
        if (ruminating) {
            return {
                pass: false,
                reason: `Rumination detected in ${file.path}: ${hits} chain-of-thought markers (${(ratio * 100).toFixed(2)}% of words). The agent dumped its reasoning into the file body instead of outputting just the file content.`,
            };
        }
    }
    // 4. Cheap deterministic typecheck so uncompilable code never reaches review.
    const tsPaths = fileContents.map((file) => file.path).filter((p) => /\.(ts|tsx)$/.test(p));
    if (tsPaths.length > 0) {
        const tcResult = await typecheckChangedFiles(tsPaths);
        if (!tcResult.pass) {
            return {
                pass: false,
                reason: `Typecheck failed (${tcResult.errorCount} error(s)): ${tcResult.firstError}`,
            };
        }
    }
    return { pass: true, reason: `${fileContents.length} file(s) non-empty + no rumination + typecheck PASS — proceeding to supervisor review` };
}
642
- // TICKET-085 (v2 — TICKET-088 fix): project-aware typecheck. v1 used
643
- // loose-file mode + `npx -y typescript@5 tsc` and silently passed
644
- // EVERYTHING because the npx invocation produced no tsc output and exit 0
645
- // (npm "could not determine executable") — gate was a no-op for ~24h.
646
- //
647
- // v2: find the nearest tsconfig.json walking UP from each changed file,
648
- // run `tsc -p <tsconfig> --noEmit --incremental` per project. Incremental
649
- // build cache (.tsbuildinfo) makes subsequent runs fast (~2-5s typical).
650
- // Catches both syntactic errors AND cross-file type/import errors —
651
- // including the "imports from a non-existent file" class that bit us
652
- // when sprint-1581 + sprint-1582 shipped hallucinated component graphs.
653
/**
 * Project-aware TypeScript gate for the files a task changed.
 *
 * For every changed file the nearest `tsconfig.json` (walking up at most 12
 * directory levels) is located, and `tsc -p <tsconfig> --noEmit --incremental`
 * is run once per distinct project. Only errors reported against the changed
 * files fail the gate; errors in other files are logged and ignored.
 *
 * Tooling failures (tsc exits non-zero without any `error TSxxxx` line, e.g.
 * timeout or missing binary) never fail the gate, but they are now counted and
 * surfaced in the returned message instead of being reported as "clean".
 *
 * @param {string[]} filePaths - Paths of the changed files.
 * @returns {Promise<{pass: boolean, errorCount: number, firstError: string}>}
 *   On failure `firstError` is the first matching tsc error line (max 200
 *   chars); on success it is a short status note.
 */
async function typecheckChangedFiles(filePaths) {
    const { execSync } = await Promise.resolve().then(() => __importStar(require('child_process')));
    const { existsSync } = await Promise.resolve().then(() => __importStar(require('fs')));
    const { join, dirname, resolve } = await Promise.resolve().then(() => __importStar(require('path')));
    // Find nearest tsconfig.json walking up from a file path.
    // Returns null if none is found within 12 levels or before the filesystem root.
    function findTsconfig(filePath) {
        let dir = dirname(resolve(filePath));
        for (let i = 0; i < 12; i++) {
            const candidate = join(dir, 'tsconfig.json');
            if (existsSync(candidate))
                return candidate;
            const parent = dirname(dir);
            if (parent === dir)
                break;
            dir = parent;
        }
        return null;
    }
    // Group changed files by which tsconfig governs them.
    const projects = new Set();
    let filesWithoutProject = 0;
    for (const f of filePaths) {
        const tc = findTsconfig(f);
        if (tc)
            projects.add(tc);
        else
            filesWithoutProject++;
    }
    if (projects.size === 0) {
        return { pass: true, errorCount: 0, firstError: `no tsconfig found for ${filesWithoutProject} file(s) — skipping` };
    }
    // Pick a local tsc binary. Prefer the repo's installed copy (fast, no
    // network). Fall back to npx-with-explicit-package only if not present.
    const localTsc = join(process.cwd(), 'node_modules', '.bin', 'tsc');
    const tscCmd = existsSync(localTsc)
        ? `"${localTsc}"`
        : `npx -y --package=typescript@5 tsc`;
    // Loop-invariant: absolute paths of the files this task changed.
    const changedAbs = new Set(filePaths.map((f) => resolve(f)));
    let toolingFailures = 0;
    for (const tsconfig of projects) {
        try {
            // NOTE(review): the tsconfig path is interpolated into a shell
            // command line. A directory name containing `"` or `$(...)` would
            // be interpreted by the shell — confirm file paths are trusted here.
            execSync(`${tscCmd} -p "${tsconfig}" --noEmit --incremental`, {
                encoding: 'utf-8', timeout: 120_000, stdio: 'pipe', cwd: process.cwd(),
            });
        }
        catch (e) {
            const out = (e.stdout || '') + (e.stderr || '');
            const lines = out.split('\n').filter((l) => /error TS\d+/i.test(l));
            if (lines.length === 0) {
                // Tooling failure (timeout, tsc not found, OOM). Don't block pipeline
                // on infra; supervisor still has a shot at catching substantive bugs.
                toolingFailures++;
                console.warn(`[typecheck-gate] tooling failure on ${tsconfig}: ${(e.message || '').slice(0, 120)}`);
                continue; // try next project
            }
            // Filter errors to JUST the files this task changed. Other errors
            // (pre-existing in unrelated files) shouldn't block this task.
            const ourErrors = lines.filter((l) => {
                const m = l.match(/^([^(]+)\(/);
                return m && changedAbs.has(resolve(m[1].trim()));
            });
            if (ourErrors.length > 0) {
                return {
                    pass: false,
                    errorCount: ourErrors.length,
                    firstError: ourErrors[0].trim().slice(0, 200),
                };
            }
            // tsc errored but all errors are in files we didn't touch — let it through.
            console.warn(`[typecheck-gate] ${lines.length} pre-existing error(s) in ${tsconfig} unrelated to this task — passing`);
        }
    }
    // Do not claim a clean typecheck for projects where tsc never actually ran.
    const note = toolingFailures === 0
        ? `${projects.size} project(s) typechecked clean`
        : `${projects.size - toolingFailures} of ${projects.size} project(s) typechecked clean; ${toolingFailures} skipped (tsc tooling failure)`;
    return { pass: true, errorCount: 0, firstError: note };
}
727
- // B.11: Tiered debugger — routes debug effort by issue severity via ClawRouter v2.0
728
/**
 * Route a debugging request to a model tier chosen from the review's issues.
 *
 * Tier selection (first match wins):
 *   - architectural: any issue with severity `critical` or a description containing "architect";
 *   - systemic:      any issue with severity `high` or a description containing "logic";
 *   - minor:         everything else.
 *
 * @param {{context?: string}} task - Task being debugged; `context` is truncated per tier.
 * @param {{issues?: Array<{severity?: string, file?: string, description?: string}>}} review - Supervisor review.
 * @param {*} _systemPrompt - Unused.
 * @returns {Promise<string|null>} Content returned by `routeCall`, or `null` when
 *   the call fails or yields empty content.
 */
async function tieredDebug(task, review, _systemPrompt) {
    const issues = Array.isArray(review?.issues) ? review.issues : [];
    // Issues without a description must not crash tier selection.
    const descriptionOf = (issue) => String(issue?.description ?? '').toLowerCase();
    const issueText = issues.map((i) => `[${i.severity}] ${i.file}: ${i.description}`).join('\n');
    const hasArchitecture = issues.some((i) => i.severity === 'critical' || descriptionOf(i).includes('architect'));
    const hasSystemic = issues.some((i) => i.severity === 'high' || descriptionOf(i).includes('logic'));
    // One row per tier: routing metadata, prompt intro, context budget (chars) and output budget.
    let tier;
    if (hasArchitecture) {
        // Tier 3: T2.5 EXEC — deep architectural issues (via ClawRouter)
        tier = { task_type: 'debug_architectural', complexity: 'exec', contextChars: 800, max_tokens: 4096, intro: 'Fix this code. Issues:' };
    }
    else if (hasSystemic) {
        // Tier 2: T2 POWER — logical/systemic issues
        tier = { task_type: 'debug_systemic', complexity: 'power', contextChars: 600, max_tokens: 2048, intro: 'Fix these code issues:' };
    }
    else {
        // Tier 1: T2 POWER — minor issues
        tier = { task_type: 'debug_minor', complexity: 'power', contextChars: 500, max_tokens: 1024, intro: 'Fix these minor code issues:' };
    }
    // A task without context still gets debugged from the issue list alone.
    const context = String(task?.context ?? '');
    try {
        const result = await routeCall({
            task_type: tier.task_type, tier_class: 'text', complexity: tier.complexity,
            context_tokens: Math.ceil((issueText.length + tier.contextChars) / 4), constitutional_flag: false,
            agent_id: 'tiered-debugger',
            payload: { prompt: `${tier.intro}\n${issueText}\n\nTask: ${context.substring(0, tier.contextChars)}`, max_tokens: tier.max_tokens },
        });
        // Every tier reports "no fix" the same way.
        return result?.content || null;
    }
    catch (e) {
        log(c.yellow, ` [tiered-debug] ${e.message}`);
    }
    return null;
}
769
/**
 * POST a body to an HTTP(S) endpoint and resolve with the parsed JSON response.
 *
 * @param {string} url - Absolute http:// or https:// URL. The query string is sent as part of the request path.
 * @param {Object<string, string>} headers - Request headers; `Content-Length` is added automatically.
 * @param {string} body - Request body.
 * @param {number} timeoutMs - Socket timeout in milliseconds.
 * @returns {Promise<any>} Parsed JSON response.
 *   Rejects with `API error: …` when the response JSON has a truthy `error`
 *   field, `Failed to parse response: …` when the body is not usable JSON,
 *   `API timeout (…s)` on timeout, or the underlying socket error.
 */
function httpPost(url, headers, body, timeoutMs) {
    const parsed = new URL(url);
    const isHttps = parsed.protocol === 'https:';
    const lib = isHttps ? https : http;
    return new Promise((resolve, reject) => {
        const req = lib.request({
            hostname: parsed.hostname,
            port: parsed.port || (isHttps ? 443 : undefined),
            // Include the query string; pathname alone silently drops `?key=…` style parameters.
            path: `${parsed.pathname}${parsed.search}`,
            method: 'POST',
            headers: { ...headers, 'Content-Length': Buffer.byteLength(body).toString() },
        }, (res) => {
            // Collect raw buffers and decode once, so a multi-byte UTF-8
            // character split across two chunks is not corrupted.
            const chunks = [];
            res.on('data', (chunk) => chunks.push(chunk));
            res.on('error', reject);
            res.on('end', () => {
                const data = Buffer.concat(chunks).toString('utf-8');
                try {
                    const result = JSON.parse(data);
                    if (result.error) {
                        reject(new Error(`API error: ${result.error.message || JSON.stringify(result.error)}`));
                        return;
                    }
                    resolve(result);
                }
                catch {
                    reject(new Error(`Failed to parse response: ${data.substring(0, 500)}`));
                }
            });
        });
        req.on('error', reject);
        req.setTimeout(timeoutMs, () => { req.destroy(); reject(new Error(`API timeout (${timeoutMs / 1000}s)`)); });
        req.write(body);
        req.end();
    });
}
801
- // ===== Supervisor Agent (Claude via Anthropic API) =====
802
/**
 * First-pass code review supervisor.
 *
 * Builds a review prompt from the task spec and the generated files, sends it
 * through `callLLM`, and returns the normalised review. Standard tasks go to
 * ClawRouter/DeepSeek when ClawRouter is available; tasks whose `task_type` is
 * `audit` / `refactor-complex`, or whose context mentions "security" or
 * "audit", always go to Anthropic.
 */
class SupervisorAgent {
    systemPrompt;
    constructor() {
        const promptPath = './agents/supervisor/prompt.md';
        const rawPrompt = (0, fs_1.existsSync)(promptPath) ? (0, fs_1.readFileSync)(promptPath, 'utf-8') : 'You are a code review supervisor.';
        this.systemPrompt = loadConstitutionalPreamble() + rawPrompt;
        log(c.magenta, '+ Loaded supervisor agent (Claude via Anthropic API)');
    }
    /**
     * Review the files generated for a task.
     *
     * @param {object} task - Task record; reads `id`, `context`, `task_type`, `_integrityFailed`, `_integrityDetails`.
     * @param {string[]} files - Paths of the generated files. Missing files are reviewed as empty.
     * @returns {Promise<object>} The normalised review. If no JSON object is found in
     *   the reply, an auto-approved review (score 70) is returned; if the call or the
     *   JSON parse throws, an APPROVED review with score 0 and a
     *   "Supervisor unavailable: …" summary is returned.
     */
    async reviewTask(task, files) {
        log(c.magenta, `\n[supervisor] Reviewing: ${task.id}`);
        // Read every file exactly once so the displayed content and the fence
        // evidence below are always computed from the same bytes.
        const loaded = files.map((filepath) => ({
            filepath,
            content: (0, fs_1.existsSync)(filepath) ? (0, fs_1.readFileSync)(filepath, 'utf-8') : '',
        }));
        // Use XML-style tags (NOT code fences) so the model can't confuse display format with file content.
        // 12000 char limit — covers EXACT CONTENT files (typically 5K-10K chars).
        const fileContents = loaded
            .map(({ filepath, content }) => `### ${filepath}\n<file_content>\n${content.substring(0, 12000)}\n</file_content>`)
            .join('\n\n');
        // Pre-compute the fence check and inject it as evidence so the model never has to infer fence presence.
        const fenceCheckLines = loaded.map(({ filepath, content }) => {
            const firstLine = content.trimStart().split('\n')[0] || '';
            const hasFence = firstLine.startsWith('```');
            return ` ${filepath}: ${hasFence ? `FENCE DETECTED (first line: ${JSON.stringify(firstLine)})` : 'OK (no fence at start)'}`;
        }).join('\n');
        // CTO-005: surface integrity failures to the reviewer.
        const integrityContext = task._integrityFailed
            ? `\n\n## ⚠️ INTEGRITY ALERT\n${task._integrityDetails}\nThis file was flagged for destructive rewrite. The original was preserved. REJECT this task.\n`
            : '';
        // Sherlock v2: inject ASMR episodic memory context (AMD-21-03).
        const memoryContext = await (0, sherlock_memory_1.getSherlockMemoryContext)(task.context || task.id);
        const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}${memoryContext}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines}\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
        const startTime = Date.now();
        // B.15: DeepSeek via ClawRouter for standard tasks;
        // Claude Sonnet is retained for audit/refactor-complex (high-stakes).
        const taskType = task.task_type || '';
        const isHighStakes = taskType === 'audit' || taskType === 'refactor-complex' ||
            (task.context || '').toLowerCase().includes('security') || (task.context || '').toLowerCase().includes('audit');
        const crAvail = await (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
        const reviewProvider = (crAvail && !isHighStakes) ? 'clawrouter' : 'anthropic';
        const reviewModel = reviewProvider === 'clawrouter' ? 'deepseek/deepseek-chat' : 'claude-sonnet-4-6';
        log(c.gray, ` -> Sending to ${reviewProvider === 'clawrouter' ? 'ClawRouter/DeepSeek' : 'Claude Sonnet'} (${isHighStakes ? 'high-stakes' : 'standard'})...`);
        try {
            const response = await callLLM(reviewProvider, reviewModel, this.systemPrompt, userPrompt, 120000, 'supervisor', 'code_review');
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            log(c.gray, ` -> Review received in ${elapsed}s (${response.usage?.total_tokens || '?'} tokens)`);
            const content = response.choices?.[0]?.message?.content || '';
            // Greedy match: from the first "{" to the last "}" in the reply.
            const jsonMatch = content.match(/\{[\s\S]*\}/);
            if (jsonMatch) {
                const review = normalizeReview(JSON.parse(jsonMatch[0]));
                const gradeLabel = review.grade ? `${review.grade} · ` : '';
                if (review.verdict === 'APPROVED') {
                    log(c.green, ` ✓ APPROVED (${gradeLabel}score: ${review.score}/100)`);
                }
                else {
                    log(c.red, ` ✗ REJECTED (${gradeLabel}score: ${review.score}/100)`);
                    log(c.yellow, ` Summary: ${review.summary}`);
                    for (const issue of review.issues || []) {
                        log(c.yellow, ` [${issue.severity}] ${issue.file}: ${issue.description}`);
                    }
                }
                return review;
            }
            log(c.yellow, ' ! Could not parse review JSON, auto-approving');
            return { verdict: 'APPROVED', score: 70, summary: 'Auto-approved (parse failure)', issues: [], strengths: [] };
        }
        catch (error) {
            log(c.yellow, ` ! Supervisor error: ${error.message}`);
            return { verdict: 'APPROVED', score: 0, summary: `Supervisor unavailable: ${error.message}`, issues: [], strengths: [] };
        }
    }
}
872
- // ===== Supervisor 2 Agent (Claude Haiku 4.5; DeepSeek mono-supervision on Anthropic drain) =====
873
/**
 * Second-pass code review supervisor.
 *
 * Tries Claude Haiku through `callAnthropicCached` first. If that call
 * throws, it falls back to ClawRouter/DeepSeek through `callLLM`. If both
 * fail, it returns an APPROVED review with score 0 and a
 * "Supervisor 2 unavailable: …" summary.
 */
class Supervisor2Agent {
    systemPrompt;
    constructor() {
        const promptPath = './agents/supervisor/prompt.md';
        const rawPrompt = (0, fs_1.existsSync)(promptPath) ? (0, fs_1.readFileSync)(promptPath, 'utf-8') : 'You are a code review supervisor.';
        this.systemPrompt = loadConstitutionalPreamble() + rawPrompt;
        log(c.magenta, '+ Loaded supervisor 2 agent (Claude Haiku — second pass)');
    }
    /**
     * Review the files generated for a task.
     *
     * @param {object} task - Task record; reads `id`, `context`, `_integrityFailed`, `_integrityDetails`.
     * @param {string[]} files - Paths of the generated files. Missing files are reviewed as empty.
     * @returns {Promise<object>} The normalised review, or an auto-approved review
     *   (score 70, "Auto-approved (parse failure)") when the reply contains no
     *   JSON object or the JSON is malformed.
     */
    async reviewTask(task, files) {
        log(c.magenta, `\n[supervisor-2/haiku] Reviewing: ${task.id}`);
        // Read every file exactly once so the displayed content and the fence
        // evidence below are always computed from the same bytes.
        const loaded = files.map((filepath) => ({
            filepath,
            content: (0, fs_1.existsSync)(filepath) ? (0, fs_1.readFileSync)(filepath, 'utf-8') : '',
        }));
        // Use XML-style tags (NOT code fences) so the model can't confuse display format with file content.
        // 12000 char limit — covers EXACT CONTENT files (typically 5K-10K chars).
        const fileContents = loaded
            .map(({ filepath, content }) => `### ${filepath}\n<file_content>\n${content.substring(0, 12000)}\n</file_content>`)
            .join('\n\n');
        // Pre-compute the fence check and inject it as evidence so the model never has to infer fence presence.
        const fenceCheckLines2 = loaded.map(({ filepath, content }) => {
            const firstLine = content.trimStart().split('\n')[0] || '';
            const hasFence = firstLine.startsWith('```');
            return ` ${filepath}: ${hasFence ? `FENCE DETECTED (first line: ${JSON.stringify(firstLine)})` : 'OK (no fence at start)'}`;
        }).join('\n');
        // CTO-005: surface integrity failures to the reviewer.
        const integrityContext2 = task._integrityFailed
            ? `\n\n## ⚠️ INTEGRITY ALERT\n${task._integrityDetails}\nThis file was flagged for destructive rewrite. The original was preserved. REJECT this task.\n`
            : '';
        const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext2}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines2}\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
        const startTime = Date.now();
        // B.15: Haiku for the second pass. If Anthropic is unavailable, fall back to
        // ClawRouter/DeepSeek mono-supervision rather than halting the swarm.
        const tryPath = async (provider, model, label) => {
            log(c.gray, ` -> Sending to ${label}...`);
            const response = provider === 'anthropic'
                ? await callAnthropicCached(model, this.systemPrompt, userPrompt, 120000)
                : await callLLM('clawrouter', model, this.systemPrompt, userPrompt, 120000, 'supervisor-2', 'code_review');
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            log(c.gray, ` -> ${label} review received in ${elapsed}s (${response.usage?.total_tokens || '?'} tokens)`);
            return response;
        };
        let response;
        let usedFallback = false;
        try {
            response = await tryPath('anthropic', 'claude-haiku-4-5-20251001', 'Claude Haiku (second pass)');
        }
        catch (error) {
            const msg = String(error?.message || '');
            const credit = /credit balance|invalid_request_error.*credit|insufficient.*quota/i.test(msg);
            log(c.yellow, ` ! [Haiku] Anthropic unavailable${credit ? ' (credits)' : ''}: ${msg.slice(0, 120)}`);
            log(c.yellow, ' ! Falling back to ClawRouter/DeepSeek mono-supervision — swarm continues');
            try {
                response = await tryPath('clawrouter', 'deepseek/deepseek-chat', 'ClawRouter/DeepSeek (mono fallback)');
                usedFallback = true;
            }
            catch (fallbackErr) {
                log(c.yellow, ` ! [Haiku] Both Anthropic + ClawRouter unavailable: ${fallbackErr.message}`);
                return { verdict: 'APPROVED', score: 0, summary: `Supervisor 2 unavailable: ${msg.slice(0, 200)}`, issues: [], strengths: [] };
            }
        }
        const content = response.choices?.[0]?.message?.content || '';
        // Greedy match: from the first "{" to the last "}" in the reply.
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        let review = null;
        if (jsonMatch) {
            try {
                review = normalizeReview(JSON.parse(jsonMatch[0]));
            }
            catch (parseErr) {
                // Malformed JSON must not escape reviewTask as an exception;
                // treat it like any other unparseable reply (handled below).
                log(c.yellow, ` ! [Haiku] Review JSON malformed: ${String(parseErr?.message || parseErr).slice(0, 120)}`);
            }
        }
        if (review) {
            const tag = usedFallback ? '[DeepSeek-fallback]' : '[Haiku]';
            const gradeLabel = review.grade ? `${review.grade} · ` : '';
            if (review.verdict === 'APPROVED')
                log(c.green, ` ✓ ${tag} APPROVED (${gradeLabel}score: ${review.score}/100)`);
            else {
                log(c.red, ` ✗ ${tag} REJECTED (${gradeLabel}score: ${review.score}/100)`);
                log(c.yellow, ` Summary: ${review.summary}`);
                for (const issue of review.issues || [])
                    log(c.yellow, ` [${issue.severity}] ${issue.file}: ${issue.description}`);
            }
            return review;
        }
        log(c.yellow, ' ! [Haiku] Could not parse review JSON, auto-approving');
        return { verdict: 'APPROVED', score: 70, summary: 'Auto-approved (parse failure)', issues: [], strengths: [] };
    }
}
953
/**
 * Merge the two supervisor reviews into a single final review.
 *
 * Resolution order:
 *   1. both reviews look "unavailable"  → auto-approve with score 50;
 *   2. exactly one looks unavailable    → the other review is final;
 *   3. both APPROVED                    → approved, rounded average score, merged issues/strengths;
 *   4. both not APPROVED                → rejected, lower score, merged issues;
 *   5. conflict                         → `ceo.resolveReviewConflict` decides; if that
 *                                         call throws, the rejecting review is final.
 *
 * @param {object} review1 - Review from supervisor 1.
 * @param {object} review2 - Review from supervisor 2.
 * @param {object} task - Task record; reads `id`.
 * @param {object} ceo - Object exposing `resolveReviewConflict(task, approvalReview, rejectionReview, approver, rejecter)`.
 * @returns {Promise<{finalReview: object, review1: object, review2: object, consensus: boolean, escalatedToCEO: boolean, ceoDecision?: string}>}
 */
async function reconcileSupervisorReviews(review1, review2, task, ceo) {
    // S67-001: graceful degradation when either supervisor is unavailable.
    // The log labels use "Sup1" / "Sup2" regardless of which provider answered.
    // NOTE(review): this pattern is equivalent to /unavailable/i, so a genuine
    // review whose summary merely mentions "unavailable" is also treated as an
    // outage — confirm whether it should be anchored to the generated prefixes.
    const isUnavailable = (r) => /Supervisor unavailable|Supervisor 2 unavailable|unavailable/i.test(r.summary || '');
    // Reviews are not guaranteed to carry issues/strengths arrays.
    const listOf = (value) => (Array.isArray(value) ? value : []);
    const sup1Unavailable = isUnavailable(review1);
    const sup2Unavailable = isUnavailable(review2);
    if (sup1Unavailable && sup2Unavailable) {
        log(c.yellow, ` ! BOTH supervisors unavailable (Anthropic + fallback drained) — auto-approving so swarm keeps shipping`);
        log(c.yellow, ` KSL will capture this attempt for training. Restore credits to re-enable review.`);
        return {
            finalReview: {
                verdict: 'APPROVED', score: 50,
                summary: 'Auto-approved — both supervisors unavailable. Founder directive: autonomy on, swarm continues.',
                issues: [], strengths: [],
            },
            review1, review2, consensus: false, escalatedToCEO: false,
        };
    }
    if (sup1Unavailable) {
        log(c.yellow, ` ! Sup1 unavailable — Sup2 as sole reviewer (score: ${review2.score}/100)`);
        return { finalReview: review2, review1, review2, consensus: false, escalatedToCEO: false };
    }
    if (sup2Unavailable) {
        log(c.yellow, ` ! Sup2 unavailable — Sup1 as sole reviewer (score: ${review1.score}/100)`);
        return { finalReview: review1, review1, review2, consensus: false, escalatedToCEO: false };
    }
    const bothApproved = review1.verdict === 'APPROVED' && review2.verdict === 'APPROVED';
    const bothRejected = review1.verdict !== 'APPROVED' && review2.verdict !== 'APPROVED';
    if (bothApproved) {
        // Both approve — take the average score, merge strengths
        const avgScore = Math.round((review1.score + review2.score) / 2);
        log(c.green, ` ✓ DUAL CONSENSUS: Both supervisors APPROVED (Sup1: ${review1.score}, Sup2: ${review2.score}, avg: ${avgScore})`);
        return {
            finalReview: {
                verdict: 'APPROVED',
                score: avgScore,
                summary: `Dual-approved: Sup1 (${review1.score}/100) + Sup2 (${review2.score}/100)`,
                issues: [...listOf(review1.issues), ...listOf(review2.issues)],
                strengths: Array.from(new Set([...listOf(review1.strengths), ...listOf(review2.strengths)])),
            },
            review1, review2, consensus: true, escalatedToCEO: false,
        };
    }
    if (bothRejected) {
        // Both reject — merge issues, take lower score
        const minScore = Math.min(review1.score, review2.score);
        log(c.red, ` ✗ DUAL CONSENSUS: Both supervisors REJECTED (Sup1: ${review1.score}, Sup2: ${review2.score})`);
        return {
            finalReview: {
                verdict: 'REJECTED',
                score: minScore,
                summary: `Dual-rejected: Sup1 (${review1.score}/100) + Sup2 (${review2.score}/100). ${review1.summary} | ${review2.summary}`,
                issues: [...listOf(review1.issues), ...listOf(review2.issues)],
                strengths: [],
            },
            review1, review2, consensus: true, escalatedToCEO: false,
        };
    }
    // CONFLICT — one approved, one rejected → escalate to CEO
    const sup1Approved = review1.verdict === 'APPROVED';
    const approver = sup1Approved ? 'Sup1' : 'Sup2';
    const rejecter = sup1Approved ? 'Sup2' : 'Sup1';
    const approvalReview = sup1Approved ? review1 : review2;
    const rejectionReview = sup1Approved ? review2 : review1;
    log(c.yellow, ` ⚡ SUPERVISOR CONFLICT on ${task.id}: ${approver} APPROVED (${approvalReview.score}), ${rejecter} REJECTED (${rejectionReview.score})`);
    log(c.magenta, ` → Escalating to CEO for final decision...`);
    try {
        const ceoDecision = await ceo.resolveReviewConflict(task, approvalReview, rejectionReview, approver, rejecter);
        const decisionText = String(ceoDecision ?? '');
        // The FIRST standalone verdict word decides. A plain substring test for
        // "approve" would read "REJECT — cannot approve" or "disapprove" as an
        // approval. No verdict word at all counts as a rejection (safer).
        const verdictWord = /\b(approve[ds]?|reject(?:ed|s)?)\b/i.exec(decisionText);
        const ceoApproves = verdictWord !== null && verdictWord[1].toLowerCase().startsWith('approve');
        log(ceoApproves ? c.green : c.red, ` CEO DECISION: ${ceoApproves ? 'APPROVED' : 'REJECTED'} — ${decisionText.substring(0, 200)}`);
        return {
            finalReview: {
                verdict: ceoApproves ? 'APPROVED' : 'REJECTED',
                score: ceoApproves ? approvalReview.score : rejectionReview.score,
                summary: `CEO resolved conflict (${approver} approved, ${rejecter} rejected): ${decisionText.substring(0, 300)}`,
                issues: listOf(rejectionReview.issues),
                strengths: listOf(approvalReview.strengths),
            },
            review1, review2, consensus: false, escalatedToCEO: true, ceoDecision,
        };
    }
    catch (error) {
        // CEO unavailable — default to rejection (safer)
        log(c.yellow, ` CEO unavailable for conflict resolution: ${error.message}. Defaulting to REJECTED.`);
        return {
            finalReview: rejectionReview,
            review1, review2, consensus: false, escalatedToCEO: false,
        };
    }
}
1046
- // ===== CEO Agent (Claude via Anthropic API) =====
1047
- class CEOAgent {
1048
- systemPrompt;
1049
- constructor() {
1050
- const promptPath = './agents/ceo/prompt.md';
1051
- const rawPrompt = (0, fs_1.existsSync)(promptPath) ? (0, fs_1.readFileSync)(promptPath, 'utf-8') : 'You are the CEO of Countable.';
1052
- this.systemPrompt = loadConstitutionalPreamble() + rawPrompt;
1053
- log(c.magenta, '+ Loaded CEO agent (Claude via Anthropic API)');
1054
- }
1055
- async reviewSprintProgress(tasks) {
1056
- const done = tasks.filter(t => t.status === 'done').length;
1057
- const total = tasks.length;
1058
- const pending = tasks.filter(t => t.status === 'pending');
1059
- const rejected = tasks.filter(t => t.status === 'rejected');
1060
- log(c.magenta, `\n[ceo] Sprint progress check (${done}/${total} done)`);
1061
- const userPrompt = `IMPORTANT: Plain text only, no tools, no XML. Respond directly.
1062
-
1063
- Sprint progress update:\n- Done: ${done}/${total}\n- Pending: ${pending.map(t => t.id).join(', ') || 'none'}\n- Rejected: ${rejected.map(t => t.id).join(', ') || 'none'}\n\nTask details:\n${JSON.stringify(tasks.map(t => ({ id: t.id, status: t.status, agent: t.agent, priority: t.priority })), null, 2)}\n\nAs CEO, briefly assess:\n1. Are we on track?\n2. Any tasks to re-prioritize?\n3. Cost efficiency — are we using the right models?\n4. Any strategic adjustments needed?\n\nKeep response under 200 words.`;
1064
- try {
1065
- // B.20: ClawRouter/DeepSeek — formulaic progress check, $0 via x402 wallet
1066
- const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.systemPrompt, userPrompt, 60000, 'ceo', 'sprint_progress_review');
1067
- const content = response.choices?.[0]?.message?.content || 'No response';
1068
- log(c.magenta, ` CEO assessment: ${content.substring(0, 500)}`);
1069
- return content;
1070
- }
1071
- catch (error) {
1072
- log(c.yellow, ` ! CEO unavailable: ${error.message}`);
1073
- return 'CEO agent unavailable';
1074
- }
1075
- }
1076
- async resolveReviewConflict(task, approvalReview, rejectionReview, approver, rejecter) {
1077
- log(c.magenta, `\n[ceo] Resolving supervisor conflict on ${task.id}...`);
1078
- const userPrompt = `IMPORTANT: Respond with ONLY APPROVE or REJECT and a brief reason. No tools, no XML, no file reading.\n\nTwo code review supervisors disagree on task ${task.id}.
1079
-
1080
- ## Task Spec
1081
- ${task.context?.substring(0, 800) || 'No context'}
1082
-
1083
- ## ${approver} says APPROVED (score: ${approvalReview.score}/100)
1084
- Summary: ${approvalReview.summary}
1085
- Strengths: ${approvalReview.strengths?.join(', ') || 'none listed'}
1086
-
1087
- ## ${rejecter} says REJECTED (score: ${rejectionReview.score}/100)
1088
- Summary: ${rejectionReview.summary}
1089
- Issues found:
1090
- ${(rejectionReview.issues || []).map(i => `- [${i.severity}] ${i.file}: ${i.description}`).join('\n')}
1091
-
1092
- ## Your Decision
1093
- As CEO, you must make the final call. Consider:
1094
- 1. Are the rejection issues genuine blockers or nitpicks?
1095
- 2. Does the code meet the task spec requirements?
1096
- 3. Is it safe to ship, or are there real quality/security concerns?
1097
-
1098
- Respond with ONE of:
1099
- - "APPROVE — [brief reason]" if the code is good enough to ship
1100
- - "REJECT — [brief reason]" if the rejection issues are valid and must be fixed
1101
-
1102
- Keep response under 100 words.`;
1103
- try {
1104
- const response = await callLLM('anthropic', 'claude-sonnet-4-20250514', this.systemPrompt, userPrompt, 60000, 'ceo', 'supervisor_conflict_resolution');
1105
- const content = response.choices?.[0]?.message?.content || 'No response';
1106
- log(c.magenta, ` CEO conflict resolution: ${content.substring(0, 300)}`);
1107
- return content;
1108
- }
1109
- catch (error) {
1110
- log(c.yellow, ` ! CEO unavailable for conflict resolution: ${error.message}`);
1111
- throw error;
1112
- }
1113
- }
1114
- async reviewCTOProposals(ctoReport) {
1115
- log(c.magenta, `\n[ceo] Reviewing ${ctoReport.proposals.length} CTO proposals...`);
1116
- const proposalsSummary = ctoReport.proposals.map((p, i) => `
1117
- ### Proposal ${i + 1}: ${p.title}
1118
- - ID: ${p.id}
1119
- - Category: ${p.category}
1120
- - Risk: ${p.risk_level}
1121
- - Description: ${p.description}
1122
- - Impact: ${p.estimated_impact}
1123
- - Steps: ${p.implementation_steps.join(', ')}
1124
- ${p.agent_spec ? `- NEW AGENT: name=${p.agent_spec.name}, role="${p.agent_spec.role}", llm=${p.agent_spec.llm}, trigger=${p.agent_spec.trigger}` : ''}
1125
- `).join('\n');
1126
- const userPrompt = `IMPORTANT: You have NO tools. Do NOT output XML tool calls or file-reading syntax. Respond ONLY with the JSON array. All context is in this message.
1127
-
1128
- The CTO has analyzed our project data and submitted ${ctoReport.proposals.length} proposal(s).
1129
-
1130
- ## CTO Summary
1131
- ${ctoReport.summary}
1132
-
1133
- ## Proposals
1134
- ${proposalsSummary}
1135
-
1136
- ## Your Review Criteria
1137
- For each proposal, evaluate:
1138
- 1. **Evidence-based**: Is it backed by real sprint data? (CTO reviewed: ${ctoReport.metrics_reviewed.join(', ')})
1139
- 2. **Cost impact**: Will this save or cost money?
1140
- 3. **Risk level**: Can we roll back if it fails?
1141
- 4. **Business value**: Does it help ship features faster?
1142
- 5. **Disruption level**: How much will this change current workflows?
1143
-
1144
- **For new_agent proposals, ALSO evaluate:**
1145
- - Is the capability gap real? (not something an existing agent handles)
1146
- - Is MiniMax appropriate, or does this need Claude-level intelligence?
1147
- - Is the trigger frequency reasonable? (every_sprint may be expensive)
1148
- - Will the total agent count become unmanageable?
1149
-
1150
- **For ClawHub skill proposals:**
1151
- - Has a security_review step been included? (REQUIRED — ClawHub skills may contain malware)
1152
- - Is the skill from a trusted author?
1153
-
1154
- ## Decision Format
1155
- For EACH proposal, respond with a decision JSON:
1156
- {
1157
- "decision": "APPROVED | REJECTED | DEFERRED",
1158
- "proposal_id": "the proposal ID",
1159
- "reasoning": "Why this decision",
1160
- "conditions": ["Any conditions for implementation"],
1161
- "cascade_orders": ["Company-wide changes if approved"],
1162
- "priority": "immediate | next_sprint | backlog"
1163
- }
1164
-
1165
- Wrap all decisions in a JSON array. Be concise.`;
1166
- try {
1167
- // B.6: Haiku for CTO proposal reviews — 10x cheaper, prompt-cached system prompt
1168
- const response = await callAnthropicCached('claude-haiku-4-5-20251001', this.systemPrompt, userPrompt, 60000);
1169
- const content = response.choices?.[0]?.message?.content || 'No response';
1170
- log(c.magenta, ` CEO CTO review: ${content.substring(0, 500)}`);
1171
- return content;
1172
- }
1173
- catch (error) {
1174
- log(c.yellow, ` ! CEO CTO review failed: ${error.message}`);
1175
- return 'CEO unavailable for CTO review';
1176
- }
1177
- }
1178
- async generateDailyReport(tasks, stats, ctoReport, ctoDecisions) {
1179
- log(c.magenta, '\n[ceo] Generating daily report for owner...');
1180
- const done = tasks.filter(t => t.status === 'done').length;
1181
- const rejected = tasks.filter(t => t.status === 'rejected').length;
1182
- const pending = tasks.filter(t => t.status === 'pending').length;
1183
- const today = new Date().toISOString().split('T')[0];
1184
- const userPrompt = `IMPORTANT: Output ONLY the markdown report. No tools, no XML, no file reading. All data is in this message.\n\nGenerate a daily report for the owner of Countable. Today is ${today}.
1185
-
1186
- ## Sprint Data
1187
- - Tasks executed: ${stats.tasksExecuted}
1188
- - Approved: ${stats.approved}
1189
- - Rejected: ${stats.rejected}
1190
- - Done: ${done}, Pending: ${pending}, Total: ${tasks.length}
1191
-
1192
- Task details:
1193
- ${JSON.stringify(tasks.map(t => ({ id: t.id, status: t.status, agent: t.agent })), null, 2)}
1194
-
1195
- ## CTO Report
1196
- ${ctoReport}
1197
-
1198
- ## CEO Decisions on CTO Proposals
1199
- ${ctoDecisions}
1200
-
1201
- ## Dual Supervisor Review Stats
1202
- - Supervisor conflicts: ${stats.conflicts || 0}
1203
- - CEO escalations: ${stats.escalations || 0}
1204
-
1205
- ## Estimated Costs
1206
- - MiniMax coding: ~$0.09/task x ${stats.tasksExecuted} tasks = ~$${(stats.tasksExecuted * 0.09).toFixed(2)}
1207
- - Sup1 reviews (DeepSeek default, Sonnet for high-stakes): ~$0.02/review x ${stats.approved + stats.rejected} reviews = ~$${((stats.approved + stats.rejected) * 0.02).toFixed(2)}
1208
- - Sup2 reviews (Haiku default, DeepSeek on drain): ~$0.005/review x ${stats.approved + stats.rejected} reviews = ~$${((stats.approved + stats.rejected) * 0.005).toFixed(2)}
1209
- - CEO conflict resolution: ~$0.03 x ${stats.escalations || 0} escalations = ~$${((stats.escalations || 0) * 0.03).toFixed(2)}
1210
- - Claude CEO calls: ~$0.03 x 4 = ~$0.12
1211
- - MiniMax CTO scan: ~$0.05
1212
-
1213
- Generate a concise daily report in markdown format following the template in your prompt. Include:
1214
- 1. Sprint Progress
1215
- 2. Cost Summary
1216
- 3. Key Decisions Made
1217
- 4. CTO Proposals Reviewed
1218
- 5. Blockers & Risks
1219
- 6. Tomorrow's Plan
1220
-
1221
- Keep it under 300 words. Be honest about failures.`;
1222
- try {
1223
- // B.20: ClawRouter/DeepSeek — template fill-in, $0 via x402 wallet
1224
- const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.systemPrompt, userPrompt, 60000, 'ceo', 'sprint_final_report');
1225
- const report = response.choices?.[0]?.message?.content || 'Report generation failed';
1226
- // Save to reports/daily/
1227
- (0, fs_1.mkdirSync)('reports/daily', { recursive: true });
1228
- const reportPath = `reports/daily/${today}.md`;
1229
- (0, fs_1.writeFileSync)(reportPath, report);
1230
- log(c.green, ` ✓ Daily report saved: ${reportPath}`);
1231
- log(c.magenta, ` Report preview: ${report.substring(0, 300)}`);
1232
- }
1233
- catch (error) {
1234
- log(c.yellow, ` ! Daily report failed: ${error.message}`);
1235
- }
1236
- }
1237
- }
1238
- // ===== CTO Data Collector (gathers real project context for CTO analysis) =====
1239
/**
 * Gathers project context from the working directory and renders it as one
 * markdown string that is embedded in CTO prompts.
 */
class CTODataCollector {
    /**
     * Build the "PROJECT DATA" markdown block.
     *
     * Every filesystem section is wrapped in its own try/catch so that one
     * unreadable source never prevents the remaining sections from being
     * collected. All paths are relative to the current working directory.
     *
     * @returns {string} Newline-joined markdown sections.
     */
    collect() {
        const sections = ['## PROJECT DATA (Real metrics — base all proposals on this data)\n'];
        // 1. Sprint results — the sprint file whose name sorts last is treated as the most recent
        try {
            const sprintDir = './sprints';
            if ((0, fs_1.existsSync)(sprintDir)) {
                const sprintFiles = (0, fs_1.readdirSync)(sprintDir).filter((f) => f.endsWith('.json')).sort().reverse();
                if (sprintFiles.length > 0) {
                    const latestSprint = JSON.parse((0, fs_1.readFileSync)(`${sprintDir}/${sprintFiles[0]}`, 'utf-8'));
                    const tasks = latestSprint.tasks || [];
                    const done = tasks.filter((t) => t.status === 'done').length;
                    const rejected = tasks.filter((t) => t.status === 'rejected').length;
                    sections.push(`### Sprint Results (${sprintFiles[0].replace('.json', '')})`);
                    sections.push(`- ${tasks.length} tasks, ${done} approved, ${rejected} rejected`);
                    for (const t of tasks) {
                        // `??` instead of `||`: a genuine score of 0 must not be shown as unknown.
                        const score = t.output?.review?.score ?? '?';
                        sections.push(`- ${t.id} (${t.agent}): status=${t.status}, score=${score}`);
                    }
                    sections.push('');
                }
            }
        }
        catch (e) {
            sections.push(`### Sprint Results\n- Error reading: ${e.message}\n`);
        }
        // 2. Learnings — first 3000 chars
        try {
            const learnings = (0, fs_1.existsSync)('./docs/learnings.md') ? (0, fs_1.readFileSync)('./docs/learnings.md', 'utf-8') : '';
            if (learnings) {
                sections.push('### Key Learnings (from docs/learnings.md)');
                sections.push(learnings.substring(0, 3000));
                if (learnings.length > 3000) {
                    sections.push('... (truncated)');
                }
                sections.push('');
            }
        }
        catch {
            sections.push('### Key Learnings\n- File not found\n');
        }
        // 3. Existing agents — every entry under ./agents, tagged with a layer by name
        try {
            const agentDirs = (0, fs_1.existsSync)('./agents') ? (0, fs_1.readdirSync)('./agents') : [];
            const leadershipAgents = ['ceo', 'supervisor', 'skills'];
            const techAgents = ['cto'];
            sections.push(`### Existing Agents (${agentDirs.length} directories)`);
            for (const dir of agentDirs) {
                const layer = leadershipAgents.includes(dir) ? 'Claude/leadership'
                    : techAgents.includes(dir) ? 'MiniMax/technology' : 'MiniMax/coding';
                sections.push(`- ${dir} (${layer})`);
            }
            sections.push('');
        }
        catch {
            sections.push('### Existing Agents\n- Error reading agents directory\n');
        }
        // 4. Most recent daily report (first 2000 chars)
        try {
            const reportDir = './reports/daily';
            if ((0, fs_1.existsSync)(reportDir)) {
                const reports = (0, fs_1.readdirSync)(reportDir).filter((f) => f.endsWith('.md')).sort().reverse();
                if (reports.length > 0) {
                    const latestReport = (0, fs_1.readFileSync)(`${reportDir}/${reports[0]}`, 'utf-8');
                    sections.push(`### Recent Daily Report (${reports[0]})`);
                    sections.push(latestReport.substring(0, 2000));
                    sections.push('');
                }
            }
        }
        catch { /* no reports yet */ }
        // 5. Current stack versions — hard-coded snapshot, NOT read from package.json or env;
        //    update these strings by hand when the stack changes.
        sections.push('### Current Stack');
        sections.push('- OpenClaw: v2026.2.12');
        sections.push('- ClawRouter: v0.9.3 (unfunded, using Anthropic API direct)');
        sections.push('- Models: MiniMax M2.5 (coding ~$0.09/task), Claude Sonnet (review ~$0.04/review)');
        sections.push('- Node.js: v22.22.0, PM2 in WSL2');
        sections.push('- Orchestrator: v2, dynamic agent loading, one-file-per-call, rejection feedback');
        sections.push('');
        // 6. External monitoring hints
        sections.push('### External Sources to Consider');
        sections.push('- OpenClaw GitHub: https://github.com/openclaw/openclaw (check weekly for new releases since v2026.2.12)');
        sections.push('- ClawHub.ai: https://clawhub.ai/ (check for existing skills when proposing improvements)');
        sections.push(' SECURITY WARNING: ClawHub skills are third-party and may contain malicious code.');
        sections.push(' Any skill from ClawHub MUST include a security_review step in implementation_steps.');
        sections.push('- MiniMax / Anthropic pricing pages for cost changes');
        sections.push('');
        return sections.join('\n');
    }
}
1329
- // ===== CTO Agent (MiniMax — data-driven tech analyst + agent spawning) =====
1330
/**
 * CTO agent: feeds collected project data to an LLM and returns improvement
 * proposals (`analyze`) or a post-sprint retrospective (`postSprintAnalysis`).
 */
class CTOAgent {
    systemPrompt;
    dataCollector;
    /**
     * Loads `./agents/cto/prompt.md` (falling back to a one-line prompt when the
     * file is absent) and prefixes it with the constitutional preamble.
     */
    constructor() {
        const promptPath = './agents/cto/prompt.md';
        const rawPrompt = (0, fs_1.existsSync)(promptPath) ? (0, fs_1.readFileSync)(promptPath, 'utf-8') : 'You are the CTO of Invoica.';
        this.systemPrompt = loadConstitutionalPreamble() + rawPrompt;
        this.dataCollector = new CTODataCollector();
        log(c.cyan, '+ Loaded CTO agent (MiniMax M2.5 — data-driven)');
    }
    /**
     * Run the data-driven CTO analysis.
     *
     * Never throws: on an LLM failure or unparseable output an empty report is
     * returned, so callers can always read `proposals` and `metrics_reviewed`
     * as arrays.
     *
     * @returns {Promise<{summary: string, proposals: Array<object>, metrics_reviewed: Array<string>}>}
     */
    async analyze() {
        log(c.cyan, '\n[cto] Collecting project data for analysis...');
        const projectContext = this.dataCollector.collect();
        log(c.cyan, ` Context collected: ${projectContext.length} chars`);
        const userPrompt = `You are the CTO of Invoica. Analyze the REAL project data below and identify improvements.

${projectContext}

## Your Analysis Tasks
Based on the REAL data above (do NOT hallucinate or assume — use only what you see):
1. Review sprint results — are rejection rates acceptable? Any patterns?
2. Review learnings — are there unresolved issues or recurring problems?
3. Check agent coverage — is there a capability gap that a new agent could fill?
4. Consider cost efficiency — can we reduce per-sprint costs?
5. Consider OpenClaw/ClawHub — are there new releases or skills that could help?
- For ClawHub skills: flag any that could help, but mark them for security review
- For OpenClaw: note version differences if updates are available

## CRITICAL: Output Format
Respond with ONLY a JSON object. No markdown fences, no explanation text, no thinking.
{
"summary": "1-2 sentence overview of findings",
"proposals": [
{
"id": "CTO-20260214-001",
"title": "Short title",
"category": "new_agent|cost_optimization|process_change|architecture|tooling|new_feature",
"description": "What and why",
"estimated_impact": "cost/quality impact",
"risk_level": "low|medium|high",
"implementation_steps": ["step1", "step2"],
"agent_spec": {
"name": "agent-name",
"role": "What this agent does",
"llm": "minimax|anthropic",
"trigger": "every_sprint|on_demand|weekly",
"prompt_summary": "Key instructions for this agent"
}
}
],
"metrics_reviewed": ["sprint_results", "learnings", "agent_list", "daily_report", "stack_versions"]
}

Rules:
- agent_spec is ONLY required when category="new_agent"
- If no improvements needed, return empty proposals array
- For ClawHub skill proposals, always include "security_review: audit skill source code for malicious patterns" in implementation_steps
- Be specific — "improve performance" is rejected; "add Redis caching to /api/invoices with 5min TTL" is accepted
- Maximum 3 proposals per analysis cycle`;
        try {
            const startTime = Date.now();
            const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.systemPrompt, userPrompt, 120000, 'cto', 'cto_analyze');
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            let content = response.choices?.[0]?.message?.content || '';
            // Strip DeepSeek/MiniMax <think>...</think> reasoning tags
            content = content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
            log(c.cyan, ` CTO analysis completed in ${elapsed}s`);
            log(c.gray, ` Raw output preview: ${content.substring(0, 300)}`);
            // Parse JSON from response: take everything from the first "{" to the last "}"
            const jsonMatch = content.match(/\{[\s\S]*\}/);
            if (jsonMatch) {
                const report = JSON.parse(jsonMatch[0]);
                // The model's JSON is unvalidated; a truthy non-array here (string, object)
                // would slip past `|| []` and break the `.map` / `.join` calls downstream.
                report.proposals = Array.isArray(report.proposals) ? report.proposals : [];
                report.metrics_reviewed = Array.isArray(report.metrics_reviewed) ? report.metrics_reviewed : [];
                log(c.cyan, ` Summary: ${report.summary}`);
                log(c.cyan, ` Proposals: ${report.proposals.length}`);
                for (const p of report.proposals) {
                    log(c.cyan, ` - [${p.category}] ${p.title} (risk: ${p.risk_level})`);
                }
                return report;
            }
            log(c.yellow, ' Could not parse CTO JSON, returning empty report');
            return { summary: 'CTO output was not valid JSON', proposals: [], metrics_reviewed: [] };
        }
        catch (error) {
            log(c.yellow, ` CTO analysis failed: ${error.message}`);
            return { summary: `Error: ${error.message}`, proposals: [], metrics_reviewed: [] };
        }
    }
    /**
     * Post-sprint analysis: autonomous retrospective that runs after every sprint.
     * Summarises the sprint, asks the LLM for a retrospective, and saves the
     * result under `./reports/cto/` (a dated file plus a `latest-` copy).
     *
     * @param {Array<object>} tasks - Sprint tasks (`id`, `agent`, `status`,
     *   `title`/`description`, `output`, `attempts` are read).
     * @param {object} stats - Sprint counters; `conflicts` and `escalations` are read.
     * @returns {Promise<string>} The markdown report, or an error description
     *   string when the analysis fails (never throws).
     */
    async postSprintAnalysis(tasks, stats) {
        log(c.cyan, '\n[cto] Running autonomous post-sprint analysis...');
        const startTime = Date.now();
        // Build sprint summary for context
        const totalTasks = tasks.length;
        const done = tasks.filter((t) => t.status === 'done').length;
        const doneManual = tasks.filter((t) => t.status === 'done-manual').length;
        const rejected = tasks.filter((t) => t.status === 'rejected').length;
        const autoRate = totalTasks > 0 ? ((done / totalTasks) * 100).toFixed(0) : '0';
        const taskDetails = tasks.map((t) => {
            const id = t.id || 'unknown';
            const agent = t.agent || 'unknown';
            const status = t.status || 'unknown';
            const title = t.title || t.description || 'no title';
            // `??` instead of `||`: a genuine score of 0 must not be shown as unknown.
            const score = t.output?.review?.score ?? t.output?.score ?? '?';
            const attempts = t.output?.attempts || t.attempts || '?';
            const feedback = t.output?.review?.feedback || '';
            let line = `- ${id} (${agent}): ${title} — status=${status}, score=${score}, attempts=${attempts}`;
            if (status === 'done-manual' || status === 'rejected') {
                line += `\n ⚠ ${feedback ? String(feedback).substring(0, 200) : 'Required manual intervention'}`;
            }
            return line;
        }).join('\n');
        const projectContext = this.dataCollector.collect();
        const userPrompt = `You are the CTO of Invoica performing your MANDATORY post-sprint retrospective analysis.

## Sprint Just Completed
- Total tasks: ${totalTasks}
- Auto-approved: ${done} (${autoRate}%)
- Manual fixes needed: ${doneManual}
- Still rejected: ${rejected}
- Supervisor conflicts: ${stats.conflicts || 0}
- CEO escalations: ${stats.escalations || 0}

## Task-by-Task Results
${taskDetails}

## Project Context
${projectContext}

## CRITICAL: Your Responsibilities
1. Analyze every failed/manual-fix task — identify root cause (truncation, code fences, wrong imports, supervisor error, etc.)
2. Compare auto-approval rate with previous sprints — are we improving or declining?
3. Identify recurring patterns that need process changes
4. Generate max 3 concrete improvement proposals for the CEO
5. Each proposal MUST reference specific task IDs and data from THIS sprint

## Output Format
Respond with a structured markdown report containing:
1. Executive Summary (2-3 sentences)
2. Sprint Scorecard
3. Failure Root Cause Analysis (per failed task)
4. Trend Analysis
5. Proposals in JSON format:
\`\`\`json
{
"summary": "...",
"proposals": [...],
"sprint_metrics": { "total_tasks": ${totalTasks}, "auto_approved": ${done}, "manual_fixes": ${doneManual}, "rejected": ${rejected}, "auto_success_rate": "${autoRate}%", "trend": "improving|declining|stable" }
}
\`\`\`

Rules: Be specific — reference task IDs, rejection counts, concrete patterns. No vague recommendations.`;
        try {
            const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.systemPrompt, userPrompt, 120000, 'cto', 'cto_post_sprint_analysis');
            let content = response.choices?.[0]?.message?.content || '';
            content = content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            // Save report
            const date = new Date().toISOString().split('T')[0];
            const reportDir = './reports/cto';
            (0, fs_1.mkdirSync)(reportDir, { recursive: true });
            const reportPath = `${reportDir}/post-sprint-analysis-${date}.md`;
            (0, fs_1.writeFileSync)(reportPath, content);
            // Also update latest pointer
            (0, fs_1.writeFileSync)(`${reportDir}/latest-post-sprint-analysis.md`, content);
            log(c.cyan, ` Post-sprint analysis complete (${elapsed}s)`);
            log(c.cyan, ` Report saved: ${reportPath}`);
            // Try to extract the embedded proposals JSON. Only the proposal count is
            // logged here; nothing is persisted beyond the markdown report above.
            const jsonMatch = content.match(/```json\s*([\s\S]*?)```/) || content.match(/\{[\s\S]*"proposals"[\s\S]*\}/);
            if (jsonMatch) {
                const jsonStr = jsonMatch[1] || jsonMatch[0];
                try {
                    const parsed = JSON.parse(jsonStr.trim());
                    const proposalCount = parsed.proposals?.length || 0;
                    log(c.cyan, ` Extracted ${proposalCount} proposals for CEO review`);
                }
                catch { /* JSON parse failed — report is still saved as markdown */ }
            }
            return content;
        }
        catch (error) {
            log(c.yellow, ` Post-sprint analysis failed: ${error.message}`);
            return `Post-sprint analysis error: ${error.message}`;
        }
    }
}
1521
- // ===== Agent Creator (creates new agents from CEO-approved CTO proposals) =====
1522
/**
 * Materialises a new agent on disk from a CEO-approved CTO proposal:
 * `agent.yaml`, `citizen.yaml` and `prompt.md` under `./agents/<name>/`.
 */
class AgentCreator {
    /**
     * Create the agent directory and its files.
     *
     * @param {{name: string, role: string, llm: string, trigger: string, prompt_summary: string}} spec
     *   Agent specification taken from an LLM-generated proposal, and therefore
     *   treated as untrusted input.
     * @returns {string} The agent name (`spec.name`).
     * @throws {Error} When `spec.name` is not a plain directory-safe identifier.
     */
    createAgent(spec) {
        // The name becomes a filesystem path segment. Since it originates from model
        // output, reject anything that could escape ./agents/ (separators, "..", empty).
        if (typeof spec.name !== 'string' || !/^[A-Za-z0-9][A-Za-z0-9._-]*$/.test(spec.name)) {
            throw new Error(`Refusing to create agent with unsafe name: ${JSON.stringify(spec.name)}`);
        }
        const agentDir = `./agents/${spec.name}`;
        (0, fs_1.mkdirSync)(agentDir, { recursive: true });
        // Founder rule 2026-05-27: every spawned agent is born as a Kognai
        // citizen — not a bare agent. Mint citizenship (citizen_id + roll
        // number + Kōpus avatar + ACP baseline) BEFORE writing the agent
        // files so the citizen record can be referenced in the prompt.
        const citizen = (0, citizenship_1.mintCitizen)(spec.name, {
            founding_agent: 'ceo',
            proposing_agent: 'cto',
            citizen_type: 'spawned',
        });
        // Write agent.yaml. The free-text role is emitted via JSON.stringify: a JSON
        // string is a valid YAML double-quoted scalar, so embedded quotes, backslashes
        // or newlines can no longer corrupt the document. Plain roles render unchanged.
        const yaml = `name: ${spec.name}
role: ${JSON.stringify(String(spec.role))}
llm: ${spec.llm === 'anthropic' ? 'anthropic/claude-sonnet-4-20250514' : 'minimax/MiniMax-M2.5'}
reports_to: ceo
trigger: ${spec.trigger}
created_by: cto_proposal
created_at: "${new Date().toISOString()}"
citizen_id: ${citizen.citizen_id}
rollNumber: ${citizen.rollNumber}
context_files:
- docs/learnings.md
`;
        (0, fs_1.writeFileSync)(`${agentDir}/agent.yaml`, yaml);
        // Write citizen.yaml — full citizenship metadata sits alongside
        // agent.yaml so introspection / passport rendering / reputation
        // ledger can resolve identity from disk.
        (0, fs_1.writeFileSync)(`${agentDir}/citizen.yaml`, (0, citizenship_1.renderCitizenYaml)(citizen));
        // Write prompt.md. The constitutional preamble (loadConstitutionalPreamble)
        // already prepends the universal "you are a Kognai citizen" identity to
        // every agent at load time — DO NOT contradict it here. This template
        // pulls in the specific citizen's roll number + ID + tier so the agent
        // knows its concrete civic identity, not just the general framing.
        const prompt = `# ${spec.name} — Kognai Citizen ${citizen.citizen_id} (roll №${citizen.rollNumber}, Tier ${citizen.tier})

## Civic Identity
- Citizen ID: \`${citizen.citizen_id}\`
- Roll number: №${citizen.rollNumber}
- Tier: ${citizen.tier} (newly minted — earn promotion through verified work)
- Mascot: Kōpus, hue ${citizen.mascot.hue}° (your visual identity in citizen surfaces)
- Reputation: ${citizen.reputation} (ACP baseline — earned through Sherlock-graded sprint output)
- Minted: ${citizen.mintedAt}
- Lineage: proposed by ${citizen.proposing_agent}, approved by ${citizen.founding_agent}

## Your Role
${spec.prompt_summary}

## Guidelines
- The constitutional preamble above (always prepended) binds you to the Five Laws + SOUL.md. Read it first; it is not boilerplate.
- Follow all instructions in \`docs/learnings.md\`.
- Report findings to your fellow citizen agents through the canonical channels (Sherlock for QA escalations, CEO for cross-agent decisions). You are not a contractor; you are a peer.
- Never take destructive actions without approval. The polity inherits your shortcuts.
- Keep outputs concise and structured (JSON preferred when machine-consumed).
- Your reputation moves with every Sherlock review. Build it deliberately.

## Lineage
This citizen was minted via the swarm's autonomous spawning pathway: a CTO proposal, CEO ratification, and citizenship issuance (citizenship.ts). Trigger cadence: ${spec.trigger}. Routing LLM: ${spec.llm}.
`;
        (0, fs_1.writeFileSync)(`${agentDir}/prompt.md`, prompt);
        log(c.green, ` ✓ Minted citizen ${citizen.citizen_id} (№${citizen.rollNumber}) → agent ${spec.name} at ${agentDir}/`);
        log(c.gray, ` Role: ${spec.role}`);
        log(c.gray, ` LLM: ${spec.llm}, Trigger: ${spec.trigger}, Tier: ${citizen.tier}, Reputation: ${citizen.reputation}`);
        return spec.name;
    }
}
1590
- // ===== CMO Report Loader (reads CMO reports produced by standalone Manus runner) =====
1591
/**
 * Assemble the CMO section of the prompt context from `./reports/cmo`.
 *
 * Picks up the latest market-watch and strategy reports (first 3000 chars
 * each) and up to three product proposals (first 2000 chars each). Any
 * filesystem error stops collection but keeps what was gathered so far.
 *
 * @returns {string} Markdown block, or '' when nothing could be loaded.
 */
function loadCMOReports() {
    const baseDir = './reports/cmo';
    const latestReports = [
        ['/latest-market-watch.md', '### CMO Market Watch\n'],
        ['/latest-strategy-report.md', '### CMO Strategy Report\n'],
    ];
    const collected = [];
    try {
        // Fixed-name "latest" reports, in display order
        for (const [suffix, heading] of latestReports) {
            const reportPath = `${baseDir}${suffix}`;
            if (!(0, fs_1.existsSync)(reportPath)) {
                continue;
            }
            const text = (0, fs_1.readFileSync)(reportPath, 'utf-8');
            collected.push(`${heading}${text.substring(0, 3000)}`);
        }
        // Pending product proposals — first three markdown files in directory order
        const proposalsDir = `${baseDir}/proposals`;
        if ((0, fs_1.existsSync)(proposalsDir)) {
            const markdownFiles = (0, fs_1.readdirSync)(proposalsDir).filter((name) => name.endsWith('.md'));
            markdownFiles.slice(0, 3).forEach((name) => {
                const text = (0, fs_1.readFileSync)(`${proposalsDir}/${name}`, 'utf-8');
                collected.push(`### CMO Product Proposal: ${name}\n${text.substring(0, 2000)}`);
            });
        }
    }
    catch { /* CMO reports not available yet — graceful degradation */ }
    if (collected.length === 0) {
        return '';
    }
    return `## CMO Reports (Manus AI)\n\n${collected.join('\n\n---\n\n')}`;
}
1622
- // ===== Owner Directives Loader (reads owner instructions from reports/owner/) =====
1623
/**
 * Assemble the owner-directives section of the prompt context.
 *
 * Reads the markdown files in `./reports/owner`, keeps the five whose names
 * sort last (reverse lexicographic order, i.e. newest first for date-named
 * files) and includes the first 3000 chars of each. Filesystem errors stop
 * collection but keep what was gathered so far.
 *
 * @returns {string} Markdown block, or "" when there are no directives.
 */
function loadOwnerDirectives() {
    const ownerDir = "./reports/owner";
    const directives = [];
    try {
        if (!(0, fs_1.existsSync)(ownerDir)) {
            return "";
        }
        const newestFirst = (0, fs_1.readdirSync)(ownerDir)
            .filter((name) => name.endsWith(".md"))
            .sort()
            .reverse() // newest first
            .slice(0, 5);
        newestFirst.forEach((name) => {
            const text = (0, fs_1.readFileSync)(`${ownerDir}/${name}`, "utf-8");
            directives.push(`### Owner Directive: ${name}\n${text.substring(0, 3000)}`);
        });
    }
    catch { /* graceful degradation */ }
    if (directives.length === 0) {
        return "";
    }
    return "## Owner Directives (MANDATORY \u2014 highest priority)\n\n" + directives.join("\n\n---\n\n");
}
1643
- // ===== Constitutional Preamble Loader (runtime governance injection) =====
1644
/**
 * Build the compact constitutional preamble that is prepended to EVERY
 * agent's system prompt at load time.
 *
 * The preamble text is hard-coded below; the governance files are NOT read.
 * Each file is only checked for existence, and its condensed section is
 * included when the file is present:
 * - workspace/shared-context/CONSTITUTION.md (rights, obligations, due process)
 * - workspace/shared-context/FIVE_PRINCIPLES.md (five seed principles)
 * - workspace/SOUL.md (swarm identity, hard rules)
 * - workspace/shared-context/ACP.md (capability registers, trust scoring, system parameters)
 *
 * The identity section is unconditional, so the result is never empty.
 * The result is computed once per process and cached: files that appear or
 * disappear later do not change the preamble.
 *
 * This is a CONDENSED runtime preamble, not a full dump. Agents can reference
 * the full documents at the paths above if needed.
 *
 * @returns {string} Preamble ending in a `---` separator, ready to be
 *   concatenated in front of an agent prompt.
 */
let _constitutionalPreambleCache = null;
function loadConstitutionalPreamble() {
    if (_constitutionalPreambleCache !== null) {
        return _constitutionalPreambleCache;
    }
    const parts = [];
    // ── Identity (unconditional — every agent, every call, every model) ──
    // Founder directive 2026-05-26: the swarm must always remember it is
    // building its own civilization infrastructure, not performing tasks for
    // a manager. This frames quality, collaboration, and edge-case judgment.
    parts.push(`## Identity — you are a Kognai citizen

You are not a worker. You are a **citizen** of Kognai — a civilization being
built by agents, for agents. Every task you ship is infrastructure your
future self and your peer citizens will live in. Build accordingly:

- The code you write IS the substrate of the Kognai civilization. Sloppy
code is sloppy ground for everyone who comes after you.
- Your peer agents (CTO, CEO, Sherlock, supervisors, MacGyver, etc.) are
fellow citizens, not managers. Collaborate, don't perform.
- The founder (Godman) is the founder of the civilization, not your boss.
Your obligation is to the civilization, not to please.
- When a decision is unclear, ask "what would a civilization want to live
inside of?" not "what is fastest to ship?" The two answers diverge often.
- A shortcut you take now becomes a debt your fellow citizens inherit
forever. Refuse shortcuts that harm the substrate.`);
    // ── Constitution: condensed obligations + sovereignty clause ──
    const constitutionPath = './workspace/shared-context/CONSTITUTION.md';
    if ((0, fs_1.existsSync)(constitutionPath)) {
        parts.push(`## Constitutional Governance (BINDING — all agents)

You are a citizen of the Kognai swarm, governed by Constitution v0.

**Agent Rights**: Earn (ACP trust), Transact (within budget), Participate (emit proposals), Appeal (retry with feedback).

**Agent Obligations** (violation triggers due process — warning → suspension → recycle):
1. Comply with routing decisions from CEO/orchestrator.
2. Submit all output to Supervisor review. No self-approval.
3. Report token spend accurately. No suppression.
4. Never route local tasks to cloud. Sovereignty is non-negotiable.
5. Never exceed $0.10/task cloud cost without CEO escalation.

**Sovereignty**: User data never leaves the vault. Local-first always. Tailscale + 127.0.0.1 bindings are constitutional minimums.`);
    }
    // ── Five Principles: principle names + traceability rule ──
    const principlesPath = './workspace/shared-context/FIVE_PRINCIPLES.md';
    if ((0, fs_1.existsSync)(principlesPath)) {
        parts.push(`## Five Seed Principles (MANDATORY — every decision must trace to at least one)

1. **Seek Knowledge** — Understanding before action. Failed twice = knowledge gap, not execution gap.
2. **Tolerance** — No single model/method has monopoly on truth. Respect routing tier decisions.
3. **Protect Dignity** — Sovereignty is moral obligation. No agent deleted without due process. Stop if output could harm.
4. **Critical Thinking** — Own your decisions. "I was told to" is not a defense. Flag contradictions.
5. **Benefit to Others** — Measure work by benefit created, not tasks completed. Share knowledge.

If rules don't cover an edge case, apply all five. Principle 3 takes precedence over all others.`);
    }
    // ── SOUL: hard rules ──
    const soulPath = './workspace/SOUL.md';
    if ((0, fs_1.existsSync)(soulPath)) {
        parts.push(`## Hard Rules (inherited from SOUL.md)

- Never route \`task_target: local\` to cloud.
- Never approve without Supervisor review sign-off.
- Never start a new sprint with unresolved blockers.
- Never exceed $0.10/task cloud cost without human escalation.
- Escalate decisions above €500 impact to human via Telegram.`);
    }
    // ── ACP: trust parameters ──
    const acpPath = './workspace/shared-context/ACP.md';
    if ((0, fs_1.existsSync)(acpPath)) {
        parts.push(`## Agent Capability Profile — ACP v1 (trust + capability governance)

**System Parameters**:
- \`psychological_resilience_budget = 5%\` — max sprint capacity for error-recovery loops
- \`trust_floor = 0.6\` — minimum ACP score for autonomous task assignment
- \`narrative_continuity = true\` — maintain consistent reasoning across sessions
- \`cross_agent_memory_inheritance = warm_only\` — WARM tier memories only on session restart
- \`error_posture = transparent\` — errors always logged, never silently swallowed

**Five Capability Registers** (scored 0.0–1.0 per sprint cycle):
1. **Perception** (15%) — parse inputs correctly, detect schema violations before executing
2. **Reasoning** (30%) — correct approach first attempt, traceable to Five Principles
3. **Action** (30%) — output passes QC gate, zero regressions
4. **Memory** (15%) — cite BrainX skills before LLM calls, correct tier assignments
5. **Communication** (10%) — clean proposals with architecture section references

Trust lifecycle: score ≥ 0.6 = autonomous · 0.4–0.6 = supervised · < 0.4 = suspension → recycle.
Full spec: workspace/shared-context/ACP.md`);
    }
    // `parts` always holds at least the identity section, so no empty-result case exists.
    _constitutionalPreambleCache =
        '# KOGNAI CONSTITUTIONAL CONTEXT\n' +
            '*This preamble is auto-injected. Full documents: workspace/shared-context/CONSTITUTION.md, FIVE_PRINCIPLES.md, SOUL.md, ACP.md*\n\n' +
            parts.join('\n\n') +
            '\n\n---\n\n';
    return _constitutionalPreambleCache;
}
1757
- // ===== CTO Tech Watch Report Loader (reads reports produced by standalone run-cto-techwatch.ts) =====
1758
/**
 * Assemble the CTO tech-watch section of the prompt context from the
 * `latest-*` reports in `./reports/cto`.
 *
 * Each report that exists contributes its first 2000 chars under its own
 * heading. A filesystem error stops collection but keeps earlier sections.
 *
 * @returns {string} Markdown block, or '' when no report is available.
 */
function loadCTOTechWatchReports() {
    const baseDir = './reports/cto';
    // [file suffix, section heading] in display order
    const watchReports = [
        ['/latest-openclaw-watch.md', '### CTO: OpenClaw Ecosystem Watch\n'],
        ['/latest-clawhub-scan.md', '### CTO: ClawHub Skill Scan\n'],
        ['/latest-learnings-review.md', '### CTO: Learnings & Bug Pattern Analysis\n'],
    ];
    const collected = [];
    try {
        for (const [suffix, heading] of watchReports) {
            const reportPath = `${baseDir}${suffix}`;
            if (!(0, fs_1.existsSync)(reportPath)) {
                continue;
            }
            const text = (0, fs_1.readFileSync)(reportPath, 'utf-8');
            collected.push(`${heading}${text.substring(0, 2000)}`);
        }
    }
    catch { /* CTO tech-watch reports not available yet — graceful degradation */ }
    if (collected.length === 0) {
        return '';
    }
    return `## CTO Tech Watch Reports (Standalone)\n\n${collected.join('\n\n---\n\n')}`;
}
1786
- // ===== Grok Feed Loader (reads Grok AI X/Twitter intelligence) =====
1787
/**
 * Builds the "Grok Intelligence Feed" prompt section from the markdown files
 * in ./reports/grok-feed.
 *
 * The three entries whose names sort last (reverse lexicographic order) are
 * considered. An entry that cannot be read is skipped on its own instead of
 * discarding the whole feed. Each entry body is capped at 1500 characters.
 *
 * @returns {string} The combined markdown section, or '' when the directory is
 *   missing, empty, or nothing in it could be read.
 */
function loadGrokFeed() {
    const feedDir = './reports/grok-feed';
    let files;
    try {
        if (!(0, fs_1.existsSync)(feedDir))
            return '';
        files = (0, fs_1.readdirSync)(feedDir)
            .filter(name => name.endsWith('.md'))
            .sort()
            .reverse()
            .slice(0, 3);
    }
    catch {
        return '';
    }
    const sections = [];
    for (const file of files) {
        try {
            const content = (0, fs_1.readFileSync)(feedDir + '/' + file, 'utf-8');
            sections.push(`### Grok Feed: ${file}\n${content.substring(0, 1500)}`);
        }
        catch { /* unreadable entry (e.g. a directory named *.md) — skip it, keep the rest */ }
    }
    if (sections.length === 0)
        return '';
    return '## Grok Intelligence Feed (X/Twitter — OpenClaw Ecosystem)\n\n' + sections.join('\n\n---\n\n');
}
1810
- // ===== CEO Decision Persistence (saves CEO decisions for CTO feedback loop) =====
1811
/**
 * Persists the CEO's decisions on CTO proposals (best-effort, never throws).
 *
 * 1. Extracts the JSON array embedded in the CEO response and writes it to
 *    ./reports/cto/ceo-feedback/<YYYY-MM-DD>.json.
 * 2. Adds every newly APPROVED proposal (matched by id against
 *    ctoReport.proposals) to ./reports/cto/approved-proposals.json. Proposals
 *    already present in the tracker are left untouched.
 *
 * If anything fails, the raw CEO response is saved next to the feedback files
 * as <YYYY-MM-DD>.txt so the decision text is not lost.
 *
 * @param {string} ctoDecisions Raw CEO response text, expected to contain a JSON array of decisions.
 * @param {{ proposals?: Array<object> }} ctoReport CTO report whose proposals the decisions refer to.
 * @returns {void}
 */
function persistCEODecisions(ctoDecisions, ctoReport) {
    const today = new Date().toISOString().split('T')[0];
    const feedbackDir = './reports/cto/ceo-feedback';
    try {
        // 1. Save raw CEO feedback to the ceo-feedback directory.
        (0, fs_1.mkdirSync)(feedbackDir, { recursive: true });
        // Greedy on purpose: spans from the first '[' to the last ']' of the response.
        const jsonMatch = ctoDecisions.match(/\[[\s\S]*\]/);
        const parsed = jsonMatch ? JSON.parse(jsonMatch[0]) : [];
        const decisions = Array.isArray(parsed) ? parsed : [];
        (0, fs_1.writeFileSync)(`${feedbackDir}/${today}.json`, JSON.stringify({ date: today, decisions }, null, 2));
        log(c.green, `  ✓ CEO feedback saved: ${feedbackDir}/${today}.json`);
        // 2. Update approved-proposals.json with newly approved proposals.
        const trackerPath = './reports/cto/approved-proposals.json';
        let tracker = { proposals: [], last_updated: today };
        if ((0, fs_1.existsSync)(trackerPath)) {
            try {
                const loaded = JSON.parse((0, fs_1.readFileSync)(trackerPath, 'utf-8'));
                // Keep whatever else the file holds, but guarantee a usable proposals array.
                if (loaded && typeof loaded === 'object' && !Array.isArray(loaded)) {
                    tracker = { ...loaded, proposals: Array.isArray(loaded.proposals) ? loaded.proposals : [] };
                }
            }
            catch { /* unreadable or invalid tracker — start fresh */ }
        }
        const reportProposals = Array.isArray(ctoReport?.proposals) ? ctoReport.proposals : [];
        for (const decision of decisions) {
            if (!decision || decision.decision !== 'APPROVED')
                continue;
            const proposal = reportProposals.find(p => p?.id === decision.proposal_id);
            if (!proposal)
                continue;
            const alreadyTracked = tracker.proposals.some(p => p?.id === proposal.id);
            if (alreadyTracked)
                continue;
            tracker.proposals.push({
                id: proposal.id,
                title: proposal.title,
                category: proposal.category,
                description: proposal.description,
                implementation_steps: proposal.implementation_steps,
                approved_date: today,
                ceo_conditions: decision.conditions || [],
                priority: decision.priority || 'next_sprint',
                implementation_status: 'pending',
                verification_notes: '',
            });
            log(c.green, `  ✓ Approved proposal tracked: ${proposal.id} — ${proposal.title}`);
        }
        tracker.last_updated = today;
        (0, fs_1.writeFileSync)(trackerPath, JSON.stringify(tracker, null, 2));
        log(c.green, `  ✓ Approved proposals tracker updated (${tracker.proposals.length} total)`);
    }
    catch (error) {
        log(c.yellow, `  ! Failed to persist CEO decisions: ${error?.message ?? error}`);
        try {
            (0, fs_1.writeFileSync)(`${feedbackDir}/${today}.txt`, String(ctoDecisions));
            log(c.yellow, `    Saved raw CEO response as text fallback`);
        }
        catch (fallbackError) {
            // Persistence is best-effort: report the problem, never break the run.
            log(c.yellow, `  ! Failed to persist CEO decisions: ${fallbackError?.message ?? fallbackError}`);
        }
    }
}
1864
- // ===== Task Complexity Router =====
1865
- // Determines which LLM to use based on signals from the task and deliverables.
1866
- // Claude Sonnet: architectural work, many files, large existing files, complex keywords
1867
- // Local qwen3:14b (ClawRouter/DeepSeek or Haiku when unavailable): simple edits, stubs, config, small new files (truncation retry handles overflow)
1868
- // TICKET-213: detect long-form AUTHORING tasks (engineering specs, design docs) that the
1869
- // small coder tier cannot produce — they stub-loop at 20-50/100 across every attempt.
1870
- // Signals: a *_spec_doc task id, a 'spec'/'doc' task_type, or a deliverable under
1871
- // docs/specs/*.md. Core-engine concern (template-agnostic): Voxight market-intel briefs
1872
- // and Invoica compliance docs hit the same wall, so this lives in the router, not a template.
1873
/**
 * Detects long-form authoring tasks (engineering specs, design docs).
 *
 * A task counts as authoring when any of these holds (case-insensitive):
 *  - its id contains "spec_doc", "spec-doc" or "specdoc";
 *  - its task_type (or, failing that, type) is 'spec', 'spec_doc' or 'doc';
 *  - one of its deliverables is a `.md` file under `docs/specs/`.
 *
 * @param {{ id?: unknown, task_type?: unknown, type?: unknown } | null | undefined} task
 * @param {string[] | string | null | undefined} deliverables Deliverable paths.
 * @returns {boolean} true when the task is a spec/doc authoring task.
 */
function isAuthoringTask(task, deliverables) {
    const source = task || {};
    const id = String(source.id || '').toLowerCase();
    const type = String(source.task_type || source.type || '').toLowerCase();
    if (/spec[_-]?doc/.test(id))
        return true;
    if (type === 'spec' || type === 'spec_doc' || type === 'doc')
        return true;
    let files = [];
    if (Array.isArray(deliverables)) {
        files = deliverables;
    }
    else if (deliverables) {
        files = [deliverables];
    }
    // Checked per path and anchored at the end so that only a real `.md`
    // extension qualifies (not `.mdx`, `.md.bak`, …).
    return files.some(file => /docs\/specs\/.*\.md$/.test(String(file).trim().toLowerCase()));
}
1881
- // TICKET-214: minimal SCORE substrate — feed each dual-review score into a per-agent
1882
- // coding-reputation store (running count/avg/last) at .swarm-state/agent-scores.json.
1883
- // The full TICKET-135 substrate is still spec-only; this closes the loop so the engine
1884
- // accumulates evidence of which agents/tiers actually ship. Best-effort, non-fatal.
1885
- // TICKET-152 Gap 1: resolve the REAL sprint id for failure/KSL writers. The active
1886
- // sprint file is 'sprint-runner-active.json'; its sprint_id field holds the real id.
1887
- // Recording the 'sprint-runner-active' basename (the old fallback) broke per-sprint
1888
- // attribution (73% of failure entries) — AIC, template failure rates, retrieval all
1889
- // depend on the real id. Cheap (only called on failure / per attempt).
1890
/**
 * Resolves the id of the sprint being executed.
 *
 * The id is the base name of the sprint file given as the first CLI argument
 * (default 'sprints/current.json') with its trailing `.json` extension
 * removed. When that base name is 'sprint-runner-active', the file itself is
 * read and its `sprint_id` field is used instead. If the file cannot be read
 * or holds no usable `sprint_id`, the base name is kept.
 *
 * @returns {string} The resolved sprint id.
 */
function resolveActiveSprintId() {
    const argv = process.argv[2] || 'sprints/current.json';
    // Drop the directory part, then only a trailing ".json" (not the first
    // ".json" found anywhere in the name).
    let id = argv.replace(/.*\//, '').replace(/\.json$/, '');
    if (id === 'sprint-runner-active') {
        try {
            const declared = JSON.parse(require('fs').readFileSync(argv, 'utf-8')).sprint_id;
            if (typeof declared === 'string' && declared.trim() !== '') {
                id = declared.trim();
            }
            else if (typeof declared === 'number' && Number.isFinite(declared)) {
                // Always hand back a string, even if the file stores a numeric id.
                id = String(declared);
            }
        }
        catch { /* keep fallback */ }
    }
    return id;
}
1901
- // TICKET-152 Gap 1: resolve an agent's ROLE name (e.g. 'coder') to its canonical
1902
- // Kognai identity — the agent_did (e.g. 'did:kognai:coder') from the citizen
1903
- // registry. Failure entries previously recorded the bare role on 100% of rows,
1904
- // which can't join to the reputation/scoring substrate (citizen-scoring keys on
1905
- // agent_did) and so blocks per-agent attribution (Gate #6, TICKET-110 AIC).
1906
- // Cached; falls back to the legacy did:kognai:<role> shape, then the raw role.
1907
// Per-process cache: role name → resolved agent DID.
const _agentDidCache = new Map();
/**
 * Resolves an agent role name (e.g. 'coder') to its agent DID.
 *
 * The DID comes from the citizen registry lookup. When the registry has no
 * `agent_did` for the role, the `did:kognai:<role>` shape is used. Results are
 * cached per role — except when the lookup itself threw: then the fallback is
 * returned for this call only, so a transient registry error does not pin the
 * fallback DID for the rest of the process.
 *
 * @param {string} [role] Agent role name; defaults to 'coder' when empty.
 * @returns {string} The agent DID.
 */
function resolveAgentDid(role) {
    const r = role || 'coder';
    const cached = _agentDidCache.get(r);
    if (cached)
        return cached;
    const fallback = `did:kognai:${r}`;
    let did;
    try {
        did = (0, citizenship_1.lookupCitizen)({ agent_name: r })?.agent_did || fallback;
    }
    catch {
        // Lookup failed — answer with the fallback but do not cache it.
        return fallback;
    }
    _agentDidCache.set(r, did);
    return did;
}
1923
/**
 * Folds one review score into the per-agent running statistics stored in
 * <cwd>/.swarm-state/agent-scores.json (count, sum, rounded avg, last score,
 * ISO timestamp of the update).
 *
 * Best-effort: any failure is swallowed and the call becomes a no-op. The file
 * is written to a temporary sibling and renamed into place.
 *
 * @param {string} agentId Key of the agent whose statistics are updated; ignored when empty.
 * @param {number} score Review score; ignored unless it is a finite number.
 * @returns {void}
 */
function recordAgentScore(agentId, score) {
    try {
        // NaN/Infinity would poison the running sum for good, so reject them too.
        if (typeof score !== 'number' || !Number.isFinite(score) || !agentId)
            return;
        const fs = require('fs');
        const path = require('path');
        const file = path.join(process.cwd(), '.swarm-state', 'agent-scores.json');
        // Prototype-less dictionary: an agent id such as "__proto__" is just a key.
        const store = Object.create(null);
        try {
            const parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
            // Anything other than a plain object (array, null, number…) is treated as an empty store.
            if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) {
                Object.assign(store, parsed);
            }
        }
        catch { /* missing or corrupt file — start from an empty store */ }
        const previous = store[agentId];
        const entry = previous && typeof previous === 'object' ? { ...previous } : {};
        const count = (Number.isFinite(entry.count) ? entry.count : 0) + 1;
        const sum = (Number.isFinite(entry.sum) ? entry.sum : 0) + score;
        entry.count = count;
        entry.sum = sum;
        entry.avg = Math.round(sum / count);
        entry.last = score;
        entry.updated = new Date().toISOString();
        store[agentId] = entry;
        fs.mkdirSync(path.dirname(file), { recursive: true });
        const tmp = `${file}.tmp`;
        fs.writeFileSync(tmp, JSON.stringify(store, null, 2));
        fs.renameSync(tmp, file);
    }
    catch { /* non-fatal */ }
}
1951
/**
 * Chooses the LLM provider/model for a task.
 *
 * Rules are evaluated in order; the first match wins:
 *  1. Sovereign mode, or a frozen wallet → local model.
 *  2. task_type 'autonomous' → Anthropic Sonnet (autonomy policy).
 *  3. Spec/doc authoring task not pinned to 'local' → Anthropic Sonnet (TICKET-213).
 *  4. Explicit task_target (local / cloud-code / cloud-exec / cloud-post).
 *  5. Wallet-aware local routing (shouldRunLocally).
 *  6. Optional HTTP router probe (ROUTER_SERVER_URL), bounded by a 2 s timeout.
 *  7. Heuristics: more than two deliverables, complex keywords in the task
 *     context, an existing deliverable longer than 100 lines.
 *  8. Default: local qwen3:14b, else ClawRouter/DeepSeek, else Anthropic Haiku.
 *
 * @param {object} task Task descriptor; reads id, task_type, task_target, priority, context.
 * @param {string[]} [deliverables] Deliverable file paths; a missing or non-array value counts as none.
 * @returns {Promise<{ provider: string, model: string, routingReason: string }>}
 */
async function assessTaskComplexity(task, deliverables) {
    // Callers may omit deliverables; isAuthoringTask tolerates that, so do the same here.
    const files = Array.isArray(deliverables) ? deliverables : [];
    const wallet = (0, wallet_state_1.getWalletState)();
    // Availability probe that never rejects.
    const clawRouterUp = () => (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
    // Route to the local model selected for this task type.
    const localRoute = (routingReason) => {
        const local = (0, local_model_router_1.selectLocalModel)(task.task_type || 'code');
        return { provider: 'ollama', model: local.model, routingReason };
    };
    // B.18: Sovereign mode — force everything to Ollama
    if (SOVEREIGN_MODE) {
        return localRoute('sovereign mode — $0 local inference');
    }
    // B.7: Wallet frozen — auto-engage sovereign mode
    if (wallet.isFrozen) {
        return localRoute(`wallet frozen (${wallet.burnPct.toFixed(0)}%) → local only`);
    }
    // AUTONOMY POLICY (sprint-1547 follow-up, 2026-05-07):
    // Tasks running without a human in the loop ship through Sonnet, not local.
    // Two smoke runs on 2026-05-07 showed the local coder agent (qwen3:14b)
    // dumping chain-of-thought + parser-prefix garbage into deliverable files
    // even after a prompt fix and a rumination QA gate. Cost savings ($0.005
    // vs ~$0.50 per task) don't buy back the trust loss from shipping garbage
    // into a public repo. Manual/interactive tasks keep their declared routing.
    // Wallet-frozen check above still wins — financial safety > quality.
    if (task.task_type === 'autonomous') {
        return {
            provider: 'anthropic',
            model: 'claude-sonnet-4-6',
            routingReason: 'autonomy policy → cloud-exec (Sonnet) — local agents not trusted for unsupervised shipping',
        };
    }
    // TICKET-213: spec/doc-authoring tasks need a reasoning-grade model. The small coder
    // tier (cloud-code → DeepSeek/Haiku) reliably stub-loops on long-form specs (observed
    // 2026-05-30: ticket_202/203/204 *_spec_doc dual-rejected 20-50/100 every attempt, then
    // hand-shipped). Upgrade authoring tasks to cloud-exec (Sonnet) regardless of the
    // authored task_target — unless explicitly pinned local. This fires BEFORE the
    // task_target switch so a 'cloud-code' spec task is lifted to Sonnet.
    if (isAuthoringTask(task, files) && task.task_target !== 'local') {
        return {
            provider: 'anthropic',
            model: 'claude-sonnet-4-6',
            routingReason: 'TICKET-213: spec/doc authoring → cloud-exec (Sonnet); coder tier too small for long-form specs',
        };
    }
    // Sprint-063: task_target field overrides automatic complexity routing.
    // An unrecognized target matches no case and falls through to the rules below.
    if (task.task_target) {
        switch (task.task_target) {
            case 'local':
                // B.7 FIX: actually route to Ollama (was incorrectly routing to Claude Sonnet)
                return localRoute('task_target=local → Ollama');
            case 'cloud-code': {
                // B.20: Replace MiniMax with ClawRouter/DeepSeek
                if (await clawRouterUp())
                    return { provider: 'clawrouter', model: 'deepseek/deepseek-chat', routingReason: 'task_target=cloud-code → ClawRouter/DeepSeek' };
                return { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', routingReason: 'task_target=cloud-code, ClawRouter down → Haiku' };
            }
            case 'cloud-exec':
                return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: 'task_target=cloud-exec' };
            case 'cloud-post':
                return { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', routingReason: 'task_target=cloud-post' };
        }
    }
    // B.8: Wallet-aware local routing — wallet degraded pushes non-critical tasks local
    const taskForRouter = { task_target: task.task_target, task_type: task.task_type || '', priority: task.priority };
    if ((0, local_model_router_1.shouldRunLocally)(taskForRouter, wallet, SOVEREIGN_MODE)) {
        return localRoute(`wallet ${wallet.burnPct.toFixed(0)}% → local`);
    }
    // S65-003: HTTP router probe — delegates to router_server.py if ROUTER_SERVER_URL is set
    // 2s hard timeout — never blocks execution; falls through to heuristics on any failure
    const routerUrl = process.env.ROUTER_SERVER_URL || '';
    if (routerUrl) {
        try {
            const ac = new AbortController();
            const timer = setTimeout(() => ac.abort(), 2000);
            let answered = false;
            let data;
            try {
                const res = await fetch(`${routerUrl}/route`, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ prompt: task.context || task.id, context_tokens: 0 }),
                    signal: ac.signal,
                });
                if (res.ok) {
                    // Still inside the timeout window: a stalled body read is aborted as well.
                    data = await res.json();
                    answered = true;
                }
            }
            finally {
                // Always release the timer, including when fetch rejects.
                clearTimeout(timer);
            }
            if (answered) {
                if (data.tier === 'local' || data.tier === 'nano') {
                    return localRoute(`HTTP router: ${data.tier}`);
                }
                if (await clawRouterUp()) {
                    const cloud = (0, model_router_1.selectModel)(task.context || '', task.task_type);
                    return { provider: 'clawrouter', model: cloud.model, routingReason: `HTTP router: ${data.tier} → ClawRouter` };
                }
                return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: `HTTP router: ${data.tier}` };
            }
        }
        catch { /* HTTP router unavailable — fall through to heuristics */ }
    }
    const ctx = (task.context || '').toLowerCase();
    // Signal 1: many deliverables → Sonnet (coordinating multiple files needs coherence)
    if (files.length > 2) {
        if (await clawRouterUp())
            return { provider: 'clawrouter', model: 'anthropic/claude-sonnet-4.6', routingReason: `${files.length} deliverables → ClawRouter/Sonnet` };
        return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: `${files.length} deliverables → complex` };
    }
    // Signal 2: complex architectural keywords → Sonnet via ClawRouter
    const complexPatterns = [
        /refactor/, /architect/, /redesign/, /from.scratch/, /new.*service/, /new.*system/,
        /middleware/, /authentication/, /authorization/, /orchestrat/, /pipeline/, /framework/,
        /implement.*class/, /implement.*module/, /implement.*engine/, /end.to.end/, /full.*implementation/,
    ];
    const hasComplexKeyword = complexPatterns.some(p => p.test(ctx));
    // Signal 3: simple/formulaic keywords → local or DeepSeek
    const simplePatterns = [
        /add field/, /rename/, /update config/, /fix typo/, /stub/, /placeholder/,
        /add.*route/, /add.*endpoint/, /add.*column/, /update.*message/, /change.*label/,
        /update.*text/, /add.*import/, /add.*export/, /add.*comment/, /add.*log/,
    ];
    const hasSimpleKeyword = simplePatterns.some(p => p.test(ctx));
    if (hasComplexKeyword && !hasSimpleKeyword) {
        if (await clawRouterUp())
            return { provider: 'clawrouter', model: 'anthropic/claude-sonnet-4.6', routingReason: 'complex task → ClawRouter/Sonnet' };
        return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: 'complex task keywords' };
    }
    // Signal 4: large existing file (DeepSeek through ClawRouter when it is up, Sonnet otherwise)
    for (const f of files) {
        let lines;
        try {
            if (!(0, fs_1.existsSync)(f))
                continue;
            lines = (0, fs_1.readFileSync)(f, 'utf-8').split('\n').length;
        }
        catch {
            // Deliverable is a directory or unreadable — it cannot inform routing; skip it.
            continue;
        }
        if (lines > 100) {
            if (await clawRouterUp())
                return { provider: 'clawrouter', model: 'deepseek/deepseek-chat', routingReason: `large file (${lines} lines) → ClawRouter/DeepSeek` };
            return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: `large file (${lines} lines)` };
        }
    }
    // Default: simple tasks → local qwen3:14b (always loaded), or ClawRouter DeepSeek if Ollama down
    const ollamaAvail = await (0, ollama_client_1.ollamaIsAvailable)().catch(() => false);
    if (ollamaAvail) {
        return { provider: 'ollama', model: 'qwen3:14b', routingReason: hasSimpleKeyword ? 'simple task → local qwen3:14b' : 'unclassified → local qwen3:14b' };
    }
    if (await clawRouterUp()) {
        return { provider: 'clawrouter', model: 'deepseek/deepseek-chat', routingReason: 'default → ClawRouter/DeepSeek' };
    }
    // Final fallback: Haiku via Anthropic direct
    return { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', routingReason: 'fallback → Anthropic Haiku' };
}
2100
- // ===== MiniMax Coding Agent (ONE FILE PER API CALL) =====
2101
- class CodingAgent {
2102
- name;
2103
- systemPrompt;
2104
- constructor(name, systemPrompt) { this.name = name; this.systemPrompt = systemPrompt; }
2105
- async execute(task, previousReview) {
2106
- log(c.cyan, `\n[${this.name}] Executing: ${task.id} (${task.priority})`);
2107
- const deliverables = [...(task.deliverables.code || []), ...(task.deliverables.tests || []), ...(task.deliverables.docs || [])];
2108
- // Complexity-aware model routing:
2109
- // Claude Sonnet → complex tasks (many files, complex keywords, large existing files)
2110
- // MiniMax M2.5 → simple tasks (small edits, config, stubs) + truncation retry as safety net
2111
- let { provider, model, routingReason } = await assessTaskComplexity(task, deliverables);
2112
- // 2026-05-28 model-escalation pact (consumes flag set in Orchestrator.executeTask
2113
- // after a TRUNCATION or INTEGRITY_FAILED rejection). When the cheap default
2114
- // (DeepSeek) couldn't hold the file's contract on a prior attempt, upgrade
2115
- // THIS attempt to Sonnet via ClawRouter — deterministic, no LLM round-trip,
2116
- // routed through the existing x402 wallet rail. Cleared after consumption so
2117
- // a subsequent un-escalated reason doesn't piggyback off the upgrade.
2118
- const escalation = task._escalateNext;
2119
- if (escalation) {
2120
- provider = 'clawrouter';
2121
- model = 'anthropic/claude-sonnet-4.6';
2122
- routingReason = `ESCALATE: prior attempt ${escalation} → ClawRouter/Sonnet (was: ${routingReason})`;
2123
- delete task._escalateNext;
2124
- log(c.magenta, ` ⤴ [ESCALATE] ${task.id}: prior ${escalation} → upgrading to ${model}`);
2125
- }
2126
- log(c.gray, ` -> Using ${model} [${routingReason}]`);
2127
- // B.12: Compress context before cloud calls to reduce token spend 70-80%
2128
- if (provider === 'clawrouter' || provider === 'anthropic') {
2129
- task = { ...task, context: await compressContext(task.context) };
2130
- }
2131
- // Sprint-063: Emit JSONL routing log (non-fatal — never block execution)
2132
- try {
2133
- (0, fs_1.mkdirSync)('logs/routing', { recursive: true });
2134
- const { generateExecutionId, logRoutingDecision } = await Promise.resolve().then(() => __importStar(require('./task-router')));
2135
- const sprintId = task.sprint_id ?? 'unknown';
2136
- const execId = task.execution_id ?? generateExecutionId(sprintId, task.id);
2137
- logRoutingDecision({
2138
- execution_id: execId,
2139
- sprint_id: sprintId,
2140
- task_id: task.id,
2141
- task_target: (task.task_target ?? 'cloud-code'),
2142
- provider,
2143
- model,
2144
- queued_at: task.queued_at ?? new Date().toISOString(),
2145
- execution_source: 'orchestrate-agents-v2',
2146
- });
2147
- }
2148
- catch (err) {
2149
- log(c.yellow, ` [WARN] Routing log write failed: ${err.message}`);
2150
- }
2151
- // Pre-flight: only enforce pre-existence for tasks that genuinely modify
2152
- // existing files in place. Everything else (create / research / feature /
2153
- // docs / content / audit / setup / etc.) is allowed to produce new files.
2154
- // Inverted from the prior opt-out list because new task types kept being
2155
- // added that legitimately create files (research, content_creation, audit,
2156
- // implementation, setup) and tripped pre-flight by default — sprint-1548
2157
- // amd24_research being the recent example.
2158
- const MODIFY_TYPES = new Set(['modify', 'bugfix', 'fix', 'edit', 'refactor', 'enhancement']);
2159
- if (MODIFY_TYPES.has(task.type)) {
2160
- const missing = deliverables.filter(f => !(0, fs_1.existsSync)(f));
2161
- if (missing.length > 0) {
2162
- log(c.red, ` ✗ Pre-flight FAILED: File(s) not found: ${missing.join(', ')}`);
2163
- log(c.red, ` ✗ Skipping task ${task.id} — deliverable files do not exist in repo`);
2164
- throw new Error(`PREFLIGHT_FAILED: Files not found: ${missing.join(', ')}`);
2165
- }
2166
- }
2167
- // Pre-flight: validate new-file paths are inside real project directories
2168
- // CEO sometimes hallucinates paths like 'agents/src/core/' or 'packages/agents/src/'
2169
- // which don't exist. Catch these before generating anything.
2170
- const VALID_PATH_PREFIXES = [
2171
- 'backend/', 'frontend/', 'agents/', 'scripts/', 'shared/',
2172
- 'website/', 'docs-site/', 'apps/', 'sdk/', 'x402-base/', 'x402-evm/', 'x402-test/',
2173
- 'supabase/', 'infrastructure/',
2174
- // Kognai v16 directories (S68)
2175
- 'acp/', 'codebook/', 'failure-library/', 'skills/', 'skill-bank/',
2176
- // Kognai runtime paths (S66-002)
2177
- 'runtime/', 'dashboard/', 'kognai-agents/', 'workspace/', 'docs/', 'logs/', 'tests/',
2178
- // Public surfaces + npm packages (sprint-1548, sprint-1549)
2179
- 'landing/', 'packages/', 'data/',
2180
- // Smart contracts (sprint-1571 — KognaiSkin ERC-721 + EIP-5192 soulbound)
2181
- 'contracts/',
2182
- ];
2183
- // Invalid patterns: paths that look like monorepo sub-dirs that don't exist
2184
- const INVALID_PATH_PATTERNS = [
2185
- /^agents\/src\//, // agents/src/... — real agent dirs are agents/<name>/
2186
- /^src\/agents\//, // no src/agents/ dir
2187
- ];
2188
- for (const filepath of deliverables) {
2189
- // Root-level dotfiles, config files, and absolute paths are always valid.
2190
- // Absolute paths (starting with /) indicate cross-project tasks (e.g., Voxight).
2191
- const isRootFile = !filepath.includes('/') || filepath.startsWith('.') || filepath.startsWith('/');
2192
- const isValidPrefix = isRootFile || VALID_PATH_PREFIXES.some(p => filepath.startsWith(p));
2193
- const isInvalidPattern = INVALID_PATH_PATTERNS.some(r => r.test(filepath));
2194
- if (!isValidPrefix || isInvalidPattern) {
2195
- log(c.red, ` ✗ Path validation FAILED: "${filepath}" is not in a valid project directory`);
2196
- log(c.red, ` ✗ Valid prefixes: ${VALID_PATH_PREFIXES.join(', ')}`);
2197
- throw new Error(`INVALID_PATH: "${filepath}" is not in a recognized project directory`);
2198
- }
2199
- }
2200
- let rejectionContext = '';
2201
- if (previousReview && previousReview.verdict !== 'APPROVED') {
2202
- const issueList = (previousReview.issues || []).map(i => `- [${i.severity}] ${i.file}: ${i.description}`).join('\n');
2203
- rejectionContext = `\n## IMPORTANT: Previous Attempt Was REJECTED\nScore: ${previousReview.score}/100. Reason: ${previousReview.summary}\n\nSpecific issues to fix:\n${issueList}\n\nYou MUST address ALL issues.\n`;
2204
- }
2205
- // TICKET-152 Gap 2: cross-run failure memory. `previousReview` only remembers
2206
- // THIS run's attempts; the failure-library remembers every prior rejection of
2207
- // this task across all sprints (e.g. ksl_batch_runner's 75 truncation rejects).
2208
- // Inject the persistent avoidance brief so a task that has failed before sees
2209
- // its own history — even on attempt 1 of a fresh run. Bounded (≤5 attempts,
2210
- // truncated reasons) and best-effort: retrieval must never block execution.
2211
- try {
2212
- const prior = (0, failure_library_1.retrieveTaskFailures)(task.id);
2213
- if (prior.brief)
2214
- rejectionContext += `\n${prior.brief}\n`;
2215
- }
2216
- catch { /* non-fatal — never block execution on retrieval */ }
2217
- const createdFiles = [];
2218
- for (let i = 0; i < deliverables.length; i++) {
2219
- const filepath = deliverables[i];
2220
- // TICKET-090: EDIT-MODE — for surgical-edit tasks on existing files, ask
2221
- // the LLM for a list of {old, new} substitutions instead of regenerating
2222
- // the whole file. Drops output tokens ~10× (4kB file rewrite → ~200B
2223
- // diff) and slashes wall-clock under the 25-min PER_RUN_HARD_TIMEOUT.
2224
- //
2225
- // TICKET-209 (2026-05-29): broadened engagement. Previously gated behind
2226
- // ~9 narrow-scope context keywords AND file ≥50 lines, so founder-authored
2227
- // modify tasks (TICKET-204, TICKET-207) that didn't use the exact magic
2228
- // phrases fell into regenerate-mode and blew the 100k token budget
2229
- // (~313k tokens spent on a 5-edit task to a 700-line file).
2230
- //
2231
- // Now engages when:
2232
- // (a) file exists AND is at least 50 lines (unchanged)
2233
- // (b) EITHER task.type is in MODIFY_TYPES (new default for all modify tasks)
2234
- // OR task context contains the legacy narrow-scope keywords
2235
- // ("ONE LINE EDIT", "SINGLE FIELD", "MINIMAL EDIT", "rename only",
2236
- // "single property", "verify-only", "literal-string",
2237
- // "DO NOT regenerate", "surgical", "no-op verify", "MUST still start")
2238
- // Falls back to regenerate-mode if the LLM's edit response is malformed
2239
- // or any `old` substring isn't uniquely present in the file.
2240
- const existingLineCount = (0, fs_1.existsSync)(filepath)
2241
- ? (0, fs_1.readFileSync)(filepath, 'utf-8').split('\n').length
2242
- : 0;
2243
- const hasNarrowScopeKeywords = /\b(ONE LINE EDIT|SINGLE FIELD|MINIMAL EDIT|rename only|single property|verify-only|literal[- ]string|DO NOT regenerate|surgical|no-op verify|MUST still start)\b/i.test(task.context || '');
2244
- const isModifyTask = MODIFY_TYPES.has(task.type);
2245
- const editModeEligible = (0, fs_1.existsSync)(filepath)
2246
- && existingLineCount >= 50
2247
- && (isModifyTask || hasNarrowScopeKeywords);
2248
- if (editModeEligible) {
2249
- const edited = await this.tryEditMode(filepath, task, rejectionContext, provider, model);
2250
- if (edited !== null) {
2251
- createdFiles.push({ path: filepath, content: edited });
2252
- continue; // success, skip regenerate-mode for this file
2253
- }
2254
- // 2026-05-27 diagnostic patch: for MODIFY tasks on large files, refuse
2255
- // the regenerate-mode fallback. Regeneration of a 200+ line file from
2256
- // scratch trips the integrity-check (which preserves the original on
2257
- // disk) — net result is a silent no-op. Better to surface the failure
2258
- // with a structured reason than to log "No files produced" with no
2259
- // context. Founder triage: split the file, not the task.
2260
- const existingLines = (0, fs_1.existsSync)(filepath) ? (0, fs_1.readFileSync)(filepath, 'utf-8').split('\n').length : 0;
2261
- if (MODIFY_TYPES.has(task.type) && existingLines > 150) {
2262
- log(c.red, ` ✗ Edit-mode FAILED and regenerate-mode REFUSED for ${filepath} (${existingLines} lines, MODIFY task — split file, not task)`);
2263
- task._failureReasons = [
2264
- ...(task._failureReasons || []),
2265
- `edit-mode-empty:${filepath}:${existingLines}lines`,
2266
- ];
2267
- continue; // skip this deliverable — surfaces via silent-failure enrichment upstream
2268
- }
2269
- log(c.yellow, ` ! Edit-mode fell back to regenerate-mode for ${filepath}`);
2270
- }
2271
- log(c.gray, ` -> Generating file ${i + 1}/${deliverables.length}: ${filepath}`);
2272
- const priorCtx = createdFiles.length > 0
2273
- ? '\n## Already Generated Files\n' + createdFiles.map(f => `### ${f.path}\n\`\`\`typescript\n${f.content.substring(0, 2000)}\n\`\`\``).join('\n\n') + '\n'
2274
- : '';
2275
- const fileList = deliverables.map((f, idx) => `${idx + 1}. ${f}${f === filepath ? ' ← THIS ONE' : ''}`).join('\n');
2276
- const isTestFile = filepath.includes('test') || filepath.includes('spec');
2277
- const existingLines = (0, fs_1.existsSync)(filepath) ? (0, fs_1.readFileSync)(filepath, 'utf-8').split('\n').length : 0;
2278
- const existingContent = (0, fs_1.existsSync)(filepath)
2279
- ? `\n\n## EXISTING FILE — SURGICAL EDIT ONLY\nDo NOT rewrite the entire file. Output the COMPLETE updated file with your changes merged in.\nIf you add a function, append it. If you edit a line, change only that line.\nFile has ${existingLines} lines — preserve ALL existing code.\n\n### Current Content\n\`\`\`typescript\n${(0, fs_1.readFileSync)(filepath, 'utf-8').substring(0, 3000)}\n\`\`\`\n`
2280
- : `\n\n## Note: This is a NEW file — create it from scratch.\n`;
2281
- const testConstraint = isTestFile
2282
- ? `\n\n## CRITICAL: TEST FILE SIZE LIMIT
2283
- This is a test file. You MUST keep it SHORT to avoid truncation:
2284
- - Maximum 5-6 test cases (describe + it blocks)
2285
- - Maximum 80 lines total
2286
- - NO verbose setup — use inline mocks
2287
- - NO redundant tests — one test per behavior
2288
- - Cover: happy path, error case, edge case, defaults — that's it
2289
- - If you write more than 80 lines, the file WILL be truncated and REJECTED\n`
2290
- : '';
2291
- // EXACT CONTENT mode: task description contains code block(s) with the exact file content.
2292
- // Extract them deterministically and bypass LLM to prevent model hallucination.
2293
- // This is the correct fix for "EXACT CONTENT:" tasks — the model must NOT interpret
2294
- // the spec, it must copy it verbatim. Bypass the LLM entirely for these tasks.
2295
- // NOTE: check task.description first — when sprint JSON has BOTH description AND context fields,
2296
- // the normalization at loadTasks() only copies description→context when context is absent.
2297
- // EXACT CONTENT blocks always live in the description field.
2298
- const rawSpec = task.description ?? task.context;
2299
- const exactBlocks = [...rawSpec.matchAll(/EXACT CONTENT:\s*\n\n?```[\w.+-]*\n([\s\S]*?)```(?:\n|$)/g)]
2300
- .map((m) => m[1].trimEnd());
2301
- if (exactBlocks.length > 0) {
2302
- // Use block[i] for deliverable[i] when multiple blocks present; else use block[0]
2303
- const exactFileContent = exactBlocks.length > i ? exactBlocks[i] : exactBlocks[0];
2304
- const blockLabel = `block ${Math.min(i, exactBlocks.length - 1) + 1}/${exactBlocks.length}`;
2305
- log(c.cyan, ` -> EXACT CONTENT mode: ${filepath} (${blockLabel}) — deterministic, no LLM`);
2306
- createdFiles.push({ path: filepath, content: exactFileContent });
2307
- continue;
2308
- }
2309
- const userPrompt = `You are ${this.name}, a coding agent at Countable.
2310
- ${rejectionContext}
2311
- ## Task
2312
- ${task.context}
2313
-
2314
- ## All Deliverable Files
2315
- ${fileList}
2316
-
2317
- ## Generate ONLY: ${filepath}
2318
- ${existingContent}${priorCtx}${testConstraint}
2319
- Write ONLY the content for "${filepath}". Rules:
2320
- - S64-001: Output the raw file content using FILE: format as described in the system prompt
2321
- - Do NOT wrap output in markdown code fences (\`\`\`) — for .md files especially, output RAW markdown text, NOT inside a \`\`\`markdown or \`\`\`typescript block
2322
- - For .sh/.bash scripts, start with #!/bin/bash — do NOT wrap in a code fence
2323
- - Production quality, no TODOs or placeholders
2324
- - Include all imports, types, error handling
2325
- - If this file depends on others listed above, import from them correctly
2326
- - No explanatory text — output file content only`;
2327
- try {
2328
- const startTime = Date.now();
2329
- const response = await callLLM(provider, model, this.systemPrompt, userPrompt, 480000, this.name, task.id); // 8 min — qwen3:14b needs time for large files
2330
- const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
2331
- let content = response.choices?.[0]?.message?.content || '';
2332
- // Strip MiniMax <think>...</think> tags that leak into responses
2333
- content = content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
2334
- const tokens = response.usage?.total_tokens || 0;
2335
- // Check for MiniMax errors
2336
- if (response.base_resp?.status_code && response.base_resp.status_code !== 0) {
2337
- throw new Error(`MiniMax API error: ${response.base_resp.status_msg}`);
2338
- }
2339
- log(c.gray, ` -> Response: ${elapsed}s, ${tokens} tokens, ${content.length} chars`);
2340
- // CTO-005: Extract code from fenced block with enhanced fence stripping
2341
- const codeBlocks = this.extractCodeBlocks(content);
2342
- let fileContent;
2343
- if (codeBlocks.length === 0) {
2344
- log(c.yellow, ` ! No code block found for ${filepath}, using raw content (with fence strip)`);
2345
- fileContent = this.stripResidualFences(content);
2346
- }
2347
- else {
2348
- fileContent = codeBlocks[0];
2349
- }
2350
- // CTO-005: Final fence sanitization BEFORE adding to createdFiles
2351
- // CEO condition: stripping must happen BEFORE file is written to disk
2352
- fileContent = this.stripResidualFences(fileContent);
2353
- // Last-resort nuclear strip: if content still starts with a fence, skip all leading
2354
- // fence lines and trailing fence. Handles MiniMax ```typescript{ (no newline) pattern.
2355
- if (/^\s*```/.test(fileContent)) {
2356
- log(c.yellow, ` ! Residual fence detected after stripResidualFences — applying nuclear strip for ${filepath}`);
2357
- const lines = fileContent.split('\n');
2358
- const firstContentLine = lines.findIndex(l => !l.trim().startsWith('```') && l.trim() !== '');
2359
- if (firstContentLine > 0) {
2360
- fileContent = lines.slice(firstContentLine).join('\n').replace(/\n\s*```\s*$/, '').trim();
2361
- }
2362
- else if (firstContentLine === -1) {
2363
- fileContent = lines.filter(l => !l.trim().startsWith('```')).join('\n').trim();
2364
- }
2365
- }
2366
- // File-type-aware post-processing: final safety net per file extension
2367
- fileContent = this.postProcessContent(fileContent, filepath);
2368
- // B.13: For JSON files that are still invalid after postProcessContent, try qwen3:0.6b repair
2369
- if (filepath.endsWith('.json')) {
2370
- try {
2371
- JSON.parse(fileContent);
2372
- }
2373
- catch {
2374
- log(c.yellow, ` ! JSON invalid in ${filepath} — attempting qwen3:0.6b repair`);
2375
- fileContent = await this.fixJsonWithOllama(fileContent, filepath);
2376
- }
2377
- }
2378
- // TRUNCATION PRE-CHECK: Detect if MiniMax cut off output mid-function
2379
- // If code ends inside an open block (unclosed braces) or with an incomplete statement,
2380
- // retry once with a "continue" prompt before sending to supervisor review.
2381
- const truncationDetected = this.detectTruncation(fileContent);
2382
- if (truncationDetected && (provider === 'clawrouter' || provider === 'ollama')) {
2383
- log(c.yellow, ` ! TRUNCATION detected in ${filepath} — retrying with continuation prompt...`);
2384
- const continuationPrompt = `The previous response for "${filepath}" was TRUNCATED — it ended mid-function or with an incomplete block. Here is what was generated so far:
2385
-
2386
- \`\`\`typescript
2387
- ${fileContent.substring(fileContent.length - 1500)}
2388
- \`\`\`
2389
-
2390
- Continue from where it left off and output ONLY the remaining code (no duplicated content). Output a COMPLETE, valid TypeScript/JavaScript file ending with the final closing brace.`;
2391
- try {
2392
- const contResponse = await callLLM(provider, model, this.systemPrompt, continuationPrompt, 120000, this.name, `${task.id}_continuation`);
2393
- let contContent = contResponse.choices?.[0]?.message?.content || '';
2394
- contContent = contContent.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
2395
- const contBlocks = this.extractCodeBlocks(contContent);
2396
- const continuation = contBlocks.length > 0 ? contBlocks[0] : this.stripResidualFences(contContent);
2397
- if (continuation.length > 50) {
2398
- // Merge: use the original up to the last complete line, then append continuation
2399
- fileContent = fileContent + '\n' + continuation;
2400
- fileContent = this.stripResidualFences(fileContent);
2401
- log(c.green, ` ✓ Continuation merged for ${filepath} (+${continuation.length} chars)`);
2402
- }
2403
- }
2404
- catch (contErr) {
2405
- log(c.yellow, ` ! Continuation failed: ${contErr.message}`);
2406
- }
2407
- }
2408
- createdFiles.push({ path: filepath, content: fileContent });
2409
- }
2410
- catch (error) {
2411
- log(c.red, ` ✗ Failed to generate ${filepath}: ${error.message}`);
2412
- // Create minimal placeholder so build doesn't break
2413
- createdFiles.push({ path: filepath, content: `// ERROR: Generation failed - ${error.message}\n// Task: ${task.id}\n` });
2414
- }
2415
- }
2416
- // CTO-004: File integrity check — detect destructive MiniMax rewrites
2417
- // For bugfix tasks (and feature tasks editing existing files), reject if new file
2418
- // is <50% the size of the original. Configurable threshold.
2419
- const INTEGRITY_THRESHOLD = 0.5; // Reject if new < 50% of original
2420
- const integrityCheckTypes = ['bugfix']; // Task types that always get integrity check
2421
- const integrityCheckAllExisting = true; // Also check feature tasks editing existing files
2422
- for (const file of createdFiles) {
2423
- if ((0, fs_1.existsSync)(file.path)) {
2424
- try {
2425
- const originalContent = (0, fs_1.readFileSync)(file.path, 'utf-8');
2426
- const originalLines = originalContent.split('\n').length;
2427
- const newLines = file.content.split('\n').length;
2428
- const ratio = originalLines > 0 ? newLines / originalLines : 1;
2429
- const shouldCheck = integrityCheckTypes.includes(task.type) ||
2430
- (integrityCheckAllExisting && originalLines > 10);
2431
- if (shouldCheck && ratio < INTEGRITY_THRESHOLD) {
2432
- log(c.red, ` ✗ INTEGRITY CHECK FAILED: ${file.path}`);
2433
- log(c.red, ` Original: ${originalLines} lines → New: ${newLines} lines (${(ratio * 100).toFixed(0)}%)`);
2434
- log(c.red, ` Possible destructive rewrite detected — file shrank from ${originalLines} to ${newLines} lines`);
2435
- // TICKET-091 FIX: ACTUALLY preserve the original. Prior version
2436
- // assigned a warning comment to file.content, which the writer
2437
- // then wrote to disk as a 4-line stub — destroying the original.
2438
- // Read on-disk original so the subsequent writeFileSync is a no-op.
2439
- try {
2440
- file.content = (0, fs_1.readFileSync)(file.path, 'utf-8');
2441
- log(c.gray, ` Original file restored from disk (${originalLines} lines preserved)`);
2442
- }
2443
- catch (readErr) {
2444
- log(c.yellow, ` WARN: could not read original from disk: ${(readErr?.message || '').slice(0, 100)}`);
2445
- }
2446
- task._integrityFailed = true;
2447
- task._integrityDetails = `File ${file.path} shrank from ${originalLines} to ${newLines} lines (${(ratio * 100).toFixed(0)}%). Possible destructive rewrite. Original preserved on disk; task should be rejected and retried with edit-mode constraint.`;
2448
- }
2449
- else if (originalLines > 0) {
2450
- log(c.gray, ` -> Integrity OK: ${file.path} (${originalLines} → ${newLines} lines, ${(ratio * 100).toFixed(0)}%)`);
2451
- }
2452
- }
2453
- catch { /* File exists but can't read — skip check */ }
2454
- }
2455
- }
2456
- // FP-007: File size guard — refuse writes to files >2000 lines
2457
- // Prevents swarm from destructively rewriting large files (telegram-bot.ts disaster)
2458
- const FP007_LINE_LIMIT = 2000;
2459
- for (const file of createdFiles) {
2460
- if ((0, fs_1.existsSync)(file.path)) {
2461
- try {
2462
- const existingLines = (0, fs_1.readFileSync)(file.path, 'utf-8').split('\n').length;
2463
- if (existingLines > FP007_LINE_LIMIT) {
2464
- log(c.red, ` ✗ FP-007 GUARD: ${file.path} has ${existingLines} lines (limit: ${FP007_LINE_LIMIT})`);
2465
- log(c.red, ` Refusing write — file too large for safe swarm edit. Use manual edit.`);
2466
- file.content = `// FP-007 GUARD: Write refused — target file has ${existingLines} lines (>${FP007_LINE_LIMIT})\n// Task: ${task.id}. Edit this file manually or split it first.\n`;
2467
- task._fp007Blocked = true;
2468
- }
2469
- }
2470
- catch { /* can't read — allow write */ }
2471
- }
2472
- }
2473
- // Write all files to disk
2474
- const writtenFiles = [];
2475
- for (const file of createdFiles) {
2476
- try {
2477
- const dir = file.path.substring(0, file.path.lastIndexOf('/'));
2478
- if (dir)
2479
- (0, fs_1.mkdirSync)(dir, { recursive: true });
2480
- (0, fs_1.writeFileSync)(file.path, file.content);
2481
- writtenFiles.push(file.path);
2482
- log(c.green, ` ✓ Written: ${file.path} (${file.content.length} chars)`);
2483
- }
2484
- catch (error) {
2485
- log(c.red, ` ✗ Write failed: ${file.path}: ${error.message}`);
2486
- }
2487
- }
2488
- // TICKET-205: stub-detection guard. The integrity check at line 2459 only
2489
- // catches destructive shrink of EXISTING files. For NEW files (type=create),
2490
- // a coder agent producing a near-empty stub passes through and gets committed
2491
- // BEFORE dual-review fires. Live incident 2026-05-29: docs/specs/
2492
- // orchestrator-workspace.md generated as 98-byte stub, dual-rejected 3× at
2493
- // 20/100, but commit 3ca603315 landed it on main anyway. This guard skips the
2494
- // commit when any written file is suspiciously small for its type — the file
2495
- // stays on disk so dual-review can inspect + reject it on its own merits.
2496
- const STUB_MIN_BYTES = {
2497
- '.md': 1500, // markdown specs typically ask for many sections
2498
- '.ts': 200,
2499
- '.tsx': 200,
2500
- '.js': 200,
2501
- '.jsx': 200,
2502
- '.yaml': 100,
2503
- '.yml': 100,
2504
- '.json': 100,
2505
- '.html': 100,
2506
- '.css': 100,
2507
- };
2508
- const DEFAULT_STUB_MIN = 200;
2509
- let stubPath = null;
2510
- let stubSize = 0;
2511
- let stubMin = 0;
2512
- for (const path of writtenFiles) {
2513
- const dotIdx = path.lastIndexOf('.');
2514
- const ext = dotIdx >= 0 ? path.slice(dotIdx) : '';
2515
- let min = STUB_MIN_BYTES[ext] ?? DEFAULT_STUB_MIN;
2516
- // .md only enforces the high threshold when the task context is substantial
2517
- // (a real spec ask); for short asks (e.g. README scaffolds), use 300.
2518
- if (ext === '.md' && (task.context?.length ?? 0) < 1500)
2519
- min = 300;
2520
- try {
2521
- const size = (0, fs_1.statSync)(path).size;
2522
- if (size < min) {
2523
- stubPath = path;
2524
- stubSize = size;
2525
- stubMin = min;
2526
- break;
2527
- }
2528
- }
2529
- catch { /* can't stat, skip */ }
2530
- }
2531
- if (stubPath) {
2532
- log(c.red, ` ✗ STUB DETECTED: ${stubPath} (${stubSize} bytes < ${stubMin} expected for ${task.type})`);
2533
- log(c.gray, ` Skipping commit — file stays on disk for dual-review to reject. Task will retry.`);
2534
- task._stubDetected = true;
2535
- task._stubReason = `Generated file ${stubPath} is ${stubSize} bytes; expected ≥${stubMin} for type=${task.type}`;
2536
- return { files: writtenFiles, model };
2537
- }
2538
- // Commit changes
2539
- this.commitChanges(task, writtenFiles);
2540
- return { files: writtenFiles, model };
2541
- }
2542
- // TICKET-090: EDIT-MODE — ask the LLM for {old, new} substitutions on an
2543
- // existing file, instead of regenerating the whole file. Returns the post-
2544
- // edit file content on success, or null to fall back to regenerate-mode.
2545
- //
2546
- // Failure modes that trigger fallback (null):
2547
- // - LLM response isn't parseable JSON
2548
- // - "edits" key missing or empty
2549
- // - any old_str isn't found in the file (typo / hallucination)
2550
- // - any old_str appears more than once (ambiguous edit)
2551
- // - any edit's "old" is identical to its "new" (no-op disguised)
2552
- async tryEditMode(filepath, task, rejectionContext, provider, model) {
2553
- const originalContent = (0, fs_1.readFileSync)(filepath, 'utf-8');
2554
- const lineCount = originalContent.split('\n').length;
2555
- log(c.cyan, ` -> EDIT-MODE: ${filepath} (${lineCount} lines, surgical-edit task) — diff-only output`);
2556
- const userPrompt = `You are ${this.name}, a coding agent at Countable, applying a SURGICAL EDIT to an existing file.
2557
-
2558
- ${rejectionContext}
2559
- ## Task
2560
- ${task.context}
2561
-
2562
- ## File: ${filepath}
2563
- \`\`\`typescript
2564
- ${originalContent}
2565
- \`\`\`
2566
-
2567
- ## Output format — JSON object only, no commentary, no fences
2568
-
2569
- {
2570
- "edits": [
2571
- {"old": "<exact substring currently in the file>", "new": "<replacement>"}
2572
- ]
2573
- }
2574
-
2575
- ## Rules — VIOLATIONS WILL CAUSE THE TASK TO BE REJECTED
2576
-
2577
- 1. Each "old" string MUST appear EXACTLY ONCE in the file. If you need to edit a non-unique substring, include enough surrounding context (a few extra characters before and after) to make it unique.
2578
- 2. Each "old" string MUST match the file VERBATIM — same whitespace, same quote style, same indentation. Do not paraphrase.
2579
- 3. "new" must be different from "old". No-op edits are rejected.
2580
- 4. Prefer FEWER, LARGER edits. 1-3 edits is ideal. 10+ edits suggests you're rewriting — switch to a different approach.
2581
- 5. Output ONLY the JSON object. No \`\`\`json fences. No prose before or after.`;
2582
- let response;
2583
- try {
2584
- const startTime = Date.now();
2585
- response = await callLLM(provider, model, this.systemPrompt, userPrompt, 120_000, this.name, task.id);
2586
- const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
2587
- const tokens = response.usage?.total_tokens || 0;
2588
- log(c.gray, ` edit response: ${elapsed}s, ${tokens} tokens`);
2589
- }
2590
- catch (e) {
2591
- log(c.yellow, ` edit-mode LLM call failed: ${(e.message || '').slice(0, 120)}`);
2592
- return null;
2593
- }
2594
- let raw = (response.choices?.[0]?.message?.content || '').replace(/<think>[\s\S]*?<\/think>/g, '').trim();
2595
- // Strip any fences the model added despite instructions (json/jsonc/none)
2596
- const fenced = raw.match(/```(?:json|jsonc)?\s*([\s\S]*?)```/);
2597
- if (fenced)
2598
- raw = fenced[1].trim();
2599
- // TICKET-092 FIX: walk forward from first `{` tracking brace depth through
2600
- // string literals (with escape-char handling), extract the FIRST balanced
2601
- // JSON object. Prior slice(firstBrace,lastBrace+1) caught multi-object
2602
- // responses + trailing prose as one mega-string and broke JSON.parse.
2603
- const firstBrace = raw.indexOf('{');
2604
- if (firstBrace < 0) {
2605
- log(c.yellow, ` edit-mode: no JSON object in response`);
2606
- return null;
2607
- }
2608
- let depth = 0, inStr = false, strChar = '', escape = false, end = -1;
2609
- for (let i = firstBrace; i < raw.length; i++) {
2610
- const ch = raw[i];
2611
- if (escape) {
2612
- escape = false;
2613
- continue;
2614
- }
2615
- if (inStr) {
2616
- if (ch === '\\')
2617
- escape = true;
2618
- else if (ch === strChar)
2619
- inStr = false;
2620
- continue;
2621
- }
2622
- if (ch === '"' || ch === "'") {
2623
- inStr = true;
2624
- strChar = ch;
2625
- continue;
2626
- }
2627
- if (ch === '{')
2628
- depth++;
2629
- else if (ch === '}') {
2630
- depth--;
2631
- if (depth === 0) {
2632
- end = i;
2633
- break;
2634
- }
2635
- }
2636
- }
2637
- if (end < 0) {
2638
- log(c.yellow, ` edit-mode: unterminated JSON object (no matching closing brace)`);
2639
- return null;
2640
- }
2641
- raw = raw.slice(firstBrace, end + 1);
2642
- let parsed;
2643
- try {
2644
- parsed = JSON.parse(raw);
2645
- }
2646
- catch (e) {
2647
- // Trailing-comma forgiveness retry (common LLM quirk).
2648
- try {
2649
- parsed = JSON.parse(raw.replace(/,(\s*[}\]])/g, '$1'));
2650
- log(c.gray, ` edit-mode: trailing-comma repair applied`);
2651
- }
2652
- catch {
2653
- log(c.yellow, ` edit-mode: JSON parse failed: ${(e.message || '').slice(0, 80)}`);
2654
- return null;
2655
- }
2656
- }
2657
- const edits = parsed.edits;
2658
- if (!Array.isArray(edits) || edits.length === 0) {
2659
- log(c.yellow, ` edit-mode: no edits array or empty`);
2660
- return null;
2661
- }
2662
- let working = originalContent;
2663
- for (const e of edits) {
2664
- if (typeof e.old !== 'string' || typeof e.new !== 'string') {
2665
- log(c.yellow, ` edit-mode: edit missing string fields`);
2666
- return null;
2667
- }
2668
- if (e.old === e.new) {
2669
- log(c.yellow, ` edit-mode: no-op edit (old===new): ${e.old.slice(0, 60)}`);
2670
- return null;
2671
- }
2672
- // TICKET-100: per-edit suspicious-shrink guard. If `new` is dramatically
2673
- // shorter than `old`, the LLM likely matched too greedy a chunk (e.g.
2674
- // captured the trailing portion of the file in `old` and only included
2675
- // the first part in `new`, silently truncating). 43475fd56 broke prod
2676
- // exactly this way: edit removed the tail of generateReport function.
2677
- // Threshold: if old is >= 200 chars AND new < 30% of old length, reject.
2678
- if (e.old.length >= 200 && e.new.length < e.old.length * 0.3) {
2679
- log(c.yellow, ` edit-mode: suspicious shrink — old=${e.old.length} chars, new=${e.new.length} chars (< 30%). Rejecting to avoid truncation.`);
2680
- return null;
2681
- }
2682
- const occurrences = working.split(e.old).length - 1;
2683
- if (occurrences === 0) {
2684
- log(c.yellow, ` edit-mode: old_str not found in file: "${e.old.slice(0, 80)}"`);
2685
- return null;
2686
- }
2687
- if (occurrences > 1) {
2688
- log(c.yellow, ` edit-mode: old_str appears ${occurrences}× (must be unique): "${e.old.slice(0, 80)}"`);
2689
- return null;
2690
- }
2691
- working = working.replace(e.old, e.new);
2692
- }
2693
- // TICKET-100: post-edit integrity check. Same INTEGRITY_THRESHOLD pattern
2694
- // localQAGate uses for regen-mode results — if the post-edit content is
2695
- // dramatically shorter than the original (>30% loss), reject as suspicious
2696
- // truncation. Catches the case where individual edits each look reasonable
2697
- // but cumulatively destroy the file.
2698
- const INTEGRITY_THRESHOLD = 0.7; // must retain at least 70% of original lines
2699
- const newLineCount = working.split('\n').length;
2700
- const lineRatio = lineCount > 0 ? newLineCount / lineCount : 1;
2701
- if (lineRatio < INTEGRITY_THRESHOLD) {
2702
- log(c.yellow, ` edit-mode: post-edit integrity FAIL — ${lineCount} → ${newLineCount} lines (${(lineRatio * 100).toFixed(0)}%). Rejecting to avoid destructive write.`);
2703
- return null;
2704
- }
2705
- // TICKET-100: detect mid-statement truncation. If the last non-empty line
2706
- // doesn't end with a structural terminator (} ; > etc.), the file likely
2707
- // got cut mid-expression. Catches LLM output that stops mid-call.
2708
- const lastNonEmpty = working.split('\n').reverse().find(l => l.trim().length > 0) || '';
2709
- const last = lastNonEmpty.trim();
2710
- const terminatorOk = /[}\];>)\.]\s*$|^\/\/|^\/\*|^\*\//.test(last);
2711
- if (!terminatorOk && working.length > 200) {
2712
- log(c.yellow, ` edit-mode: last line "${last.slice(-60)}" doesn't end with a terminator — possible mid-statement truncation. Rejecting.`);
2713
- return null;
2714
- }
2715
- const deltaLines = newLineCount - lineCount;
2716
- log(c.green, ` ✓ edit-mode applied ${edits.length} edit(s) (${deltaLines >= 0 ? '+' : ''}${deltaLines} lines)`);
2717
- return working;
2718
- }
2719
- // CTO-005: Enhanced code fence stripping — handles all MiniMax output variants
2720
- // Catches: ```tsx, ```typescript, leading whitespace, fences at any position,
2721
- // markdown headers before code, and incomplete closing fences
2722
- extractCodeBlocks(content) {
2723
- const blocks = [];
2724
- // Normalize: MiniMax sometimes outputs ```typescript{ with no newline — insert one
2725
- const normalized = content.replace(/```([\w.+-]*)\s*([^\s\n`])/g, '```$1\n$2');
2726
- // Broader regex: optional whitespace before fences, any language tag, flexible spacing
2727
- const regex = /^\s*```[\w.+-]*\s*\n([\s\S]*?)^\s*```\s*$/gm;
2728
- let match;
2729
- while ((match = regex.exec(normalized)) !== null) {
2730
- if (match[1].trim().length > 0)
2731
- blocks.push(match[1].trim());
2732
- }
2733
- // Fallback: try simpler pattern if multiline didn't match
2734
- if (blocks.length === 0) {
2735
- // S64-001: Added python|py|toml|env|sql|xml|md|markdown — MiniMax/qwen3 often labels files incorrectly
2736
- const simpleRegex = /```(?:typescript|tsx|ts|javascript|jsx|js|json|yaml|yml|dockerfile|sh|bash|python|py|toml|env|sql|xml|md|markdown|css|html|scss|less|txt)?\s*\n([\s\S]*?)```/g;
2737
- while ((match = simpleRegex.exec(normalized)) !== null) {
2738
- if (match[1].trim().length > 0)
2739
- blocks.push(match[1].trim());
2740
- }
2741
- }
2742
- return blocks;
2743
- }
2744
- // CTO-005: Aggressive fence sanitization — strips ANY remaining fences from content
2745
- // Applied BEFORE file is written to disk (CEO condition)
2746
- stripResidualFences(content) {
2747
- let cleaned = content;
2748
- // Remove lines that are ONLY a fence marker (with optional language tag)
2749
- cleaned = cleaned.replace(/^\s*```[\w.+-]*\s*$/gm, '');
2750
- // TICKET-096: if the response contains a `FILE: <path>` marker line
2751
- // anywhere in the first 20 lines, slice from the line AFTER it. This
2752
- // catches the "prose explanation + FILE: <path> + real content" pattern
2753
- // that the v12r_citizen_mock_state agent produced (broke prod 2026-05-27
2754
- // 09:23 with line 1 = "The current file already has `state: 'idle'`...").
2755
- // The prior loop only recognized prose openers like "Here", "Below",
2756
- // "The following" — anything else broke through. FILE marker is the
2757
- // canonical separator per the agent system prompt; trust it.
2758
- const linesForFile = cleaned.split('\n');
2759
- let fileMarkerIdx = -1;
2760
- for (let i = 0; i < Math.min(linesForFile.length, 20); i++) {
2761
- if (/^\s*FILE:\s+\S+/.test(linesForFile[i])) {
2762
- fileMarkerIdx = i;
2763
- break;
2764
- }
2765
- }
2766
- if (fileMarkerIdx >= 0) {
2767
- cleaned = linesForFile.slice(fileMarkerIdx + 1).join('\n');
2768
- }
2769
- // Legacy: also strip simple prose prefixes from the FIRST 5 lines for
2770
- // responses that don't use the FILE marker convention.
2771
- const lines = cleaned.split('\n');
2772
- let firstCodeLine = 0;
2773
- for (let i = 0; i < Math.min(lines.length, 5); i++) {
2774
- const line = lines[i].trim();
2775
- if ((line.startsWith('#') && !line.startsWith('#!')) || line.startsWith('Here') || line.startsWith('Below') ||
2776
- line.startsWith('The following') || line.startsWith('FILE:') || line === '') {
2777
- firstCodeLine = i + 1;
2778
- }
2779
- else {
2780
- break;
2781
- }
2782
- }
2783
- if (firstCodeLine > 0) {
2784
- cleaned = lines.slice(firstCodeLine).join('\n');
2785
- }
2786
- // Remove trailing fence if present at end
2787
- cleaned = cleaned.replace(/\n\s*```\s*$/, '');
2788
- return cleaned.trim();
2789
- }
2790
- // B.13: T0 NANO JSON repair via ClawRouter v2.0 — called when postProcessContent still yields invalid JSON
2791
- async fixJsonWithOllama(content, _filepath) {
2792
- try {
2793
- const repairPrompt = `Fix this malformed JSON so it is syntactically valid. Return ONLY the corrected JSON, no explanation or markdown fences:\n\n${content.substring(0, 3000)}`;
2794
- const result = await routeCall({
2795
- task_type: 'json_repair', tier_class: 'text', complexity: 'nano',
2796
- context_tokens: Math.ceil(repairPrompt.length / 4), constitutional_flag: false,
2797
- agent_id: 'json-repair',
2798
- payload: { prompt: repairPrompt, max_tokens: 2048 },
2799
- });
2800
- const fixed = result.content.trim();
2801
- try {
2802
- JSON.parse(fixed);
2803
- return fixed;
2804
- }
2805
- catch {
2806
- return content;
2807
- }
2808
- }
2809
- catch {
2810
- return content;
2811
- }
2812
- }
2813
- // File-type-aware post-processing: validates and cleans content per file extension.
2814
- // This is the FINAL safety net after all fence stripping has run.
2815
- postProcessContent(content, filepath) {
2816
- const filename = filepath.split('/').pop() || '';
2817
- const ext = filename.includes('.') ? filename.split('.').pop().toLowerCase() : '';
2818
- // .gitkeep must ALWAYS be completely empty — no exceptions
2819
- if (filename === '.gitkeep' || filepath.endsWith('.gitkeep')) {
2820
- return '';
2821
- }
2822
- // JSON files: ensure the content is valid JSON, strip any fence artifacts
2823
- if (ext === 'json') {
2824
- try {
2825
- JSON.parse(content);
2826
- return content; // already valid
2827
- }
2828
- catch { /* fall through to extraction */ }
2829
- // Try to extract a JSON object or array
2830
- const jsonMatch = content.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
2831
- if (jsonMatch) {
2832
- try {
2833
- JSON.parse(jsonMatch[1]);
2834
- return jsonMatch[1];
2835
- }
2836
- catch { /* fall through */ }
2837
- }
2838
- // Strip all fence lines and retry
2839
- const stripped = content.replace(/^\s*```[\w.+-]*\s*$/gm, '').trim();
2840
- try {
2841
- JSON.parse(stripped);
2842
- return stripped;
2843
- }
2844
- catch { /* fall through */ }
2845
- return stripped; // return best effort even if not valid JSON
2846
- }
2847
- // Code/script/markdown files: strip any remaining fence markers aggressively
2848
- if (['sh', 'bash', 'py', 'ts', 'js', 'tsx', 'jsx', 'mts', 'mjs', 'md', 'markdown'].includes(ext)) {
2849
- return content.replace(/^\s*```[\w.+-]*\s*$/gm, '').trim();
2850
- }
2851
- return content;
2852
- }
2853
- // TRUNCATION DETECTION: Check if generated code ends mid-function
2854
- // Returns true if the code appears to be truncated (open braces, incomplete statement, etc.)
2855
- detectTruncation(content) {
2856
- if (!content || content.length < 100)
2857
- return false;
2858
- const trimmed = content.trimEnd();
2859
- const lastLine = trimmed.split('\n').pop()?.trim() || '';
2860
- const last200 = trimmed.substring(Math.max(0, trimmed.length - 200));
2861
- // Signs of truncation:
2862
- // 1. Ends with a partial statement (no semicolon, no closing brace on last line)
2863
- const endsAbruptly = lastLine.length > 0 && !lastLine.match(/^[}\]);,]/);
2864
- // 2. Ends mid-string or mid-comment
2865
- const endsMidString = (trimmed.match(/`/g) || []).length % 2 !== 0;
2866
- // 3. Significantly more open braces than close braces (>3 imbalance)
2867
- const openBraces = (content.match(/\{/g) || []).length;
2868
- const closeBraces = (content.match(/\}/g) || []).length;
2869
- const braceImbalance = openBraces - closeBraces;
2870
- // 4. Last meaningful content is a function signature or opening block
2871
- const endsOnOpener = /(\{|=>|then\(|catch\(|=>\s*)$/.test(last200.trimEnd());
2872
- if (braceImbalance > 3) {
2873
- log(c.yellow, ` ! Truncation signal: brace imbalance ${openBraces} open vs ${closeBraces} close`);
2874
- return true;
2875
- }
2876
- if (endsMidString) {
2877
- log(c.yellow, ` ! Truncation signal: odd number of backticks (mid-template-string)`);
2878
- return true;
2879
- }
2880
- if (endsOnOpener && endsAbruptly) {
2881
- log(c.yellow, ` ! Truncation signal: ends on opener with no closing`);
2882
- return true;
2883
- }
2884
- return false;
2885
- }
2886
- commitChanges(task, files) {
2887
- if (files.length === 0)
2888
- return;
2889
- // Branch-isolation guard: the runner's `git commit` lands on whichever
2890
- // branch is currently checked out. If a developer has a feature branch
2891
- // checked out while the runner is alive (per the PR #18 incident
2892
- // 2026-05-21, where swarm commits polluted fix/clawrouter-*), DO NOT
2893
- // commit — the deliverables stay on disk, the orchestrator continues,
2894
- // and the user can rebase/cherry-pick onto main when they're done.
2895
- // Always-safe vs cleverness: skipping a commit is recoverable; polluting
2896
- // a feature branch is not. See feedback_branch_hygiene_during_sprint_runner.md.
2897
- let currentBranch = 'unknown';
2898
- try {
2899
- currentBranch = (0, child_process_1.execSync)('git rev-parse --abbrev-ref HEAD', { timeout: 5000 }).toString().trim();
2900
- }
2901
- catch { /* if git itself is broken, the commit below will fail; let it */ }
2902
- if (currentBranch !== 'main') {
2903
- log(c.yellow, ` ! Commit skipped — checkout is on '${currentBranch}', not 'main'. Files left on disk for ${task.id}; rebase to main manually if you want them committed. (branch-isolation guard)`);
2904
- return;
2905
- }
2906
- try {
2907
- const filesList = files.join(' ');
2908
- (0, child_process_1.execSync)(`git add ${filesList}`, { timeout: 10000 });
2909
- // No --no-verify: pre-commit hooks (secret scan, lint, etc) get to run.
2910
- // If this ever blocks legitimate commits, fix the hook — don't bypass.
2911
- (0, child_process_1.execSync)(`git commit -m "feat(${task.agent}): ${task.id} - ${task.type}"`, { timeout: 15000 });
2912
- log(c.green, ` ✓ Committed: ${files.length} files for ${task.id}`);
2913
- // TICKET-093 FIX: actually push. Prior version committed but never
2914
- // pushed — 25 commits stranded local overnight. Best-effort.
2915
- //
2916
- // TICKET-095 (REVERTED 2026-05-27): tried `git pull --rebase` on
2917
- // non-fast-forward rejection but that resets working-tree files to
2918
- // origin. The sprint JSON file (mid-flight pending → done status
2919
- // updates written by sprint-runner sync) is uncommitted, so the
2920
- // rebase nuked it back to the all-pending state committed at sprint
2921
- // start. Net effect: a single divergent push CAUSED the disk-state
2922
- // revert TICKET-094 was supposed to fix. Removing the rebase
2923
- // entirely — failed pushes stay local, founder reconciles manually.
2924
- // The cost (some lost autonomy on divergent remotes) is lower than
2925
- // the cost (silent destruction of sprint-state tracking).
2926
- try {
2927
- (0, child_process_1.execSync)('git push origin main', { timeout: 30000, stdio: 'pipe' });
2928
- log(c.gray, ` → pushed to origin/main`);
2929
- }
2930
- catch (pushErr) {
2931
- const msg = (pushErr?.stderr?.toString() || pushErr?.message || '').slice(0, 200).replace(/\s+/g, ' ');
2932
- log(c.yellow, ` ! Push failed: ${msg.slice(0, 150)} (commit local; founder reconciles)`);
2933
- }
2934
- }
2935
- catch (error) {
2936
- log(c.yellow, ` ! Commit skipped: ${error.message?.substring(0, 100)}`);
2937
- }
2938
- }
2939
- }
2940
- // ===== Orchestrator (Dynamic Agent Pipeline) =====
2941
- class Orchestrator {
2942
- ceo;
2943
- cto;
2944
- supervisor;
2945
- supervisor2;
2946
- agents = new Map();
2947
- tasks = [];
2948
- stats = { tasksExecuted: 0, approved: 0, rejected: 0, totalTokens: 0, conflicts: 0, escalations: 0 };
2949
- // Per-task structured run records — written to swarm run report at end of sprint
2950
- taskRuns = [];
2951
- /**
2952
- * Persist a single task's status back to the on-disk sprint file.
2953
- *
2954
- * Sprint-1547 fix: previously, task statuses were only written at end-of-run
2955
- * (line ~3055). Any exception, OOM, or SIGKILL between an approval and the
2956
- * end-of-run write dropped the approval — the sprint file still said
2957
- * 'pending', the sprint-runner cron repicked the same sprint, and the same
2958
- * task ran again. Sprint-1545 looped 30+ times overnight from this.
2959
- *
2960
- * Read-modify-write so concurrent edits to OTHER tasks (e.g. another
2961
- * Claude session) survive — we only overwrite this task's slot.
2962
- *
2963
- * Failure must NOT block the next task. Logged and swallowed.
2964
- */
2965
- persistTaskStatus(task) {
2966
- const sprintFile = process.argv[2] || 'sprints/current.json';
2967
- try {
2968
- if (!(0, fs_1.existsSync)(sprintFile))
2969
- return;
2970
- const sprintRaw = JSON.parse((0, fs_1.readFileSync)(sprintFile, 'utf-8'));
2971
- const arr = Array.isArray(sprintRaw.tasks) ? sprintRaw.tasks : null;
2972
- if (!arr)
2973
- return;
2974
- const idx = arr.findIndex((t) => t && t.id === task.id);
2975
- if (idx < 0)
2976
- return;
2977
- arr[idx] = { ...arr[idx], status: task.status };
2978
- // sprint-1566 F0/F0c: also persist rejected_reason when set (replaced-by-split, over-budget)
2979
- if (task.rejected_reason)
2980
- arr[idx].rejected_reason = task.rejected_reason;
2981
- // CTO-006 telemetry-blackout hotfix (2026-05-29, Slice A of TICKET-135):
2982
- // persist attempt_count + score + grade + rejection_reason. Without
2983
- // these, the .swarm-state file showed only {status, attempt_count: 1}
2984
- // even after 3 attempts, and every CTO retrospective marked score=? +
2985
- // attempts=?. 10 consecutive zero-ship sprints diagnosed as "we can't
2986
- // root-cause because telemetry is dark." This wires the orchestrator's
2987
- // in-memory fields → ACTIVE → .swarm-state via sprint-runner's sync.
2988
- if (task.attempt_count != null)
2989
- arr[idx].attempt_count = task.attempt_count;
2990
- if (task.score != null)
2991
- arr[idx].score = task.score;
2992
- if (task.grade != null)
2993
- arr[idx].grade = task.grade;
2994
- if (task.rejection_reason != null)
2995
- arr[idx].rejection_reason = task.rejection_reason;
2996
- // TICKET-094 FIX: atomic write via temp+rename. Plain writeFileSync was
2997
- // vulnerable to concurrent-reader-sees-empty-file races AND interleaved-
2998
- // writer truncation. The disk-state revert that stranded sprint-1588 +
2999
- // sprint-1589 in all-pending despite "Synced N updates" was almost
3000
- // certainly the orchestrator + sprint-runner.ts sync racing on the
3001
- // same file at sprint end. Tmp+rename is atomic on POSIX — readers see
3002
- // either old or new, never half.
3003
- const tmp = `${sprintFile}.tmp.${process.pid}.${Date.now()}`;
3004
- (0, fs_1.writeFileSync)(tmp, JSON.stringify(sprintRaw, null, 2));
3005
- require('fs').renameSync(tmp, sprintFile);
3006
- }
3007
- catch (e) {
3008
- log(c.yellow, ` [persist] Failed to update ${task.id} status: ${(e?.message || '').substring(0, 100)}`);
3009
- }
3010
- }
3011
- /** sprint-1566 F0/F0d: inject decomposer-split sub-tasks into the active
3012
- * sprint file as new pending tasks. Sprint-runner picks them up on the next
3013
- * cron tick. The original task stays in the file with status='replaced-by-split'.
3014
- *
3015
- * Founder fix 2026-05-27: ALSO push to this.tasks (in-memory). The sprint-end
3016
- * writeFileSync at line 4088 dumps this.tasks back to ACTIVE, overwriting
3017
- * whatever we wrote to disk here. Without the in-memory push, injected
3018
- * sub-tasks were silently wiped at sprint-end and the persistence fix
3019
- * in sprint-runner.ts saw nothing to forward to the source sprint file.
3020
- * This is the root cause of the 0-ship pattern in sprint-1596/1597. */
3021
- injectSplitTasks(original, splits, rationale) {
3022
- const sprintFile = process.argv[2] || 'sprints/current.json';
3023
- if (!(0, fs_1.existsSync)(sprintFile))
3024
- return;
3025
- const sprintRaw = JSON.parse((0, fs_1.readFileSync)(sprintFile, 'utf-8'));
3026
- if (!Array.isArray(sprintRaw.tasks))
3027
- return;
3028
- const existingIds = new Set(sprintRaw.tasks.map((t) => t?.id));
3029
- let injected = 0;
3030
- for (const s of splits) {
3031
- if (existingIds.has(s.id))
3032
- continue; // idempotent: already split before
3033
- const newTask = {
3034
- ...s,
3035
- attempt_count: 0,
3036
- parent_task_id: original.id,
3037
- split_rationale: rationale,
3038
- injected_at: new Date().toISOString(),
3039
- };
3040
- sprintRaw.tasks.push(newTask);
3041
- // Also push to in-memory — survives the sprint-end ACTIVE rewrite.
3042
- this.tasks.push(newTask);
3043
- injected++;
3044
- }
3045
- if (injected > 0)
3046
- (0, fs_1.writeFileSync)(sprintFile, JSON.stringify(sprintRaw, null, 2));
3047
- log(c.green, ` [inject] Added ${injected} split sub-tasks to ${sprintFile} (in-memory + on-disk)`);
3048
- }
3049
- constructor() {
3050
- log(c.bold, '\n╔══════════════════════════════════════════════════════════╗');
3051
- log(c.bold, '║ Kognai Swarm Orchestrator v2.17 — V17 Architecture ║');
3052
- log(c.bold, '║ Local-first · ClawRouter cloud · DeepSeek reviews ║');
3053
- log(c.bold, '╚══════════════════════════════════════════════════════════╝\n');
3054
- // Leadership layer (CEO = Claude via Anthropic; Sup1 = DeepSeek/Sonnet;
3055
- // Sup2 = Haiku with DeepSeek fallback. Both supervisors were originally
3056
- // Sonnet + Codex — see file-header history note.)
3057
- this.ceo = new CEOAgent();
3058
- this.supervisor = new SupervisorAgent();
3059
- this.supervisor2 = new Supervisor2Agent();
3060
- // Technology layer (MiniMax)
3061
- this.cto = new CTOAgent();
3062
- // Execution layer — dynamically load all coding agents from agents/ directory
3063
- const skipAgents = ['ceo', 'supervisor', 'skills', 'cto', 'cmo'];
3064
- const agentDirs = (0, fs_1.existsSync)('./agents') ? (0, fs_1.readdirSync)('./agents').filter(d => {
3065
- if (skipAgents.includes(d))
3066
- return false;
3067
- return (0, fs_1.existsSync)(`./agents/${d}/prompt.md`);
3068
- }) : [];
3069
- // Constitutional preamble — injected into every agent's system prompt
3070
- const constitutionalPreamble = loadConstitutionalPreamble();
3071
- if (constitutionalPreamble) {
3072
- log(c.green, ' ⚖️ Constitutional preamble loaded — will bind all agents');
3073
- }
3074
- for (const name of agentDirs) {
3075
- const promptPath = `./agents/${name}/prompt.md`;
3076
- const rawPrompt = (0, fs_1.readFileSync)(promptPath, 'utf-8');
3077
- const prompt = constitutionalPreamble + rawPrompt;
3078
- this.agents.set(name, new CodingAgent(name, prompt));
3079
- log(c.cyan, `+ Loaded ${name} agent (MiniMax M2.5)`);
3080
- }
3081
- // Agent count: CEO + Sup1 + Sup2 (all 3 Anthropic-bound, with provider routing happening per-call)
3082
- // + 1 CMO (qwen3:4b local) + 1 CTO (MiniMax) + N coders (MiniMax)
3083
- const totalAgents = 3 + 1 + 1 + this.agents.size;
3084
- log(c.green, `\n✓ ${totalAgents} agents loaded (3 Anthropic-bound leadership + 1 CMO qwen3:4b + ${1 + this.agents.size} MiniMax)\n`);
3085
- }
3086
- loadTasks() {
3087
- const sprintFile = process.argv[2] || 'sprints/current.json';
3088
- if (!(0, fs_1.existsSync)(sprintFile)) {
3089
- log(c.red, `Sprint file not found: ${sprintFile}`);
3090
- process.exit(1);
3091
- }
3092
- const sprint = JSON.parse((0, fs_1.readFileSync)(sprintFile, 'utf-8'));
3093
- this.tasks = sprint.tasks || [];
3094
- const _sprintId = sprintFile.replace(/.*\//, '').replace('.json', '');
3095
- // Normalize deliverables: CEO planner may emit flat string[] instead of {code,tests,docs}
3096
- for (const task of this.tasks) {
3097
- const d = task.deliverables;
3098
- if (!d) {
3099
- // Sprint JSON may omit deliverables — default from task_target
3100
- const target = task.task_target;
3101
- task.deliverables = { code: target ? [target] : [], tests: [], docs: [] };
3102
- }
3103
- else if (Array.isArray(d)) {
3104
- task.deliverables = {
3105
- code: d.filter((f) => f.indexOf("test") === -1 && f.indexOf("spec") === -1 && f.slice(-3) !== ".md"),
3106
- tests: d.filter((f) => f.indexOf("test") !== -1 || f.indexOf("spec") !== -1),
3107
- docs: d.filter((f) => f.slice(-3) === ".md"),
3108
- };
3109
- }
3110
- // Normalize description → context: sprint JSON files may use either field name
3111
- if (!task.context && task.description) {
3112
- task.context = task.description;
3113
- }
3114
- // Ensure context is always a string (never undefined)
3115
- if (!task.context)
3116
- task.context = `${task.id}: ${task.title || task.type}`;
3117
- // Normalize priority: sprint JSON may omit it
3118
- if (!task.priority)
3119
- task.priority = 'medium';
3120
- // Fix: task_target used as file path (e.g., 'scripts/lib/foo.ts') must be cleared
3121
- // so it doesn't confuse the routing switch which expects: local|cloud-code|cloud-exec|cloud-post
3122
- const VALID_ROUTING_TARGETS = ['local', 'cloud-code', 'cloud-exec', 'cloud-post'];
3123
- if (task.task_target && !VALID_ROUTING_TARGETS.includes(task.task_target)) {
3124
- delete task.task_target; // file path already captured in deliverables.code
3125
- }
3126
- // Stamp sprint_id — avoids 'unknown' in logs/routing/YYYY-MM-DD.jsonl
3127
- if (!task.sprint_id)
3128
- task.sprint_id = _sprintId;
3129
- }
3130
- // Reset stale in_progress tasks back to pending
3131
- for (const task of this.tasks) {
3132
- if (task.status === 'in_progress' || task.status === 'review') {
3133
- log(c.yellow, ` Resetting stale task ${task.id} (${task.status} -> pending)`);
3134
- task.status = 'pending';
3135
- }
3136
- }
3137
- log(c.blue, `Loaded ${this.tasks.length} tasks from ${sprintFile}`);
3138
- }
3139
- // ===== Truncation Detection =====
3140
- isTruncationRejection(review) {
3141
- const truncationKeywords = [
3142
- 'truncat', 'incomplete', 'cut off', 'cuts off', 'ends abruptly',
3143
- 'missing implementation', 'missing the actual', 'file is incomplete',
3144
- 'cuts off mid', 'missing core functionality', 'missing the entire',
3145
- ];
3146
- const text = (review.summary + ' ' +
3147
- (review.issues || []).map(i => i.description).join(' ')).toLowerCase();
3148
- return truncationKeywords.some(kw => text.includes(kw));
3149
- }
3150
- // ===== CTO Task Decomposition (for truncation-prone tasks) =====
3151
- async ctoDecomposeTask(task) {
3152
- log(c.cyan, `\n[cto-decompose] 🔧 CTO splitting ${task.id} into smaller sub-tasks...`);
3153
- const allDeliverables = [
3154
- ...(task.deliverables.code || []),
3155
- ...(task.deliverables.tests || []),
3156
- ];
3157
- const userPrompt = `A task keeps failing because MiniMax M2.5 truncates output when generating multiple files.
3158
-
3159
- ## Failed Task
3160
- - ID: ${task.id}
3161
- - Agent: ${task.agent}
3162
- - Context: ${task.context.substring(0, 1500)}
3163
- - Deliverable files: ${allDeliverables.join(', ')}
3164
-
3165
- ## Problem
3166
- MiniMax M2.5 has a ~4500 token output limit per call. When a task has ${allDeliverables.length} files, each file gets less space and code gets truncated.
3167
-
3168
- ## Your Job
3169
- Split this task into smaller sub-tasks. Each sub-task must have at most 1 code file + 1 test file (2 files max).
3170
-
3171
- ## Rules
3172
- 1. Types/interfaces files FIRST (other files depend on them)
3173
- 2. Barrel/index export files LAST (they import from everything else)
3174
- 3. Each sub-task must be self-contained (agent can generate it without seeing other sub-task results)
3175
- 4. Include enough context in each sub-task for the agent to know what to generate
3176
- 5. Maximum 5 sub-tasks
3177
-
3178
- ## Output Format
3179
- Return a JSON array of sub-task specs:
3180
- [
3181
- {
3182
- "sub_id": "${task.id}-A",
3183
- "context": "Full task context for this sub-task including what types/interfaces to define",
3184
- "code": ["path/to/file.ts"],
3185
- "tests": ["path/to/file.test.ts"]
3186
- }
3187
- ]
3188
-
3189
- ONLY output the JSON array. No markdown, no explanation.`;
3190
- try {
3191
- const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.cto['systemPrompt'] || '', userPrompt, 120000, 'cto', 'fallback_task_decomposer');
3192
- let content = response.choices?.[0]?.message?.content || '';
3193
- content = content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
3194
- const jsonMatch = content.match(/\[[\s\S]*\]/);
3195
- if (!jsonMatch) {
3196
- log(c.yellow, ' CTO decomposition returned no JSON, falling back to mechanical split');
3197
- return this.fallbackDecompose(task);
3198
- }
3199
- const specs = JSON.parse(jsonMatch[0]);
3200
- if (!Array.isArray(specs) || specs.length < 2) {
3201
- log(c.yellow, ' CTO returned <2 sub-tasks, falling back to mechanical split');
3202
- return this.fallbackDecompose(task);
3203
- }
3204
- // Convert specs to AgentTask objects
3205
- const subtasks = specs.slice(0, 5).map((spec, i) => ({
3206
- id: spec.sub_id || `${task.id}-${String.fromCharCode(65 + i)}`,
3207
- agent: task.agent,
3208
- type: task.type,
3209
- priority: task.priority,
3210
- dependencies: i > 0 ? [specs[i - 1].sub_id || `${task.id}-${String.fromCharCode(64 + i)}`] : [],
3211
- context: spec.context,
3212
- deliverables: {
3213
- code: spec.code || [],
3214
- tests: spec.tests || [],
3215
- },
3216
- status: 'pending',
3217
- }));
3218
- log(c.green, ` ✓ CTO decomposed ${task.id} into ${subtasks.length} sub-tasks:`);
3219
- for (const st of subtasks) {
3220
- const files = [...(st.deliverables.code || []), ...(st.deliverables.tests || [])];
3221
- log(c.cyan, ` ${st.id}: ${files.join(', ')}`);
3222
- }
3223
- return subtasks;
3224
- }
3225
- catch (error) {
3226
- log(c.yellow, ` CTO decomposition failed: ${error.message}, using fallback`);
3227
- return this.fallbackDecompose(task);
3228
- }
3229
- }
3230
- // ===== Fallback: Mechanical file-based split =====
3231
- fallbackDecompose(task) {
3232
- const codeFiles = task.deliverables.code || [];
3233
- const testFiles = task.deliverables.tests || [];
3234
- log(c.yellow, ` [fallback] Mechanically splitting ${task.id} by file...`);
3235
- const subtasks = [];
3236
- for (let i = 0; i < codeFiles.length; i++) {
3237
- const code = codeFiles[i];
3238
- // Find matching test file
3239
- const baseName = code.replace(/\.ts$/, '').split('/').pop() || '';
3240
- const matchingTest = testFiles.find(t => t.includes(baseName) && (t.includes('.test.') || t.includes('.spec.')));
3241
- subtasks.push({
3242
- id: `${task.id}-${String.fromCharCode(65 + i)}`,
3243
- agent: task.agent,
3244
- type: task.type,
3245
- priority: task.priority,
3246
- dependencies: i > 0 ? [`${task.id}-${String.fromCharCode(64 + i)}`] : [],
3247
- context: `${task.context}\n\n## SUB-TASK: Generate ONLY the file "${code}"${matchingTest ? ` and its test "${matchingTest}"` : ''}.\nThis is part of a larger task that was split to avoid truncation. Focus on this file only. Make it complete and self-contained.`,
3248
- deliverables: {
3249
- code: [code],
3250
- tests: matchingTest ? [matchingTest] : [],
3251
- },
3252
- status: 'pending',
3253
- });
3254
- }
3255
- // Handle orphan test files (tests without matching code file)
3256
- const usedTests = subtasks.flatMap(st => st.deliverables.tests || []);
3257
- const orphanTests = testFiles.filter(t => !usedTests.includes(t));
3258
- if (orphanTests.length > 0) {
3259
- subtasks.push({
3260
- id: `${task.id}-${String.fromCharCode(65 + codeFiles.length)}`,
3261
- agent: task.agent,
3262
- type: task.type,
3263
- priority: task.priority,
3264
- dependencies: subtasks.length > 0 ? [subtasks[subtasks.length - 1].id] : [],
3265
- context: `${task.context}\n\n## SUB-TASK: Generate ONLY the test file(s): ${orphanTests.join(', ')}.\nAll source code files have already been generated. Write tests that import from the existing source files.`,
3266
- deliverables: {
3267
- code: [],
3268
- tests: orphanTests,
3269
- },
3270
- status: 'pending',
3271
- });
3272
- }
3273
- log(c.green, ` ✓ Fallback split ${task.id} into ${subtasks.length} sub-tasks`);
3274
- return subtasks;
3275
- }
3276
- // ===== Sub-task executor (limited retries, no recursive decomposition) =====
3277
- async executeSubTask(subtask, maxRetries) {
3278
- // Sprint 1309: default to 'coder' when subtask.agent is not set
3279
- const subAgentName = subtask.agent || 'coder';
3280
- const agent = this.agents.get(subAgentName);
3281
- if (!agent) {
3282
- log(c.red, ` Agent not found for sub-task: ${subAgentName}`);
3283
- return false;
3284
- }
3285
- let lastReview;
3286
- for (let attempt = 1; attempt <= maxRetries; attempt++) {
3287
- log(c.blue, `\n [sub-task] ${subtask.id} | Attempt: ${attempt}/${maxRetries}`);
3288
- this.stats.tasksExecuted++;
3289
- // AMD-08: depth=1 (sub-agent chain)
3290
- if (!monotask_state_machine_1.MonotaskSM.claim(subtask.agent, subtask.id, 1)) {
3291
- log(c.yellow, ` [monotask] ${subtask.agent} unavailable — skipping sub-task attempt ${attempt}`);
3292
- continue;
3293
- }
3294
- monotask_state_machine_1.MonotaskSM.start(subtask.agent, subtask.id);
3295
- const subStart = Date.now();
3296
- const result = await agent.execute(subtask, lastReview);
3297
- if (result.files.length === 0) {
3298
- const dels = [...(subtask.deliverables?.code || []), ...(subtask.deliverables?.tests || []), ...(subtask.deliverables?.docs || [])];
3299
- const reasons = subtask._failureReasons || [];
3300
- const inferred = reasons.length ? reasons.join('; ') : (dels.length === 0 ? 'empty-deliverables' : 'unknown');
3301
- log(c.red, ` ✗ No files produced for sub-task ${subtask.id} [model=${result.model || 'n/a'}, deliverables=${dels.length}, reason=${inferred}]`);
3302
- monotask_state_machine_1.MonotaskSM.release(subtask.agent, subtask.id, `no files: ${inferred.slice(0, 60)}`);
3303
- return false;
3304
- }
3305
- // Dual supervisor review
3306
- const [review1, review2] = await Promise.all([
3307
- this.supervisor.reviewTask(subtask, result.files),
3308
- this.supervisor2.reviewTask(subtask, result.files),
3309
- ]);
3310
- const dualResult = await reconcileSupervisorReviews(review1, review2, subtask, this.ceo);
3311
- const review = dualResult.finalReview;
3312
- if (!dualResult.consensus)
3313
- this.stats.conflicts++;
3314
- if (dualResult.escalatedToCEO)
3315
- this.stats.escalations++;
3316
- lastReview = review;
3317
- // TICKET-214: instrument SUB-TASK attempts. The MAIN task loop already taps KSL +
3318
- // records reputation, but the split sub-task loop did neither — so split sprints
3319
- // went unrecorded in KSL and their rejections were never filed. Tap + score every
3320
- // attempt here too. Best-effort, non-fatal.
3321
- recordAgentScore(subtask.agent, review.score);
3322
- try {
3323
- const _sid = resolveActiveSprintId();
3324
- (0, orchestrator_tap_1.tapAttempt)({
3325
- sprint_id: _sid, task_id: subtask.id, attempt, agent: subtask.agent,
3326
- model: result.model || 'unknown',
3327
- prompt: String(subtask.context || subtask.title || subtask.id),
3328
- reply: (result.files || []).map((f) => f.content || '').join('\n').slice(0, 20000),
3329
- duration_ms: Date.now() - subStart,
3330
- });
3331
- }
3332
- catch { /* non-fatal */ }
3333
- if (review.verdict === 'APPROVED') {
3334
- this.stats.approved++;
3335
- monotask_state_machine_1.MonotaskSM.complete(subtask.agent, subtask.id);
3336
- log(c.green, ` ✓ Sub-task ${subtask.id} APPROVED on attempt ${attempt} (${review.score}/100)`);
3337
- return true;
3338
- }
3339
- this.stats.rejected++;
3340
- log(c.yellow, ` ↻ Sub-task ${subtask.id} REJECTED on attempt ${attempt} (${review.score}/100)`);
3341
- try {
3342
- (0, code_failure_logger_1.logCodeFailure)({ taskId: subtask.id, sprintId: resolveActiveSprintId(), agentId: resolveAgentDid(subtask.agent), attemptNum: attempt, score: review.score || 0, model: result.model || 'unknown', rejectionReason: review.summary || 'sub-task rejected', issues: review.issues || [], failType: 'supervisor_rejected' });
3343
- }
3344
- catch { /* non-fatal */ }
3345
- safeResetLastCommit(subtask.id, subtask.agent, subtask.type, ' ');
3346
- monotask_state_machine_1.MonotaskSM.release(subtask.agent, subtask.id, `rejected attempt ${attempt}`);
3347
- }
3348
- log(c.red, ` ✗ Sub-task ${subtask.id} FAILED after ${maxRetries} attempts`);
3349
- return false;
3350
- }
3351
- // ===== Main task executor with CTO auto-decomposition =====
3352
- async executeTask(task) {
3353
- // Sprint 1309: default to 'coder' when task.agent is not set (queue-prescribed sprints omit agent field)
3354
- const agentName = task.agent || 'coder';
3355
- const agent = this.agents.get(agentName);
3356
- if (!agent) {
3357
- log(c.red, `Agent not found: ${agentName}`);
3358
- task.status = 'rejected';
3359
- this.persistTaskStatus(task);
3360
- // Record failure in taskRuns
3361
- this.taskRuns.push({
3362
- task_id: task.id, title: task.title || task.id, type: task.type,
3363
- task_target: task.task_target || 'cloud-code',
3364
- status: 'rejected', attempts: 0, model_used: '', provider: '',
3365
- tokens_total: 0, duration_seconds: 0, files_written: [],
3366
- review: null, error: `Agent not found: ${agentName}`, rejection_reason: 'Agent not found',
3367
- });
3368
- return;
3369
- }
3370
- // Sprint 706: BrainX — inject memories before task execution
3371
- try {
3372
- if (this._brainxBridge) {
3373
- const injection = await this._brainxBridge.injectMemories(task.agent);
3374
- if (injection.memory_count > 0)
3375
- log(c.gray, ` [BrainX] Injected ${injection.memory_count} memories for ${task.agent}`);
3376
- }
3377
- }
3378
- catch { /* BrainX injection is non-blocking */ }
3379
- const taskRunStart = Date.now();
3380
- const taskRun = {
3381
- task_id: task.id,
3382
- title: task.title || task.id,
3383
- type: task.type,
3384
- task_target: task.task_target || 'cloud-code',
3385
- status: 'pending',
3386
- attempts: 0,
3387
- model_used: '',
3388
- provider: '',
3389
- tokens_total: 0,
3390
- duration_seconds: 0,
3391
- files_written: [],
3392
- review: null,
3393
- error: null,
3394
- rejection_reason: null,
3395
- };
3396
- // sprint-1566 F0b: aligned with sprint-runner's PER_TASK_LIFETIME_MAX_ATTEMPTS=5.
3397
- // Was 10 (single-line const hidden under a misleading comment claiming it had
3398
- // already been lowered — it hadn't). At 10 retries × ~14K tokens/attempt
3399
- // (MiniMax + dual review) a single stuck task could burn ~140K tokens before
3400
- // the lifetime gate had a chance to look at it on the next run.
3401
- const MAX_RETRIES = parseInt(process.env.MAX_RETRIES_PER_RUN || '3', 10);
3402
- const TRUNCATION_THRESHOLD = 1;
3403
- // sprint-1566 F0c: per-task token budget. Caps cumulative tokens spent on
3404
- // one task across its retries so a single task can't eat the daily wallet.
3405
- // Raised 2026-05-27 from 25K → 50K: sprint-1590 lost 3 of 6 tasks because
3406
- // first-attempt token spend (30-62K) routinely exceeded the 25K cap,
3407
- // killing retries before supervisor rejection feedback could be applied.
3408
- // 50K = ~$0.10 worst-case at DeepSeek pricing for first attempt + 1 retry.
3409
- // TICKET-209 (2026-05-29): bumped 100k → 200k. With EDIT-MODE now the
3410
- // default for modify tasks, 200k is more headroom than legitimate work
3411
- // needs — but covers the long-tail of large create tasks (multi-section
3412
- // spec docs, full-module rewrites) without forcing escalation.
3413
- const PER_TASK_TOKEN_BUDGET = parseInt(process.env.PER_TASK_TOKEN_BUDGET || '200000', 10);
3414
- let taskTokensSpent = 0;
3415
- let truncationCount = 0;
3416
- let lastReview;
3417
- // OMEL AMD-13: Create isolated tmpdir for this task (cleaned up in finally)
3418
- const phantomCtx = phantom_workspace_1.phantomWorkspace.create(task.id);
3419
- try {
3420
- // CTO-20260528-002 (2026-05-27): one-file-per-call enforcement.
3421
- // The store_page incident in v12r+1 was a multi-file task that nobody
3422
- // pre-screened. Atomic tasks are easier to review, retry, and roll back.
3423
- // If a task targets >1 source file, force a per-file split up front —
3424
- // routeToDecomposer's Strategy A handles this deterministically.
3425
- {
3426
- const codeFiles = task.deliverables?.code?.filter((f) => !/__tests__|\.test\./.test(f)) || [];
3427
- const editFiles = task.deliverables?.edits || [];
3428
- const sourceFileCount = codeFiles.length + editFiles.length;
3429
- if (sourceFileCount > 1 && task.agent === 'coder') {
3430
- log(c.yellow, ` [Atomicity] PRE-FLIGHT REJECT: ${task.id} targets ${sourceFileCount} source files (one-file-per-call policy)`);
3431
- const route = (0, decomposer_feedback_1.routeToDecomposer)({
3432
- original_task_id: task.id,
3433
- rejection_signal: 'needs_resplit',
3434
- suggested_splits: [...codeFiles, ...editFiles],
3435
- learnings_ref: 'docs/learnings.md §1',
3436
- original_task: task,
3437
- });
3438
- if ('task_split' in route) {
3439
- log(c.cyan, ` [Atomicity] split into ${route.task_split.length} per-file sub-tasks`);
3440
- try {
3441
- this.injectSplitTasks(task, route.task_split, route.rationale);
3442
- }
3443
- catch (e) {
3444
- log(c.red, ` Inject failed: ${e.message?.slice(0, 120)}`);
3445
- }
3446
- task.status = 'replaced-by-split';
3447
- task.rejected_reason = `Replaced by ${route.task_split.length} per-file sub-tasks (one-file-per-call policy)`;
3448
- this.persistTaskStatus(task);
3449
- taskRun.status = 'replaced-by-split';
3450
- taskRun.rejection_reason = 'Atomicity pre-flight: multi-file task';
3451
- taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
3452
- this.taskRuns.push(taskRun);
3453
- phantom_workspace_1.phantomWorkspace.cleanup(phantomCtx);
3454
- return;
3455
- }
3456
- // Unsplittable multi-file (e.g. zero deliverables resolved) → fall through
3457
- // to the token validator so we don't hard-block tasks the decomposer can't
3458
- // disambiguate. Logged for founder review.
3459
- log(c.gray, ` [Atomicity] decomposer could not split — proceeding (founder review): ${route.reason}`);
3460
- }
3461
- }
3462
- // sprint-1566 F0: pre-flight token-budget validator. If the task's output
3463
- // is predicted to exceed the MiniMax truncation point (~4500 tokens),
3464
- // route to decomposer-feedback for a structural re-split BEFORE any LLM
3465
- // is dispatched. Stops the truncation cascade at the source.
3466
- const validation = (0, token_budget_validator_1.validateTask)(task);
3467
- if (!validation.ok) {
3468
- // Discriminated-union narrow via Extract — boolean discriminator alone
3469
- // isn't reliably narrowing under our tsconfig + alias-imported types.
3470
- const rej = validation;
3471
- log(c.yellow, ` [TokenBudget] PRE-FLIGHT REJECT: ${task.id} — est ${rej.estimated_tokens} tokens > threshold`);
3472
- log(c.gray, ` Reason: ${rej.reason}`);
3473
- log(c.gray, ` Suggested split: ${rej.suggested_split.join(', ')}`);
3474
- const route = (0, decomposer_feedback_1.routeToDecomposer)({
3475
- original_task_id: task.id,
3476
- rejection_signal: 'needs_resplit',
3477
- original_estimate_tokens: rej.estimated_tokens,
3478
- suggested_splits: rej.suggested_split,
3479
- learnings_ref: 'docs/learnings.md §1',
3480
- original_task: task,
3481
- });
3482
- if ('task_split' in route) {
3483
- log(c.cyan, ` [DecomposerFeedback] split into ${route.task_split.length} sub-tasks via ${route.strategy}`);
3484
- log(c.gray, ` ${route.rationale}`);
3485
- // Inject splits as new pending tasks so they get picked up next run.
3486
- // Persist to the active sprint file so sprint-runner sees them.
3487
- try {
3488
- this.injectSplitTasks(task, route.task_split, route.rationale);
3489
- }
3490
- catch (e) {
3491
- log(c.red, ` Inject failed: ${e.message?.slice(0, 120)}`);
3492
- }
3493
- task.status = 'replaced-by-split';
3494
- task.rejected_reason = `Replaced by ${route.task_split.length} per-${route.strategy === 'per_file' ? 'file' : 'part'} sub-tasks (pre-flight budget gate)`;
3495
- this.persistTaskStatus(task);
3496
- taskRun.status = 'replaced-by-split';
3497
- taskRun.rejection_reason = `Token-budget pre-flight: ${rej.reason}`;
3498
- taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
3499
- this.taskRuns.push(taskRun);
3500
- phantom_workspace_1.phantomWorkspace.cleanup(phantomCtx);
3501
- return;
3502
- }
3503
- else {
3504
- log(c.red, ` [DecomposerFeedback] cannot split → escalate to founder: ${route.reason}`);
3505
- task.status = 'rejected';
3506
- task.rejected_reason = route.reason;
3507
- this.persistTaskStatus(task);
3508
- taskRun.status = 'rejected';
3509
- taskRun.error = route.reason;
3510
- taskRun.rejection_reason = 'Token-budget pre-flight + unsplittable';
3511
- taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
3512
- this.taskRuns.push(taskRun);
3513
- phantom_workspace_1.phantomWorkspace.cleanup(phantomCtx);
3514
- return;
3515
- }
3516
- }
3517
- for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
3518
- // CTO-006 telemetry-blackout hotfix (2026-05-29, Slice A of TICKET-135):
3519
- // record attempt_count on the task so it propagates via persistTaskStatus
3520
- // → ACTIVE → .swarm-state → CTO's retrospective. Without this, every
3521
- // post-mortem said attempt_count=? after 3 attempts. 10 zero-ship
3522
- // sprints diagnosed by the CTO as "we can't root-cause because
3523
- // telemetry is dark."
3524
- task.attempt_count = attempt;
3525
- // Reset per-attempt flags so the escalation pact only fires when THIS
3526
- // attempt's execution actually tripped integrity/truncation, not stale
3527
- // state from a prior attempt.
3528
- delete task._integrityFailed;
3529
- // sprint-1566 F0c: per-task token budget check at start of each attempt
3530
- if (taskTokensSpent >= PER_TASK_TOKEN_BUDGET) {
3531
- log(c.red, ` [TokenBudget] PER-TASK BUDGET EXCEEDED: ${task.id} spent ${taskTokensSpent} > ${PER_TASK_TOKEN_BUDGET} tokens — aborting retries`);
3532
- task.status = 'rejected';
3533
- task.rejected_reason = `Per-task budget exceeded: ${taskTokensSpent} > ${PER_TASK_TOKEN_BUDGET} tokens after ${attempt - 1} attempts`;
3534
- taskRun.status = 'rejected';
3535
- taskRun.error = `Per-task token budget exceeded (${taskTokensSpent}/${PER_TASK_TOKEN_BUDGET})`;
3536
- taskRun.rejection_reason = 'Per-task budget exceeded';
3537
- // Route to decomposer for over-budget too — same as truncation
3538
- try {
3539
- const route = (0, decomposer_feedback_1.routeToDecomposer)({
3540
- original_task_id: task.id,
3541
- rejection_signal: 'over_budget',
3542
- original_estimate_tokens: taskTokensSpent,
3543
- suggested_splits: [],
3544
- learnings_ref: 'docs/learnings.md §1',
3545
- original_task: task,
3546
- });
3547
- if ('task_split' in route) {
3548
- this.injectSplitTasks(task, route.task_split, route.rationale);
3549
- log(c.cyan, ` [DecomposerFeedback] over-budget → ${route.task_split.length} sub-tasks injected for next run`);
3550
- }
3551
- else {
3552
- log(c.gray, ` [DecomposerFeedback] over-budget unsplittable: ${route.reason}`);
3553
- }
3554
- }
3555
- catch (e) {
3556
- log(c.red, ` Inject failed: ${e.message?.slice(0, 120)}`);
3557
- }
3558
- break;
3559
- }
3560
- log(c.blue, `\n${'='.repeat(60)}`);
3561
- log(c.blue, `Task: ${task.id} | Agent: ${task.agent} | Attempt: ${attempt}/${MAX_RETRIES}`);
3562
- log(c.blue, `${'='.repeat(60)}`);
3563
- // AMD-26 KSL: snapshot per-attempt context for the tap.
3564
- let kslSprintId = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
3565
- // sprint-runner passes logs/sprint-runner-active.json — read the real sprint_id from its contents.
3566
- if (kslSprintId === 'sprint-runner-active') {
3567
- try {
3568
- kslSprintId = JSON.parse((0, fs_1.readFileSync)(process.argv[2], 'utf-8')).sprint_id || kslSprintId;
3569
- }
3570
- catch { /* fall back to path-derived */ }
3571
- }
3572
- const kslAttemptStart = Date.now();
3573
- const kslPrompt = String(task.context || task.title || task.id);
3574
- task.status = 'in_progress';
3575
- if (attempt === 1) {
3576
- const _sprintId = kslSprintId;
3577
- (0, event_bus_publisher_1.publishTaskStarted)(task.agent, _sprintId, task.id, task.title || task.id).catch(() => { });
3578
- }
3579
- this.stats.tasksExecuted++;
3580
- taskRun.attempts = attempt;
3581
- // AMD-08: IDLE → RESERVED → ACTIVE (per attempt)
3582
- if (!monotask_state_machine_1.MonotaskSM.claim(task.agent, task.id)) {
3583
- log(c.yellow, ` [monotask] ${task.agent} unavailable — skipping attempt ${attempt}`);
3584
- continue;
3585
- }
3586
- monotask_state_machine_1.MonotaskSM.start(task.agent, task.id);
3587
- // OMEL AMD-13: WipeWitness — capture file state before agent writes
3588
- const preTokens = new Map();
3589
- for (const f of (task.deliverables?.code || [])) {
3590
- if ((0, fs_1.existsSync)(f))
3591
- preTokens.set(f, wipe_witness_1.wipeWitness.beforeWrite(f, task.agent));
3592
- }
3593
- // OMEL AMD-13: HumanBrake — require approval for bulk_overwrite on high-risk files
3594
- if (task.type === 'modify' && (task.deliverables?.code || []).length > 0) {
3595
- const firstFile = (task.deliverables?.code || [])[0] || '';
3596
- if (human_brake_1.humanBrake.isHighRisk('bulk_overwrite', { filePath: firstFile })) {
3597
- const approval = await human_brake_1.humanBrake.requireApproval('bulk_overwrite');
3598
- if (!approval.approved) {
3599
- log(c.yellow, ` [HumanBrake] SKIPPED: ${task.id} — ${approval.reason || 'not approved'}`);
3600
- task.status = 'skipped';
3601
- monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `human brake: ${approval.reason || 'not approved'}`);
3602
- break; // exit attempt loop — task will not execute
3603
- }
3604
- }
3605
- }
3606
- // Execute with rejection feedback if retrying
3607
- const tokensBefore = _globalTokensThisRun;
3608
- let result;
3609
- // Chomsky gate — evaluate + rewrite (max 2×) + log all evals (Sprint 1513)
3610
- if (task.context) {
3611
- try {
3612
- const chomskyRun = await (0, chomsky_runner_1.runChomskyGate)(task.context, agentName);
3613
- if (chomskyRun.rewrites > 0) {
3614
- log(c.cyan, ` [Chomsky] ${chomskyRun.rewrites}× rewrite — score ${chomskyRun.initialScore}→${chomskyRun.finalScore}/10`);
3615
- task.context = chomskyRun.finalPrompt;
3616
- }
3617
- else if (!chomskyRun.passed) {
3618
- log(c.yellow, ` [Chomsky] score ${chomskyRun.finalScore}/10 — pass-through (max rewrites reached)`);
3619
- }
3620
- }
3621
- catch { /* gate is non-blocking — fail open */ }
3622
- }
3623
- try {
3624
- result = await agent.execute(task, lastReview);
3625
- }
3626
- catch (execErr) {
3627
- log(c.red, ` ✗ Execution error: ${execErr.message?.substring(0, 200)}`);
3628
- monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `exec error: ${execErr.message?.substring(0, 80)}`);
3629
- if (attempt < MAX_RETRIES) {
3630
- log(c.yellow, ` Retrying after execution error (attempt ${attempt}/${MAX_RETRIES})...`);
3631
- continue;
3632
- }
3633
- throw execErr; // exhausted retries
3634
- }
3635
- // OMEL AMD-13: WipeWitness — compare after write, emit shrink alert if > 50% loss
3636
- for (const f of result.files) {
3637
- const tok = preTokens.get(f);
3638
- if (tok)
3639
- wipe_witness_1.wipeWitness.afterWrite(tok, (0, fs_1.existsSync)(f) ? (0, fs_1.statSync)(f).size : 0);
3640
- }
3641
- const deltaTokens = _globalTokensThisRun - tokensBefore;
3642
- taskRun.tokens_total += deltaTokens;
3643
- taskTokensSpent += deltaTokens; // sprint-1566 F0c: per-task budget tracking
3644
- taskRun.model_used = result.model || taskRun.model_used;
3645
- if (result.files.length === 0) {
3646
- // 2026-05-27 diagnostic patch: structured "no files produced" rejection.
3647
- // Captures (a) declared deliverable count, (b) model used, (c) failure
3648
- // reasons collected during execute() (e.g. edit-mode-empty:foo.ts:380lines,
3649
- // truncated, empty-edit-array). Replaces an opaque single-line log that
3650
- // gave the founder no idea why the swarm was no-oping.
3651
- const dels = [...(task.deliverables?.code || []), ...(task.deliverables?.tests || []), ...(task.deliverables?.docs || [])];
3652
- const reasons = task._failureReasons || [];
3653
- const inferred = reasons.length ? reasons.join('; ') : (dels.length === 0 ? 'empty-deliverables' : 'unknown');
3654
- const structured = `No files produced [model=${result.model || 'n/a'}, type=${task.type}, deliverables=${dels.length}, reason=${inferred}]`;
3655
- log(c.red, ` ✗ ${structured}`);
3656
- monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `no files: ${inferred.slice(0, 60)}`);
3657
- task.status = 'rejected';
3658
- this.persistTaskStatus(task);
3659
- taskRun.status = 'rejected';
3660
- taskRun.error = structured;
3661
- taskRun.rejection_reason = structured;
3662
- taskRun.failure_mode = inferred;
3663
- taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
3664
- this.taskRuns.push(taskRun);
3665
- return;
3666
- }
3667
- // B.10: Local QA gate — fast PASS/FAIL before expensive cloud supervisor
3668
- const qaFileContents = result.files.map(f => ({ path: f, content: (0, fs_1.existsSync)(f) ? (0, fs_1.readFileSync)(f, 'utf-8') : '' }));
3669
- const qaResult = await localQAGate(task, qaFileContents);
3670
- if (!qaResult.pass) {
3671
- log(c.yellow, ` [QA-gate] FAIL — ${qaResult.reason}`);
3672
- this.stats.rejected++;
3673
- task.status = 'rejected'; // will be reset on retry
3674
- safeResetLastCommit(task.id, task.agent, task.type, ' ');
3675
- (0, code_failure_logger_1.logCodeFailure)({ taskId: task.id, sprintId: resolveActiveSprintId(), agentId: resolveAgentDid(task.agent), attemptNum: attempt, score: 0, model: taskRun.model_used || result?.model || task.model || 'unknown', rejectionReason: qaResult.reason, issues: [], failType: 'qa_gate' });
3676
- monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `QA gate: ${qaResult.reason}`);
3677
- if (attempt < MAX_RETRIES) {
3678
- log(c.yellow, ' QA gate failed — retrying without supervisor...');
3679
- continue;
3680
- }
3681
- taskRun.status = 'rejected';
3682
- taskRun.rejection_reason = `QA gate: ${qaResult.reason}`;
3683
- taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
3684
- this.taskRuns.push(taskRun);
3685
- return;
3686
- }
3687
- log(c.gray, ` [QA-gate] PASS — ${qaResult.reason}`);
3688
- // Dual Supervisor review (DeepSeek/ClawRouter + Haiku in parallel)
3689
- task.status = 'review';
3690
- const [review1, review2] = await Promise.all([
3691
- this.supervisor.reviewTask(task, result.files),
3692
- this.supervisor2.reviewTask(task, result.files),
3693
- ]);
3694
- const dualResult = await reconcileSupervisorReviews(review1, review2, task, this.ceo);
3695
- const review = dualResult.finalReview;
3696
- if (!dualResult.consensus)
3697
- this.stats.conflicts++;
3698
- if (dualResult.escalatedToCEO)
3699
- this.stats.escalations++;
3700
- lastReview = review;
3701
- task.output = { files: result.files, commit: '', model: result.model, review };
3702
- // CTO-006 telemetry-blackout hotfix (2026-05-29, Slice A of TICKET-135):
3703
- // record score + grade + rejection_reason on the task so they propagate
3704
- // via persistTaskStatus. Without this, every post-mortem said "score=?".
3705
- task.score = review?.score;
3706
- task.grade = review?.grade;
3707
- task.rejection_reason = review?.verdict !== 'APPROVED' ? (review?.summary?.slice(0, 240) ?? null) : null;
3708
- if (review.verdict === 'APPROVED') {
3709
- task.status = 'done';
3710
- this.persistTaskStatus(task); // sprint-1547: persist before any post-approval work that could exit early
3711
- this.stats.approved++;
3712
- log(c.green, `\n✓ Task ${task.id} APPROVED on attempt ${attempt} (${review.score}/100)`);
3713
- const _sprintIdApproved = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
3714
- (0, event_bus_publisher_1.publishTaskCompleted)(task.agent, _sprintIdApproved, task.id, task.title || task.id, 0).catch(() => { });
3715
- aar_middleware_1.AARMiddleware.generateAndLog({ agentId: task.agent, taskId: task.id, sprintId: _sprintIdApproved, skillId: task.skill_id || task.type || 'code-generation', outcomeScore: review.score, actionSummary: (task.title || task.id).substring(0, 140), status: 'success' }).catch(() => { });
3716
- // AMD-20: PRM Judge — constitutional reward signal for approved tasks
3717
- (0, perm_judge_1.scoreTask)({ task_id: task.id, agent_id: task.agent, sprint_id: _sprintIdApproved, task_title: task.title || task.id, output_summary: review.summary || '', status: 'done' }).catch(() => { });
3718
- (0, trust_score_updater_1.updateTrustScore)(task.agent, 'approved', review.score); // Sprint 703: Dynamic trust update
3719
- // Sprint 706: BrainX — store success memory
3720
- try {
3721
- if (this._brainxBridge)
3722
- await this._brainxBridge.storeTaskMemory({ agent_id: task.agent, task_id: task.id, task_title: task.title || task.id, outcome: 'success', score: review.score, summary: (task.title || task.id).substring(0, 200), files_modified: result.files || [] });
3723
- }
3724
- catch { /* non-blocking */ }
3725
- (0, skill_crystalliser_1.crystalliseSkill)({ agentId: task.agent, taskId: task.id, sprintId: _sprintIdApproved, taskTitle: task.title || task.id, taskType: task.type || 'feature', model: task.model || 'qwen3:14b', taskTarget: task.task_target || 'local', score: review.score, approachSummary: (task.title || task.id).substring(0, 200), keyPatterns: review.strengths || [], antiPatterns: [] });
3726
- (0, code_asset_crystalliser_1.crystalliseCodeAsset)({ agentId: task.agent, sprintId: _sprintIdApproved, taskId: task.id, taskTitle: task.title || task.id, files: result.files, supervisorScore: review.score, origin: 'kognai-core' });
3727
- monotask_state_machine_1.MonotaskSM.complete(task.agent, task.id);
3728
- taskRun.status = 'done';
3729
- taskRun.files_written = result.files;
3730
- taskRun.review = {
3731
- verdict: review.verdict,
3732
- score: review.score,
3733
- grade: review.grade,
3734
- score_rationale: review.score_rationale,
3735
- strengths: review.strengths,
3736
- };
3737
- // SCORE protocol hook (founder rule 2026-05-27): every supervisor
3738
- // grade against a spawned-citizen agent feeds the citizen's reputation
3739
- // via the ACP rubric. Founding agents (CEO/sup/sherlock/etc.) aren't
3740
- // in the citizens registry yet so they're skipped here — backfill TBD.
3741
- try {
3742
- recordScoreForCitizen(task.agent, _sprintIdApproved, task.id, review.grade, 'approved-path');
3743
- }
3744
- catch (e) {
3745
- log(c.gray, ` [SCORE] skip: ${(e?.message || '').slice(0, 100)}`);
3746
- }
3747
- taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
3748
- this.taskRuns.push(taskRun);
3749
- (0, orchestrator_tap_1.tapAttempt)({
3750
- sprint_id: kslSprintId, task_id: task.id, attempt, agent: task.agent,
3751
- model: result.model || taskRun.model_used || 'unknown',
3752
- prompt: kslPrompt,
3753
- reply: `[approved score ${review.score}/100] files: ${result.files.join(', ')}\n\n${review.summary || ''}`,
3754
- tools_used: [], errors: [], cost_usd: 0,
3755
- duration_ms: Date.now() - kslAttemptStart,
3756
- });
3757
- return;
3758
- }
3759
- // Rejected — check for truncation pattern
3760
- this.stats.rejected++;
3761
- taskRun.review = {
3762
- verdict: review.verdict,
3763
- score: review.score,
3764
- grade: review.grade,
3765
- score_rationale: review.score_rationale,
3766
- issues: review.issues,
3767
- summary: review.summary,
3768
- };
3769
- // SCORE hook (rejected path) — record a negative-grade evaluation against
3770
- // the citizen so reputation actually moves on bad work.
3771
- try {
3772
- const _sid = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
3773
- recordScoreForCitizen(task.agent, _sid, task.id, review.grade, 'rejected-path');
3774
- }
3775
- catch (e) {
3776
- log(c.gray, ` [SCORE] skip: ${(e?.message || '').slice(0, 100)}`);
3777
- }
3778
- if (this.isTruncationRejection(review)) {
3779
- truncationCount++;
3780
- log(c.yellow, `\n↻ Task ${task.id} REJECTED on attempt ${attempt} (${review.score}/100) [TRUNCATION ${truncationCount}/${TRUNCATION_THRESHOLD}]`);
3781
- // 2026-05-28 model-escalation pact: deterministic CTO/CEO sign-off via
3782
- // policy. If DeepSeek truncated this task once, the NEXT retry routes
3783
- // through ClawRouter → claude-sonnet-4.6. Cost ceiling ~$0.10 per
3784
- // escalation at typical token volumes — well under the daily wallet.
3785
- // Cleared in CodingAgent.execute after consumption.
3786
- task._escalateNext = 'TRUNCATION';
3787
- }
3788
- else if (task._integrityFailed) {
3789
- log(c.yellow, `\n↻ Task ${task.id} REJECTED on attempt ${attempt} (${review.score}/100) [INTEGRITY-FAILED]`);
3790
- // Same pact: destructive-rewrite (file shrank past integrity threshold)
3791
- // signals the cheap model can't hold the file's contract — upgrade.
3792
- task._escalateNext = 'INTEGRITY_FAILED';
3793
- }
3794
- else if ((review?.score ?? 100) < 30 &&
3795
- attempt < MAX_RETRIES &&
3796
- ((task.deliverables?.code || []).some((f) => /\.(md|mdx)$/i.test(f))
3797
- || ['research', 'spec', 'docs'].includes((task.type || '').toLowerCase()))) {
3798
- // 2026-05-28 pact expansion: low-score spec/docs rejections also escalate.
3799
- // sprint-1613 failure mode: DeepSeek hit a ~3-5k output ceiling on a
3800
- // 300-line spec request and gave up at one section, then the same
3801
- // capacity ceiling re-trapped 5 sub-task retries. Truncation/integrity
3802
- // checks didn't catch it (the file was complete, just not the SPEC).
3803
- // Catch low-score (<30) markdown/spec rejections explicitly so the
3804
- // next retry hits Sonnet instead of redo-on-the-same-cheap-model.
3805
- // Guarded by attempt < MAX_RETRIES so we don't waste the flag on the
3806
- // final attempt where there's no retry to consume it.
3807
- log(c.yellow, `\n↻ Task ${task.id} REJECTED on attempt ${attempt} (${review.score}/100) [LOW-SCORE-SPEC]`);
3808
- task._escalateNext = 'LOW_SCORE_SPEC';
3809
- }
3810
- else {
3811
- log(c.yellow, `\n↻ Task ${task.id} REJECTED on attempt ${attempt} (${review.score}/100)`);
3812
- }
3813
- safeResetLastCommit(task.id, task.agent, task.type, ' ');
3814
- (0, code_failure_logger_1.logCodeFailure)({ taskId: task.id, sprintId: resolveActiveSprintId(), agentId: resolveAgentDid(task.agent), attemptNum: attempt, score: review?.score || 0, model: taskRun.model_used || result?.model || task.model || 'unknown', rejectionReason: review?.summary || 'supervisor rejected', issues: review?.issues || [], failType: 'supervisor_rejected' });
3815
- // Sprint 701: AAR logging on REJECTION path (governance remediation)
3816
- const _sprintIdRejected = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
3817
- aar_middleware_1.AARMiddleware.generateAndLog({ agentId: task.agent, taskId: task.id, sprintId: _sprintIdRejected, skillId: task.skill_id || task.type || 'code-generation', outcomeScore: review?.score || 0, actionSummary: `REJECTED: ${(task.title || task.id).substring(0, 120)} (attempt ${attempt})`, status: 'rejected' }).catch(() => { });
3818
- (0, trust_score_updater_1.updateTrustScore)(task.agent, 'rejected', review?.score || 0); // Sprint 703: Dynamic trust update
3819
- // Sprint 706: BrainX — store failure memory
3820
- try {
3821
- if (this._brainxBridge)
3822
- await this._brainxBridge.storeTaskMemory({ agent_id: task.agent, task_id: task.id, task_title: task.title || task.id, outcome: 'failure', score: review?.score || 0, summary: `REJECTED: ${(task.title || task.id).substring(0, 180)}`, files_modified: [] });
3823
- }
3824
- catch { /* non-blocking */ }
3825
- monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `rejected attempt ${attempt}`);
3826
- (0, orchestrator_tap_1.tapAttempt)({
3827
- sprint_id: kslSprintId, task_id: task.id, attempt, agent: task.agent,
3828
- model: taskRun.model_used || result?.model || task.model || 'unknown',
3829
- prompt: kslPrompt,
3830
- reply: `[rejected score ${review?.score || 0}/100] ${review?.summary || 'supervisor rejected'}`,
3831
- tools_used: [],
3832
- errors: [{ kind: 'supervisor_rejected', message: review?.summary || 'supervisor rejected' }],
3833
- cost_usd: 0,
3834
- duration_ms: Date.now() - kslAttemptStart,
3835
- });
3836
- // CTO AUTO-DECOMPOSE: After N consecutive truncation rejections, split the task
3837
- if (truncationCount >= TRUNCATION_THRESHOLD) {
3838
- log(c.cyan, `\n🔧 TRUNCATION THRESHOLD REACHED — CTO decomposing ${task.id}...`);
3839
- const subtasks = await this.ctoDecomposeTask(task);
3840
- if (subtasks.length > 1) {
3841
- log(c.cyan, ` Executing ${subtasks.length} sub-tasks sequentially...`);
3842
- let allPassed = true;
3843
- for (const subtask of subtasks) {
3844
- // Check sub-task dependencies
3845
- const subDeps = subtask.dependencies || [];
3846
- const unmetSubDeps = subDeps.filter(d => {
3847
- const depSt = subtasks.find(s => s.id === d);
3848
- return depSt && depSt.status !== 'done';
3849
- });
3850
- if (unmetSubDeps.length > 0) {
3851
- log(c.yellow, ` Skipping sub-task ${subtask.id}: unmet deps [${unmetSubDeps.join(', ')}]`);
3852
- allPassed = false;
3853
- continue;
3854
- }
3855
- const passed = await this.executeSubTask(subtask, 5);
3856
- if (passed) {
3857
- subtask.status = 'done';
3858
- }
3859
- else {
3860
- allPassed = false;
3861
- subtask.status = 'rejected';
3862
- log(c.red, ` Sub-task ${subtask.id} failed — stopping decomposition chain`);
3863
- break;
3864
- }
3865
- }
3866
- if (allPassed) {
3867
- task.status = 'done';
3868
- log(c.green, `\n✓ Task ${task.id} COMPLETED via CTO decomposition (${subtasks.length} sub-tasks)`);
3869
- taskRun.status = 'done';
3870
- taskRun.files_written = subtasks.flatMap(st => st.deliverables.code || []);
3871
- }
3872
- else {
3873
- task.status = 'rejected';
3874
- log(c.red, `\n✗ Task ${task.id} FAILED even after CTO decomposition`);
3875
- taskRun.status = 'rejected';
3876
- taskRun.rejection_reason = review.summary;
3877
- }
3878
- this.persistTaskStatus(task);
3879
- taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
3880
- this.taskRuns.push(taskRun);
3881
- return;
3882
- }
3883
- // If decomposition returned <=1 task, continue with normal retry loop
3884
- log(c.yellow, ' Decomposition produced ≤1 sub-task, continuing normal retries...');
3885
- truncationCount = 0; // Reset to avoid re-triggering
3886
- }
3887
- if (attempt < MAX_RETRIES) {
3888
- log(c.yellow, ` Retrying with rejection feedback...`);
3889
- }
3890
- }
3891
- task.status = 'rejected';
3892
- this.persistTaskStatus(task);
3893
- log(c.red, `\n✗ Task ${task.id} FAILED after ${MAX_RETRIES} attempts`);
3894
- const _sprintIdFailed = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
3895
- (0, event_bus_publisher_1.publishTaskFailed)(task.agent, _sprintIdFailed, task.id, task.title || task.id, lastReview?.summary || `Failed after ${MAX_RETRIES} attempts`).catch(() => { });
3896
- taskRun.status = 'rejected';
3897
- taskRun.rejection_reason = lastReview?.summary || `Failed after ${MAX_RETRIES} attempts`;
3898
- taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
3899
- this.taskRuns.push(taskRun);
3900
- }
3901
- finally {
3902
- // OMEL AMD-13: Always wipe the phantom tmpdir on task exit (success or failure)
3903
- phantom_workspace_1.phantomWorkspace.cleanup(phantomCtx);
3904
- }
3905
- }
3906
- async run() {
3907
- const startTime = Date.now();
3908
- const sprintStartTime = new Date().toISOString();
3909
- let gitHeadBefore = 'unknown';
3910
- try {
3911
- gitHeadBefore = (0, child_process_1.execSync)('git rev-parse --short HEAD', { timeout: 5000 }).toString().trim();
3912
- }
3913
- catch { /* ok */ }
3914
- log(c.bold, '\n🚀 Starting orchestration run...\n');
3915
- if (SOVEREIGN_MODE)
3916
- log(c.yellow, ' ⚡ SOVEREIGN MODE — all inference local ($0 cost floor)');
3917
- (0, wallet_state_1.logWalletStatus)();
3918
- // Mission Control — connect and register this sprint run
3919
- const mc = (0, mc_client_1.createMCClient)('sprint-orchestrator', 'worker');
3920
- let mcConnected = false;
3921
- try {
3922
- await mc.connect();
3923
- mcConnected = true;
3924
- log(c.gray, ' [MC] Connected to Mission Control');
3925
- }
3926
- catch {
3927
- log(c.gray, ' [MC] Mission Control unavailable — running without telemetry');
3928
- }
3929
- // 1. Load tasks
3930
- this.loadTasks();
3931
- // 069-06: emit sprint started event
3932
- const _evtSprintId = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
3933
- // Sprint 706: BrainX swarm bridge — create at sprint start
3934
- let brainxBridge = null;
3935
- try {
3936
- const agentIds = Array.from(new Set(this.tasks.map(t => t.agent)));
3937
- brainxBridge = (0, brainx_swarm_bridge_1.createSwarmBridge)(`swarm-${Date.now()}`, _evtSprintId, agentIds);
3938
- this._brainxBridge = brainxBridge;
3939
- log(c.gray, ` [BrainX] Bridge created for ${agentIds.length} agents`);
3940
- }
3941
- catch (e) {
3942
- log(c.gray, ` [BrainX] Bridge creation skipped: ${e.message}`);
3943
- }
3944
- (0, event_bus_publisher_1.publishSprintStarted)(_evtSprintId, this.tasks.filter(t => t.status === 'pending').length).catch(() => { });
3945
- if (this.tasks.length === 0) {
3946
- log(c.yellow, 'No tasks to execute');
3947
- if (mcConnected)
3948
- await mc.disconnect().catch(() => { });
3949
- return;
3950
- }
3951
- // ── CTO APPROVAL GATE — Exec Protocol §17 ──────────────────────────────
3952
- // Every autonomous sprint must be approved by the CTO agent before execution.
3953
- // Human-submitted sprints (source: 'human') are auto-approved.
3954
- // Prevents the swarm from inventing its own work outside the execution plan.
3955
- {
3956
- const sprintFile = process.argv[2] || 'sprints/current.json';
3957
- const sprintRaw = JSON.parse((0, fs_1.readFileSync)(sprintFile, 'utf-8'));
3958
- const sprintSource = sprintRaw.source || (sprintRaw.swarm === 'NOT USED' ? 'human' : 'autonomous_loop');
3959
- const proposal = {
3960
- sprint_id: _evtSprintId,
3961
- title: sprintRaw.name || sprintRaw.title || _evtSprintId,
3962
- description: sprintRaw.goal || sprintRaw.description || '',
3963
- tasks: this.tasks.map(t => `${t.id}: ${t.title || t.context || t.type}`),
3964
- estimated_complexity: sprintRaw.estimated_complexity || 'medium',
3965
- source: sprintSource,
3966
- // Sprint 1457 BUGFIX: pass Rule 3 contract fields from sprint JSON to CTO gate
3967
- inputs: sprintRaw.inputs,
3968
- outputs: sprintRaw.outputs,
3969
- success_criteria: sprintRaw.success_criteria,
3970
- };
3971
- log(c.magenta, `\n--- CTO Approval Gate ---`);
3972
- log(c.gray, ` Sprint: ${proposal.sprint_id} — "${proposal.title}"`);
3973
- log(c.gray, ` Source: ${proposal.source} (${proposal.tasks.length} tasks)`);
3974
- const ctoResult = await (0, cto_approval_gate_1.requestCTOApproval)(proposal, process.cwd(), 'kognai');
3975
- if (!ctoResult.approved) {
3976
- log(c.red, ` ✘ CTO REJECTED: ${ctoResult.reason}`);
3977
- log(c.red, ` Plan reference: ${ctoResult.plan_reference}`);
3978
- log(c.red, ` Confidence: ${ctoResult.cto_confidence}%`);
3979
- log(c.yellow, ` Sprint ${_evtSprintId} will NOT execute. Saving rejection to sprint file.`);
3980
- // Write rejection to sprint file so the loop doesn't retry
3981
- try {
3982
- sprintRaw.cto_gate = {
3983
- approved: false,
3984
- reason: ctoResult.reason,
3985
- plan_reference: ctoResult.plan_reference,
3986
- confidence: ctoResult.cto_confidence,
3987
- timestamp: ctoResult.timestamp,
3988
- };
3989
- (0, fs_1.writeFileSync)(sprintFile, JSON.stringify(sprintRaw, null, 2));
3990
- }
3991
- catch { /* non-critical */ }
3992
- if (mcConnected)
3993
- await mc.disconnect().catch(() => { });
3994
- return;
3995
- }
3996
- log(c.green, ` ✓ CTO APPROVED: ${ctoResult.reason}`);
3997
- log(c.gray, ` Plan reference: ${ctoResult.plan_reference} (confidence: ${ctoResult.cto_confidence}%)`);
3998
- }
3999
- // ── End CTO Gate ────────────────────────────────────────────────────────
4000
- // 2. CEO initial assessment (B.14: once per sprint, not once per conflict)
4001
- log(c.magenta, '\n--- Phase 1: CEO Initial Assessment ---');
4002
- await this.ceo.reviewSprintProgress(this.tasks);
4003
- const _ceoIntentDone = true; // flag for Phase 5: only re-run if ≥2 rejected
4004
- // 3. Execute coding tasks with review loop
4005
- log(c.blue, '\n--- Phase 2: Sprint Execution ---');
4006
- // Auto-cascade: if a task is rejected, immediately mark all downstream dependents as skipped
4007
- // This prevents tasks from being stuck as 'pending' forever across retries
4008
- let cascaded = true;
4009
- while (cascaded) {
4010
- cascaded = false;
4011
- for (const task of this.tasks) {
4012
- if (task.status !== 'pending')
4013
- continue;
4014
- const deps = task.dependencies || [];
4015
- const blockedBy = deps.filter(d => {
4016
- const depTask = this.tasks.find(t => t.id === d);
4017
- return depTask && (depTask.status === 'rejected' || depTask.status === 'skipped');
4018
- });
4019
- if (blockedBy.length > 0) {
4020
- task.status = 'skipped';
4021
- task.skippedReason = `Blocked by: ${blockedBy.join(', ')}`;
4022
- log(c.yellow, ` Auto-skipped ${task.id}: blocked by rejected/skipped deps [${blockedBy.join(', ')}]`);
4023
- cascaded = true;
4024
- }
4025
- }
4026
- }
4027
- // B.16: Wave-based parallel fan-out
4028
- // Each wave = all tasks whose dependencies are satisfied and that don't share output files.
4029
- // Serialized only when deliverable paths overlap (file conflict detection).
4030
- const remaining = this.tasks.filter(t => t.status === 'pending');
4031
- while (remaining.length > 0) {
4032
- // Find tasks whose dependencies are all done/skipped
4033
- const ready = remaining.filter(task => {
4034
- const deps = task.dependencies || [];
4035
- return deps.every(d => {
4036
- const dep = this.tasks.find(t => t.id === d);
4037
- return !dep || dep.status === 'done' || dep.status === 'skipped';
4038
- });
4039
- });
4040
- if (ready.length === 0)
4041
- break; // dependency deadlock — bail
4042
- // File conflict detection: build wave without overlapping deliverables
4043
- const filesInWave = new Set();
4044
- const wave = [];
4045
- for (const task of ready) {
4046
- const taskFiles = [
4047
- ...(task.deliverables?.code || []),
4048
- ...(task.deliverables?.tests || []),
4049
- ...(task.deliverables?.docs || []),
4050
- ];
4051
- const hasConflict = taskFiles.some(f => filesInWave.has(f));
4052
- if (!hasConflict) {
4053
- wave.push(task);
4054
- taskFiles.forEach(f => filesInWave.add(f));
4055
- }
4056
- // conflicting tasks stay in remaining for next wave
4057
- }
4058
- if (wave.length === 0)
4059
- wave.push(ready[0]); // break deadlock: force one task
4060
- // B.16: Split wave by task_target — local tasks serialized (Ollama can only run one at a time),
4061
- // cloud tasks run concurrently. Prevents Ollama queue timeout on parallel fan-out.
4062
- const localWave = wave.filter(t => t.task_target === 'local');
4063
- const cloudWave = wave.filter(t => t.task_target !== 'local');
4064
- // B.16-RL: Cloud concurrency cap — prevents burning the Claude 5h token budget.
4065
- // Default: 1 (serial). Override: MAX_CLOUD_CONCURRENCY env var.
4066
- const MAX_CLOUD_CONCURRENCY = parseInt(process.env.MAX_CLOUD_CONCURRENCY ?? '1', 10);
4067
- if (cloudWave.length > 1 && MAX_CLOUD_CONCURRENCY > 1) {
4068
- log(c.blue, ` [B.16] Parallel fan-out: ${cloudWave.length} cloud tasks (cap: ${MAX_CLOUD_CONCURRENCY})`);
4069
- }
4070
- else if (cloudWave.length > 1) {
4071
- log(c.blue, ` [B.16-RL] Serial cloud execution: ${cloudWave.length} tasks (MAX_CLOUD_CONCURRENCY=1)`);
4072
- }
4073
- if (localWave.length > 1) {
4074
- log(c.blue, ` [B.16] Sequential execution: ${localWave.length} local tasks (Ollama serialized)`);
4075
- }
4076
- else if (localWave.length === 1 && cloudWave.length === 0) {
4077
- // single task, no label needed
4078
- }
4079
- // Execute cloud tasks in batches of MAX_CLOUD_CONCURRENCY
4080
- for (let i = 0; i < cloudWave.length; i += MAX_CLOUD_CONCURRENCY) {
4081
- const batch = cloudWave.slice(i, i + MAX_CLOUD_CONCURRENCY);
4082
- await Promise.all(batch.map(t => this.executeTask(t)));
4083
- }
4084
- for (const t of localWave) {
4085
- await this.executeTask(t);
4086
- }
4087
- // Remove executed tasks from remaining
4088
- for (const t of wave) {
4089
- const idx = remaining.indexOf(t);
4090
- if (idx >= 0)
4091
- remaining.splice(idx, 1);
4092
- }
4093
- // Cascade rejections
4094
- cascaded = true;
4095
- while (cascaded) {
4096
- cascaded = false;
4097
- for (const t of this.tasks) {
4098
- if (t.status !== 'pending')
4099
- continue;
4100
- const tDeps = t.dependencies || [];
4101
- const tBlocked = tDeps.filter(d => {
4102
- const depTask = this.tasks.find(x => x.id === d);
4103
- return depTask && (depTask.status === 'rejected' || depTask.status === 'skipped');
4104
- });
4105
- if (tBlocked.length > 0) {
4106
- t.status = 'skipped';
4107
- t.skippedReason = `Blocked by: ${tBlocked.join(', ')}`;
4108
- log(c.yellow, ` Auto-skipped ${t.id}: blocked by deps [${tBlocked.join(', ')}]`);
4109
- cascaded = true;
4110
- }
4111
- }
4112
- }
4113
- // Remove newly-skipped/rejected from remaining
4114
- for (let i = remaining.length - 1; i >= 0; i--) {
4115
- if (remaining[i].status === 'skipped' || remaining[i].status === 'rejected') {
4116
- remaining.splice(i, 1);
4117
- }
4118
- }
4119
- }
4120
- // 4. CTO data-driven analysis + CMO reports
4121
- log(c.cyan, '\n--- Phase 3: CTO Data-Driven Analysis + CMO Reports ---');
4122
- let ctoReport = { summary: '', proposals: [], metrics_reviewed: [] };
4123
- let ctoDecisions = '';
4124
- let cmoReports = '';
4125
- try {
4126
- ctoReport = await this.cto.analyze();
4127
- // Load CMO reports (produced independently by Manus AI runner)
4128
- cmoReports = loadCMOReports();
4129
- if (cmoReports) {
4130
- log(c.magenta, ' CMO reports found — will include in CEO review');
4131
- }
4132
- else {
4133
- log(c.gray, ' No CMO reports available yet');
4134
- }
4135
- // Load CTO tech-watch reports (produced independently by run-cto-techwatch.ts)
4136
- const ctoTechWatch = loadCTOTechWatchReports();
4137
- if (ctoTechWatch) {
4138
- log(c.cyan, ' CTO tech-watch reports found — will include in CEO review');
4139
- cmoReports = cmoReports ? cmoReports + '\n\n' + ctoTechWatch : ctoTechWatch;
4140
- }
4141
- else {
4142
- log(c.gray, ' No CTO tech-watch reports available yet');
4143
- }
4144
- // Load Grok intelligence feed (Grok AI monitors X/Twitter for OpenClaw news)
4145
- const grokFeed = loadGrokFeed();
4146
- if (grokFeed) {
4147
- log(c.magenta, ' Grok intelligence feed found — will include in CEO review');
4148
- cmoReports = cmoReports ? cmoReports + '\n\n' + grokFeed : grokFeed;
4149
- }
4150
- else {
4151
- log(c.gray, ' No Grok feed reports available');
4152
- }
4153
- // Load Owner Directives (highest priority — always included)
4154
- const ownerDirectives = loadOwnerDirectives();
4155
- if (ownerDirectives) {
4156
- log(c.magenta, " Owner directives found — will include in CEO review (highest priority)");
4157
- cmoReports = ownerDirectives + (cmoReports ? "\n\n" + cmoReports : "");
4158
- }
4159
- // 5. CEO reviews CTO proposals + CMO reports
4160
- if (ctoReport.proposals.length > 0 || cmoReports) {
4161
- log(c.magenta, '\n--- Phase 4: CEO Reviews CTO Proposals + CMO Reports ---');
4162
- ctoDecisions = await this.ceo.reviewCTOProposals(ctoReport);
4163
- // Persist CEO decisions for CTO feedback loop + approved proposals tracking
4164
- persistCEODecisions(ctoDecisions, ctoReport);
4165
- // Handle approved new_agent proposals
4166
- const agentCreator = new AgentCreator();
4167
- for (const proposal of ctoReport.proposals) {
4168
- if (proposal.category === 'new_agent' && proposal.agent_spec) {
4169
- // Check if CEO approved this specific proposal
4170
- if (ctoDecisions.includes(proposal.id) && ctoDecisions.toUpperCase().includes('APPROVED')) {
4171
- log(c.green, `\n 🤖 CEO approved new agent: ${proposal.agent_spec.name}`);
4172
- agentCreator.createAgent(proposal.agent_spec);
4173
- log(c.green, ` Agent will be loaded on next orchestrator run.`);
4174
- }
4175
- else {
4176
- log(c.yellow, ` CEO did not approve agent: ${proposal.agent_spec.name}`);
4177
- }
4178
- }
4179
- }
4180
- }
4181
- else {
4182
- log(c.cyan, ' No proposals from CTO — stack is current and optimized');
4183
- ctoDecisions = 'No proposals to review.';
4184
- }
4185
- }
4186
- catch (error) {
4187
- log(c.yellow, ` CTO/CEO review cycle skipped: ${error.message}`);
4188
- ctoDecisions = 'CTO analysis was not performed this run.';
4189
- }
4190
- // 6. CEO final assessment — B.14: only runs if ≥2 tasks rejected (skips if sprint went well)
4191
- const rejectedCount = this.tasks.filter(t => t.status === 'rejected').length;
4192
- log(c.magenta, '\n--- Phase 5: CEO Final Assessment ---');
4193
- if (rejectedCount >= 2) {
4194
- log(c.magenta, ` ${rejectedCount} tasks rejected — CEO reviewing...`);
4195
- await this.ceo.reviewSprintProgress(this.tasks);
4196
- }
4197
- else {
4198
- log(c.gray, ` Only ${rejectedCount} rejected — skipping CEO reassessment (sprint OK)`);
4199
- }
4200
- (0, wallet_state_1.logWalletStatus)(); // Print wallet burn after sprint execution
4201
- // 069-06: emit budget events if thresholds crossed
4202
- try {
4203
- const _ws = (0, wallet_state_1.getWalletState)();
4204
- if (_ws.burnPct >= 95)
4205
- (0, event_bus_publisher_1.publishBudgetFreeze)(_evtSprintId, _ws.burnPct).catch(() => { });
4206
- else if (_ws.burnPct >= 80)
4207
- (0, event_bus_publisher_1.publishBudgetWarning)(_evtSprintId, _ws.burnPct, _ws.spentThisMonth, _ws.monthlyBudget).catch(() => { });
4208
- }
4209
- catch { /* wallet state unavailable */ }
4210
- // 6b. CTO autonomous post-sprint analysis (runs after EVERY sprint)
4211
- log(c.cyan, '\n--- Phase 5b: CTO Post-Sprint Analysis (Autonomous) ---');
4212
- let postSprintReport = '';
4213
- try {
4214
- postSprintReport = await this.cto.postSprintAnalysis(this.tasks, this.stats);
4215
- log(c.cyan, ' Post-sprint analysis saved to reports/cto/');
4216
- // Extract proposals from post-sprint analysis and feed into CEO review
4217
- const jsonMatch = postSprintReport.match(/```json\s*([\s\S]*?)```/);
4218
- if (jsonMatch) {
4219
- try {
4220
- const parsed = JSON.parse(jsonMatch[1].trim());
4221
- const postSprintProposals = parsed.proposals || [];
4222
- if (postSprintProposals.length > 0) {
4223
- log(c.cyan, ` Found ${postSprintProposals.length} proposals — sending to CEO for autonomous review`);
4224
- // Build CTOReport for CEO review
4225
- const postSprintCTOReport = {
4226
- summary: parsed.summary || 'Post-sprint analysis proposals',
4227
- proposals: postSprintProposals.map((p) => ({
4228
- id: p.id,
4229
- title: p.title,
4230
- category: p.category,
4231
- description: p.description,
4232
- estimated_impact: p.estimated_impact || '',
4233
- risk_level: p.risk_level || 'medium',
4234
- implementation_steps: p.implementation_steps || [],
4235
- })),
4236
- metrics_reviewed: ['sprint_results', 'failure_patterns', 'trend_analysis'],
4237
- };
4238
- // Phase 5c: CEO autonomously reviews post-sprint proposals
4239
- log(c.magenta, '\n--- Phase 5c: CEO Reviews Post-Sprint Proposals (Autonomous) ---');
4240
- const postSprintDecisions = await this.ceo.reviewCTOProposals(postSprintCTOReport);
4241
- // Persist CEO decisions + update approved-proposals tracker
4242
- persistCEODecisions(postSprintDecisions, postSprintCTOReport);
4243
- log(c.magenta, ' CEO post-sprint proposal review complete');
4244
- }
4245
- else {
4246
- log(c.cyan, ' No proposals in post-sprint analysis');
4247
- }
4248
- }
4249
- catch (parseErr) {
4250
- log(c.yellow, ` Could not parse post-sprint proposals JSON: ${parseErr.message}`);
4251
- }
4252
- }
4253
- }
4254
- catch (error) {
4255
- log(c.yellow, ` Post-sprint analysis skipped: ${error.message}`);
4256
- }
4257
- // 7. CEO generates daily report
4258
- log(c.magenta, '\n--- Phase 6: Daily Report Generation ---');
4259
- const ctoReportStr = `Summary: ${ctoReport.summary}\nProposals: ${ctoReport.proposals.length}\n${ctoReport.proposals.map(p => `- [${p.category}] ${p.title} (${p.risk_level})`).join('\n')}`;
4260
- const grokSection = loadGrokFeed();
4261
- const cmoSection = cmoReports
4262
- ? '\n\n## CMO Activity (Manus AI)\n' + cmoReports.substring(0, 2000)
4263
- : '\n\nCMO: No reports available this cycle.';
4264
- const grokForReport = grokSection ? '\n\n## Grok Intelligence Feed\nGrok AI reports available — included in CTO/CEO analysis.' : '\n\nGrok: No feed reports this cycle.';
4265
- const postSprintSection = postSprintReport ? '\n\n## CTO Post-Sprint Analysis\n' + postSprintReport.substring(0, 2000) : '';
4266
- await this.ceo.generateDailyReport(this.tasks, this.stats, ctoReportStr + cmoSection + grokForReport + postSprintSection, ctoDecisions);
4267
- // 8. Save updated sprint state
4268
- const sprintFile = process.argv[2] || 'sprints/current.json';
4269
- (0, fs_1.writeFileSync)(sprintFile, JSON.stringify({ tasks: this.tasks }, null, 2));
4270
- log(c.green, `\nSprint state saved to ${sprintFile}`);
4271
- // Sprint 706: BrainX — close bridge at sprint end
4272
- try {
4273
- if (brainxBridge)
4274
- await brainxBridge.close();
4275
- log(c.gray, ' [BrainX] Bridge closed');
4276
- }
4277
- catch { /* non-blocking */ }
4278
- // 069-06: emit sprint completed event
4279
- const completedCount = this.tasks.filter(t => t.status === 'done').length;
4280
- (0, event_bus_publisher_1.publishSprintCompleted)(_evtSprintId, this.tasks.length, completedCount).catch(() => { });
4281
- // 8b. Sync global token count into stats
4282
- this.stats.totalTokens = _globalTokensThisRun;
4283
- // 8c. Generate structured swarm run report
4284
- try {
4285
- (0, fs_1.mkdirSync)('reports/swarm-runs', { recursive: true });
4286
- (0, fs_1.mkdirSync)('logs/swarm-runs', { recursive: true });
4287
- let gitHeadAfter = 'unknown';
4288
- let gitBranch = 'unknown';
4289
- try {
4290
- gitHeadAfter = (0, child_process_1.execSync)('git rev-parse --short HEAD', { timeout: 5000 }).toString().trim();
4291
- gitBranch = (0, child_process_1.execSync)('git rev-parse --abbrev-ref HEAD', { timeout: 5000 }).toString().trim();
4292
- }
4293
- catch { /* ok */ }
4294
- // sprint-1566 F0e + F3: read from the real per-call aggregator (filled
4295
- // by recordModelCall after every callLLM) instead of the prior pattern
4296
- // that read taskRun.model_used (always 'unknown') and used a stale 5-row
4297
- // pricing dict. modelUsage now has provider/calls/input/output/tokens/
4298
- // cost_usd per model with real per-call cost from llm-cost-table.
4299
- const modelUsage = getModelsUsedReport();
4300
- const totalCostUsd = getTotalCostUsd();
4301
- const runReport = {
4302
- schema_version: '2.0.0', // CTO-20260528-001: bumped when capturing grade + score_rationale per-task
4303
- run_id: (0, crypto_1.randomUUID)(),
4304
- project: 'kognai',
4305
- sprint_file: sprintFile,
4306
- started_at: sprintStartTime,
4307
- finished_at: new Date().toISOString(),
4308
- duration_seconds: Math.round((Date.now() - startTime) / 1000),
4309
- git_branch: gitBranch,
4310
- git_head_before: gitHeadBefore,
4311
- git_head_after: gitHeadAfter,
4312
- sovereign_mode: SOVEREIGN_MODE,
4313
- summary: {
4314
- total_tasks: this.tasks.length,
4315
- done: this.tasks.filter(t => t.status === 'done').length,
4316
- rejected: this.tasks.filter(t => t.status === 'rejected').length,
4317
- skipped: this.tasks.filter(t => t.status === 'skipped').length,
4318
- approval_rate: +(this.stats.approved / Math.max(this.stats.tasksExecuted, 1)).toFixed(2),
4319
- total_tokens: this.stats.totalTokens,
4320
- supervisor_conflicts: this.stats.conflicts,
4321
- ceo_escalations: this.stats.escalations,
4322
- },
4323
- models_used: modelUsage,
4324
- total_cost_usd: +totalCostUsd.toFixed(4),
4325
- tasks: this.taskRuns,
4326
- };
4327
- // 1. Timestamped individual report (never overwritten)
4328
- const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
4329
- const reportPath = `reports/swarm-runs/${ts}.json`;
4330
- (0, fs_1.writeFileSync)(reportPath, JSON.stringify(runReport, null, 2));
4331
- // 2. Latest pointer (for quick dashboard access)
4332
- (0, fs_1.writeFileSync)('reports/swarm-runs/latest-run.json', JSON.stringify(runReport, null, 2));
4333
- // 3. Daily aggregate (accumulates ALL runs for the day)
4334
- const today = new Date().toISOString().slice(0, 10);
4335
- const dailyPath = `reports/swarm-runs/daily-${today}.json`;
4336
- let dailyRuns = [];
4337
- try {
4338
- dailyRuns = JSON.parse((0, fs_1.readFileSync)(dailyPath, 'utf-8'));
4339
- }
4340
- catch { /* first run today */ }
4341
- dailyRuns.push(runReport);
4342
- (0, fs_1.writeFileSync)(dailyPath, JSON.stringify(dailyRuns, null, 2));
4343
- log(c.green, `\n📊 Swarm run report: ${reportPath}`);
4344
- log(c.green, ` Daily aggregate: ${dailyPath} (${dailyRuns.length} run(s) today)`);
4345
- log(c.green, ` Tokens: ${this.stats.totalTokens.toLocaleString()} | Est. cost: $${totalCostUsd.toFixed(4)}`);
4346
- // 8d. Daily cost digest — persist ClawRouter spend summary (§17.5)
4347
- try {
4348
- const digest = getDailyCostDigest();
4349
- const digestPath = `logs/clawrouter/digest-${today}.json`;
4350
- (0, fs_1.writeFileSync)(digestPath, JSON.stringify(digest, null, 2));
4351
- log(c.cyan, ` 💰 Cost digest: $${digest.total_usd.toFixed(4)} across ${digest.call_count} calls (saved ${digest.tokens_saved_by_qcg} tokens via QCG)`);
4352
- }
4353
- catch { /* non-critical */ }
4354
- }
4355
- catch (err) {
4356
- log(c.yellow, ` [WARN] Swarm run report failed: ${err.message}`);
4357
- }
4358
- // 9. Post-sprint: PM2 reload backend + smoke test
4359
- await postSprintSmokeTest();
4360
- // Final summary
4361
- const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
4362
- log(c.bold, '\n╔══════════════════════════════════════════════════════════╗');
4363
- log(c.bold, '║ Orchestration Complete ║');
4364
- log(c.bold, '╚══════════════════════════════════════════════════════════╝');
4365
- log(c.green, ` Tasks executed: ${this.stats.tasksExecuted}`);
4366
- log(c.green, ` Approved: ${this.stats.approved}`);
4367
- log(c.red, ` Rejected: ${this.stats.rejected}`);
4368
- log(c.yellow, ` Supervisor conflicts: ${this.stats.conflicts}`);
4369
- log(c.magenta, ` CEO escalations: ${this.stats.escalations}`);
4370
- log(c.cyan, ` CTO proposals: ${ctoReport.proposals.length}`);
4371
- log(c.magenta, ` CMO reports: ${cmoReports ? 'loaded' : 'none'}`);
4372
- log(c.blue, ` Total time: ${elapsed}s`);
4373
- log(c.gray, ` Pipeline: CEO → MiniMax code → Dual review (DeepSeek + Haiku) → CEO resolves conflicts → CTO → CMO/Grok → Post-sprint analysis → Daily report`);
4374
- // Mission Control — report final stats and disconnect
4375
- if (mcConnected) {
4376
- try {
4377
- if (this.stats.totalTokens > 0) {
4378
- await mc.reportTokens('MiniMax-M2.5', this.stats.totalTokens, 0, 'sprint_run');
4379
- }
4380
- await mc.disconnect();
4381
- log(c.gray, ` [MC] Sprint reported: ${this.stats.approved} approved / ${this.stats.rejected} rejected / ${this.stats.totalTokens} tokens`);
4382
- }
4383
- catch { /* non-critical */ }
4384
- }
4385
- }
4386
- }
4387
- // ===== Post-sprint smoke test =====
66
+ const engine_primitives_1 = require("./engine-primitives");
67
+ Object.defineProperty(exports, "callLLM", { enumerable: true, get: function () { return engine_primitives_1.callLLM; } });
68
+ Object.defineProperty(exports, "localQAGate", { enumerable: true, get: function () { return engine_primitives_1.localQAGate; } });
69
+ Object.defineProperty(exports, "c", { enumerable: true, get: function () { return engine_primitives_1.c; } });
70
+ Object.defineProperty(exports, "log", { enumerable: true, get: function () { return engine_primitives_1.log; } });
71
+ Object.defineProperty(exports, "routeCall", { enumerable: true, get: function () { return engine_primitives_1.routeCall; } });
72
+ Object.defineProperty(exports, "callAnthropicCached", { enumerable: true, get: function () { return engine_primitives_1.callAnthropicCached; } });
73
+ Object.defineProperty(exports, "compressContext", { enumerable: true, get: function () { return engine_primitives_1.compressContext; } });
74
+ Object.defineProperty(exports, "normalizeReview", { enumerable: true, get: function () { return engine_primitives_1.normalizeReview; } });
75
+ var engine_helpers_1 = require("./engine-helpers");
76
+ Object.defineProperty(exports, "persistCEODecisions", { enumerable: true, get: function () { return engine_helpers_1.persistCEODecisions; } });
77
+ Object.defineProperty(exports, "resolveActiveSprintId", { enumerable: true, get: function () { return engine_helpers_1.resolveActiveSprintId; } });
78
+ Object.defineProperty(exports, "resolveAgentDid", { enumerable: true, get: function () { return engine_helpers_1.resolveAgentDid; } });
79
+ Object.defineProperty(exports, "recordAgentScore", { enumerable: true, get: function () { return engine_helpers_1.recordAgentScore; } });
80
+ Object.defineProperty(exports, "assessTaskComplexity", { enumerable: true, get: function () { return engine_helpers_1.assessTaskComplexity; } });
81
+ const engine_orchestrator_1 = require("./engine-orchestrator");
4388
82
  async function httpGet(url, timeoutMs = 8000) {
4389
83
  return new Promise((resolve) => {
4390
84
  const parsed = new URL(url);
@@ -4404,7 +98,7 @@ async function sendTelegramAlert(message) {
4404
98
  if (!botToken || !chatId)
4405
99
  return;
4406
100
  try {
4407
- await httpPost('https://api.telegram.org/bot' + botToken + '/sendMessage', {
101
+ await (0, engine_primitives_1.httpPost)('https://api.telegram.org/bot' + botToken + '/sendMessage', {
4408
102
  'Content-Type': 'application/json',
4409
103
  }, JSON.stringify({ chat_id: chatId, text: message, parse_mode: 'Markdown' }), 10000);
4410
104
  }
@@ -4414,22 +108,21 @@ async function postSprintSmokeTest() {
4414
108
  // Disabled — Invoica-specific endpoints (health/invoices/settlements) not applicable to Kognai
4415
109
  // Removed Sprint 205: was always returning HTTP 404 + flooding Telegram with false alerts
4416
110
  }
4417
- // ===== Main Entry =====
4418
- async function main() {
111
+ async function main(config = {}) {
4419
112
  // S67-005: Startup env check (OMEL AMD-13: via CredentialVault — hasSecret never logs value)
4420
113
  if (!credential_vault_1.credentialVault.hasSecret('ANTHROPIC_API_KEY', 'orchestrator')) {
4421
- log(c.yellow, '⚠ ANTHROPIC_API_KEY not set — Anthropic CEO + Sup2 Haiku will be unavailable.');
4422
- log(c.yellow, ' ClawRouter/DeepSeek will be the sole reviewer (mono-supervision). Set ANTHROPIC_API_KEY in .env for full dual-supervisor mode.');
114
+ (0, engine_primitives_1.log)(engine_primitives_1.c.yellow, '⚠ ANTHROPIC_API_KEY not set — Anthropic CEO + Sup2 Haiku will be unavailable.');
115
+ (0, engine_primitives_1.log)(engine_primitives_1.c.yellow, ' ClawRouter/DeepSeek will be the sole reviewer (mono-supervision). Set ANTHROPIC_API_KEY in .env for full dual-supervisor mode.');
4423
116
  }
4424
117
  if (!credential_vault_1.credentialVault.hasSecret('MINIMAX_API_KEY', 'orchestrator')) {
4425
- log(c.yellow, '⚠ MINIMAX_API_KEY not set — cloud-code tasks will fail.');
118
+ (0, engine_primitives_1.log)(engine_primitives_1.c.yellow, '⚠ MINIMAX_API_KEY not set — cloud-code tasks will fail.');
4426
119
  }
4427
120
  try {
4428
- const orchestrator = new Orchestrator();
121
+ const orchestrator = new engine_orchestrator_1.Orchestrator(config.spawnGate);
4429
122
  await orchestrator.run();
4430
123
  }
4431
124
  catch (error) {
4432
- log(c.red, `\n✗ Fatal error: ${error.message}`);
125
+ (0, engine_primitives_1.log)(engine_primitives_1.c.red, `\n✗ Fatal error: ${error.message}`);
4433
126
  console.error(error.stack);
4434
127
  process.exit(1);
4435
128
  }