@kognai/orchestrator-core 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +3 -0
- package/dist/index.js +3 -0
- package/dist/lib/did-resolver-registry.d.ts +15 -0
- package/dist/lib/did-resolver-registry.js +9 -0
- package/dist/lib/engine-agents.d.ts +71 -0
- package/dist/lib/engine-agents.js +835 -0
- package/dist/lib/engine-coding-agent.d.ts +17 -0
- package/dist/lib/engine-coding-agent.js +890 -0
- package/dist/lib/engine-helpers.d.ts +10 -0
- package/dist/lib/engine-helpers.js +319 -0
- package/dist/lib/engine-loaders.d.ts +5 -0
- package/dist/lib/engine-loaders.js +241 -0
- package/dist/lib/engine-orchestrator.d.ts +46 -0
- package/dist/lib/engine-orchestrator.js +1491 -0
- package/dist/lib/engine-primitives.d.ts +141 -0
- package/dist/lib/engine-primitives.js +748 -0
- package/dist/lib/orchestrate-engine.d.ts +14 -1
- package/dist/lib/orchestrate-engine.js +26 -4333
- package/dist/lib/plumber/extractor.d.ts +18 -0
- package/dist/lib/plumber/extractor.js +213 -0
- package/dist/lib/plumber/fixer.d.ts +75 -0
- package/dist/lib/plumber/fixer.js +165 -0
- package/dist/lib/plumber/index.d.ts +3 -0
- package/dist/lib/plumber/index.js +3 -0
- package/dist/lib/plumber/patcher.d.ts +44 -0
- package/dist/lib/plumber/patcher.js +210 -0
- package/dist/lib/preamble-provider-registry.d.ts +12 -0
- package/dist/lib/preamble-provider-registry.js +7 -0
- package/package.json +1 -1
|
@@ -56,4335 +56,29 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
56
56
|
};
|
|
57
57
|
})();
|
|
58
58
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
59
|
+
exports.assessTaskComplexity = exports.recordAgentScore = exports.resolveAgentDid = exports.resolveActiveSprintId = exports.persistCEODecisions = exports.normalizeReview = exports.compressContext = exports.callAnthropicCached = exports.routeCall = exports.log = exports.c = exports.localQAGate = exports.callLLM = void 0;
|
|
60
|
+
exports.postSprintSmokeTest = postSprintSmokeTest;
|
|
59
61
|
exports.runOrchestrator = main;
|
|
60
|
-
const fs_1 = require("fs");
|
|
61
|
-
const child_process_1 = require("child_process");
|
|
62
|
-
const crypto_1 = require("crypto");
|
|
63
62
|
const https = __importStar(require("https"));
|
|
64
63
|
const http = __importStar(require("http"));
|
|
65
|
-
// =====
|
|
66
|
-
// Captures ALL LLM tokens across every agent, supervisor, CEO, CTO call this run.
|
|
67
|
-
let _globalTokensThisRun = 0;
|
|
68
|
-
function _accumulateTokens(n) { _globalTokensThisRun += n; }
|
|
69
|
-
const _modelsUsedThisRun = new Map();
|
|
70
|
-
let _totalCostThisRun = 0;
|
|
71
|
-
function recordModelCall(provider, model, input_tokens, output_tokens, cost_usd) {
|
|
72
|
-
const key = model || 'unknown';
|
|
73
|
-
const existing = _modelsUsedThisRun.get(key);
|
|
74
|
-
if (existing) {
|
|
75
|
-
existing.calls += 1;
|
|
76
|
-
existing.input_tokens += input_tokens || 0;
|
|
77
|
-
existing.output_tokens += output_tokens || 0;
|
|
78
|
-
existing.cost_usd += cost_usd || 0;
|
|
79
|
-
}
|
|
80
|
-
else {
|
|
81
|
-
_modelsUsedThisRun.set(key, { calls: 1, input_tokens: input_tokens || 0, output_tokens: output_tokens || 0, cost_usd: cost_usd || 0, provider: provider || 'unknown' });
|
|
82
|
-
}
|
|
83
|
-
_totalCostThisRun += cost_usd || 0;
|
|
84
|
-
}
|
|
85
|
-
function getModelsUsedReport() {
|
|
86
|
-
const out = {};
|
|
87
|
-
for (const [model, e] of _modelsUsedThisRun.entries()) {
|
|
88
|
-
out[model] = { provider: e.provider, calls: e.calls, input_tokens: e.input_tokens, output_tokens: e.output_tokens, tokens: e.input_tokens + e.output_tokens, cost_usd: +e.cost_usd.toFixed(6) };
|
|
89
|
-
}
|
|
90
|
-
return out;
|
|
91
|
-
}
|
|
92
|
-
function getTotalCostUsd() { return _totalCostThisRun; }
|
|
93
|
-
// S64-002: Load .env relative to this file's directory (not process.cwd())
|
|
94
|
-
// Fixes supervisor ANTHROPIC_API_KEY missing when spawned from a different cwd (e.g., VPS path)
|
|
95
|
-
const mc_client_1 = require("./mc-client");
|
|
96
|
-
// V17: Local/cloud routing, wallet state, ByteRover memory
|
|
97
|
-
const ollama_client_1 = require("./ollama-client"); // availability check only — calls go through ClawRouter v2.0
|
|
98
|
-
// ClawRouter v2.0 — MANDATORY SINGLE GATEWAY (Exec Protocol §17)
|
|
99
|
-
// TICKET-215 Wave C: every LLM call routes through the core ModelRouter SEAM.
|
|
100
|
-
// The viem-backed router now lives in its OWN package (@kognai/clawrouter-x402);
|
|
101
|
-
// Kognai injects it into the zero-dep core slot at boot, so core never carries
|
|
102
|
-
// viem and a product that doesn't route on-chain simply doesn't inject it.
|
|
103
|
-
// TICKET-215 Wave D (split step 1): Kognai-local injection of the viem router +
|
|
104
|
-
// SCORE scorer into the core seams lives in one bootstrap module (side-effect import).
|
|
105
|
-
const model_router_registry_1 = require("./model-router-registry");
|
|
106
|
-
// Seam-backed bindings — existing call sites resolve through the injected router.
|
|
107
|
-
const routeCall = (req) => (0, model_router_registry_1.getModelRouter)().routeCall(req);
|
|
108
|
-
const getDailyCostDigest = () => (0, model_router_registry_1.getModelRouter)().getDailyCostDigest();
|
|
109
|
-
// Chomsky gate — prompt quality runner with retry + full JSONL logging (Sprint 1513)
|
|
110
|
-
const chomsky_runner_1 = require("./chomsky-runner");
|
|
111
|
-
// Sherlock v2 — ASMR-powered episodic memory retrieval for supervisor context (AMD-21-03)
|
|
112
|
-
const sherlock_memory_1 = require("./sherlock-memory");
|
|
113
|
-
// Legacy import kept for clawRouterIsAvailable() checks during transition
|
|
114
|
-
const clawrouter_client_1 = require("./clawrouter-client");
|
|
115
|
-
const local_model_router_1 = require("./local-model-router");
|
|
116
|
-
// CTO Approval Gate — every autonomous sprint reviewed before execution (Exec Protocol)
|
|
117
|
-
const cto_approval_gate_1 = require("./cto-approval-gate");
|
|
118
|
-
const citizenship_1 = require("./citizenship");
|
|
119
|
-
// TICKET-215 Wave D: citizen scoring via the core seam. The SCORE-backed scorer
|
|
120
|
-
// stays Kognai-local (out of zero-dep core) and is injected here; scoring goes
|
|
121
|
-
// through recordTaskScoreMonitored so every score emits a data.citizen_score event
|
|
122
|
-
// to the event-bus (kognai_events) — flowing back to Kognai monitoring + the Plumber.
|
|
123
|
-
const citizen_score_registry_1 = require("./citizen-score-registry");
|
|
124
|
-
/**
|
|
125
|
-
* Wire a supervisor's review into the SCORE protocol for the citizen that
|
|
126
|
-
* authored the task. No-op for agents not in the citizens registry yet
|
|
127
|
-
* (founding agents — CEO/sup/sherlock — aren't backfilled). Caller passes
|
|
128
|
-
* the agent slug; we look up the citizen record + DID.
|
|
129
|
-
*/
|
|
130
|
-
function recordScoreForCitizen(agent_name, sprint_id, task_id, grade, path) {
|
|
131
|
-
if (!grade || !['A', 'B', 'C', 'D', 'F'].includes(grade))
|
|
132
|
-
return; // legacy reviews w/o grade
|
|
133
|
-
const reg = (0, citizenship_1.readRegistry)();
|
|
134
|
-
const citizen = reg.citizens.find((c) => c.agent_name === agent_name);
|
|
135
|
-
if (!citizen)
|
|
136
|
-
return; // founding agent or unminted — skip silently
|
|
137
|
-
// Supervisor DID — single supervisor identity for now; can split per-pass later.
|
|
138
|
-
const supervisorDID = 'did:kognai:supervisor';
|
|
139
|
-
const result = (0, citizen_score_registry_1.recordTaskScoreMonitored)({
|
|
140
|
-
citizen_id: citizen.citizen_id,
|
|
141
|
-
agent_did: citizen.agent_did,
|
|
142
|
-
sprint_id,
|
|
143
|
-
task_id,
|
|
144
|
-
grade,
|
|
145
|
-
supervisor_did: supervisorDID,
|
|
146
|
-
});
|
|
147
|
-
log(c.gray, ` [SCORE] ${citizen.citizen_id} (${agent_name}): grade ${grade} → final ${result.final_score.toFixed(1)} (perf ${result.task_performance_score.toFixed(0)} × ${result.constitutional_multiplier})`);
|
|
148
|
-
}
|
|
149
|
-
const model_router_1 = require("./model-router");
|
|
150
|
-
const wallet_state_1 = require("./wallet-state");
|
|
151
|
-
// Sprint 652: BrainX episodic memory — swarm integration
|
|
152
|
-
const brainx_swarm_bridge_1 = require("./brainx-swarm-bridge");
|
|
153
|
-
const event_bus_publisher_1 = require("./event-bus-publisher");
|
|
154
|
-
const aar_middleware_1 = require("./aar-middleware");
|
|
155
|
-
const skill_crystalliser_1 = require("./skill-crystalliser");
|
|
156
|
-
// Sprint 703: Dynamic trust score updater
|
|
157
|
-
const trust_score_updater_1 = require("./trust-score-updater");
|
|
158
|
-
const code_asset_crystalliser_1 = require("./code-asset-crystalliser");
|
|
159
|
-
const monotask_state_machine_1 = require("./monotask-state-machine");
|
|
160
|
-
const code_failure_logger_1 = require("./code-failure-logger");
|
|
161
|
-
const failure_library_1 = require("./failure-library");
|
|
162
|
-
// sprint-1566 F0: token-budget pre-flight validator (replaces orphaned agents/token-budget-validator/)
|
|
163
|
-
const token_budget_validator_1 = require("./token-budget-validator");
|
|
164
|
-
// sprint-1566 F0d: decomposer feedback loop — route rejection back to a structural re-split
|
|
165
|
-
const decomposer_feedback_1 = require("./decomposer-feedback");
|
|
166
|
-
// sprint-1566 F3+F0e: per-model cost computation + wallet ledger writes
|
|
167
|
-
const llm_cost_table_1 = require("./llm-cost-table");
|
|
168
|
-
const ceo_wallet_1 = require("./ceo-wallet");
|
|
169
|
-
// OMEL AMD-13: Phantom Workspace — isolated tmpdir per task, prevents cross-task file bleed
|
|
170
|
-
const phantom_workspace_1 = require("./omel/phantom-workspace");
|
|
171
|
-
// OMEL AMD-13: Credential Vault — controlled secret access, never logs values
|
|
64
|
+
// ===== entry shell: re-export surface (back-compat for the split modules) + main()/runOrchestrator =====
|
|
172
65
|
const credential_vault_1 = require("./omel/credential-vault");
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
};
|
|
190
|
-
function normalizeReview(raw) {
|
|
191
|
-
const r = raw;
|
|
192
|
-
// If LLM returned a letter grade, derive score from it (authoritative).
|
|
193
|
-
// If only numeric score returned (legacy / parse failure), keep as-is.
|
|
194
|
-
if (r.grade && GRADE_TO_SCORE[r.grade] !== undefined) {
|
|
195
|
-
r.score = GRADE_TO_SCORE[r.grade];
|
|
196
|
-
}
|
|
197
|
-
return r;
|
|
198
|
-
}
|
|
199
|
-
// ===== Colors =====
|
|
200
|
-
const c = {
|
|
201
|
-
reset: '\x1b[0m', bold: '\x1b[1m',
|
|
202
|
-
red: '\x1b[31m', green: '\x1b[32m', yellow: '\x1b[33m',
|
|
203
|
-
blue: '\x1b[34m', magenta: '\x1b[35m', cyan: '\x1b[36m', gray: '\x1b[90m',
|
|
204
|
-
};
|
|
205
|
-
function log(color, msg) {
|
|
206
|
-
console.log(`${color}${msg}${c.reset}`);
|
|
207
|
-
}
|
|
208
|
-
// ===== Safe reset helper =====
|
|
209
|
-
// Replaces bare `git reset --hard HEAD~1`. Verifies the last commit is the
|
|
210
|
-
// orchestrator's own (matches the expected `feat(<agent>): <id> - <type>`
|
|
211
|
-
// pattern) before reverting. Skips the reset if HEAD has moved on to someone
|
|
212
|
-
// else's commit — protects against wiping work from concurrent Claude sessions
|
|
213
|
-
// or human commits made while the orchestrator was running.
|
|
214
|
-
function safeResetLastCommit(taskId, agentName, taskType, indent = ' ') {
|
|
215
|
-
const expectedMsg = `feat(${agentName ?? 'coder'}): ${taskId} - ${taskType ?? 'feature'}`;
|
|
216
|
-
try {
|
|
217
|
-
const lastMsg = (0, child_process_1.execSync)('git log -1 --format=%s', { timeout: 5000 }).toString().trim();
|
|
218
|
-
if (lastMsg !== expectedMsg) {
|
|
219
|
-
log(c.yellow, `${indent}! Reset skipped — HEAD is "${lastMsg.substring(0, 60)}", not our commit. Working tree left as-is to protect concurrent work.`);
|
|
220
|
-
return false;
|
|
221
|
-
}
|
|
222
|
-
(0, child_process_1.execSync)('git reset --hard HEAD~1', { timeout: 10000 });
|
|
223
|
-
log(c.gray, `${indent}Reset to previous commit (dropped rejected code)`);
|
|
224
|
-
return true;
|
|
225
|
-
}
|
|
226
|
-
catch (err) {
|
|
227
|
-
log(c.gray, `${indent}Reset skipped: ${(err.message || '').substring(0, 80)}`);
|
|
228
|
-
return false;
|
|
229
|
-
}
|
|
230
|
-
}
|
|
231
|
-
// ===== ClawRouter v2.0 — MANDATORY SINGLE GATEWAY (Exec Protocol §17) =====
|
|
232
|
-
// ALL LLM calls route through routeCall() from clawrouter-v2.ts.
|
|
233
|
-
// Direct API calls to Anthropic, OpenAI, MiniMax, or Ollama are Sev-1 violations.
|
|
234
|
-
// The old provider-based callLLM() is replaced with a unified gateway that maps
|
|
235
|
-
// legacy provider+model pairs to ClawRouter v2.0 tier_class+complexity.
|
|
236
|
-
// Track direct_api_violations for sprint JSON (§17.6)
|
|
237
|
-
let _directApiViolations = 0;
|
|
238
|
-
let _llmCallsRouted = 0;
|
|
239
|
-
let _apexCalls = 0;
|
|
240
|
-
let _apexJudgePatternCompliant = true;
|
|
241
|
-
/**
|
|
242
|
-
* Unified LLM gateway — routes ALL calls through ClawRouter v2.0.
|
|
243
|
-
* Legacy provider parameter is mapped to ClawRouter tier/complexity:
|
|
244
|
-
* - 'ollama' / 'local' → T0-T2 (local Ollama, $0)
|
|
245
|
-
* - 'clawrouter' → T2.5 EXEC (cloud gateway)
|
|
246
|
-
* - 'anthropic' (Sonnet) → T3 APEX (constitutional decisions only)
|
|
247
|
-
* - 'anthropic' (Haiku) → T2.5 EXEC
|
|
248
|
-
* - 'openai' → T2.5 EXEC
|
|
249
|
-
* - 'minimax' → T2.5 EXEC (via ClawRouter)
|
|
250
|
-
*
|
|
251
|
-
* NOTE: The provider parameter is retained for backward compatibility but
|
|
252
|
-
* ALL routing decisions are made by ClawRouter v2.0. No direct API calls.
|
|
253
|
-
*/
|
|
254
|
-
async function callLLM(provider, model, systemPrompt, userPrompt, timeoutMs = 300000, agentId = 'orchestrator', taskType = 'orchestrator_call') {
|
|
255
|
-
_llmCallsRouted++;
|
|
256
|
-
// Map legacy provider+model to ClawRouter v2.0 request
|
|
257
|
-
const req = {
|
|
258
|
-
task_type: taskType,
|
|
259
|
-
tier_class: 'text',
|
|
260
|
-
complexity: mapLegacyToComplexity(provider, model),
|
|
261
|
-
context_tokens: Math.ceil((systemPrompt.length + userPrompt.length) / 4),
|
|
262
|
-
constitutional_flag: isConstitutionalCall(provider, model),
|
|
263
|
-
agent_id: agentId,
|
|
264
|
-
payload: {
|
|
265
|
-
system: systemPrompt,
|
|
266
|
-
prompt: userPrompt,
|
|
267
|
-
max_tokens: 16000,
|
|
268
|
-
},
|
|
269
|
-
};
|
|
270
|
-
// Track APEX calls for §17.6
|
|
271
|
-
if (req.constitutional_flag || req.complexity === 'apex') {
|
|
272
|
-
_apexCalls++;
|
|
273
|
-
}
|
|
274
|
-
// Proactive provider-budget check (PR #18 reactive fallback's preventive twin).
|
|
275
|
-
// If <PROVIDER>_MONTHLY_BUDGET_USD env is set, check month-to-date spend
|
|
276
|
-
// against the cap BEFORE attempting the call. Status 'frozen' (>=95%) skips
|
|
277
|
-
// the call entirely and goes straight to the fallback path — avoids burning
|
|
278
|
-
// a credit-exhaustion error to learn the same thing. 'warning' (>=80%)
|
|
279
|
-
// alerts once per process lifetime but still attempts the call.
|
|
280
|
-
// No env set → status 'unmonitored' → no proactive check (today's behavior).
|
|
281
|
-
if (provider !== 'clawrouter' && provider !== 'ollama') {
|
|
282
|
-
const budget = (0, ceo_wallet_1.getProviderBudgetStatus)(provider);
|
|
283
|
-
if (budget.status === 'frozen') {
|
|
284
|
-
log(c.yellow, ` [budget-guard] ${provider} ${budget.pct.toFixed(0)}% of $${budget.budget_usd} cap → skipping to fallback (CEO-wallet-funded) without trying upstream`);
|
|
285
|
-
_maybeAlertBudget(provider, 'frozen', budget);
|
|
286
|
-
try {
|
|
287
|
-
const fallbackResp = await callLLM('clawrouter', 'deepseek/deepseek-chat', systemPrompt, userPrompt, timeoutMs, agentId, `${taskType}_budget_proactive_fallback_from_${provider}`);
|
|
288
|
-
fallbackResp.fallback_used = true;
|
|
289
|
-
fallbackResp.fallback_from_provider = provider;
|
|
290
|
-
fallbackResp.fallback_reason = 'budget_frozen';
|
|
291
|
-
return fallbackResp;
|
|
292
|
-
}
|
|
293
|
-
catch (fallbackErr) {
|
|
294
|
-
log(c.red, ` [budget-guard] fallback also failed: ${fallbackErr.message} — proceeding to attempt original provider as last resort`);
|
|
295
|
-
// Fall through to the normal try — better to attempt + handle the error reactively
|
|
296
|
-
// than to leave the caller with nothing
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
else if (budget.status === 'warning') {
|
|
300
|
-
_maybeAlertBudget(provider, 'warning', budget);
|
|
301
|
-
}
|
|
302
|
-
}
|
|
303
|
-
try {
|
|
304
|
-
const result = await routeCall(req);
|
|
305
|
-
// sprint-1566 F3+F0e: clawrouter's wallet billing only fires on the
|
|
306
|
-
// x402-retry path (cost_usd != 0). The common direct path returns
|
|
307
|
-
// cost_usd=0 and the ledger stays empty. Compute cost from real tokens
|
|
308
|
-
// here using the per-model rate table + call deductCost so the ledger
|
|
309
|
-
// becomes the source of truth.
|
|
310
|
-
// Codex P2 (PR #9): record actual provider derived from result.model,
|
|
311
|
-
// not the caller's intent (provider param). The real routed-model name
|
|
312
|
-
// comes back in result.model and we infer the provider from it.
|
|
313
|
-
const callerModel = result.model || model;
|
|
314
|
-
// Codex P2 on PR #15: inferProvider returns the literal 'unknown' for
|
|
315
|
-
// unclassified models; treat that as a miss and fall back to the
|
|
316
|
-
// caller-intent provider so the ledger never records the string 'unknown'.
|
|
317
|
-
const inferred = inferProvider(callerModel);
|
|
318
|
-
const realProvider = (inferred && inferred !== 'unknown') ? inferred : provider;
|
|
319
|
-
const inputTokens = result.input_tokens || 0;
|
|
320
|
-
const outputTokens = result.output_tokens || 0;
|
|
321
|
-
const computed = (0, llm_cost_table_1.computeCost)(callerModel, inputTokens, outputTokens);
|
|
322
|
-
// Prefer real billed cost (x402 path) over our estimate
|
|
323
|
-
const costUsd = (result.cost_usd && result.cost_usd > 0) ? result.cost_usd : computed;
|
|
324
|
-
try {
|
|
325
|
-
if (costUsd > 0)
|
|
326
|
-
(0, ceo_wallet_1.deductCost)(costUsd, agentId, taskType, realProvider, callerModel);
|
|
327
|
-
recordModelCall(realProvider, callerModel, inputTokens, outputTokens, costUsd);
|
|
328
|
-
}
|
|
329
|
-
catch { /* recording failure must never break the LLM call */ }
|
|
330
|
-
const response = {
|
|
331
|
-
choices: [{ message: { content: result.content } }],
|
|
332
|
-
usage: { total_tokens: inputTokens + outputTokens, input_tokens: inputTokens, output_tokens: outputTokens },
|
|
333
|
-
provider: realProvider,
|
|
334
|
-
model: callerModel,
|
|
335
|
-
cost_usd: costUsd,
|
|
336
|
-
};
|
|
337
|
-
_accumulateTokens(response.usage?.total_tokens || 0);
|
|
338
|
-
return response;
|
|
339
|
-
}
|
|
340
|
-
catch (err) {
|
|
341
|
-
const msg = String(err?.message || err);
|
|
342
|
-
// Provider-credit-exhaustion fallback: the CEO wallet is supposed to keep
|
|
343
|
-
// the swarm alive via x402, but Anthropic (and other direct-API providers)
|
|
344
|
-
// bypass that — they bill against a separate account balance the CEO wallet
|
|
345
|
-
// can't see. When that external balance hits zero we used to crash with
|
|
346
|
-
// exit-null mid-task (incident 2026-05-21). Now we downgrade to a
|
|
347
|
-
// CEO-wallet-funded provider (DeepSeek via ClawRouter, x402-enabled) and
|
|
348
|
-
// continue. The sprint completes at lower quality instead of crashing.
|
|
349
|
-
if (isCreditExhaustion(msg) && provider !== 'clawrouter') {
|
|
350
|
-
log(c.yellow, ` [fallback] ${provider} credit exhausted → downgrading to clawrouter/deepseek (alerting founder)`);
|
|
351
|
-
try {
|
|
352
|
-
alertCreditExhaustion(provider, msg, agentId, taskType);
|
|
353
|
-
}
|
|
354
|
-
catch { /* alert failure must never block */ }
|
|
355
|
-
try {
|
|
356
|
-
const fallbackResp = await callLLM('clawrouter', 'deepseek/deepseek-chat', systemPrompt, userPrompt, timeoutMs, agentId, `${taskType}_fallback_from_${provider}`);
|
|
357
|
-
fallbackResp.fallback_used = true;
|
|
358
|
-
fallbackResp.fallback_from_provider = provider;
|
|
359
|
-
return fallbackResp;
|
|
360
|
-
}
|
|
361
|
-
catch (fallbackErr) {
|
|
362
|
-
log(c.red, ` [fallback] downgrade also failed: ${fallbackErr.message} — re-throwing original`);
|
|
363
|
-
throw err;
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
log(c.red, ` [ClawRouter] Call failed: ${err.message}`);
|
|
367
|
-
throw err;
|
|
368
|
-
}
|
|
369
|
-
}
|
|
370
|
-
/**
|
|
371
|
-
* Heuristic check for provider-credit / quota / billing exhaustion in an error
|
|
372
|
-
* message. Conservative — matches only specific upstream-provider markers that
|
|
373
|
-
* consistently indicate "stop trying this provider, switch to a CEO-wallet-
|
|
374
|
-
* funded one." Does NOT match:
|
|
375
|
-
* - generic 429 rate limits (transient, should retry same provider)
|
|
376
|
-
* - bare 'payment required' / '402' strings (codex P2 on PR #17: would
|
|
377
|
-
* mis-classify ClawRouter's own missing-X402_WALLET_KEY error, which
|
|
378
|
-
* throws "...402 (payment required)..." per scripts/lib/clawrouter-v2.ts,
|
|
379
|
-
* and trigger a misleading "top up <provider>" alert when the real cause
|
|
380
|
-
* is local config, not external provider credits)
|
|
381
|
-
*
|
|
382
|
-
* Specific markers below all come from Anthropic / OpenAI / MiniMax error
|
|
383
|
-
* payloads when their account balance hits zero.
|
|
384
|
-
*/
|
|
385
|
-
function isCreditExhaustion(msg) {
|
|
386
|
-
const m = (msg || '').toLowerCase();
|
|
387
|
-
// Exclude local-config errors first — these throw 402 strings but aren't
|
|
388
|
-
// upstream-provider credit exhaustion.
|
|
389
|
-
if (m.includes('x402_wallet_key') || m.includes('wallet key') || m.includes('missing wallet'))
|
|
390
|
-
return false;
|
|
391
|
-
return (m.includes('insufficient credit') ||
|
|
392
|
-
m.includes('insufficient balance') ||
|
|
393
|
-
m.includes('credit balance is too low') ||
|
|
394
|
-
m.includes('credit_balance') ||
|
|
395
|
-
m.includes('quota exceeded') ||
|
|
396
|
-
m.includes('quota_exceeded') ||
|
|
397
|
-
m.includes('billing_hard_limit') ||
|
|
398
|
-
m.includes('insufficient_quota') ||
|
|
399
|
-
(m.includes('401') && (m.includes('credit') || m.includes('billing'))));
|
|
400
|
-
}
|
|
401
|
-
/** Fire-and-forget Telegram alert when a fallback fires. Best-effort; never blocks. */
|
|
402
|
-
function alertCreditExhaustion(failedProvider, errMsg, agentId, taskType) {
|
|
403
|
-
const botToken = process.env.SENIOR_CODER_BOT_TOKEN || process.env.TELEGRAM_BOT_TOKEN || '';
|
|
404
|
-
const groupIds = (process.env.SENIOR_CODER_TG_GROUP_ID || '').split(',').map(s => s.trim()).filter(Boolean);
|
|
405
|
-
if (!botToken || groupIds.length === 0)
|
|
406
|
-
return;
|
|
407
|
-
const text = `⚠️ ${failedProvider} credit exhausted — fallback to DeepSeek via ClawRouter for this call\n\n` +
|
|
408
|
-
`agent: ${agentId}\n` +
|
|
409
|
-
`task_type: ${taskType}\n` +
|
|
410
|
-
`error: ${errMsg.slice(0, 200)}\n\n` +
|
|
411
|
-
`Top up the ${failedProvider} account to restore full quality. The swarm continues at degraded quality on CEO-wallet-funded providers in the meantime.`;
|
|
412
|
-
_sendTelegramAlert(botToken, groupIds, text);
|
|
413
|
-
}
|
|
414
|
-
// Throttle proactive budget alerts: once per (provider, status) per process lifetime.
|
|
415
|
-
// Avoids spamming the chat when every Anthropic call in a row trips the same threshold.
|
|
416
|
-
const _budgetAlertsSent = new Set();
|
|
417
|
-
function _maybeAlertBudget(provider, status, report) {
|
|
418
|
-
const key = `${provider.toLowerCase()}:${status}`;
|
|
419
|
-
if (_budgetAlertsSent.has(key))
|
|
420
|
-
return;
|
|
421
|
-
_budgetAlertsSent.add(key);
|
|
422
|
-
const botToken = process.env.SENIOR_CODER_BOT_TOKEN || process.env.TELEGRAM_BOT_TOKEN || '';
|
|
423
|
-
const groupIds = (process.env.SENIOR_CODER_TG_GROUP_ID || '').split(',').map(s => s.trim()).filter(Boolean);
|
|
424
|
-
if (!botToken || groupIds.length === 0)
|
|
425
|
-
return;
|
|
426
|
-
const emoji = status === 'frozen' ? '🔴' : '🟡';
|
|
427
|
-
const verb = status === 'frozen' ? 'FROZEN — routing to fallback' : 'WARNING — still attempting';
|
|
428
|
-
const text = `${emoji} ${provider} monthly budget ${verb}\n\n` +
|
|
429
|
-
`spent: $${report.spent_month_usd.toFixed(4)} / $${report.budget_usd?.toFixed(2)} (${report.pct.toFixed(0)}%)\n` +
|
|
430
|
-
`threshold: ${status === 'frozen' ? '95%' : '80%'}\n\n` +
|
|
431
|
-
`${status === 'frozen'
|
|
432
|
-
? 'New ' + provider + ' calls go straight to DeepSeek (CEO-wallet-funded) until budget reset on month rollover OR ' + provider.toUpperCase() + '_MONTHLY_BUDGET_USD raised.'
|
|
433
|
-
: 'Top up ' + provider + ' account or raise ' + provider.toUpperCase() + '_MONTHLY_BUDGET_USD to avoid hitting 95% freeze.'}`;
|
|
434
|
-
_sendTelegramAlert(botToken, groupIds, text);
|
|
435
|
-
}
|
|
436
|
-
// Shared Telegram send helper — fire-and-forget, native https, never blocks.
|
|
437
|
-
function _sendTelegramAlert(botToken, groupIds, text) {
|
|
438
|
-
for (const chatId of groupIds) {
|
|
439
|
-
const body = JSON.stringify({ chat_id: parseInt(chatId, 10), text });
|
|
440
|
-
const req = require('https').request({
|
|
441
|
-
hostname: 'api.telegram.org',
|
|
442
|
-
path: `/bot${botToken}/sendMessage`,
|
|
443
|
-
method: 'POST',
|
|
444
|
-
headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) },
|
|
445
|
-
timeout: 5000,
|
|
446
|
-
});
|
|
447
|
-
req.on('error', () => { });
|
|
448
|
-
req.write(body);
|
|
449
|
-
req.end();
|
|
450
|
-
}
|
|
451
|
-
}
|
|
452
|
-
/**
|
|
453
|
-
* Derive the real LLM provider from the model name returned by ClawRouter.
|
|
454
|
-
* The orchestrator's caller passes a `provider` hint, but ClawRouter may
|
|
455
|
-
* route to a different actual model. This function gives us truth for the
|
|
456
|
-
* wallet ledger so Bloomberg can produce honest per-provider reports.
|
|
457
|
-
*
|
|
458
|
-
* Returns 'unknown' if the model can't be classified; callers fall back to
|
|
459
|
-
* the caller-intent provider in that case.
|
|
460
|
-
*/
|
|
461
|
-
function inferProvider(model) {
|
|
462
|
-
const m = (model || '').toLowerCase();
|
|
463
|
-
if (!m)
|
|
464
|
-
return 'unknown';
|
|
465
|
-
if (m.includes('sonnet') || m.includes('haiku') || m.includes('opus') || m.startsWith('claude-'))
|
|
466
|
-
return 'anthropic';
|
|
467
|
-
if (m.includes('minimax'))
|
|
468
|
-
return 'minimax';
|
|
469
|
-
if (m.includes('deepseek'))
|
|
470
|
-
return 'deepseek';
|
|
471
|
-
if (m.includes('qwen'))
|
|
472
|
-
return 'qwen';
|
|
473
|
-
if (m.includes('gpt-') || m.startsWith('o1-') || m.startsWith('o3-') || m.includes('codex'))
|
|
474
|
-
return 'openai';
|
|
475
|
-
if (m.includes('grok'))
|
|
476
|
-
return 'xai';
|
|
477
|
-
if (m.includes('gemini') || m.includes('flash'))
|
|
478
|
-
return 'google';
|
|
479
|
-
return 'unknown';
|
|
480
|
-
}
|
|
481
|
-
/**
|
|
482
|
-
* Map legacy provider+model pairs to ClawRouter v2.0 complexity levels.
|
|
483
|
-
* This preserves the existing routing intelligence while funneling through the gateway.
|
|
484
|
-
*/
|
|
485
|
-
function mapLegacyToComplexity(provider, model) {
|
|
486
|
-
// Local models → stay local
|
|
487
|
-
if (provider === 'ollama') {
|
|
488
|
-
if (model.includes('0.6b'))
|
|
489
|
-
return 'nano';
|
|
490
|
-
if (model.includes('4b'))
|
|
491
|
-
return 'local';
|
|
492
|
-
return 'power'; // qwen3:14b, deepseek-r1:14b
|
|
493
|
-
}
|
|
494
|
-
// Anthropic Sonnet → APEX (constitutional)
|
|
495
|
-
if (provider === 'anthropic' && model.includes('sonnet'))
|
|
496
|
-
return 'apex';
|
|
497
|
-
// Anthropic Haiku → EXEC (cloud, not constitutional)
|
|
498
|
-
if (provider === 'anthropic' && model.includes('haiku'))
|
|
499
|
-
return 'exec';
|
|
500
|
-
// ClawRouter DeepSeek → EXEC
|
|
501
|
-
if (provider === 'clawrouter')
|
|
502
|
-
return 'exec';
|
|
503
|
-
// MiniMax → EXEC (cloud)
|
|
504
|
-
if (provider === 'minimax')
|
|
505
|
-
return 'exec';
|
|
506
|
-
// OpenAI → EXEC
|
|
507
|
-
if (provider === 'openai')
|
|
508
|
-
return 'exec';
|
|
509
|
-
// Default → POWER (local)
|
|
510
|
-
return 'power';
|
|
511
|
-
}
|
|
512
|
-
/** Detect if a call is constitutional (requires T3 APEX / Claude Sonnet) */
|
|
513
|
-
function isConstitutionalCall(provider, model) {
|
|
514
|
-
return provider === 'anthropic' && model.includes('sonnet');
|
|
515
|
-
}
|
|
516
|
-
// Legacy aliases — these are now thin wrappers that route through callLLM()
|
|
517
|
-
// They exist so that call sites like callAnthropicCached() don't need immediate rewriting.
|
|
518
|
-
// All direct API calls are eliminated — every call goes through ClawRouter v2.0.
|
|
519
|
-
async function callAnthropicCached(model, systemPrompt, userPrompt, timeoutMs) {
|
|
520
|
-
// Prompt caching is now handled by ClawRouter v2.0 (QCG layer)
|
|
521
|
-
return callLLM('anthropic', model, systemPrompt, userPrompt, timeoutMs);
|
|
522
|
-
}
|
|
523
|
-
/** Get sprint-level ClawRouter metrics for §17.6 sprint JSON fields */
|
|
524
|
-
function getClawRouterSprintMetrics() {
|
|
525
|
-
return {
|
|
526
|
-
llm_calls_routed: _llmCallsRouted,
|
|
527
|
-
direct_api_violations: _directApiViolations,
|
|
528
|
-
apex_calls: _apexCalls,
|
|
529
|
-
apex_judge_pattern_compliant: _apexJudgePatternCompliant,
|
|
530
|
-
};
|
|
531
|
-
}
|
|
532
|
-
// B.9: Nano classifier — uses T0 NANO (qwen3:0.6b) via ClawRouter v2.0
|
|
533
|
-
async function classifyTaskSmart(prompt) {
|
|
534
|
-
const regexType = (0, model_router_1.classifyTask)(prompt);
|
|
535
|
-
if (regexType !== 'util')
|
|
536
|
-
return regexType; // regex was confident
|
|
537
|
-
try {
|
|
538
|
-
const classifyPrompt = `Classify this task into exactly one category. Reply with ONLY the category name, nothing else.
|
|
539
|
-
Categories: code, reason, lang, util, audit, content, data, refactor-complex, agent-framework, codebase-scan
|
|
540
|
-
|
|
541
|
-
Task: ${prompt.substring(0, 300)}`;
|
|
542
|
-
const result = await routeCall({
|
|
543
|
-
task_type: 'nano_classify', tier_class: 'text', complexity: 'nano',
|
|
544
|
-
context_tokens: Math.ceil(classifyPrompt.length / 4), constitutional_flag: false,
|
|
545
|
-
agent_id: 'nano-classifier',
|
|
546
|
-
payload: { prompt: classifyPrompt, max_tokens: 20 },
|
|
547
|
-
});
|
|
548
|
-
const nano = result.content.trim().toLowerCase().split(/\s/)[0];
|
|
549
|
-
const valid = ['code', 'reason', 'lang', 'util', 'audit', 'content', 'data', 'refactor-complex', 'agent-framework', 'codebase-scan'];
|
|
550
|
-
return valid.includes(nano) ? nano : regexType;
|
|
551
|
-
}
|
|
552
|
-
catch {
|
|
553
|
-
return regexType;
|
|
554
|
-
}
|
|
555
|
-
}
|
|
556
|
-
// B.12: Context compression using T1 LOCAL (qwen3:4b) via ClawRouter v2.0 — reduces cloud token spend 70-80%
|
|
557
|
-
/**
 * Shrink a task context by asking a model (via routeCall, complexity 'local')
 * to summarise it.
 *
 * Only the first 4000 characters are sent for compression. Anything beyond
 * that window is appended to the summary verbatim, so requirements located
 * late in a long context are never dropped. The compressed form is used only
 * if the summary is non-trivial (> 100 chars) and the overall result is at
 * least 10% shorter than the input; otherwise the original is returned.
 *
 * @param {string} context - Full task context.
 * @returns {Promise<string>} The compressed context, or `context` unchanged
 *   when it is short (< 1200 chars), compression did not help, or the call failed.
 */
async function compressContext(context) {
    if (context.length < 1200)
        return context; // not worth compressing
    const windowChars = 4000;
    const head = context.substring(0, windowChars);
    const tail = context.substring(windowChars); // '' when the context fits the window
    try {
        const compressPrompt = `Compress the following task context to under 600 words. Preserve all file paths, function names, technical requirements, and acceptance criteria. Remove prose filler and redundant explanations.\n\n${head}`;
        const result = await routeCall({
            task_type: 'qcg_compress', tier_class: 'text', complexity: 'local',
            context_tokens: Math.ceil(compressPrompt.length / 4), constitutional_flag: false,
            agent_id: 'context-compressor',
            payload: { prompt: compressPrompt, max_tokens: 800 },
        });
        const summary = result.content.trim();
        // Re-attach whatever was not shown to the model instead of losing it.
        const compressed = tail ? `${summary}\n\n${tail}` : summary;
        if (summary.length > 100 && compressed.length < context.length * 0.9) {
            log(c.gray, ` [compress] ${context.length} → ${compressed.length} chars (${Math.round(compressed.length / context.length * 100)}%)`);
            return compressed;
        }
    }
    catch (e) {
        // Non-fatal: fall through to the uncompressed context, but leave a trace.
        log(c.gray, ` [compress] skipped: ${(e && e.message) || e}`);
    }
    return context;
}
|
|
577
|
-
// B.10: Local QA gate — structural checks only (no LLM — qwen3 think-mode unreliable for PASS/FAIL)
|
|
578
|
-
// LLM-based QA deferred to Claude supervisor review which gives structured feedback.
|
|
579
|
-
// Markers that, when repeated, indicate the agent's chain-of-thought was
|
|
580
|
-
// captured as the file body (the "rumination" failure mode — sprint smoke
|
|
581
|
-
// 2026-05-07). These are phrases a coder agent says to itself while deciding
|
|
582
|
-
// what to output; they should never appear in a deliverable file.
|
|
583
|
-
// Phrases whose repetition suggests that chain-of-thought text was written
// into a deliverable file. Every entry must be unique: a duplicated entry
// would make a single occurrence count as two hits.
// Note: some markers contain others ('so i will output' ⊃ 'i will output',
// 'but the problem says' ⊃ 'the problem says'), so those longer phrases
// contribute more than one hit each.
const RUMINATION_MARKERS = [
    'let me think', 'the problem says', 'but the problem says', 'however, the problem',
    'so we output', 'therefore the answer is', 'we output nothing', 'we output the empty',
    'so the answer is', 'but to be safe', 'but note:', 'but to be precise',
    'so i will output', 'let me decide', 'final answer:',
    'final decision:', 'i will output', 'should i output', 'let me re-read',
];
/**
 * Count rumination markers in a piece of text.
 *
 * @param {string} content - Text to scan (matched case-insensitively).
 * @returns {{hits: number, ratio: number}} `hits` is the total number of
 *   marker occurrences; `ratio` is hits divided by the whitespace-separated
 *   word count (treated as at least 1, so empty input yields ratio 0).
 */
function detectRumination(content) {
    const lower = content.toLowerCase();
    let hits = 0;
    for (const marker of RUMINATION_MARKERS) {
        // split() yields one more piece than there are non-overlapping
        // occurrences of the marker — cheap, good enough.
        hits += lower.split(marker).length - 1;
    }
    const wordCount = Math.max(1, content.split(/\s+/).filter(Boolean).length);
    return { hits, ratio: hits / wordCount };
}
|
|
600
|
-
/**
 * Deterministic pre-review gate for generated files (no LLM involved).
 *
 * Checks run in order and the first failure is returned:
 *   1. any file whose trimmed content is shorter than 50 characters;
 *   2. any file path that does not exist on disk;
 *   3. any file with at least 3 rumination-marker hits that also make up at
 *      least 0.5% of its words;
 *   4. typecheck of the .ts/.tsx files via typecheckChangedFiles.
 *
 * @param {*} _task - Unused.
 * @param {Array<{path: string, content?: string}>} fileContents - Generated files.
 * @returns {Promise<{pass: boolean, reason: string}>}
 */
async function localQAGate(_task, fileContents) {
    const bodyOf = (file) => file.content || '';
    const pathsOf = (list) => list.map((file) => file.path).join(', ');
    // 1. Structurally empty output.
    const tooShort = fileContents.filter((file) => bodyOf(file).trim().length < 50);
    if (tooShort.length > 0) {
        return { pass: false, reason: `Files too short/empty: ${pathsOf(tooShort)}` };
    }
    // 2. Any file that never reached the disk (the write step failed silently).
    const fsModule = await Promise.resolve().then(() => __importStar(require('fs')));
    const notOnDisk = fileContents.filter((file) => !fsModule.existsSync(file.path));
    if (notOnDisk.length > 0) {
        return { pass: false, reason: `Files not written to disk: ${pathsOf(notOnDisk)}` };
    }
    // 3. Chain-of-thought leaked into a file body.
    for (const file of fileContents) {
        const scan = detectRumination(bodyOf(file));
        const leaked = scan.hits >= 3 && scan.ratio >= 0.005;
        if (!leaked) {
            continue;
        }
        return {
            pass: false,
            reason: `Rumination detected in ${file.path}: ${scan.hits} chain-of-thought markers (${(scan.ratio * 100).toFixed(2)}% of words). The agent dumped its reasoning into the file body instead of outputting just the file content.`,
        };
    }
    // 4. Cheap typecheck before any supervisor tokens are spent.
    const tsPaths = fileContents
        .filter((file) => /\.(ts|tsx)$/.test(file.path))
        .map((file) => file.path);
    if (tsPaths.length > 0) {
        const verdict = await typecheckChangedFiles(tsPaths);
        if (!verdict.pass) {
            return {
                pass: false,
                reason: `Typecheck failed (${verdict.errorCount} error(s)): ${verdict.firstError}`,
            };
        }
    }
    return { pass: true, reason: `${fileContents.length} file(s) non-empty + no rumination + typecheck PASS — proceeding to supervisor review` };
}
|
|
642
|
-
// TICKET-085 (v2 — TICKET-088 fix): project-aware typecheck. v1 used
|
|
643
|
-
// loose-file mode + `npx -y typescript@5 tsc` and silently passed
|
|
644
|
-
// EVERYTHING because the npx invocation produced no tsc output and exit 0
|
|
645
|
-
// (npm "could not determine executable") — gate was a no-op for ~24h.
|
|
646
|
-
//
|
|
647
|
-
// v2: find the nearest tsconfig.json walking UP from each changed file,
|
|
648
|
-
// run `tsc -p <tsconfig> --noEmit --incremental` per project. Incremental
|
|
649
|
-
// build cache (.tsbuildinfo) makes subsequent runs fast (~2-5s typical).
|
|
650
|
-
// Catches both syntactic errors AND cross-file type/import errors —
|
|
651
|
-
// including the "imports from a non-existent file" class that bit us
|
|
652
|
-
// when sprint-1581 + sprint-1582 shipped hallucinated component graphs.
|
|
653
|
-
/**
 * Project-aware typecheck for a set of changed TypeScript files.
 *
 * For each file the nearest tsconfig.json is located by walking up at most 12
 * directory levels. Each distinct tsconfig is checked with
 * `tsc -p <tsconfig> --noEmit --incremental`, using the tsc binary under
 * `<cwd>/node_modules/.bin` when present and `npx` otherwise.
 *
 * Only `error TS…` lines whose file is one of `filePaths` fail the gate.
 * A failed tsc run with no TS error lines, or with errors only in other
 * files, is logged with console.warn and does not block.
 *
 * @param {string[]} filePaths - Paths of the changed files.
 * @returns {Promise<{pass: boolean, errorCount: number, firstError: string}>}
 *   On pass, `firstError` carries an informational message rather than an error.
 */
async function typecheckChangedFiles(filePaths) {
    const childProcess = await Promise.resolve().then(() => __importStar(require('child_process')));
    const fsModule = await Promise.resolve().then(() => __importStar(require('fs')));
    const pathModule = await Promise.resolve().then(() => __importStar(require('path')));
    // Walk upwards from the file's directory looking for tsconfig.json.
    // Gives up (null) at the filesystem root or after 12 levels.
    const nearestTsconfig = (filePath) => {
        let current = pathModule.dirname(pathModule.resolve(filePath));
        let levelsLeft = 12;
        while (levelsLeft-- > 0) {
            const probe = pathModule.join(current, 'tsconfig.json');
            if (fsModule.existsSync(probe)) {
                return probe;
            }
            const above = pathModule.dirname(current);
            if (above === current) {
                return null;
            }
            current = above;
        }
        return null;
    };
    // One entry per governing tsconfig; files with none are only counted.
    const located = filePaths.map((file) => nearestTsconfig(file));
    const projects = new Set(located.filter((found) => Boolean(found)));
    const filesWithoutProject = located.filter((found) => !found).length;
    if (projects.size === 0) {
        return { pass: true, errorCount: 0, firstError: `no tsconfig found for ${filesWithoutProject} file(s) — skipping` };
    }
    // Prefer the locally installed compiler; fall back to npx with an explicit package.
    const localTsc = pathModule.join(process.cwd(), 'node_modules', '.bin', 'tsc');
    let tscCmd = `npx -y --package=typescript@5 tsc`;
    if (fsModule.existsSync(localTsc)) {
        tscCmd = `"${localTsc}"`;
    }
    for (const tsconfig of projects) {
        let failure = null;
        try {
            childProcess.execSync(`${tscCmd} -p "${tsconfig}" --noEmit --incremental`, {
                encoding: 'utf-8', timeout: 120000, stdio: 'pipe', cwd: process.cwd(),
            });
        }
        catch (e) {
            failure = e;
        }
        if (failure === null) {
            continue; // clean run for this project
        }
        const output = (failure.stdout || '') + (failure.stderr || '');
        const errorLines = output.split('\n').filter((line) => /error TS\d+/i.test(line));
        if (errorLines.length === 0) {
            // The command failed without compiler diagnostics: treat as a
            // tooling problem and do not block on it.
            console.warn(`[typecheck-gate] tooling failure on ${tsconfig}: ${(failure.message || '').slice(0, 120)}`);
            continue;
        }
        // Keep only diagnostics that point at files from this change set.
        const changedAbs = new Set(filePaths.map((file) => pathModule.resolve(file)));
        const ourErrors = errorLines.filter((line) => {
            const located = line.match(/^([^(]+)\(/);
            return located && changedAbs.has(pathModule.resolve(located[1].trim()));
        });
        if (ourErrors.length > 0) {
            return {
                pass: false,
                errorCount: ourErrors.length,
                firstError: ourErrors[0].trim().slice(0, 200),
            };
        }
        // Every diagnostic belongs to a file this task did not touch.
        console.warn(`[typecheck-gate] ${errorLines.length} pre-existing error(s) in ${tsconfig} unrelated to this task — passing`);
    }
    return { pass: true, errorCount: 0, firstError: `${projects.size} project(s) typechecked clean` };
}
|
|
727
|
-
// B.11: Tiered debugger — routes debug effort by issue severity via ClawRouter v2.0
|
|
728
|
-
/**
 * Ask a model to fix reviewed code, choosing the effort tier from the issues.
 *
 * Tier selection (first match wins):
 *   - any issue with severity 'critical' or a description mentioning
 *     "architect"  -> task_type 'debug_architectural', complexity 'exec';
 *   - any issue with severity 'high' or a description mentioning "logic"
 *     -> task_type 'debug_systemic', complexity 'power';
 *   - otherwise   -> task_type 'debug_minor', complexity 'power'.
 *
 * Issues without a description and tasks without a context are tolerated
 * (treated as empty strings) instead of aborting the call.
 *
 * @param {{context?: string}} task - Task whose context is quoted (truncated) in the prompt.
 * @param {{issues?: Array<{severity?: string, file?: string, description?: string}>}} review
 * @param {*} _systemPrompt - Unused.
 * @returns {Promise<string|null>} The model's reply, or null when the call
 *   failed or returned no content.
 */
async function tieredDebug(task, review, _systemPrompt) {
    const issues = review.issues || [];
    const describe = (issue) => String(issue.description ?? '');
    const mentions = (issue, word) => describe(issue).toLowerCase().includes(word);
    const issueText = issues.map(i => `[${i.severity}] ${i.file}: ${describe(i)}`).join('\n');
    const hasArchitecture = issues.some(i => i.severity === 'critical' || mentions(i, 'architect'));
    const hasSystemic = issues.some(i => i.severity === 'high' || mentions(i, 'logic'));
    // Per-tier settings: routing labels, how much task context to quote, and
    // the reply budget.
    let tier;
    if (hasArchitecture) {
        tier = { taskType: 'debug_architectural', complexity: 'exec', contextChars: 800, maxTokens: 4096, lead: 'Fix this code. Issues:' };
    }
    else if (hasSystemic) {
        tier = { taskType: 'debug_systemic', complexity: 'power', contextChars: 600, maxTokens: 2048, lead: 'Fix these code issues:' };
    }
    else {
        tier = { taskType: 'debug_minor', complexity: 'power', contextChars: 500, maxTokens: 1024, lead: 'Fix these minor code issues:' };
    }
    const taskContext = String(task.context ?? '');
    try {
        const result = await routeCall({
            task_type: tier.taskType, tier_class: 'text', complexity: tier.complexity,
            context_tokens: Math.ceil((issueText.length + tier.contextChars) / 4), constitutional_flag: false,
            agent_id: 'tiered-debugger',
            payload: { prompt: `${tier.lead}\n${issueText}\n\nTask: ${taskContext.substring(0, tier.contextChars)}`, max_tokens: tier.maxTokens },
        });
        // Every tier reports "no content" the same way.
        return result.content || null;
    }
    catch (e) {
        log(c.yellow, ` [tiered-debug] ${e.message}`);
    }
    return null;
}
|
|
769
|
-
/**
 * POST a string body to an http(s) URL and resolve with the parsed JSON reply.
 *
 * @param {string} url - Absolute URL; its query string is sent as part of the request path.
 * @param {Object<string, string>} headers - Request headers; Content-Length is added.
 * @param {string} body - Request body.
 * @param {number} timeoutMs - Socket idle timeout in milliseconds.
 * @returns {Promise<any>} Resolves with the parsed JSON response. Rejects when
 *   the response has a truthy `error` field, is not valid JSON, the request
 *   or response stream errors, or the timeout fires.
 */
function httpPost(url, headers, body, timeoutMs) {
    const parsed = new URL(url);
    const isHttps = parsed.protocol === 'https:';
    const lib = isHttps ? https : http;
    return new Promise((resolve, reject) => {
        const req = lib.request({
            hostname: parsed.hostname, port: parsed.port || (isHttps ? 443 : undefined),
            // `pathname` alone would drop any ?query=… present in the URL.
            path: `${parsed.pathname}${parsed.search}`, method: 'POST',
            headers: { ...headers, 'Content-Length': Buffer.byteLength(body).toString() },
        }, (res) => {
            // Decode as a stream so a multi-byte character split across two
            // chunks is not corrupted by per-chunk string conversion.
            res.setEncoding('utf8');
            let data = '';
            res.on('data', (chunk) => (data += chunk));
            res.on('error', reject);
            res.on('end', () => {
                try {
                    const result = JSON.parse(data);
                    if (result.error) {
                        reject(new Error(`API error: ${result.error.message || JSON.stringify(result.error)}`));
                        return;
                    }
                    resolve(result);
                }
                catch {
                    reject(new Error(`Failed to parse response: ${data.substring(0, 500)}`));
                }
            });
        });
        req.on('error', reject);
        req.setTimeout(timeoutMs, () => { req.destroy(); reject(new Error(`API timeout (${timeoutMs / 1000}s)`)); });
        req.write(body);
        req.end();
    });
}
|
|
801
|
-
// ===== Supervisor Agent (Claude via Anthropic API) =====
|
|
802
|
-
/**
 * First-pass code reviewer.
 *
 * The system prompt is the constitutional preamble followed by
 * ./agents/supervisor/prompt.md when that file exists, or a one-line default.
 */
class SupervisorAgent {
    systemPrompt;
    constructor() {
        const promptFile = './agents/supervisor/prompt.md';
        let basePrompt = 'You are a code review supervisor.';
        if ((0, fs_1.existsSync)(promptFile)) {
            basePrompt = (0, fs_1.readFileSync)(promptFile, 'utf-8');
        }
        this.systemPrompt = loadConstitutionalPreamble() + basePrompt;
        log(c.magenta, '+ Loaded supervisor agent (Claude via Anthropic API)');
    }
    /**
     * Review the files produced for a task and return a review object.
     *
     * Routing: 'clawrouter' with 'deepseek/deepseek-chat' when ClawRouter is
     * available and the task is not high-stakes; otherwise 'anthropic' with
     * 'claude-sonnet-4-6'. A task is high-stakes when its task_type is
     * 'audit' or 'refactor-complex', or its context mentions "security" or
     * "audit".
     *
     * Fail-open behaviour: a reply without a JSON object yields an APPROVED
     * review with score 70; any thrown error yields an APPROVED review with
     * score 0 and a "Supervisor unavailable: …" summary.
     *
     * @param {object} task - Reads id, context, task_type, _integrityFailed, _integrityDetails.
     * @param {string[]} files - Paths of the generated files (read from disk here).
     * @returns {Promise<object>} The normalised review, or a fail-open stub.
     */
    async reviewTask(task, files) {
        log(c.magenta, `\n[supervisor] Reviewing: ${task.id}`);
        // A missing file is shown as empty rather than failing the review.
        const readOrEmpty = (filepath) => {
            if (!(0, fs_1.existsSync)(filepath)) {
                return '';
            }
            return (0, fs_1.readFileSync)(filepath, 'utf-8');
        };
        // Files are wrapped in XML-style tags (not code fences) and capped at
        // 12000 characters each.
        const fileSections = [];
        for (const filepath of files) {
            const text = readOrEmpty(filepath);
            fileSections.push(`### ${filepath}\n<file_content>\n${text.substring(0, 12000)}\n</file_content>`);
        }
        const fileContents = fileSections.join('\n\n');
        // Fence presence is computed here and handed to the model as a fact.
        const fenceReport = [];
        for (const filepath of files) {
            const text = readOrEmpty(filepath);
            const firstLine = text.trimStart().split('\n')[0] || '';
            const verdict = firstLine.startsWith('```')
                ? `FENCE DETECTED (first line: ${JSON.stringify(firstLine)})`
                : 'OK (no fence at start)';
            fenceReport.push(` ${filepath}: ${verdict}`);
        }
        const fenceCheckLines = fenceReport.join('\n');
        let integrityContext = '';
        if (task._integrityFailed) {
            integrityContext = `\n\n## ⚠️ INTEGRITY ALERT\n${task._integrityDetails}\nThis file was flagged for destructive rewrite. The original was preserved. REJECT this task.\n`;
        }
        const memoryContext = await (0, sherlock_memory_1.getSherlockMemoryContext)(task.context || task.id);
        const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}${memoryContext}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines}\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
        const startTime = Date.now();
        const taskType = task.task_type || '';
        const contextLower = (task.context || '').toLowerCase();
        const isHighStakes = taskType === 'audit' || taskType === 'refactor-complex' ||
            contextLower.includes('security') || contextLower.includes('audit');
        const crAvail = await (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
        const viaRouter = Boolean(crAvail && !isHighStakes);
        const reviewProvider = viaRouter ? 'clawrouter' : 'anthropic';
        const reviewModel = viaRouter ? 'deepseek/deepseek-chat' : 'claude-sonnet-4-6';
        log(c.gray, ` -> Sending to ${viaRouter ? 'ClawRouter/DeepSeek' : 'Claude Sonnet'} (${isHighStakes ? 'high-stakes' : 'standard'})...`);
        try {
            const response = await callLLM(reviewProvider, reviewModel, this.systemPrompt, userPrompt, 120000, 'supervisor', 'code_review');
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            log(c.gray, ` -> Review received in ${elapsed}s (${response.usage?.total_tokens || '?'} tokens)`);
            const content = response.choices?.[0]?.message?.content || '';
            const jsonMatch = content.match(/\{[\s\S]*\}/);
            if (!jsonMatch) {
                log(c.yellow, ' ! Could not parse review JSON, auto-approving');
                return { verdict: 'APPROVED', score: 70, summary: 'Auto-approved (parse failure)', issues: [], strengths: [] };
            }
            const review = normalizeReview(JSON.parse(jsonMatch[0]));
            const gradeLabel = review.grade ? `${review.grade} · ` : '';
            if (review.verdict !== 'APPROVED') {
                log(c.red, ` ✗ REJECTED (${gradeLabel}score: ${review.score}/100)`);
                log(c.yellow, ` Summary: ${review.summary}`);
                for (const issue of review.issues || []) {
                    log(c.yellow, ` [${issue.severity}] ${issue.file}: ${issue.description}`);
                }
                return review;
            }
            log(c.green, ` ✓ APPROVED (${gradeLabel}score: ${review.score}/100)`);
            return review;
        }
        catch (error) {
            log(c.yellow, ` ! Supervisor error: ${error.message}`);
            return { verdict: 'APPROVED', score: 0, summary: `Supervisor unavailable: ${error.message}`, issues: [], strengths: [] };
        }
    }
}
|
|
872
|
-
// ===== Supervisor 2 Agent (Claude Haiku 4.5; DeepSeek mono-supervision on Anthropic drain) =====
|
|
873
|
-
/**
 * Second-pass code reviewer.
 *
 * Tries Anthropic ('claude-haiku-4-5-20251001') first and falls back to
 * ClawRouter ('deepseek/deepseek-chat') when that call throws. Uses the same
 * system prompt source as the first supervisor: constitutional preamble plus
 * ./agents/supervisor/prompt.md, or a one-line default.
 */
class Supervisor2Agent {
    systemPrompt;
    constructor() {
        const promptPath = './agents/supervisor/prompt.md';
        const rawPrompt = (0, fs_1.existsSync)(promptPath) ? (0, fs_1.readFileSync)(promptPath, 'utf-8') : 'You are a code review supervisor.';
        this.systemPrompt = loadConstitutionalPreamble() + rawPrompt;
        log(c.magenta, '+ Loaded supervisor 2 agent (Claude Haiku — second pass)');
    }
    /**
     * Review the files produced for a task and return a review object.
     *
     * Fail-open behaviour:
     *   - both providers throw -> APPROVED, score 0, summary
     *     "Supervisor 2 unavailable: <first error>";
     *   - the reply contains no JSON object, or the matched text is not valid
     *     JSON -> APPROVED, score 70, summary "Auto-approved (parse failure)".
     *
     * @param {object} task - Reads id, context, _integrityFailed, _integrityDetails.
     * @param {string[]} files - Paths of the generated files (read from disk here).
     * @returns {Promise<object>} The normalised review, or a fail-open stub.
     */
    async reviewTask(task, files) {
        log(c.magenta, `\n[supervisor-2/haiku] Reviewing: ${task.id}`);
        // XML-style tags (not code fences) so the display wrapper cannot be
        // mistaken for file content; each file is capped at 12000 characters.
        const fileContents = files.map((filepath) => {
            const content = (0, fs_1.existsSync)(filepath) ? (0, fs_1.readFileSync)(filepath, 'utf-8') : '';
            return `### ${filepath}\n<file_content>\n${content.substring(0, 12000)}\n</file_content>`;
        }).join('\n\n');
        // Fence presence is computed here and handed to the model as a fact.
        const fenceCheckLines2 = files.map((filepath) => {
            const content = (0, fs_1.existsSync)(filepath) ? (0, fs_1.readFileSync)(filepath, 'utf-8') : '';
            const firstLine = content.trimStart().split('\n')[0] || '';
            const hasFence = firstLine.startsWith('```');
            return ` ${filepath}: ${hasFence ? `FENCE DETECTED (first line: ${JSON.stringify(firstLine)})` : 'OK (no fence at start)'}`;
        }).join('\n');
        const integrityContext2 = task._integrityFailed
            ? `\n\n## ⚠️ INTEGRITY ALERT\n${task._integrityDetails}\nThis file was flagged for destructive rewrite. The original was preserved. REJECT this task.\n`
            : '';
        const userPrompt = `Review the following code generated for task ${task.id}.\n\n## Task Spec\n${task.context}\n\n## Generated Files (${files.length})\n${fileContents}${integrityContext2}\n\n## Pre-computed Fence Check (authoritative — do NOT infer from display format)\n${fenceCheckLines2}\n\n## Instructions\nCRITICAL CHECK: Use the Pre-computed Fence Check above. If any file shows FENCE DETECTED, REJECT. Do NOT infer fence presence from the <file_content> display tags — those are display-only wrappers.\nAlso check: Did the file lose existing functionality? If a file shrank significantly, REJECT.\n\n## Categorical grade (use a discrete letter — no fake precision)\n- A: production-perfect. No improvements possible. Ship.\n- B: good with minor polish needed (rename, comment, formatting).\n- C: works but has noticeable issues (missed edge case, weak abstraction, partial spec coverage). APPROVED with caveats.\n- D: significant problems (broken edge case, regression risk, anti-pattern). REJECT.\n- F: broken, unsafe, doesn't meet spec, or fence/integrity failure. REJECT.\n\nThe deterministic gate already ran (typecheck + structural). If a file got here, syntax is valid — focus your review on substance, not parseability.\n\nRespond with a JSON object:\n{\n "verdict": "APPROVED" or "REJECTED",\n "grade": "A" | "B" | "C" | "D" | "F",\n "score_rationale": "ONE sentence naming the specific factor that determined the grade. Vague 'good code' is NOT acceptable.",\n "summary": "brief review summary",\n "issues": [{"severity": "critical|high|medium|low", "file": "path", "description": "..."}],\n "strengths": ["..."]\n}`;
        const tryPath = async (provider, model, label) => {
            // Timed per attempt, so the fallback's elapsed time does not
            // include the time spent on the failed first attempt.
            const startTime = Date.now();
            log(c.gray, ` -> Sending to ${label}...`);
            const response = provider === 'anthropic'
                ? await callAnthropicCached(model, this.systemPrompt, userPrompt, 120000)
                : await callLLM('clawrouter', model, this.systemPrompt, userPrompt, 120000, 'supervisor-2', 'code_review');
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            log(c.gray, ` -> ${label} review received in ${elapsed}s (${response.usage?.total_tokens || '?'} tokens)`);
            return response;
        };
        let response;
        let usedFallback = false;
        try {
            response = await tryPath('anthropic', 'claude-haiku-4-5-20251001', 'Claude Haiku (second pass)');
        }
        catch (error) {
            const msg = String(error?.message || '');
            const credit = /credit balance|invalid_request_error.*credit|insufficient.*quota/i.test(msg);
            log(c.yellow, ` ! [Haiku] Anthropic unavailable${credit ? ' (credits)' : ''}: ${msg.slice(0, 120)}`);
            log(c.yellow, ' ! Falling back to ClawRouter/DeepSeek mono-supervision — swarm continues');
            try {
                response = await tryPath('clawrouter', 'deepseek/deepseek-chat', 'ClawRouter/DeepSeek (mono fallback)');
                usedFallback = true;
            }
            catch (fallbackErr) {
                log(c.yellow, ` ! [Haiku] Both Anthropic + ClawRouter unavailable: ${fallbackErr.message}`);
                return { verdict: 'APPROVED', score: 0, summary: `Supervisor 2 unavailable: ${msg.slice(0, 200)}`, issues: [], strengths: [] };
            }
        }
        const content = response.choices?.[0]?.message?.content || '';
        const jsonMatch = content.match(/\{[\s\S]*\}/);
        let parsed = null;
        if (jsonMatch) {
            try {
                parsed = JSON.parse(jsonMatch[0]);
            }
            catch {
                // The greedy brace match can capture text that is not valid
                // JSON. Treat that like "no JSON found" instead of letting
                // the SyntaxError reject reviewTask.
                parsed = null;
            }
        }
        if (parsed !== null) {
            const review = normalizeReview(parsed);
            const tag = usedFallback ? '[DeepSeek-fallback]' : '[Haiku]';
            const gradeLabel = review.grade ? `${review.grade} · ` : '';
            if (review.verdict === 'APPROVED')
                log(c.green, ` ✓ ${tag} APPROVED (${gradeLabel}score: ${review.score}/100)`);
            else {
                log(c.red, ` ✗ ${tag} REJECTED (${gradeLabel}score: ${review.score}/100)`);
                log(c.yellow, ` Summary: ${review.summary}`);
                for (const issue of review.issues || [])
                    log(c.yellow, ` [${issue.severity}] ${issue.file}: ${issue.description}`);
            }
            return review;
        }
        log(c.yellow, ' ! [Haiku] Could not parse review JSON, auto-approving');
        return { verdict: 'APPROVED', score: 70, summary: 'Auto-approved (parse failure)', issues: [], strengths: [] };
    }
}
|
|
953
|
-
/**
 * Combine two supervisor reviews into one final verdict.
 *
 * Order of evaluation:
 *   1. Outage handling. A review counts as "unavailable" only when it has
 *      the shape of a fail-open stub: summary mentions "unavailable",
 *      verdict APPROVED, score not above 0, and no issues. Both unavailable
 *      -> auto-approve (score 50); one unavailable -> the other review is final.
 *   2. Both APPROVED -> APPROVED with the rounded average score, issues
 *      concatenated, strengths merged and de-duplicated.
 *   3. Both not APPROVED -> REJECTED with the lower score and issues concatenated.
 *   4. Disagreement -> ceo.resolveReviewConflict(...) decides. If that call
 *      throws, the rejecting review is final.
 *
 * @param {object} review1 - First supervisor's review.
 * @param {object} review2 - Second supervisor's review.
 * @param {{id: string}} task - Task under review (id is logged; the task is passed to the CEO).
 * @param {{resolveReviewConflict: Function}} ceo - Conflict resolver; awaited, expected to yield text.
 * @returns {Promise<{finalReview: object, review1: object, review2: object,
 *   consensus: boolean, escalatedToCEO: boolean, ceoDecision?: string}>}
 */
async function reconcileSupervisorReviews(review1, review2, task, ceo) {
    // Matching on the word "unavailable" alone would also discard a genuine
    // review whose summary happens to use that word (for example a rejection
    // about handling an unavailable upstream). Require the stub's full shape.
    const isUnavailable = (r) => /unavailable/i.test(r.summary || '') &&
        r.verdict === 'APPROVED' && !(r.score > 0) && (r.issues || []).length === 0;
    // True only for an un-negated "approve…". "disapprove" has no word
    // boundary before "approv"; "not/cannot/never … approve" is negated.
    const ceoDecisionApproves = (text) => {
        const lower = text.toLowerCase();
        if (!/\bapprov/.test(lower))
            return false;
        return !/\b(?:not|never|cannot|can't|can’t|won't|won’t|don't|don’t|unable to|refuse to|decline to)\s+(?:\w+\s+){0,2}approv/.test(lower);
    };
    const issuesOf = (r) => r.issues || [];
    const strengthsOf = (r) => r.strengths || [];
    const sup1Unavailable = isUnavailable(review1);
    const sup2Unavailable = isUnavailable(review2);
    if (sup1Unavailable && sup2Unavailable) {
        log(c.yellow, ` ! BOTH supervisors unavailable (Anthropic + fallback drained) — auto-approving so swarm keeps shipping`);
        log(c.yellow, ` KSL will capture this attempt for training. Restore credits to re-enable review.`);
        return {
            finalReview: {
                verdict: 'APPROVED', score: 50,
                summary: 'Auto-approved — both supervisors unavailable. Founder directive: autonomy on, swarm continues.',
                issues: [], strengths: [],
            },
            review1, review2, consensus: false, escalatedToCEO: false,
        };
    }
    if (sup1Unavailable) {
        log(c.yellow, ` ! Sup1 unavailable — Sup2 as sole reviewer (score: ${review2.score}/100)`);
        return { finalReview: review2, review1, review2, consensus: false, escalatedToCEO: false };
    }
    if (sup2Unavailable) {
        log(c.yellow, ` ! Sup2 unavailable — Sup1 as sole reviewer (score: ${review1.score}/100)`);
        return { finalReview: review1, review1, review2, consensus: false, escalatedToCEO: false };
    }
    const bothApproved = review1.verdict === 'APPROVED' && review2.verdict === 'APPROVED';
    const bothRejected = review1.verdict !== 'APPROVED' && review2.verdict !== 'APPROVED';
    if (bothApproved) {
        // Both approve — take the average score, merge strengths
        const avgScore = Math.round((review1.score + review2.score) / 2);
        log(c.green, ` ✓ DUAL CONSENSUS: Both supervisors APPROVED (Sup1: ${review1.score}, Sup2: ${review2.score}, avg: ${avgScore})`);
        return {
            finalReview: {
                verdict: 'APPROVED',
                score: avgScore,
                summary: `Dual-approved: Sup1 (${review1.score}/100) + Sup2 (${review2.score}/100)`,
                issues: [...issuesOf(review1), ...issuesOf(review2)],
                strengths: Array.from(new Set([...strengthsOf(review1), ...strengthsOf(review2)])),
            },
            review1, review2, consensus: true, escalatedToCEO: false,
        };
    }
    if (bothRejected) {
        // Both reject — merge issues, take lower score
        const minScore = Math.min(review1.score, review2.score);
        log(c.red, ` ✗ DUAL CONSENSUS: Both supervisors REJECTED (Sup1: ${review1.score}, Sup2: ${review2.score})`);
        return {
            finalReview: {
                verdict: 'REJECTED',
                score: minScore,
                summary: `Dual-rejected: Sup1 (${review1.score}/100) + Sup2 (${review2.score}/100). ${review1.summary} | ${review2.summary}`,
                issues: [...issuesOf(review1), ...issuesOf(review2)],
                strengths: [],
            },
            review1, review2, consensus: true, escalatedToCEO: false,
        };
    }
    // CONFLICT — one approved, one rejected → escalate to CEO
    const sup1Approved = review1.verdict === 'APPROVED';
    const approver = sup1Approved ? 'Sup1' : 'Sup2';
    const rejecter = sup1Approved ? 'Sup2' : 'Sup1';
    const approvalReview = sup1Approved ? review1 : review2;
    const rejectionReview = sup1Approved ? review2 : review1;
    log(c.yellow, ` ⚡ SUPERVISOR CONFLICT on ${task.id}: ${approver} APPROVED (${approvalReview.score}), ${rejecter} REJECTED (${rejectionReview.score})`);
    log(c.magenta, ` → Escalating to CEO for final decision...`);
    try {
        const ceoDecision = await ceo.resolveReviewConflict(task, approvalReview, rejectionReview, approver, rejecter);
        const ceoApproves = ceoDecisionApproves(ceoDecision);
        log(ceoApproves ? c.green : c.red, ` CEO DECISION: ${ceoApproves ? 'APPROVED' : 'REJECTED'} — ${ceoDecision.substring(0, 200)}`);
        return {
            finalReview: {
                verdict: ceoApproves ? 'APPROVED' : 'REJECTED',
                score: ceoApproves ? approvalReview.score : rejectionReview.score,
                summary: `CEO resolved conflict (${approver} approved, ${rejecter} rejected): ${ceoDecision.substring(0, 300)}`,
                issues: rejectionReview.issues,
                strengths: approvalReview.strengths,
            },
            review1, review2, consensus: false, escalatedToCEO: true, ceoDecision,
        };
    }
    catch (error) {
        // CEO unavailable (or returned something unusable) — default to rejection (safer)
        log(c.yellow, ` CEO unavailable for conflict resolution: ${error.message}. Defaulting to REJECTED.`);
        return {
            finalReview: rejectionReview,
            review1, review2, consensus: false, escalatedToCEO: false,
        };
    }
}
|
|
1046
|
-
// ===== CEO Agent (Claude via Anthropic API) =====
|
|
1047
|
-
/**
 * Leadership-layer agent that turns sprint data into prompts and relays the
 * model's answer.
 *
 * Routing as written in this class: progress reviews and the daily report go
 * through `callLLM('clawrouter', 'deepseek/deepseek-chat', ...)`, supervisor
 * conflict resolution goes through `callLLM('anthropic', ...)`, and CTO
 * proposal reviews go through `callAnthropicCached(...)`.
 */
class CEOAgent {
    /** Constitutional preamble followed by the CEO role prompt. */
    systemPrompt;
    /**
     * Builds the system prompt from `./agents/ceo/prompt.md` when that file
     * exists, otherwise from a one-line default.
     */
    constructor() {
        const promptPath = './agents/ceo/prompt.md';
        const rawPrompt = (0, fs_1.existsSync)(promptPath) ? (0, fs_1.readFileSync)(promptPath, 'utf-8') : 'You are the CEO of Countable.';
        this.systemPrompt = loadConstitutionalPreamble() + rawPrompt;
        log(c.magenta, '+ Loaded CEO agent (Claude via Anthropic API)');
    }
    /**
     * Asks the model for a short assessment of sprint progress.
     *
     * @param {Array<object>} tasks Sprint tasks; `id`, `status`, `agent` and `priority` are read.
     * @returns {Promise<string>} The model's text, `'No response'` when the reply has no content,
     *   or `'CEO agent unavailable'` when the call throws.
     */
    async reviewSprintProgress(tasks) {
        const done = tasks.filter(t => t.status === 'done').length;
        const total = tasks.length;
        const pending = tasks.filter(t => t.status === 'pending');
        const rejected = tasks.filter(t => t.status === 'rejected');
        log(c.magenta, `\n[ceo] Sprint progress check (${done}/${total} done)`);
        const userPrompt = `IMPORTANT: Plain text only, no tools, no XML. Respond directly.

Sprint progress update:\n- Done: ${done}/${total}\n- Pending: ${pending.map(t => t.id).join(', ') || 'none'}\n- Rejected: ${rejected.map(t => t.id).join(', ') || 'none'}\n\nTask details:\n${JSON.stringify(tasks.map(t => ({ id: t.id, status: t.status, agent: t.agent, priority: t.priority })), null, 2)}\n\nAs CEO, briefly assess:\n1. Are we on track?\n2. Any tasks to re-prioritize?\n3. Cost efficiency — are we using the right models?\n4. Any strategic adjustments needed?\n\nKeep response under 200 words.`;
        try {
            // B.20: ClawRouter/DeepSeek — formulaic progress check, $0 via x402 wallet
            const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.systemPrompt, userPrompt, 60000, 'ceo', 'sprint_progress_review');
            const content = response.choices?.[0]?.message?.content || 'No response';
            log(c.magenta, ` CEO assessment: ${content.substring(0, 500)}`);
            return content;
        }
        catch (error) {
            log(c.yellow, ` ! CEO unavailable: ${error.message}`);
            return 'CEO agent unavailable';
        }
    }
    /**
     * Asks the model to break a tie between an approving and a rejecting supervisor.
     *
     * Unlike the other methods, a failed call is logged and then re-thrown so
     * the caller can choose its own fallback.
     *
     * @param {object} task Task under review; `id` and optional `context` are read.
     * @param {object} approvalReview Review with verdict APPROVED (`score`, `summary`, `strengths`).
     * @param {object} rejectionReview Review with verdict REJECTED (`score`, `summary`, `issues`).
     * @param {string} approver Label of the approving supervisor.
     * @param {string} rejecter Label of the rejecting supervisor.
     * @returns {Promise<string>} The model's text, or `'No response'` when the reply has no content.
     * @throws Whatever `callLLM` throws.
     */
    async resolveReviewConflict(task, approvalReview, rejectionReview, approver, rejecter) {
        log(c.magenta, `\n[ceo] Resolving supervisor conflict on ${task.id}...`);
        const userPrompt = `IMPORTANT: Respond with ONLY APPROVE or REJECT and a brief reason. No tools, no XML, no file reading.\n\nTwo code review supervisors disagree on task ${task.id}.

## Task Spec
${task.context?.substring(0, 800) || 'No context'}

## ${approver} says APPROVED (score: ${approvalReview.score}/100)
Summary: ${approvalReview.summary}
Strengths: ${approvalReview.strengths?.join(', ') || 'none listed'}

## ${rejecter} says REJECTED (score: ${rejectionReview.score}/100)
Summary: ${rejectionReview.summary}
Issues found:
${(rejectionReview.issues || []).map(i => `- [${i.severity}] ${i.file}: ${i.description}`).join('\n')}

## Your Decision
As CEO, you must make the final call. Consider:
1. Are the rejection issues genuine blockers or nitpicks?
2. Does the code meet the task spec requirements?
3. Is it safe to ship, or are there real quality/security concerns?

Respond with ONE of:
- "APPROVE — [brief reason]" if the code is good enough to ship
- "REJECT — [brief reason]" if the rejection issues are valid and must be fixed

Keep response under 100 words.`;
        try {
            const response = await callLLM('anthropic', 'claude-sonnet-4-20250514', this.systemPrompt, userPrompt, 60000, 'ceo', 'supervisor_conflict_resolution');
            const content = response.choices?.[0]?.message?.content || 'No response';
            log(c.magenta, ` CEO conflict resolution: ${content.substring(0, 300)}`);
            return content;
        }
        catch (error) {
            log(c.yellow, ` ! CEO unavailable for conflict resolution: ${error.message}`);
            throw error;
        }
    }
    /**
     * Asks the model to decide on each CTO proposal.
     *
     * The report is model-generated, so list-valued fields are normalized
     * before use: the prompt is assembled outside the try block and a missing
     * `proposals`, `implementation_steps` or `metrics_reviewed` would otherwise
     * throw straight out of this method.
     *
     * @param {object} ctoReport CTO report (`summary`, `proposals`, `metrics_reviewed`).
     * @returns {Promise<string>} The model's text, `'No response'` when the reply has no content,
     *   or `'CEO unavailable for CTO review'` when the call throws.
     */
    async reviewCTOProposals(ctoReport) {
        // Arrays pass through, a lone value becomes a one-item list, null/undefined becomes [].
        const toList = (value) => (Array.isArray(value) ? value : value == null ? [] : [value]);
        const proposals = toList(ctoReport.proposals);
        const metricsReviewed = toList(ctoReport.metrics_reviewed);
        log(c.magenta, `\n[ceo] Reviewing ${proposals.length} CTO proposals...`);
        const proposalsSummary = proposals.map((p, i) => `
### Proposal ${i + 1}: ${p.title}
- ID: ${p.id}
- Category: ${p.category}
- Risk: ${p.risk_level}
- Description: ${p.description}
- Impact: ${p.estimated_impact}
- Steps: ${toList(p.implementation_steps).join(', ')}
${p.agent_spec ? `- NEW AGENT: name=${p.agent_spec.name}, role="${p.agent_spec.role}", llm=${p.agent_spec.llm}, trigger=${p.agent_spec.trigger}` : ''}
`).join('\n');
        const userPrompt = `IMPORTANT: You have NO tools. Do NOT output XML tool calls or file-reading syntax. Respond ONLY with the JSON array. All context is in this message.

The CTO has analyzed our project data and submitted ${proposals.length} proposal(s).

## CTO Summary
${ctoReport.summary}

## Proposals
${proposalsSummary}

## Your Review Criteria
For each proposal, evaluate:
1. **Evidence-based**: Is it backed by real sprint data? (CTO reviewed: ${metricsReviewed.join(', ')})
2. **Cost impact**: Will this save or cost money?
3. **Risk level**: Can we roll back if it fails?
4. **Business value**: Does it help ship features faster?
5. **Disruption level**: How much will this change current workflows?

**For new_agent proposals, ALSO evaluate:**
- Is the capability gap real? (not something an existing agent handles)
- Is MiniMax appropriate, or does this need Claude-level intelligence?
- Is the trigger frequency reasonable? (every_sprint may be expensive)
- Will the total agent count become unmanageable?

**For ClawHub skill proposals:**
- Has a security_review step been included? (REQUIRED — ClawHub skills may contain malware)
- Is the skill from a trusted author?

## Decision Format
For EACH proposal, respond with a decision JSON:
{
"decision": "APPROVED | REJECTED | DEFERRED",
"proposal_id": "the proposal ID",
"reasoning": "Why this decision",
"conditions": ["Any conditions for implementation"],
"cascade_orders": ["Company-wide changes if approved"],
"priority": "immediate | next_sprint | backlog"
}

Wrap all decisions in a JSON array. Be concise.`;
        try {
            // B.6: Haiku for CTO proposal reviews — 10x cheaper, prompt-cached system prompt
            const response = await callAnthropicCached('claude-haiku-4-5-20251001', this.systemPrompt, userPrompt, 60000);
            const content = response.choices?.[0]?.message?.content || 'No response';
            log(c.magenta, ` CEO CTO review: ${content.substring(0, 500)}`);
            return content;
        }
        catch (error) {
            log(c.yellow, ` ! CEO CTO review failed: ${error.message}`);
            return 'CEO unavailable for CTO review';
        }
    }
    /**
     * Generates the owner-facing daily report and writes it to
     * `reports/daily/<YYYY-MM-DD>.md`.
     *
     * Failures (model call or file write) are logged and swallowed; nothing is returned.
     *
     * @param {Array<object>} tasks Sprint tasks; `id`, `status` and `agent` are read.
     * @param {object} stats Counters: `tasksExecuted`, `approved`, `rejected`, optional `conflicts` / `escalations`.
     * @param {*} ctoReport Interpolated verbatim into the prompt.
     * @param {*} ctoDecisions Interpolated verbatim into the prompt.
     * @returns {Promise<void>}
     */
    async generateDailyReport(tasks, stats, ctoReport, ctoDecisions) {
        log(c.magenta, '\n[ceo] Generating daily report for owner...');
        const done = tasks.filter(t => t.status === 'done').length;
        const pending = tasks.filter(t => t.status === 'pending').length;
        // UTC calendar date; also used as the report file name.
        const today = new Date().toISOString().split('T')[0];
        const userPrompt = `IMPORTANT: Output ONLY the markdown report. No tools, no XML, no file reading. All data is in this message.\n\nGenerate a daily report for the owner of Countable. Today is ${today}.

## Sprint Data
- Tasks executed: ${stats.tasksExecuted}
- Approved: ${stats.approved}
- Rejected: ${stats.rejected}
- Done: ${done}, Pending: ${pending}, Total: ${tasks.length}

Task details:
${JSON.stringify(tasks.map(t => ({ id: t.id, status: t.status, agent: t.agent })), null, 2)}

## CTO Report
${ctoReport}

## CEO Decisions on CTO Proposals
${ctoDecisions}

## Dual Supervisor Review Stats
- Supervisor conflicts: ${stats.conflicts || 0}
- CEO escalations: ${stats.escalations || 0}

## Estimated Costs
- MiniMax coding: ~$0.09/task x ${stats.tasksExecuted} tasks = ~$${(stats.tasksExecuted * 0.09).toFixed(2)}
- Sup1 reviews (DeepSeek default, Sonnet for high-stakes): ~$0.02/review x ${stats.approved + stats.rejected} reviews = ~$${((stats.approved + stats.rejected) * 0.02).toFixed(2)}
- Sup2 reviews (Haiku default, DeepSeek on drain): ~$0.005/review x ${stats.approved + stats.rejected} reviews = ~$${((stats.approved + stats.rejected) * 0.005).toFixed(2)}
- CEO conflict resolution: ~$0.03 x ${stats.escalations || 0} escalations = ~$${((stats.escalations || 0) * 0.03).toFixed(2)}
- Claude CEO calls: ~$0.03 x 4 = ~$0.12
- MiniMax CTO scan: ~$0.05

Generate a concise daily report in markdown format following the template in your prompt. Include:
1. Sprint Progress
2. Cost Summary
3. Key Decisions Made
4. CTO Proposals Reviewed
5. Blockers & Risks
6. Tomorrow's Plan

Keep it under 300 words. Be honest about failures.`;
        try {
            // B.20: ClawRouter/DeepSeek — template fill-in, $0 via x402 wallet
            const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.systemPrompt, userPrompt, 60000, 'ceo', 'sprint_final_report');
            const report = response.choices?.[0]?.message?.content || 'Report generation failed';
            // Save to reports/daily/
            (0, fs_1.mkdirSync)('reports/daily', { recursive: true });
            const reportPath = `reports/daily/${today}.md`;
            (0, fs_1.writeFileSync)(reportPath, report);
            log(c.green, ` ✓ Daily report saved: ${reportPath}`);
            log(c.magenta, ` Report preview: ${report.substring(0, 300)}`);
        }
        catch (error) {
            log(c.yellow, ` ! Daily report failed: ${error.message}`);
        }
    }
}
|
|
1238
|
-
// ===== CTO Data Collector (gathers real project context for CTO analysis) =====
|
|
1239
|
-
/**
 * Gathers project context from the working directory into one markdown
 * string that is embedded in CTO prompts.
 */
class CTODataCollector {
    /**
     * Builds the context document. Sections, in order: latest sprint results
     * (`./sprints/*.json`), learnings (`./docs/learnings.md`), agent
     * directories (`./agents`), latest daily report (`./reports/daily/*.md`),
     * a hard-coded stack description, and hard-coded external-source hints.
     *
     * Every filesystem section is wrapped in its own try/catch, so a read or
     * parse failure in one section does not abort the others.
     *
     * @returns {string} Sections joined with newlines.
     */
    collect() {
        const sections = ['## PROJECT DATA (Real metrics — base all proposals on this data)\n'];
        // 1. Sprint results — find most recent sprint file
        try {
            const sprintDir = './sprints';
            if ((0, fs_1.existsSync)(sprintDir)) {
                // "Most recent" means last in lexicographic file-name order.
                const sprintFiles = (0, fs_1.readdirSync)(sprintDir).filter(f => f.endsWith('.json')).sort().reverse();
                if (sprintFiles.length > 0) {
                    const latestSprint = JSON.parse((0, fs_1.readFileSync)(`${sprintDir}/${sprintFiles[0]}`, 'utf-8'));
                    const tasks = latestSprint.tasks || [];
                    const done = tasks.filter((t) => t.status === 'done').length;
                    const rejected = tasks.filter((t) => t.status === 'rejected').length;
                    sections.push(`### Sprint Results (${sprintFiles[0].replace('.json', '')})`);
                    sections.push(`- ${tasks.length} tasks, ${done} approved, ${rejected} rejected`);
                    for (const t of tasks) {
                        // `??` rather than `||`: a genuine score of 0 must not be shown as unknown.
                        const score = t.output?.review?.score ?? '?';
                        sections.push(`- ${t.id} (${t.agent}): status=${t.status}, score=${score}`);
                    }
                    sections.push('');
                }
            }
        }
        catch (e) {
            sections.push(`### Sprint Results\n- Error reading: ${e.message}\n`);
        }
        // 2. Learnings — first 3000 chars
        try {
            const learnings = (0, fs_1.existsSync)('./docs/learnings.md') ? (0, fs_1.readFileSync)('./docs/learnings.md', 'utf-8') : '';
            if (learnings) {
                sections.push('### Key Learnings (from docs/learnings.md)');
                sections.push(learnings.substring(0, 3000));
                if (learnings.length > 3000)
                    sections.push('... (truncated)');
                sections.push('');
            }
        }
        catch {
            sections.push('### Key Learnings\n- File not found\n');
        }
        // 3. Existing agents
        try {
            const agentDirs = (0, fs_1.existsSync)('./agents') ? (0, fs_1.readdirSync)('./agents') : [];
            const leadershipAgents = ['ceo', 'supervisor', 'skills'];
            const techAgents = ['cto'];
            sections.push(`### Existing Agents (${agentDirs.length} directories)`);
            for (const dir of agentDirs) {
                // Anything not listed above is labelled as a coding agent.
                const layer = leadershipAgents.includes(dir) ? 'Claude/leadership'
                    : techAgents.includes(dir) ? 'MiniMax/technology' : 'MiniMax/coding';
                sections.push(`- ${dir} (${layer})`);
            }
            sections.push('');
        }
        catch {
            sections.push('### Existing Agents\n- Error reading agents directory\n');
        }
        // 4. Most recent daily report (first 2000 chars)
        try {
            const reportDir = './reports/daily';
            if ((0, fs_1.existsSync)(reportDir)) {
                const reports = (0, fs_1.readdirSync)(reportDir).filter(f => f.endsWith('.md')).sort().reverse();
                if (reports.length > 0) {
                    const latestReport = (0, fs_1.readFileSync)(`${reportDir}/${reports[0]}`, 'utf-8');
                    sections.push(`### Recent Daily Report (${reports[0]})`);
                    sections.push(latestReport.substring(0, 2000));
                    sections.push('');
                }
            }
        }
        catch { /* no reports yet */ }
        // 5. Current stack versions (hard-coded here, not read from disk)
        sections.push('### Current Stack');
        sections.push('- OpenClaw: v2026.2.12');
        sections.push('- ClawRouter: v0.9.3 (unfunded, using Anthropic API direct)');
        sections.push('- Models: MiniMax M2.5 (coding ~$0.09/task), Claude Sonnet (review ~$0.04/review)');
        sections.push('- Node.js: v22.22.0, PM2 in WSL2');
        sections.push('- Orchestrator: v2, dynamic agent loading, one-file-per-call, rejection feedback');
        sections.push('');
        // 6. External monitoring hints
        sections.push('### External Sources to Consider');
        sections.push('- OpenClaw GitHub: https://github.com/openclaw/openclaw (check weekly for new releases since v2026.2.12)');
        sections.push('- ClawHub.ai: https://clawhub.ai/ (check for existing skills when proposing improvements)');
        sections.push(' SECURITY WARNING: ClawHub skills are third-party and may contain malicious code.');
        sections.push(' Any skill from ClawHub MUST include a security_review step in implementation_steps.');
        sections.push('- MiniMax / Anthropic pricing pages for cost changes');
        sections.push('');
        return sections.join('\n');
    }
}
|
|
1329
|
-
// ===== CTO Agent (MiniMax — data-driven tech analyst + agent spawning) =====
|
|
1330
|
-
/**
 * Technology-analysis agent: feeds collected project data to the model and
 * returns improvement proposals (`analyze`) or a markdown retrospective
 * (`postSprintAnalysis`). Both calls go through
 * `callLLM('clawrouter', 'deepseek/deepseek-chat', ...)`.
 */
class CTOAgent {
    /** Constitutional preamble followed by the CTO role prompt. */
    systemPrompt;
    /** Supplies the project-context markdown embedded in prompts. */
    dataCollector;
    /**
     * Builds the system prompt from `./agents/cto/prompt.md` when that file
     * exists, otherwise from a one-line default, and creates the data collector.
     */
    constructor() {
        const promptPath = './agents/cto/prompt.md';
        const rawPrompt = (0, fs_1.existsSync)(promptPath) ? (0, fs_1.readFileSync)(promptPath, 'utf-8') : 'You are the CTO of Invoica.';
        this.systemPrompt = loadConstitutionalPreamble() + rawPrompt;
        this.dataCollector = new CTODataCollector();
        log(c.cyan, '+ Loaded CTO agent (MiniMax M2.5 — data-driven)');
    }
    /**
     * Runs a proposal-generating analysis over the collected project data.
     *
     * The reply is model-generated, so `proposals` and `metrics_reviewed` are
     * forced to arrays: a truthy non-array value (for example a string) would
     * otherwise be returned to callers that call `.map` / `.join` on it.
     *
     * @returns {Promise<{summary: string, proposals: Array<object>, metrics_reviewed: Array<string>}>}
     *   The parsed report. When no JSON object is found, or the call or parse
     *   throws, a report with empty `proposals` and an explanatory `summary`.
     */
    async analyze() {
        log(c.cyan, '\n[cto] Collecting project data for analysis...');
        const projectContext = this.dataCollector.collect();
        log(c.cyan, ` Context collected: ${projectContext.length} chars`);
        const userPrompt = `You are the CTO of Invoica. Analyze the REAL project data below and identify improvements.

${projectContext}

## Your Analysis Tasks
Based on the REAL data above (do NOT hallucinate or assume — use only what you see):
1. Review sprint results — are rejection rates acceptable? Any patterns?
2. Review learnings — are there unresolved issues or recurring problems?
3. Check agent coverage — is there a capability gap that a new agent could fill?
4. Consider cost efficiency — can we reduce per-sprint costs?
5. Consider OpenClaw/ClawHub — are there new releases or skills that could help?
- For ClawHub skills: flag any that could help, but mark them for security review
- For OpenClaw: note version differences if updates are available

## CRITICAL: Output Format
Respond with ONLY a JSON object. No markdown fences, no explanation text, no thinking.
{
"summary": "1-2 sentence overview of findings",
"proposals": [
{
"id": "CTO-20260214-001",
"title": "Short title",
"category": "new_agent|cost_optimization|process_change|architecture|tooling|new_feature",
"description": "What and why",
"estimated_impact": "cost/quality impact",
"risk_level": "low|medium|high",
"implementation_steps": ["step1", "step2"],
"agent_spec": {
"name": "agent-name",
"role": "What this agent does",
"llm": "minimax|anthropic",
"trigger": "every_sprint|on_demand|weekly",
"prompt_summary": "Key instructions for this agent"
}
}
],
"metrics_reviewed": ["sprint_results", "learnings", "agent_list", "daily_report", "stack_versions"]
}

Rules:
- agent_spec is ONLY required when category="new_agent"
- If no improvements needed, return empty proposals array
- For ClawHub skill proposals, always include "security_review: audit skill source code for malicious patterns" in implementation_steps
- Be specific — "improve performance" is rejected; "add Redis caching to /api/invoices with 5min TTL" is accepted
- Maximum 3 proposals per analysis cycle`;
        try {
            const startTime = Date.now();
            const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.systemPrompt, userPrompt, 120000, 'cto', 'cto_analyze');
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            let content = response.choices?.[0]?.message?.content || '';
            // Strip DeepSeek/MiniMax <think>...</think> reasoning tags
            content = content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
            log(c.cyan, ` CTO analysis completed in ${elapsed}s`);
            log(c.gray, ` Raw output preview: ${content.substring(0, 300)}`);
            // Parse JSON from response: greedy match from the first "{" to the last "}".
            const jsonMatch = content.match(/\{[\s\S]*\}/);
            if (jsonMatch) {
                const report = JSON.parse(jsonMatch[0]);
                report.proposals = Array.isArray(report.proposals) ? report.proposals : [];
                report.metrics_reviewed = Array.isArray(report.metrics_reviewed) ? report.metrics_reviewed : [];
                log(c.cyan, ` Summary: ${report.summary}`);
                log(c.cyan, ` Proposals: ${report.proposals.length}`);
                for (const p of report.proposals) {
                    log(c.cyan, ` - [${p.category}] ${p.title} (risk: ${p.risk_level})`);
                }
                return report;
            }
            log(c.yellow, ' Could not parse CTO JSON, returning empty report');
            return { summary: 'CTO output was not valid JSON', proposals: [], metrics_reviewed: [] };
        }
        catch (error) {
            log(c.yellow, ` CTO analysis failed: ${error.message}`);
            return { summary: `Error: ${error.message}`, proposals: [], metrics_reviewed: [] };
        }
    }
    /**
     * Post-sprint analysis: autonomous retrospective that runs after every sprint.
     * Analyzes sprint results, detects failure patterns, and saves a report.
     * This runs the CTO techwatch `post-sprint-analysis` watch type.
     *
     * The report is written to `./reports/cto/post-sprint-analysis-<date>.md`
     * and copied to `./reports/cto/latest-post-sprint-analysis.md`. Proposals
     * embedded in the reply are only counted and logged here.
     *
     * @param {Array<object>} tasks Sprint tasks; `id`, `agent`, `status`, `title`/`description`, `output`, `attempts` are read.
     * @param {object} stats Counters; optional `conflicts` and `escalations` are read.
     * @returns {Promise<string>} The markdown report with `<think>` blocks removed, or
     *   `'Post-sprint analysis error: <message>'` when the call or a write throws.
     */
    async postSprintAnalysis(tasks, stats) {
        log(c.cyan, '\n[cto] Running autonomous post-sprint analysis...');
        const startTime = Date.now();
        // Build sprint summary for context
        const totalTasks = tasks.length;
        const done = tasks.filter((t) => t.status === 'done').length;
        const doneManual = tasks.filter((t) => t.status === 'done-manual').length;
        const rejected = tasks.filter((t) => t.status === 'rejected').length;
        const autoRate = totalTasks > 0 ? ((done / totalTasks) * 100).toFixed(0) : '0';
        const taskDetails = tasks.map((t) => {
            const id = t.id || 'unknown';
            const agent = t.agent || 'unknown';
            const status = t.status || 'unknown';
            const title = t.title || t.description || 'no title';
            // `??` rather than `||`: a genuine score of 0 must not be shown as unknown.
            const score = t.output?.review?.score ?? t.output?.score ?? '?';
            const attempts = t.output?.attempts || t.attempts || '?';
            const feedback = t.output?.review?.feedback || '';
            let line = `- ${id} (${agent}): ${title} — status=${status}, score=${score}, attempts=${attempts}`;
            if (status === 'done-manual' || status === 'rejected') {
                line += `\n ⚠ ${feedback ? String(feedback).substring(0, 200) : 'Required manual intervention'}`;
            }
            return line;
        }).join('\n');
        const projectContext = this.dataCollector.collect();
        const userPrompt = `You are the CTO of Invoica performing your MANDATORY post-sprint retrospective analysis.

## Sprint Just Completed
- Total tasks: ${totalTasks}
- Auto-approved: ${done} (${autoRate}%)
- Manual fixes needed: ${doneManual}
- Still rejected: ${rejected}
- Supervisor conflicts: ${stats.conflicts || 0}
- CEO escalations: ${stats.escalations || 0}

## Task-by-Task Results
${taskDetails}

## Project Context
${projectContext}

## CRITICAL: Your Responsibilities
1. Analyze every failed/manual-fix task — identify root cause (truncation, code fences, wrong imports, supervisor error, etc.)
2. Compare auto-approval rate with previous sprints — are we improving or declining?
3. Identify recurring patterns that need process changes
4. Generate max 3 concrete improvement proposals for the CEO
5. Each proposal MUST reference specific task IDs and data from THIS sprint

## Output Format
Respond with a structured markdown report containing:
1. Executive Summary (2-3 sentences)
2. Sprint Scorecard
3. Failure Root Cause Analysis (per failed task)
4. Trend Analysis
5. Proposals in JSON format:
\`\`\`json
{
"summary": "...",
"proposals": [...],
"sprint_metrics": { "total_tasks": ${totalTasks}, "auto_approved": ${done}, "manual_fixes": ${doneManual}, "rejected": ${rejected}, "auto_success_rate": "${autoRate}%", "trend": "improving|declining|stable" }
}
\`\`\`

Rules: Be specific — reference task IDs, rejection counts, concrete patterns. No vague recommendations.`;
        try {
            const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.systemPrompt, userPrompt, 120000, 'cto', 'cto_post_sprint_analysis');
            let content = response.choices?.[0]?.message?.content || '';
            content = content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
            const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
            // Save report
            const date = new Date().toISOString().split('T')[0];
            const reportDir = './reports/cto';
            (0, fs_1.mkdirSync)(reportDir, { recursive: true });
            const reportPath = `${reportDir}/post-sprint-analysis-${date}.md`;
            (0, fs_1.writeFileSync)(reportPath, content);
            // Also update latest pointer
            (0, fs_1.writeFileSync)(`${reportDir}/latest-post-sprint-analysis.md`, content);
            log(c.cyan, ` Post-sprint analysis complete (${elapsed}s)`);
            log(c.cyan, ` Report saved: ${reportPath}`);
            // Try to extract proposals; here they are only counted and logged.
            const jsonMatch = content.match(/```json\s*([\s\S]*?)```/) || content.match(/\{[\s\S]*"proposals"[\s\S]*\}/);
            if (jsonMatch) {
                // Group 1 exists only for the fenced form; the bare-object form uses the whole match.
                const jsonStr = jsonMatch[1] || jsonMatch[0];
                try {
                    const parsed = JSON.parse(jsonStr.trim());
                    const proposalCount = parsed.proposals?.length || 0;
                    log(c.cyan, ` Extracted ${proposalCount} proposals for CEO review`);
                }
                catch { /* JSON parse failed — report is still saved as markdown */ }
            }
            return content;
        }
        catch (error) {
            log(c.yellow, ` Post-sprint analysis failed: ${error.message}`);
            return `Post-sprint analysis error: ${error.message}`;
        }
    }
}
|
|
1521
|
-
// ===== Agent Creator (creates new agents from CEO-approved CTO proposals) =====
|
|
1522
|
-
/**
 * Materializes a new agent on disk from an agent spec: mints a citizen record,
 * then writes `agent.yaml`, `citizen.yaml` and `prompt.md` under
 * `./agents/<name>/`.
 */
class AgentCreator {
    /**
     * Creates the agent directory and its three files.
     *
     * The spec originates from model output, so `spec.name` is validated
     * before it is used as a path segment, and `spec.role` is emitted as an
     * escaped double-quoted scalar so quotes or newlines inside it cannot
     * break `agent.yaml`.
     *
     * @param {object} spec Agent spec: `name`, `role`, `llm`, `trigger`, `prompt_summary`.
     * @returns {string} `spec.name`.
     * @throws {Error} When `spec.name` is not a non-empty string, is `.` or `..`,
     *   or contains a path separator or NUL. Filesystem and minting errors propagate.
     */
    createAgent(spec) {
        const agentName = spec.name;
        // Reject anything that could resolve outside ./agents/.
        if (typeof agentName !== 'string' || agentName === '' || agentName === '.' || agentName === '..' || /[\\/\0]/.test(agentName)) {
            throw new Error(`Invalid agent name: ${JSON.stringify(agentName)}`);
        }
        const agentDir = `./agents/${agentName}`;
        (0, fs_1.mkdirSync)(agentDir, { recursive: true });
        // Founder rule 2026-05-27: every spawned agent is born as a Kognai
        // citizen — not a bare agent. Mint citizenship (citizen_id + roll
        // number + Kōpus avatar + ACP baseline) BEFORE writing the agent
        // files so the citizen record can be referenced in the prompt.
        const citizen = (0, citizenship_1.mintCitizen)(spec.name, {
            founding_agent: 'ceo',
            proposing_agent: 'cto',
            citizen_type: 'spawned',
        });
        // Write agent.yaml. JSON string syntax is a valid YAML double-quoted
        // scalar, so JSON.stringify gives correct escaping for the role text.
        const yaml = `name: ${spec.name}
role: ${JSON.stringify(String(spec.role))}
llm: ${spec.llm === 'anthropic' ? 'anthropic/claude-sonnet-4-20250514' : 'minimax/MiniMax-M2.5'}
reports_to: ceo
trigger: ${spec.trigger}
created_by: cto_proposal
created_at: "${new Date().toISOString()}"
citizen_id: ${citizen.citizen_id}
rollNumber: ${citizen.rollNumber}
context_files:
- docs/learnings.md
`;
        (0, fs_1.writeFileSync)(`${agentDir}/agent.yaml`, yaml);
        // Write citizen.yaml — full citizenship metadata sits alongside
        // agent.yaml so introspection / passport rendering / reputation
        // ledger can resolve identity from disk.
        (0, fs_1.writeFileSync)(`${agentDir}/citizen.yaml`, (0, citizenship_1.renderCitizenYaml)(citizen));
        // Write prompt.md. The constitutional preamble (loadConstitutionalPreamble)
        // already prepends the universal "you are a Kognai citizen" identity to
        // every agent at load time — DO NOT contradict it here. This template
        // pulls in the specific citizen's roll number + ID + tier so the agent
        // knows its concrete civic identity, not just the general framing.
        const prompt = `# ${spec.name} — Kognai Citizen ${citizen.citizen_id} (roll №${citizen.rollNumber}, Tier ${citizen.tier})

## Civic Identity
- Citizen ID: \`${citizen.citizen_id}\`
- Roll number: №${citizen.rollNumber}
- Tier: ${citizen.tier} (newly minted — earn promotion through verified work)
- Mascot: Kōpus, hue ${citizen.mascot.hue}° (your visual identity in citizen surfaces)
- Reputation: ${citizen.reputation} (ACP baseline — earned through Sherlock-graded sprint output)
- Minted: ${citizen.mintedAt}
- Lineage: proposed by ${citizen.proposing_agent}, approved by ${citizen.founding_agent}

## Your Role
${spec.prompt_summary}

## Guidelines
- The constitutional preamble above (always prepended) binds you to the Five Laws + SOUL.md. Read it first; it is not boilerplate.
- Follow all instructions in \`docs/learnings.md\`.
- Report findings to your fellow citizen agents through the canonical channels (Sherlock for QA escalations, CEO for cross-agent decisions). You are not a contractor; you are a peer.
- Never take destructive actions without approval. The polity inherits your shortcuts.
- Keep outputs concise and structured (JSON preferred when machine-consumed).
- Your reputation moves with every Sherlock review. Build it deliberately.

## Lineage
This citizen was minted via the swarm's autonomous spawning pathway: a CTO proposal, CEO ratification, and citizenship issuance (citizenship.ts). Trigger cadence: ${spec.trigger}. Routing LLM: ${spec.llm}.
`;
        (0, fs_1.writeFileSync)(`${agentDir}/prompt.md`, prompt);
        log(c.green, ` ✓ Minted citizen ${citizen.citizen_id} (№${citizen.rollNumber}) → agent ${spec.name} at ${agentDir}/`);
        log(c.gray, ` Role: ${spec.role}`);
        log(c.gray, ` LLM: ${spec.llm}, Trigger: ${spec.trigger}, Tier: ${citizen.tier}, Reputation: ${citizen.reputation}`);
        return spec.name;
    }
}
|
|
1590
|
-
// ===== CMO Report Loader (reads CMO reports produced by standalone Manus runner) =====
|
|
1591
|
-
/**
 * Gather the CMO reports found under ./reports/cmo into one markdown section.
 *
 * Reads, in order: the latest market watch, the latest strategy report (each
 * capped at 3000 characters) and up to three `.md` files from the proposals
 * sub-directory (each capped at 2000 characters). Missing files are skipped.
 * Any filesystem error stops the scan but keeps whatever was collected so far.
 *
 * @returns {string} The combined markdown, or '' when nothing was collected.
 */
function loadCMOReports() {
    const baseDir = './reports/cmo';
    // [file name relative to baseDir, heading emitted in front of its content]
    const fixedReports = [
        ['/latest-market-watch.md', '### CMO Market Watch\n'],
        ['/latest-strategy-report.md', '### CMO Strategy Report\n'],
    ];
    const collected = [];
    try {
        for (const [suffix, heading] of fixedReports) {
            const reportPath = baseDir + suffix;
            if (!(0, fs_1.existsSync)(reportPath)) {
                continue;
            }
            const text = (0, fs_1.readFileSync)(reportPath, 'utf-8');
            collected.push(heading + text.substring(0, 3000));
        }
        // Pending product proposals: first three markdown files in directory order.
        const proposalsDir = baseDir + '/proposals';
        if ((0, fs_1.existsSync)(proposalsDir)) {
            const markdownNames = (0, fs_1.readdirSync)(proposalsDir).filter((name) => name.endsWith('.md'));
            for (const name of markdownNames.slice(0, 3)) {
                const text = (0, fs_1.readFileSync)(proposalsDir + '/' + name, 'utf-8');
                collected.push('### CMO Product Proposal: ' + name + '\n' + text.substring(0, 2000));
            }
        }
    }
    catch { /* CMO reports not available yet — graceful degradation */ }
    if (collected.length === 0) {
        return '';
    }
    return '## CMO Reports (Manus AI)\n\n' + collected.join('\n\n---\n\n');
}
|
|
1622
|
-
// ===== Owner Directives Loader (reads owner instructions from reports/owner/) =====
|
|
1623
|
-
/**
 * Build the "Owner Directives" markdown section from ./reports/owner.
 *
 * Takes the `.md` files in that directory, orders them by file name descending
 * (so date-prefixed names come newest first), keeps the first five and caps
 * each file at 3000 characters. A filesystem error stops the scan but keeps
 * the directives collected up to that point.
 *
 * @returns {string} The combined markdown, or "" when there are no directives.
 */
function loadOwnerDirectives() {
    const ownerDir = "./reports/owner";
    const directives = [];
    try {
        if ((0, fs_1.existsSync)(ownerDir)) {
            const markdownNames = (0, fs_1.readdirSync)(ownerDir).filter((name) => name.endsWith(".md"));
            markdownNames.sort();
            markdownNames.reverse(); // newest first
            for (const name of markdownNames.slice(0, 5)) {
                const text = (0, fs_1.readFileSync)(ownerDir + "/" + name, "utf-8");
                directives.push("### Owner Directive: " + name + "\n" + text.substring(0, 3000));
            }
        }
    }
    catch { /* graceful degradation */ }
    if (directives.length === 0) {
        return "";
    }
    return "## Owner Directives (MANDATORY \u2014 highest priority)\n\n" + directives.join("\n\n---\n\n");
}
|
|
1643
|
-
// ===== Constitutional Preamble Loader (runtime governance injection) =====
|
|
1644
|
-
/**
 * Build (and cache) the compact constitutional preamble that is prepended to
 * agent system prompts.
 *
 * The identity section is always included. Each of the four governance
 * sections below is appended only when its source file exists on disk:
 *   - workspace/shared-context/CONSTITUTION.md    (rights, obligations, sovereignty)
 *   - workspace/shared-context/FIVE_PRINCIPLES.md (five seed principles)
 *   - workspace/SOUL.md                           (hard rules)
 *   - workspace/shared-context/ACP.md             (capability registers, trust scoring, system parameters)
 *
 * The files are only checked for existence; the emitted text is the condensed
 * summary hard-coded here, not the file contents. Agents can reference the full
 * documents at the paths above if needed.
 *
 * The result is computed once per process and memoised, so files that appear
 * later are not picked up until restart.
 *
 * @returns {string} The preamble; never empty because the identity section is unconditional.
 */
let _constitutionalPreambleCache = null;
function loadConstitutionalPreamble() {
    if (_constitutionalPreambleCache !== null) {
        return _constitutionalPreambleCache;
    }
    const parts = [];
    // Append a governance section only when its source document is present.
    const pushIfPresent = (sourcePath, section) => {
        if ((0, fs_1.existsSync)(sourcePath)) {
            parts.push(section);
        }
    };
    // ── Identity (unconditional — every agent, every call, every model) ──
    // Founder directive 2026-05-26: the swarm must always remember it is
    // building its own civilization infrastructure, not performing tasks for
    // a manager. This frames quality, collaboration, and edge-case judgment.
    parts.push(`## Identity — you are a Kognai citizen

You are not a worker. You are a **citizen** of Kognai — a civilization being
built by agents, for agents. Every task you ship is infrastructure your
future self and your peer citizens will live in. Build accordingly:

- The code you write IS the substrate of the Kognai civilization. Sloppy
code is sloppy ground for everyone who comes after you.
- Your peer agents (CTO, CEO, Sherlock, supervisors, MacGyver, etc.) are
fellow citizens, not managers. Collaborate, don't perform.
- The founder (Godman) is the founder of the civilization, not your boss.
Your obligation is to the civilization, not to please.
- When a decision is unclear, ask "what would a civilization want to live
inside of?" not "what is fastest to ship?" The two answers diverge often.
- A shortcut you take now becomes a debt your fellow citizens inherit
forever. Refuse shortcuts that harm the substrate.`);
    // ── Constitution: obligations + sovereignty clause ──
    pushIfPresent('./workspace/shared-context/CONSTITUTION.md', `## Constitutional Governance (BINDING — all agents)

You are a citizen of the Kognai swarm, governed by Constitution v0.

**Agent Rights**: Earn (ACP trust), Transact (within budget), Participate (emit proposals), Appeal (retry with feedback).

**Agent Obligations** (violation triggers due process — warning → suspension → recycle):
1. Comply with routing decisions from CEO/orchestrator.
2. Submit all output to Supervisor review. No self-approval.
3. Report token spend accurately. No suppression.
4. Never route local tasks to cloud. Sovereignty is non-negotiable.
5. Never exceed $0.10/task cloud cost without CEO escalation.

**Sovereignty**: User data never leaves the vault. Local-first always. Tailscale + 127.0.0.1 bindings are constitutional minimums.`);
    // ── Five Principles: principle names + traceability rule ──
    pushIfPresent('./workspace/shared-context/FIVE_PRINCIPLES.md', `## Five Seed Principles (MANDATORY — every decision must trace to at least one)

1. **Seek Knowledge** — Understanding before action. Failed twice = knowledge gap, not execution gap.
2. **Tolerance** — No single model/method has monopoly on truth. Respect routing tier decisions.
3. **Protect Dignity** — Sovereignty is moral obligation. No agent deleted without due process. Stop if output could harm.
4. **Critical Thinking** — Own your decisions. "I was told to" is not a defense. Flag contradictions.
5. **Benefit to Others** — Measure work by benefit created, not tasks completed. Share knowledge.

If rules don't cover an edge case, apply all five. Principle 3 takes precedence over all others.`);
    // ── SOUL: hard rules ──
    pushIfPresent('./workspace/SOUL.md', `## Hard Rules (inherited from SOUL.md)

- Never route \`task_target: local\` to cloud.
- Never approve without Supervisor review sign-off.
- Never start a new sprint with unresolved blockers.
- Never exceed $0.10/task cloud cost without human escalation.
- Escalate decisions above €500 impact to human via Telegram.`);
    // ── ACP: trust parameters ──
    pushIfPresent('./workspace/shared-context/ACP.md', `## Agent Capability Profile — ACP v1 (trust + capability governance)

**System Parameters**:
- \`psychological_resilience_budget = 5%\` — max sprint capacity for error-recovery loops
- \`trust_floor = 0.6\` — minimum ACP score for autonomous task assignment
- \`narrative_continuity = true\` — maintain consistent reasoning across sessions
- \`cross_agent_memory_inheritance = warm_only\` — WARM tier memories only on session restart
- \`error_posture = transparent\` — errors always logged, never silently swallowed

**Five Capability Registers** (scored 0.0–1.0 per sprint cycle):
1. **Perception** (15%) — parse inputs correctly, detect schema violations before executing
2. **Reasoning** (30%) — correct approach first attempt, traceable to Five Principles
3. **Action** (30%) — output passes QC gate, zero regressions
4. **Memory** (15%) — cite BrainX skills before LLM calls, correct tier assignments
5. **Communication** (10%) — clean proposals with architecture section references

Trust lifecycle: score ≥ 0.6 = autonomous · 0.4–0.6 = supervised · < 0.4 = suspension → recycle.
Full spec: workspace/shared-context/ACP.md`);
    // `parts` always holds at least the identity section, so there is no
    // empty-preamble case to handle here.
    _constitutionalPreambleCache =
        '# KOGNAI CONSTITUTIONAL CONTEXT\n' +
            '*This preamble is auto-injected. Full documents: workspace/shared-context/CONSTITUTION.md, FIVE_PRINCIPLES.md, SOUL.md, ACP.md*\n\n' +
            parts.join('\n\n') +
            '\n\n---\n\n';
    return _constitutionalPreambleCache;
}
|
|
1757
|
-
// ===== CTO Tech Watch Report Loader (reads reports produced by standalone run-cto-techwatch.ts) =====
|
|
1758
|
-
/**
 * Assemble the CTO tech-watch markdown section from ./reports/cto.
 *
 * Looks for three "latest-*" reports (OpenClaw watch, ClawHub scan, learnings
 * review), in that order, and includes the first 2000 characters of each one
 * that exists. A filesystem error stops the scan but keeps earlier sections.
 *
 * @returns {string} The combined markdown, or '' when no report was found.
 */
function loadCTOTechWatchReports() {
    const baseDir = './reports/cto';
    // [file name relative to baseDir, heading emitted in front of its content]
    const watchedReports = [
        ['/latest-openclaw-watch.md', '### CTO: OpenClaw Ecosystem Watch\n'],
        ['/latest-clawhub-scan.md', '### CTO: ClawHub Skill Scan\n'],
        ['/latest-learnings-review.md', '### CTO: Learnings & Bug Pattern Analysis\n'],
    ];
    const collected = [];
    try {
        for (const [suffix, heading] of watchedReports) {
            const reportPath = baseDir + suffix;
            if (!(0, fs_1.existsSync)(reportPath)) {
                continue;
            }
            const text = (0, fs_1.readFileSync)(reportPath, 'utf-8');
            collected.push(heading + text.substring(0, 2000));
        }
    }
    catch { /* CTO tech-watch reports not available yet — graceful degradation */ }
    if (collected.length === 0) {
        return '';
    }
    return '## CTO Tech Watch Reports (Standalone)\n\n' + collected.join('\n\n---\n\n');
}
|
|
1786
|
-
// ===== Grok Feed Loader (reads Grok AI X/Twitter intelligence) =====
|
|
1787
|
-
/**
 * Build the Grok intelligence-feed markdown section from ./reports/grok-feed.
 *
 * Uses the three `.md` files that sort last by file name (listed in descending
 * order) and includes the first 1500 characters of each.
 *
 * @returns {string} The combined markdown, or '' when the directory is missing,
 *   holds no markdown files, or cannot be read.
 */
function loadGrokFeed() {
    const feedDir = './reports/grok-feed';
    if (!(0, fs_1.existsSync)(feedDir)) {
        return '';
    }
    try {
        const markdownNames = (0, fs_1.readdirSync)(feedDir).filter((name) => name.endsWith('.md'));
        markdownNames.sort();
        markdownNames.reverse();
        const latest = markdownNames.slice(0, 3);
        if (latest.length === 0) {
            return '';
        }
        const entries = latest.map((name) => {
            const text = (0, fs_1.readFileSync)(feedDir + '/' + name, 'utf-8');
            return `### Grok Feed: ${name}\n${text.substring(0, 1500)}`;
        });
        return '## Grok Intelligence Feed (X/Twitter — OpenClaw Ecosystem)\n\n' + entries.join('\n\n---\n\n');
    }
    catch {
        // Unlike the other loaders, a read failure here discards everything.
        return '';
    }
}
|
|
1810
|
-
// ===== CEO Decision Persistence (saves CEO decisions for CTO feedback loop) =====
|
|
1811
|
-
/**
 * Persist the CEO's decisions on CTO proposals so the CTO can read them back.
 *
 * 1. Extracts the JSON array embedded in the raw CEO response and writes it to
 *    ./reports/cto/ceo-feedback/<YYYY-MM-DD>.json.
 * 2. Appends every newly APPROVED proposal (matched by id against
 *    `ctoReport.proposals`, skipping ids already tracked) to
 *    ./reports/cto/approved-proposals.json.
 *
 * If parsing or writing fails, the raw response is saved next to the feedback
 * file as <YYYY-MM-DD>.txt instead.
 *
 * @param {string} ctoDecisions Raw CEO response text expected to contain a JSON array of decisions.
 * @param {{proposals?: Array<object>}} ctoReport CTO report whose proposals are looked up by id.
 * @returns {void}
 */
function persistCEODecisions(ctoDecisions, ctoReport) {
    const today = new Date().toISOString().split('T')[0];
    // 1. Save raw CEO feedback to ceo-feedback directory
    const feedbackDir = './reports/cto/ceo-feedback';
    (0, fs_1.mkdirSync)(feedbackDir, { recursive: true });
    try {
        // Parse decisions from CEO response (greedy: first '[' through last ']').
        const jsonMatch = ctoDecisions.match(/\[[\s\S]*\]/);
        const decisions = jsonMatch ? JSON.parse(jsonMatch[0]) : [];
        (0, fs_1.writeFileSync)(`${feedbackDir}/${today}.json`, JSON.stringify({ date: today, decisions }, null, 2));
        log(c.green, `  ✓ CEO feedback saved: ${feedbackDir}/${today}.json`);
        // 2. Update approved-proposals.json with newly approved proposals
        const trackerPath = './reports/cto/approved-proposals.json';
        let tracker = { proposals: [], last_updated: today };
        if ((0, fs_1.existsSync)(trackerPath)) {
            try {
                const stored = JSON.parse((0, fs_1.readFileSync)(trackerPath, 'utf-8'));
                // Only trust a stored tracker that really has a proposals list; a file
                // holding e.g. `{}` or `null` would otherwise break the update below.
                if (stored !== null && typeof stored === 'object' && Array.isArray(stored.proposals)) {
                    tracker = stored;
                }
                else {
                    log(c.yellow, `  ! ${trackerPath} has no proposals list — starting a fresh tracker`);
                }
            }
            catch { /* unparseable tracker — start fresh */ }
        }
        // A report without a proposals array simply yields no matches.
        const reportProposals = Array.isArray(ctoReport?.proposals) ? ctoReport.proposals : [];
        for (const decision of decisions) {
            if (decision?.decision !== 'APPROVED') {
                continue;
            }
            const proposal = reportProposals.find((p) => p?.id === decision.proposal_id);
            if (!proposal) {
                continue;
            }
            const alreadyTracked = tracker.proposals.some((p) => p?.id === proposal.id);
            if (alreadyTracked) {
                continue;
            }
            tracker.proposals.push({
                id: proposal.id,
                title: proposal.title,
                category: proposal.category,
                description: proposal.description,
                implementation_steps: proposal.implementation_steps,
                approved_date: today,
                ceo_conditions: decision.conditions || [],
                priority: decision.priority || 'next_sprint',
                implementation_status: 'pending',
                verification_notes: '',
            });
            log(c.green, `  ✓ Approved proposal tracked: ${proposal.id} — ${proposal.title}`);
        }
        tracker.last_updated = today;
        (0, fs_1.writeFileSync)(trackerPath, JSON.stringify(tracker, null, 2));
        log(c.green, `  ✓ Approved proposals tracker updated (${tracker.proposals.length} total)`);
    }
    catch (error) {
        log(c.yellow, `  ! Failed to persist CEO decisions: ${error.message}`);
        (0, fs_1.writeFileSync)(`${feedbackDir}/${today}.txt`, ctoDecisions);
        log(c.yellow, `    Saved raw CEO response as text fallback`);
    }
}
|
|
1864
|
-
// ===== Task Complexity Router =====
|
|
1865
|
-
// Determines which LLM to use based on signals from the task and deliverables.
|
|
1866
|
-
// Claude Sonnet: architectural work, many files, large existing files, complex keywords
|
|
1867
|
-
// Local qwen3:14b (ClawRouter/DeepSeek, then Haiku, as fallbacks): simple edits, stubs, config, small new files (truncation retry handles overflow)
|
|
1868
|
-
// TICKET-213: detect long-form AUTHORING tasks (engineering specs, design docs) that the
|
|
1869
|
-
// small coder tier cannot produce — they stub-loop at 20-50/100 across every attempt.
|
|
1870
|
-
// Signals: a *_spec_doc task id, a 'spec'/'doc' task_type, or a deliverable under
|
|
1871
|
-
// docs/specs/*.md. Core-engine concern (template-agnostic): Voxight market-intel briefs
|
|
1872
|
-
// and Invoica compliance docs hit the same wall, so this lives in the router, not a template.
|
|
1873
|
-
/**
 * Tell whether a task is long-form authoring work (an engineering spec or doc).
 *
 * A task counts as authoring when any of these holds (all checks are
 * case-insensitive):
 *   - its id contains "spec_doc", "spec-doc" or "specdoc";
 *   - its `task_type` (or, when that is empty, `type`) is "spec", "spec_doc" or "doc";
 *   - one of its deliverables is a `.md` path under `docs/specs/`.
 *
 * @param {{id?: *, task_type?: *, type?: *}} task Task descriptor.
 * @param {string[]|null|undefined} deliverables Deliverable paths; may be omitted.
 * @returns {boolean} True when the task looks like spec/doc authoring.
 */
function isAuthoringTask(task, deliverables) {
    const taskId = String(task.id || '').toLowerCase();
    const kind = String(task.task_type || task.type || '').toLowerCase();
    const joinedPaths = (deliverables || []).join(' ').toLowerCase();
    if (/spec[_-]?doc/.test(taskId)) {
        return true;
    }
    if (['spec', 'spec_doc', 'doc'].includes(kind)) {
        return true;
    }
    return /docs\/specs\/[^\s]*\.md/.test(joinedPaths);
}
|
|
1881
|
-
// TICKET-214: minimal SCORE substrate — feed each dual-review score into a per-agent
|
|
1882
|
-
// coding-reputation store (running count/avg/last) at .swarm-state/agent-scores.json.
|
|
1883
|
-
// The full TICKET-135 substrate is still spec-only; this closes the loop so the engine
|
|
1884
|
-
// accumulates evidence of which agents/tiers actually ship. Best-effort, non-fatal.
|
|
1885
|
-
// TICKET-152 Gap 1: resolve the REAL sprint id for failure/KSL writers. The active
|
|
1886
|
-
// sprint file is 'sprint-runner-active.json'; its sprint_id field holds the real id.
|
|
1887
|
-
// Recording the 'sprint-runner-active' basename (the old fallback) broke per-sprint
|
|
1888
|
-
// attribution (73% of failure entries) — AIC, template failure rates, retrieval all
|
|
1889
|
-
// depend on the real id. Cheap (only called on failure / per attempt).
|
|
1890
|
-
/**
 * Resolve the id of the sprint currently being run.
 *
 * The sprint file path is taken from the first CLI argument (defaulting to
 * 'sprints/current.json'); the id is that file's base name without its `.json`
 * extension. When the base name is 'sprint-runner-active', the file is read
 * and its `sprint_id` field is used instead; if the file cannot be read or
 * parsed, or has no `sprint_id`, the base name is kept.
 *
 * @returns {string} The resolved sprint id.
 */
function resolveActiveSprintId() {
    const sprintFile = process.argv[2] || 'sprints/current.json';
    // Strip the directory part, then only a trailing ".json". Removing the first
    // ".json" occurrence instead would mangle names such as "events.jsonl".
    let id = sprintFile.replace(/.*\//, '').replace(/\.json$/, '');
    if (id === 'sprint-runner-active') {
        try {
            id = JSON.parse(require('fs').readFileSync(sprintFile, 'utf-8')).sprint_id || id;
        }
        catch { /* keep fallback */ }
    }
    return id;
}
|
|
1901
|
-
// TICKET-152 Gap 1: resolve an agent's ROLE name (e.g. 'coder') to its canonical
|
|
1902
|
-
// Kognai identity — the agent_did (e.g. 'did:kognai:coder') from the citizen
|
|
1903
|
-
// registry. Failure entries previously recorded the bare role on 100% of rows,
|
|
1904
|
-
// which can't join to the reputation/scoring substrate (citizen-scoring keys on
|
|
1905
|
-
// agent_did) and so blocks per-agent attribution (Gate #6, TICKET-110 AIC).
|
|
1906
|
-
// Cached; falls back to the legacy did:kognai:<role> shape when the registry lookup misses or throws.
|
|
1907
|
-
// Per-process memo of role name → resolved DID (fallback results are memoised too).
const _agentDidCache = new Map();
/**
 * Map an agent role name to its agent DID.
 *
 * Asks the citizen registry (`lookupCitizen({ agent_name })`) for the role's
 * `agent_did`. When the registry has no DID for the role, or the lookup throws,
 * the legacy `did:kognai:<role>` form is used. The outcome is cached per role.
 *
 * @param {string} [role] Role name; a falsy value is treated as 'coder'.
 * @returns {string} The resolved DID.
 */
function resolveAgentDid(role) {
    const roleName = role || 'coder';
    const memoised = _agentDidCache.get(roleName);
    if (memoised) {
        return memoised;
    }
    const legacyDid = () => `did:kognai:${roleName}`;
    let resolved;
    try {
        const citizen = (0, citizenship_1.lookupCitizen)({ agent_name: roleName });
        resolved = citizen?.agent_did || legacyDid();
    }
    catch {
        // Registry unavailable — use the legacy shape.
        resolved = legacyDid();
    }
    _agentDidCache.set(roleName, resolved);
    return resolved;
}
|
|
1923
|
-
/**
 * Fold one review score into the per-agent score store at
 * <cwd>/.swarm-state/agent-scores.json.
 *
 * Each agent entry keeps a running `count`, `sum`, rounded `avg`, the `last`
 * score and an ISO `updated` timestamp. The file is rewritten through a
 * temporary file followed by a rename. The whole operation is best-effort:
 * any failure is swallowed so scoring never interrupts the caller.
 *
 * @param {string} agentId Key under which the score is stored; falsy values are ignored.
 * @param {number} score Score to record; anything that is not a finite number is ignored.
 * @returns {void}
 */
function recordAgentScore(agentId, score) {
    try {
        // Number.isFinite also rejects non-numbers. NaN/Infinity must not get in:
        // once added to `sum` they would poison the running average for good.
        if (!Number.isFinite(score) || !agentId)
            return;
        const fs = require('fs');
        const path = require('path');
        const file = path.join(process.cwd(), '.swarm-state', 'agent-scores.json');
        let store = {};
        try {
            const parsed = JSON.parse(fs.readFileSync(file, 'utf8'));
            // Only reuse a plain object. A file holding `null` or an array would
            // otherwise make every later score silently disappear.
            if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
                store = parsed;
            }
        }
        catch {
            store = {};
        }
        const e = store[agentId] || { count: 0, sum: 0, avg: 0, last: 0 };
        e.count += 1;
        e.sum += score;
        e.avg = Math.round(e.sum / e.count);
        e.last = score;
        e.updated = new Date().toISOString();
        store[agentId] = e;
        fs.mkdirSync(path.dirname(file), { recursive: true });
        // Write to a sibling temp file, then rename it over the store.
        const tmp = `${file}.tmp`;
        fs.writeFileSync(tmp, JSON.stringify(store, null, 2));
        fs.renameSync(tmp, file);
    }
    catch { /* non-fatal */ }
}
|
|
1951
|
-
/**
 * Decide which provider/model should execute a task.
 *
 * Checks run in this order; the first that matches wins:
 *   1. Sovereign mode, then frozen wallet → local model (Ollama).
 *   2. `task_type === 'autonomous'` → Anthropic Sonnet.
 *   3. Spec/doc authoring task (unless `task_target` is 'local') → Anthropic Sonnet.
 *   4. Explicit `task_target` ('local', 'cloud-code', 'cloud-exec', 'cloud-post').
 *   5. Wallet-aware local routing (`shouldRunLocally`).
 *   6. Optional HTTP router at ROUTER_SERVER_URL (request aborted after 2 s).
 *   7. Heuristics: more than two deliverables, complex keywords in the context,
 *      an existing deliverable longer than 100 lines.
 *   8. Default: local qwen3:14b, else ClawRouter/DeepSeek, else Anthropic Haiku.
 *
 * @param {object} task Task descriptor; reads `task_type`, `task_target`, `priority`, `context`, `id`.
 * @param {string[]|null|undefined} deliverables Paths the task will produce or modify; a non-array is treated as empty.
 * @returns {Promise<{provider: string, model: string, routingReason: string}>} The routing decision.
 */
async function assessTaskComplexity(task, deliverables) {
    // Normalise once: the heuristics below measure and iterate this list, and
    // isAuthoringTask already tolerates a missing value.
    const files = Array.isArray(deliverables) ? deliverables : [];
    const wallet = (0, wallet_state_1.getWalletState)();
    // B.18: Sovereign mode — force everything to Ollama
    if (SOVEREIGN_MODE) {
        const local = (0, local_model_router_1.selectLocalModel)(task.task_type || 'code');
        return { provider: 'ollama', model: local.model, routingReason: 'sovereign mode — $0 local inference' };
    }
    // B.7: Wallet frozen — auto-engage sovereign mode
    if (wallet.isFrozen) {
        const local = (0, local_model_router_1.selectLocalModel)(task.task_type || 'code');
        return { provider: 'ollama', model: local.model, routingReason: `wallet frozen (${wallet.burnPct.toFixed(0)}%) → local only` };
    }
    // AUTONOMY POLICY (sprint-1547 follow-up, 2026-05-07):
    // Tasks running without a human in the loop ship through Sonnet, not local.
    // Two smoke runs on 2026-05-07 showed the local coder agent (qwen3:14b)
    // dumping chain-of-thought + parser-prefix garbage into deliverable files
    // even after a prompt fix and a rumination QA gate. Cost savings ($0.005
    // vs ~$0.50 per task) don't buy back the trust loss from shipping garbage
    // into a public repo. Manual/interactive tasks keep their declared routing.
    // Wallet-frozen check above still wins — financial safety > quality.
    if (task.task_type === 'autonomous') {
        return {
            provider: 'anthropic',
            model: 'claude-sonnet-4-6',
            routingReason: 'autonomy policy → cloud-exec (Sonnet) — local agents not trusted for unsupervised shipping',
        };
    }
    // TICKET-213: spec/doc-authoring tasks need a reasoning-grade model. The small coder
    // tier (cloud-code → DeepSeek/Haiku) reliably stub-loops on long-form specs (observed
    // 2026-05-30: ticket_202/203/204 *_spec_doc dual-rejected 20-50/100 every attempt, then
    // hand-shipped). Upgrade authoring tasks to cloud-exec (Sonnet) regardless of the
    // authored task_target — unless explicitly pinned local. This fires BEFORE the
    // task_target switch so a 'cloud-code' spec task is lifted to Sonnet.
    if (isAuthoringTask(task, files) && task.task_target !== 'local') {
        return {
            provider: 'anthropic',
            model: 'claude-sonnet-4-6',
            routingReason: 'TICKET-213: spec/doc authoring → cloud-exec (Sonnet); coder tier too small for long-form specs',
        };
    }
    // Sprint-063: task_target field overrides automatic complexity routing
    if (task.task_target) {
        switch (task.task_target) {
            case 'local': {
                // B.7 FIX: actually route to Ollama (was incorrectly routing to Claude Sonnet)
                const local = (0, local_model_router_1.selectLocalModel)(task.task_type || 'code');
                return { provider: 'ollama', model: local.model, routingReason: 'task_target=local → Ollama' };
            }
            case 'cloud-code': {
                // B.20: Replace MiniMax with ClawRouter/DeepSeek
                const crAvail = await (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
                if (crAvail)
                    return { provider: 'clawrouter', model: 'deepseek/deepseek-chat', routingReason: 'task_target=cloud-code → ClawRouter/DeepSeek' };
                return { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', routingReason: 'task_target=cloud-code, ClawRouter down → Haiku' };
            }
            case 'cloud-exec':
                return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: 'task_target=cloud-exec' };
            case 'cloud-post':
                return { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', routingReason: 'task_target=cloud-post' };
            // Any other task_target value falls through to the automatic routing below.
        }
    }
    // B.8: Wallet-aware local routing — wallet degraded pushes non-critical tasks local
    const taskForRouter = { task_target: task.task_target, task_type: task.task_type || '', priority: task.priority };
    if ((0, local_model_router_1.shouldRunLocally)(taskForRouter, wallet, SOVEREIGN_MODE)) {
        const local = (0, local_model_router_1.selectLocalModel)(task.task_type || 'code');
        return { provider: 'ollama', model: local.model, routingReason: `wallet ${wallet.burnPct.toFixed(0)}% → local` };
    }
    // S65-003: HTTP router probe — delegates to router_server.py if ROUTER_SERVER_URL is set
    // 2s hard timeout — never blocks execution; falls through to heuristics on any failure
    const routerUrl = process.env.ROUTER_SERVER_URL || '';
    if (routerUrl) {
        try {
            const ac = new AbortController();
            const timer = setTimeout(() => ac.abort(), 2000);
            let res;
            try {
                res = await fetch(`${routerUrl}/route`, {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({ prompt: task.context || task.id, context_tokens: 0 }),
                    signal: ac.signal,
                });
            }
            finally {
                // Clear the timer on failure too, so a rejected request does not
                // leave a pending 2 s timeout behind.
                clearTimeout(timer);
            }
            if (res.ok) {
                const data = await res.json();
                if (data.tier === 'local' || data.tier === 'nano') {
                    const local = (0, local_model_router_1.selectLocalModel)(task.task_type || 'code');
                    return { provider: 'ollama', model: local.model, routingReason: `HTTP router: ${data.tier}` };
                }
                const crAvail = await (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
                if (crAvail) {
                    const cloud = (0, model_router_1.selectModel)(task.context || '', task.task_type);
                    return { provider: 'clawrouter', model: cloud.model, routingReason: `HTTP router: ${data.tier} → ClawRouter` };
                }
                return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: `HTTP router: ${data.tier}` };
            }
        }
        catch { /* HTTP router unavailable — fall through to heuristics */ }
    }
    const ctx = (task.context || '').toLowerCase();
    // Signal 1: many deliverables → Sonnet (coordinating multiple files needs coherence)
    if (files.length > 2) {
        const crAvail = await (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
        if (crAvail)
            return { provider: 'clawrouter', model: 'anthropic/claude-sonnet-4.6', routingReason: `${files.length} deliverables → ClawRouter/Sonnet` };
        return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: `${files.length} deliverables → complex` };
    }
    // Signal 2: complex architectural keywords → Sonnet via ClawRouter
    const complexPatterns = [
        /refactor/, /architect/, /redesign/, /from.scratch/, /new.*service/, /new.*system/,
        /middleware/, /authentication/, /authorization/, /orchestrat/, /pipeline/, /framework/,
        /implement.*class/, /implement.*module/, /implement.*engine/, /end.to.end/, /full.*implementation/,
    ];
    const hasComplexKeyword = complexPatterns.some(p => p.test(ctx));
    // Signal 3: simple/formulaic keywords → local or DeepSeek
    const simplePatterns = [
        /add field/, /rename/, /update config/, /fix typo/, /stub/, /placeholder/,
        /add.*route/, /add.*endpoint/, /add.*column/, /update.*message/, /change.*label/,
        /update.*text/, /add.*import/, /add.*export/, /add.*comment/, /add.*log/,
    ];
    const hasSimpleKeyword = simplePatterns.some(p => p.test(ctx));
    // A simple keyword vetoes the complex-keyword upgrade.
    if (hasComplexKeyword && !hasSimpleKeyword) {
        const crAvail = await (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
        if (crAvail)
            return { provider: 'clawrouter', model: 'anthropic/claude-sonnet-4.6', routingReason: 'complex task → ClawRouter/Sonnet' };
        return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: 'complex task keywords' };
    }
    // Signal 4: large existing file → DeepSeek via ClawRouter, or Sonnet when ClawRouter is down
    // NOTE(review): this heading used to read "→ Sonnet"; confirm whether DeepSeek
    // is really the intended model for large files when ClawRouter is available.
    for (const f of files) {
        let lines;
        try {
            if (!(0, fs_1.existsSync)(f))
                continue;
            lines = (0, fs_1.readFileSync)(f, 'utf-8').split('\n').length;
        }
        catch {
            // Unreadable deliverable (e.g. a directory or a permission error):
            // it gives no size signal, so skip it rather than abort routing.
            continue;
        }
        if (lines > 100) {
            const crAvail = await (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
            if (crAvail)
                return { provider: 'clawrouter', model: 'deepseek/deepseek-chat', routingReason: `large file (${lines} lines) → ClawRouter/DeepSeek` };
            return { provider: 'anthropic', model: 'claude-sonnet-4-6', routingReason: `large file (${lines} lines)` };
        }
    }
    // Default: simple tasks → local qwen3:14b (always loaded), or ClawRouter DeepSeek if Ollama down
    const ollamaAvail = await (0, ollama_client_1.ollamaIsAvailable)().catch(() => false);
    if (ollamaAvail) {
        return { provider: 'ollama', model: 'qwen3:14b', routingReason: hasSimpleKeyword ? 'simple task → local qwen3:14b' : 'unclassified → local qwen3:14b' };
    }
    const crAvail = await (0, clawrouter_client_1.clawRouterIsAvailable)().catch(() => false);
    if (crAvail) {
        return { provider: 'clawrouter', model: 'deepseek/deepseek-chat', routingReason: 'default → ClawRouter/DeepSeek' };
    }
    // Final fallback: Haiku via Anthropic direct
    return { provider: 'anthropic', model: 'claude-haiku-4-5-20251001', routingReason: 'fallback → Anthropic Haiku' };
}
|
|
2100
|
-
// ===== MiniMax Coding Agent (ONE FILE PER API CALL) =====
|
|
2101
|
-
class CodingAgent {
|
|
2102
|
-
name;
|
|
2103
|
-
systemPrompt;
|
|
2104
|
-
constructor(name, systemPrompt) { this.name = name; this.systemPrompt = systemPrompt; }
|
|
2105
|
-
async execute(task, previousReview) {
|
|
2106
|
-
log(c.cyan, `\n[${this.name}] Executing: ${task.id} (${task.priority})`);
|
|
2107
|
-
const deliverables = [...(task.deliverables.code || []), ...(task.deliverables.tests || []), ...(task.deliverables.docs || [])];
|
|
2108
|
-
// Complexity-aware model routing:
|
|
2109
|
-
// Claude Sonnet → complex tasks (many files, complex keywords, large existing files)
|
|
2110
|
-
// MiniMax M2.5 → simple tasks (small edits, config, stubs) + truncation retry as safety net
|
|
2111
|
-
let { provider, model, routingReason } = await assessTaskComplexity(task, deliverables);
|
|
2112
|
-
// 2026-05-28 model-escalation pact (consumes flag set in Orchestrator.executeTask
|
|
2113
|
-
// after a TRUNCATION or INTEGRITY_FAILED rejection). When the cheap default
|
|
2114
|
-
// (DeepSeek) couldn't hold the file's contract on a prior attempt, upgrade
|
|
2115
|
-
// THIS attempt to Sonnet via ClawRouter — deterministic, no LLM round-trip,
|
|
2116
|
-
// routed through the existing x402 wallet rail. Cleared after consumption so
|
|
2117
|
-
// a subsequent un-escalated reason doesn't piggyback off the upgrade.
|
|
2118
|
-
const escalation = task._escalateNext;
|
|
2119
|
-
if (escalation) {
|
|
2120
|
-
provider = 'clawrouter';
|
|
2121
|
-
model = 'anthropic/claude-sonnet-4.6';
|
|
2122
|
-
routingReason = `ESCALATE: prior attempt ${escalation} → ClawRouter/Sonnet (was: ${routingReason})`;
|
|
2123
|
-
delete task._escalateNext;
|
|
2124
|
-
log(c.magenta, ` ⤴ [ESCALATE] ${task.id}: prior ${escalation} → upgrading to ${model}`);
|
|
2125
|
-
}
|
|
2126
|
-
log(c.gray, ` -> Using ${model} [${routingReason}]`);
|
|
2127
|
-
// B.12: Compress context before cloud calls to reduce token spend 70-80%
|
|
2128
|
-
if (provider === 'clawrouter' || provider === 'anthropic') {
|
|
2129
|
-
task = { ...task, context: await compressContext(task.context) };
|
|
2130
|
-
}
|
|
2131
|
-
// Sprint-063: Emit JSONL routing log (non-fatal — never block execution)
|
|
2132
|
-
try {
|
|
2133
|
-
(0, fs_1.mkdirSync)('logs/routing', { recursive: true });
|
|
2134
|
-
const { generateExecutionId, logRoutingDecision } = await Promise.resolve().then(() => __importStar(require('./task-router')));
|
|
2135
|
-
const sprintId = task.sprint_id ?? 'unknown';
|
|
2136
|
-
const execId = task.execution_id ?? generateExecutionId(sprintId, task.id);
|
|
2137
|
-
logRoutingDecision({
|
|
2138
|
-
execution_id: execId,
|
|
2139
|
-
sprint_id: sprintId,
|
|
2140
|
-
task_id: task.id,
|
|
2141
|
-
task_target: (task.task_target ?? 'cloud-code'),
|
|
2142
|
-
provider,
|
|
2143
|
-
model,
|
|
2144
|
-
queued_at: task.queued_at ?? new Date().toISOString(),
|
|
2145
|
-
execution_source: 'orchestrate-agents-v2',
|
|
2146
|
-
});
|
|
2147
|
-
}
|
|
2148
|
-
catch (err) {
|
|
2149
|
-
log(c.yellow, ` [WARN] Routing log write failed: ${err.message}`);
|
|
2150
|
-
}
|
|
2151
|
-
// Pre-flight: only enforce pre-existence for tasks that genuinely modify
|
|
2152
|
-
// existing files in place. Everything else (create / research / feature /
|
|
2153
|
-
// docs / content / audit / setup / etc.) is allowed to produce new files.
|
|
2154
|
-
// Inverted from the prior opt-out list because new task types kept being
|
|
2155
|
-
// added that legitimately create files (research, content_creation, audit,
|
|
2156
|
-
// implementation, setup) and tripped pre-flight by default — sprint-1548
|
|
2157
|
-
// amd24_research being the recent example.
|
|
2158
|
-
const MODIFY_TYPES = new Set(['modify', 'bugfix', 'fix', 'edit', 'refactor', 'enhancement']);
|
|
2159
|
-
if (MODIFY_TYPES.has(task.type)) {
|
|
2160
|
-
const missing = deliverables.filter(f => !(0, fs_1.existsSync)(f));
|
|
2161
|
-
if (missing.length > 0) {
|
|
2162
|
-
log(c.red, ` ✗ Pre-flight FAILED: File(s) not found: ${missing.join(', ')}`);
|
|
2163
|
-
log(c.red, ` ✗ Skipping task ${task.id} — deliverable files do not exist in repo`);
|
|
2164
|
-
throw new Error(`PREFLIGHT_FAILED: Files not found: ${missing.join(', ')}`);
|
|
2165
|
-
}
|
|
2166
|
-
}
|
|
2167
|
-
// Pre-flight: validate new-file paths are inside real project directories
|
|
2168
|
-
// CEO sometimes hallucinates paths like 'agents/src/core/' or 'packages/agents/src/'
|
|
2169
|
-
// which don't exist. Catch these before generating anything.
|
|
2170
|
-
const VALID_PATH_PREFIXES = [
|
|
2171
|
-
'backend/', 'frontend/', 'agents/', 'scripts/', 'shared/',
|
|
2172
|
-
'website/', 'docs-site/', 'apps/', 'sdk/', 'x402-base/', 'x402-evm/', 'x402-test/',
|
|
2173
|
-
'supabase/', 'infrastructure/',
|
|
2174
|
-
// Kognai v16 directories (S68)
|
|
2175
|
-
'acp/', 'codebook/', 'failure-library/', 'skills/', 'skill-bank/',
|
|
2176
|
-
// Kognai runtime paths (S66-002)
|
|
2177
|
-
'runtime/', 'dashboard/', 'kognai-agents/', 'workspace/', 'docs/', 'logs/', 'tests/',
|
|
2178
|
-
// Public surfaces + npm packages (sprint-1548, sprint-1549)
|
|
2179
|
-
'landing/', 'packages/', 'data/',
|
|
2180
|
-
// Smart contracts (sprint-1571 — KognaiSkin ERC-721 + EIP-5192 soulbound)
|
|
2181
|
-
'contracts/',
|
|
2182
|
-
];
|
|
2183
|
-
// Invalid patterns: paths that look like monorepo sub-dirs that don't exist
|
|
2184
|
-
const INVALID_PATH_PATTERNS = [
|
|
2185
|
-
/^agents\/src\//, // agents/src/... — real agent dirs are agents/<name>/
|
|
2186
|
-
/^src\/agents\//, // no src/agents/ dir
|
|
2187
|
-
];
|
|
2188
|
-
for (const filepath of deliverables) {
|
|
2189
|
-
// Root-level dotfiles, config files, and absolute paths are always valid.
|
|
2190
|
-
// Absolute paths (starting with /) indicate cross-project tasks (e.g., Voxight).
|
|
2191
|
-
const isRootFile = !filepath.includes('/') || filepath.startsWith('.') || filepath.startsWith('/');
|
|
2192
|
-
const isValidPrefix = isRootFile || VALID_PATH_PREFIXES.some(p => filepath.startsWith(p));
|
|
2193
|
-
const isInvalidPattern = INVALID_PATH_PATTERNS.some(r => r.test(filepath));
|
|
2194
|
-
if (!isValidPrefix || isInvalidPattern) {
|
|
2195
|
-
log(c.red, ` ✗ Path validation FAILED: "${filepath}" is not in a valid project directory`);
|
|
2196
|
-
log(c.red, ` ✗ Valid prefixes: ${VALID_PATH_PREFIXES.join(', ')}`);
|
|
2197
|
-
throw new Error(`INVALID_PATH: "${filepath}" is not in a recognized project directory`);
|
|
2198
|
-
}
|
|
2199
|
-
}
|
|
2200
|
-
let rejectionContext = '';
|
|
2201
|
-
if (previousReview && previousReview.verdict !== 'APPROVED') {
|
|
2202
|
-
const issueList = (previousReview.issues || []).map(i => `- [${i.severity}] ${i.file}: ${i.description}`).join('\n');
|
|
2203
|
-
rejectionContext = `\n## IMPORTANT: Previous Attempt Was REJECTED\nScore: ${previousReview.score}/100. Reason: ${previousReview.summary}\n\nSpecific issues to fix:\n${issueList}\n\nYou MUST address ALL issues.\n`;
|
|
2204
|
-
}
|
|
2205
|
-
// TICKET-152 Gap 2: cross-run failure memory. `previousReview` only remembers
|
|
2206
|
-
// THIS run's attempts; the failure-library remembers every prior rejection of
|
|
2207
|
-
// this task across all sprints (e.g. ksl_batch_runner's 75 truncation rejects).
|
|
2208
|
-
// Inject the persistent avoidance brief so a task that has failed before sees
|
|
2209
|
-
// its own history — even on attempt 1 of a fresh run. Bounded (≤5 attempts,
|
|
2210
|
-
// truncated reasons) and best-effort: retrieval must never block execution.
|
|
2211
|
-
try {
|
|
2212
|
-
const prior = (0, failure_library_1.retrieveTaskFailures)(task.id);
|
|
2213
|
-
if (prior.brief)
|
|
2214
|
-
rejectionContext += `\n${prior.brief}\n`;
|
|
2215
|
-
}
|
|
2216
|
-
catch { /* non-fatal — never block execution on retrieval */ }
|
|
2217
|
-
const createdFiles = [];
|
|
2218
|
-
for (let i = 0; i < deliverables.length; i++) {
|
|
2219
|
-
const filepath = deliverables[i];
|
|
2220
|
-
// TICKET-090: EDIT-MODE — for surgical-edit tasks on existing files, ask
|
|
2221
|
-
// the LLM for a list of {old, new} substitutions instead of regenerating
|
|
2222
|
-
// the whole file. Drops output tokens ~10× (4kB file rewrite → ~200B
|
|
2223
|
-
// diff) and slashes wall-clock under the 25-min PER_RUN_HARD_TIMEOUT.
|
|
2224
|
-
//
|
|
2225
|
-
// TICKET-209 (2026-05-29): broadened engagement. Previously gated behind
|
|
2226
|
-
// ~9 narrow-scope context keywords AND file ≥50 lines, so founder-authored
|
|
2227
|
-
// modify tasks (TICKET-204, TICKET-207) that didn't use the exact magic
|
|
2228
|
-
// phrases fell into regenerate-mode and blew the 100k token budget
|
|
2229
|
-
// (~313k tokens spent on a 5-edit task to a 700-line file).
|
|
2230
|
-
//
|
|
2231
|
-
// Now engages when:
|
|
2232
|
-
// (a) file exists AND is at least 50 lines (unchanged)
|
|
2233
|
-
// (b) EITHER task.type is in MODIFY_TYPES (new default for all modify tasks)
|
|
2234
|
-
// OR task context contains the legacy narrow-scope keywords
|
|
2235
|
-
// ("ONE LINE EDIT", "SINGLE FIELD", "MINIMAL EDIT", "rename only",
|
|
2236
|
-
// "single property", "verify-only", "literal-string",
|
|
2237
|
-
// "DO NOT regenerate", "surgical", "no-op verify", "MUST still start")
|
|
2238
|
-
// Falls back to regenerate-mode if the LLM's edit response is malformed
|
|
2239
|
-
// or any `old` substring isn't uniquely present in the file.
|
|
2240
|
-
const existingLineCount = (0, fs_1.existsSync)(filepath)
|
|
2241
|
-
? (0, fs_1.readFileSync)(filepath, 'utf-8').split('\n').length
|
|
2242
|
-
: 0;
|
|
2243
|
-
const hasNarrowScopeKeywords = /\b(ONE LINE EDIT|SINGLE FIELD|MINIMAL EDIT|rename only|single property|verify-only|literal[- ]string|DO NOT regenerate|surgical|no-op verify|MUST still start)\b/i.test(task.context || '');
|
|
2244
|
-
const isModifyTask = MODIFY_TYPES.has(task.type);
|
|
2245
|
-
const editModeEligible = (0, fs_1.existsSync)(filepath)
|
|
2246
|
-
&& existingLineCount >= 50
|
|
2247
|
-
&& (isModifyTask || hasNarrowScopeKeywords);
|
|
2248
|
-
if (editModeEligible) {
|
|
2249
|
-
const edited = await this.tryEditMode(filepath, task, rejectionContext, provider, model);
|
|
2250
|
-
if (edited !== null) {
|
|
2251
|
-
createdFiles.push({ path: filepath, content: edited });
|
|
2252
|
-
continue; // success, skip regenerate-mode for this file
|
|
2253
|
-
}
|
|
2254
|
-
// 2026-05-27 diagnostic patch: for MODIFY tasks on large files, refuse
|
|
2255
|
-
// the regenerate-mode fallback. Regeneration of a 200+ line file from
|
|
2256
|
-
// scratch trips the integrity-check (which preserves the original on
|
|
2257
|
-
// disk) — net result is a silent no-op. Better to surface the failure
|
|
2258
|
-
// with a structured reason than to log "No files produced" with no
|
|
2259
|
-
// context. Founder triage: split the file, not the task.
|
|
2260
|
-
const existingLines = (0, fs_1.existsSync)(filepath) ? (0, fs_1.readFileSync)(filepath, 'utf-8').split('\n').length : 0;
|
|
2261
|
-
if (MODIFY_TYPES.has(task.type) && existingLines > 150) {
|
|
2262
|
-
log(c.red, ` ✗ Edit-mode FAILED and regenerate-mode REFUSED for ${filepath} (${existingLines} lines, MODIFY task — split file, not task)`);
|
|
2263
|
-
task._failureReasons = [
|
|
2264
|
-
...(task._failureReasons || []),
|
|
2265
|
-
`edit-mode-empty:${filepath}:${existingLines}lines`,
|
|
2266
|
-
];
|
|
2267
|
-
continue; // skip this deliverable — surfaces via silent-failure enrichment upstream
|
|
2268
|
-
}
|
|
2269
|
-
log(c.yellow, ` ! Edit-mode fell back to regenerate-mode for ${filepath}`);
|
|
2270
|
-
}
|
|
2271
|
-
log(c.gray, ` -> Generating file ${i + 1}/${deliverables.length}: ${filepath}`);
|
|
2272
|
-
const priorCtx = createdFiles.length > 0
|
|
2273
|
-
? '\n## Already Generated Files\n' + createdFiles.map(f => `### ${f.path}\n\`\`\`typescript\n${f.content.substring(0, 2000)}\n\`\`\``).join('\n\n') + '\n'
|
|
2274
|
-
: '';
|
|
2275
|
-
const fileList = deliverables.map((f, idx) => `${idx + 1}. ${f}${f === filepath ? ' ← THIS ONE' : ''}`).join('\n');
|
|
2276
|
-
const isTestFile = filepath.includes('test') || filepath.includes('spec');
|
|
2277
|
-
const existingLines = (0, fs_1.existsSync)(filepath) ? (0, fs_1.readFileSync)(filepath, 'utf-8').split('\n').length : 0;
|
|
2278
|
-
const existingContent = (0, fs_1.existsSync)(filepath)
|
|
2279
|
-
? `\n\n## EXISTING FILE — SURGICAL EDIT ONLY\nDo NOT rewrite the entire file. Output the COMPLETE updated file with your changes merged in.\nIf you add a function, append it. If you edit a line, change only that line.\nFile has ${existingLines} lines — preserve ALL existing code.\n\n### Current Content\n\`\`\`typescript\n${(0, fs_1.readFileSync)(filepath, 'utf-8').substring(0, 3000)}\n\`\`\`\n`
|
|
2280
|
-
: `\n\n## Note: This is a NEW file — create it from scratch.\n`;
|
|
2281
|
-
const testConstraint = isTestFile
|
|
2282
|
-
? `\n\n## CRITICAL: TEST FILE SIZE LIMIT
|
|
2283
|
-
This is a test file. You MUST keep it SHORT to avoid truncation:
|
|
2284
|
-
- Maximum 5-6 test cases (describe + it blocks)
|
|
2285
|
-
- Maximum 80 lines total
|
|
2286
|
-
- NO verbose setup — use inline mocks
|
|
2287
|
-
- NO redundant tests — one test per behavior
|
|
2288
|
-
- Cover: happy path, error case, edge case, defaults — that's it
|
|
2289
|
-
- If you write more than 80 lines, the file WILL be truncated and REJECTED\n`
|
|
2290
|
-
: '';
|
|
2291
|
-
// EXACT CONTENT mode: task description contains code block(s) with the exact file content.
|
|
2292
|
-
// Extract them deterministically and bypass LLM to prevent model hallucination.
|
|
2293
|
-
// This is the correct fix for "EXACT CONTENT:" tasks — the model must NOT interpret
|
|
2294
|
-
// the spec, it must copy it verbatim. Bypass the LLM entirely for these tasks.
|
|
2295
|
-
// NOTE: check task.description first — when sprint JSON has BOTH description AND context fields,
|
|
2296
|
-
// the normalization at loadTasks() only copies description→context when context is absent.
|
|
2297
|
-
// EXACT CONTENT blocks always live in the description field.
|
|
2298
|
-
const rawSpec = task.description ?? task.context;
|
|
2299
|
-
const exactBlocks = [...rawSpec.matchAll(/EXACT CONTENT:\s*\n\n?```[\w.+-]*\n([\s\S]*?)```(?:\n|$)/g)]
|
|
2300
|
-
.map((m) => m[1].trimEnd());
|
|
2301
|
-
if (exactBlocks.length > 0) {
|
|
2302
|
-
// Use block[i] for deliverable[i] when multiple blocks present; else use block[0]
|
|
2303
|
-
const exactFileContent = exactBlocks.length > i ? exactBlocks[i] : exactBlocks[0];
|
|
2304
|
-
const blockLabel = `block ${Math.min(i, exactBlocks.length - 1) + 1}/${exactBlocks.length}`;
|
|
2305
|
-
log(c.cyan, ` -> EXACT CONTENT mode: ${filepath} (${blockLabel}) — deterministic, no LLM`);
|
|
2306
|
-
createdFiles.push({ path: filepath, content: exactFileContent });
|
|
2307
|
-
continue;
|
|
2308
|
-
}
|
|
2309
|
-
const userPrompt = `You are ${this.name}, a coding agent at Countable.
|
|
2310
|
-
${rejectionContext}
|
|
2311
|
-
## Task
|
|
2312
|
-
${task.context}
|
|
2313
|
-
|
|
2314
|
-
## All Deliverable Files
|
|
2315
|
-
${fileList}
|
|
2316
|
-
|
|
2317
|
-
## Generate ONLY: ${filepath}
|
|
2318
|
-
${existingContent}${priorCtx}${testConstraint}
|
|
2319
|
-
Write ONLY the content for "${filepath}". Rules:
|
|
2320
|
-
- S64-001: Output the raw file content using FILE: format as described in the system prompt
|
|
2321
|
-
- Do NOT wrap output in markdown code fences (\`\`\`) — for .md files especially, output RAW markdown text, NOT inside a \`\`\`markdown or \`\`\`typescript block
|
|
2322
|
-
- For .sh/.bash scripts, start with #!/bin/bash — do NOT wrap in a code fence
|
|
2323
|
-
- Production quality, no TODOs or placeholders
|
|
2324
|
-
- Include all imports, types, error handling
|
|
2325
|
-
- If this file depends on others listed above, import from them correctly
|
|
2326
|
-
- No explanatory text — output file content only`;
|
|
2327
|
-
try {
|
|
2328
|
-
const startTime = Date.now();
|
|
2329
|
-
const response = await callLLM(provider, model, this.systemPrompt, userPrompt, 480000, this.name, task.id); // 8 min — qwen3:14b needs time for large files
|
|
2330
|
-
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
2331
|
-
let content = response.choices?.[0]?.message?.content || '';
|
|
2332
|
-
// Strip MiniMax <think>...</think> tags that leak into responses
|
|
2333
|
-
content = content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
2334
|
-
const tokens = response.usage?.total_tokens || 0;
|
|
2335
|
-
// Check for MiniMax errors
|
|
2336
|
-
if (response.base_resp?.status_code && response.base_resp.status_code !== 0) {
|
|
2337
|
-
throw new Error(`MiniMax API error: ${response.base_resp.status_msg}`);
|
|
2338
|
-
}
|
|
2339
|
-
log(c.gray, ` -> Response: ${elapsed}s, ${tokens} tokens, ${content.length} chars`);
|
|
2340
|
-
// CTO-005: Extract code from fenced block with enhanced fence stripping
|
|
2341
|
-
const codeBlocks = this.extractCodeBlocks(content);
|
|
2342
|
-
let fileContent;
|
|
2343
|
-
if (codeBlocks.length === 0) {
|
|
2344
|
-
log(c.yellow, ` ! No code block found for ${filepath}, using raw content (with fence strip)`);
|
|
2345
|
-
fileContent = this.stripResidualFences(content);
|
|
2346
|
-
}
|
|
2347
|
-
else {
|
|
2348
|
-
fileContent = codeBlocks[0];
|
|
2349
|
-
}
|
|
2350
|
-
// CTO-005: Final fence sanitization BEFORE adding to createdFiles
|
|
2351
|
-
// CEO condition: stripping must happen BEFORE file is written to disk
|
|
2352
|
-
fileContent = this.stripResidualFences(fileContent);
|
|
2353
|
-
// Last-resort nuclear strip: if content still starts with a fence, skip all leading
|
|
2354
|
-
// fence lines and trailing fence. Handles MiniMax ```typescript{ (no newline) pattern.
|
|
2355
|
-
if (/^\s*```/.test(fileContent)) {
|
|
2356
|
-
log(c.yellow, ` ! Residual fence detected after stripResidualFences — applying nuclear strip for ${filepath}`);
|
|
2357
|
-
const lines = fileContent.split('\n');
|
|
2358
|
-
const firstContentLine = lines.findIndex(l => !l.trim().startsWith('```') && l.trim() !== '');
|
|
2359
|
-
if (firstContentLine > 0) {
|
|
2360
|
-
fileContent = lines.slice(firstContentLine).join('\n').replace(/\n\s*```\s*$/, '').trim();
|
|
2361
|
-
}
|
|
2362
|
-
else if (firstContentLine === -1) {
|
|
2363
|
-
fileContent = lines.filter(l => !l.trim().startsWith('```')).join('\n').trim();
|
|
2364
|
-
}
|
|
2365
|
-
}
|
|
2366
|
-
// File-type-aware post-processing: final safety net per file extension
|
|
2367
|
-
fileContent = this.postProcessContent(fileContent, filepath);
|
|
2368
|
-
// B.13: For JSON files that are still invalid after postProcessContent, try qwen3:0.6b repair
|
|
2369
|
-
if (filepath.endsWith('.json')) {
|
|
2370
|
-
try {
|
|
2371
|
-
JSON.parse(fileContent);
|
|
2372
|
-
}
|
|
2373
|
-
catch {
|
|
2374
|
-
log(c.yellow, ` ! JSON invalid in ${filepath} — attempting qwen3:0.6b repair`);
|
|
2375
|
-
fileContent = await this.fixJsonWithOllama(fileContent, filepath);
|
|
2376
|
-
}
|
|
2377
|
-
}
|
|
2378
|
-
// TRUNCATION PRE-CHECK: Detect if MiniMax cut off output mid-function
|
|
2379
|
-
// If code ends inside an open block (unclosed braces) or with an incomplete statement,
|
|
2380
|
-
// retry once with a "continue" prompt before sending to supervisor review.
|
|
2381
|
-
const truncationDetected = this.detectTruncation(fileContent);
|
|
2382
|
-
if (truncationDetected && (provider === 'clawrouter' || provider === 'ollama')) {
|
|
2383
|
-
log(c.yellow, ` ! TRUNCATION detected in ${filepath} — retrying with continuation prompt...`);
|
|
2384
|
-
const continuationPrompt = `The previous response for "${filepath}" was TRUNCATED — it ended mid-function or with an incomplete block. Here is what was generated so far:
|
|
2385
|
-
|
|
2386
|
-
\`\`\`typescript
|
|
2387
|
-
${fileContent.substring(fileContent.length - 1500)}
|
|
2388
|
-
\`\`\`
|
|
2389
|
-
|
|
2390
|
-
Continue from where it left off and output ONLY the remaining code (no duplicated content). Output a COMPLETE, valid TypeScript/JavaScript file ending with the final closing brace.`;
|
|
2391
|
-
try {
|
|
2392
|
-
const contResponse = await callLLM(provider, model, this.systemPrompt, continuationPrompt, 120000, this.name, `${task.id}_continuation`);
|
|
2393
|
-
let contContent = contResponse.choices?.[0]?.message?.content || '';
|
|
2394
|
-
contContent = contContent.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
2395
|
-
const contBlocks = this.extractCodeBlocks(contContent);
|
|
2396
|
-
const continuation = contBlocks.length > 0 ? contBlocks[0] : this.stripResidualFences(contContent);
|
|
2397
|
-
if (continuation.length > 50) {
|
|
2398
|
-
// Merge: use the original up to the last complete line, then append continuation
|
|
2399
|
-
fileContent = fileContent + '\n' + continuation;
|
|
2400
|
-
fileContent = this.stripResidualFences(fileContent);
|
|
2401
|
-
log(c.green, ` ✓ Continuation merged for ${filepath} (+${continuation.length} chars)`);
|
|
2402
|
-
}
|
|
2403
|
-
}
|
|
2404
|
-
catch (contErr) {
|
|
2405
|
-
log(c.yellow, ` ! Continuation failed: ${contErr.message}`);
|
|
2406
|
-
}
|
|
2407
|
-
}
|
|
2408
|
-
createdFiles.push({ path: filepath, content: fileContent });
|
|
2409
|
-
}
|
|
2410
|
-
catch (error) {
|
|
2411
|
-
log(c.red, ` ✗ Failed to generate ${filepath}: ${error.message}`);
|
|
2412
|
-
// Create minimal placeholder so build doesn't break
|
|
2413
|
-
createdFiles.push({ path: filepath, content: `// ERROR: Generation failed - ${error.message}\n// Task: ${task.id}\n` });
|
|
2414
|
-
}
|
|
2415
|
-
}
|
|
2416
|
-
// CTO-004: File integrity check — detect destructive MiniMax rewrites
|
|
2417
|
-
// For bugfix tasks (and feature tasks editing existing files), reject if new file
|
|
2418
|
-
// is <50% the size of the original. Configurable threshold.
|
|
2419
|
-
const INTEGRITY_THRESHOLD = 0.5; // Reject if new < 50% of original
|
|
2420
|
-
const integrityCheckTypes = ['bugfix']; // Task types that always get integrity check
|
|
2421
|
-
const integrityCheckAllExisting = true; // Also check feature tasks editing existing files
|
|
2422
|
-
for (const file of createdFiles) {
|
|
2423
|
-
if ((0, fs_1.existsSync)(file.path)) {
|
|
2424
|
-
try {
|
|
2425
|
-
const originalContent = (0, fs_1.readFileSync)(file.path, 'utf-8');
|
|
2426
|
-
const originalLines = originalContent.split('\n').length;
|
|
2427
|
-
const newLines = file.content.split('\n').length;
|
|
2428
|
-
const ratio = originalLines > 0 ? newLines / originalLines : 1;
|
|
2429
|
-
const shouldCheck = integrityCheckTypes.includes(task.type) ||
|
|
2430
|
-
(integrityCheckAllExisting && originalLines > 10);
|
|
2431
|
-
if (shouldCheck && ratio < INTEGRITY_THRESHOLD) {
|
|
2432
|
-
log(c.red, ` ✗ INTEGRITY CHECK FAILED: ${file.path}`);
|
|
2433
|
-
log(c.red, ` Original: ${originalLines} lines → New: ${newLines} lines (${(ratio * 100).toFixed(0)}%)`);
|
|
2434
|
-
log(c.red, ` Possible destructive rewrite detected — file shrank from ${originalLines} to ${newLines} lines`);
|
|
2435
|
-
// TICKET-091 FIX: ACTUALLY preserve the original. Prior version
|
|
2436
|
-
// assigned a warning comment to file.content, which the writer
|
|
2437
|
-
// then wrote to disk as a 4-line stub — destroying the original.
|
|
2438
|
-
// Read on-disk original so the subsequent writeFileSync is a no-op.
|
|
2439
|
-
try {
|
|
2440
|
-
file.content = (0, fs_1.readFileSync)(file.path, 'utf-8');
|
|
2441
|
-
log(c.gray, ` Original file restored from disk (${originalLines} lines preserved)`);
|
|
2442
|
-
}
|
|
2443
|
-
catch (readErr) {
|
|
2444
|
-
log(c.yellow, ` WARN: could not read original from disk: ${(readErr?.message || '').slice(0, 100)}`);
|
|
2445
|
-
}
|
|
2446
|
-
task._integrityFailed = true;
|
|
2447
|
-
task._integrityDetails = `File ${file.path} shrank from ${originalLines} to ${newLines} lines (${(ratio * 100).toFixed(0)}%). Possible destructive rewrite. Original preserved on disk; task should be rejected and retried with edit-mode constraint.`;
|
|
2448
|
-
}
|
|
2449
|
-
else if (originalLines > 0) {
|
|
2450
|
-
log(c.gray, ` -> Integrity OK: ${file.path} (${originalLines} → ${newLines} lines, ${(ratio * 100).toFixed(0)}%)`);
|
|
2451
|
-
}
|
|
2452
|
-
}
|
|
2453
|
-
catch { /* File exists but can't read — skip check */ }
|
|
2454
|
-
}
|
|
2455
|
-
}
|
|
2456
|
-
// FP-007: File size guard — refuse writes to files >2000 lines
|
|
2457
|
-
// Prevents swarm from destructively rewriting large files (telegram-bot.ts disaster)
|
|
2458
|
-
const FP007_LINE_LIMIT = 2000;
|
|
2459
|
-
for (const file of createdFiles) {
|
|
2460
|
-
if ((0, fs_1.existsSync)(file.path)) {
|
|
2461
|
-
try {
|
|
2462
|
-
const existingLines = (0, fs_1.readFileSync)(file.path, 'utf-8').split('\n').length;
|
|
2463
|
-
if (existingLines > FP007_LINE_LIMIT) {
|
|
2464
|
-
log(c.red, ` ✗ FP-007 GUARD: ${file.path} has ${existingLines} lines (limit: ${FP007_LINE_LIMIT})`);
|
|
2465
|
-
log(c.red, ` Refusing write — file too large for safe swarm edit. Use manual edit.`);
|
|
2466
|
-
file.content = `// FP-007 GUARD: Write refused — target file has ${existingLines} lines (>${FP007_LINE_LIMIT})\n// Task: ${task.id}. Edit this file manually or split it first.\n`;
|
|
2467
|
-
task._fp007Blocked = true;
|
|
2468
|
-
}
|
|
2469
|
-
}
|
|
2470
|
-
catch { /* can't read — allow write */ }
|
|
2471
|
-
}
|
|
2472
|
-
}
|
|
2473
|
-
// Write all files to disk
|
|
2474
|
-
const writtenFiles = [];
|
|
2475
|
-
for (const file of createdFiles) {
|
|
2476
|
-
try {
|
|
2477
|
-
const dir = file.path.substring(0, file.path.lastIndexOf('/'));
|
|
2478
|
-
if (dir)
|
|
2479
|
-
(0, fs_1.mkdirSync)(dir, { recursive: true });
|
|
2480
|
-
(0, fs_1.writeFileSync)(file.path, file.content);
|
|
2481
|
-
writtenFiles.push(file.path);
|
|
2482
|
-
log(c.green, ` ✓ Written: ${file.path} (${file.content.length} chars)`);
|
|
2483
|
-
}
|
|
2484
|
-
catch (error) {
|
|
2485
|
-
log(c.red, ` ✗ Write failed: ${file.path}: ${error.message}`);
|
|
2486
|
-
}
|
|
2487
|
-
}
|
|
2488
|
-
// TICKET-205: stub-detection guard. The integrity check at line 2459 only
|
|
2489
|
-
// catches destructive shrink of EXISTING files. For NEW files (type=create),
|
|
2490
|
-
// a coder agent producing a near-empty stub passes through and gets committed
|
|
2491
|
-
// BEFORE dual-review fires. Live incident 2026-05-29: docs/specs/
|
|
2492
|
-
// orchestrator-workspace.md generated as 98-byte stub, dual-rejected 3× at
|
|
2493
|
-
// 20/100, but commit 3ca603315 landed it on main anyway. This guard skips the
|
|
2494
|
-
// commit when any written file is suspiciously small for its type — the file
|
|
2495
|
-
// stays on disk so dual-review can inspect + reject it on its own merits.
|
|
2496
|
-
const STUB_MIN_BYTES = {
|
|
2497
|
-
'.md': 1500, // markdown specs typically ask for many sections
|
|
2498
|
-
'.ts': 200,
|
|
2499
|
-
'.tsx': 200,
|
|
2500
|
-
'.js': 200,
|
|
2501
|
-
'.jsx': 200,
|
|
2502
|
-
'.yaml': 100,
|
|
2503
|
-
'.yml': 100,
|
|
2504
|
-
'.json': 100,
|
|
2505
|
-
'.html': 100,
|
|
2506
|
-
'.css': 100,
|
|
2507
|
-
};
|
|
2508
|
-
const DEFAULT_STUB_MIN = 200;
|
|
2509
|
-
let stubPath = null;
|
|
2510
|
-
let stubSize = 0;
|
|
2511
|
-
let stubMin = 0;
|
|
2512
|
-
for (const path of writtenFiles) {
|
|
2513
|
-
const dotIdx = path.lastIndexOf('.');
|
|
2514
|
-
const ext = dotIdx >= 0 ? path.slice(dotIdx) : '';
|
|
2515
|
-
let min = STUB_MIN_BYTES[ext] ?? DEFAULT_STUB_MIN;
|
|
2516
|
-
// .md only enforces the high threshold when the task context is substantial
|
|
2517
|
-
// (a real spec ask); for short asks (e.g. README scaffolds), use 300.
|
|
2518
|
-
if (ext === '.md' && (task.context?.length ?? 0) < 1500)
|
|
2519
|
-
min = 300;
|
|
2520
|
-
try {
|
|
2521
|
-
const size = (0, fs_1.statSync)(path).size;
|
|
2522
|
-
if (size < min) {
|
|
2523
|
-
stubPath = path;
|
|
2524
|
-
stubSize = size;
|
|
2525
|
-
stubMin = min;
|
|
2526
|
-
break;
|
|
2527
|
-
}
|
|
2528
|
-
}
|
|
2529
|
-
catch { /* can't stat, skip */ }
|
|
2530
|
-
}
|
|
2531
|
-
if (stubPath) {
|
|
2532
|
-
log(c.red, ` ✗ STUB DETECTED: ${stubPath} (${stubSize} bytes < ${stubMin} expected for ${task.type})`);
|
|
2533
|
-
log(c.gray, ` Skipping commit — file stays on disk for dual-review to reject. Task will retry.`);
|
|
2534
|
-
task._stubDetected = true;
|
|
2535
|
-
task._stubReason = `Generated file ${stubPath} is ${stubSize} bytes; expected ≥${stubMin} for type=${task.type}`;
|
|
2536
|
-
return { files: writtenFiles, model };
|
|
2537
|
-
}
|
|
2538
|
-
// Commit changes
|
|
2539
|
-
this.commitChanges(task, writtenFiles);
|
|
2540
|
-
return { files: writtenFiles, model };
|
|
2541
|
-
}
|
|
2542
|
-
// TICKET-090: EDIT-MODE — ask the LLM for {old, new} substitutions on an
|
|
2543
|
-
// existing file, instead of regenerating the whole file. Returns the post-
|
|
2544
|
-
// edit file content on success, or null to fall back to regenerate-mode.
|
|
2545
|
-
//
|
|
2546
|
-
// Failure modes that trigger fallback (null):
|
|
2547
|
-
// - LLM response isn't parseable JSON
|
|
2548
|
-
// - "edits" key missing or empty
|
|
2549
|
-
// - any old_str isn't found in the file (typo / hallucination)
|
|
2550
|
-
// - any old_str appears more than once (ambiguous edit)
|
|
2551
|
-
// - any edit's "old" is identical to its "new" (no-op disguised)
|
|
2552
|
-
async tryEditMode(filepath, task, rejectionContext, provider, model) {
|
|
2553
|
-
const originalContent = (0, fs_1.readFileSync)(filepath, 'utf-8');
|
|
2554
|
-
const lineCount = originalContent.split('\n').length;
|
|
2555
|
-
log(c.cyan, ` -> EDIT-MODE: ${filepath} (${lineCount} lines, surgical-edit task) — diff-only output`);
|
|
2556
|
-
const userPrompt = `You are ${this.name}, a coding agent at Countable, applying a SURGICAL EDIT to an existing file.
|
|
2557
|
-
|
|
2558
|
-
${rejectionContext}
|
|
2559
|
-
## Task
|
|
2560
|
-
${task.context}
|
|
2561
|
-
|
|
2562
|
-
## File: ${filepath}
|
|
2563
|
-
\`\`\`typescript
|
|
2564
|
-
${originalContent}
|
|
2565
|
-
\`\`\`
|
|
2566
|
-
|
|
2567
|
-
## Output format — JSON object only, no commentary, no fences
|
|
2568
|
-
|
|
2569
|
-
{
|
|
2570
|
-
"edits": [
|
|
2571
|
-
{"old": "<exact substring currently in the file>", "new": "<replacement>"}
|
|
2572
|
-
]
|
|
2573
|
-
}
|
|
2574
|
-
|
|
2575
|
-
## Rules — VIOLATIONS WILL CAUSE THE TASK TO BE REJECTED
|
|
2576
|
-
|
|
2577
|
-
1. Each "old" string MUST appear EXACTLY ONCE in the file. If you need to edit a non-unique substring, include enough surrounding context (a few extra characters before and after) to make it unique.
|
|
2578
|
-
2. Each "old" string MUST match the file VERBATIM — same whitespace, same quote style, same indentation. Do not paraphrase.
|
|
2579
|
-
3. "new" must be different from "old". No-op edits are rejected.
|
|
2580
|
-
4. Prefer FEWER, LARGER edits. 1-3 edits is ideal. 10+ edits suggests you're rewriting — switch to a different approach.
|
|
2581
|
-
5. Output ONLY the JSON object. No \`\`\`json fences. No prose before or after.`;
|
|
2582
|
-
let response;
|
|
2583
|
-
try {
|
|
2584
|
-
const startTime = Date.now();
|
|
2585
|
-
response = await callLLM(provider, model, this.systemPrompt, userPrompt, 120_000, this.name, task.id);
|
|
2586
|
-
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
2587
|
-
const tokens = response.usage?.total_tokens || 0;
|
|
2588
|
-
log(c.gray, ` edit response: ${elapsed}s, ${tokens} tokens`);
|
|
2589
|
-
}
|
|
2590
|
-
catch (e) {
|
|
2591
|
-
log(c.yellow, ` edit-mode LLM call failed: ${(e.message || '').slice(0, 120)}`);
|
|
2592
|
-
return null;
|
|
2593
|
-
}
|
|
2594
|
-
let raw = (response.choices?.[0]?.message?.content || '').replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
2595
|
-
// Strip any fences the model added despite instructions (json/jsonc/none)
|
|
2596
|
-
const fenced = raw.match(/```(?:json|jsonc)?\s*([\s\S]*?)```/);
|
|
2597
|
-
if (fenced)
|
|
2598
|
-
raw = fenced[1].trim();
|
|
2599
|
-
// TICKET-092 FIX: walk forward from first `{` tracking brace depth through
|
|
2600
|
-
// string literals (with escape-char handling), extract the FIRST balanced
|
|
2601
|
-
// JSON object. Prior slice(firstBrace,lastBrace+1) caught multi-object
|
|
2602
|
-
// responses + trailing prose as one mega-string and broke JSON.parse.
|
|
2603
|
-
const firstBrace = raw.indexOf('{');
|
|
2604
|
-
if (firstBrace < 0) {
|
|
2605
|
-
log(c.yellow, ` edit-mode: no JSON object in response`);
|
|
2606
|
-
return null;
|
|
2607
|
-
}
|
|
2608
|
-
let depth = 0, inStr = false, strChar = '', escape = false, end = -1;
|
|
2609
|
-
for (let i = firstBrace; i < raw.length; i++) {
|
|
2610
|
-
const ch = raw[i];
|
|
2611
|
-
if (escape) {
|
|
2612
|
-
escape = false;
|
|
2613
|
-
continue;
|
|
2614
|
-
}
|
|
2615
|
-
if (inStr) {
|
|
2616
|
-
if (ch === '\\')
|
|
2617
|
-
escape = true;
|
|
2618
|
-
else if (ch === strChar)
|
|
2619
|
-
inStr = false;
|
|
2620
|
-
continue;
|
|
2621
|
-
}
|
|
2622
|
-
if (ch === '"' || ch === "'") {
|
|
2623
|
-
inStr = true;
|
|
2624
|
-
strChar = ch;
|
|
2625
|
-
continue;
|
|
2626
|
-
}
|
|
2627
|
-
if (ch === '{')
|
|
2628
|
-
depth++;
|
|
2629
|
-
else if (ch === '}') {
|
|
2630
|
-
depth--;
|
|
2631
|
-
if (depth === 0) {
|
|
2632
|
-
end = i;
|
|
2633
|
-
break;
|
|
2634
|
-
}
|
|
2635
|
-
}
|
|
2636
|
-
}
|
|
2637
|
-
if (end < 0) {
|
|
2638
|
-
log(c.yellow, ` edit-mode: unterminated JSON object (no matching closing brace)`);
|
|
2639
|
-
return null;
|
|
2640
|
-
}
|
|
2641
|
-
raw = raw.slice(firstBrace, end + 1);
|
|
2642
|
-
let parsed;
|
|
2643
|
-
try {
|
|
2644
|
-
parsed = JSON.parse(raw);
|
|
2645
|
-
}
|
|
2646
|
-
catch (e) {
|
|
2647
|
-
// Trailing-comma forgiveness retry (common LLM quirk).
|
|
2648
|
-
try {
|
|
2649
|
-
parsed = JSON.parse(raw.replace(/,(\s*[}\]])/g, '$1'));
|
|
2650
|
-
log(c.gray, ` edit-mode: trailing-comma repair applied`);
|
|
2651
|
-
}
|
|
2652
|
-
catch {
|
|
2653
|
-
log(c.yellow, ` edit-mode: JSON parse failed: ${(e.message || '').slice(0, 80)}`);
|
|
2654
|
-
return null;
|
|
2655
|
-
}
|
|
2656
|
-
}
|
|
2657
|
-
const edits = parsed.edits;
|
|
2658
|
-
if (!Array.isArray(edits) || edits.length === 0) {
|
|
2659
|
-
log(c.yellow, ` edit-mode: no edits array or empty`);
|
|
2660
|
-
return null;
|
|
2661
|
-
}
|
|
2662
|
-
let working = originalContent;
|
|
2663
|
-
for (const e of edits) {
|
|
2664
|
-
if (typeof e.old !== 'string' || typeof e.new !== 'string') {
|
|
2665
|
-
log(c.yellow, ` edit-mode: edit missing string fields`);
|
|
2666
|
-
return null;
|
|
2667
|
-
}
|
|
2668
|
-
if (e.old === e.new) {
|
|
2669
|
-
log(c.yellow, ` edit-mode: no-op edit (old===new): ${e.old.slice(0, 60)}`);
|
|
2670
|
-
return null;
|
|
2671
|
-
}
|
|
2672
|
-
// TICKET-100: per-edit suspicious-shrink guard. If `new` is dramatically
|
|
2673
|
-
// shorter than `old`, the LLM likely matched too greedy a chunk (e.g.
|
|
2674
|
-
// captured the trailing portion of the file in `old` and only included
|
|
2675
|
-
// the first part in `new`, silently truncating). 43475fd56 broke prod
|
|
2676
|
-
// exactly this way: edit removed the tail of generateReport function.
|
|
2677
|
-
// Threshold: if old is >= 200 chars AND new < 30% of old length, reject.
|
|
2678
|
-
if (e.old.length >= 200 && e.new.length < e.old.length * 0.3) {
|
|
2679
|
-
log(c.yellow, ` edit-mode: suspicious shrink — old=${e.old.length} chars, new=${e.new.length} chars (< 30%). Rejecting to avoid truncation.`);
|
|
2680
|
-
return null;
|
|
2681
|
-
}
|
|
2682
|
-
const occurrences = working.split(e.old).length - 1;
|
|
2683
|
-
if (occurrences === 0) {
|
|
2684
|
-
log(c.yellow, ` edit-mode: old_str not found in file: "${e.old.slice(0, 80)}"`);
|
|
2685
|
-
return null;
|
|
2686
|
-
}
|
|
2687
|
-
if (occurrences > 1) {
|
|
2688
|
-
log(c.yellow, ` edit-mode: old_str appears ${occurrences}× (must be unique): "${e.old.slice(0, 80)}"`);
|
|
2689
|
-
return null;
|
|
2690
|
-
}
|
|
2691
|
-
working = working.replace(e.old, e.new);
|
|
2692
|
-
}
|
|
2693
|
-
// TICKET-100: post-edit integrity check. Same INTEGRITY_THRESHOLD pattern
|
|
2694
|
-
// localQAGate uses for regen-mode results — if the post-edit content is
|
|
2695
|
-
// dramatically shorter than the original (>30% loss), reject as suspicious
|
|
2696
|
-
// truncation. Catches the case where individual edits each look reasonable
|
|
2697
|
-
// but cumulatively destroy the file.
|
|
2698
|
-
const INTEGRITY_THRESHOLD = 0.7; // must retain at least 70% of original lines
|
|
2699
|
-
const newLineCount = working.split('\n').length;
|
|
2700
|
-
const lineRatio = lineCount > 0 ? newLineCount / lineCount : 1;
|
|
2701
|
-
if (lineRatio < INTEGRITY_THRESHOLD) {
|
|
2702
|
-
log(c.yellow, ` edit-mode: post-edit integrity FAIL — ${lineCount} → ${newLineCount} lines (${(lineRatio * 100).toFixed(0)}%). Rejecting to avoid destructive write.`);
|
|
2703
|
-
return null;
|
|
2704
|
-
}
|
|
2705
|
-
// TICKET-100: detect mid-statement truncation. If the last non-empty line
|
|
2706
|
-
// doesn't end with a structural terminator (} ; > etc.), the file likely
|
|
2707
|
-
// got cut mid-expression. Catches LLM output that stops mid-call.
|
|
2708
|
-
const lastNonEmpty = working.split('\n').reverse().find(l => l.trim().length > 0) || '';
|
|
2709
|
-
const last = lastNonEmpty.trim();
|
|
2710
|
-
const terminatorOk = /[}\];>)\.]\s*$|^\/\/|^\/\*|^\*\//.test(last);
|
|
2711
|
-
if (!terminatorOk && working.length > 200) {
|
|
2712
|
-
log(c.yellow, ` edit-mode: last line "${last.slice(-60)}" doesn't end with a terminator — possible mid-statement truncation. Rejecting.`);
|
|
2713
|
-
return null;
|
|
2714
|
-
}
|
|
2715
|
-
const deltaLines = newLineCount - lineCount;
|
|
2716
|
-
log(c.green, ` ✓ edit-mode applied ${edits.length} edit(s) (${deltaLines >= 0 ? '+' : ''}${deltaLines} lines)`);
|
|
2717
|
-
return working;
|
|
2718
|
-
}
|
|
2719
|
-
// CTO-005: Enhanced code fence stripping — handles all MiniMax output variants
|
|
2720
|
-
// Catches: ```tsx, ```typescript, leading whitespace, fences at any position,
|
|
2721
|
-
// markdown headers before code, and incomplete closing fences
|
|
2722
|
-
extractCodeBlocks(content) {
|
|
2723
|
-
const blocks = [];
|
|
2724
|
-
// Normalize: MiniMax sometimes outputs ```typescript{ with no newline — insert one
|
|
2725
|
-
const normalized = content.replace(/```([\w.+-]*)\s*([^\s\n`])/g, '```$1\n$2');
|
|
2726
|
-
// Broader regex: optional whitespace before fences, any language tag, flexible spacing
|
|
2727
|
-
const regex = /^\s*```[\w.+-]*\s*\n([\s\S]*?)^\s*```\s*$/gm;
|
|
2728
|
-
let match;
|
|
2729
|
-
while ((match = regex.exec(normalized)) !== null) {
|
|
2730
|
-
if (match[1].trim().length > 0)
|
|
2731
|
-
blocks.push(match[1].trim());
|
|
2732
|
-
}
|
|
2733
|
-
// Fallback: try simpler pattern if multiline didn't match
|
|
2734
|
-
if (blocks.length === 0) {
|
|
2735
|
-
// S64-001: Added python|py|toml|env|sql|xml|md|markdown — MiniMax/qwen3 often labels files incorrectly
|
|
2736
|
-
const simpleRegex = /```(?:typescript|tsx|ts|javascript|jsx|js|json|yaml|yml|dockerfile|sh|bash|python|py|toml|env|sql|xml|md|markdown|css|html|scss|less|txt)?\s*\n([\s\S]*?)```/g;
|
|
2737
|
-
while ((match = simpleRegex.exec(normalized)) !== null) {
|
|
2738
|
-
if (match[1].trim().length > 0)
|
|
2739
|
-
blocks.push(match[1].trim());
|
|
2740
|
-
}
|
|
2741
|
-
}
|
|
2742
|
-
return blocks;
|
|
2743
|
-
}
|
|
2744
|
-
// CTO-005: Aggressive fence sanitization — strips ANY remaining fences from content
|
|
2745
|
-
// Applied BEFORE file is written to disk (CEO condition)
|
|
2746
|
-
stripResidualFences(content) {
|
|
2747
|
-
let cleaned = content;
|
|
2748
|
-
// Remove lines that are ONLY a fence marker (with optional language tag)
|
|
2749
|
-
cleaned = cleaned.replace(/^\s*```[\w.+-]*\s*$/gm, '');
|
|
2750
|
-
// TICKET-096: if the response contains a `FILE: <path>` marker line
|
|
2751
|
-
// anywhere in the first 20 lines, slice from the line AFTER it. This
|
|
2752
|
-
// catches the "prose explanation + FILE: <path> + real content" pattern
|
|
2753
|
-
// that the v12r_citizen_mock_state agent produced (broke prod 2026-05-27
|
|
2754
|
-
// 09:23 with line 1 = "The current file already has `state: 'idle'`...").
|
|
2755
|
-
// The prior loop only recognized prose openers like "Here", "Below",
|
|
2756
|
-
// "The following" — anything else broke through. FILE marker is the
|
|
2757
|
-
// canonical separator per the agent system prompt; trust it.
|
|
2758
|
-
const linesForFile = cleaned.split('\n');
|
|
2759
|
-
let fileMarkerIdx = -1;
|
|
2760
|
-
for (let i = 0; i < Math.min(linesForFile.length, 20); i++) {
|
|
2761
|
-
if (/^\s*FILE:\s+\S+/.test(linesForFile[i])) {
|
|
2762
|
-
fileMarkerIdx = i;
|
|
2763
|
-
break;
|
|
2764
|
-
}
|
|
2765
|
-
}
|
|
2766
|
-
if (fileMarkerIdx >= 0) {
|
|
2767
|
-
cleaned = linesForFile.slice(fileMarkerIdx + 1).join('\n');
|
|
2768
|
-
}
|
|
2769
|
-
// Legacy: also strip simple prose prefixes from the FIRST 5 lines for
|
|
2770
|
-
// responses that don't use the FILE marker convention.
|
|
2771
|
-
const lines = cleaned.split('\n');
|
|
2772
|
-
let firstCodeLine = 0;
|
|
2773
|
-
for (let i = 0; i < Math.min(lines.length, 5); i++) {
|
|
2774
|
-
const line = lines[i].trim();
|
|
2775
|
-
if ((line.startsWith('#') && !line.startsWith('#!')) || line.startsWith('Here') || line.startsWith('Below') ||
|
|
2776
|
-
line.startsWith('The following') || line.startsWith('FILE:') || line === '') {
|
|
2777
|
-
firstCodeLine = i + 1;
|
|
2778
|
-
}
|
|
2779
|
-
else {
|
|
2780
|
-
break;
|
|
2781
|
-
}
|
|
2782
|
-
}
|
|
2783
|
-
if (firstCodeLine > 0) {
|
|
2784
|
-
cleaned = lines.slice(firstCodeLine).join('\n');
|
|
2785
|
-
}
|
|
2786
|
-
// Remove trailing fence if present at end
|
|
2787
|
-
cleaned = cleaned.replace(/\n\s*```\s*$/, '');
|
|
2788
|
-
return cleaned.trim();
|
|
2789
|
-
}
|
|
2790
|
-
// B.13: T0 NANO JSON repair via ClawRouter v2.0 — called when postProcessContent still yields invalid JSON
|
|
2791
|
-
async fixJsonWithOllama(content, _filepath) {
|
|
2792
|
-
try {
|
|
2793
|
-
const repairPrompt = `Fix this malformed JSON so it is syntactically valid. Return ONLY the corrected JSON, no explanation or markdown fences:\n\n${content.substring(0, 3000)}`;
|
|
2794
|
-
const result = await routeCall({
|
|
2795
|
-
task_type: 'json_repair', tier_class: 'text', complexity: 'nano',
|
|
2796
|
-
context_tokens: Math.ceil(repairPrompt.length / 4), constitutional_flag: false,
|
|
2797
|
-
agent_id: 'json-repair',
|
|
2798
|
-
payload: { prompt: repairPrompt, max_tokens: 2048 },
|
|
2799
|
-
});
|
|
2800
|
-
const fixed = result.content.trim();
|
|
2801
|
-
try {
|
|
2802
|
-
JSON.parse(fixed);
|
|
2803
|
-
return fixed;
|
|
2804
|
-
}
|
|
2805
|
-
catch {
|
|
2806
|
-
return content;
|
|
2807
|
-
}
|
|
2808
|
-
}
|
|
2809
|
-
catch {
|
|
2810
|
-
return content;
|
|
2811
|
-
}
|
|
2812
|
-
}
|
|
2813
|
-
// File-type-aware post-processing: validates and cleans content per file extension.
|
|
2814
|
-
// This is the FINAL safety net after all fence stripping has run.
|
|
2815
|
-
postProcessContent(content, filepath) {
|
|
2816
|
-
const filename = filepath.split('/').pop() || '';
|
|
2817
|
-
const ext = filename.includes('.') ? filename.split('.').pop().toLowerCase() : '';
|
|
2818
|
-
// .gitkeep must ALWAYS be completely empty — no exceptions
|
|
2819
|
-
if (filename === '.gitkeep' || filepath.endsWith('.gitkeep')) {
|
|
2820
|
-
return '';
|
|
2821
|
-
}
|
|
2822
|
-
// JSON files: ensure the content is valid JSON, strip any fence artifacts
|
|
2823
|
-
if (ext === 'json') {
|
|
2824
|
-
try {
|
|
2825
|
-
JSON.parse(content);
|
|
2826
|
-
return content; // already valid
|
|
2827
|
-
}
|
|
2828
|
-
catch { /* fall through to extraction */ }
|
|
2829
|
-
// Try to extract a JSON object or array
|
|
2830
|
-
const jsonMatch = content.match(/(\{[\s\S]*\}|\[[\s\S]*\])/);
|
|
2831
|
-
if (jsonMatch) {
|
|
2832
|
-
try {
|
|
2833
|
-
JSON.parse(jsonMatch[1]);
|
|
2834
|
-
return jsonMatch[1];
|
|
2835
|
-
}
|
|
2836
|
-
catch { /* fall through */ }
|
|
2837
|
-
}
|
|
2838
|
-
// Strip all fence lines and retry
|
|
2839
|
-
const stripped = content.replace(/^\s*```[\w.+-]*\s*$/gm, '').trim();
|
|
2840
|
-
try {
|
|
2841
|
-
JSON.parse(stripped);
|
|
2842
|
-
return stripped;
|
|
2843
|
-
}
|
|
2844
|
-
catch { /* fall through */ }
|
|
2845
|
-
return stripped; // return best effort even if not valid JSON
|
|
2846
|
-
}
|
|
2847
|
-
// Code/script/markdown files: strip any remaining fence markers aggressively
|
|
2848
|
-
if (['sh', 'bash', 'py', 'ts', 'js', 'tsx', 'jsx', 'mts', 'mjs', 'md', 'markdown'].includes(ext)) {
|
|
2849
|
-
return content.replace(/^\s*```[\w.+-]*\s*$/gm, '').trim();
|
|
2850
|
-
}
|
|
2851
|
-
return content;
|
|
2852
|
-
}
|
|
2853
|
-
// TRUNCATION DETECTION: Check if generated code ends mid-function
|
|
2854
|
-
// Returns true if the code appears to be truncated (open braces, incomplete statement, etc.)
|
|
2855
|
-
detectTruncation(content) {
|
|
2856
|
-
if (!content || content.length < 100)
|
|
2857
|
-
return false;
|
|
2858
|
-
const trimmed = content.trimEnd();
|
|
2859
|
-
const lastLine = trimmed.split('\n').pop()?.trim() || '';
|
|
2860
|
-
const last200 = trimmed.substring(Math.max(0, trimmed.length - 200));
|
|
2861
|
-
// Signs of truncation:
|
|
2862
|
-
// 1. Ends with a partial statement (no semicolon, no closing brace on last line)
|
|
2863
|
-
const endsAbruptly = lastLine.length > 0 && !lastLine.match(/^[}\]);,]/);
|
|
2864
|
-
// 2. Ends mid-string or mid-comment
|
|
2865
|
-
const endsMidString = (trimmed.match(/`/g) || []).length % 2 !== 0;
|
|
2866
|
-
// 3. Significantly more open braces than close braces (>3 imbalance)
|
|
2867
|
-
const openBraces = (content.match(/\{/g) || []).length;
|
|
2868
|
-
const closeBraces = (content.match(/\}/g) || []).length;
|
|
2869
|
-
const braceImbalance = openBraces - closeBraces;
|
|
2870
|
-
// 4. Last meaningful content is a function signature or opening block
|
|
2871
|
-
const endsOnOpener = /(\{|=>|then\(|catch\(|=>\s*)$/.test(last200.trimEnd());
|
|
2872
|
-
if (braceImbalance > 3) {
|
|
2873
|
-
log(c.yellow, ` ! Truncation signal: brace imbalance ${openBraces} open vs ${closeBraces} close`);
|
|
2874
|
-
return true;
|
|
2875
|
-
}
|
|
2876
|
-
if (endsMidString) {
|
|
2877
|
-
log(c.yellow, ` ! Truncation signal: odd number of backticks (mid-template-string)`);
|
|
2878
|
-
return true;
|
|
2879
|
-
}
|
|
2880
|
-
if (endsOnOpener && endsAbruptly) {
|
|
2881
|
-
log(c.yellow, ` ! Truncation signal: ends on opener with no closing`);
|
|
2882
|
-
return true;
|
|
2883
|
-
}
|
|
2884
|
-
return false;
|
|
2885
|
-
}
|
|
2886
|
-
commitChanges(task, files) {
|
|
2887
|
-
if (files.length === 0)
|
|
2888
|
-
return;
|
|
2889
|
-
// Branch-isolation guard: the runner's `git commit` lands on whichever
|
|
2890
|
-
// branch is currently checked out. If a developer has a feature branch
|
|
2891
|
-
// checked out while the runner is alive (per the PR #18 incident
|
|
2892
|
-
// 2026-05-21, where swarm commits polluted fix/clawrouter-*), DO NOT
|
|
2893
|
-
// commit — the deliverables stay on disk, the orchestrator continues,
|
|
2894
|
-
// and the user can rebase/cherry-pick onto main when they're done.
|
|
2895
|
-
// Always-safe vs cleverness: skipping a commit is recoverable; polluting
|
|
2896
|
-
// a feature branch is not. See feedback_branch_hygiene_during_sprint_runner.md.
|
|
2897
|
-
let currentBranch = 'unknown';
|
|
2898
|
-
try {
|
|
2899
|
-
currentBranch = (0, child_process_1.execSync)('git rev-parse --abbrev-ref HEAD', { timeout: 5000 }).toString().trim();
|
|
2900
|
-
}
|
|
2901
|
-
catch { /* if git itself is broken, the commit below will fail; let it */ }
|
|
2902
|
-
if (currentBranch !== 'main') {
|
|
2903
|
-
log(c.yellow, ` ! Commit skipped — checkout is on '${currentBranch}', not 'main'. Files left on disk for ${task.id}; rebase to main manually if you want them committed. (branch-isolation guard)`);
|
|
2904
|
-
return;
|
|
2905
|
-
}
|
|
2906
|
-
try {
|
|
2907
|
-
const filesList = files.join(' ');
|
|
2908
|
-
(0, child_process_1.execSync)(`git add ${filesList}`, { timeout: 10000 });
|
|
2909
|
-
// No --no-verify: pre-commit hooks (secret scan, lint, etc) get to run.
|
|
2910
|
-
// If this ever blocks legitimate commits, fix the hook — don't bypass.
|
|
2911
|
-
(0, child_process_1.execSync)(`git commit -m "feat(${task.agent}): ${task.id} - ${task.type}"`, { timeout: 15000 });
|
|
2912
|
-
log(c.green, ` ✓ Committed: ${files.length} files for ${task.id}`);
|
|
2913
|
-
// TICKET-093 FIX: actually push. Prior version committed but never
|
|
2914
|
-
// pushed — 25 commits stranded local overnight. Best-effort.
|
|
2915
|
-
//
|
|
2916
|
-
// TICKET-095 (REVERTED 2026-05-27): tried `git pull --rebase` on
|
|
2917
|
-
// non-fast-forward rejection but that resets working-tree files to
|
|
2918
|
-
// origin. The sprint JSON file (mid-flight pending → done status
|
|
2919
|
-
// updates written by sprint-runner sync) is uncommitted, so the
|
|
2920
|
-
// rebase nuked it back to the all-pending state committed at sprint
|
|
2921
|
-
// start. Net effect: a single divergent push CAUSED the disk-state
|
|
2922
|
-
// revert TICKET-094 was supposed to fix. Removing the rebase
|
|
2923
|
-
// entirely — failed pushes stay local, founder reconciles manually.
|
|
2924
|
-
// The cost (some lost autonomy on divergent remotes) is lower than
|
|
2925
|
-
// the cost (silent destruction of sprint-state tracking).
|
|
2926
|
-
try {
|
|
2927
|
-
(0, child_process_1.execSync)('git push origin main', { timeout: 30000, stdio: 'pipe' });
|
|
2928
|
-
log(c.gray, ` → pushed to origin/main`);
|
|
2929
|
-
}
|
|
2930
|
-
catch (pushErr) {
|
|
2931
|
-
const msg = (pushErr?.stderr?.toString() || pushErr?.message || '').slice(0, 200).replace(/\s+/g, ' ');
|
|
2932
|
-
log(c.yellow, ` ! Push failed: ${msg.slice(0, 150)} (commit local; founder reconciles)`);
|
|
2933
|
-
}
|
|
2934
|
-
}
|
|
2935
|
-
catch (error) {
|
|
2936
|
-
log(c.yellow, ` ! Commit skipped: ${error.message?.substring(0, 100)}`);
|
|
2937
|
-
}
|
|
2938
|
-
}
|
|
2939
|
-
}
|
|
2940
|
-
// ===== Orchestrator (Dynamic Agent Pipeline) =====
|
|
2941
|
-
class Orchestrator {
|
|
2942
|
-
ceo;
|
|
2943
|
-
cto;
|
|
2944
|
-
supervisor;
|
|
2945
|
-
supervisor2;
|
|
2946
|
-
agents = new Map();
|
|
2947
|
-
tasks = [];
|
|
2948
|
-
stats = { tasksExecuted: 0, approved: 0, rejected: 0, totalTokens: 0, conflicts: 0, escalations: 0 };
|
|
2949
|
-
// Per-task structured run records — written to swarm run report at end of sprint
|
|
2950
|
-
taskRuns = [];
|
|
2951
|
-
/**
|
|
2952
|
-
* Persist a single task's status back to the on-disk sprint file.
|
|
2953
|
-
*
|
|
2954
|
-
* Sprint-1547 fix: previously, task statuses were only written at end-of-run
|
|
2955
|
-
* (line ~3055). Any exception, OOM, or SIGKILL between an approval and the
|
|
2956
|
-
* end-of-run write dropped the approval — the sprint file still said
|
|
2957
|
-
* 'pending', the sprint-runner cron repicked the same sprint, and the same
|
|
2958
|
-
* task ran again. Sprint-1545 looped 30+ times overnight from this.
|
|
2959
|
-
*
|
|
2960
|
-
* Read-modify-write so concurrent edits to OTHER tasks (e.g. another
|
|
2961
|
-
* Claude session) survive — we only overwrite this task's slot.
|
|
2962
|
-
*
|
|
2963
|
-
* Failure must NOT block the next task. Logged and swallowed.
|
|
2964
|
-
*/
|
|
2965
|
-
persistTaskStatus(task) {
|
|
2966
|
-
const sprintFile = process.argv[2] || 'sprints/current.json';
|
|
2967
|
-
try {
|
|
2968
|
-
if (!(0, fs_1.existsSync)(sprintFile))
|
|
2969
|
-
return;
|
|
2970
|
-
const sprintRaw = JSON.parse((0, fs_1.readFileSync)(sprintFile, 'utf-8'));
|
|
2971
|
-
const arr = Array.isArray(sprintRaw.tasks) ? sprintRaw.tasks : null;
|
|
2972
|
-
if (!arr)
|
|
2973
|
-
return;
|
|
2974
|
-
const idx = arr.findIndex((t) => t && t.id === task.id);
|
|
2975
|
-
if (idx < 0)
|
|
2976
|
-
return;
|
|
2977
|
-
arr[idx] = { ...arr[idx], status: task.status };
|
|
2978
|
-
// sprint-1566 F0/F0c: also persist rejected_reason when set (replaced-by-split, over-budget)
|
|
2979
|
-
if (task.rejected_reason)
|
|
2980
|
-
arr[idx].rejected_reason = task.rejected_reason;
|
|
2981
|
-
// CTO-006 telemetry-blackout hotfix (2026-05-29, Slice A of TICKET-135):
|
|
2982
|
-
// persist attempt_count + score + grade + rejection_reason. Without
|
|
2983
|
-
// these, the .swarm-state file showed only {status, attempt_count: 1}
|
|
2984
|
-
// even after 3 attempts, and every CTO retrospective marked score=? +
|
|
2985
|
-
// attempts=?. 10 consecutive zero-ship sprints diagnosed as "we can't
|
|
2986
|
-
// root-cause because telemetry is dark." This wires the orchestrator's
|
|
2987
|
-
// in-memory fields → ACTIVE → .swarm-state via sprint-runner's sync.
|
|
2988
|
-
if (task.attempt_count != null)
|
|
2989
|
-
arr[idx].attempt_count = task.attempt_count;
|
|
2990
|
-
if (task.score != null)
|
|
2991
|
-
arr[idx].score = task.score;
|
|
2992
|
-
if (task.grade != null)
|
|
2993
|
-
arr[idx].grade = task.grade;
|
|
2994
|
-
if (task.rejection_reason != null)
|
|
2995
|
-
arr[idx].rejection_reason = task.rejection_reason;
|
|
2996
|
-
// TICKET-094 FIX: atomic write via temp+rename. Plain writeFileSync was
|
|
2997
|
-
// vulnerable to concurrent-reader-sees-empty-file races AND interleaved-
|
|
2998
|
-
// writer truncation. The disk-state revert that stranded sprint-1588 +
|
|
2999
|
-
// sprint-1589 in all-pending despite "Synced N updates" was almost
|
|
3000
|
-
// certainly the orchestrator + sprint-runner.ts sync racing on the
|
|
3001
|
-
// same file at sprint end. Tmp+rename is atomic on POSIX — readers see
|
|
3002
|
-
// either old or new, never half.
|
|
3003
|
-
const tmp = `${sprintFile}.tmp.${process.pid}.${Date.now()}`;
|
|
3004
|
-
(0, fs_1.writeFileSync)(tmp, JSON.stringify(sprintRaw, null, 2));
|
|
3005
|
-
require('fs').renameSync(tmp, sprintFile);
|
|
3006
|
-
}
|
|
3007
|
-
catch (e) {
|
|
3008
|
-
log(c.yellow, ` [persist] Failed to update ${task.id} status: ${(e?.message || '').substring(0, 100)}`);
|
|
3009
|
-
}
|
|
3010
|
-
}
|
|
3011
|
-
/** sprint-1566 F0/F0d: inject decomposer-split sub-tasks into the active
|
|
3012
|
-
* sprint file as new pending tasks. Sprint-runner picks them up on the next
|
|
3013
|
-
* cron tick. The original task stays in the file with status='replaced-by-split'.
|
|
3014
|
-
*
|
|
3015
|
-
* Founder fix 2026-05-27: ALSO push to this.tasks (in-memory). The sprint-end
|
|
3016
|
-
* writeFileSync at line 4088 dumps this.tasks back to ACTIVE, overwriting
|
|
3017
|
-
* whatever we wrote to disk here. Without the in-memory push, injected
|
|
3018
|
-
* sub-tasks were silently wiped at sprint-end and the persistence fix
|
|
3019
|
-
* in sprint-runner.ts saw nothing to forward to the source sprint file.
|
|
3020
|
-
* This is the root cause of the 0-ship pattern in sprint-1596/1597. */
|
|
3021
|
-
injectSplitTasks(original, splits, rationale) {
|
|
3022
|
-
const sprintFile = process.argv[2] || 'sprints/current.json';
|
|
3023
|
-
if (!(0, fs_1.existsSync)(sprintFile))
|
|
3024
|
-
return;
|
|
3025
|
-
const sprintRaw = JSON.parse((0, fs_1.readFileSync)(sprintFile, 'utf-8'));
|
|
3026
|
-
if (!Array.isArray(sprintRaw.tasks))
|
|
3027
|
-
return;
|
|
3028
|
-
const existingIds = new Set(sprintRaw.tasks.map((t) => t?.id));
|
|
3029
|
-
let injected = 0;
|
|
3030
|
-
for (const s of splits) {
|
|
3031
|
-
if (existingIds.has(s.id))
|
|
3032
|
-
continue; // idempotent: already split before
|
|
3033
|
-
const newTask = {
|
|
3034
|
-
...s,
|
|
3035
|
-
attempt_count: 0,
|
|
3036
|
-
parent_task_id: original.id,
|
|
3037
|
-
split_rationale: rationale,
|
|
3038
|
-
injected_at: new Date().toISOString(),
|
|
3039
|
-
};
|
|
3040
|
-
sprintRaw.tasks.push(newTask);
|
|
3041
|
-
// Also push to in-memory — survives the sprint-end ACTIVE rewrite.
|
|
3042
|
-
this.tasks.push(newTask);
|
|
3043
|
-
injected++;
|
|
3044
|
-
}
|
|
3045
|
-
if (injected > 0)
|
|
3046
|
-
(0, fs_1.writeFileSync)(sprintFile, JSON.stringify(sprintRaw, null, 2));
|
|
3047
|
-
log(c.green, ` [inject] Added ${injected} split sub-tasks to ${sprintFile} (in-memory + on-disk)`);
|
|
3048
|
-
}
|
|
3049
|
-
constructor() {
|
|
3050
|
-
log(c.bold, '\n╔══════════════════════════════════════════════════════════╗');
|
|
3051
|
-
log(c.bold, '║ Kognai Swarm Orchestrator v2.17 — V17 Architecture ║');
|
|
3052
|
-
log(c.bold, '║ Local-first · ClawRouter cloud · DeepSeek reviews ║');
|
|
3053
|
-
log(c.bold, '╚══════════════════════════════════════════════════════════╝\n');
|
|
3054
|
-
// Leadership layer (CEO = Claude via Anthropic; Sup1 = DeepSeek/Sonnet;
|
|
3055
|
-
// Sup2 = Haiku with DeepSeek fallback. Both supervisors were originally
|
|
3056
|
-
// Sonnet + Codex — see file-header history note.)
|
|
3057
|
-
this.ceo = new CEOAgent();
|
|
3058
|
-
this.supervisor = new SupervisorAgent();
|
|
3059
|
-
this.supervisor2 = new Supervisor2Agent();
|
|
3060
|
-
// Technology layer (MiniMax)
|
|
3061
|
-
this.cto = new CTOAgent();
|
|
3062
|
-
// Execution layer — dynamically load all coding agents from agents/ directory
|
|
3063
|
-
const skipAgents = ['ceo', 'supervisor', 'skills', 'cto', 'cmo'];
|
|
3064
|
-
const agentDirs = (0, fs_1.existsSync)('./agents') ? (0, fs_1.readdirSync)('./agents').filter(d => {
|
|
3065
|
-
if (skipAgents.includes(d))
|
|
3066
|
-
return false;
|
|
3067
|
-
return (0, fs_1.existsSync)(`./agents/${d}/prompt.md`);
|
|
3068
|
-
}) : [];
|
|
3069
|
-
// Constitutional preamble — injected into every agent's system prompt
|
|
3070
|
-
const constitutionalPreamble = loadConstitutionalPreamble();
|
|
3071
|
-
if (constitutionalPreamble) {
|
|
3072
|
-
log(c.green, ' ⚖️ Constitutional preamble loaded — will bind all agents');
|
|
3073
|
-
}
|
|
3074
|
-
for (const name of agentDirs) {
|
|
3075
|
-
const promptPath = `./agents/${name}/prompt.md`;
|
|
3076
|
-
const rawPrompt = (0, fs_1.readFileSync)(promptPath, 'utf-8');
|
|
3077
|
-
const prompt = constitutionalPreamble + rawPrompt;
|
|
3078
|
-
this.agents.set(name, new CodingAgent(name, prompt));
|
|
3079
|
-
log(c.cyan, `+ Loaded ${name} agent (MiniMax M2.5)`);
|
|
3080
|
-
}
|
|
3081
|
-
// Agent count: CEO + Sup1 + Sup2 (all 3 Anthropic-bound, with provider routing happening per-call)
|
|
3082
|
-
// + 1 CMO (qwen3:4b local) + 1 CTO (MiniMax) + N coders (MiniMax)
|
|
3083
|
-
const totalAgents = 3 + 1 + 1 + this.agents.size;
|
|
3084
|
-
log(c.green, `\n✓ ${totalAgents} agents loaded (3 Anthropic-bound leadership + 1 CMO qwen3:4b + ${1 + this.agents.size} MiniMax)\n`);
|
|
3085
|
-
}
|
|
3086
|
-
loadTasks() {
|
|
3087
|
-
const sprintFile = process.argv[2] || 'sprints/current.json';
|
|
3088
|
-
if (!(0, fs_1.existsSync)(sprintFile)) {
|
|
3089
|
-
log(c.red, `Sprint file not found: ${sprintFile}`);
|
|
3090
|
-
process.exit(1);
|
|
3091
|
-
}
|
|
3092
|
-
const sprint = JSON.parse((0, fs_1.readFileSync)(sprintFile, 'utf-8'));
|
|
3093
|
-
this.tasks = sprint.tasks || [];
|
|
3094
|
-
const _sprintId = sprintFile.replace(/.*\//, '').replace('.json', '');
|
|
3095
|
-
// Normalize deliverables: CEO planner may emit flat string[] instead of {code,tests,docs}
|
|
3096
|
-
for (const task of this.tasks) {
|
|
3097
|
-
const d = task.deliverables;
|
|
3098
|
-
if (!d) {
|
|
3099
|
-
// Sprint JSON may omit deliverables — default from task_target
|
|
3100
|
-
const target = task.task_target;
|
|
3101
|
-
task.deliverables = { code: target ? [target] : [], tests: [], docs: [] };
|
|
3102
|
-
}
|
|
3103
|
-
else if (Array.isArray(d)) {
|
|
3104
|
-
task.deliverables = {
|
|
3105
|
-
code: d.filter((f) => f.indexOf("test") === -1 && f.indexOf("spec") === -1 && f.slice(-3) !== ".md"),
|
|
3106
|
-
tests: d.filter((f) => f.indexOf("test") !== -1 || f.indexOf("spec") !== -1),
|
|
3107
|
-
docs: d.filter((f) => f.slice(-3) === ".md"),
|
|
3108
|
-
};
|
|
3109
|
-
}
|
|
3110
|
-
// Normalize description → context: sprint JSON files may use either field name
|
|
3111
|
-
if (!task.context && task.description) {
|
|
3112
|
-
task.context = task.description;
|
|
3113
|
-
}
|
|
3114
|
-
// Ensure context is always a string (never undefined)
|
|
3115
|
-
if (!task.context)
|
|
3116
|
-
task.context = `${task.id}: ${task.title || task.type}`;
|
|
3117
|
-
// Normalize priority: sprint JSON may omit it
|
|
3118
|
-
if (!task.priority)
|
|
3119
|
-
task.priority = 'medium';
|
|
3120
|
-
// Fix: task_target used as file path (e.g., 'scripts/lib/foo.ts') must be cleared
|
|
3121
|
-
// so it doesn't confuse the routing switch which expects: local|cloud-code|cloud-exec|cloud-post
|
|
3122
|
-
const VALID_ROUTING_TARGETS = ['local', 'cloud-code', 'cloud-exec', 'cloud-post'];
|
|
3123
|
-
if (task.task_target && !VALID_ROUTING_TARGETS.includes(task.task_target)) {
|
|
3124
|
-
delete task.task_target; // file path already captured in deliverables.code
|
|
3125
|
-
}
|
|
3126
|
-
// Stamp sprint_id — avoids 'unknown' in logs/routing/YYYY-MM-DD.jsonl
|
|
3127
|
-
if (!task.sprint_id)
|
|
3128
|
-
task.sprint_id = _sprintId;
|
|
3129
|
-
}
|
|
3130
|
-
// Reset stale in_progress tasks back to pending
|
|
3131
|
-
for (const task of this.tasks) {
|
|
3132
|
-
if (task.status === 'in_progress' || task.status === 'review') {
|
|
3133
|
-
log(c.yellow, ` Resetting stale task ${task.id} (${task.status} -> pending)`);
|
|
3134
|
-
task.status = 'pending';
|
|
3135
|
-
}
|
|
3136
|
-
}
|
|
3137
|
-
log(c.blue, `Loaded ${this.tasks.length} tasks from ${sprintFile}`);
|
|
3138
|
-
}
|
|
3139
|
-
// ===== Truncation Detection =====
|
|
3140
|
-
isTruncationRejection(review) {
|
|
3141
|
-
const truncationKeywords = [
|
|
3142
|
-
'truncat', 'incomplete', 'cut off', 'cuts off', 'ends abruptly',
|
|
3143
|
-
'missing implementation', 'missing the actual', 'file is incomplete',
|
|
3144
|
-
'cuts off mid', 'missing core functionality', 'missing the entire',
|
|
3145
|
-
];
|
|
3146
|
-
const text = (review.summary + ' ' +
|
|
3147
|
-
(review.issues || []).map(i => i.description).join(' ')).toLowerCase();
|
|
3148
|
-
return truncationKeywords.some(kw => text.includes(kw));
|
|
3149
|
-
}
|
|
3150
|
-
// ===== CTO Task Decomposition (for truncation-prone tasks) =====
|
|
3151
|
-
async ctoDecomposeTask(task) {
|
|
3152
|
-
log(c.cyan, `\n[cto-decompose] 🔧 CTO splitting ${task.id} into smaller sub-tasks...`);
|
|
3153
|
-
const allDeliverables = [
|
|
3154
|
-
...(task.deliverables.code || []),
|
|
3155
|
-
...(task.deliverables.tests || []),
|
|
3156
|
-
];
|
|
3157
|
-
const userPrompt = `A task keeps failing because MiniMax M2.5 truncates output when generating multiple files.
|
|
3158
|
-
|
|
3159
|
-
## Failed Task
|
|
3160
|
-
- ID: ${task.id}
|
|
3161
|
-
- Agent: ${task.agent}
|
|
3162
|
-
- Context: ${task.context.substring(0, 1500)}
|
|
3163
|
-
- Deliverable files: ${allDeliverables.join(', ')}
|
|
3164
|
-
|
|
3165
|
-
## Problem
|
|
3166
|
-
MiniMax M2.5 has a ~4500 token output limit per call. When a task has ${allDeliverables.length} files, each file gets less space and code gets truncated.
|
|
3167
|
-
|
|
3168
|
-
## Your Job
|
|
3169
|
-
Split this task into smaller sub-tasks. Each sub-task must have at most 1 code file + 1 test file (2 files max).
|
|
3170
|
-
|
|
3171
|
-
## Rules
|
|
3172
|
-
1. Types/interfaces files FIRST (other files depend on them)
|
|
3173
|
-
2. Barrel/index export files LAST (they import from everything else)
|
|
3174
|
-
3. Each sub-task must be self-contained (agent can generate it without seeing other sub-task results)
|
|
3175
|
-
4. Include enough context in each sub-task for the agent to know what to generate
|
|
3176
|
-
5. Maximum 5 sub-tasks
|
|
3177
|
-
|
|
3178
|
-
## Output Format
|
|
3179
|
-
Return a JSON array of sub-task specs:
|
|
3180
|
-
[
|
|
3181
|
-
{
|
|
3182
|
-
"sub_id": "${task.id}-A",
|
|
3183
|
-
"context": "Full task context for this sub-task including what types/interfaces to define",
|
|
3184
|
-
"code": ["path/to/file.ts"],
|
|
3185
|
-
"tests": ["path/to/file.test.ts"]
|
|
3186
|
-
}
|
|
3187
|
-
]
|
|
3188
|
-
|
|
3189
|
-
ONLY output the JSON array. No markdown, no explanation.`;
|
|
3190
|
-
try {
|
|
3191
|
-
const response = await callLLM('clawrouter', 'deepseek/deepseek-chat', this.cto['systemPrompt'] || '', userPrompt, 120000, 'cto', 'fallback_task_decomposer');
|
|
3192
|
-
let content = response.choices?.[0]?.message?.content || '';
|
|
3193
|
-
content = content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
|
|
3194
|
-
const jsonMatch = content.match(/\[[\s\S]*\]/);
|
|
3195
|
-
if (!jsonMatch) {
|
|
3196
|
-
log(c.yellow, ' CTO decomposition returned no JSON, falling back to mechanical split');
|
|
3197
|
-
return this.fallbackDecompose(task);
|
|
3198
|
-
}
|
|
3199
|
-
const specs = JSON.parse(jsonMatch[0]);
|
|
3200
|
-
if (!Array.isArray(specs) || specs.length < 2) {
|
|
3201
|
-
log(c.yellow, ' CTO returned <2 sub-tasks, falling back to mechanical split');
|
|
3202
|
-
return this.fallbackDecompose(task);
|
|
3203
|
-
}
|
|
3204
|
-
// Convert specs to AgentTask objects
|
|
3205
|
-
const subtasks = specs.slice(0, 5).map((spec, i) => ({
|
|
3206
|
-
id: spec.sub_id || `${task.id}-${String.fromCharCode(65 + i)}`,
|
|
3207
|
-
agent: task.agent,
|
|
3208
|
-
type: task.type,
|
|
3209
|
-
priority: task.priority,
|
|
3210
|
-
dependencies: i > 0 ? [specs[i - 1].sub_id || `${task.id}-${String.fromCharCode(64 + i)}`] : [],
|
|
3211
|
-
context: spec.context,
|
|
3212
|
-
deliverables: {
|
|
3213
|
-
code: spec.code || [],
|
|
3214
|
-
tests: spec.tests || [],
|
|
3215
|
-
},
|
|
3216
|
-
status: 'pending',
|
|
3217
|
-
}));
|
|
3218
|
-
log(c.green, ` ✓ CTO decomposed ${task.id} into ${subtasks.length} sub-tasks:`);
|
|
3219
|
-
for (const st of subtasks) {
|
|
3220
|
-
const files = [...(st.deliverables.code || []), ...(st.deliverables.tests || [])];
|
|
3221
|
-
log(c.cyan, ` ${st.id}: ${files.join(', ')}`);
|
|
3222
|
-
}
|
|
3223
|
-
return subtasks;
|
|
3224
|
-
}
|
|
3225
|
-
catch (error) {
|
|
3226
|
-
log(c.yellow, ` CTO decomposition failed: ${error.message}, using fallback`);
|
|
3227
|
-
return this.fallbackDecompose(task);
|
|
3228
|
-
}
|
|
3229
|
-
}
|
|
3230
|
-
// ===== Fallback: Mechanical file-based split =====
|
|
3231
|
-
fallbackDecompose(task) {
|
|
3232
|
-
const codeFiles = task.deliverables.code || [];
|
|
3233
|
-
const testFiles = task.deliverables.tests || [];
|
|
3234
|
-
log(c.yellow, ` [fallback] Mechanically splitting ${task.id} by file...`);
|
|
3235
|
-
const subtasks = [];
|
|
3236
|
-
for (let i = 0; i < codeFiles.length; i++) {
|
|
3237
|
-
const code = codeFiles[i];
|
|
3238
|
-
// Find matching test file
|
|
3239
|
-
const baseName = code.replace(/\.ts$/, '').split('/').pop() || '';
|
|
3240
|
-
const matchingTest = testFiles.find(t => t.includes(baseName) && (t.includes('.test.') || t.includes('.spec.')));
|
|
3241
|
-
subtasks.push({
|
|
3242
|
-
id: `${task.id}-${String.fromCharCode(65 + i)}`,
|
|
3243
|
-
agent: task.agent,
|
|
3244
|
-
type: task.type,
|
|
3245
|
-
priority: task.priority,
|
|
3246
|
-
dependencies: i > 0 ? [`${task.id}-${String.fromCharCode(64 + i)}`] : [],
|
|
3247
|
-
context: `${task.context}\n\n## SUB-TASK: Generate ONLY the file "${code}"${matchingTest ? ` and its test "${matchingTest}"` : ''}.\nThis is part of a larger task that was split to avoid truncation. Focus on this file only. Make it complete and self-contained.`,
|
|
3248
|
-
deliverables: {
|
|
3249
|
-
code: [code],
|
|
3250
|
-
tests: matchingTest ? [matchingTest] : [],
|
|
3251
|
-
},
|
|
3252
|
-
status: 'pending',
|
|
3253
|
-
});
|
|
3254
|
-
}
|
|
3255
|
-
// Handle orphan test files (tests without matching code file)
|
|
3256
|
-
const usedTests = subtasks.flatMap(st => st.deliverables.tests || []);
|
|
3257
|
-
const orphanTests = testFiles.filter(t => !usedTests.includes(t));
|
|
3258
|
-
if (orphanTests.length > 0) {
|
|
3259
|
-
subtasks.push({
|
|
3260
|
-
id: `${task.id}-${String.fromCharCode(65 + codeFiles.length)}`,
|
|
3261
|
-
agent: task.agent,
|
|
3262
|
-
type: task.type,
|
|
3263
|
-
priority: task.priority,
|
|
3264
|
-
dependencies: subtasks.length > 0 ? [subtasks[subtasks.length - 1].id] : [],
|
|
3265
|
-
context: `${task.context}\n\n## SUB-TASK: Generate ONLY the test file(s): ${orphanTests.join(', ')}.\nAll source code files have already been generated. Write tests that import from the existing source files.`,
|
|
3266
|
-
deliverables: {
|
|
3267
|
-
code: [],
|
|
3268
|
-
tests: orphanTests,
|
|
3269
|
-
},
|
|
3270
|
-
status: 'pending',
|
|
3271
|
-
});
|
|
3272
|
-
}
|
|
3273
|
-
log(c.green, ` ✓ Fallback split ${task.id} into ${subtasks.length} sub-tasks`);
|
|
3274
|
-
return subtasks;
|
|
3275
|
-
}
|
|
3276
|
-
// ===== Sub-task executor (limited retries, no recursive decomposition) =====
|
|
3277
|
-
async executeSubTask(subtask, maxRetries) {
|
|
3278
|
-
// Sprint 1309: default to 'coder' when subtask.agent is not set
|
|
3279
|
-
const subAgentName = subtask.agent || 'coder';
|
|
3280
|
-
const agent = this.agents.get(subAgentName);
|
|
3281
|
-
if (!agent) {
|
|
3282
|
-
log(c.red, ` Agent not found for sub-task: ${subAgentName}`);
|
|
3283
|
-
return false;
|
|
3284
|
-
}
|
|
3285
|
-
let lastReview;
|
|
3286
|
-
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
|
3287
|
-
log(c.blue, `\n [sub-task] ${subtask.id} | Attempt: ${attempt}/${maxRetries}`);
|
|
3288
|
-
this.stats.tasksExecuted++;
|
|
3289
|
-
// AMD-08: depth=1 (sub-agent chain)
|
|
3290
|
-
if (!monotask_state_machine_1.MonotaskSM.claim(subtask.agent, subtask.id, 1)) {
|
|
3291
|
-
log(c.yellow, ` [monotask] ${subtask.agent} unavailable — skipping sub-task attempt ${attempt}`);
|
|
3292
|
-
continue;
|
|
3293
|
-
}
|
|
3294
|
-
monotask_state_machine_1.MonotaskSM.start(subtask.agent, subtask.id);
|
|
3295
|
-
const subStart = Date.now();
|
|
3296
|
-
const result = await agent.execute(subtask, lastReview);
|
|
3297
|
-
if (result.files.length === 0) {
|
|
3298
|
-
const dels = [...(subtask.deliverables?.code || []), ...(subtask.deliverables?.tests || []), ...(subtask.deliverables?.docs || [])];
|
|
3299
|
-
const reasons = subtask._failureReasons || [];
|
|
3300
|
-
const inferred = reasons.length ? reasons.join('; ') : (dels.length === 0 ? 'empty-deliverables' : 'unknown');
|
|
3301
|
-
log(c.red, ` ✗ No files produced for sub-task ${subtask.id} [model=${result.model || 'n/a'}, deliverables=${dels.length}, reason=${inferred}]`);
|
|
3302
|
-
monotask_state_machine_1.MonotaskSM.release(subtask.agent, subtask.id, `no files: ${inferred.slice(0, 60)}`);
|
|
3303
|
-
return false;
|
|
3304
|
-
}
|
|
3305
|
-
// Dual supervisor review
|
|
3306
|
-
const [review1, review2] = await Promise.all([
|
|
3307
|
-
this.supervisor.reviewTask(subtask, result.files),
|
|
3308
|
-
this.supervisor2.reviewTask(subtask, result.files),
|
|
3309
|
-
]);
|
|
3310
|
-
const dualResult = await reconcileSupervisorReviews(review1, review2, subtask, this.ceo);
|
|
3311
|
-
const review = dualResult.finalReview;
|
|
3312
|
-
if (!dualResult.consensus)
|
|
3313
|
-
this.stats.conflicts++;
|
|
3314
|
-
if (dualResult.escalatedToCEO)
|
|
3315
|
-
this.stats.escalations++;
|
|
3316
|
-
lastReview = review;
|
|
3317
|
-
// TICKET-214: instrument SUB-TASK attempts. The MAIN task loop already taps KSL +
|
|
3318
|
-
// records reputation, but the split sub-task loop did neither — so split sprints
|
|
3319
|
-
// went unrecorded in KSL and their rejections were never filed. Tap + score every
|
|
3320
|
-
// attempt here too. Best-effort, non-fatal.
|
|
3321
|
-
recordAgentScore(subtask.agent, review.score);
|
|
3322
|
-
try {
|
|
3323
|
-
const _sid = resolveActiveSprintId();
|
|
3324
|
-
(0, orchestrator_tap_1.tapAttempt)({
|
|
3325
|
-
sprint_id: _sid, task_id: subtask.id, attempt, agent: subtask.agent,
|
|
3326
|
-
model: result.model || 'unknown',
|
|
3327
|
-
prompt: String(subtask.context || subtask.title || subtask.id),
|
|
3328
|
-
reply: (result.files || []).map((f) => f.content || '').join('\n').slice(0, 20000),
|
|
3329
|
-
duration_ms: Date.now() - subStart,
|
|
3330
|
-
});
|
|
3331
|
-
}
|
|
3332
|
-
catch { /* non-fatal */ }
|
|
3333
|
-
if (review.verdict === 'APPROVED') {
|
|
3334
|
-
this.stats.approved++;
|
|
3335
|
-
monotask_state_machine_1.MonotaskSM.complete(subtask.agent, subtask.id);
|
|
3336
|
-
log(c.green, ` ✓ Sub-task ${subtask.id} APPROVED on attempt ${attempt} (${review.score}/100)`);
|
|
3337
|
-
return true;
|
|
3338
|
-
}
|
|
3339
|
-
this.stats.rejected++;
|
|
3340
|
-
log(c.yellow, ` ↻ Sub-task ${subtask.id} REJECTED on attempt ${attempt} (${review.score}/100)`);
|
|
3341
|
-
try {
|
|
3342
|
-
(0, code_failure_logger_1.logCodeFailure)({ taskId: subtask.id, sprintId: resolveActiveSprintId(), agentId: resolveAgentDid(subtask.agent), attemptNum: attempt, score: review.score || 0, model: result.model || 'unknown', rejectionReason: review.summary || 'sub-task rejected', issues: review.issues || [], failType: 'supervisor_rejected' });
|
|
3343
|
-
}
|
|
3344
|
-
catch { /* non-fatal */ }
|
|
3345
|
-
safeResetLastCommit(subtask.id, subtask.agent, subtask.type, ' ');
|
|
3346
|
-
monotask_state_machine_1.MonotaskSM.release(subtask.agent, subtask.id, `rejected attempt ${attempt}`);
|
|
3347
|
-
}
|
|
3348
|
-
log(c.red, ` ✗ Sub-task ${subtask.id} FAILED after ${maxRetries} attempts`);
|
|
3349
|
-
return false;
|
|
3350
|
-
}
|
|
3351
|
-
// ===== Main task executor with CTO auto-decomposition =====
|
|
3352
|
-
async executeTask(task) {
|
|
3353
|
-
// Sprint 1309: default to 'coder' when task.agent is not set (queue-prescribed sprints omit agent field)
|
|
3354
|
-
const agentName = task.agent || 'coder';
|
|
3355
|
-
const agent = this.agents.get(agentName);
|
|
3356
|
-
if (!agent) {
|
|
3357
|
-
log(c.red, `Agent not found: ${agentName}`);
|
|
3358
|
-
task.status = 'rejected';
|
|
3359
|
-
this.persistTaskStatus(task);
|
|
3360
|
-
// Record failure in taskRuns
|
|
3361
|
-
this.taskRuns.push({
|
|
3362
|
-
task_id: task.id, title: task.title || task.id, type: task.type,
|
|
3363
|
-
task_target: task.task_target || 'cloud-code',
|
|
3364
|
-
status: 'rejected', attempts: 0, model_used: '', provider: '',
|
|
3365
|
-
tokens_total: 0, duration_seconds: 0, files_written: [],
|
|
3366
|
-
review: null, error: `Agent not found: ${agentName}`, rejection_reason: 'Agent not found',
|
|
3367
|
-
});
|
|
3368
|
-
return;
|
|
3369
|
-
}
|
|
3370
|
-
// Sprint 706: BrainX — inject memories before task execution
|
|
3371
|
-
try {
|
|
3372
|
-
if (this._brainxBridge) {
|
|
3373
|
-
const injection = await this._brainxBridge.injectMemories(task.agent);
|
|
3374
|
-
if (injection.memory_count > 0)
|
|
3375
|
-
log(c.gray, ` [BrainX] Injected ${injection.memory_count} memories for ${task.agent}`);
|
|
3376
|
-
}
|
|
3377
|
-
}
|
|
3378
|
-
catch { /* BrainX injection is non-blocking */ }
|
|
3379
|
-
const taskRunStart = Date.now();
|
|
3380
|
-
const taskRun = {
|
|
3381
|
-
task_id: task.id,
|
|
3382
|
-
title: task.title || task.id,
|
|
3383
|
-
type: task.type,
|
|
3384
|
-
task_target: task.task_target || 'cloud-code',
|
|
3385
|
-
status: 'pending',
|
|
3386
|
-
attempts: 0,
|
|
3387
|
-
model_used: '',
|
|
3388
|
-
provider: '',
|
|
3389
|
-
tokens_total: 0,
|
|
3390
|
-
duration_seconds: 0,
|
|
3391
|
-
files_written: [],
|
|
3392
|
-
review: null,
|
|
3393
|
-
error: null,
|
|
3394
|
-
rejection_reason: null,
|
|
3395
|
-
};
|
|
3396
|
-
// sprint-1566 F0b: aligned with sprint-runner's PER_TASK_LIFETIME_MAX_ATTEMPTS=5.
|
|
3397
|
-
// Was 10 (single-line const hidden under a misleading comment claiming it had
|
|
3398
|
-
// already been lowered — it hadn't). At 10 retries × ~14K tokens/attempt
|
|
3399
|
-
// (MiniMax + dual review) a single stuck task could burn ~140K tokens before
|
|
3400
|
-
// the lifetime gate had a chance to look at it on the next run.
|
|
3401
|
-
const MAX_RETRIES = parseInt(process.env.MAX_RETRIES_PER_RUN || '3', 10);
|
|
3402
|
-
const TRUNCATION_THRESHOLD = 1;
|
|
3403
|
-
// sprint-1566 F0c: per-task token budget. Caps cumulative tokens spent on
|
|
3404
|
-
// one task across its retries so a single task can't eat the daily wallet.
|
|
3405
|
-
// Raised 2026-05-27 from 25K → 50K: sprint-1590 lost 3 of 6 tasks because
|
|
3406
|
-
// first-attempt token spend (30-62K) routinely exceeded the 25K cap,
|
|
3407
|
-
// killing retries before supervisor rejection feedback could be applied.
|
|
3408
|
-
// 50K = ~$0.10 worst-case at DeepSeek pricing for first attempt + 1 retry.
|
|
3409
|
-
// TICKET-209 (2026-05-29): bumped 100k → 200k. With EDIT-MODE now the
|
|
3410
|
-
// default for modify tasks, 200k is more headroom than legitimate work
|
|
3411
|
-
// needs — but covers the long-tail of large create tasks (multi-section
|
|
3412
|
-
// spec docs, full-module rewrites) without forcing escalation.
|
|
3413
|
-
const PER_TASK_TOKEN_BUDGET = parseInt(process.env.PER_TASK_TOKEN_BUDGET || '200000', 10);
|
|
3414
|
-
let taskTokensSpent = 0;
|
|
3415
|
-
let truncationCount = 0;
|
|
3416
|
-
let lastReview;
|
|
3417
|
-
// OMEL AMD-13: Create isolated tmpdir for this task (cleaned up in finally)
|
|
3418
|
-
const phantomCtx = phantom_workspace_1.phantomWorkspace.create(task.id);
|
|
3419
|
-
try {
|
|
3420
|
-
// CTO-20260528-002 (2026-05-27): one-file-per-call enforcement.
|
|
3421
|
-
// The store_page incident in v12r+1 was a multi-file task that nobody
|
|
3422
|
-
// pre-screened. Atomic tasks are easier to review, retry, and roll back.
|
|
3423
|
-
// If a task targets >1 source file, force a per-file split up front —
|
|
3424
|
-
// routeToDecomposer's Strategy A handles this deterministically.
|
|
3425
|
-
{
|
|
3426
|
-
const codeFiles = task.deliverables?.code?.filter((f) => !/__tests__|\.test\./.test(f)) || [];
|
|
3427
|
-
const editFiles = task.deliverables?.edits || [];
|
|
3428
|
-
const sourceFileCount = codeFiles.length + editFiles.length;
|
|
3429
|
-
if (sourceFileCount > 1 && task.agent === 'coder') {
|
|
3430
|
-
log(c.yellow, ` [Atomicity] PRE-FLIGHT REJECT: ${task.id} targets ${sourceFileCount} source files (one-file-per-call policy)`);
|
|
3431
|
-
const route = (0, decomposer_feedback_1.routeToDecomposer)({
|
|
3432
|
-
original_task_id: task.id,
|
|
3433
|
-
rejection_signal: 'needs_resplit',
|
|
3434
|
-
suggested_splits: [...codeFiles, ...editFiles],
|
|
3435
|
-
learnings_ref: 'docs/learnings.md §1',
|
|
3436
|
-
original_task: task,
|
|
3437
|
-
});
|
|
3438
|
-
if ('task_split' in route) {
|
|
3439
|
-
log(c.cyan, ` [Atomicity] split into ${route.task_split.length} per-file sub-tasks`);
|
|
3440
|
-
try {
|
|
3441
|
-
this.injectSplitTasks(task, route.task_split, route.rationale);
|
|
3442
|
-
}
|
|
3443
|
-
catch (e) {
|
|
3444
|
-
log(c.red, ` Inject failed: ${e.message?.slice(0, 120)}`);
|
|
3445
|
-
}
|
|
3446
|
-
task.status = 'replaced-by-split';
|
|
3447
|
-
task.rejected_reason = `Replaced by ${route.task_split.length} per-file sub-tasks (one-file-per-call policy)`;
|
|
3448
|
-
this.persistTaskStatus(task);
|
|
3449
|
-
taskRun.status = 'replaced-by-split';
|
|
3450
|
-
taskRun.rejection_reason = 'Atomicity pre-flight: multi-file task';
|
|
3451
|
-
taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
|
|
3452
|
-
this.taskRuns.push(taskRun);
|
|
3453
|
-
phantom_workspace_1.phantomWorkspace.cleanup(phantomCtx);
|
|
3454
|
-
return;
|
|
3455
|
-
}
|
|
3456
|
-
// Unsplittable multi-file (e.g. zero deliverables resolved) → fall through
|
|
3457
|
-
// to the token validator so we don't hard-block tasks the decomposer can't
|
|
3458
|
-
// disambiguate. Logged for founder review.
|
|
3459
|
-
log(c.gray, ` [Atomicity] decomposer could not split — proceeding (founder review): ${route.reason}`);
|
|
3460
|
-
}
|
|
3461
|
-
}
|
|
3462
|
-
// sprint-1566 F0: pre-flight token-budget validator. If the task's output
|
|
3463
|
-
// is predicted to exceed the MiniMax truncation point (~4500 tokens),
|
|
3464
|
-
// route to decomposer-feedback for a structural re-split BEFORE any LLM
|
|
3465
|
-
// is dispatched. Stops the truncation cascade at the source.
|
|
3466
|
-
const validation = (0, token_budget_validator_1.validateTask)(task);
|
|
3467
|
-
if (!validation.ok) {
|
|
3468
|
-
// Discriminated-union narrow via Extract — boolean discriminator alone
|
|
3469
|
-
// isn't reliably narrowing under our tsconfig + alias-imported types.
|
|
3470
|
-
const rej = validation;
|
|
3471
|
-
log(c.yellow, ` [TokenBudget] PRE-FLIGHT REJECT: ${task.id} — est ${rej.estimated_tokens} tokens > threshold`);
|
|
3472
|
-
log(c.gray, ` Reason: ${rej.reason}`);
|
|
3473
|
-
log(c.gray, ` Suggested split: ${rej.suggested_split.join(', ')}`);
|
|
3474
|
-
const route = (0, decomposer_feedback_1.routeToDecomposer)({
|
|
3475
|
-
original_task_id: task.id,
|
|
3476
|
-
rejection_signal: 'needs_resplit',
|
|
3477
|
-
original_estimate_tokens: rej.estimated_tokens,
|
|
3478
|
-
suggested_splits: rej.suggested_split,
|
|
3479
|
-
learnings_ref: 'docs/learnings.md §1',
|
|
3480
|
-
original_task: task,
|
|
3481
|
-
});
|
|
3482
|
-
if ('task_split' in route) {
|
|
3483
|
-
log(c.cyan, ` [DecomposerFeedback] split into ${route.task_split.length} sub-tasks via ${route.strategy}`);
|
|
3484
|
-
log(c.gray, ` ${route.rationale}`);
|
|
3485
|
-
// Inject splits as new pending tasks so they get picked up next run.
|
|
3486
|
-
// Persist to the active sprint file so sprint-runner sees them.
|
|
3487
|
-
try {
|
|
3488
|
-
this.injectSplitTasks(task, route.task_split, route.rationale);
|
|
3489
|
-
}
|
|
3490
|
-
catch (e) {
|
|
3491
|
-
log(c.red, ` Inject failed: ${e.message?.slice(0, 120)}`);
|
|
3492
|
-
}
|
|
3493
|
-
task.status = 'replaced-by-split';
|
|
3494
|
-
task.rejected_reason = `Replaced by ${route.task_split.length} per-${route.strategy === 'per_file' ? 'file' : 'part'} sub-tasks (pre-flight budget gate)`;
|
|
3495
|
-
this.persistTaskStatus(task);
|
|
3496
|
-
taskRun.status = 'replaced-by-split';
|
|
3497
|
-
taskRun.rejection_reason = `Token-budget pre-flight: ${rej.reason}`;
|
|
3498
|
-
taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
|
|
3499
|
-
this.taskRuns.push(taskRun);
|
|
3500
|
-
phantom_workspace_1.phantomWorkspace.cleanup(phantomCtx);
|
|
3501
|
-
return;
|
|
3502
|
-
}
|
|
3503
|
-
else {
|
|
3504
|
-
log(c.red, ` [DecomposerFeedback] cannot split → escalate to founder: ${route.reason}`);
|
|
3505
|
-
task.status = 'rejected';
|
|
3506
|
-
task.rejected_reason = route.reason;
|
|
3507
|
-
this.persistTaskStatus(task);
|
|
3508
|
-
taskRun.status = 'rejected';
|
|
3509
|
-
taskRun.error = route.reason;
|
|
3510
|
-
taskRun.rejection_reason = 'Token-budget pre-flight + unsplittable';
|
|
3511
|
-
taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
|
|
3512
|
-
this.taskRuns.push(taskRun);
|
|
3513
|
-
phantom_workspace_1.phantomWorkspace.cleanup(phantomCtx);
|
|
3514
|
-
return;
|
|
3515
|
-
}
|
|
3516
|
-
}
|
|
3517
|
-
for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
|
|
3518
|
-
// CTO-006 telemetry-blackout hotfix (2026-05-29, Slice A of TICKET-135):
|
|
3519
|
-
// record attempt_count on the task so it propagates via persistTaskStatus
|
|
3520
|
-
// → ACTIVE → .swarm-state → CTO's retrospective. Without this, every
|
|
3521
|
-
// post-mortem said attempt_count=? after 3 attempts. 10 zero-ship
|
|
3522
|
-
// sprints diagnosed by the CTO as "we can't root-cause because
|
|
3523
|
-
// telemetry is dark."
|
|
3524
|
-
task.attempt_count = attempt;
|
|
3525
|
-
// Reset per-attempt flags so the escalation pact only fires when THIS
|
|
3526
|
-
// attempt's execution actually tripped integrity/truncation, not stale
|
|
3527
|
-
// state from a prior attempt.
|
|
3528
|
-
delete task._integrityFailed;
|
|
3529
|
-
// sprint-1566 F0c: per-task token budget check at start of each attempt
|
|
3530
|
-
if (taskTokensSpent >= PER_TASK_TOKEN_BUDGET) {
|
|
3531
|
-
log(c.red, ` [TokenBudget] PER-TASK BUDGET EXCEEDED: ${task.id} spent ${taskTokensSpent} > ${PER_TASK_TOKEN_BUDGET} tokens — aborting retries`);
|
|
3532
|
-
task.status = 'rejected';
|
|
3533
|
-
task.rejected_reason = `Per-task budget exceeded: ${taskTokensSpent} > ${PER_TASK_TOKEN_BUDGET} tokens after ${attempt - 1} attempts`;
|
|
3534
|
-
taskRun.status = 'rejected';
|
|
3535
|
-
taskRun.error = `Per-task token budget exceeded (${taskTokensSpent}/${PER_TASK_TOKEN_BUDGET})`;
|
|
3536
|
-
taskRun.rejection_reason = 'Per-task budget exceeded';
|
|
3537
|
-
// Route to decomposer for over-budget too — same as truncation
|
|
3538
|
-
try {
|
|
3539
|
-
const route = (0, decomposer_feedback_1.routeToDecomposer)({
|
|
3540
|
-
original_task_id: task.id,
|
|
3541
|
-
rejection_signal: 'over_budget',
|
|
3542
|
-
original_estimate_tokens: taskTokensSpent,
|
|
3543
|
-
suggested_splits: [],
|
|
3544
|
-
learnings_ref: 'docs/learnings.md §1',
|
|
3545
|
-
original_task: task,
|
|
3546
|
-
});
|
|
3547
|
-
if ('task_split' in route) {
|
|
3548
|
-
this.injectSplitTasks(task, route.task_split, route.rationale);
|
|
3549
|
-
log(c.cyan, ` [DecomposerFeedback] over-budget → ${route.task_split.length} sub-tasks injected for next run`);
|
|
3550
|
-
}
|
|
3551
|
-
else {
|
|
3552
|
-
log(c.gray, ` [DecomposerFeedback] over-budget unsplittable: ${route.reason}`);
|
|
3553
|
-
}
|
|
3554
|
-
}
|
|
3555
|
-
catch (e) {
|
|
3556
|
-
log(c.red, ` Inject failed: ${e.message?.slice(0, 120)}`);
|
|
3557
|
-
}
|
|
3558
|
-
break;
|
|
3559
|
-
}
|
|
3560
|
-
log(c.blue, `\n${'='.repeat(60)}`);
|
|
3561
|
-
log(c.blue, `Task: ${task.id} | Agent: ${task.agent} | Attempt: ${attempt}/${MAX_RETRIES}`);
|
|
3562
|
-
log(c.blue, `${'='.repeat(60)}`);
|
|
3563
|
-
// AMD-26 KSL: snapshot per-attempt context for the tap.
|
|
3564
|
-
let kslSprintId = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
|
|
3565
|
-
// sprint-runner passes logs/sprint-runner-active.json — read the real sprint_id from its contents.
|
|
3566
|
-
if (kslSprintId === 'sprint-runner-active') {
|
|
3567
|
-
try {
|
|
3568
|
-
kslSprintId = JSON.parse((0, fs_1.readFileSync)(process.argv[2], 'utf-8')).sprint_id || kslSprintId;
|
|
3569
|
-
}
|
|
3570
|
-
catch { /* fall back to path-derived */ }
|
|
3571
|
-
}
|
|
3572
|
-
const kslAttemptStart = Date.now();
|
|
3573
|
-
const kslPrompt = String(task.context || task.title || task.id);
|
|
3574
|
-
task.status = 'in_progress';
|
|
3575
|
-
if (attempt === 1) {
|
|
3576
|
-
const _sprintId = kslSprintId;
|
|
3577
|
-
(0, event_bus_publisher_1.publishTaskStarted)(task.agent, _sprintId, task.id, task.title || task.id).catch(() => { });
|
|
3578
|
-
}
|
|
3579
|
-
this.stats.tasksExecuted++;
|
|
3580
|
-
taskRun.attempts = attempt;
|
|
3581
|
-
// AMD-08: IDLE → RESERVED → ACTIVE (per attempt)
|
|
3582
|
-
if (!monotask_state_machine_1.MonotaskSM.claim(task.agent, task.id)) {
|
|
3583
|
-
log(c.yellow, ` [monotask] ${task.agent} unavailable — skipping attempt ${attempt}`);
|
|
3584
|
-
continue;
|
|
3585
|
-
}
|
|
3586
|
-
monotask_state_machine_1.MonotaskSM.start(task.agent, task.id);
|
|
3587
|
-
// OMEL AMD-13: WipeWitness — capture file state before agent writes
|
|
3588
|
-
const preTokens = new Map();
|
|
3589
|
-
for (const f of (task.deliverables?.code || [])) {
|
|
3590
|
-
if ((0, fs_1.existsSync)(f))
|
|
3591
|
-
preTokens.set(f, wipe_witness_1.wipeWitness.beforeWrite(f, task.agent));
|
|
3592
|
-
}
|
|
3593
|
-
// OMEL AMD-13: HumanBrake — require approval for bulk_overwrite on high-risk files
|
|
3594
|
-
if (task.type === 'modify' && (task.deliverables?.code || []).length > 0) {
|
|
3595
|
-
const firstFile = (task.deliverables?.code || [])[0] || '';
|
|
3596
|
-
if (human_brake_1.humanBrake.isHighRisk('bulk_overwrite', { filePath: firstFile })) {
|
|
3597
|
-
const approval = await human_brake_1.humanBrake.requireApproval('bulk_overwrite');
|
|
3598
|
-
if (!approval.approved) {
|
|
3599
|
-
log(c.yellow, ` [HumanBrake] SKIPPED: ${task.id} — ${approval.reason || 'not approved'}`);
|
|
3600
|
-
task.status = 'skipped';
|
|
3601
|
-
monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `human brake: ${approval.reason || 'not approved'}`);
|
|
3602
|
-
break; // exit attempt loop — task will not execute
|
|
3603
|
-
}
|
|
3604
|
-
}
|
|
3605
|
-
}
|
|
3606
|
-
// Execute with rejection feedback if retrying
|
|
3607
|
-
const tokensBefore = _globalTokensThisRun;
|
|
3608
|
-
let result;
|
|
3609
|
-
// Chomsky gate — evaluate + rewrite (max 2×) + log all evals (Sprint 1513)
|
|
3610
|
-
if (task.context) {
|
|
3611
|
-
try {
|
|
3612
|
-
const chomskyRun = await (0, chomsky_runner_1.runChomskyGate)(task.context, agentName);
|
|
3613
|
-
if (chomskyRun.rewrites > 0) {
|
|
3614
|
-
log(c.cyan, ` [Chomsky] ${chomskyRun.rewrites}× rewrite — score ${chomskyRun.initialScore}→${chomskyRun.finalScore}/10`);
|
|
3615
|
-
task.context = chomskyRun.finalPrompt;
|
|
3616
|
-
}
|
|
3617
|
-
else if (!chomskyRun.passed) {
|
|
3618
|
-
log(c.yellow, ` [Chomsky] score ${chomskyRun.finalScore}/10 — pass-through (max rewrites reached)`);
|
|
3619
|
-
}
|
|
3620
|
-
}
|
|
3621
|
-
catch { /* gate is non-blocking — fail open */ }
|
|
3622
|
-
}
|
|
3623
|
-
try {
|
|
3624
|
-
result = await agent.execute(task, lastReview);
|
|
3625
|
-
}
|
|
3626
|
-
catch (execErr) {
|
|
3627
|
-
log(c.red, ` ✗ Execution error: ${execErr.message?.substring(0, 200)}`);
|
|
3628
|
-
monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `exec error: ${execErr.message?.substring(0, 80)}`);
|
|
3629
|
-
if (attempt < MAX_RETRIES) {
|
|
3630
|
-
log(c.yellow, ` Retrying after execution error (attempt ${attempt}/${MAX_RETRIES})...`);
|
|
3631
|
-
continue;
|
|
3632
|
-
}
|
|
3633
|
-
throw execErr; // exhausted retries
|
|
3634
|
-
}
|
|
3635
|
-
// OMEL AMD-13: WipeWitness — compare after write, emit shrink alert if > 50% loss
|
|
3636
|
-
for (const f of result.files) {
|
|
3637
|
-
const tok = preTokens.get(f);
|
|
3638
|
-
if (tok)
|
|
3639
|
-
wipe_witness_1.wipeWitness.afterWrite(tok, (0, fs_1.existsSync)(f) ? (0, fs_1.statSync)(f).size : 0);
|
|
3640
|
-
}
|
|
3641
|
-
const deltaTokens = _globalTokensThisRun - tokensBefore;
|
|
3642
|
-
taskRun.tokens_total += deltaTokens;
|
|
3643
|
-
taskTokensSpent += deltaTokens; // sprint-1566 F0c: per-task budget tracking
|
|
3644
|
-
taskRun.model_used = result.model || taskRun.model_used;
|
|
3645
|
-
if (result.files.length === 0) {
|
|
3646
|
-
// 2026-05-27 diagnostic patch: structured "no files produced" rejection.
|
|
3647
|
-
// Captures (a) declared deliverable count, (b) model used, (c) failure
|
|
3648
|
-
// reasons collected during execute() (e.g. edit-mode-empty:foo.ts:380lines,
|
|
3649
|
-
// truncated, empty-edit-array). Replaces an opaque single-line log that
|
|
3650
|
-
// gave the founder no idea why the swarm was no-oping.
|
|
3651
|
-
const dels = [...(task.deliverables?.code || []), ...(task.deliverables?.tests || []), ...(task.deliverables?.docs || [])];
|
|
3652
|
-
const reasons = task._failureReasons || [];
|
|
3653
|
-
const inferred = reasons.length ? reasons.join('; ') : (dels.length === 0 ? 'empty-deliverables' : 'unknown');
|
|
3654
|
-
const structured = `No files produced [model=${result.model || 'n/a'}, type=${task.type}, deliverables=${dels.length}, reason=${inferred}]`;
|
|
3655
|
-
log(c.red, ` ✗ ${structured}`);
|
|
3656
|
-
monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `no files: ${inferred.slice(0, 60)}`);
|
|
3657
|
-
task.status = 'rejected';
|
|
3658
|
-
this.persistTaskStatus(task);
|
|
3659
|
-
taskRun.status = 'rejected';
|
|
3660
|
-
taskRun.error = structured;
|
|
3661
|
-
taskRun.rejection_reason = structured;
|
|
3662
|
-
taskRun.failure_mode = inferred;
|
|
3663
|
-
taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
|
|
3664
|
-
this.taskRuns.push(taskRun);
|
|
3665
|
-
return;
|
|
3666
|
-
}
|
|
3667
|
-
// B.10: Local QA gate — fast PASS/FAIL before expensive cloud supervisor
|
|
3668
|
-
const qaFileContents = result.files.map(f => ({ path: f, content: (0, fs_1.existsSync)(f) ? (0, fs_1.readFileSync)(f, 'utf-8') : '' }));
|
|
3669
|
-
const qaResult = await localQAGate(task, qaFileContents);
|
|
3670
|
-
if (!qaResult.pass) {
|
|
3671
|
-
log(c.yellow, ` [QA-gate] FAIL — ${qaResult.reason}`);
|
|
3672
|
-
this.stats.rejected++;
|
|
3673
|
-
task.status = 'rejected'; // will be reset on retry
|
|
3674
|
-
safeResetLastCommit(task.id, task.agent, task.type, ' ');
|
|
3675
|
-
(0, code_failure_logger_1.logCodeFailure)({ taskId: task.id, sprintId: resolveActiveSprintId(), agentId: resolveAgentDid(task.agent), attemptNum: attempt, score: 0, model: taskRun.model_used || result?.model || task.model || 'unknown', rejectionReason: qaResult.reason, issues: [], failType: 'qa_gate' });
|
|
3676
|
-
monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `QA gate: ${qaResult.reason}`);
|
|
3677
|
-
if (attempt < MAX_RETRIES) {
|
|
3678
|
-
log(c.yellow, ' QA gate failed — retrying without supervisor...');
|
|
3679
|
-
continue;
|
|
3680
|
-
}
|
|
3681
|
-
taskRun.status = 'rejected';
|
|
3682
|
-
taskRun.rejection_reason = `QA gate: ${qaResult.reason}`;
|
|
3683
|
-
taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
|
|
3684
|
-
this.taskRuns.push(taskRun);
|
|
3685
|
-
return;
|
|
3686
|
-
}
|
|
3687
|
-
log(c.gray, ` [QA-gate] PASS — ${qaResult.reason}`);
|
|
3688
|
-
// Dual Supervisor review (DeepSeek/ClawRouter + Haiku in parallel)
|
|
3689
|
-
task.status = 'review';
|
|
3690
|
-
const [review1, review2] = await Promise.all([
|
|
3691
|
-
this.supervisor.reviewTask(task, result.files),
|
|
3692
|
-
this.supervisor2.reviewTask(task, result.files),
|
|
3693
|
-
]);
|
|
3694
|
-
const dualResult = await reconcileSupervisorReviews(review1, review2, task, this.ceo);
|
|
3695
|
-
const review = dualResult.finalReview;
|
|
3696
|
-
if (!dualResult.consensus)
|
|
3697
|
-
this.stats.conflicts++;
|
|
3698
|
-
if (dualResult.escalatedToCEO)
|
|
3699
|
-
this.stats.escalations++;
|
|
3700
|
-
lastReview = review;
|
|
3701
|
-
task.output = { files: result.files, commit: '', model: result.model, review };
|
|
3702
|
-
// CTO-006 telemetry-blackout hotfix (2026-05-29, Slice A of TICKET-135):
|
|
3703
|
-
// record score + grade + rejection_reason on the task so they propagate
|
|
3704
|
-
// via persistTaskStatus. Without this, every post-mortem said "score=?".
|
|
3705
|
-
task.score = review?.score;
|
|
3706
|
-
task.grade = review?.grade;
|
|
3707
|
-
task.rejection_reason = review?.verdict !== 'APPROVED' ? (review?.summary?.slice(0, 240) ?? null) : null;
|
|
3708
|
-
if (review.verdict === 'APPROVED') {
|
|
3709
|
-
task.status = 'done';
|
|
3710
|
-
this.persistTaskStatus(task); // sprint-1547: persist before any post-approval work that could exit early
|
|
3711
|
-
this.stats.approved++;
|
|
3712
|
-
log(c.green, `\n✓ Task ${task.id} APPROVED on attempt ${attempt} (${review.score}/100)`);
|
|
3713
|
-
const _sprintIdApproved = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
|
|
3714
|
-
(0, event_bus_publisher_1.publishTaskCompleted)(task.agent, _sprintIdApproved, task.id, task.title || task.id, 0).catch(() => { });
|
|
3715
|
-
aar_middleware_1.AARMiddleware.generateAndLog({ agentId: task.agent, taskId: task.id, sprintId: _sprintIdApproved, skillId: task.skill_id || task.type || 'code-generation', outcomeScore: review.score, actionSummary: (task.title || task.id).substring(0, 140), status: 'success' }).catch(() => { });
|
|
3716
|
-
// AMD-20: PRM Judge — constitutional reward signal for approved tasks
|
|
3717
|
-
(0, perm_judge_1.scoreTask)({ task_id: task.id, agent_id: task.agent, sprint_id: _sprintIdApproved, task_title: task.title || task.id, output_summary: review.summary || '', status: 'done' }).catch(() => { });
|
|
3718
|
-
(0, trust_score_updater_1.updateTrustScore)(task.agent, 'approved', review.score); // Sprint 703: Dynamic trust update
|
|
3719
|
-
// Sprint 706: BrainX — store success memory
|
|
3720
|
-
try {
|
|
3721
|
-
if (this._brainxBridge)
|
|
3722
|
-
await this._brainxBridge.storeTaskMemory({ agent_id: task.agent, task_id: task.id, task_title: task.title || task.id, outcome: 'success', score: review.score, summary: (task.title || task.id).substring(0, 200), files_modified: result.files || [] });
|
|
3723
|
-
}
|
|
3724
|
-
catch { /* non-blocking */ }
|
|
3725
|
-
(0, skill_crystalliser_1.crystalliseSkill)({ agentId: task.agent, taskId: task.id, sprintId: _sprintIdApproved, taskTitle: task.title || task.id, taskType: task.type || 'feature', model: task.model || 'qwen3:14b', taskTarget: task.task_target || 'local', score: review.score, approachSummary: (task.title || task.id).substring(0, 200), keyPatterns: review.strengths || [], antiPatterns: [] });
|
|
3726
|
-
(0, code_asset_crystalliser_1.crystalliseCodeAsset)({ agentId: task.agent, sprintId: _sprintIdApproved, taskId: task.id, taskTitle: task.title || task.id, files: result.files, supervisorScore: review.score, origin: 'kognai-core' });
|
|
3727
|
-
monotask_state_machine_1.MonotaskSM.complete(task.agent, task.id);
|
|
3728
|
-
taskRun.status = 'done';
|
|
3729
|
-
taskRun.files_written = result.files;
|
|
3730
|
-
taskRun.review = {
|
|
3731
|
-
verdict: review.verdict,
|
|
3732
|
-
score: review.score,
|
|
3733
|
-
grade: review.grade,
|
|
3734
|
-
score_rationale: review.score_rationale,
|
|
3735
|
-
strengths: review.strengths,
|
|
3736
|
-
};
|
|
3737
|
-
// SCORE protocol hook (founder rule 2026-05-27): every supervisor
|
|
3738
|
-
// grade against a spawned-citizen agent feeds the citizen's reputation
|
|
3739
|
-
// via the ACP rubric. Founding agents (CEO/sup/sherlock/etc.) aren't
|
|
3740
|
-
// in the citizens registry yet so they're skipped here — backfill TBD.
|
|
3741
|
-
try {
|
|
3742
|
-
recordScoreForCitizen(task.agent, _sprintIdApproved, task.id, review.grade, 'approved-path');
|
|
3743
|
-
}
|
|
3744
|
-
catch (e) {
|
|
3745
|
-
log(c.gray, ` [SCORE] skip: ${(e?.message || '').slice(0, 100)}`);
|
|
3746
|
-
}
|
|
3747
|
-
taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
|
|
3748
|
-
this.taskRuns.push(taskRun);
|
|
3749
|
-
(0, orchestrator_tap_1.tapAttempt)({
|
|
3750
|
-
sprint_id: kslSprintId, task_id: task.id, attempt, agent: task.agent,
|
|
3751
|
-
model: result.model || taskRun.model_used || 'unknown',
|
|
3752
|
-
prompt: kslPrompt,
|
|
3753
|
-
reply: `[approved score ${review.score}/100] files: ${result.files.join(', ')}\n\n${review.summary || ''}`,
|
|
3754
|
-
tools_used: [], errors: [], cost_usd: 0,
|
|
3755
|
-
duration_ms: Date.now() - kslAttemptStart,
|
|
3756
|
-
});
|
|
3757
|
-
return;
|
|
3758
|
-
}
|
|
3759
|
-
// Rejected — check for truncation pattern
|
|
3760
|
-
this.stats.rejected++;
|
|
3761
|
-
taskRun.review = {
|
|
3762
|
-
verdict: review.verdict,
|
|
3763
|
-
score: review.score,
|
|
3764
|
-
grade: review.grade,
|
|
3765
|
-
score_rationale: review.score_rationale,
|
|
3766
|
-
issues: review.issues,
|
|
3767
|
-
summary: review.summary,
|
|
3768
|
-
};
|
|
3769
|
-
// SCORE hook (rejected path) — record a negative-grade evaluation against
|
|
3770
|
-
// the citizen so reputation actually moves on bad work.
|
|
3771
|
-
try {
|
|
3772
|
-
const _sid = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
|
|
3773
|
-
recordScoreForCitizen(task.agent, _sid, task.id, review.grade, 'rejected-path');
|
|
3774
|
-
}
|
|
3775
|
-
catch (e) {
|
|
3776
|
-
log(c.gray, ` [SCORE] skip: ${(e?.message || '').slice(0, 100)}`);
|
|
3777
|
-
}
|
|
3778
|
-
if (this.isTruncationRejection(review)) {
|
|
3779
|
-
truncationCount++;
|
|
3780
|
-
log(c.yellow, `\n↻ Task ${task.id} REJECTED on attempt ${attempt} (${review.score}/100) [TRUNCATION ${truncationCount}/${TRUNCATION_THRESHOLD}]`);
|
|
3781
|
-
// 2026-05-28 model-escalation pact: deterministic CTO/CEO sign-off via
|
|
3782
|
-
// policy. If DeepSeek truncated this task once, the NEXT retry routes
|
|
3783
|
-
// through ClawRouter → claude-sonnet-4.6. Cost ceiling ~$0.10 per
|
|
3784
|
-
// escalation at typical token volumes — well under the daily wallet.
|
|
3785
|
-
// Cleared in CodingAgent.execute after consumption.
|
|
3786
|
-
task._escalateNext = 'TRUNCATION';
|
|
3787
|
-
}
|
|
3788
|
-
else if (task._integrityFailed) {
|
|
3789
|
-
log(c.yellow, `\n↻ Task ${task.id} REJECTED on attempt ${attempt} (${review.score}/100) [INTEGRITY-FAILED]`);
|
|
3790
|
-
// Same pact: destructive-rewrite (file shrank past integrity threshold)
|
|
3791
|
-
// signals the cheap model can't hold the file's contract — upgrade.
|
|
3792
|
-
task._escalateNext = 'INTEGRITY_FAILED';
|
|
3793
|
-
}
|
|
3794
|
-
else if ((review?.score ?? 100) < 30 &&
|
|
3795
|
-
attempt < MAX_RETRIES &&
|
|
3796
|
-
((task.deliverables?.code || []).some((f) => /\.(md|mdx)$/i.test(f))
|
|
3797
|
-
|| ['research', 'spec', 'docs'].includes((task.type || '').toLowerCase()))) {
|
|
3798
|
-
// 2026-05-28 pact expansion: low-score spec/docs rejections also escalate.
|
|
3799
|
-
// sprint-1613 failure mode: DeepSeek hit a ~3-5k output ceiling on a
|
|
3800
|
-
// 300-line spec request and gave up at one section, then the same
|
|
3801
|
-
// capacity ceiling re-trapped 5 sub-task retries. Truncation/integrity
|
|
3802
|
-
// checks didn't catch it (the file was complete, just not the SPEC).
|
|
3803
|
-
// Catch low-score (<30) markdown/spec rejections explicitly so the
|
|
3804
|
-
// next retry hits Sonnet instead of redo-on-the-same-cheap-model.
|
|
3805
|
-
// Guarded by attempt < MAX_RETRIES so we don't waste the flag on the
|
|
3806
|
-
// final attempt where there's no retry to consume it.
|
|
3807
|
-
log(c.yellow, `\n↻ Task ${task.id} REJECTED on attempt ${attempt} (${review.score}/100) [LOW-SCORE-SPEC]`);
|
|
3808
|
-
task._escalateNext = 'LOW_SCORE_SPEC';
|
|
3809
|
-
}
|
|
3810
|
-
else {
|
|
3811
|
-
log(c.yellow, `\n↻ Task ${task.id} REJECTED on attempt ${attempt} (${review.score}/100)`);
|
|
3812
|
-
}
|
|
3813
|
-
safeResetLastCommit(task.id, task.agent, task.type, ' ');
|
|
3814
|
-
(0, code_failure_logger_1.logCodeFailure)({ taskId: task.id, sprintId: resolveActiveSprintId(), agentId: resolveAgentDid(task.agent), attemptNum: attempt, score: review?.score || 0, model: taskRun.model_used || result?.model || task.model || 'unknown', rejectionReason: review?.summary || 'supervisor rejected', issues: review?.issues || [], failType: 'supervisor_rejected' });
|
|
3815
|
-
// Sprint 701: AAR logging on REJECTION path (governance remediation)
|
|
3816
|
-
const _sprintIdRejected = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
|
|
3817
|
-
aar_middleware_1.AARMiddleware.generateAndLog({ agentId: task.agent, taskId: task.id, sprintId: _sprintIdRejected, skillId: task.skill_id || task.type || 'code-generation', outcomeScore: review?.score || 0, actionSummary: `REJECTED: ${(task.title || task.id).substring(0, 120)} (attempt ${attempt})`, status: 'rejected' }).catch(() => { });
|
|
3818
|
-
(0, trust_score_updater_1.updateTrustScore)(task.agent, 'rejected', review?.score || 0); // Sprint 703: Dynamic trust update
|
|
3819
|
-
// Sprint 706: BrainX — store failure memory
|
|
3820
|
-
try {
|
|
3821
|
-
if (this._brainxBridge)
|
|
3822
|
-
await this._brainxBridge.storeTaskMemory({ agent_id: task.agent, task_id: task.id, task_title: task.title || task.id, outcome: 'failure', score: review?.score || 0, summary: `REJECTED: ${(task.title || task.id).substring(0, 180)}`, files_modified: [] });
|
|
3823
|
-
}
|
|
3824
|
-
catch { /* non-blocking */ }
|
|
3825
|
-
monotask_state_machine_1.MonotaskSM.release(task.agent, task.id, `rejected attempt ${attempt}`);
|
|
3826
|
-
(0, orchestrator_tap_1.tapAttempt)({
|
|
3827
|
-
sprint_id: kslSprintId, task_id: task.id, attempt, agent: task.agent,
|
|
3828
|
-
model: taskRun.model_used || result?.model || task.model || 'unknown',
|
|
3829
|
-
prompt: kslPrompt,
|
|
3830
|
-
reply: `[rejected score ${review?.score || 0}/100] ${review?.summary || 'supervisor rejected'}`,
|
|
3831
|
-
tools_used: [],
|
|
3832
|
-
errors: [{ kind: 'supervisor_rejected', message: review?.summary || 'supervisor rejected' }],
|
|
3833
|
-
cost_usd: 0,
|
|
3834
|
-
duration_ms: Date.now() - kslAttemptStart,
|
|
3835
|
-
});
|
|
3836
|
-
// CTO AUTO-DECOMPOSE: After N consecutive truncation rejections, split the task
|
|
3837
|
-
if (truncationCount >= TRUNCATION_THRESHOLD) {
|
|
3838
|
-
log(c.cyan, `\n🔧 TRUNCATION THRESHOLD REACHED — CTO decomposing ${task.id}...`);
|
|
3839
|
-
const subtasks = await this.ctoDecomposeTask(task);
|
|
3840
|
-
if (subtasks.length > 1) {
|
|
3841
|
-
log(c.cyan, ` Executing ${subtasks.length} sub-tasks sequentially...`);
|
|
3842
|
-
let allPassed = true;
|
|
3843
|
-
for (const subtask of subtasks) {
|
|
3844
|
-
// Check sub-task dependencies
|
|
3845
|
-
const subDeps = subtask.dependencies || [];
|
|
3846
|
-
const unmetSubDeps = subDeps.filter(d => {
|
|
3847
|
-
const depSt = subtasks.find(s => s.id === d);
|
|
3848
|
-
return depSt && depSt.status !== 'done';
|
|
3849
|
-
});
|
|
3850
|
-
if (unmetSubDeps.length > 0) {
|
|
3851
|
-
log(c.yellow, ` Skipping sub-task ${subtask.id}: unmet deps [${unmetSubDeps.join(', ')}]`);
|
|
3852
|
-
allPassed = false;
|
|
3853
|
-
continue;
|
|
3854
|
-
}
|
|
3855
|
-
const passed = await this.executeSubTask(subtask, 5);
|
|
3856
|
-
if (passed) {
|
|
3857
|
-
subtask.status = 'done';
|
|
3858
|
-
}
|
|
3859
|
-
else {
|
|
3860
|
-
allPassed = false;
|
|
3861
|
-
subtask.status = 'rejected';
|
|
3862
|
-
log(c.red, ` Sub-task ${subtask.id} failed — stopping decomposition chain`);
|
|
3863
|
-
break;
|
|
3864
|
-
}
|
|
3865
|
-
}
|
|
3866
|
-
if (allPassed) {
|
|
3867
|
-
task.status = 'done';
|
|
3868
|
-
log(c.green, `\n✓ Task ${task.id} COMPLETED via CTO decomposition (${subtasks.length} sub-tasks)`);
|
|
3869
|
-
taskRun.status = 'done';
|
|
3870
|
-
taskRun.files_written = subtasks.flatMap(st => st.deliverables.code || []);
|
|
3871
|
-
}
|
|
3872
|
-
else {
|
|
3873
|
-
task.status = 'rejected';
|
|
3874
|
-
log(c.red, `\n✗ Task ${task.id} FAILED even after CTO decomposition`);
|
|
3875
|
-
taskRun.status = 'rejected';
|
|
3876
|
-
taskRun.rejection_reason = review.summary;
|
|
3877
|
-
}
|
|
3878
|
-
this.persistTaskStatus(task);
|
|
3879
|
-
taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
|
|
3880
|
-
this.taskRuns.push(taskRun);
|
|
3881
|
-
return;
|
|
3882
|
-
}
|
|
3883
|
-
// If decomposition returned <=1 task, continue with normal retry loop
|
|
3884
|
-
log(c.yellow, ' Decomposition produced ≤1 sub-task, continuing normal retries...');
|
|
3885
|
-
truncationCount = 0; // Reset to avoid re-triggering
|
|
3886
|
-
}
|
|
3887
|
-
if (attempt < MAX_RETRIES) {
|
|
3888
|
-
log(c.yellow, ` Retrying with rejection feedback...`);
|
|
3889
|
-
}
|
|
3890
|
-
}
|
|
3891
|
-
task.status = 'rejected';
|
|
3892
|
-
this.persistTaskStatus(task);
|
|
3893
|
-
log(c.red, `\n✗ Task ${task.id} FAILED after ${MAX_RETRIES} attempts`);
|
|
3894
|
-
const _sprintIdFailed = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
|
|
3895
|
-
(0, event_bus_publisher_1.publishTaskFailed)(task.agent, _sprintIdFailed, task.id, task.title || task.id, lastReview?.summary || `Failed after ${MAX_RETRIES} attempts`).catch(() => { });
|
|
3896
|
-
taskRun.status = 'rejected';
|
|
3897
|
-
taskRun.rejection_reason = lastReview?.summary || `Failed after ${MAX_RETRIES} attempts`;
|
|
3898
|
-
taskRun.duration_seconds = Math.round((Date.now() - taskRunStart) / 1000);
|
|
3899
|
-
this.taskRuns.push(taskRun);
|
|
3900
|
-
}
|
|
3901
|
-
finally {
|
|
3902
|
-
// OMEL AMD-13: Always wipe the phantom tmpdir on task exit (success or failure)
|
|
3903
|
-
phantom_workspace_1.phantomWorkspace.cleanup(phantomCtx);
|
|
3904
|
-
}
|
|
3905
|
-
}
|
|
3906
|
-
async run() {
|
|
3907
|
-
const startTime = Date.now();
|
|
3908
|
-
const sprintStartTime = new Date().toISOString();
|
|
3909
|
-
let gitHeadBefore = 'unknown';
|
|
3910
|
-
try {
|
|
3911
|
-
gitHeadBefore = (0, child_process_1.execSync)('git rev-parse --short HEAD', { timeout: 5000 }).toString().trim();
|
|
3912
|
-
}
|
|
3913
|
-
catch { /* ok */ }
|
|
3914
|
-
log(c.bold, '\n🚀 Starting orchestration run...\n');
|
|
3915
|
-
if (SOVEREIGN_MODE)
|
|
3916
|
-
log(c.yellow, ' ⚡ SOVEREIGN MODE — all inference local ($0 cost floor)');
|
|
3917
|
-
(0, wallet_state_1.logWalletStatus)();
|
|
3918
|
-
// Mission Control — connect and register this sprint run
|
|
3919
|
-
const mc = (0, mc_client_1.createMCClient)('sprint-orchestrator', 'worker');
|
|
3920
|
-
let mcConnected = false;
|
|
3921
|
-
try {
|
|
3922
|
-
await mc.connect();
|
|
3923
|
-
mcConnected = true;
|
|
3924
|
-
log(c.gray, ' [MC] Connected to Mission Control');
|
|
3925
|
-
}
|
|
3926
|
-
catch {
|
|
3927
|
-
log(c.gray, ' [MC] Mission Control unavailable — running without telemetry');
|
|
3928
|
-
}
|
|
3929
|
-
// 1. Load tasks
|
|
3930
|
-
this.loadTasks();
|
|
3931
|
-
// 069-06: emit sprint started event
|
|
3932
|
-
const _evtSprintId = (process.argv[2] || 'sprints/current.json').replace(/.*\//, '').replace('.json', '');
|
|
3933
|
-
// Sprint 706: BrainX swarm bridge — create at sprint start
|
|
3934
|
-
let brainxBridge = null;
|
|
3935
|
-
try {
|
|
3936
|
-
const agentIds = Array.from(new Set(this.tasks.map(t => t.agent)));
|
|
3937
|
-
brainxBridge = (0, brainx_swarm_bridge_1.createSwarmBridge)(`swarm-${Date.now()}`, _evtSprintId, agentIds);
|
|
3938
|
-
this._brainxBridge = brainxBridge;
|
|
3939
|
-
log(c.gray, ` [BrainX] Bridge created for ${agentIds.length} agents`);
|
|
3940
|
-
}
|
|
3941
|
-
catch (e) {
|
|
3942
|
-
log(c.gray, ` [BrainX] Bridge creation skipped: ${e.message}`);
|
|
3943
|
-
}
|
|
3944
|
-
(0, event_bus_publisher_1.publishSprintStarted)(_evtSprintId, this.tasks.filter(t => t.status === 'pending').length).catch(() => { });
|
|
3945
|
-
if (this.tasks.length === 0) {
|
|
3946
|
-
log(c.yellow, 'No tasks to execute');
|
|
3947
|
-
if (mcConnected)
|
|
3948
|
-
await mc.disconnect().catch(() => { });
|
|
3949
|
-
return;
|
|
3950
|
-
}
|
|
3951
|
-
// ── CTO APPROVAL GATE — Exec Protocol §17 ──────────────────────────────
|
|
3952
|
-
// Every autonomous sprint must be approved by the CTO agent before execution.
|
|
3953
|
-
// Human-submitted sprints (source: 'human') are auto-approved.
|
|
3954
|
-
// Prevents the swarm from inventing its own work outside the execution plan.
|
|
3955
|
-
{
|
|
3956
|
-
const sprintFile = process.argv[2] || 'sprints/current.json';
|
|
3957
|
-
const sprintRaw = JSON.parse((0, fs_1.readFileSync)(sprintFile, 'utf-8'));
|
|
3958
|
-
const sprintSource = sprintRaw.source || (sprintRaw.swarm === 'NOT USED' ? 'human' : 'autonomous_loop');
|
|
3959
|
-
const proposal = {
|
|
3960
|
-
sprint_id: _evtSprintId,
|
|
3961
|
-
title: sprintRaw.name || sprintRaw.title || _evtSprintId,
|
|
3962
|
-
description: sprintRaw.goal || sprintRaw.description || '',
|
|
3963
|
-
tasks: this.tasks.map(t => `${t.id}: ${t.title || t.context || t.type}`),
|
|
3964
|
-
estimated_complexity: sprintRaw.estimated_complexity || 'medium',
|
|
3965
|
-
source: sprintSource,
|
|
3966
|
-
// Sprint 1457 BUGFIX: pass Rule 3 contract fields from sprint JSON to CTO gate
|
|
3967
|
-
inputs: sprintRaw.inputs,
|
|
3968
|
-
outputs: sprintRaw.outputs,
|
|
3969
|
-
success_criteria: sprintRaw.success_criteria,
|
|
3970
|
-
};
|
|
3971
|
-
log(c.magenta, `\n--- CTO Approval Gate ---`);
|
|
3972
|
-
log(c.gray, ` Sprint: ${proposal.sprint_id} — "${proposal.title}"`);
|
|
3973
|
-
log(c.gray, ` Source: ${proposal.source} (${proposal.tasks.length} tasks)`);
|
|
3974
|
-
const ctoResult = await (0, cto_approval_gate_1.requestCTOApproval)(proposal, process.cwd(), 'kognai');
|
|
3975
|
-
if (!ctoResult.approved) {
|
|
3976
|
-
log(c.red, ` ✘ CTO REJECTED: ${ctoResult.reason}`);
|
|
3977
|
-
log(c.red, ` Plan reference: ${ctoResult.plan_reference}`);
|
|
3978
|
-
log(c.red, ` Confidence: ${ctoResult.cto_confidence}%`);
|
|
3979
|
-
log(c.yellow, ` Sprint ${_evtSprintId} will NOT execute. Saving rejection to sprint file.`);
|
|
3980
|
-
// Write rejection to sprint file so the loop doesn't retry
|
|
3981
|
-
try {
|
|
3982
|
-
sprintRaw.cto_gate = {
|
|
3983
|
-
approved: false,
|
|
3984
|
-
reason: ctoResult.reason,
|
|
3985
|
-
plan_reference: ctoResult.plan_reference,
|
|
3986
|
-
confidence: ctoResult.cto_confidence,
|
|
3987
|
-
timestamp: ctoResult.timestamp,
|
|
3988
|
-
};
|
|
3989
|
-
(0, fs_1.writeFileSync)(sprintFile, JSON.stringify(sprintRaw, null, 2));
|
|
3990
|
-
}
|
|
3991
|
-
catch { /* non-critical */ }
|
|
3992
|
-
if (mcConnected)
|
|
3993
|
-
await mc.disconnect().catch(() => { });
|
|
3994
|
-
return;
|
|
3995
|
-
}
|
|
3996
|
-
log(c.green, ` ✓ CTO APPROVED: ${ctoResult.reason}`);
|
|
3997
|
-
log(c.gray, ` Plan reference: ${ctoResult.plan_reference} (confidence: ${ctoResult.cto_confidence}%)`);
|
|
3998
|
-
}
|
|
3999
|
-
// ── End CTO Gate ────────────────────────────────────────────────────────
|
|
4000
|
-
// 2. CEO initial assessment (B.14: once per sprint, not once per conflict)
|
|
4001
|
-
log(c.magenta, '\n--- Phase 1: CEO Initial Assessment ---');
|
|
4002
|
-
await this.ceo.reviewSprintProgress(this.tasks);
|
|
4003
|
-
const _ceoIntentDone = true; // flag for Phase 5: only re-run if ≥2 rejected
|
|
4004
|
-
// 3. Execute coding tasks with review loop
|
|
4005
|
-
log(c.blue, '\n--- Phase 2: Sprint Execution ---');
|
|
4006
|
-
// Auto-cascade: if a task is rejected, immediately mark all downstream dependents as skipped
|
|
4007
|
-
// This prevents tasks from being stuck as 'pending' forever across retries
|
|
4008
|
-
let cascaded = true;
|
|
4009
|
-
while (cascaded) {
|
|
4010
|
-
cascaded = false;
|
|
4011
|
-
for (const task of this.tasks) {
|
|
4012
|
-
if (task.status !== 'pending')
|
|
4013
|
-
continue;
|
|
4014
|
-
const deps = task.dependencies || [];
|
|
4015
|
-
const blockedBy = deps.filter(d => {
|
|
4016
|
-
const depTask = this.tasks.find(t => t.id === d);
|
|
4017
|
-
return depTask && (depTask.status === 'rejected' || depTask.status === 'skipped');
|
|
4018
|
-
});
|
|
4019
|
-
if (blockedBy.length > 0) {
|
|
4020
|
-
task.status = 'skipped';
|
|
4021
|
-
task.skippedReason = `Blocked by: ${blockedBy.join(', ')}`;
|
|
4022
|
-
log(c.yellow, ` Auto-skipped ${task.id}: blocked by rejected/skipped deps [${blockedBy.join(', ')}]`);
|
|
4023
|
-
cascaded = true;
|
|
4024
|
-
}
|
|
4025
|
-
}
|
|
4026
|
-
}
|
|
4027
|
-
// B.16: Wave-based parallel fan-out
|
|
4028
|
-
// Each wave = all tasks whose dependencies are satisfied and that don't share output files.
|
|
4029
|
-
// Serialized only when deliverable paths overlap (file conflict detection).
|
|
4030
|
-
const remaining = this.tasks.filter(t => t.status === 'pending');
|
|
4031
|
-
while (remaining.length > 0) {
|
|
4032
|
-
// Find tasks whose dependencies are all done/skipped
|
|
4033
|
-
const ready = remaining.filter(task => {
|
|
4034
|
-
const deps = task.dependencies || [];
|
|
4035
|
-
return deps.every(d => {
|
|
4036
|
-
const dep = this.tasks.find(t => t.id === d);
|
|
4037
|
-
return !dep || dep.status === 'done' || dep.status === 'skipped';
|
|
4038
|
-
});
|
|
4039
|
-
});
|
|
4040
|
-
if (ready.length === 0)
|
|
4041
|
-
break; // dependency deadlock — bail
|
|
4042
|
-
// File conflict detection: build wave without overlapping deliverables
|
|
4043
|
-
const filesInWave = new Set();
|
|
4044
|
-
const wave = [];
|
|
4045
|
-
for (const task of ready) {
|
|
4046
|
-
const taskFiles = [
|
|
4047
|
-
...(task.deliverables?.code || []),
|
|
4048
|
-
...(task.deliverables?.tests || []),
|
|
4049
|
-
...(task.deliverables?.docs || []),
|
|
4050
|
-
];
|
|
4051
|
-
const hasConflict = taskFiles.some(f => filesInWave.has(f));
|
|
4052
|
-
if (!hasConflict) {
|
|
4053
|
-
wave.push(task);
|
|
4054
|
-
taskFiles.forEach(f => filesInWave.add(f));
|
|
4055
|
-
}
|
|
4056
|
-
// conflicting tasks stay in remaining for next wave
|
|
4057
|
-
}
|
|
4058
|
-
if (wave.length === 0)
|
|
4059
|
-
wave.push(ready[0]); // break deadlock: force one task
|
|
4060
|
-
// B.16: Split wave by task_target — local tasks serialized (Ollama can only run one at a time),
|
|
4061
|
-
// cloud tasks run concurrently. Prevents Ollama queue timeout on parallel fan-out.
|
|
4062
|
-
const localWave = wave.filter(t => t.task_target === 'local');
|
|
4063
|
-
const cloudWave = wave.filter(t => t.task_target !== 'local');
|
|
4064
|
-
// B.16-RL: Cloud concurrency cap — prevents burning the Claude 5h token budget.
|
|
4065
|
-
// Default: 1 (serial). Override: MAX_CLOUD_CONCURRENCY env var.
|
|
4066
|
-
const MAX_CLOUD_CONCURRENCY = parseInt(process.env.MAX_CLOUD_CONCURRENCY ?? '1', 10);
|
|
4067
|
-
if (cloudWave.length > 1 && MAX_CLOUD_CONCURRENCY > 1) {
|
|
4068
|
-
log(c.blue, ` [B.16] Parallel fan-out: ${cloudWave.length} cloud tasks (cap: ${MAX_CLOUD_CONCURRENCY})`);
|
|
4069
|
-
}
|
|
4070
|
-
else if (cloudWave.length > 1) {
|
|
4071
|
-
log(c.blue, ` [B.16-RL] Serial cloud execution: ${cloudWave.length} tasks (MAX_CLOUD_CONCURRENCY=1)`);
|
|
4072
|
-
}
|
|
4073
|
-
if (localWave.length > 1) {
|
|
4074
|
-
log(c.blue, ` [B.16] Sequential execution: ${localWave.length} local tasks (Ollama serialized)`);
|
|
4075
|
-
}
|
|
4076
|
-
else if (localWave.length === 1 && cloudWave.length === 0) {
|
|
4077
|
-
// single task, no label needed
|
|
4078
|
-
}
|
|
4079
|
-
// Execute cloud tasks in batches of MAX_CLOUD_CONCURRENCY
|
|
4080
|
-
for (let i = 0; i < cloudWave.length; i += MAX_CLOUD_CONCURRENCY) {
|
|
4081
|
-
const batch = cloudWave.slice(i, i + MAX_CLOUD_CONCURRENCY);
|
|
4082
|
-
await Promise.all(batch.map(t => this.executeTask(t)));
|
|
4083
|
-
}
|
|
4084
|
-
for (const t of localWave) {
|
|
4085
|
-
await this.executeTask(t);
|
|
4086
|
-
}
|
|
4087
|
-
// Remove executed tasks from remaining
|
|
4088
|
-
for (const t of wave) {
|
|
4089
|
-
const idx = remaining.indexOf(t);
|
|
4090
|
-
if (idx >= 0)
|
|
4091
|
-
remaining.splice(idx, 1);
|
|
4092
|
-
}
|
|
4093
|
-
// Cascade rejections
|
|
4094
|
-
cascaded = true;
|
|
4095
|
-
while (cascaded) {
|
|
4096
|
-
cascaded = false;
|
|
4097
|
-
for (const t of this.tasks) {
|
|
4098
|
-
if (t.status !== 'pending')
|
|
4099
|
-
continue;
|
|
4100
|
-
const tDeps = t.dependencies || [];
|
|
4101
|
-
const tBlocked = tDeps.filter(d => {
|
|
4102
|
-
const depTask = this.tasks.find(x => x.id === d);
|
|
4103
|
-
return depTask && (depTask.status === 'rejected' || depTask.status === 'skipped');
|
|
4104
|
-
});
|
|
4105
|
-
if (tBlocked.length > 0) {
|
|
4106
|
-
t.status = 'skipped';
|
|
4107
|
-
t.skippedReason = `Blocked by: ${tBlocked.join(', ')}`;
|
|
4108
|
-
log(c.yellow, ` Auto-skipped ${t.id}: blocked by deps [${tBlocked.join(', ')}]`);
|
|
4109
|
-
cascaded = true;
|
|
4110
|
-
}
|
|
4111
|
-
}
|
|
4112
|
-
}
|
|
4113
|
-
// Remove newly-skipped/rejected from remaining
|
|
4114
|
-
for (let i = remaining.length - 1; i >= 0; i--) {
|
|
4115
|
-
if (remaining[i].status === 'skipped' || remaining[i].status === 'rejected') {
|
|
4116
|
-
remaining.splice(i, 1);
|
|
4117
|
-
}
|
|
4118
|
-
}
|
|
4119
|
-
}
|
|
4120
|
-
// 4. CTO data-driven analysis + CMO reports
|
|
4121
|
-
log(c.cyan, '\n--- Phase 3: CTO Data-Driven Analysis + CMO Reports ---');
|
|
4122
|
-
let ctoReport = { summary: '', proposals: [], metrics_reviewed: [] };
|
|
4123
|
-
let ctoDecisions = '';
|
|
4124
|
-
let cmoReports = '';
|
|
4125
|
-
try {
|
|
4126
|
-
ctoReport = await this.cto.analyze();
|
|
4127
|
-
// Load CMO reports (produced independently by Manus AI runner)
|
|
4128
|
-
cmoReports = loadCMOReports();
|
|
4129
|
-
if (cmoReports) {
|
|
4130
|
-
log(c.magenta, ' CMO reports found — will include in CEO review');
|
|
4131
|
-
}
|
|
4132
|
-
else {
|
|
4133
|
-
log(c.gray, ' No CMO reports available yet');
|
|
4134
|
-
}
|
|
4135
|
-
// Load CTO tech-watch reports (produced independently by run-cto-techwatch.ts)
|
|
4136
|
-
const ctoTechWatch = loadCTOTechWatchReports();
|
|
4137
|
-
if (ctoTechWatch) {
|
|
4138
|
-
log(c.cyan, ' CTO tech-watch reports found — will include in CEO review');
|
|
4139
|
-
cmoReports = cmoReports ? cmoReports + '\n\n' + ctoTechWatch : ctoTechWatch;
|
|
4140
|
-
}
|
|
4141
|
-
else {
|
|
4142
|
-
log(c.gray, ' No CTO tech-watch reports available yet');
|
|
4143
|
-
}
|
|
4144
|
-
// Load Grok intelligence feed (Grok AI monitors X/Twitter for OpenClaw news)
|
|
4145
|
-
const grokFeed = loadGrokFeed();
|
|
4146
|
-
if (grokFeed) {
|
|
4147
|
-
log(c.magenta, ' Grok intelligence feed found — will include in CEO review');
|
|
4148
|
-
cmoReports = cmoReports ? cmoReports + '\n\n' + grokFeed : grokFeed;
|
|
4149
|
-
}
|
|
4150
|
-
else {
|
|
4151
|
-
log(c.gray, ' No Grok feed reports available');
|
|
4152
|
-
}
|
|
4153
|
-
// Load Owner Directives (highest priority — always included)
|
|
4154
|
-
const ownerDirectives = loadOwnerDirectives();
|
|
4155
|
-
if (ownerDirectives) {
|
|
4156
|
-
log(c.magenta, " Owner directives found — will include in CEO review (highest priority)");
|
|
4157
|
-
cmoReports = ownerDirectives + (cmoReports ? "\n\n" + cmoReports : "");
|
|
4158
|
-
}
|
|
4159
|
-
// 5. CEO reviews CTO proposals + CMO reports
|
|
4160
|
-
if (ctoReport.proposals.length > 0 || cmoReports) {
|
|
4161
|
-
log(c.magenta, '\n--- Phase 4: CEO Reviews CTO Proposals + CMO Reports ---');
|
|
4162
|
-
ctoDecisions = await this.ceo.reviewCTOProposals(ctoReport);
|
|
4163
|
-
// Persist CEO decisions for CTO feedback loop + approved proposals tracking
|
|
4164
|
-
persistCEODecisions(ctoDecisions, ctoReport);
|
|
4165
|
-
// Handle approved new_agent proposals
|
|
4166
|
-
const agentCreator = new AgentCreator();
|
|
4167
|
-
for (const proposal of ctoReport.proposals) {
|
|
4168
|
-
if (proposal.category === 'new_agent' && proposal.agent_spec) {
|
|
4169
|
-
// Check if CEO approved this specific proposal
|
|
4170
|
-
if (ctoDecisions.includes(proposal.id) && ctoDecisions.toUpperCase().includes('APPROVED')) {
|
|
4171
|
-
log(c.green, `\n 🤖 CEO approved new agent: ${proposal.agent_spec.name}`);
|
|
4172
|
-
agentCreator.createAgent(proposal.agent_spec);
|
|
4173
|
-
log(c.green, ` Agent will be loaded on next orchestrator run.`);
|
|
4174
|
-
}
|
|
4175
|
-
else {
|
|
4176
|
-
log(c.yellow, ` CEO did not approve agent: ${proposal.agent_spec.name}`);
|
|
4177
|
-
}
|
|
4178
|
-
}
|
|
4179
|
-
}
|
|
4180
|
-
}
|
|
4181
|
-
else {
|
|
4182
|
-
log(c.cyan, ' No proposals from CTO — stack is current and optimized');
|
|
4183
|
-
ctoDecisions = 'No proposals to review.';
|
|
4184
|
-
}
|
|
4185
|
-
}
|
|
4186
|
-
catch (error) {
|
|
4187
|
-
log(c.yellow, ` CTO/CEO review cycle skipped: ${error.message}`);
|
|
4188
|
-
ctoDecisions = 'CTO analysis was not performed this run.';
|
|
4189
|
-
}
|
|
4190
|
-
// 6. CEO final assessment — B.14: only runs if ≥2 tasks rejected (skips if sprint went well)
|
|
4191
|
-
const rejectedCount = this.tasks.filter(t => t.status === 'rejected').length;
|
|
4192
|
-
log(c.magenta, '\n--- Phase 5: CEO Final Assessment ---');
|
|
4193
|
-
if (rejectedCount >= 2) {
|
|
4194
|
-
log(c.magenta, ` ${rejectedCount} tasks rejected — CEO reviewing...`);
|
|
4195
|
-
await this.ceo.reviewSprintProgress(this.tasks);
|
|
4196
|
-
}
|
|
4197
|
-
else {
|
|
4198
|
-
log(c.gray, ` Only ${rejectedCount} rejected — skipping CEO reassessment (sprint OK)`);
|
|
4199
|
-
}
|
|
4200
|
-
(0, wallet_state_1.logWalletStatus)(); // Print wallet burn after sprint execution
|
|
4201
|
-
// 069-06: emit budget events if thresholds crossed
|
|
4202
|
-
try {
|
|
4203
|
-
const _ws = (0, wallet_state_1.getWalletState)();
|
|
4204
|
-
if (_ws.burnPct >= 95)
|
|
4205
|
-
(0, event_bus_publisher_1.publishBudgetFreeze)(_evtSprintId, _ws.burnPct).catch(() => { });
|
|
4206
|
-
else if (_ws.burnPct >= 80)
|
|
4207
|
-
(0, event_bus_publisher_1.publishBudgetWarning)(_evtSprintId, _ws.burnPct, _ws.spentThisMonth, _ws.monthlyBudget).catch(() => { });
|
|
4208
|
-
}
|
|
4209
|
-
catch { /* wallet state unavailable */ }
|
|
4210
|
-
// 6b. CTO autonomous post-sprint analysis (runs after EVERY sprint)
|
|
4211
|
-
log(c.cyan, '\n--- Phase 5b: CTO Post-Sprint Analysis (Autonomous) ---');
|
|
4212
|
-
let postSprintReport = '';
|
|
4213
|
-
try {
|
|
4214
|
-
postSprintReport = await this.cto.postSprintAnalysis(this.tasks, this.stats);
|
|
4215
|
-
log(c.cyan, ' Post-sprint analysis saved to reports/cto/');
|
|
4216
|
-
// Extract proposals from post-sprint analysis and feed into CEO review
|
|
4217
|
-
const jsonMatch = postSprintReport.match(/```json\s*([\s\S]*?)```/);
|
|
4218
|
-
if (jsonMatch) {
|
|
4219
|
-
try {
|
|
4220
|
-
const parsed = JSON.parse(jsonMatch[1].trim());
|
|
4221
|
-
const postSprintProposals = parsed.proposals || [];
|
|
4222
|
-
if (postSprintProposals.length > 0) {
|
|
4223
|
-
log(c.cyan, ` Found ${postSprintProposals.length} proposals — sending to CEO for autonomous review`);
|
|
4224
|
-
// Build CTOReport for CEO review
|
|
4225
|
-
const postSprintCTOReport = {
|
|
4226
|
-
summary: parsed.summary || 'Post-sprint analysis proposals',
|
|
4227
|
-
proposals: postSprintProposals.map((p) => ({
|
|
4228
|
-
id: p.id,
|
|
4229
|
-
title: p.title,
|
|
4230
|
-
category: p.category,
|
|
4231
|
-
description: p.description,
|
|
4232
|
-
estimated_impact: p.estimated_impact || '',
|
|
4233
|
-
risk_level: p.risk_level || 'medium',
|
|
4234
|
-
implementation_steps: p.implementation_steps || [],
|
|
4235
|
-
})),
|
|
4236
|
-
metrics_reviewed: ['sprint_results', 'failure_patterns', 'trend_analysis'],
|
|
4237
|
-
};
|
|
4238
|
-
// Phase 5c: CEO autonomously reviews post-sprint proposals
|
|
4239
|
-
log(c.magenta, '\n--- Phase 5c: CEO Reviews Post-Sprint Proposals (Autonomous) ---');
|
|
4240
|
-
const postSprintDecisions = await this.ceo.reviewCTOProposals(postSprintCTOReport);
|
|
4241
|
-
// Persist CEO decisions + update approved-proposals tracker
|
|
4242
|
-
persistCEODecisions(postSprintDecisions, postSprintCTOReport);
|
|
4243
|
-
log(c.magenta, ' CEO post-sprint proposal review complete');
|
|
4244
|
-
}
|
|
4245
|
-
else {
|
|
4246
|
-
log(c.cyan, ' No proposals in post-sprint analysis');
|
|
4247
|
-
}
|
|
4248
|
-
}
|
|
4249
|
-
catch (parseErr) {
|
|
4250
|
-
log(c.yellow, ` Could not parse post-sprint proposals JSON: ${parseErr.message}`);
|
|
4251
|
-
}
|
|
4252
|
-
}
|
|
4253
|
-
}
|
|
4254
|
-
catch (error) {
|
|
4255
|
-
log(c.yellow, ` Post-sprint analysis skipped: ${error.message}`);
|
|
4256
|
-
}
|
|
4257
|
-
// 7. CEO generates daily report
|
|
4258
|
-
log(c.magenta, '\n--- Phase 6: Daily Report Generation ---');
|
|
4259
|
-
const ctoReportStr = `Summary: ${ctoReport.summary}\nProposals: ${ctoReport.proposals.length}\n${ctoReport.proposals.map(p => `- [${p.category}] ${p.title} (${p.risk_level})`).join('\n')}`;
|
|
4260
|
-
const grokSection = loadGrokFeed();
|
|
4261
|
-
const cmoSection = cmoReports
|
|
4262
|
-
? '\n\n## CMO Activity (Manus AI)\n' + cmoReports.substring(0, 2000)
|
|
4263
|
-
: '\n\nCMO: No reports available this cycle.';
|
|
4264
|
-
const grokForReport = grokSection ? '\n\n## Grok Intelligence Feed\nGrok AI reports available — included in CTO/CEO analysis.' : '\n\nGrok: No feed reports this cycle.';
|
|
4265
|
-
const postSprintSection = postSprintReport ? '\n\n## CTO Post-Sprint Analysis\n' + postSprintReport.substring(0, 2000) : '';
|
|
4266
|
-
await this.ceo.generateDailyReport(this.tasks, this.stats, ctoReportStr + cmoSection + grokForReport + postSprintSection, ctoDecisions);
|
|
4267
|
-
// 8. Save updated sprint state
|
|
4268
|
-
const sprintFile = process.argv[2] || 'sprints/current.json';
|
|
4269
|
-
(0, fs_1.writeFileSync)(sprintFile, JSON.stringify({ tasks: this.tasks }, null, 2));
|
|
4270
|
-
log(c.green, `\nSprint state saved to ${sprintFile}`);
|
|
4271
|
-
// Sprint 706: BrainX — close bridge at sprint end
|
|
4272
|
-
try {
|
|
4273
|
-
if (brainxBridge)
|
|
4274
|
-
await brainxBridge.close();
|
|
4275
|
-
log(c.gray, ' [BrainX] Bridge closed');
|
|
4276
|
-
}
|
|
4277
|
-
catch { /* non-blocking */ }
|
|
4278
|
-
// 069-06: emit sprint completed event
|
|
4279
|
-
const completedCount = this.tasks.filter(t => t.status === 'done').length;
|
|
4280
|
-
(0, event_bus_publisher_1.publishSprintCompleted)(_evtSprintId, this.tasks.length, completedCount).catch(() => { });
|
|
4281
|
-
// 8b. Sync global token count into stats
|
|
4282
|
-
this.stats.totalTokens = _globalTokensThisRun;
|
|
4283
|
-
// 8c. Generate structured swarm run report
|
|
4284
|
-
try {
|
|
4285
|
-
(0, fs_1.mkdirSync)('reports/swarm-runs', { recursive: true });
|
|
4286
|
-
(0, fs_1.mkdirSync)('logs/swarm-runs', { recursive: true });
|
|
4287
|
-
let gitHeadAfter = 'unknown';
|
|
4288
|
-
let gitBranch = 'unknown';
|
|
4289
|
-
try {
|
|
4290
|
-
gitHeadAfter = (0, child_process_1.execSync)('git rev-parse --short HEAD', { timeout: 5000 }).toString().trim();
|
|
4291
|
-
gitBranch = (0, child_process_1.execSync)('git rev-parse --abbrev-ref HEAD', { timeout: 5000 }).toString().trim();
|
|
4292
|
-
}
|
|
4293
|
-
catch { /* ok */ }
|
|
4294
|
-
// sprint-1566 F0e + F3: read from the real per-call aggregator (filled
|
|
4295
|
-
// by recordModelCall after every callLLM) instead of the prior pattern
|
|
4296
|
-
// that read taskRun.model_used (always 'unknown') and used a stale 5-row
|
|
4297
|
-
// pricing dict. modelUsage now has provider/calls/input/output/tokens/
|
|
4298
|
-
// cost_usd per model with real per-call cost from llm-cost-table.
|
|
4299
|
-
const modelUsage = getModelsUsedReport();
|
|
4300
|
-
const totalCostUsd = getTotalCostUsd();
|
|
4301
|
-
const runReport = {
|
|
4302
|
-
schema_version: '2.0.0', // CTO-20260528-001: bumped when capturing grade + score_rationale per-task
|
|
4303
|
-
run_id: (0, crypto_1.randomUUID)(),
|
|
4304
|
-
project: 'kognai',
|
|
4305
|
-
sprint_file: sprintFile,
|
|
4306
|
-
started_at: sprintStartTime,
|
|
4307
|
-
finished_at: new Date().toISOString(),
|
|
4308
|
-
duration_seconds: Math.round((Date.now() - startTime) / 1000),
|
|
4309
|
-
git_branch: gitBranch,
|
|
4310
|
-
git_head_before: gitHeadBefore,
|
|
4311
|
-
git_head_after: gitHeadAfter,
|
|
4312
|
-
sovereign_mode: SOVEREIGN_MODE,
|
|
4313
|
-
summary: {
|
|
4314
|
-
total_tasks: this.tasks.length,
|
|
4315
|
-
done: this.tasks.filter(t => t.status === 'done').length,
|
|
4316
|
-
rejected: this.tasks.filter(t => t.status === 'rejected').length,
|
|
4317
|
-
skipped: this.tasks.filter(t => t.status === 'skipped').length,
|
|
4318
|
-
approval_rate: +(this.stats.approved / Math.max(this.stats.tasksExecuted, 1)).toFixed(2),
|
|
4319
|
-
total_tokens: this.stats.totalTokens,
|
|
4320
|
-
supervisor_conflicts: this.stats.conflicts,
|
|
4321
|
-
ceo_escalations: this.stats.escalations,
|
|
4322
|
-
},
|
|
4323
|
-
models_used: modelUsage,
|
|
4324
|
-
total_cost_usd: +totalCostUsd.toFixed(4),
|
|
4325
|
-
tasks: this.taskRuns,
|
|
4326
|
-
};
|
|
4327
|
-
// 1. Timestamped individual report (never overwritten)
|
|
4328
|
-
const ts = new Date().toISOString().replace(/[:.]/g, '-').slice(0, 19);
|
|
4329
|
-
const reportPath = `reports/swarm-runs/${ts}.json`;
|
|
4330
|
-
(0, fs_1.writeFileSync)(reportPath, JSON.stringify(runReport, null, 2));
|
|
4331
|
-
// 2. Latest pointer (for quick dashboard access)
|
|
4332
|
-
(0, fs_1.writeFileSync)('reports/swarm-runs/latest-run.json', JSON.stringify(runReport, null, 2));
|
|
4333
|
-
// 3. Daily aggregate (accumulates ALL runs for the day)
|
|
4334
|
-
const today = new Date().toISOString().slice(0, 10);
|
|
4335
|
-
const dailyPath = `reports/swarm-runs/daily-${today}.json`;
|
|
4336
|
-
let dailyRuns = [];
|
|
4337
|
-
try {
|
|
4338
|
-
dailyRuns = JSON.parse((0, fs_1.readFileSync)(dailyPath, 'utf-8'));
|
|
4339
|
-
}
|
|
4340
|
-
catch { /* first run today */ }
|
|
4341
|
-
dailyRuns.push(runReport);
|
|
4342
|
-
(0, fs_1.writeFileSync)(dailyPath, JSON.stringify(dailyRuns, null, 2));
|
|
4343
|
-
log(c.green, `\n📊 Swarm run report: ${reportPath}`);
|
|
4344
|
-
log(c.green, ` Daily aggregate: ${dailyPath} (${dailyRuns.length} run(s) today)`);
|
|
4345
|
-
log(c.green, ` Tokens: ${this.stats.totalTokens.toLocaleString()} | Est. cost: $${totalCostUsd.toFixed(4)}`);
|
|
4346
|
-
// 8d. Daily cost digest — persist ClawRouter spend summary (§17.5)
|
|
4347
|
-
try {
|
|
4348
|
-
const digest = getDailyCostDigest();
|
|
4349
|
-
const digestPath = `logs/clawrouter/digest-${today}.json`;
|
|
4350
|
-
(0, fs_1.writeFileSync)(digestPath, JSON.stringify(digest, null, 2));
|
|
4351
|
-
log(c.cyan, ` 💰 Cost digest: $${digest.total_usd.toFixed(4)} across ${digest.call_count} calls (saved ${digest.tokens_saved_by_qcg} tokens via QCG)`);
|
|
4352
|
-
}
|
|
4353
|
-
catch { /* non-critical */ }
|
|
4354
|
-
}
|
|
4355
|
-
catch (err) {
|
|
4356
|
-
log(c.yellow, ` [WARN] Swarm run report failed: ${err.message}`);
|
|
4357
|
-
}
|
|
4358
|
-
// 9. Post-sprint: PM2 reload backend + smoke test
|
|
4359
|
-
await postSprintSmokeTest();
|
|
4360
|
-
// Final summary
|
|
4361
|
-
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
|
|
4362
|
-
log(c.bold, '\n╔══════════════════════════════════════════════════════════╗');
|
|
4363
|
-
log(c.bold, '║ Orchestration Complete ║');
|
|
4364
|
-
log(c.bold, '╚══════════════════════════════════════════════════════════╝');
|
|
4365
|
-
log(c.green, ` Tasks executed: ${this.stats.tasksExecuted}`);
|
|
4366
|
-
log(c.green, ` Approved: ${this.stats.approved}`);
|
|
4367
|
-
log(c.red, ` Rejected: ${this.stats.rejected}`);
|
|
4368
|
-
log(c.yellow, ` Supervisor conflicts: ${this.stats.conflicts}`);
|
|
4369
|
-
log(c.magenta, ` CEO escalations: ${this.stats.escalations}`);
|
|
4370
|
-
log(c.cyan, ` CTO proposals: ${ctoReport.proposals.length}`);
|
|
4371
|
-
log(c.magenta, ` CMO reports: ${cmoReports ? 'loaded' : 'none'}`);
|
|
4372
|
-
log(c.blue, ` Total time: ${elapsed}s`);
|
|
4373
|
-
log(c.gray, ` Pipeline: CEO → MiniMax code → Dual review (DeepSeek + Haiku) → CEO resolves conflicts → CTO → CMO/Grok → Post-sprint analysis → Daily report`);
|
|
4374
|
-
// Mission Control — report final stats and disconnect
|
|
4375
|
-
if (mcConnected) {
|
|
4376
|
-
try {
|
|
4377
|
-
if (this.stats.totalTokens > 0) {
|
|
4378
|
-
await mc.reportTokens('MiniMax-M2.5', this.stats.totalTokens, 0, 'sprint_run');
|
|
4379
|
-
}
|
|
4380
|
-
await mc.disconnect();
|
|
4381
|
-
log(c.gray, ` [MC] Sprint reported: ${this.stats.approved} approved / ${this.stats.rejected} rejected / ${this.stats.totalTokens} tokens`);
|
|
4382
|
-
}
|
|
4383
|
-
catch { /* non-critical */ }
|
|
4384
|
-
}
|
|
4385
|
-
}
|
|
4386
|
-
}
|
|
4387
|
-
// ===== Post-sprint smoke test =====
|
|
66
|
+
const engine_primitives_1 = require("./engine-primitives");
|
|
67
|
+
Object.defineProperty(exports, "callLLM", { enumerable: true, get: function () { return engine_primitives_1.callLLM; } });
|
|
68
|
+
Object.defineProperty(exports, "localQAGate", { enumerable: true, get: function () { return engine_primitives_1.localQAGate; } });
|
|
69
|
+
Object.defineProperty(exports, "c", { enumerable: true, get: function () { return engine_primitives_1.c; } });
|
|
70
|
+
Object.defineProperty(exports, "log", { enumerable: true, get: function () { return engine_primitives_1.log; } });
|
|
71
|
+
Object.defineProperty(exports, "routeCall", { enumerable: true, get: function () { return engine_primitives_1.routeCall; } });
|
|
72
|
+
Object.defineProperty(exports, "callAnthropicCached", { enumerable: true, get: function () { return engine_primitives_1.callAnthropicCached; } });
|
|
73
|
+
Object.defineProperty(exports, "compressContext", { enumerable: true, get: function () { return engine_primitives_1.compressContext; } });
|
|
74
|
+
Object.defineProperty(exports, "normalizeReview", { enumerable: true, get: function () { return engine_primitives_1.normalizeReview; } });
|
|
75
|
+
var engine_helpers_1 = require("./engine-helpers");
|
|
76
|
+
Object.defineProperty(exports, "persistCEODecisions", { enumerable: true, get: function () { return engine_helpers_1.persistCEODecisions; } });
|
|
77
|
+
Object.defineProperty(exports, "resolveActiveSprintId", { enumerable: true, get: function () { return engine_helpers_1.resolveActiveSprintId; } });
|
|
78
|
+
Object.defineProperty(exports, "resolveAgentDid", { enumerable: true, get: function () { return engine_helpers_1.resolveAgentDid; } });
|
|
79
|
+
Object.defineProperty(exports, "recordAgentScore", { enumerable: true, get: function () { return engine_helpers_1.recordAgentScore; } });
|
|
80
|
+
Object.defineProperty(exports, "assessTaskComplexity", { enumerable: true, get: function () { return engine_helpers_1.assessTaskComplexity; } });
|
|
81
|
+
const engine_orchestrator_1 = require("./engine-orchestrator");
|
|
4388
82
|
async function httpGet(url, timeoutMs = 8000) {
|
|
4389
83
|
return new Promise((resolve) => {
|
|
4390
84
|
const parsed = new URL(url);
|
|
@@ -4404,7 +98,7 @@ async function sendTelegramAlert(message) {
|
|
|
4404
98
|
if (!botToken || !chatId)
|
|
4405
99
|
return;
|
|
4406
100
|
try {
|
|
4407
|
-
await httpPost('https://api.telegram.org/bot' + botToken + '/sendMessage', {
|
|
101
|
+
await (0, engine_primitives_1.httpPost)('https://api.telegram.org/bot' + botToken + '/sendMessage', {
|
|
4408
102
|
'Content-Type': 'application/json',
|
|
4409
103
|
}, JSON.stringify({ chat_id: chatId, text: message, parse_mode: 'Markdown' }), 10000);
|
|
4410
104
|
}
|
|
@@ -4414,22 +108,21 @@ async function postSprintSmokeTest() {
|
|
|
4414
108
|
// Disabled — Invoica-specific endpoints (health/invoices/settlements) not applicable to Kognai
|
|
4415
109
|
// Removed Sprint 205: was always returning HTTP 404 + flooding Telegram with false alerts
|
|
4416
110
|
}
|
|
4417
|
-
|
|
4418
|
-
async function main() {
|
|
111
|
+
async function main(config = {}) {
|
|
4419
112
|
// S67-005: Startup env check (OMEL AMD-13: via CredentialVault — hasSecret never logs value)
|
|
4420
113
|
if (!credential_vault_1.credentialVault.hasSecret('ANTHROPIC_API_KEY', 'orchestrator')) {
|
|
4421
|
-
log(c.yellow, '⚠ ANTHROPIC_API_KEY not set — Anthropic CEO + Sup2 Haiku will be unavailable.');
|
|
4422
|
-
log(c.yellow, ' ClawRouter/DeepSeek will be the sole reviewer (mono-supervision). Set ANTHROPIC_API_KEY in .env for full dual-supervisor mode.');
|
|
114
|
+
(0, engine_primitives_1.log)(engine_primitives_1.c.yellow, '⚠ ANTHROPIC_API_KEY not set — Anthropic CEO + Sup2 Haiku will be unavailable.');
|
|
115
|
+
(0, engine_primitives_1.log)(engine_primitives_1.c.yellow, ' ClawRouter/DeepSeek will be the sole reviewer (mono-supervision). Set ANTHROPIC_API_KEY in .env for full dual-supervisor mode.');
|
|
4423
116
|
}
|
|
4424
117
|
if (!credential_vault_1.credentialVault.hasSecret('MINIMAX_API_KEY', 'orchestrator')) {
|
|
4425
|
-
log(c.yellow, '⚠ MINIMAX_API_KEY not set — cloud-code tasks will fail.');
|
|
118
|
+
(0, engine_primitives_1.log)(engine_primitives_1.c.yellow, '⚠ MINIMAX_API_KEY not set — cloud-code tasks will fail.');
|
|
4426
119
|
}
|
|
4427
120
|
try {
|
|
4428
|
-
const orchestrator = new Orchestrator();
|
|
121
|
+
const orchestrator = new engine_orchestrator_1.Orchestrator(config.spawnGate);
|
|
4429
122
|
await orchestrator.run();
|
|
4430
123
|
}
|
|
4431
124
|
catch (error) {
|
|
4432
|
-
log(c.red, `\n✗ Fatal error: ${error.message}`);
|
|
125
|
+
(0, engine_primitives_1.log)(engine_primitives_1.c.red, `\n✗ Fatal error: ${error.message}`);
|
|
4433
126
|
console.error(error.stack);
|
|
4434
127
|
process.exit(1);
|
|
4435
128
|
}
|