@kognai/orchestrator-core 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,748 @@
1
+ "use strict";
2
// TypeScript interop helper: exposes property `key` of `source` on `target`
// under `alias` (defaults to `key`). A previously installed helper on `this`
// is reused when present.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        // The source descriptor is reused only when it is an accessor on a real
        // ES module, or a data property that is neither writable nor configurable.
        var reusable = false;
        if (descriptor) {
            reusable = "get" in descriptor
                ? Boolean(source.__esModule)
                : !(descriptor.writable || descriptor.configurable);
        }
        if (!reusable) {
            // Otherwise install a live getter so later changes on `source` show through.
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    }
    : function (target, source, key, alias) {
        // Legacy engines: plain value copy.
        target[alias === undefined ? key : alias] = source[key];
    });
13
// TypeScript interop helper: stores `value` as the `default` member of a
// synthesized namespace object. A previously installed helper on `this` wins.
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (namespace, value) {
            Object.defineProperty(namespace, "default", { enumerable: true, value: value });
        };
    }
    return function (namespace, value) {
        namespace["default"] = value;
    };
})();
18
// TypeScript interop helper: wraps a CommonJS module in a namespace object.
// Real ES modules (`__esModule` set) are returned untouched; otherwise every
// own property except "default" is bound onto a fresh object and the module
// itself becomes its `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Own-key lister, resolved on first use.
    var resolvedLister = null;
    function listOwnKeys(obj) {
        if (resolvedLister === null) {
            resolvedLister = Object.getOwnPropertyNames || function (target) {
                var names = [];
                for (var prop in target) {
                    if (Object.prototype.hasOwnProperty.call(target, prop)) {
                        names.push(prop);
                    }
                }
                return names;
            };
        }
        return resolvedLister(obj);
    }
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            listOwnKeys(mod).forEach(function (name) {
                if (name !== "default") {
                    __createBinding(namespace, mod, name);
                }
            });
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
35
+ Object.defineProperty(exports, "__esModule", { value: true });
36
+ exports.c = exports.SOVEREIGN_MODE = exports.getDailyCostDigest = exports.routeCall = exports._globalTokensThisRun = void 0;
37
+ exports.recordModelCall = recordModelCall;
38
+ exports.getModelsUsedReport = getModelsUsedReport;
39
+ exports.getTotalCostUsd = getTotalCostUsd;
40
+ exports.recordScoreForCitizen = recordScoreForCitizen;
41
+ exports.normalizeReview = normalizeReview;
42
+ exports.log = log;
43
+ exports.safeResetLastCommit = safeResetLastCommit;
44
+ exports.callLLM = callLLM;
45
+ exports.callAnthropicCached = callAnthropicCached;
46
+ exports.compressContext = compressContext;
47
+ exports.localQAGate = localQAGate;
48
+ exports.httpPost = httpPost;
49
+ const child_process_1 = require("child_process");
50
+ const https = __importStar(require("https"));
51
+ const http = __importStar(require("http"));
52
+ // ===== Module-level token accumulator =====
53
+ // Captures ALL LLM tokens across every agent, supervisor, CEO, CTO call this run.
54
+ exports._globalTokensThisRun = 0;
55
+ function _accumulateTokens(n) { exports._globalTokensThisRun += n; }
56
// Per-model usage entries for this run, keyed by model name.
const _modelsUsedThisRun = new Map();
// Sum of every cost passed to recordModelCall this run.
let _totalCostThisRun = 0;
/**
 * Records one LLM call in the per-model usage map and the run-wide cost total.
 *
 * Entries are keyed by `model` ('unknown' when falsy). The provider stored on
 * an entry is the one seen on the first call for that model. Falsy token or
 * cost values count as 0.
 *
 * @param {string} provider
 * @param {string} model
 * @param {number} input_tokens
 * @param {number} output_tokens
 * @param {number} cost_usd
 */
function recordModelCall(provider, model, input_tokens, output_tokens, cost_usd) {
    const modelKey = model || 'unknown';
    const tokensIn = input_tokens || 0;
    const tokensOut = output_tokens || 0;
    const cost = cost_usd || 0;
    const entry = _modelsUsedThisRun.get(modelKey);
    if (!entry) {
        _modelsUsedThisRun.set(modelKey, {
            calls: 1,
            input_tokens: tokensIn,
            output_tokens: tokensOut,
            cost_usd: cost,
            provider: provider || 'unknown',
        });
    }
    else {
        entry.calls += 1;
        entry.input_tokens += tokensIn;
        entry.output_tokens += tokensOut;
        entry.cost_usd += cost;
    }
    _totalCostThisRun += cost;
}
72
/**
 * Builds a plain-object snapshot of the per-model usage map.
 *
 * @returns {Object} one entry per model with provider, calls, input/output
 *   tokens, their sum as `tokens`, and `cost_usd` rounded to 6 decimals.
 */
function getModelsUsedReport() {
    const report = {};
    _modelsUsedThisRun.forEach((usage, model) => {
        report[model] = {
            provider: usage.provider,
            calls: usage.calls,
            input_tokens: usage.input_tokens,
            output_tokens: usage.output_tokens,
            tokens: usage.input_tokens + usage.output_tokens,
            cost_usd: Number(usage.cost_usd.toFixed(6)),
        };
    });
    return report;
}
79
/** @returns {number} total cost (USD) accumulated by recordModelCall this run. */
function getTotalCostUsd() {
    return _totalCostThisRun;
}
80
+ // ClawRouter v2.0 — MANDATORY SINGLE GATEWAY (Exec Protocol §17)
81
+ // TICKET-215 Wave C: every LLM call routes through the core ModelRouter SEAM.
82
+ // The viem-backed router now lives in its OWN package (@kognai/clawrouter-x402);
83
+ // Kognai injects it into the zero-dep core slot at boot, so core never carries
84
+ // viem and a product that doesn't route on-chain simply doesn't inject it.
85
+ // TICKET-215 Wave D (split step 1): Kognai-local injection of the viem router +
86
+ // SCORE scorer into the core seams lives in one bootstrap module (side-effect import).
87
+ const model_router_registry_1 = require("./model-router-registry");
88
// Seam-backed bindings — existing call sites resolve through the injected router.
// The router is looked up on every call, so whichever router is registered at
// call time is the one used.
const routeCall = (req) => {
    const router = (0, model_router_registry_1.getModelRouter)();
    return router.routeCall(req);
};
exports.routeCall = routeCall;
const getDailyCostDigest = () => {
    const router = (0, model_router_registry_1.getModelRouter)();
    return router.getDailyCostDigest();
};
exports.getDailyCostDigest = getDailyCostDigest;
93
+ const citizenship_1 = require("./citizenship");
94
+ // TICKET-215 Wave D: citizen scoring via the core seam. The SCORE-backed scorer
95
+ // stays Kognai-local (out of zero-dep core) and is injected here; scoring goes
96
+ // through recordTaskScoreMonitored so every score emits a data.citizen_score event
97
+ // to the event-bus (kognai_events) — flowing back to Kognai monitoring + the Plumber.
98
+ const citizen_score_registry_1 = require("./citizen-score-registry");
99
/**
 * Wire a supervisor's review into the SCORE protocol for the citizen that
 * authored the task.
 *
 * Silently does nothing when the grade is missing or not one of A/B/C/D/F
 * (legacy reviews without a grade), or when `agent_name` has no record in the
 * citizens registry (founding agents — CEO/sup/sherlock — aren't backfilled,
 * and unminted agents have no record). The caller passes the agent slug; the
 * citizen record and DID are looked up here.
 *
 * @param {string} agent_name - agent slug to look up in the registry
 * @param {string} sprint_id
 * @param {string} task_id
 * @param {string} grade - letter grade 'A' | 'B' | 'C' | 'D' | 'F'
 * @param {*} path - accepted for interface compatibility; not read here
 */
function recordScoreForCitizen(agent_name, sprint_id, task_id, grade, path) {
    const LETTER_GRADES = ['A', 'B', 'C', 'D', 'F'];
    const gradeIsUsable = Boolean(grade) && LETTER_GRADES.includes(grade);
    if (!gradeIsUsable) {
        return;
    }
    const registry = (0, citizenship_1.readRegistry)();
    const citizen = registry.citizens.find((entry) => entry.agent_name === agent_name);
    if (!citizen) {
        return;
    }
    // Supervisor DID — single supervisor identity for now; can split per-pass later.
    const supervisorDID = 'did:kognai:supervisor';
    const scored = (0, citizen_score_registry_1.recordTaskScoreMonitored)({
        citizen_id: citizen.citizen_id,
        agent_did: citizen.agent_did,
        sprint_id,
        task_id,
        grade,
        supervisor_did: supervisorDID,
    });
    log(exports.c.gray, ` [SCORE] ${citizen.citizen_id} (${agent_name}): grade ${grade} → final ${scored.final_score.toFixed(1)} (perf ${scored.task_performance_score.toFixed(0)} × ${scored.constitutional_multiplier})`);
}
124
+ const model_router_1 = require("./model-router");
125
+ // sprint-1566 F3+F0e: per-model cost computation + wallet ledger writes
126
+ const llm_cost_table_1 = require("./llm-cost-table");
127
+ const ceo_wallet_1 = require("./ceo-wallet");
128
+ // V17: Sovereign mode — force all inference to local Ollama ($0 cost floor)
129
+ exports.SOVEREIGN_MODE = process.argv.includes('--sovereign') || process.env.SOVEREIGN_MODE === '1';
130
// TICKET-085: map letter grade → numeric score so legacy code (LoRA cron,
// trust updater, AAR middleware, etc.) keeps working unchanged. A discrete
// scale prevents the LLM-anchored 88 cluster; the derived score preserves
// downstream contracts.
const GRADE_TO_SCORE = {
    A: 95, B: 85, C: 70, D: 50, F: 20,
};
/**
 * Normalizes a supervisor review in place.
 *
 * When the review carries a known letter grade, the numeric `score` is
 * overwritten with the value derived from that grade (the grade is
 * authoritative). Reviews with only a numeric score (legacy / parse failure)
 * or with an unrecognized grade are returned untouched.
 *
 * @param {Object|null|undefined} raw - review object; mutated when a known grade is present
 * @returns {Object|null|undefined} the same object that was passed in
 */
function normalizeReview(raw) {
    // Nothing to normalize — hand back null/undefined instead of throwing.
    if (raw == null) {
        return raw;
    }
    const r = raw;
    // Own-property check: a plain `GRADE_TO_SCORE[grade] !== undefined` lookup
    // also matches inherited members such as "toString" or "constructor",
    // which would overwrite `score` with a function.
    if (r.grade && Object.prototype.hasOwnProperty.call(GRADE_TO_SCORE, r.grade)) {
        r.score = GRADE_TO_SCORE[r.grade];
    }
    return r;
}
146
+ // ===== Colors =====
147
+ exports.c = {
148
+ reset: '\x1b[0m', bold: '\x1b[1m',
149
+ red: '\x1b[31m', green: '\x1b[32m', yellow: '\x1b[33m',
150
+ blue: '\x1b[34m', magenta: '\x1b[35m', cyan: '\x1b[36m', gray: '\x1b[90m',
151
+ };
152
/**
 * Prints `msg` to stdout wrapped in `color` and followed by the reset sequence.
 *
 * @param {string} color - ANSI escape prefix (normally a member of `exports.c`)
 * @param {string} msg
 */
function log(color, msg) {
    const line = ''.concat(color, msg, exports.c.reset);
    console.log(line);
}
155
// ===== Safe reset helper =====
/**
 * Guarded replacement for a bare `git reset --hard HEAD~1`.
 *
 * Reads the subject of HEAD and resets only when it is exactly the
 * orchestrator's own commit message, `feat(<agent>): <id> - <type>`. If HEAD
 * has moved on to someone else's commit the reset is skipped, which protects
 * work from concurrent Claude sessions or human commits made while the
 * orchestrator was running. Any git failure is logged and reported as `false`.
 *
 * @param {string} taskId
 * @param {string} [agentName] - defaults to 'coder' when null/undefined
 * @param {string} [taskType] - defaults to 'feature' when null/undefined
 * @param {string} [indent] - prefix for log lines
 * @returns {boolean} true only when the reset was performed
 */
function safeResetLastCommit(taskId, agentName, taskType, indent = ' ') {
    const agentLabel = agentName ?? 'coder';
    const typeLabel = taskType ?? 'feature';
    const expectedMsg = `feat(${agentLabel}): ${taskId} - ${typeLabel}`;
    try {
        const headSubject = (0, child_process_1.execSync)('git log -1 --format=%s', { timeout: 5000 }).toString().trim();
        if (headSubject === expectedMsg) {
            (0, child_process_1.execSync)('git reset --hard HEAD~1', { timeout: 10000 });
            log(exports.c.gray, `${indent}Reset to previous commit (dropped rejected code)`);
            return true;
        }
        log(exports.c.yellow, `${indent}! Reset skipped — HEAD is "${headSubject.substring(0, 60)}", not our commit. Working tree left as-is to protect concurrent work.`);
        return false;
    }
    catch (err) {
        log(exports.c.gray, `${indent}Reset skipped: ${(err.message || '').substring(0, 80)}`);
        return false;
    }
}
178
// ===== ClawRouter v2.0 — MANDATORY SINGLE GATEWAY (Exec Protocol §17) =====
// ALL LLM calls route through routeCall() from clawrouter-v2.ts.
// Direct API calls to Anthropic, OpenAI, MiniMax, or Ollama are Sev-1 violations.
// The old provider-based callLLM() is replaced with a unified gateway that maps
// legacy provider+model pairs to ClawRouter v2.0 tier_class+complexity.
// Counters for the sprint JSON (§17.6), including direct_api_violations.
let _directApiViolations = 0;
let _llmCallsRouted = 0;
let _apexCalls = 0;
let _apexJudgePatternCompliant = true;
/** Best-effort text for any thrown value (Error, string, null, ...). */
function _describeError(err) {
    return String(err?.message || err);
}
/**
 * Unified LLM gateway — routes ALL calls through ClawRouter v2.0.
 * Legacy provider parameter is mapped to ClawRouter tier/complexity:
 *   - 'ollama' / 'local' → T0-T2 (local Ollama, $0)
 *   - 'clawrouter' → T2.5 EXEC (cloud gateway)
 *   - 'anthropic' (Sonnet) → T3 APEX (constitutional decisions only)
 *   - 'anthropic' (Haiku) → T2.5 EXEC
 *   - 'openai' → T2.5 EXEC
 *   - 'minimax' → T2.5 EXEC (via ClawRouter)
 *
 * NOTE: The provider parameter is retained for backward compatibility but
 * ALL routing decisions are made by ClawRouter v2.0. No direct API calls.
 *
 * Fallbacks: when the provider's monthly budget is 'frozen', or the call
 * fails with a credit-exhaustion error, the call is retried once through
 * 'clawrouter' / 'deepseek/deepseek-chat'. A fallback response carries
 * `fallback_used`, `fallback_from_provider` and `fallback_reason`
 * ('budget_frozen' or 'credit_exhausted').
 *
 * @param {string} provider - legacy provider hint
 * @param {string} model - legacy model name
 * @param {string} systemPrompt
 * @param {string} userPrompt
 * @param {number} [timeoutMs] - forwarded to fallback calls; not part of the
 *   router request built here
 * @param {string} [agentId]
 * @param {string} [taskType]
 * @returns {Promise<Object>} `{ choices, usage, provider, model, cost_usd }`
 * @throws the original routing error when no fallback applies or the fallback also fails
 */
async function callLLM(provider, model, systemPrompt, userPrompt, timeoutMs = 300000, agentId = 'orchestrator', taskType = 'orchestrator_call') {
    _llmCallsRouted++;
    // A missing prompt must not throw before the call is even routed.
    const promptChars = (systemPrompt?.length ?? 0) + (userPrompt?.length ?? 0);
    // Map legacy provider+model to a ClawRouter v2.0 request.
    const req = {
        task_type: taskType,
        tier_class: 'text',
        complexity: mapLegacyToComplexity(provider, model),
        // Rough estimate: ~4 characters per token.
        context_tokens: Math.ceil(promptChars / 4),
        constitutional_flag: isConstitutionalCall(provider, model),
        agent_id: agentId,
        payload: {
            system: systemPrompt,
            prompt: userPrompt,
            max_tokens: 16000,
        },
    };
    // Track APEX calls for §17.6
    if (req.constitutional_flag || req.complexity === 'apex') {
        _apexCalls++;
    }
    // Proactive provider-budget check (PR #18 reactive fallback's preventive twin).
    // If <PROVIDER>_MONTHLY_BUDGET_USD env is set, check month-to-date spend
    // against the cap BEFORE attempting the call. Status 'frozen' (>=95%) skips
    // the call entirely and goes straight to the fallback path — avoids burning
    // a credit-exhaustion error to learn the same thing. 'warning' (>=80%)
    // alerts once per process lifetime but still attempts the call.
    // No env set → status 'unmonitored' → no proactive check.
    if (provider !== 'clawrouter' && provider !== 'ollama') {
        const budget = (0, ceo_wallet_1.getProviderBudgetStatus)(provider);
        if (budget.status === 'frozen') {
            log(exports.c.yellow, ` [budget-guard] ${provider} ${budget.pct.toFixed(0)}% of $${budget.budget_usd} cap → skipping to fallback (CEO-wallet-funded) without trying upstream`);
            _maybeAlertBudget(provider, 'frozen', budget);
            try {
                const fallbackResp = await callLLM('clawrouter', 'deepseek/deepseek-chat', systemPrompt, userPrompt, timeoutMs, agentId, `${taskType}_budget_proactive_fallback_from_${provider}`);
                fallbackResp.fallback_used = true;
                fallbackResp.fallback_from_provider = provider;
                fallbackResp.fallback_reason = 'budget_frozen';
                return fallbackResp;
            }
            catch (fallbackErr) {
                log(exports.c.red, ` [budget-guard] fallback also failed: ${_describeError(fallbackErr)} — proceeding to attempt original provider as last resort`);
                // Fall through to the normal try — better to attempt + handle the error
                // reactively than to leave the caller with nothing.
            }
        }
        else if (budget.status === 'warning') {
            _maybeAlertBudget(provider, 'warning', budget);
        }
    }
    try {
        const result = await (0, exports.routeCall)(req);
        // sprint-1566 F3+F0e: clawrouter's wallet billing only fires on the
        // x402-retry path (cost_usd != 0). The common direct path returns
        // cost_usd=0 and the ledger stays empty. Compute cost from real tokens
        // here using the per-model rate table + call deductCost so the ledger
        // becomes the source of truth.
        // Codex P2 (PR #9): record the actual provider derived from result.model,
        // not the caller's intent (provider param).
        const callerModel = result.model || model;
        // Codex P2 on PR #15: inferProvider returns the literal 'unknown' for
        // unclassified models; treat that as a miss and fall back to the
        // caller-intent provider so the ledger never records the string 'unknown'.
        const inferred = inferProvider(callerModel);
        const realProvider = (inferred && inferred !== 'unknown') ? inferred : provider;
        const inputTokens = result.input_tokens || 0;
        const outputTokens = result.output_tokens || 0;
        const computed = (0, llm_cost_table_1.computeCost)(callerModel, inputTokens, outputTokens);
        // Prefer real billed cost (x402 path) over our estimate.
        const costUsd = (result.cost_usd && result.cost_usd > 0) ? result.cost_usd : computed;
        // Ledger write and in-memory usage record are guarded separately so a
        // ledger failure cannot drop the call from the models-used report.
        try {
            if (costUsd > 0)
                (0, ceo_wallet_1.deductCost)(costUsd, agentId, taskType, realProvider, callerModel);
        }
        catch { /* ledger failure must never break the LLM call */ }
        try {
            recordModelCall(realProvider, callerModel, inputTokens, outputTokens, costUsd);
        }
        catch { /* recording failure must never break the LLM call */ }
        const response = {
            choices: [{ message: { content: result.content } }],
            usage: { total_tokens: inputTokens + outputTokens, input_tokens: inputTokens, output_tokens: outputTokens },
            provider: realProvider,
            model: callerModel,
            cost_usd: costUsd,
        };
        _accumulateTokens(response.usage.total_tokens || 0);
        return response;
    }
    catch (err) {
        const msg = _describeError(err);
        // Provider-credit-exhaustion fallback: the CEO wallet is supposed to keep
        // the swarm alive via x402, but Anthropic (and other direct-API providers)
        // bypass that — they bill against a separate account balance the CEO wallet
        // can't see. When that external balance hits zero we used to crash with
        // exit-null mid-task (incident 2026-05-21). Now we downgrade to a
        // CEO-wallet-funded provider (DeepSeek via ClawRouter, x402-enabled) and
        // continue. The sprint completes at lower quality instead of crashing.
        if (isCreditExhaustion(msg) && provider !== 'clawrouter') {
            log(exports.c.yellow, ` [fallback] ${provider} credit exhausted → downgrading to clawrouter/deepseek (alerting founder)`);
            try {
                alertCreditExhaustion(provider, msg, agentId, taskType);
            }
            catch { /* alert failure must never block */ }
            try {
                const fallbackResp = await callLLM('clawrouter', 'deepseek/deepseek-chat', systemPrompt, userPrompt, timeoutMs, agentId, `${taskType}_fallback_from_${provider}`);
                fallbackResp.fallback_used = true;
                fallbackResp.fallback_from_provider = provider;
                // Mirrors the 'budget_frozen' reason set on the proactive path.
                fallbackResp.fallback_reason = 'credit_exhausted';
                return fallbackResp;
            }
            catch (fallbackErr) {
                log(exports.c.red, ` [fallback] downgrade also failed: ${_describeError(fallbackErr)} — re-throwing original`);
                throw err;
            }
        }
        // Use the defensively extracted text: `err` may be a non-Error value.
        log(exports.c.red, ` [ClawRouter] Call failed: ${msg}`);
        throw err;
    }
}
317
/**
 * Heuristic check for provider-credit / quota / billing exhaustion in an error
 * message. Conservative — matches only specific upstream-provider markers that
 * consistently indicate "stop trying this provider, switch to a CEO-wallet-
 * funded one." Does NOT match:
 *   - generic 429 rate limits (transient, should retry same provider)
 *   - bare 'payment required' / '402' strings (codex P2 on PR #17: would
 *     mis-classify ClawRouter's own missing-X402_WALLET_KEY error, which
 *     throws "...402 (payment required)..." per scripts/lib/clawrouter-v2.ts,
 *     and trigger a misleading "top up <provider>" alert when the real cause
 *     is local config, not external provider credits)
 *
 * The specific markers all come from Anthropic / OpenAI / MiniMax error
 * payloads when their account balance hits zero. Matching is
 * case-insensitive.
 *
 * @param {string} msg - error message text (falsy is treated as empty)
 * @returns {boolean}
 */
function isCreditExhaustion(msg) {
    const text = (msg || '').toLowerCase();
    const mentions = (needle) => text.includes(needle);
    // Local-config errors come first — they carry 402 strings but are not
    // upstream-provider credit exhaustion.
    const LOCAL_CONFIG_MARKERS = ['x402_wallet_key', 'wallet key', 'missing wallet'];
    if (LOCAL_CONFIG_MARKERS.some(mentions)) {
        return false;
    }
    const EXHAUSTION_MARKERS = [
        'insufficient credit',
        'insufficient balance',
        'credit balance is too low',
        'credit_balance',
        'quota exceeded',
        'quota_exceeded',
        'billing_hard_limit',
        'insufficient_quota',
    ];
    if (EXHAUSTION_MARKERS.some(mentions)) {
        return true;
    }
    // A 401 counts only when it also talks about credit or billing.
    return mentions('401') && (mentions('credit') || mentions('billing'));
}
348
/**
 * Fire-and-forget Telegram alert when a credit-exhaustion fallback fires.
 * Best-effort; never blocks.
 *
 * Uses SENIOR_CODER_BOT_TOKEN (or TELEGRAM_BOT_TOKEN) and the comma-separated
 * chat ids in SENIOR_CODER_TG_GROUP_ID. Does nothing when either is missing.
 *
 * @param {string} failedProvider
 * @param {string} errMsg - only the first 200 characters are included
 * @param {string} agentId
 * @param {string} taskType
 */
function alertCreditExhaustion(failedProvider, errMsg, agentId, taskType) {
    const env = process.env;
    const botToken = env.SENIOR_CODER_BOT_TOKEN || env.TELEGRAM_BOT_TOKEN || '';
    const groupIds = (env.SENIOR_CODER_TG_GROUP_ID || '')
        .split(',')
        .map((id) => id.trim())
        .filter((id) => id.length > 0);
    if (botToken === '' || groupIds.length === 0) {
        return;
    }
    const lines = [
        `⚠️ ${failedProvider} credit exhausted — fallback to DeepSeek via ClawRouter for this call`,
        '',
        `agent: ${agentId}`,
        `task_type: ${taskType}`,
        `error: ${errMsg.slice(0, 200)}`,
        '',
        `Top up the ${failedProvider} account to restore full quality. The swarm continues at degraded quality on CEO-wallet-funded providers in the meantime.`,
    ];
    _sendTelegramAlert(botToken, groupIds, lines.join('\n'));
}
361
// Throttle proactive budget alerts: once per (provider, status) per process lifetime.
// Avoids spamming the chat when every Anthropic call in a row trips the same threshold.
const _budgetAlertsSent = new Set();
/**
 * Sends a Telegram budget alert at most once per (provider, status) pair.
 *
 * The pair is marked as handled before the Telegram configuration is checked,
 * so a pair seen while Telegram is unconfigured is not retried later.
 *
 * @param {string} provider
 * @param {string} status - 'frozen' selects the freeze wording; anything else the warning wording
 * @param {Object} report - reads `spent_month_usd`, `budget_usd` and `pct`
 */
function _maybeAlertBudget(provider, status, report) {
    const throttleKey = `${provider.toLowerCase()}:${status}`;
    if (_budgetAlertsSent.has(throttleKey)) {
        return;
    }
    _budgetAlertsSent.add(throttleKey);
    const env = process.env;
    const botToken = env.SENIOR_CODER_BOT_TOKEN || env.TELEGRAM_BOT_TOKEN || '';
    const groupIds = (env.SENIOR_CODER_TG_GROUP_ID || '')
        .split(',')
        .map((id) => id.trim())
        .filter((id) => id.length > 0);
    if (botToken === '' || groupIds.length === 0) {
        return;
    }
    const isFrozen = status === 'frozen';
    const headline = `${isFrozen ? '🔴' : '🟡'} ${provider} monthly budget ${isFrozen ? 'FROZEN — routing to fallback' : 'WARNING — still attempting'}`;
    const spentLine = `spent: $${report.spent_month_usd.toFixed(4)} / $${report.budget_usd?.toFixed(2)} (${report.pct.toFixed(0)}%)`;
    const thresholdLine = `threshold: ${isFrozen ? '95%' : '80%'}`;
    const envVarName = `${provider.toUpperCase()}_MONTHLY_BUDGET_USD`;
    const advice = isFrozen
        ? `New ${provider} calls go straight to DeepSeek (CEO-wallet-funded) until budget reset on month rollover OR ${envVarName} raised.`
        : `Top up ${provider} account or raise ${envVarName} to avoid hitting 95% freeze.`;
    const text = [headline, '', spentLine, thresholdLine, '', advice].join('\n');
    _sendTelegramAlert(botToken, groupIds, text);
}
383
// Shared Telegram send helper — fire-and-forget, native https, never blocks.
/**
 * Posts `text` to every chat in `groupIds` via the Telegram Bot API.
 *
 * Best-effort by design: network errors, timeouts and synchronous request
 * failures (e.g. a token with characters that are illegal in a URL path) are
 * all swallowed so an alert can never break the caller.
 *
 * @param {string} botToken
 * @param {string[]} groupIds - chat ids; integer-looking ids are sent as
 *   numbers, anything else (e.g. "@channelname") is sent as a string
 * @param {string} text
 */
function _sendTelegramAlert(botToken, groupIds, text) {
    for (const chatId of groupIds) {
        try {
            const rawId = String(chatId);
            // parseInt on a non-numeric id yields NaN, which JSON-serializes to null.
            const chat_id = /^-?\d+$/.test(rawId) ? Number.parseInt(rawId, 10) : rawId;
            const body = JSON.stringify({ chat_id, text });
            const req = https.request({
                hostname: 'api.telegram.org',
                path: `/bot${botToken}/sendMessage`,
                method: 'POST',
                headers: { 'Content-Type': 'application/json', 'Content-Length': Buffer.byteLength(body) },
                timeout: 5000,
            });
            req.on('error', () => { });
            // The `timeout` option only emits an event; the request must be
            // destroyed explicitly or a stalled socket lingers.
            req.on('timeout', () => req.destroy());
            req.end(body);
        }
        catch { /* best-effort alert: never propagate */ }
    }
}
399
/**
 * Derive the real LLM provider from the model name returned by ClawRouter.
 * The orchestrator's caller passes a `provider` hint, but ClawRouter may
 * route to a different actual model. This function gives us truth for the
 * wallet ledger so Bloomberg can produce honest per-provider reports.
 *
 * Matching is case-insensitive and the first matching rule wins, in the
 * order listed below.
 *
 * @param {string} model
 * @returns {string} provider slug, or 'unknown' if the model can't be
 *   classified; callers fall back to the caller-intent provider in that case.
 */
function inferProvider(model) {
    const name = (model || '').toLowerCase();
    if (name === '') {
        return 'unknown';
    }
    const has = (fragment) => name.includes(fragment);
    const begins = (prefix) => name.startsWith(prefix);
    const rules = [
        ['anthropic', () => has('sonnet') || has('haiku') || has('opus') || begins('claude-')],
        ['minimax', () => has('minimax')],
        ['deepseek', () => has('deepseek')],
        ['qwen', () => has('qwen')],
        ['openai', () => has('gpt-') || begins('o1-') || begins('o3-') || has('codex')],
        ['xai', () => has('grok')],
        ['google', () => has('gemini') || has('flash')],
    ];
    const hit = rules.find(([, matches]) => matches());
    return hit ? hit[0] : 'unknown';
}
428
/**
 * Map legacy provider+model pairs to ClawRouter v2.0 complexity levels.
 * This preserves the existing routing intelligence while funneling through the gateway.
 *
 *   - 'ollama': '0.6b' models → 'nano', 4b models → 'local', everything else
 *     (qwen3:14b, deepseek-r1:14b, ...) → 'power'
 *   - 'anthropic' Sonnet → 'apex' (constitutional); Haiku → 'exec'
 *   - 'clawrouter', 'minimax', 'openai' → 'exec'
 *   - anything else → 'power' (local)
 *
 * NOTE(review): an 'anthropic' model that is neither Sonnet nor Haiku falls
 * through to the 'power' default — confirm that is intended.
 *
 * @param {string} provider
 * @param {string} model - a missing model is treated as an empty name
 * @returns {'nano'|'local'|'power'|'apex'|'exec'}
 */
function mapLegacyToComplexity(provider, model) {
    const name = model || '';
    // Local models → stay local
    if (provider === 'ollama') {
        if (name.includes('0.6b'))
            return 'nano';
        // Match a standalone "4b" size tag. A plain includes('4b') also matches
        // "14b"/"34b", which sent the 14b models to 'local' instead of 'power'.
        if (/(^|[^\d.])4b/.test(name))
            return 'local';
        return 'power'; // qwen3:14b, deepseek-r1:14b
    }
    if (provider === 'anthropic') {
        // Anthropic Sonnet → APEX (constitutional)
        if (name.includes('sonnet'))
            return 'apex';
        // Anthropic Haiku → EXEC (cloud, not constitutional)
        if (name.includes('haiku'))
            return 'exec';
    }
    // ClawRouter DeepSeek, MiniMax and OpenAI → EXEC (cloud)
    if (provider === 'clawrouter' || provider === 'minimax' || provider === 'openai')
        return 'exec';
    // Default → POWER (local)
    return 'power';
}
459
/**
 * Detect if a call is constitutional (requires T3 APEX / Claude Sonnet).
 *
 * @param {string} provider
 * @param {string} model - only inspected when provider is 'anthropic'
 * @returns {boolean} true for an 'anthropic' provider with 'sonnet' in the model name
 */
function isConstitutionalCall(provider, model) {
    if (provider !== 'anthropic') {
        return false;
    }
    return model.includes('sonnet');
}
463
// Legacy aliases — these are now thin wrappers that route through callLLM().
// They exist so that call sites like callAnthropicCached() don't need immediate rewriting.
// All direct API calls are eliminated — every call goes through ClawRouter v2.0.
/**
 * Legacy alias: forwards to callLLM() with the provider fixed to 'anthropic'.
 * Prompt caching is now handled by ClawRouter v2.0 (QCG layer).
 *
 * @param {string} model
 * @param {string} systemPrompt
 * @param {string} userPrompt
 * @param {number} [timeoutMs] - callLLM's default applies when undefined
 * @returns {Promise<Object>} the callLLM response
 */
async function callAnthropicCached(model, systemPrompt, userPrompt, timeoutMs) {
    const response = await callLLM('anthropic', model, systemPrompt, userPrompt, timeoutMs);
    return response;
}
470
/**
 * Get sprint-level ClawRouter metrics for §17.6 sprint JSON fields.
 *
 * @returns {{llm_calls_routed: number, direct_api_violations: number,
 *   apex_calls: number, apex_judge_pattern_compliant: boolean}}
 *   a snapshot of the module-level counters
 */
function getClawRouterSprintMetrics() {
    const metrics = {};
    metrics.llm_calls_routed = _llmCallsRouted;
    metrics.direct_api_violations = _directApiViolations;
    metrics.apex_calls = _apexCalls;
    metrics.apex_judge_pattern_compliant = _apexJudgePatternCompliant;
    return metrics;
}
479
// B.9: Nano classifier — uses T0 NANO (qwen3:0.6b) via ClawRouter v2.0
/**
 * Classifies a task prompt into a routing category.
 *
 * The regex classifier runs first; any answer other than 'util' is treated
 * as confident and returned as-is. Otherwise the nano model is asked, and the
 * first whitespace-delimited word of its reply is used when it is a known
 * category. Any routing failure or unrecognized reply falls back to the regex
 * result.
 *
 * @param {string} prompt - only the first 300 characters are sent to the model
 * @returns {Promise<string>} category name
 */
async function classifyTaskSmart(prompt) {
    const regexType = (0, model_router_1.classifyTask)(prompt);
    const regexWasConfident = regexType !== 'util';
    if (regexWasConfident) {
        return regexType;
    }
    let nanoAnswer;
    try {
        const classifyPrompt = [
            'Classify this task into exactly one category. Reply with ONLY the category name, nothing else.',
            'Categories: code, reason, lang, util, audit, content, data, refactor-complex, agent-framework, codebase-scan',
            '',
            `Task: ${prompt.substring(0, 300)}`,
        ].join('\n');
        const result = await (0, exports.routeCall)({
            task_type: 'nano_classify',
            tier_class: 'text',
            complexity: 'nano',
            context_tokens: Math.ceil(classifyPrompt.length / 4),
            constitutional_flag: false,
            agent_id: 'nano-classifier',
            payload: { prompt: classifyPrompt, max_tokens: 20 },
        });
        nanoAnswer = result.content.trim().toLowerCase().split(/\s/)[0];
    }
    catch {
        return regexType;
    }
    const knownCategories = ['code', 'reason', 'lang', 'util', 'audit', 'content', 'data', 'refactor-complex', 'agent-framework', 'codebase-scan'];
    return knownCategories.includes(nanoAnswer) ? nanoAnswer : regexType;
}
503
// B.12: Context compression using T1 LOCAL (qwen3:4b) via ClawRouter v2.0 — reduces cloud token spend 70-80%
/**
 * Compresses a task context with the local model before it is sent on.
 *
 * Contexts under 1200 characters are returned unchanged. Only the first 4000
 * characters are sent for compression. The compressed text is used only when
 * it is longer than 100 characters and shorter than 90% of the original;
 * otherwise, and on any failure, the original context is returned.
 *
 * @param {string} context
 * @returns {Promise<string>} compressed text or the original context
 */
async function compressContext(context) {
    if (context.length < 1200) {
        return context; // not worth compressing
    }
    try {
        const instruction = 'Compress the following task context to under 600 words. Preserve all file paths, function names, technical requirements, and acceptance criteria. Remove prose filler and redundant explanations.';
        const compressPrompt = `${instruction}\n\n${context.substring(0, 4000)}`;
        const result = await (0, exports.routeCall)({
            task_type: 'qcg_compress',
            tier_class: 'text',
            complexity: 'local',
            context_tokens: Math.ceil(compressPrompt.length / 4),
            constitutional_flag: false,
            agent_id: 'context-compressor',
            payload: { prompt: compressPrompt, max_tokens: 800 },
        });
        const compressed = result.content.trim();
        const worthUsing = compressed.length > 100 && compressed.length < context.length * 0.9;
        if (!worthUsing) {
            return context;
        }
        const percent = Math.round(compressed.length / context.length * 100);
        log(exports.c.gray, ` [compress] ${context.length} → ${compressed.length} chars (${percent}%)`);
        return compressed;
    }
    catch {
        // non-fatal: keep the uncompressed context
        return context;
    }
}
524
+ // B.10: Local QA gate — structural checks only (no LLM — qwen3 think-mode unreliable for PASS/FAIL)
525
+ // LLM-based QA deferred to Claude supervisor review which gives structured feedback.
526
// Markers that, when repeated, indicate the agent's chain-of-thought was
// captured as the file body (the "rumination" failure mode — sprint smoke
// 2026-05-07). These are phrases a coder agent says to itself while deciding
// what to output; they should never appear in a deliverable file.
// Every entry must be unique: a duplicate makes each occurrence count twice.
const RUMINATION_MARKERS = [
    'let me think', 'the problem says', 'but the problem says', 'however, the problem',
    'so we output', 'therefore the answer is', 'we output nothing', 'we output the empty',
    'so the answer is', 'but to be safe', 'but note:', 'but to be precise',
    'so i will output', 'let me decide', 'final answer:',
    'final decision:', 'i will output', 'should i output', 'let me re-read',
];
/**
 * Counts rumination markers in `content` (case-insensitive).
 *
 * `hits` is the total number of non-overlapping occurrences summed over all
 * markers. A marker that contains another marker (e.g. 'but the problem says'
 * contains 'the problem says') therefore contributes to both counts.
 *
 * @param {string} content - null/undefined is treated as empty
 * @returns {{hits: number, ratio: number}} `ratio` is hits divided by the
 *   whitespace-delimited word count (minimum 1)
 */
function detectRumination(content) {
    const text = String(content ?? '');
    const lower = text.toLowerCase();
    let hits = 0;
    // The Set guards against a duplicate slipping back into the list.
    for (const marker of new Set(RUMINATION_MARKERS)) {
        // split() yields one more piece than there are occurrences — cheap, good enough.
        hits += lower.split(marker).length - 1;
    }
    const wordCount = Math.max(1, text.split(/\s+/).filter(Boolean).length);
    return { hits, ratio: hits / wordCount };
}
547
+ async function localQAGate(_task, fileContents) {
548
+ // Fail only on structurally empty files (< 50 chars indicates the model returned nothing useful)
549
+ const emptyFiles = fileContents.filter(f => (f.content || '').trim().length < 50);
550
+ if (emptyFiles.length > 0) {
551
+ return { pass: false, reason: `Files too short/empty: ${emptyFiles.map(f => f.path).join(', ')}` };
552
+ }
553
+ // Fail if all files are missing from disk (write step silently failed)
554
+ const { existsSync: _exists } = await Promise.resolve().then(() => __importStar(require('fs')));
555
+ const missingFiles = fileContents.filter(f => !_exists(f.path));
556
+ if (missingFiles.length > 0) {
557
+ return { pass: false, reason: `Files not written to disk: ${missingFiles.map(f => f.path).join(', ')}` };
558
+ }
559
+ // Rumination guard — catch the chain-of-thought-leaked-into-file failure
560
+ // mode. Threshold: ≥3 distinct rumination phrases AND ≥0.5% of words are
561
+ // rumination markers (catches both small and large dumps; a doc that
562
+ // legitimately uses one such phrase once is fine).
563
+ for (const f of fileContents) {
564
+ const { hits, ratio } = detectRumination(f.content || '');
565
+ if (hits >= 3 && ratio >= 0.005) {
566
+ return {
567
+ pass: false,
568
+ reason: `Rumination detected in ${f.path}: ${hits} chain-of-thought markers (${(ratio * 100).toFixed(2)}% of words). The agent dumped its reasoning into the file body instead of outputting just the file content.`,
569
+ };
570
+ }
571
+ }
572
+ // TICKET-085: deterministic syntactic typecheck for .ts/.tsx changes
573
+ // before LLM review. Catches broken syntax / unresolvable imports cheaply
574
+ // so we don't burn supervisor tokens on uncompileable code. Project-wide
575
+ // type errors are NOT caught (that's the Vercel build's job) — this is
576
+ // just the fast gate: does the file parse + can its imports be resolved.
577
+ const tsFiles = fileContents.filter(f => /\.(ts|tsx)$/.test(f.path));
578
+ if (tsFiles.length > 0) {
579
+ const tcResult = await typecheckChangedFiles(tsFiles.map(f => f.path));
580
+ if (!tcResult.pass) {
581
+ return {
582
+ pass: false,
583
+ reason: `Typecheck failed (${tcResult.errorCount} error(s)): ${tcResult.firstError}`,
584
+ };
585
+ }
586
+ }
587
+ return { pass: true, reason: `${fileContents.length} file(s) non-empty + no rumination + typecheck PASS — proceeding to supervisor review` };
588
+ }
589
// TICKET-085 (v2 — TICKET-088 fix): project-aware typecheck. v1 used
// loose-file mode + `npx -y typescript@5 tsc` and silently passed
// EVERYTHING because the npx invocation produced no tsc output and exit 0
// (npm "could not determine executable") — gate was a no-op for ~24h.
//
// v2: find the nearest tsconfig.json walking UP from each changed file,
// run `tsc -p <tsconfig> --noEmit --incremental` per project. Incremental
// build cache (.tsbuildinfo) makes subsequent runs fast (~2-5s typical).
// Catches both syntactic errors AND cross-file type/import errors —
// including the "imports from a non-existent file" class that bit us
// when sprint-1581 + sprint-1582 shipped hallucinated component graphs.
/**
 * Typecheck every TypeScript project that governs one of the changed files.
 *
 * Only diagnostics located in the changed files fail the gate; diagnostics in
 * other files and tooling failures (no `error TS…` line in the output) are
 * logged with `console.warn` and let through.
 *
 * @param {string[]} filePaths - Changed file paths; relative paths are
 *   resolved against `process.cwd()`.
 * @returns {Promise<{pass: boolean, errorCount: number, firstError: string}>}
 *   `firstError` holds the first matching diagnostic (trimmed, at most 200
 *   characters) on failure, or a short status message on success.
 */
async function typecheckChangedFiles(filePaths) {
    const { execSync } = require('child_process');
    const { existsSync } = require('fs');
    const { join, dirname, resolve } = require('path');
    // Upper bound on how many directory levels findTsconfig climbs.
    const MAX_TSCONFIG_LOOKUP_DEPTH = 12;
    // Non-pretty tsc diagnostics look like `path/to/file.ts(12,5): error TS2304: …`.
    // Anchor on the `(line,col):` tuple rather than the first `(`, because the
    // path itself may contain parentheses (e.g. `app/(marketing)/page.tsx`).
    const DIAGNOSTIC_LOCATION = /^(.+?)\(\d+,\d+\):\s/;
    // Find the nearest tsconfig.json walking up from a file path. Returns null
    // when the filesystem root or the lookup depth cap is reached first.
    function findTsconfig(filePath) {
        let dir = dirname(resolve(filePath));
        for (let i = 0; i < MAX_TSCONFIG_LOOKUP_DEPTH; i++) {
            const candidate = join(dir, 'tsconfig.json');
            if (existsSync(candidate)) {
                return candidate;
            }
            const parent = dirname(dir);
            if (parent === dir) {
                break;
            }
            dir = parent;
        }
        return null;
    }
    // Group changed files by which tsconfig governs them.
    const projects = new Set();
    let filesWithoutProject = 0;
    for (const f of filePaths) {
        const tc = findTsconfig(f);
        if (tc) {
            projects.add(tc);
        }
        else {
            filesWithoutProject++;
        }
    }
    if (projects.size === 0) {
        return { pass: true, errorCount: 0, firstError: `no tsconfig found for ${filesWithoutProject} file(s) — skipping` };
    }
    // Pick a local tsc binary. Prefer the repo's installed copy (fast, no
    // network). Fall back to npx-with-explicit-package only if not present.
    const localTsc = join(process.cwd(), 'node_modules', '.bin', 'tsc');
    const tscCmd = existsSync(localTsc)
        ? `"${localTsc}"`
        : `npx -y --package=typescript@5 tsc`;
    // Absolute paths of the files this task changed; loop-invariant, so built once.
    const changedAbs = new Set(filePaths.map((f) => resolve(f)));
    for (const tsconfig of projects) {
        try {
            // NOTE(review): the command is interpolated into a shell string; a
            // tsconfig path containing `"` or shell metacharacters is not escaped.
            execSync(`${tscCmd} -p "${tsconfig}" --noEmit --incremental`, {
                encoding: 'utf-8', timeout: 120_000, stdio: 'pipe', cwd: process.cwd(),
            });
        }
        catch (e) {
            const out = (e.stdout || '') + (e.stderr || '');
            const lines = out.split('\n').filter((l) => /error TS\d+/i.test(l));
            if (lines.length === 0) {
                // Tooling failure (timeout, tsc not found, OOM). Don't block pipeline
                // on infra; supervisor still has a shot at catching substantive bugs.
                console.warn(`[typecheck-gate] tooling failure on ${tsconfig}: ${(e.message || '').slice(0, 120)}`);
                continue; // try next project
            }
            // Filter errors to JUST the files this task changed. Other errors
            // (pre-existing in unrelated files) shouldn't block this task — they
            // belong to whoever introduced them, not the current coder.
            const ourErrors = lines.filter((l) => {
                const m = l.match(DIAGNOSTIC_LOCATION);
                return m !== null && changedAbs.has(resolve(m[1].trim()));
            });
            if (ourErrors.length > 0) {
                return {
                    pass: false,
                    errorCount: ourErrors.length,
                    firstError: ourErrors[0].trim().slice(0, 200),
                };
            }
            // tsc errored but all errors are in files we didn't touch — let it through.
            console.warn(`[typecheck-gate] ${lines.length} pre-existing error(s) in ${tsconfig} unrelated to this task — passing`);
        }
    }
    return { pass: true, errorCount: 0, firstError: `${projects.size} project(s) typechecked clean` };
}
674
// B.11: Tiered debugger — routes debug effort by issue severity via ClawRouter v2.0
/**
 * Ask the router for a fix, choosing the debug tier from the review's issues.
 *
 * Tier selection (first match wins):
 *   - any `critical` issue, or a description mentioning "architect" → `debug_architectural` / `exec`
 *   - any `high` issue, or a description mentioning "logic"          → `debug_systemic` / `power`
 *   - otherwise                                                      → `debug_minor` / `power`
 *
 * @param {{context: string}} task - Task whose `context` is truncated into the prompt.
 * @param {{issues?: Array<{severity?: string, file?: string, description?: string}>}} review
 *   Review result; a missing `issues` list is treated as empty.
 * @param {*} _systemPrompt - Unused; kept for signature compatibility.
 * @returns {Promise<string|null>} The router's `content`, or null when it is
 *   empty or the routed call fails (the failure is logged, not thrown).
 */
async function tieredDebug(task, review, _systemPrompt) {
    const issues = review.issues || [];
    const issueText = issues.map((i) => `[${i.severity}] ${i.file}: ${i.description}`).join('\n');
    // An issue without a description must not abort tier selection.
    const mentions = (issue, needle) => String(issue.description ?? '').toLowerCase().includes(needle);
    const hasArchitecture = issues.some((i) => i.severity === 'critical' || mentions(i, 'architect'));
    const hasSystemic = issues.some((i) => i.severity === 'high' || mentions(i, 'logic'));
    let tier;
    if (hasArchitecture) {
        // Tier 3: T2.5 EXEC — deep architectural issues (via ClawRouter)
        tier = { taskType: 'debug_architectural', complexity: 'exec', contextChars: 800, maxTokens: 4096, lead: 'Fix this code. Issues:' };
    }
    else if (hasSystemic) {
        // Tier 2: T2 POWER (deepseek-r1:14b equivalent) — logical/systemic issues
        tier = { taskType: 'debug_systemic', complexity: 'power', contextChars: 600, maxTokens: 2048, lead: 'Fix these code issues:' };
    }
    else {
        // Tier 1: T2 POWER (qwen3:14b) — minor issues
        tier = { taskType: 'debug_minor', complexity: 'power', contextChars: 500, maxTokens: 1024, lead: 'Fix these minor code issues:' };
    }
    try {
        const result = await (0, exports.routeCall)({
            task_type: tier.taskType, tier_class: 'text', complexity: tier.complexity,
            // Rough token estimate: ~4 characters per token over issues + truncated context.
            context_tokens: Math.ceil((issueText.length + tier.contextChars) / 4), constitutional_flag: false,
            agent_id: 'tiered-debugger',
            payload: {
                prompt: `${tier.lead}\n${issueText}\n\nTask: ${task.context.substring(0, tier.contextChars)}`,
                max_tokens: tier.maxTokens,
            },
        });
        // Every tier reports "no fix" the same way: null.
        return result.content || null;
    }
    catch (e) {
        log(exports.c.yellow, `  [tiered-debug] ${e.message}`);
    }
    return null;
}
716
/**
 * POST a string body to `url` and resolve with the parsed JSON response.
 *
 * Uses the module-scope `https` / `http` bindings, chosen by URL protocol.
 *
 * @param {string} url - Absolute http(s) URL; its query string is sent as part of the request path.
 * @param {Object<string, string>} headers - Request headers; `Content-Length` is set from `body`.
 * @param {string} body - Request payload.
 * @param {number} timeoutMs - Socket idle timeout in milliseconds.
 * @returns {Promise<any>} Parsed JSON body. Rejects when the body is not valid
 *   JSON, when the parsed body has a truthy `error` field, on a request or
 *   response stream error, or on timeout.
 */
function httpPost(url, headers, body, timeoutMs) {
    const parsed = new URL(url);
    const isHttps = parsed.protocol === 'https:';
    const lib = isHttps ? https : http;
    return new Promise((resolve, reject) => {
        const req = lib.request({
            hostname: parsed.hostname, port: parsed.port || (isHttps ? 443 : undefined),
            // Include the query string; `pathname` alone would drop `?key=…` parameters.
            path: `${parsed.pathname}${parsed.search}`, method: 'POST',
            headers: { ...headers, 'Content-Length': Buffer.byteLength(body).toString() },
        }, (res) => {
            // Decode as a stream so a multi-byte character split across chunks stays intact.
            res.setEncoding('utf8');
            let data = '';
            res.on('data', (chunk) => { data += chunk; });
            res.on('error', reject);
            res.on('end', () => {
                try {
                    const result = JSON.parse(data);
                    if (result.error) {
                        reject(new Error(`API error: ${result.error.message || JSON.stringify(result.error)}`));
                        return;
                    }
                    resolve(result);
                }
                catch {
                    reject(new Error(`Failed to parse response: ${data.substring(0, 500)}`));
                }
            });
        });
        req.on('error', reject);
        // destroy() may also surface an 'error' event; the timeout rejection below
        // settles the promise first, so the later one is a no-op.
        req.setTimeout(timeoutMs, () => { req.destroy(); reject(new Error(`API timeout (${timeoutMs / 1000}s)`)); });
        req.write(body);
        req.end();
    });
}
748
+ // <<<EXTRACT-END-primitives