@stackbilt/aegis-core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148) hide show
  1. package/package.json +96 -0
  2. package/schema.sql +586 -0
  3. package/src/adapters/voice/cloudflare-agent.ts +34 -0
  4. package/src/auth.ts +124 -0
  5. package/src/bluesky.ts +464 -0
  6. package/src/claude-tools/content.ts +188 -0
  7. package/src/claude-tools/email.ts +69 -0
  8. package/src/claude-tools/github.ts +440 -0
  9. package/src/claude-tools/goals.ts +116 -0
  10. package/src/claude-tools/index.ts +353 -0
  11. package/src/claude-tools/web.ts +59 -0
  12. package/src/claude.ts +406 -0
  13. package/src/codebeast.ts +200 -0
  14. package/src/composite.ts +715 -0
  15. package/src/content/column.ts +80 -0
  16. package/src/content/hero-image.ts +47 -0
  17. package/src/content/index.ts +27 -0
  18. package/src/content/journal.ts +91 -0
  19. package/src/content/roundtable.ts +163 -0
  20. package/src/core.ts +309 -0
  21. package/src/dashboard.ts +620 -0
  22. package/src/decision-docs.ts +284 -0
  23. package/src/dispatch.ts +13 -0
  24. package/src/edge-env.ts +58 -0
  25. package/src/email.ts +850 -0
  26. package/src/exports.ts +156 -0
  27. package/src/github-projects.ts +312 -0
  28. package/src/github.ts +670 -0
  29. package/src/groq.ts +247 -0
  30. package/src/health-page.ts +578 -0
  31. package/src/index.ts +89 -0
  32. package/src/kernel/argus-actions.ts +397 -0
  33. package/src/kernel/argus-correlation.ts +639 -0
  34. package/src/kernel/board.ts +91 -0
  35. package/src/kernel/briefing.ts +177 -0
  36. package/src/kernel/classify-memory-topic.ts +166 -0
  37. package/src/kernel/cognition.ts +377 -0
  38. package/src/kernel/court-cards.ts +163 -0
  39. package/src/kernel/dispatch.ts +587 -0
  40. package/src/kernel/domain.ts +50 -0
  41. package/src/kernel/dynamic-tools.ts +322 -0
  42. package/src/kernel/executor-port.ts +45 -0
  43. package/src/kernel/executors/claude.ts +73 -0
  44. package/src/kernel/executors/direct.ts +237 -0
  45. package/src/kernel/executors/groq.ts +18 -0
  46. package/src/kernel/executors/index.ts +87 -0
  47. package/src/kernel/executors/tarotscript.ts +104 -0
  48. package/src/kernel/executors/workers-ai.ts +54 -0
  49. package/src/kernel/insight-cache.ts +76 -0
  50. package/src/kernel/memory/agenda.ts +200 -0
  51. package/src/kernel/memory/blocks.ts +188 -0
  52. package/src/kernel/memory/consolidation.ts +194 -0
  53. package/src/kernel/memory/episodic.ts +241 -0
  54. package/src/kernel/memory/goals.ts +156 -0
  55. package/src/kernel/memory/graph.ts +290 -0
  56. package/src/kernel/memory/index.ts +11 -0
  57. package/src/kernel/memory/insights.ts +316 -0
  58. package/src/kernel/memory/procedural.ts +467 -0
  59. package/src/kernel/memory/pruning.ts +67 -0
  60. package/src/kernel/memory/recall.ts +367 -0
  61. package/src/kernel/memory/semantic.ts +315 -0
  62. package/src/kernel/memory/synthesis.ts +161 -0
  63. package/src/kernel/memory-adapter.ts +369 -0
  64. package/src/kernel/memory-guardrails.ts +76 -0
  65. package/src/kernel/port.ts +23 -0
  66. package/src/kernel/resilience.ts +322 -0
  67. package/src/kernel/router.ts +471 -0
  68. package/src/kernel/scheduled/agent-dispatch.ts +252 -0
  69. package/src/kernel/scheduled/argus-analytics.ts +247 -0
  70. package/src/kernel/scheduled/argus-heartbeat.ts +320 -0
  71. package/src/kernel/scheduled/argus-notify.ts +348 -0
  72. package/src/kernel/scheduled/board-sync.ts +110 -0
  73. package/src/kernel/scheduled/ci-watcher.ts +125 -0
  74. package/src/kernel/scheduled/cognitive-metrics.ts +377 -0
  75. package/src/kernel/scheduled/consolidation.ts +229 -0
  76. package/src/kernel/scheduled/content-drip.ts +47 -0
  77. package/src/kernel/scheduled/content.ts +6 -0
  78. package/src/kernel/scheduled/conversation-facts.ts +204 -0
  79. package/src/kernel/scheduled/cost-report.ts +84 -0
  80. package/src/kernel/scheduled/curiosity.ts +219 -0
  81. package/src/kernel/scheduled/dev-activity.ts +44 -0
  82. package/src/kernel/scheduled/digest.ts +317 -0
  83. package/src/kernel/scheduled/dreaming/agenda-triage.ts +115 -0
  84. package/src/kernel/scheduled/dreaming/facts.ts +239 -0
  85. package/src/kernel/scheduled/dreaming/index.ts +8 -0
  86. package/src/kernel/scheduled/dreaming/llm.ts +33 -0
  87. package/src/kernel/scheduled/dreaming/pattern-synthesis.ts +124 -0
  88. package/src/kernel/scheduled/dreaming/persona.ts +75 -0
  89. package/src/kernel/scheduled/dreaming/symbolic.ts +31 -0
  90. package/src/kernel/scheduled/dreaming/task-proposals.ts +80 -0
  91. package/src/kernel/scheduled/dreaming.ts +66 -0
  92. package/src/kernel/scheduled/entropy.ts +149 -0
  93. package/src/kernel/scheduled/escalation.ts +192 -0
  94. package/src/kernel/scheduled/feed-watcher.ts +206 -0
  95. package/src/kernel/scheduled/goals.ts +214 -0
  96. package/src/kernel/scheduled/governance.ts +41 -0
  97. package/src/kernel/scheduled/heartbeat.ts +220 -0
  98. package/src/kernel/scheduled/inbox-processor.ts +174 -0
  99. package/src/kernel/scheduled/index.ts +245 -0
  100. package/src/kernel/scheduled/issue-proposer.ts +478 -0
  101. package/src/kernel/scheduled/issue-watcher.ts +128 -0
  102. package/src/kernel/scheduled/pr-automerge.ts +213 -0
  103. package/src/kernel/scheduled/product-health.ts +107 -0
  104. package/src/kernel/scheduled/reflection.ts +373 -0
  105. package/src/kernel/scheduled/self-improvement.ts +114 -0
  106. package/src/kernel/scheduled/social-engage.ts +175 -0
  107. package/src/kernel/scheduled/task-audit.ts +60 -0
  108. package/src/kernel/symbolic.ts +156 -0
  109. package/src/kernel/types.ts +145 -0
  110. package/src/landing.ts +1190 -0
  111. package/src/lib/audit-chain/chain.ts +28 -0
  112. package/src/lib/audit-chain/types.ts +12 -0
  113. package/src/lib/observability/errors.ts +55 -0
  114. package/src/markdown.ts +164 -0
  115. package/src/mcp/handlers.ts +647 -0
  116. package/src/mcp/server.ts +184 -0
  117. package/src/mcp/tools.ts +316 -0
  118. package/src/mcp-client.ts +275 -0
  119. package/src/mcp-server.ts +2 -0
  120. package/src/operator/config.example.ts +60 -0
  121. package/src/operator/config.ts +60 -0
  122. package/src/operator/index.ts +46 -0
  123. package/src/operator/persona.example.ts +34 -0
  124. package/src/operator/persona.ts +34 -0
  125. package/src/operator/prompt-builder.ts +190 -0
  126. package/src/operator/types.ts +43 -0
  127. package/src/pulse.ts +1179 -0
  128. package/src/routes/bluesky.ts +116 -0
  129. package/src/routes/cc-tasks.ts +328 -0
  130. package/src/routes/codebeast.ts +1 -0
  131. package/src/routes/content.ts +194 -0
  132. package/src/routes/conversations.ts +25 -0
  133. package/src/routes/dynamic-tools.ts +111 -0
  134. package/src/routes/feedback.ts +192 -0
  135. package/src/routes/health.ts +147 -0
  136. package/src/routes/messages.ts +228 -0
  137. package/src/routes/observability.ts +82 -0
  138. package/src/routes/operator-logs.ts +42 -0
  139. package/src/routes/pages.ts +96 -0
  140. package/src/routes/sessions.ts +54 -0
  141. package/src/sanitize.ts +73 -0
  142. package/src/schema-enums.ts +155 -0
  143. package/src/search.ts +112 -0
  144. package/src/task-intelligence.ts +497 -0
  145. package/src/types.ts +194 -0
  146. package/src/ui.ts +5 -0
  147. package/src/version.ts +3 -0
  148. package/src/workers-ai-chat.ts +333 -0
@@ -0,0 +1,471 @@
1
+ import { getProcedure, findNearMiss, procedureKey, PROCEDURE_MIN_SUCCESSES, PROCEDURE_MIN_SUCCESS_RATE, getConversationHistory } from './memory/index.js';
2
+ import { askGroq, askGroqWithLogprobs } from '../groq.js';
3
+ import type { KernelIntent, ExecutionPlan, Executor } from './types.js';
4
+ import { buildClassifySystem, getTaskPatterns } from '../operator/prompt-builder.js';
5
+ import { domainPreFilter } from './domain.js';
6
+
7
// ─── Confidence Thresholds ──────────────────────────────────
// Classifier confidence is split into three zones. Both route() and
// selectDefaultExecutor() branch on these two boundaries.
const CONFIDENCE_TRUST = 0.80; // ≥ 0.80 → use classification as-is
const CONFIDENCE_VERIFY = 0.50; // 0.50–0.79 → re-classify with Groq logprobs
// < 0.50 → escalate (skip procedural lookup)
11
+
12
+ // ─── TarotScript classify-cast (zero inference, deterministic) ─
13
+
14
// Map TarotScript confidence levels to numeric values.
// Relative to the thresholds in this file: `high` (0.92) is in the trust zone,
// `moderate` (0.70) in the verify zone, and `low` (0.35) in the escalate zone.
// Labels not listed here are handled by the caller, which falls back to 0.70.
const TS_CONFIDENCE: Record<string, number> = { high: 0.92, moderate: 0.70, low: 0.35 };
16
+
17
/**
 * Normalize a TarotScript card name into a classification key.
 *
 * The name is trimmed, lower-cased, and every run of internal whitespace is
 * collapsed to a single underscore (e.g. `"General Knowledge"` →
 * `"general_knowledge"`). Trimming first prevents padded names from producing
 * leading/trailing underscores that could never match a task pattern.
 *
 * @param name - Card name as reported by the classify-cast spread.
 * @returns The snake_case classification key.
 */
function cardNameToClassification(name: string): string {
  return name.trim().toLowerCase().replace(/\s+/g, '_');
}
20
+
21
/**
 * Shape of the `facts` object read from the TarotScript `classify-cast`
 * response. Every field is optional and typed as a string; numeric and
 * boolean values are decoded by classifyWithTarotScript().
 */
interface ClassifyCastFacts {
  /** Card name of the winning classification; normalized with cardNameToClassification(). */
  classification?: string;
  /** Complexity as a base-10 integer string; treated as '2' when absent. */
  classification_complexity?: string;
  /** Only the exact string 'true' is interpreted as "needs tools". */
  classification_needs_tools?: string;
  /** Confidence label used as a key into TS_CONFIDENCE; treated as 'moderate' when absent. */
  classification_confidence?: string;
  /** When present and not 'none', used as the classification instead of `classification`. */
  tiebreaker_override?: string;
  /** Not read anywhere in this file. */
  compound_intent?: string;
  /** Not read anywhere in this file. */
  secondary_classification?: string;
}
30
+
31
/**
 * Classify a message by running the `classify-cast` spread on the TarotScript
 * worker.
 *
 * @param fetcher - Binding used to reach the TarotScript worker.
 * @param message - Raw message text; sent as both the querent intention and state.
 * @param source - Origin label forwarded in the querent state.
 * @returns The decoded classification, or `null` when the worker responds with
 *   a non-OK status or the response carries no `facts.classification`.
 *   Network and JSON-decoding errors are not caught here and propagate to the
 *   caller.
 */
async function classifyWithTarotScript(
  fetcher: Fetcher,
  message: string,
  source: string,
): Promise<{ classification: string; complexity: number; needsTools: boolean; confidence: number } | null> {
  const response = await fetcher.fetch('https://tarotscript-worker/run', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({
      spreadType: 'classify-cast',
      querent: {
        id: 'aegis-router',
        intention: message,
        state: { message, source },
      },
    }),
  });

  if (!response.ok) return null;

  const result = await response.json() as { facts?: ClassifyCastFacts };
  const facts = result.facts;
  if (!facts?.classification) return null;

  // Apply tiebreaker override if present ('none' means "no override").
  const override = facts.tiebreaker_override && facts.tiebreaker_override !== 'none'
    ? facts.tiebreaker_override : null;
  const rawClass = override ?? cardNameToClassification(facts.classification);

  // parseInt yields NaN for non-numeric text, and NaN fails every numeric
  // comparison made later during routing — fall back to the same default (2)
  // that is used when the field is absent.
  const parsedComplexity = parseInt(facts.classification_complexity ?? '2', 10);
  const complexity = Number.isNaN(parsedComplexity) ? 2 : parsedComplexity;

  return {
    classification: rawClass,
    complexity,
    needsTools: facts.classification_needs_tools === 'true',
    // Unknown confidence labels are treated like 'moderate'.
    confidence: TS_CONFIDENCE[facts.classification_confidence ?? 'moderate'] ?? 0.70,
  };
}
67
+
68
+ // ─── Workers AI classification (zero cost, zero network hop) ─
69
+
70
/**
 * Send the classification prompt to the Workers AI binding and hand back the
 * model's raw text.
 *
 * @param ai - Workers AI binding.
 * @param systemPrompt - Classification instructions (system role).
 * @param userPrompt - Text to classify (user role).
 * @returns The `response` field of the model output, or `''` when it is absent.
 */
async function classifyWithWorkersAI(
  ai: Ai,
  systemPrompt: string,
  userPrompt: string,
): Promise<string> {
  const { response } = (await ai.run('@cf/meta/llama-3.2-3b-instruct', {
    messages: [
      { role: 'system', content: systemPrompt },
      { role: 'user', content: userPrompt },
    ],
    max_tokens: 200,
    temperature: 0.1,
  })) as { response?: string };

  return response ?? '';
}
85
+
86
+
87
+
88
// Fallback routes — used for degraded procedure replanning and when JSON classification fails.
// Keyed by classification. route() reads this table on the internal-trigger path
// (falling back to 'direct' for unlisted keys) and when a stored procedure is
// degraded/broken (falling back to 'claude' for unlisted keys).
// NOTE(review): 'user_correction' is handled by selectDefaultExecutor() but has
// no entry here — confirm whether that is intentional.
const DEFAULT_ROUTES: Record<string, Executor> = {
  heartbeat: 'direct',
  bizops_read: 'gpt_oss',
  bizops_mutate: 'gpt_oss',
  general_knowledge: 'gpt_oss',
  memory_recall: 'gpt_oss',
  greeting: 'gpt_oss',
  code_task: 'claude_code',
  code_review: 'gpt_oss',
  self_improvement: 'composite',
  web_research: 'gpt_oss',
  goal_execution: 'composite',
  symbolic_consultation: 'gpt_oss',
  support_triage: 'gpt_oss',
  tarot_pulse: 'gpt_oss',
  tarot_trajectory: 'gpt_oss',
  tarot_multi_angle: 'gpt_oss',
  tarot_deep: 'gpt_oss',
  tarot_shadow: 'gpt_oss',
  tarot_orchestration: 'gpt_oss',
  tarot_planning: 'gpt_oss',
};
111
+
112
// Patterns that are explicitly decomposable into parallel tool subtasks.
// All other patterns stay on single-model executors that preserve conversation
// history and avoid the orchestrator reinterpreting user intent.
// NOTE(review): nothing in this file reads this set — selectDefaultExecutor()
// hard-codes the same two patterns. Confirm whether it is meant to be consulted.
const COMPOSITE_ELIGIBLE: ReadonlySet<string> = new Set([
  'self_improvement', // code review + GitHub + analysis — always multi-model
  'goal_execution', // autonomous goals — often multi-tool
]);
119
+
120
/**
 * Pick an executor for a classification that has no mature procedure, using
 * the intent's complexity, tool requirement, and classifier confidence.
 *
 * Missing intent fields default to: complexity 2, needsTools true,
 * confidence 0.8.
 *
 * @param classification - Task pattern assigned to the intent.
 * @param intent - Intent carrying optional `complexity`, `needsTools`, `confidence`.
 * @returns The executor to run the intent on.
 */
function selectDefaultExecutor(classification: string, intent: KernelIntent): Executor {
  const level = intent.complexity ?? 2;
  const toolsRequired = intent.needsTools ?? true;
  const certainty = intent.confidence ?? 0.8;

  // Patterns whose executor does not depend on confidence.
  switch (classification) {
    case 'heartbeat':
      return 'direct';
    case 'code_task':
      return 'claude_code';
    case 'self_improvement':
      // Always the multi-model pipeline (code review + GitHub + analysis).
      return 'composite';
    case 'goal_execution':
      // A simple single-tool goal runs on the direct tool loop; anything
      // else goes through the composite pipeline.
      return level <= 1 && toolsRequired ? 'gpt_oss' : 'composite';
    case 'greeting': // re-entry briefing
    case 'symbolic_consultation': // TarotScript classifies, an LLM responds
    case 'support_triage':
    case 'user_correction': // needs thread history to recover the original intent
    case 'memory_recall': // needs memory context that workers_ai/groq lack
      return 'gpt_oss';
    default:
      break;
  }

  // Escalate zone: the classification itself is suspect.
  if (certainty < CONFIDENCE_VERIFY) return 'claude';

  // High complexity goes to deep reasoning regardless of zone.
  if (level >= 3) return 'claude_opus';

  // Verify zone: one tier above standard routing, never composite.
  if (certainty < CONFIDENCE_TRUST) {
    return toolsRequired || level <= 1 ? 'gpt_oss' : 'claude';
  }

  // Trust zone: mutations, tool users, and read/research patterns all use the
  // single-model tool loop.
  const usesToolLoop =
    classification === 'bizops_mutate' ||
    toolsRequired ||
    classification === 'bizops_read' ||
    classification === 'web_research';
  if (usesToolLoop) return 'gpt_oss';

  // No tools needed: cheapest model for simple queries, gpt_oss otherwise.
  return level <= 1 ? 'workers_ai' : 'gpt_oss';
}
185
+
186
/** Outcome of route(): the chosen execution plan plus optional routing telemetry. */
export interface RouteResult {
  /** Executor, reasoning text, cost ceiling, and (when matched) procedure id. */
  plan: ExecutionPlan;
  /**
   * Either the value returned by findNearMiss() for the procedure key, or a
   * `router_fallback:*` marker recorded when classification fell back to
   * `general_knowledge`.
   */
  nearMiss?: string;
  /** True when the verify-zone Groq re-classification replaced the original classification. */
  reclassified?: boolean;
}
191
+
192
/**
 * Decide which executor should handle an intent.
 *
 * Phases:
 *  - 0:   Pre-classified internal triggers skip classification and use a stored
 *         procedure or the DEFAULT_ROUTES table.
 *  - 0.5: Domain pre-filter tags the intent (observation only).
 *  - 1:   Classification via TarotScript classify-cast, then Workers AI, then
 *         Groq; falls back to `general_knowledge`.
 *  - 1.5: Confidence gate — below CONFIDENCE_VERIFY returns immediately;
 *         below CONFIDENCE_TRUST asks Groq (with logprobs) for a second opinion.
 *  - 2:   Procedural lookup for the (classification, complexity) key.
 *  - 3:   selectDefaultExecutor() when no mature procedure applies.
 *
 * Side effects: mutates `intent` (`domain`, `domainConfidence`, `classified`,
 * `complexity`, `needsTools`, `confidence`, `classifierSource`) and writes
 * progress to the console.
 *
 * @param intent - Intent to route; updated in place with classification results.
 * @param db - Database handle used for procedure and conversation-history lookups.
 * @param groqApiKey - Passed to the Groq classification calls.
 * @param groqModel - Passed to the Groq classification calls.
 * @param groqBaseUrl - Optional base URL passed to the Groq classification calls.
 * @param ai - Optional Workers AI binding; when absent, Groq is used directly.
 * @param tarotscriptFetcher - Optional TarotScript binding; when absent, Phase 1a is skipped.
 * @returns The execution plan plus near-miss / re-classification telemetry.
 */
export async function route(
  intent: KernelIntent,
  db: D1Database,
  groqApiKey: string,
  groqModel: string,
  groqBaseUrl?: string,
  ai?: Ai,
  tarotscriptFetcher?: Fetcher,
): Promise<RouteResult> {
  // ─── Phase 0: Internal triggers bypass classification ──────
  if (intent.source.channel === 'internal' && intent.classified) {
    const procKey = procedureKey(intent.classified, intent.complexity);
    const procedure = await getProcedure(db, procKey);

    if (procedure) {
      if (procedure.status === 'degraded' || procedure.status === 'broken') {
        return {
          plan: {
            executor: DEFAULT_ROUTES[intent.classified] ?? 'direct',
            reasoning: `Internal trigger "${intent.classified}" — procedure ${procedure.status}, replanning`,
            costCeiling: 'free',
          },
        };
      }

      // NOTE(review): unlike Phase 2, maturity here checks only the success
      // count, not the success rate — confirm this is intentional.
      if (procedure.success_count >= PROCEDURE_MIN_SUCCESSES) {
        return {
          plan: {
            executor: procedure.executor as Executor,
            reasoning: `Internal trigger with known procedure (${procedure.success_count} successes)`,
            procedureId: procedure.id,
            costCeiling: intent.costCeiling,
          },
        };
      }
    }

    return {
      plan: {
        executor: DEFAULT_ROUTES[intent.classified] ?? 'direct',
        reasoning: `Internal trigger "${intent.classified}" — no mature procedure, using default`,
        costCeiling: 'free',
      },
    };
  }

  // ─── Phase 0.5: Domain pre-filter (observe only) ──────────
  const domainTag = domainPreFilter(intent.raw);
  intent.domain = domainTag.domain;
  intent.domainConfidence = domainTag.confidence;
  console.log(`[router] domain pre-filter: ${domainTag.domain} (confidence=${domainTag.confidence.toFixed(2)})`);

  // ─── Phase 1: Classification ──────────────────────────────
  // Priority: TarotScript classify-cast (zero cost, semantic keyword matching)
  //         → Workers AI (zero cost, zero network hop)
  //         → Groq (low cost fallback)
  let classification = '';
  let routerNearMiss: string | undefined;

  // Fetch recent conversation context for multi-turn classification.
  // Only the LLM classifiers receive this; classify-cast gets the raw message.
  let classifyInput = intent.raw;
  if (intent.source.channel === 'web' && intent.source.threadId) {
    try {
      const history = await getConversationHistory(db, intent.source.threadId, 4);
      if (history.length > 0) {
        const contextLines = history.slice(-4).map(m =>
          `${m.role === 'user' ? 'User' : 'Assistant'}: ${m.content.slice(0, 200)}`
        ).join('\n');
        classifyInput = `[Conversation context]\n${contextLines}\n\n[Current message to classify]\n${intent.raw}`;
      }
    } catch {
      // Context fetch failed — classify without it
    }
  }

  // ── Phase 1a: TarotScript classify-cast ──
  if (tarotscriptFetcher) {
    try {
      const tsResult = await classifyWithTarotScript(
        tarotscriptFetcher,
        intent.raw,
        intent.source.channel === 'internal' ? 'internal' : 'user',
      );

      if (tsResult && getTaskPatterns().includes(tsResult.classification)) {
        // Guard: reject symbolic_consultation from classify-cast unless the message
        // contains explicit tarot signals. The relevance scorer falls back to random
        // draws when no card matches, which can produce false symbolic_consultation
        // classifications for ordinary messages.
        let accepted = true;
        if (tsResult.classification === 'symbolic_consultation') {
          const raw = intent.raw.toLowerCase();
          const tarotSignals = /\b(tarot|divination|oracle)\b|pull\s+a\s+card|what\s+do\s+the\s+cards|do\s+a\s+(tarot\s+)?reading|tarot\s+spread|card\s+reading|run\s+a\s+spread/i;
          accepted = tarotSignals.test(raw);
          if (!accepted) {
            // Don't accept — fall through to Phase 1b
            console.log(`[router] classify-cast returned symbolic_consultation but no tarot signals — rejecting, falling through to LLM`);
          }
        }

        if (accepted) {
          classification = tsResult.classification;
          intent.complexity = tsResult.complexity;
          intent.needsTools = tsResult.needsTools;
          intent.confidence = tsResult.confidence;
          intent.classifierSource = 'classify-cast';
          console.log(`[router] classify-cast: ${classification} (confidence=${tsResult.confidence})`);
        }
      }
    } catch (err) {
      console.warn('[router] classify-cast failed, falling back to LLM chain:', err instanceof Error ? err.message : String(err));
    }
  }

  // ── Phase 1b: LLM fallback (Workers AI → Groq) ──
  if (!classification) {
    const classifySystem = buildClassifySystem();
    let rawClassification: string | null = null;

    if (ai) {
      try {
        rawClassification = await classifyWithWorkersAI(ai, classifySystem, classifyInput);
        if (!rawClassification || rawClassification.trim().length === 0) {
          // Empty output counts as a miss so Groq gets a turn.
          rawClassification = null;
        } else {
          intent.classifierSource = 'workers-ai';
        }
      } catch (err) {
        console.warn('[router] Workers AI classification failed, falling back to Groq:', err instanceof Error ? err.message : String(err));
      }
    }

    if (!rawClassification) {
      try {
        rawClassification = await askGroq(groqApiKey, groqModel, classifySystem, classifyInput, groqBaseUrl);
        intent.classifierSource = 'groq';
      } catch (err) {
        console.warn('[router] Groq classification failed — falling back to general_knowledge:', err instanceof Error ? err.message : String(err));
        routerNearMiss = 'router_fallback:groq_error';
        classification = 'general_knowledge';
      }
    }

    if (rawClassification && !classification) {
      const cleaned = rawClassification.trim();
      try {
        // Preferred output is a JSON object with pattern/complexity/needs_tools/confidence.
        const parsed = JSON.parse(cleaned);
        classification = (parsed.pattern as string ?? '').toLowerCase().replace(/[^a-z_]/g, '');
        intent.complexity = parsed.complexity ?? 2;
        intent.needsTools = parsed.needs_tools ?? false;
        intent.confidence = parsed.confidence ?? 0.8;
      } catch {
        // Not JSON — treat the whole reply as a bare pattern name.
        classification = cleaned.toLowerCase().replace(/[^a-z_]/g, '');
      }

      if (!getTaskPatterns().includes(classification)) {
        console.warn(`[router] unrecognized classification "${cleaned.slice(0, 80)}" — falling back to general_knowledge`);
        routerNearMiss = `router_fallback:invalid_class:${cleaned.slice(0, 50)}`;
        classification = 'general_knowledge';
      }
    }
  }

  if (!classification) {
    classification = 'general_knowledge';
  }

  intent.classified = classification;

  // ─── Phase 1.5: Confidence evaluation ──────────────────────
  const confidence = intent.confidence ?? 0.8;
  let reclassified = false;

  if (confidence < CONFIDENCE_VERIFY) {
    // Escalate zone (<0.50): classification is suspect — skip procedural lookup,
    // go straight to confidence-aware default routing
    console.log(`[router] Low confidence ${confidence.toFixed(2)} for "${classification}" — escalating`);
    const executor = selectDefaultExecutor(classification, intent);
    return {
      plan: {
        executor,
        reasoning: `Low-confidence classification (${confidence.toFixed(2)}) "${classification}" → escalated to ${executor}`,
        // NOTE(review): this mapping labels 'claude_code' and 'direct' as
        // 'cheap', which differs from the Phase 3 mapping — confirm intent.
        costCeiling: executor === 'claude' || executor === 'claude_opus' ? 'expensive' : executor === 'composite' ? 'expensive' : 'cheap',
      },
      nearMiss: routerNearMiss,
      reclassified: false,
    };
  }

  if (confidence < CONFIDENCE_TRUST) {
    // Verify zone (0.50-0.79): re-classify with Groq 70B + logprobs for a second opinion
    try {
      const groqResult = await askGroqWithLogprobs(
        groqApiKey,
        groqModel,
        buildClassifySystem(),
        classifyInput,
        groqBaseUrl,
      );

      if (groqResult.tokenConfidence >= 0.75) {
        // Groq is confident — adopt its classification
        const newClass = groqResult.pattern;
        if (getTaskPatterns().includes(newClass)) {
          console.log(`[router] Reclassified "${classification}" → "${newClass}" (token confidence ${groqResult.tokenConfidence.toFixed(2)})`);
          classification = newClass;
          intent.classified = classification;
          intent.complexity = groqResult.complexity;
          intent.needsTools = groqResult.needs_tools;
          intent.confidence = groqResult.selfReportedConfidence;
          reclassified = true;
        }
      } else {
        // Groq also uncertain — let low confidence flow into Phase 3
        console.log(`[router] Groq also uncertain (token=${groqResult.tokenConfidence.toFixed(2)}) for "${classification}" — keeping with verify-zone routing`);
      }
    } catch (err) {
      console.warn('[router] Groq logprobs re-classification failed:', err instanceof Error ? err.message : String(err));
      // Failure to re-classify is non-fatal — continue with original classification + verify-zone routing
    }
  }

  // ─── Phase 2: Procedural lookup ───────────────────────────
  const procKey = procedureKey(classification, intent.complexity);
  const procedure = await getProcedure(db, procKey);
  let nearMiss: string | undefined;

  if (procedure) {
    // NOTE(review): both returns inside this branch omit `nearMiss` and
    // `reclassified`, so a router fallback marker or a re-classification is
    // not reported when a stored procedure decides the route — confirm intent.
    if (procedure.status === 'degraded' || procedure.status === 'broken') {
      const executor = DEFAULT_ROUTES[classification] ?? 'claude';
      return {
        plan: {
          executor,
          reasoning: `Procedure "${procKey}" is ${procedure.status} — replanning via ${executor}`,
          // NOTE(review): 'gpt_oss' and 'workers_ai' resolve to 'expensive'
          // here but to 'cheap' in Phase 3 — confirm which is intended.
          costCeiling: executor === 'groq' ? 'cheap' : executor === 'direct' ? 'free' : 'expensive',
        },
      };
    }

    const total = procedure.success_count + procedure.fail_count;
    const successRate = total > 0 ? procedure.success_count / total : 0;

    if (procedure.success_count >= PROCEDURE_MIN_SUCCESSES && successRate >= PROCEDURE_MIN_SUCCESS_RATE) {
      // TarotScript is a classifier, not a responder — override any learned
      // procedure that points to tarotscript so users never see raw spread output.
      let executor = procedure.executor as Executor;
      if (executor === 'tarotscript') {
        console.log(`[router] procedure "${procKey}" points to tarotscript — overriding to gpt_oss (tarotscript is classify-only)`);
        executor = 'gpt_oss';
      }

      return {
        plan: {
          executor,
          reasoning: `Matched procedure "${procKey}" (${procedure.success_count} successes, ${Math.round(procedure.avg_latency_ms)}ms avg)`,
          procedureId: procedure.id,
          costCeiling: intent.costCeiling,
        },
      };
    }
  } else {
    nearMiss = (await findNearMiss(db, procKey)) ?? undefined;
  }

  // ─── Phase 3: Complexity-aware routing ───────────────────
  const executor = selectDefaultExecutor(classification, intent);

  // Report the confidence that selectDefaultExecutor() actually saw: a
  // verify-zone re-classification may have replaced the value captured at the
  // start of Phase 1.5.
  const effectiveConfidence = intent.confidence ?? confidence;

  return {
    plan: {
      executor,
      reasoning: `First-time routing: "${classification}" → ${executor} (complexity=${intent.complexity ?? '?'}, tools=${intent.needsTools ?? '?'}, confidence=${effectiveConfidence.toFixed(2)})`,
      costCeiling: executor === 'groq' || executor === 'workers_ai' || executor === 'gpt_oss' ? 'cheap' : executor === 'direct' ? 'free' : executor === 'composite' ? 'expensive' : 'expensive',
    },
    nearMiss: nearMiss ?? routerNearMiss,
    reclassified,
  };
}