@thispointon/kondi-chat 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +556 -0
  3. package/bin/kondi-chat +56 -0
  4. package/bin/kondi-chat.js +72 -0
  5. package/package.json +55 -0
  6. package/scripts/demo.tape +49 -0
  7. package/scripts/postinstall.cjs +103 -0
  8. package/src/audit/analytics.ts +261 -0
  9. package/src/audit/ledger.ts +253 -0
  10. package/src/audit/telemetry.ts +165 -0
  11. package/src/cli/backend.ts +675 -0
  12. package/src/cli/commands.ts +419 -0
  13. package/src/cli/help.ts +182 -0
  14. package/src/cli/submit-helpers.ts +159 -0
  15. package/src/cli/submit.ts +539 -0
  16. package/src/cli/wizard.ts +121 -0
  17. package/src/context/bootstrap.ts +138 -0
  18. package/src/context/budget.ts +100 -0
  19. package/src/context/manager.ts +666 -0
  20. package/src/context/memory.ts +160 -0
  21. package/src/context/preflight.ts +176 -0
  22. package/src/context/project-brain.ts +101 -0
  23. package/src/context/receipts.ts +108 -0
  24. package/src/context/skills.ts +154 -0
  25. package/src/context/symbol-index.ts +240 -0
  26. package/src/council/profiles.ts +137 -0
  27. package/src/council/tool.ts +138 -0
  28. package/src/council-engine/cli/council-artifacts.ts +230 -0
  29. package/src/council-engine/cli/council-config.ts +178 -0
  30. package/src/council-engine/cli/council-session-export.ts +116 -0
  31. package/src/council-engine/cli/kondi.ts +98 -0
  32. package/src/council-engine/cli/llm-caller.ts +229 -0
  33. package/src/council-engine/cli/localStorage-shim.ts +119 -0
  34. package/src/council-engine/cli/node-platform.ts +68 -0
  35. package/src/council-engine/cli/run-council.ts +481 -0
  36. package/src/council-engine/cli/run-pipeline.ts +772 -0
  37. package/src/council-engine/cli/session-export.ts +153 -0
  38. package/src/council-engine/configs/councils/analysis.json +101 -0
  39. package/src/council-engine/configs/councils/code-planning.json +86 -0
  40. package/src/council-engine/configs/councils/coding.json +89 -0
  41. package/src/council-engine/configs/councils/debate.json +97 -0
  42. package/src/council-engine/configs/councils/solo-claude.json +34 -0
  43. package/src/council-engine/configs/councils/solo-gpt.json +34 -0
  44. package/src/council-engine/council/coding-orchestrator.ts +1205 -0
  45. package/src/council-engine/council/context-bootstrap.ts +147 -0
  46. package/src/council-engine/council/context-inspection.ts +42 -0
  47. package/src/council-engine/council/context-store.ts +763 -0
  48. package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
  49. package/src/council-engine/council/factory.ts +164 -0
  50. package/src/council-engine/council/index.ts +201 -0
  51. package/src/council-engine/council/ledger-store.ts +438 -0
  52. package/src/council-engine/council/prompts.ts +1689 -0
  53. package/src/council-engine/council/storage-cleanup.ts +164 -0
  54. package/src/council-engine/council/store.ts +1110 -0
  55. package/src/council-engine/council/synthesis.ts +291 -0
  56. package/src/council-engine/council/types.ts +845 -0
  57. package/src/council-engine/council/validation.ts +613 -0
  58. package/src/council-engine/pipeline/build-detect.ts +73 -0
  59. package/src/council-engine/pipeline/executor.ts +1048 -0
  60. package/src/council-engine/pipeline/index.ts +9 -0
  61. package/src/council-engine/pipeline/install-detect.ts +84 -0
  62. package/src/council-engine/pipeline/memory-store.ts +182 -0
  63. package/src/council-engine/pipeline/output-parsers.ts +146 -0
  64. package/src/council-engine/pipeline/run-output.ts +149 -0
  65. package/src/council-engine/pipeline/session-import.ts +177 -0
  66. package/src/council-engine/pipeline/store.ts +753 -0
  67. package/src/council-engine/pipeline/test-detect.ts +82 -0
  68. package/src/council-engine/pipeline/types.ts +401 -0
  69. package/src/council-engine/services/deliberationSummary.ts +114 -0
  70. package/src/council-engine/tsconfig.json +16 -0
  71. package/src/council-engine/types/mcp.ts +122 -0
  72. package/src/council-engine/utils/filterTools.ts +73 -0
  73. package/src/engine/apply.ts +238 -0
  74. package/src/engine/checkpoints.ts +237 -0
  75. package/src/engine/consultants.ts +347 -0
  76. package/src/engine/diff.ts +171 -0
  77. package/src/engine/errors.ts +102 -0
  78. package/src/engine/git-tools.ts +246 -0
  79. package/src/engine/hooks.ts +181 -0
  80. package/src/engine/loop-guard.ts +155 -0
  81. package/src/engine/permissions.ts +293 -0
  82. package/src/engine/pipeline.ts +376 -0
  83. package/src/engine/sub-agents.ts +133 -0
  84. package/src/engine/task-card.ts +185 -0
  85. package/src/engine/task-router.ts +256 -0
  86. package/src/engine/task-store.ts +86 -0
  87. package/src/engine/tools.ts +783 -0
  88. package/src/engine/verify.ts +111 -0
  89. package/src/mcp/client.ts +225 -0
  90. package/src/mcp/config.ts +120 -0
  91. package/src/mcp/tool-manager.ts +192 -0
  92. package/src/mcp/types.ts +61 -0
  93. package/src/providers/llm-caller.ts +943 -0
  94. package/src/providers/rate-limiter.ts +238 -0
  95. package/src/router/NOTES.md +28 -0
  96. package/src/router/collector.ts +474 -0
  97. package/src/router/embeddings.ts +286 -0
  98. package/src/router/index.ts +299 -0
  99. package/src/router/intent-router.ts +225 -0
  100. package/src/router/nn-router.ts +205 -0
  101. package/src/router/profiles.ts +309 -0
  102. package/src/router/registry.ts +565 -0
  103. package/src/router/rules.ts +274 -0
  104. package/src/router/train.py +408 -0
  105. package/src/session/store.ts +211 -0
  106. package/src/test-utils/mock-llm.ts +39 -0
  107. package/src/types.ts +322 -0
  108. package/src/web/manager.ts +311 -0
@@ -0,0 +1,286 @@
1
+ /**
2
+ * Embedding Module — lightweight text embeddings for content-aware routing.
3
+ *
4
+ * Embeds prompts so the router can distinguish "calculate thrust-to-weight
5
+ * ratio" from "fix the CSS layout" even when both are execute/implementation.
6
+ *
7
+ * Backends:
8
+ * - ollama: local GPU, nomic-embed-text (768D) or any Ollama embedding model
9
+ * - openai: OpenAI embeddings API (text-embedding-3-small, 1536D)
10
+ * - compatible: any OpenAI-compatible embedding endpoint
11
+ *
12
+ * Embeddings are cached to disk so we don't re-compute on restart.
13
+ */
14
+
15
+ import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
16
+ import { join } from 'node:path';
17
+ import { createHash } from 'node:crypto';
18
+
19
+ // ---------------------------------------------------------------------------
20
+ // Configuration
21
+ // ---------------------------------------------------------------------------
22
+
23
/**
 * Connection settings for one embedding backend.
 *
 * `EmbeddingService` merges a partial value of this shape over the built-in
 * Ollama defaults.
 */
export interface EmbeddingConfig {
  /**
   * Wire protocol to speak. `ollama` posts to `<baseUrl>/api/embed`;
   * `openai` and `compatible` both post to `<baseUrl>/embeddings`.
   */
  backend: 'ollama' | 'openai' | 'compatible';
  /** Model name sent in the request body (e.g. "nomic-embed-text", "text-embedding-3-small"). */
  model: string;
  /** Base URL the endpoint path is appended to. */
  baseUrl: string;
  /**
   * Bearer token for the OpenAI-style endpoint. Only sent when set; the
   * Ollama request path never sends it.
   */
  apiKey?: string;
  /**
   * Expected vector length. Compared against the backend's actual output by
   * `EmbeddingService.healthCheck()`.
   */
  dimension: number;
}
35
+
36
/**
 * Built-in backend presets.
 *
 * Keys are spelled out (rather than `string`) so `DEFAULT_CONFIGS.ollama` and
 * `DEFAULT_CONFIGS.openai` are known to exist and a typo in a key is a
 * compile error. The OpenAI key is read from the environment once, when this
 * module is first evaluated.
 */
const DEFAULT_CONFIGS: Record<'ollama' | 'openai', EmbeddingConfig> = {
  // Local Ollama server with a 768-dimensional model.
  ollama: {
    backend: 'ollama',
    model: 'nomic-embed-text',
    baseUrl: 'http://localhost:11434',
    dimension: 768,
  },
  // Hosted OpenAI embeddings API with a 1536-dimensional model.
  openai: {
    backend: 'openai',
    model: 'text-embedding-3-small',
    baseUrl: 'https://api.openai.com/v1',
    apiKey: process.env.OPENAI_API_KEY,
    dimension: 1536,
  },
};
51
+
52
+ // ---------------------------------------------------------------------------
53
+ // Embedding Service
54
+ // ---------------------------------------------------------------------------
55
+
56
/**
 * Computes text embeddings over HTTP (Ollama or an OpenAI-style endpoint) and
 * memoizes them in memory and in `<storageDir>/embeddings/cache.json`.
 *
 * Cache entries are keyed by model name + text, so vectors produced by one
 * model are never handed back for another (for example after the automatic
 * Ollama → OpenAI fallback switches from a 768D to a 1536D model).
 */
export class EmbeddingService {
  /** Maximum number of UTF-16 code units of a text that are sent to the backend. */
  private static readonly MAX_INPUT_CHARS = 8192;

  private config: EmbeddingConfig;
  private cache: Map<string, number[]> = new Map();
  private cacheDir: string;
  private cacheFile: string;
  private backendReady = false;
  private attemptedFallback = false;

  /**
   * @param storageDir Directory under which the `embeddings/` cache folder is created.
   * @param config Overrides merged on top of the default Ollama preset.
   * @throws If the cache directory cannot be created.
   */
  constructor(storageDir: string, config?: Partial<EmbeddingConfig>) {
    this.config = { ...DEFAULT_CONFIGS.ollama, ...config };

    this.cacheDir = join(storageDir, 'embeddings');
    this.cacheFile = join(this.cacheDir, 'cache.json');
    mkdirSync(this.cacheDir, { recursive: true });
    this.loadCache();
  }

  /** Returns a shallow copy of the active configuration (which may change after a fallback). */
  getConfig(): EmbeddingConfig {
    return { ...this.config };
  }

  /** Returns the configured (expected) vector length. */
  getDimension(): number {
    return this.config.dimension;
  }

  /**
   * Embed one text.
   *
   * The text is truncated to {@link EmbeddingService.MAX_INPUT_CHARS} first;
   * the result is cached under the hash of the model name and truncated text.
   *
   * @returns The embedding vector.
   * @throws If no backend is reachable, the backend answers with an error or
   *   a malformed body, or the cache file cannot be written.
   */
  async embed(text: string): Promise<number[]> {
    // Settle the backend first: a fallback changes the model, and the model
    // is part of the cache key.
    await this.ensureBackend();

    const input = this.truncate(text);
    const key = this.hash(input);

    const hit = this.cache.get(key);
    if (hit) return hit;

    const vector = await this.callApi(input);
    this.cache.set(key, vector);
    this.saveCache();
    return vector;
  }

  /**
   * Embed several texts, calling the backend only for those not yet cached.
   *
   * @returns One vector per input, in input order.
   * @throws Under the same conditions as {@link EmbeddingService.embed}.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    await this.ensureBackend();

    const inputs = texts.map((t) => this.truncate(t));
    const results: number[][] = [];
    const misses: { index: number; key: string; text: string }[] = [];

    inputs.forEach((input, index) => {
      const key = this.hash(input);
      const hit = this.cache.get(key);
      if (hit) {
        results[index] = hit;
      } else {
        misses.push({ index, key, text: input });
      }
    });

    if (misses.length > 0) {
      const fresh = await this.callApiBatch(misses.map((m) => m.text));
      misses.forEach((miss, i) => {
        const vector = fresh[i];
        this.cache.set(miss.key, vector);
        results[miss.index] = vector;
      });
      // Persist once per batch rather than once per text.
      this.saveCache();
    }

    return results;
  }

  /**
   * Probe the backend with a tiny request and compare the returned vector
   * length with the configured dimension. Never throws.
   */
  async healthCheck(): Promise<{ ok: boolean; error?: string }> {
    try {
      const probe = await this.callApi('test');
      if (probe.length === this.config.dimension) {
        return { ok: true };
      }
      return {
        ok: false,
        error: `Expected ${this.config.dimension}D, got ${probe.length}D`,
      };
    } catch (error) {
      return { ok: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /** Number of cached embeddings. */
  cacheSize(): number {
    return this.cache.size;
  }

  // -------------------------------------------------------------------------
  // API calls
  // -------------------------------------------------------------------------

  /**
   * Verify the configured backend answers once per instance. If an Ollama
   * backend fails and `OPENAI_API_KEY` is set, switch to the OpenAI preset
   * (one time only) and probe again; otherwise rethrow the failure.
   */
  private async ensureBackend(): Promise<void> {
    if (this.backendReady) return;
    try {
      await this.callApi('health-check');
      this.backendReady = true;
    } catch (error) {
      const openAiKey = process.env.OPENAI_API_KEY;
      const canFallBack =
        this.config.backend === 'ollama' && Boolean(openAiKey) && !this.attemptedFallback;
      if (!canFallBack) throw error;

      this.attemptedFallback = true;
      this.config = { ...DEFAULT_CONFIGS.openai, apiKey: openAiKey };
      await this.ensureBackend();
    }
  }

  /** Embed a single text without consulting or updating the cache. */
  private async callApi(text: string): Promise<number[]> {
    const [vector] = await this.callApiBatch([text]);
    if (!vector) {
      throw new Error('Embedding backend returned no result');
    }
    return vector;
  }

  /** Dispatch a batch to the request routine for the configured backend. */
  private async callApiBatch(texts: string[]): Promise<number[][]> {
    switch (this.config.backend) {
      case 'ollama':
        return this.callOllama(texts);
      case 'openai':
      case 'compatible':
        return this.callOpenAIEmbeddings(texts);
      default:
        throw new Error(`Unknown embedding backend: ${this.config.backend}`);
    }
  }

  /** Request embeddings from `<baseUrl>/api/embed`, one request per text, in order. */
  private async callOllama(texts: string[]): Promise<number[][]> {
    const vectors: number[][] = [];
    for (const text of texts) {
      const resp = await fetch(`${this.config.baseUrl}/api/embed`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ model: this.config.model, input: text }),
      });

      if (!resp.ok) {
        const body = await resp.text();
        throw new Error(`Ollama embedding failed (${resp.status}): ${body.slice(0, 200)}`);
      }

      const payload: unknown = await resp.json();
      const first =
        EmbeddingService.isRecord(payload) && Array.isArray(payload.embeddings)
          ? payload.embeddings[0]
          : undefined;
      if (!EmbeddingService.isVector(first)) {
        throw new Error('Ollama returned invalid embedding format');
      }
      vectors.push(first);
    }
    return vectors;
  }

  /** Request embeddings for all texts in one call to `<baseUrl>/embeddings`. */
  private async callOpenAIEmbeddings(texts: string[]): Promise<number[][]> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
    };
    if (this.config.apiKey) {
      headers['Authorization'] = `Bearer ${this.config.apiKey}`;
    }

    const resp = await fetch(`${this.config.baseUrl}/embeddings`, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        model: this.config.model,
        input: texts,
      }),
    });

    if (!resp.ok) {
      const body = await resp.text();
      throw new Error(`Embedding API failed (${resp.status}): ${body.slice(0, 200)}`);
    }

    const payload: unknown = await resp.json();
    const rows =
      EmbeddingService.isRecord(payload) && Array.isArray(payload.data)
        ? payload.data
        : undefined;
    if (!rows || rows.length !== texts.length) {
      throw new Error('Embedding API returned wrong number of results');
    }

    // Rows carry an `index`; order by it so the output lines up with `texts`.
    // Sort a copy so the parsed payload is left untouched.
    const vectors: unknown[] = [...rows]
      .sort((a, b) => a?.index - b?.index)
      .map((row) => row?.embedding);
    if (!vectors.every(EmbeddingService.isVector)) {
      throw new Error('Embedding API returned invalid embedding format');
    }
    return vectors as number[][];
  }

  // -------------------------------------------------------------------------
  // Cache
  // -------------------------------------------------------------------------

  /**
   * Cut `text` to the input limit without leaving half of a surrogate pair
   * at the end (which would reach the backend as a lone surrogate).
   */
  private truncate(text: string): string {
    const limit = EmbeddingService.MAX_INPUT_CHARS;
    if (text.length <= limit) return text;
    const lastUnit = text.charCodeAt(limit - 1);
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    return text.slice(0, endsOnHighSurrogate ? limit - 1 : limit);
  }

  /**
   * Cache key: first 16 hex characters of SHA-256 over the model name and
   * the text, separated by a NUL so the two parts cannot run together.
   */
  private hash(text: string): string {
    return createHash('sha256')
      .update(this.config.model)
      .update('\0')
      .update(text)
      .digest('hex')
      .slice(0, 16);
  }

  /**
   * Load the on-disk cache if present. An unreadable or malformed file yields
   * an empty cache; individual entries that are not `[string, number[]]`
   * pairs are dropped.
   */
  private loadCache(): void {
    if (!existsSync(this.cacheFile)) return;
    try {
      const parsed: unknown = JSON.parse(readFileSync(this.cacheFile, 'utf-8'));
      if (!Array.isArray(parsed)) {
        this.cache = new Map();
        return;
      }
      const entries = parsed.filter(
        (entry): entry is [string, number[]] =>
          Array.isArray(entry) &&
          entry.length === 2 &&
          typeof entry[0] === 'string' &&
          EmbeddingService.isVector(entry[1]),
      );
      this.cache = new Map(entries);
    } catch {
      this.cache = new Map();
    }
  }

  /** Rewrite the whole cache file as a JSON array of `[key, vector]` pairs. */
  private saveCache(): void {
    writeFileSync(this.cacheFile, JSON.stringify([...this.cache.entries()]));
  }

  /** True for any non-null object, so its properties can be probed safely. */
  private static isRecord(value: unknown): value is Record<string, unknown> {
    return typeof value === 'object' && value !== null;
  }

  /** True when `value` is an array whose elements are all numbers. */
  private static isVector(value: unknown): value is number[] {
    return Array.isArray(value) && value.every((x) => typeof x === 'number');
  }
}
@@ -0,0 +1,299 @@
1
+ /**
2
+ * Unified Router — chains NN → Intent → Rules for model selection.
3
+ *
4
+ * The Intent Router is the primary and most capable strategy — it reads
5
+ * every model's description and capabilities from the registry and asks
6
+ * an LLM which one best fits the task. It handles any model, any capability.
7
+ *
8
+ * The NN Router is a fast approximation of Intent — when trained on enough
9
+ * data, it can predict the Intent Router's choice without an LLM call.
10
+ * It's used when available for speed (no API latency).
11
+ *
12
+ * The Rule Router is the minimal fallback — phase/task-kind heuristics
13
+ * that always produce a result but don't consider model descriptions.
14
+ *
15
+ * Priority: direct profile pin (if set & enabled) → NN (if trained & confident) → Intent (primary) → Rules (fallback)
16
+ */
17
+
18
+ import type { LedgerPhase, TaskKind, ProviderId } from '../types.ts';
19
+ import type { ModelEntry } from './registry.ts';
20
+ import { ModelRegistry } from './registry.ts';
21
+ import { RuleRouter, type RouteDecision } from './rules.ts';
22
+ import { NNRouter } from './nn-router.ts';
23
+ import { IntentRouter, type IntentRouterConfig } from './intent-router.ts';
24
+ import { EmbeddingService, type EmbeddingConfig } from './embeddings.ts';
25
+ import { RoutingCollector } from './collector.ts';
26
+
27
+ // ---------------------------------------------------------------------------
28
+ // Unified route decision
29
+ // ---------------------------------------------------------------------------
30
+
31
/** Outcome of a routing call: the chosen model plus how it was chosen. */
export interface UnifiedRouteDecision {
  /** Registry entry of the selected model. */
  model: ModelEntry;
  /** Human-readable explanation of the choice, suitable for logs or status display. */
  reason: string;
  /** Which strategy produced the decision. Profile pins are reported as `'rules'`. */
  tier: 'nn' | 'intent' | 'rules';
  /** True only when the rule router reports a promotion; always false for the other tiers. */
  promoted: boolean;
  /** Prediction confidence; only populated by the NN tier. */
  confidence?: number;
}
38
+
39
/**
 * What has already happened in the current pipeline run.
 *
 * Passed through to the intent router so its classifier can take earlier
 * phases into account (for example: which model wrote the code and whether
 * that phase succeeded) instead of seeing only the original prompt.
 */
export interface PhaseContext {
  /** Earlier phases of this run. */
  priorPhases?: Array<{
    /** Name of the phase. */
    phase: string;
    /** Model that handled the phase. */
    model: string;
    /** Optional short description of what the phase produced. */
    summary?: string;
    /** Whether the phase succeeded, when known. */
    succeeded?: boolean;
  }>;
  /** Goal of the phase that is about to be routed. */
  currentGoal?: string;
}
54
+
55
+ // ---------------------------------------------------------------------------
56
+ // Unified Router
57
+ // ---------------------------------------------------------------------------
58
+
59
/**
 * Unified model router.
 *
 * Owns the model registry and the three routing strategies (NN, intent,
 * rules) together with the embedding service and the routing-sample
 * collector, and exposes a single `select()` that tries them in order.
 */
export class Router {
  readonly registry: ModelRegistry;
  readonly rules: RuleRouter;
  readonly nn: NNRouter;
  readonly intent: IntentRouter;
  readonly embeddings: EmbeddingService;
  readonly collector: RoutingCollector;

  /** When false, the intent tier is skipped entirely. */
  private useIntent: boolean;

  /**
   * Active profile scope, applied to every `select()` call:
   * provider allow-list, classifier override, and per-phase model pins.
   * Replaced wholesale by `setProfileScope`.
   */
  private profileScope: {
    allowedProviders?: ProviderId[];
    classifier?: { provider: ProviderId; model: string };
    rolePinning?: Record<string, string>;
  } = {};

  /**
   * @param storageDir Directory handed to the registry, embedding service,
   *   NN router and collector for their persistent state.
   * @param options Optional overrides; `useIntent` defaults to `true`.
   */
  constructor(
    storageDir: string,
    options?: {
      embeddingConfig?: Partial<EmbeddingConfig>;
      intentConfig?: Partial<IntentRouterConfig>;
      useIntent?: boolean;
      nnConfidenceThreshold?: number;
    },
  ) {
    this.registry = new ModelRegistry(storageDir);
    this.rules = new RuleRouter(this.registry);
    this.embeddings = new EmbeddingService(storageDir, options?.embeddingConfig);
    this.nn = new NNRouter(storageDir, options?.nnConfidenceThreshold);
    this.intent = new IntentRouter(options?.intentConfig);
    this.collector = new RoutingCollector(storageDir, this.embeddings);
    this.useIntent = options?.useIntent ?? true;
  }

  /**
   * Replace the profile scope.
   *
   * If `scope.allowedProviders` is given it is used as-is. Otherwise, when
   * `scope.rolePinning` is present, the allow-list is derived from the
   * providers of the pinned models that exist in the registry, so a profile
   * that only declares pins still gets provider-scoped routing.
   */
  setProfileScope(scope: {
    classifier?: { provider: ProviderId; model: string };
    rolePinning?: Record<string, string>;
    /** Explicit provider allow-list from the profile. Takes precedence
     *  over the auto-derived one when set. */
    allowedProviders?: ProviderId[];
  }): void {
    let allowedProviders: ProviderId[] | undefined = scope.allowedProviders;

    if (!allowedProviders && scope.rolePinning) {
      const derived = new Set<ProviderId>();
      for (const modelId of Object.values(scope.rolePinning)) {
        const entry = this.registry.getById(modelId);
        if (entry) derived.add(entry.provider);
      }
      // Leave the list undefined (unrestricted) when no pin resolved.
      if (derived.size > 0) {
        allowedProviders = [...derived];
      }
    }

    this.profileScope = { ...scope, allowedProviders };
  }

  /** Returns the classifier override of the active profile scope, if any. */
  getClassifier(): { provider: ProviderId; model: string } | undefined {
    return this.profileScope.classifier;
  }

  /**
   * Select a model for `phase`.
   *
   * Order of evaluation:
   *  1. Direct profile pin for this phase, if the pinned model exists and is
   *     enabled — returned immediately with no embedding or LLM call.
   *  2. NN router, when available and its pick is inside the profile's
   *     provider allow-list.
   *  3. Intent router (unless disabled), given the profile scope and
   *     `phaseContext`.
   *  4. The pin is looked up once more; if it is still unusable a warning is
   *     written to stderr.
   *  5. Rule router, which always yields a result.
   *
   * Failures inside the NN and intent tiers are logged to stderr and treated
   * as "no decision"; they never reject the returned promise.
   *
   * @param phase Pipeline phase being routed.
   * @param promptText Prompt used for embedding and intent classification.
   * @param taskKind Optional task kind forwarded to every tier.
   * @param failures Number of prior failures; forwarded to the NN and rule tiers.
   * @param promotionThreshold Forwarded to the rule router.
   * @param phaseContext Summary of earlier phases, forwarded to the intent router.
   */
  async select(
    phase: LedgerPhase,
    promptText: string,
    taskKind?: TaskKind,
    failures?: number,
    promotionThreshold?: number,
    phaseContext?: PhaseContext,
  ): Promise<UnifiedRouteDecision> {
    // 1. Fast path: a usable direct pin wins outright.
    const directPin = this.profileScope.rolePinning?.[phase];
    if (directPin) {
      const pinned = this.registry.getById(directPin);
      if (pinned && pinned.enabled) {
        return {
          model: pinned,
          reason: `pin: ${pinned.alias || pinned.id}`,
          tier: 'rules',
          promoted: false,
        };
      }
    }

    // 2. NN router (no LLM call).
    const nnDecision = await this.tryNN(phase, promptText, taskKind, failures);
    if (nnDecision) return nnDecision;

    // 3. Intent router with the profile scope and phase context.
    const intentDecision = await this.tryIntent(phase, promptText, taskKind, phaseContext);
    if (intentDecision) return intentDecision;

    // 4. Re-check the pin. After the fast path above this only succeeds if
    //    the scope or registry changed while the tiers were awaited.
    const pinnedId = this.profileScope.rolePinning?.[phase];
    if (pinnedId) {
      const pinned = this.registry.getById(pinnedId);
      if (pinned && pinned.enabled) {
        return {
          model: pinned,
          reason: `pin: ${pinned.alias || pinned.id} (intent failed, using profile default)`,
          tier: 'rules',
          promoted: false,
        };
      }
      // Tell the user which of the two config problems they actually have.
      const problem = pinned
        ? 'is disabled in the registry. Enable it or pin a different model.'
        : 'not found in registry. Check that the model ID matches an entry in models.yml.';
      process.stderr.write(
        `[router] ⚠ profile pin "${pinnedId}" for phase "${phase}" ${problem} ` +
          `Falling back to rules.\n`,
      );
    }

    // 5. Rule-based fallback — always produces a result.
    const ruleResult = this.rules.select(phase, taskKind, failures, promotionThreshold);
    const chosenName = ruleResult.model.alias || ruleResult.model.id;
    let reason: string;
    if (ruleResult.promoted) {
      reason = `⚠ promoted after ${failures} failures → ${chosenName}`;
    } else if (pinnedId) {
      reason = `⚠ "${pinnedId}" not available → ${chosenName} (fallback)`;
    } else {
      reason = ruleResult.reason;
    }
    return {
      model: ruleResult.model,
      reason,
      tier: 'rules',
      promoted: ruleResult.promoted,
    };
  }

  /**
   * Synchronous select — rule router only. Ignores pins, NN and intent.
   */
  selectSync(
    phase: LedgerPhase,
    taskKind?: TaskKind,
    failures?: number,
    promotionThreshold?: number,
  ): UnifiedRouteDecision {
    const { model, reason, promoted } = this.rules.select(
      phase,
      taskKind,
      failures,
      promotionThreshold,
    );
    return { model, reason, tier: 'rules', promoted };
  }

  /** Multi-line status summary for display. */
  status(): string {
    const nnState = this.nn.isAvailable() ? 'trained and active' : 'not trained (collecting data)';
    const intentState = this.useIntent ? 'enabled' : 'disabled';
    const { backend, model, dimension } = this.embeddings.getConfig();
    return [
      `NN Router: ${nnState}`,
      `Intent Router: ${intentState}`,
      `Rule Router: active (fallback)`,
      `Embeddings: ${backend}/${model} (${dimension}D, ${this.embeddings.cacheSize()} cached)`,
    ].join('\n');
  }

  /**
   * NN tier. Returns a decision, or `undefined` when the NN is unavailable,
   * makes no prediction, picks a model outside the profile's allowed
   * providers, or throws (logged to stderr).
   */
  private async tryNN(
    phase: LedgerPhase,
    promptText: string,
    taskKind: TaskKind | undefined,
    failures: number | undefined,
  ): Promise<UnifiedRouteDecision | undefined> {
    try {
      if (!this.nn.isAvailable()) return undefined;

      let embedding: number[] | undefined;
      try {
        embedding = await this.embeddings.embed(promptText.slice(0, 2048));
      } catch {
        // The embedding is an optional input; predict without it.
      }

      const nnResult = this.nn.predict(
        phase, taskKind,
        promptText.length, 0, failures ?? 0,
        this.registry, embedding,
      );
      if (!nnResult) return undefined;

      // Keep the NN inside the active profile: an out-of-scope pick is
      // dropped so the intent tier (which is scope-aware) can decide.
      if (!this.isProviderAllowed(nnResult.model)) return undefined;

      return {
        model: nnResult.model,
        reason: `nn (${(nnResult.confidence * 100).toFixed(0)}% confidence)`,
        tier: 'nn',
        promoted: false,
        confidence: nnResult.confidence,
      };
    } catch (e) {
      process.stderr.write(`[router] NN tier failed: ${Router.errorMessage(e)}\n`);
      return undefined;
    }
  }

  /**
   * Intent tier. Returns a decision, or `undefined` when the tier is
   * disabled, yields nothing, or throws (logged to stderr).
   */
  private async tryIntent(
    phase: LedgerPhase,
    promptText: string,
    taskKind: TaskKind | undefined,
    phaseContext: PhaseContext | undefined,
  ): Promise<UnifiedRouteDecision | undefined> {
    const pins = this.profileScope.rolePinning;
    // Distinct model ids pinned anywhere in the profile.
    const pinnedModelIds = pins ? [...new Set(Object.values(pins))] : undefined;

    try {
      if (!this.useIntent) return undefined;

      const intentResult = await this.intent.classify(
        promptText, phase, taskKind, this.registry,
        {
          allowedProviders: this.profileScope.allowedProviders,
          allowedModelIds: pinnedModelIds,
          classifier: this.profileScope.classifier,
          phaseContext,
          phasePreference: pins?.[phase],
        },
      );
      if (!intentResult) return undefined;

      return {
        model: intentResult.model,
        reason: `intent: ${intentResult.intent}`,
        tier: 'intent',
        promoted: false,
      };
    } catch (e) {
      process.stderr.write(`[router] Intent tier failed: ${Router.errorMessage(e)}\n`);
      return undefined;
    }
  }

  /**
   * True when `model`'s provider is permitted by the active profile scope.
   * A missing or empty allow-list means "no restriction".
   */
  private isProviderAllowed(model: ModelEntry): boolean {
    const allowed = this.profileScope.allowedProviders;
    if (!allowed || allowed.length === 0) return true;
    return allowed.includes(model.provider);
  }

  /** Message of a caught value, whether or not it is an `Error`. */
  private static errorMessage(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }
}
292
+
293
+ // Re-export for convenience
294
+ export { ModelRegistry, type ModelEntry } from './registry.ts';
295
+ export { RuleRouter, type RouteDecision } from './rules.ts';
296
+ export { NNRouter } from './nn-router.ts';
297
+ export { IntentRouter } from './intent-router.ts';
298
+ export { EmbeddingService, type EmbeddingConfig } from './embeddings.ts';
299
+ export { RoutingCollector, type RoutingSample } from './collector.ts';