@thispointon/kondi-chat 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +556 -0
- package/bin/kondi-chat +56 -0
- package/bin/kondi-chat.js +72 -0
- package/package.json +55 -0
- package/scripts/demo.tape +49 -0
- package/scripts/postinstall.cjs +103 -0
- package/src/audit/analytics.ts +261 -0
- package/src/audit/ledger.ts +253 -0
- package/src/audit/telemetry.ts +165 -0
- package/src/cli/backend.ts +675 -0
- package/src/cli/commands.ts +419 -0
- package/src/cli/help.ts +182 -0
- package/src/cli/submit-helpers.ts +159 -0
- package/src/cli/submit.ts +539 -0
- package/src/cli/wizard.ts +121 -0
- package/src/context/bootstrap.ts +138 -0
- package/src/context/budget.ts +100 -0
- package/src/context/manager.ts +666 -0
- package/src/context/memory.ts +160 -0
- package/src/context/preflight.ts +176 -0
- package/src/context/project-brain.ts +101 -0
- package/src/context/receipts.ts +108 -0
- package/src/context/skills.ts +154 -0
- package/src/context/symbol-index.ts +240 -0
- package/src/council/profiles.ts +137 -0
- package/src/council/tool.ts +138 -0
- package/src/council-engine/cli/council-artifacts.ts +230 -0
- package/src/council-engine/cli/council-config.ts +178 -0
- package/src/council-engine/cli/council-session-export.ts +116 -0
- package/src/council-engine/cli/kondi.ts +98 -0
- package/src/council-engine/cli/llm-caller.ts +229 -0
- package/src/council-engine/cli/localStorage-shim.ts +119 -0
- package/src/council-engine/cli/node-platform.ts +68 -0
- package/src/council-engine/cli/run-council.ts +481 -0
- package/src/council-engine/cli/run-pipeline.ts +772 -0
- package/src/council-engine/cli/session-export.ts +153 -0
- package/src/council-engine/configs/councils/analysis.json +101 -0
- package/src/council-engine/configs/councils/code-planning.json +86 -0
- package/src/council-engine/configs/councils/coding.json +89 -0
- package/src/council-engine/configs/councils/debate.json +97 -0
- package/src/council-engine/configs/councils/solo-claude.json +34 -0
- package/src/council-engine/configs/councils/solo-gpt.json +34 -0
- package/src/council-engine/council/coding-orchestrator.ts +1205 -0
- package/src/council-engine/council/context-bootstrap.ts +147 -0
- package/src/council-engine/council/context-inspection.ts +42 -0
- package/src/council-engine/council/context-store.ts +763 -0
- package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
- package/src/council-engine/council/factory.ts +164 -0
- package/src/council-engine/council/index.ts +201 -0
- package/src/council-engine/council/ledger-store.ts +438 -0
- package/src/council-engine/council/prompts.ts +1689 -0
- package/src/council-engine/council/storage-cleanup.ts +164 -0
- package/src/council-engine/council/store.ts +1110 -0
- package/src/council-engine/council/synthesis.ts +291 -0
- package/src/council-engine/council/types.ts +845 -0
- package/src/council-engine/council/validation.ts +613 -0
- package/src/council-engine/pipeline/build-detect.ts +73 -0
- package/src/council-engine/pipeline/executor.ts +1048 -0
- package/src/council-engine/pipeline/index.ts +9 -0
- package/src/council-engine/pipeline/install-detect.ts +84 -0
- package/src/council-engine/pipeline/memory-store.ts +182 -0
- package/src/council-engine/pipeline/output-parsers.ts +146 -0
- package/src/council-engine/pipeline/run-output.ts +149 -0
- package/src/council-engine/pipeline/session-import.ts +177 -0
- package/src/council-engine/pipeline/store.ts +753 -0
- package/src/council-engine/pipeline/test-detect.ts +82 -0
- package/src/council-engine/pipeline/types.ts +401 -0
- package/src/council-engine/services/deliberationSummary.ts +114 -0
- package/src/council-engine/tsconfig.json +16 -0
- package/src/council-engine/types/mcp.ts +122 -0
- package/src/council-engine/utils/filterTools.ts +73 -0
- package/src/engine/apply.ts +238 -0
- package/src/engine/checkpoints.ts +237 -0
- package/src/engine/consultants.ts +347 -0
- package/src/engine/diff.ts +171 -0
- package/src/engine/errors.ts +102 -0
- package/src/engine/git-tools.ts +246 -0
- package/src/engine/hooks.ts +181 -0
- package/src/engine/loop-guard.ts +155 -0
- package/src/engine/permissions.ts +293 -0
- package/src/engine/pipeline.ts +376 -0
- package/src/engine/sub-agents.ts +133 -0
- package/src/engine/task-card.ts +185 -0
- package/src/engine/task-router.ts +256 -0
- package/src/engine/task-store.ts +86 -0
- package/src/engine/tools.ts +783 -0
- package/src/engine/verify.ts +111 -0
- package/src/mcp/client.ts +225 -0
- package/src/mcp/config.ts +120 -0
- package/src/mcp/tool-manager.ts +192 -0
- package/src/mcp/types.ts +61 -0
- package/src/providers/llm-caller.ts +943 -0
- package/src/providers/rate-limiter.ts +238 -0
- package/src/router/NOTES.md +28 -0
- package/src/router/collector.ts +474 -0
- package/src/router/embeddings.ts +286 -0
- package/src/router/index.ts +299 -0
- package/src/router/intent-router.ts +225 -0
- package/src/router/nn-router.ts +205 -0
- package/src/router/profiles.ts +309 -0
- package/src/router/registry.ts +565 -0
- package/src/router/rules.ts +274 -0
- package/src/router/train.py +408 -0
- package/src/session/store.ts +211 -0
- package/src/test-utils/mock-llm.ts +39 -0
- package/src/types.ts +322 -0
- package/src/web/manager.ts +311 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Embedding Module — lightweight text embeddings for content-aware routing.
|
|
3
|
+
*
|
|
4
|
+
* Embeds prompts so the router can distinguish "calculate thrust-to-weight
|
|
5
|
+
* ratio" from "fix the CSS layout" even when both are execute/implementation.
|
|
6
|
+
*
|
|
7
|
+
* Backends:
|
|
8
|
+
* - ollama: local GPU, nomic-embed-text (768D) or any Ollama embedding model
|
|
9
|
+
* - openai: OpenAI embeddings API (text-embedding-3-small, 1536D)
|
|
10
|
+
* - compatible: any OpenAI-compatible embedding endpoint
|
|
11
|
+
*
|
|
12
|
+
* Embeddings are cached to disk so we don't re-compute on restart.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync } from 'node:fs';
|
|
16
|
+
import { join } from 'node:path';
|
|
17
|
+
import { createHash } from 'node:crypto';
|
|
18
|
+
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Configuration
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
/**
 * Connection settings for one embedding backend.
 */
export interface EmbeddingConfig {
  /** Which wire protocol to speak: Ollama's API or an OpenAI-style API. */
  backend: 'ollama' | 'openai' | 'compatible';

  /** Embedding model identifier, e.g. "nomic-embed-text" or "text-embedding-3-small". */
  model: string;

  /** Root URL of the API; the endpoint path is appended by the service. */
  baseUrl: string;

  /** Bearer token for the API. Optional — Ollama does not need one. */
  apiKey?: string;

  /** Vector length the model is expected to return (used for validation). */
  dimension: number;
}
|
|
35
|
+
|
|
36
|
+
/** Preset for a local Ollama server running nomic-embed-text (768-dimensional vectors). */
const OLLAMA_DEFAULTS: EmbeddingConfig = {
  backend: 'ollama',
  model: 'nomic-embed-text',
  baseUrl: 'http://localhost:11434',
  dimension: 768,
};

/**
 * Preset for the hosted OpenAI embeddings API (1536-dimensional vectors).
 * The API key is read from OPENAI_API_KEY once, when this module is loaded.
 */
const OPENAI_DEFAULTS: EmbeddingConfig = {
  backend: 'openai',
  model: 'text-embedding-3-small',
  baseUrl: 'https://api.openai.com/v1',
  apiKey: process.env.OPENAI_API_KEY,
  dimension: 1536,
};

/** Built-in backend presets, looked up by backend name. */
const DEFAULT_CONFIGS: Record<string, EmbeddingConfig> = {
  ollama: OLLAMA_DEFAULTS,
  openai: OPENAI_DEFAULTS,
};
|
|
51
|
+
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
// Embedding Service
|
|
54
|
+
// ---------------------------------------------------------------------------
|
|
55
|
+
|
|
56
|
+
/**
 * Text-embedding client with an in-memory cache that is mirrored to
 * `<storageDir>/embeddings/cache.json`.
 *
 * Cache keys are derived from the active backend, the active model and the
 * (truncated) text, so vectors produced by different models are never mixed —
 * this matters because the service can switch from Ollama to OpenAI at
 * runtime (see `ensureBackend`).
 */
export class EmbeddingService {
  private config: EmbeddingConfig;
  private cache: Map<string, number[]> = new Map();
  private cacheDir: string;
  private cacheFile: string;
  /** True once a probe request against the current backend has succeeded. */
  private backendReady = false;
  /** True once the Ollama → OpenAI fallback has been tried (it is tried at most once). */
  private attemptedFallback = false;

  /**
   * @param storageDir Directory under which the `embeddings/` cache folder is created.
   * @param config Overrides applied on top of the Ollama defaults.
   */
  constructor(storageDir: string, config?: Partial<EmbeddingConfig>) {
    // Default to Ollama with nomic-embed-text; caller overrides win.
    this.config = {
      ...DEFAULT_CONFIGS.ollama,
      ...config,
    };

    this.cacheDir = join(storageDir, 'embeddings');
    this.cacheFile = join(this.cacheDir, 'cache.json');
    mkdirSync(this.cacheDir, { recursive: true });
    this.loadCache();
  }

  /** Returns a shallow copy of the active configuration. */
  getConfig(): EmbeddingConfig {
    return { ...this.config };
  }

  /** Returns the vector length configured for the active backend. */
  getDimension(): number {
    return this.config.dimension;
  }

  /**
   * Embed a single text and return its vector.
   *
   * The text is truncated to 8192 characters before hashing and sending.
   * Results are cached per backend/model/text and persisted to disk.
   *
   * @throws If no backend is reachable or the API returns an error.
   */
  async embed(text: string): Promise<number[]> {
    // Must run first: it may switch the active backend, which changes the cache key.
    await this.ensureBackend();

    // Truncate very long texts — embedding models have limits.
    const truncated = text.slice(0, 8192);
    const key = this.hash(truncated);

    const cached = this.cache.get(key);
    if (cached) return cached;

    const embedding = await this.callApi(truncated);

    this.cache.set(key, embedding);
    this.saveCache();

    return embedding;
  }

  /**
   * Embed several texts, returning one vector per input in input order.
   *
   * Cached texts are served from the cache; the remaining ones are sent to
   * the backend in a single `callApiBatch` call.
   *
   * @throws If no backend is reachable or the API returns an error.
   */
  async embedBatch(texts: string[]): Promise<number[][]> {
    await this.ensureBackend();

    const truncated = texts.map(t => t.slice(0, 8192));
    const results: number[][] = [];
    const uncached: { index: number; key: string; text: string }[] = [];

    // Serve what we can from the cache and remember where the gaps are.
    truncated.forEach((text, index) => {
      const key = this.hash(text);
      const cached = this.cache.get(key);
      if (cached) {
        results[index] = cached;
      } else {
        uncached.push({ index, key, text });
      }
    });

    if (uncached.length > 0) {
      const embeddings = await this.callApiBatch(uncached.map(u => u.text));
      uncached.forEach((entry, i) => {
        this.cache.set(entry.key, embeddings[i]);
        results[entry.index] = embeddings[i];
      });
      this.saveCache();
    }

    return results;
  }

  /**
   * Check whether the configured backend answers and returns vectors of the
   * configured dimension. Never throws; failures are reported in the result.
   */
  async healthCheck(): Promise<{ ok: boolean; error?: string }> {
    try {
      const embedding = await this.callApi('test');
      if (embedding.length !== this.config.dimension) {
        return {
          ok: false,
          error: `Expected ${this.config.dimension}D, got ${embedding.length}D`,
        };
      }
      return { ok: true };
    } catch (error) {
      return { ok: false, error: error instanceof Error ? error.message : String(error) };
    }
  }

  /** Number of cached embeddings. */
  cacheSize(): number {
    return this.cache.size;
  }

  // -------------------------------------------------------------------------
  // API calls
  // -------------------------------------------------------------------------

  /**
   * Probe the backend once with a throwaway request.
   *
   * If the probe fails while the backend is Ollama and OPENAI_API_KEY is
   * set, the configuration is replaced by the OpenAI defaults and the probe
   * is retried (once per instance). Any other failure is rethrown.
   */
  private async ensureBackend(): Promise<void> {
    if (this.backendReady) return;
    try {
      await this.callApi('health-check');
      this.backendReady = true;
      return;
    } catch (error) {
      if (this.config.backend === 'ollama' && process.env.OPENAI_API_KEY && !this.attemptedFallback) {
        // Fallback to OpenAI embeddings when local Ollama is unavailable.
        this.attemptedFallback = true;
        this.config = { ...DEFAULT_CONFIGS.openai, apiKey: process.env.OPENAI_API_KEY };
        return this.ensureBackend();
      }
      throw error;
    }
  }

  /** Embed one text via the batch path and unwrap the single result. */
  private async callApi(text: string): Promise<number[]> {
    const results = await this.callApiBatch([text]);
    return results[0];
  }

  /** Dispatch a batch to the implementation matching the active backend. */
  private async callApiBatch(texts: string[]): Promise<number[][]> {
    switch (this.config.backend) {
      case 'ollama':
        return this.callOllama(texts);
      case 'openai':
      case 'compatible':
        return this.callOpenAIEmbeddings(texts);
      default:
        throw new Error(`Unknown embedding backend: ${this.config.backend}`);
    }
  }

  /**
   * Embed texts through Ollama's `/api/embed` endpoint, one request per
   * text, in order.
   *
   * @throws On a non-2xx response or a response without `embeddings[0]`.
   */
  private async callOllama(texts: string[]): Promise<number[][]> {
    const results: number[][] = [];
    for (const text of texts) {
      const resp = await fetch(`${this.config.baseUrl}/api/embed`, {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ model: this.config.model, input: text }),
      });

      if (!resp.ok) {
        const body = await resp.text();
        throw new Error(`Ollama embedding failed (${resp.status}): ${body.slice(0, 200)}`);
      }

      const data: any = await resp.json();
      const embedding = data.embeddings?.[0];
      if (!embedding || !Array.isArray(embedding)) {
        throw new Error('Ollama returned invalid embedding format');
      }
      results.push(embedding);
    }
    return results;
  }

  /**
   * Embed texts through an OpenAI-style `/embeddings` endpoint in a single
   * request. The Authorization header is only sent when an API key is set.
   *
   * @throws On a non-2xx response, or when the response does not contain
   *         exactly one item per input text.
   */
  private async callOpenAIEmbeddings(texts: string[]): Promise<number[][]> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
    };
    if (this.config.apiKey) {
      headers['Authorization'] = `Bearer ${this.config.apiKey}`;
    }

    const resp = await fetch(`${this.config.baseUrl}/embeddings`, {
      method: 'POST',
      headers,
      body: JSON.stringify({
        model: this.config.model,
        input: texts,
      }),
    });

    if (!resp.ok) {
      const body = await resp.text();
      throw new Error(`Embedding API failed (${resp.status}): ${body.slice(0, 200)}`);
    }

    const data: any = await resp.json();
    const items: unknown = data?.data;
    if (!Array.isArray(items) || items.length !== texts.length) {
      throw new Error('Embedding API returned wrong number of results');
    }

    // Order by the item's `index` so results line up with the inputs; sort a
    // copy rather than mutating the parsed response.
    return [...items]
      .sort((a: any, b: any) => a.index - b.index)
      .map((d: any) => d.embedding);
  }

  // -------------------------------------------------------------------------
  // Cache
  // -------------------------------------------------------------------------

  /**
   * Cache key for a text under the active backend and model: the first 16
   * hex characters of a SHA-256 digest. Backend and model are part of the
   * key so a backend switch never serves vectors from another model.
   */
  private hash(text: string): string {
    return createHash('sha256')
      .update(`${this.config.backend}\u0000${this.config.model}\u0000${text}`)
      .digest('hex')
      .slice(0, 16);
  }

  /** Load the cache file if present; an unreadable or malformed file yields an empty cache. */
  private loadCache(): void {
    if (!existsSync(this.cacheFile)) return;
    try {
      const raw = readFileSync(this.cacheFile, 'utf-8');
      const entries: [string, number[]][] = JSON.parse(raw);
      this.cache = new Map(entries);
    } catch {
      this.cache = new Map();
    }
  }

  /**
   * Write the whole cache to disk as a JSON array of [key, vector] pairs.
   * Persistence is best-effort: a write failure is reported on stderr and
   * does not fail the embedding request that triggered it.
   */
  private saveCache(): void {
    try {
      const entries = Array.from(this.cache.entries());
      writeFileSync(this.cacheFile, JSON.stringify(entries));
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      process.stderr.write(`[embeddings] failed to persist cache: ${detail}\n`);
    }
  }
}
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified Router — chains NN → Intent → Rules for model selection.
|
|
3
|
+
*
|
|
4
|
+
* The Intent Router is the primary and most capable strategy — it reads
|
|
5
|
+
* every model's description and capabilities from the registry and asks
|
|
6
|
+
* an LLM which one best fits the task. It handles any model, any capability.
|
|
7
|
+
*
|
|
8
|
+
* The NN Router is a fast approximation of Intent — when trained on enough
|
|
9
|
+
* data, it can predict the Intent Router's choice without an LLM call.
|
|
10
|
+
* It's used when available for speed (no API latency).
|
|
11
|
+
*
|
|
12
|
+
* The Rule Router is the minimal fallback — phase/task-kind heuristics
|
|
13
|
+
* that always produce a result but don't consider model descriptions.
|
|
14
|
+
*
|
|
15
|
+
* Priority: direct profile pin (if set & enabled) → NN (if trained & confident) → Intent (primary) → Rules (fallback)
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import type { LedgerPhase, TaskKind, ProviderId } from '../types.ts';
|
|
19
|
+
import type { ModelEntry } from './registry.ts';
|
|
20
|
+
import { ModelRegistry } from './registry.ts';
|
|
21
|
+
import { RuleRouter, type RouteDecision } from './rules.ts';
|
|
22
|
+
import { NNRouter } from './nn-router.ts';
|
|
23
|
+
import { IntentRouter, type IntentRouterConfig } from './intent-router.ts';
|
|
24
|
+
import { EmbeddingService, type EmbeddingConfig } from './embeddings.ts';
|
|
25
|
+
import { RoutingCollector } from './collector.ts';
|
|
26
|
+
|
|
27
|
+
// ---------------------------------------------------------------------------
|
|
28
|
+
// Unified route decision
|
|
29
|
+
// ---------------------------------------------------------------------------
|
|
30
|
+
|
|
31
|
+
/**
 * Outcome of a routing call, whichever strategy produced it.
 */
export interface UnifiedRouteDecision {
  /** The registry entry of the selected model. */
  model: ModelEntry;

  /** Human-readable explanation of why this model was chosen. */
  reason: string;

  /** Which routing strategy produced the decision. */
  tier: 'nn' | 'intent' | 'rules';

  /** Whether the rule router reported a promotion for this decision. */
  promoted: boolean;

  /** Confidence of the prediction; only set by the NN tier. */
  confidence?: number;
}
|
|
38
|
+
|
|
39
|
+
/**
 * What has already happened in the current pipeline run.
 *
 * Passed through to the intent router so its LLM classifier can decide
 * with history in hand — e.g. "Gemini just wrote the code, tests passed,
 * pick a reviewer" — rather than seeing only the original prompt again.
 */
export interface PhaseContext {
  /** Earlier phases of this run, each with the model that handled it. */
  priorPhases?: {
    phase: string;
    model: string;
    summary?: string;
    succeeded?: boolean;
  }[];

  /** The goal the pipeline is currently working toward. */
  currentGoal?: string;
}
|
|
54
|
+
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
// Unified Router
|
|
57
|
+
// ---------------------------------------------------------------------------
|
|
58
|
+
|
|
59
|
+
/**
 * Unified model router: owns the registry and the individual routing
 * strategies (NN, intent, rules) and combines them in `select()`.
 */
export class Router {
  readonly registry: ModelRegistry;
  readonly rules: RuleRouter;
  readonly nn: NNRouter;
  readonly intent: IntentRouter;
  readonly embeddings: EmbeddingService;
  readonly collector: RoutingCollector;

  private useIntent: boolean;
  /**
   * Active profile scope + classifier overrides, applied to every
   * `select()` call. `setProfileScope` is called from backend.ts whenever
   * the active budget profile changes so routing (and the intent router's
   * classifier LLM) stay inside the profile's allowedProviders.
   */
  private profileScope: {
    allowedProviders?: ProviderId[];
    classifier?: { provider: ProviderId; model: string };
    rolePinning?: Record<string, string>;
  } = {};

  /**
   * @param storageDir Directory handed to the registry, embedding service,
   *                   NN router and collector for their persisted state.
   * @param options Optional overrides; `useIntent` defaults to true.
   */
  constructor(
    storageDir: string,
    options?: {
      embeddingConfig?: Partial<EmbeddingConfig>;
      intentConfig?: Partial<IntentRouterConfig>;
      useIntent?: boolean;
      nnConfidenceThreshold?: number;
    },
  ) {
    this.registry = new ModelRegistry(storageDir);
    this.rules = new RuleRouter(this.registry);
    this.embeddings = new EmbeddingService(storageDir, options?.embeddingConfig);
    this.nn = new NNRouter(storageDir, options?.nnConfidenceThreshold);
    this.intent = new IntentRouter(options?.intentConfig);
    this.collector = new RoutingCollector(storageDir, this.embeddings);
    this.useIntent = options?.useIntent ?? true;
  }

  /**
   * Update the profile-scoped behavior for routing:
   *   - allowedProviders: filters candidate models
   *   - classifier: overrides the classifier LLM (e.g. zai uses glm-4.5-flash)
   *   - rolePinning: phase → model ID pins
   * Called from backend.ts whenever the active profile changes.
   */
  setProfileScope(scope: {
    classifier?: { provider: ProviderId; model: string };
    rolePinning?: Record<string, string>;
    /** Explicit provider allow-list from the profile. Takes precedence
     *  over the auto-derived one when set. */
    allowedProviders?: ProviderId[];
  }): void {
    // Prefer the profile's explicit allowedProviders. Otherwise derive it
    // from the providers of the pinned models, so a profile with only pins
    // still gets scoped routing for free.
    let allowedProviders: ProviderId[] | undefined = scope.allowedProviders;
    if (!allowedProviders && scope.rolePinning) {
      const providers = new Set<ProviderId>();
      for (const modelId of Object.values(scope.rolePinning)) {
        const m = this.registry.getById(modelId);
        if (m) providers.add(m.provider);
      }
      if (providers.size > 0) {
        allowedProviders = [...providers];
      }
    }
    this.profileScope = { ...scope, allowedProviders };
  }

  /** Get the profile-scoped classifier model (for task-router, compactor, etc.) */
  getClassifier(): { provider: ProviderId; model: string } | undefined {
    return this.profileScope.classifier;
  }

  /**
   * Select the best model for a phase.
   *
   * Order of evaluation:
   *   0. Direct profile pin for this phase, if the pinned model is enabled
   *      (no LLM call, no embedding).
   *   1. NN router, if available — its pick is only accepted when the
   *      model's provider is inside the profile's allowedProviders.
   *   2. Intent router (when enabled), given the profile scope and the
   *      prior-phase context.
   *   3. Profile pin again, in case it became usable in the meantime;
   *      otherwise a warning explains why the pin could not be used.
   *   4. Rule router — always produces a result.
   *
   * Errors thrown by the NN or intent tier are logged to stderr and the
   * next tier is tried.
   */
  async select(
    phase: LedgerPhase,
    promptText: string,
    taskKind?: TaskKind,
    failures?: number,
    promotionThreshold?: number,
    phaseContext?: PhaseContext,
  ): Promise<UnifiedRouteDecision> {
    const pinnedId = this.profileScope.rolePinning?.[phase];

    // 0. Fast path: a usable direct pin wins immediately.
    const directPin = this.enabledPin(pinnedId);
    if (directPin) {
      return {
        model: directPin,
        reason: `pin: ${directPin.alias || directPin.id}`,
        tier: 'rules',
        promoted: false,
      };
    }

    // 1. NN router (fast, no LLM call).
    try {
      if (this.nn.isAvailable()) {
        let embedding: number[] | undefined;
        // The embedding is optional input for the NN; predict without it on failure.
        try { embedding = await this.embeddings.embed(promptText.slice(0, 2048)); } catch { /* skip */ }

        const nnResult = this.nn.predict(
          phase, taskKind,
          promptText.length, 0, failures || 0,
          this.registry, embedding,
        );

        // Discard picks from providers the active profile does not allow.
        if (nnResult && this.isProviderAllowed(nnResult.model)) {
          return {
            model: nnResult.model,
            reason: `nn (${(nnResult.confidence * 100).toFixed(0)}% confidence)`,
            tier: 'nn',
            promoted: false,
            confidence: nnResult.confidence,
          };
        }
      }
    } catch (e) {
      process.stderr.write(`[router] NN tier failed: ${Router.describeError(e)}\n`);
    }

    // 2. Intent router with enriched phase context. Only reached when no
    //    usable direct pin exists for this phase.
    const pinnedModelIds = this.profileScope.rolePinning
      ? [...new Set(Object.values(this.profileScope.rolePinning))]
      : undefined;

    try {
      if (this.useIntent) {
        const intentResult = await this.intent.classify(
          promptText, phase, taskKind, this.registry,
          {
            allowedProviders: this.profileScope.allowedProviders,
            allowedModelIds: pinnedModelIds,
            classifier: this.profileScope.classifier,
            phaseContext,
            phasePreference: pinnedId,
          },
        );

        if (intentResult) {
          return {
            model: intentResult.model,
            reason: `intent: ${intentResult.intent}`,
            tier: 'intent',
            promoted: false,
          };
        }
      }
    } catch (e) {
      process.stderr.write(`[router] Intent tier failed: ${Router.describeError(e)}\n`);
    }

    // 3. Profile pin fallback. The pin was unusable at step 0; re-check it
    //    after the async tiers, then tell the user why it is being skipped.
    if (pinnedId) {
      const latePin = this.enabledPin(pinnedId);
      if (latePin) {
        return {
          model: latePin,
          reason: `pin: ${latePin.alias || latePin.id} (intent failed, using profile default)`,
          tier: 'rules',
          promoted: false,
        };
      }
      const problem = this.registry.getById(pinnedId)
        ? 'is disabled in the registry. Enable it or pin a different model.'
        : 'not found in registry. Check that the model ID matches an entry in models.yml.';
      process.stderr.write(
        `[router] ⚠ profile pin "${pinnedId}" for phase "${phase}" ${problem} Falling back to rules.\n`
      );
    }

    // 4. Rule-based fallback — last resort. Make the reason clear when a
    //    pin existed but could not be honored.
    const ruleResult = this.rules.select(phase, taskKind, failures, promotionThreshold);
    const chosenName = ruleResult.model.alias || ruleResult.model.id;
    let reason: string;
    if (ruleResult.promoted) {
      reason = `⚠ promoted after ${failures} failures → ${chosenName}`;
    } else if (pinnedId) {
      reason = `⚠ "${pinnedId}" not available → ${chosenName} (fallback)`;
    } else {
      reason = ruleResult.reason;
    }
    return {
      model: ruleResult.model,
      reason,
      tier: 'rules',
      promoted: ruleResult.promoted,
    };
  }

  /**
   * Synchronous select — rules only. Use when you can't await.
   */
  selectSync(
    phase: LedgerPhase,
    taskKind?: TaskKind,
    failures?: number,
    promotionThreshold?: number,
  ): UnifiedRouteDecision {
    const ruleResult = this.rules.select(phase, taskKind, failures, promotionThreshold);
    return {
      model: ruleResult.model,
      reason: ruleResult.reason,
      tier: 'rules',
      promoted: ruleResult.promoted,
    };
  }

  /** Status summary for display: one line per router plus the embedding backend. */
  status(): string {
    const cfg = this.embeddings.getConfig();
    return [
      `NN Router: ${this.nn.isAvailable() ? 'trained and active' : 'not trained (collecting data)'}`,
      `Intent Router: ${this.useIntent ? 'enabled' : 'disabled'}`,
      `Rule Router: active (fallback)`,
      `Embeddings: ${cfg.backend}/${cfg.model} (${cfg.dimension}D, ${this.embeddings.cacheSize()} cached)`,
    ].join('\n');
  }

  /** Returns the registry entry for a pinned model ID when it exists and is enabled. */
  private enabledPin(pinnedId: string | undefined): ModelEntry | undefined {
    if (!pinnedId) return undefined;
    const pinned = this.registry.getById(pinnedId);
    return pinned && pinned.enabled ? pinned : undefined;
  }

  /** True when the profile has no (or an empty) provider allow-list, or the model's provider is on it. */
  private isProviderAllowed(model: ModelEntry): boolean {
    const allowed = this.profileScope.allowedProviders;
    return !allowed || allowed.length === 0 || allowed.includes(model.provider);
  }

  /** Message of a caught value, whether or not it is an Error. */
  private static describeError(e: unknown): string {
    return e instanceof Error ? e.message : String(e);
  }
}
|
|
292
|
+
|
|
293
|
+
// Re-export for convenience
|
|
294
|
+
export { ModelRegistry, type ModelEntry } from './registry.ts';
|
|
295
|
+
export { RuleRouter, type RouteDecision } from './rules.ts';
|
|
296
|
+
export { NNRouter } from './nn-router.ts';
|
|
297
|
+
export { IntentRouter } from './intent-router.ts';
|
|
298
|
+
export { EmbeddingService, type EmbeddingConfig } from './embeddings.ts';
|
|
299
|
+
export { RoutingCollector, type RoutingSample } from './collector.ts';
|