@thispointon/kondi-chat 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +556 -0
- package/bin/kondi-chat +56 -0
- package/bin/kondi-chat.js +72 -0
- package/package.json +55 -0
- package/scripts/demo.tape +49 -0
- package/scripts/postinstall.cjs +103 -0
- package/src/audit/analytics.ts +261 -0
- package/src/audit/ledger.ts +253 -0
- package/src/audit/telemetry.ts +165 -0
- package/src/cli/backend.ts +675 -0
- package/src/cli/commands.ts +419 -0
- package/src/cli/help.ts +182 -0
- package/src/cli/submit-helpers.ts +159 -0
- package/src/cli/submit.ts +539 -0
- package/src/cli/wizard.ts +121 -0
- package/src/context/bootstrap.ts +138 -0
- package/src/context/budget.ts +100 -0
- package/src/context/manager.ts +666 -0
- package/src/context/memory.ts +160 -0
- package/src/context/preflight.ts +176 -0
- package/src/context/project-brain.ts +101 -0
- package/src/context/receipts.ts +108 -0
- package/src/context/skills.ts +154 -0
- package/src/context/symbol-index.ts +240 -0
- package/src/council/profiles.ts +137 -0
- package/src/council/tool.ts +138 -0
- package/src/council-engine/cli/council-artifacts.ts +230 -0
- package/src/council-engine/cli/council-config.ts +178 -0
- package/src/council-engine/cli/council-session-export.ts +116 -0
- package/src/council-engine/cli/kondi.ts +98 -0
- package/src/council-engine/cli/llm-caller.ts +229 -0
- package/src/council-engine/cli/localStorage-shim.ts +119 -0
- package/src/council-engine/cli/node-platform.ts +68 -0
- package/src/council-engine/cli/run-council.ts +481 -0
- package/src/council-engine/cli/run-pipeline.ts +772 -0
- package/src/council-engine/cli/session-export.ts +153 -0
- package/src/council-engine/configs/councils/analysis.json +101 -0
- package/src/council-engine/configs/councils/code-planning.json +86 -0
- package/src/council-engine/configs/councils/coding.json +89 -0
- package/src/council-engine/configs/councils/debate.json +97 -0
- package/src/council-engine/configs/councils/solo-claude.json +34 -0
- package/src/council-engine/configs/councils/solo-gpt.json +34 -0
- package/src/council-engine/council/coding-orchestrator.ts +1205 -0
- package/src/council-engine/council/context-bootstrap.ts +147 -0
- package/src/council-engine/council/context-inspection.ts +42 -0
- package/src/council-engine/council/context-store.ts +763 -0
- package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
- package/src/council-engine/council/factory.ts +164 -0
- package/src/council-engine/council/index.ts +201 -0
- package/src/council-engine/council/ledger-store.ts +438 -0
- package/src/council-engine/council/prompts.ts +1689 -0
- package/src/council-engine/council/storage-cleanup.ts +164 -0
- package/src/council-engine/council/store.ts +1110 -0
- package/src/council-engine/council/synthesis.ts +291 -0
- package/src/council-engine/council/types.ts +845 -0
- package/src/council-engine/council/validation.ts +613 -0
- package/src/council-engine/pipeline/build-detect.ts +73 -0
- package/src/council-engine/pipeline/executor.ts +1048 -0
- package/src/council-engine/pipeline/index.ts +9 -0
- package/src/council-engine/pipeline/install-detect.ts +84 -0
- package/src/council-engine/pipeline/memory-store.ts +182 -0
- package/src/council-engine/pipeline/output-parsers.ts +146 -0
- package/src/council-engine/pipeline/run-output.ts +149 -0
- package/src/council-engine/pipeline/session-import.ts +177 -0
- package/src/council-engine/pipeline/store.ts +753 -0
- package/src/council-engine/pipeline/test-detect.ts +82 -0
- package/src/council-engine/pipeline/types.ts +401 -0
- package/src/council-engine/services/deliberationSummary.ts +114 -0
- package/src/council-engine/tsconfig.json +16 -0
- package/src/council-engine/types/mcp.ts +122 -0
- package/src/council-engine/utils/filterTools.ts +73 -0
- package/src/engine/apply.ts +238 -0
- package/src/engine/checkpoints.ts +237 -0
- package/src/engine/consultants.ts +347 -0
- package/src/engine/diff.ts +171 -0
- package/src/engine/errors.ts +102 -0
- package/src/engine/git-tools.ts +246 -0
- package/src/engine/hooks.ts +181 -0
- package/src/engine/loop-guard.ts +155 -0
- package/src/engine/permissions.ts +293 -0
- package/src/engine/pipeline.ts +376 -0
- package/src/engine/sub-agents.ts +133 -0
- package/src/engine/task-card.ts +185 -0
- package/src/engine/task-router.ts +256 -0
- package/src/engine/task-store.ts +86 -0
- package/src/engine/tools.ts +783 -0
- package/src/engine/verify.ts +111 -0
- package/src/mcp/client.ts +225 -0
- package/src/mcp/config.ts +120 -0
- package/src/mcp/tool-manager.ts +192 -0
- package/src/mcp/types.ts +61 -0
- package/src/providers/llm-caller.ts +943 -0
- package/src/providers/rate-limiter.ts +238 -0
- package/src/router/NOTES.md +28 -0
- package/src/router/collector.ts +474 -0
- package/src/router/embeddings.ts +286 -0
- package/src/router/index.ts +299 -0
- package/src/router/intent-router.ts +225 -0
- package/src/router/nn-router.ts +205 -0
- package/src/router/profiles.ts +309 -0
- package/src/router/registry.ts +565 -0
- package/src/router/rules.ts +274 -0
- package/src/router/train.py +408 -0
- package/src/session/store.ts +211 -0
- package/src/test-utils/mock-llm.ts +39 -0
- package/src/types.ts +322 -0
- package/src/web/manager.ts +311 -0
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Intent Router — LLM-based classification, the primary routing strategy.
|
|
3
|
+
*
|
|
4
|
+
* Reads every model's description and capabilities from the registry and
|
|
5
|
+
* asks a cheap LLM: "given these model descriptions, which one best matches
|
|
6
|
+
* this task?" This is the smartest tier — it handles any model, any capability,
|
|
7
|
+
* and adapts automatically when models are added or removed.
|
|
8
|
+
*
|
|
9
|
+
* Priority chain:
|
|
10
|
+
* 1. NN Router — fast approximation of Intent (when trained)
|
|
11
|
+
* 2. Intent Router — primary, reads model descriptions (this file)
|
|
12
|
+
* 3. Rule Router — minimal phase/task-kind fallback
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import type { LedgerPhase, TaskKind, ProviderId } from '../types.ts';
|
|
16
|
+
import { callLLM } from '../providers/llm-caller.ts';
|
|
17
|
+
import type { ModelRegistry, ModelEntry } from './registry.ts';
|
|
18
|
+
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Configuration
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
/** Settings for the LLM that performs intent classification. */
export interface IntentRouterConfig {
  /** Provider that hosts the classification LLM. Pick something cheap and fast. */
  provider: ProviderId;
  /** Model ID handed to the classification call for that provider. */
  model?: string;
  /**
   * Restricts intent routing to the listed phases. When omitted, every phase
   * is eligible — the intent router is the primary tier, so by default it
   * gets a chance at every routing decision.
   */
  phases?: LedgerPhase[];
}
|
|
33
|
+
|
|
34
|
+
/**
 * Baseline classifier settings. Any field the caller does not override in the
 * IntentRouter constructor is taken from here.
 */
const DEFAULT_CONFIG: IntentRouterConfig = {
  provider: 'anthropic',
  model: 'claude-haiku-4-5-20251001',
  // `phases` is intentionally left out: undefined means all phases are eligible.
};
|
|
39
|
+
|
|
40
|
+
/** Per-call overrides for a single classification, supplied by the active budget profile. */
export interface IntentRouterCallOptions {
  /** When non-empty, candidates are limited to models from these providers. */
  allowedProviders?: ProviderId[];
  /**
   * When non-empty, candidates are limited to exactly these model IDs.
   * Derived from the profile's rolePinning values, so the classifier is shown
   * the models the profile actually uses rather than everything the allowed
   * providers offer. Wins over `allowedProviders` when both are supplied.
   */
  allowedModelIds?: string[];
  /** Classifier LLM to use for this call instead of the configured one (e.g. zai's glm-4.5-flash). */
  classifier?: { provider: ProviderId; model: string };
  /** Rich context describing what earlier pipeline phases did. */
  phaseContext?: import('../router/index.ts').PhaseContext;
  /** Model the profile prefers for this phase. A soft hint, not a hard pin. */
  phasePreference?: string;
}
|
|
58
|
+
|
|
59
|
+
// ---------------------------------------------------------------------------
|
|
60
|
+
// Intent Router
|
|
61
|
+
// ---------------------------------------------------------------------------
|
|
62
|
+
|
|
63
|
+
/**
 * LLM-backed router. It describes every candidate model from the registry to
 * a classifier LLM and asks which one should handle the current pipeline step.
 */
export class IntentRouter {
  private config: IntentRouterConfig;

  /**
   * @param config Partial overrides; unspecified fields fall back to DEFAULT_CONFIG.
   */
  constructor(config?: Partial<IntentRouterConfig>) {
    this.config = { ...DEFAULT_CONFIG, ...config };
  }

  /**
   * Classify a prompt against the available models and return the best match.
   *
   * @param promptText The user prompt; used as the goal when `opts.phaseContext.currentGoal` is absent.
   * @param phase Pipeline phase the model is being chosen for.
   * @param taskKind Optional task kind, included in the classifier prompt when set.
   * @param registry Source of candidate models and of the final ID lookup.
   * @param opts Per-call candidate restrictions, classifier override and context.
   * @returns The chosen model plus the raw route string (or `'only-candidate'`
   *   when exactly one candidate exists). Returns null when the phase is
   *   excluded by config, there are no candidates, the classifier call throws,
   *   its reply cannot be parsed, or it names a model that is unknown,
   *   disabled, or outside the candidate set.
   */
  async classify(
    promptText: string,
    phase: LedgerPhase,
    taskKind: TaskKind | undefined,
    registry: ModelRegistry,
    opts?: IntentRouterCallOptions,
  ): Promise<{ model: ModelEntry; intent: string } | null> {
    // Phase filter: if the caller restricted phases in config, honor it.
    // Default (no config) = all phases.
    if (this.config.phases && !this.config.phases.includes(phase)) {
      return null;
    }

    const candidates = this.selectCandidates(registry, opts);
    if (candidates.length <= 1) {
      // Trivial case: with a single candidate there is nothing to classify, so
      // skip the LLM call. With none, null hands the decision to the caller.
      return candidates.length === 1
        ? { model: candidates[0], intent: 'only-candidate' }
        : null;
    }

    const prompt = this.buildPrompt(promptText, phase, taskKind, candidates, opts);

    // Classifier model: per-call override (from active profile) > config default.
    const classifierProvider = opts?.classifier?.provider ?? this.config.provider;
    const classifierModel = opts?.classifier?.model ?? this.config.model;

    try {
      const response = await callLLM({
        provider: classifierProvider,
        model: classifierModel,
        systemPrompt: 'You select the best model for a task. Respond with only JSON.',
        userMessage: prompt,
        maxOutputTokens: 50,
        temperature: 0,
      });

      const parsed = this.parseResponse(response.content);
      if (!parsed) return null;

      const model = registry.getById(parsed);
      if (!model || !model.enabled) return null;

      // The LLM can hallucinate a model name that was never offered. Validate
      // against the exact candidate list it was shown, so that both the
      // allowedModelIds and the allowedProviders restrictions are enforced.
      if (!candidates.some(candidate => candidate.id === model.id)) return null;

      process.stderr.write(` │ intent-router: ${model.id} (via ${response.model})\n`);
      return { model, intent: parsed };
    } catch {
      // Deliberate best-effort: any classifier failure yields null instead of throwing.
      return null;
    }
  }

  // -------------------------------------------------------------------------
  // Helpers
  // -------------------------------------------------------------------------

  /**
   * Work out which enabled models the classifier may choose from.
   * Precedence: explicit model IDs > provider filter > every enabled model.
   */
  private selectCandidates(registry: ModelRegistry, opts?: IntentRouterCallOptions): ModelEntry[] {
    const enabled = registry.getEnabled();
    if (opts?.allowedModelIds && opts.allowedModelIds.length > 0) {
      const allow = new Set(opts.allowedModelIds);
      return enabled.filter(m => allow.has(m.id));
    }
    if (opts?.allowedProviders && opts.allowedProviders.length > 0) {
      const allow = new Set(opts.allowedProviders);
      return enabled.filter(m => allow.has(m.provider));
    }
    return enabled;
  }

  /** Assemble the classifier prompt: candidate routes, current step, prior phases and preference hint. */
  private buildPrompt(
    promptText: string,
    phase: LedgerPhase,
    taskKind: TaskKind | undefined,
    candidates: ModelEntry[],
    opts?: IntentRouterCallOptions,
  ): string {
    // One <route> element per candidate, described from its registry entry.
    const routesXml = candidates
      .map(m => ` <route name="${m.id}">${this.describeModel(m)}</route>`)
      .join('\n');

    // Phase-context block so the classifier knows what happened in prior
    // pipeline steps, not just the original user prompt.
    let contextBlock = '';
    if (opts?.phaseContext?.priorPhases && opts.phaseContext.priorPhases.length > 0) {
      const lines = opts.phaseContext.priorPhases.map(p =>
        ` - ${p.phase}: handled by ${p.model}${p.succeeded === false ? ' (FAILED)' : ''}${p.summary ? ` — ${p.summary}` : ''}`
      );
      contextBlock = `\n<prior_phases>\n${lines.join('\n')}\n</prior_phases>\n`;
    }

    let preferenceHint = '';
    if (opts?.phasePreference) {
      preferenceHint = `\nThe user's profile suggests "${opts.phasePreference}" for the ${phase} phase. Honor this preference unless another model is clearly better suited given the context above.\n`;
    }

    const phaseDescriptions: Record<string, string> = {
      discuss: 'Conversational Q&A, explanations, open-ended discussion. Needs good general reasoning at reasonable cost.',
      dispatch: 'Planning and task decomposition. Needs strong architectural reasoning — this call sets the direction for everything that follows. Worth paying more for quality here.',
      execute: 'Code generation, file editing, tool calls. High-volume phase (3-10 calls per turn). Cost matters. Speed matters. Code quality needs to be good but planning was already done.',
      reflect: 'Reviewing and critiquing code that was just written. Needs to catch bugs without hallucinating new ones. Should NOT be the same model that wrote the code if possible.',
      compress: 'Summarizing old context to save tokens. Grunt work. Use the cheapest model available.',
      state_update: 'Updating session state. Grunt work. Use the cheapest model available.',
      verify: 'Local verification (no LLM needed).',
      consult: 'Domain-expert consultation. Use whatever model the consultant specifies.',
    };

    // Phases without a description fall back to a bare label.
    const phaseDesc = phaseDescriptions[phase] || `Phase: ${phase}`;

    // The goal text is capped at 800 characters.
    return `You are a router that selects the best model for the current step of a multi-phase coding pipeline.

<routes>
${routesXml}
</routes>

<current_step>
Phase: ${phase}
Phase meaning: ${phaseDesc}
${taskKind ? `Task kind: ${taskKind}` : ''}
Original goal: ${(opts?.phaseContext?.currentGoal || promptText).slice(0, 800)}
</current_step>
${contextBlock}${preferenceHint}
Given the available models, the current phase, and what happened in prior phases, which model should handle this step? Consider: capabilities, cost, and whether the model that wrote the code should be different from the one that reviews it.
Respond with ONLY a JSON object: {"route": "model_id"}`;
  }

  /**
   * Generate a natural language description of a model from its registry entry.
   * Cost tier is derived from input price: under $1/M is "cheap", under $5/M
   * is "mid-tier", anything else is "expensive".
   */
  private describeModel(m: ModelEntry): string {
    const costTier = m.inputCostPer1M < 1 ? 'cheap' : m.inputCostPer1M < 5 ? 'mid-tier' : 'expensive';
    return (
      `${m.name} — ${costTier} model (${m.provider}). ` +
      `Good at: ${m.capabilities.join(', ')}. ` +
      `Context: ${(m.contextWindow / 1000).toFixed(0)}K tokens. ` +
      `Cost: $${m.inputCostPer1M}/M input, $${m.outputCostPer1M}/M output.`
    );
  }

  /**
   * Parse the LLM's route selection from its response.
   * Accepts a `{"route": "..."}` object anywhere in the text, or a bare
   * single-token reply (optionally quoted). Returns null for anything else.
   */
  private parseResponse(content: string): string | null {
    try {
      // Preferred form: a JSON object carrying a "route" string.
      const match = content.match(/\{[^}]*"route"\s*:\s*"([^"]+)"[^}]*\}/);
      if (match) return match[1];

      // Fallback: a plain model ID with no spaces, with surrounding quotes stripped.
      const cleaned = content.trim().replace(/^["']|["']$/g, '');
      if (cleaned && !cleaned.includes(' ')) return cleaned;

      return null;
    } catch {
      return null;
    }
  }
}
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NN Router — lightweight neural network for model selection.
|
|
3
|
+
*
|
|
4
|
+
* Trained by src/router/train.py from data collected by the rule-based
|
|
5
|
+
* router. At inference time, predicts which model will succeed for a
|
|
6
|
+
* given (phase, task_kind, prompt_length, context_tokens, failures).
|
|
7
|
+
*
|
|
8
|
+
* Returns null, deferring to the next router tier, when:
|
|
9
|
+
* - No trained model exists
|
|
10
|
+
* - Confidence is below threshold
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import { readFileSync, existsSync } from 'node:fs';
|
|
14
|
+
import { join } from 'node:path';
|
|
15
|
+
import type { LedgerPhase, TaskKind } from '../types.ts';
|
|
16
|
+
import type { ModelRegistry, ModelEntry } from './registry.ts';
|
|
17
|
+
import type { EmbeddingService } from './embeddings.ts';
|
|
18
|
+
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Types
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
/** Shape of the router model JSON file that NNRouter reads from disk. */
interface NNModelData {
  /** Network parameters consumed by the forward pass. */
  nn: {
    /** One matrix per layer, indexed as weights[layer][inputIndex][outputIndex]. */
    weights: number[][][];
    /** One bias vector per layer, indexed as biases[layer][outputIndex]. */
    biases: number[][];
    layerDims: number[];
  };
  /** Vocabulary and layout needed to encode features. */
  featureInfo: {
    /** Phase names in one-hot order. */
    phases: string[];
    /** Task-kind names in one-hot order; a missing task kind is encoded as 'none'. */
    taskKinds: string[];
    featureNames: string[];
    inputDim: number;
    /** Length the embedding is padded or truncated to before being prepended. */
    embeddingDim: number;
    /** Whether an embedding vector is prepended to the structured features. */
    hasEmbeddings: boolean;
  };
  /** Model IDs, aligned by index with the network's outputs. */
  modelNames: string[];
  metrics: Record<string, unknown>;
  sampleCount: number;
}
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Inference (pure math, no dependencies)
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
/** Element-wise rectified linear unit: each value is floored at zero. */
function relu(x: number[]): number[] {
  const floorAtZero = (value: number): number => Math.max(0, value);
  return x.map(floorAtZero);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Element-wise logistic function. Inputs are clamped to [-500, 500] before
 * exponentiation.
 */
function sigmoid(x: number[]): number[] {
  const limit = 500;
  return x.map(value => {
    const clamped = Math.max(-limit, Math.min(limit, value));
    return 1 / (1 + Math.exp(-clamped));
  });
}
|
|
53
|
+
|
|
54
|
+
/**
 * Affine layer: returns `input · weights + bias`.
 *
 * `weights` is indexed as weights[inputIndex][outputIndex]; the output length
 * is taken from the first row of `weights`.
 */
function matmul(input: number[], weights: number[][], bias: number[]): number[] {
  const columnCount = weights[0].length;
  const result: number[] = [];
  for (let col = 0; col < columnCount; col += 1) {
    // Start from the bias and accumulate contributions in input order.
    let acc = bias[col];
    let row = 0;
    while (row < input.length) {
      acc += input[row] * weights[row][col];
      row += 1;
    }
    result.push(acc);
  }
  return result;
}
|
|
65
|
+
|
|
66
|
+
/**
 * Forward pass through a fully connected network: ReLU after every layer
 * except the last, which uses a sigmoid.
 */
function predict(input: number[], weights: number[][][], biases: number[][]): number[] {
  const outputLayer = weights.length - 1;
  let activations = input;
  for (let depth = 0; depth <= outputLayer; depth += 1) {
    const preActivation = matmul(activations, weights[depth], biases[depth]);
    activations = depth === outputLayer ? sigmoid(preActivation) : relu(preActivation);
  }
  return activations;
}
|
|
78
|
+
|
|
79
|
+
// ---------------------------------------------------------------------------
|
|
80
|
+
// NN Router
|
|
81
|
+
// ---------------------------------------------------------------------------
|
|
82
|
+
|
|
83
|
+
/**
 * Router that scores candidate models with a small feed-forward network
 * loaded from `router-model.json` in the given storage directory.
 */
export class NNRouter {
  private modelData: NNModelData | null = null;
  private modelPath: string;
  private confidenceThreshold: number;

  /**
   * @param storageDir Directory containing `router-model.json`.
   * @param confidenceThreshold Minimum top probability required for a prediction.
   */
  constructor(storageDir: string, confidenceThreshold = 0.6) {
    this.modelPath = join(storageDir, 'router-model.json');
    this.confidenceThreshold = confidenceThreshold;
    this.load();
  }

  /** Is a trained (and structurally usable) model available? */
  isAvailable(): boolean {
    return this.modelData !== null;
  }

  /** Reload model from disk (after retraining). */
  reload(): void {
    this.load();
  }

  /**
   * Predict the best model for a given context.
   *
   * @param embedding Optional pre-computed embedding from EmbeddingService.
   *   If the model was trained with embeddings and none is provided,
   *   a zero vector is used (degrades to structured-only features).
   * @returns The top-scoring model with its confidence and the full
   *   probability map. Returns null when no model is loaded, the encoded
   *   feature vector does not match the network's input size, the top
   *   probability is below the threshold, or the chosen model is missing
   *   from or disabled in the registry.
   */
  predict(
    phase: LedgerPhase,
    taskKind: TaskKind | undefined,
    promptLength: number,
    contextTokens: number,
    failures: number,
    registry: ModelRegistry,
    embedding?: number[],
  ): { model: ModelEntry; confidence: number; probabilities: Record<string, number> } | null {
    const data = this.modelData;
    if (!data) return null;

    const features = this.encodeFeatures(phase, taskKind, promptLength, contextTokens, failures, embedding);

    // The first weight matrix has one row per input feature. If the encoded
    // vector disagrees (e.g. a stale model file), decline to predict instead
    // of indexing past the matrix and throwing.
    if (features.length !== data.nn.weights[0].length) return null;

    const probs = predict(features, data.nn.weights, data.nn.biases);

    // Build the probability map and track the arg-max.
    const probabilities: Record<string, number> = {};
    let bestIdx = 0;
    let bestProb = 0;

    for (let i = 0; i < data.modelNames.length; i++) {
      const name = data.modelNames[i];
      probabilities[name] = probs[i];
      if (probs[i] > bestProb) {
        bestProb = probs[i];
        bestIdx = i;
      }
    }

    // Check confidence threshold
    if (bestProb < this.confidenceThreshold) return null;

    // Find the model in the registry
    const modelId = data.modelNames[bestIdx];
    const model = registry.getById(modelId);
    if (!model || !model.enabled) return null;

    return { model, confidence: bestProb, probabilities };
  }

  // -------------------------------------------------------------------------
  // Feature encoding
  // -------------------------------------------------------------------------

  /**
   * Encode the routing context as the network's input vector: an optional
   * embedding, then one-hot phase, one-hot task kind, and three values
   * normalised into [0, 1] (prompt length, context tokens, failures).
   */
  private encodeFeatures(
    phase: LedgerPhase,
    taskKind: TaskKind | undefined,
    promptLength: number,
    contextTokens: number,
    failures: number,
    embedding?: number[],
  ): number[] {
    if (!this.modelData) return [];

    const info = this.modelData.featureInfo;

    // Structured features
    const phaseVec = info.phases.map(p => p === phase ? 1 : 0);
    const kindVec = info.taskKinds.map(k => k === (taskKind || 'none') ? 1 : 0);
    const promptNorm = Math.min(promptLength / 10_000, 1);
    const contextNorm = Math.min(contextTokens / 100_000, 1);
    const failureNorm = Math.min(failures / 5, 1);
    const structured = [...phaseVec, ...kindVec, promptNorm, contextNorm, failureNorm];

    // Prepend embedding if the model was trained with them
    if (info.hasEmbeddings && info.embeddingDim > 0) {
      const emb = embedding || new Array(info.embeddingDim).fill(0);
      // Pad or truncate to expected dimension
      const padded = emb.length >= info.embeddingDim
        ? emb.slice(0, info.embeddingDim)
        : [...emb, ...new Array(info.embeddingDim - emb.length).fill(0)];
      return [...padded, ...structured];
    }

    return structured;
  }

  // -------------------------------------------------------------------------
  // Persistence
  // -------------------------------------------------------------------------

  /**
   * Read and validate the model file. A missing, unreadable, unparsable or
   * structurally invalid file leaves the router with no model.
   */
  private load(): void {
    if (!existsSync(this.modelPath)) {
      this.modelData = null;
      return;
    }

    try {
      const parsed: unknown = JSON.parse(readFileSync(this.modelPath, 'utf-8'));
      // JSON.parse succeeding says nothing about shape; only keep data the
      // inference code can walk without throwing.
      this.modelData = NNRouter.isUsableModel(parsed) ? parsed : null;
    } catch {
      this.modelData = null;
    }
  }

  /**
   * Structural check of parsed model JSON: the arrays inference iterates over
   * exist, every layer has a bias vector, consecutive layers agree on width,
   * and the output layer has at least one unit per model name.
   */
  private static isUsableModel(data: unknown): data is NNModelData {
    if (typeof data !== 'object' || data === null) return false;

    const candidate = data as Partial<NNModelData>;
    const nn = candidate.nn;
    const info = candidate.featureInfo;
    if (!nn || !info) return false;
    if (!Array.isArray(candidate.modelNames)) return false;
    if (!Array.isArray(info.phases) || !Array.isArray(info.taskKinds)) return false;
    if (!Array.isArray(nn.weights) || nn.weights.length === 0) return false;
    if (!Array.isArray(nn.biases) || nn.biases.length < nn.weights.length) return false;

    let previousWidth: number | null = null;
    for (let layer = 0; layer < nn.weights.length; layer++) {
      const matrix = nn.weights[layer];
      if (!Array.isArray(matrix) || matrix.length === 0) return false;
      if (!matrix.every(row => Array.isArray(row))) return false;
      if (!Array.isArray(nn.biases[layer])) return false;
      // Each layer must accept exactly what the previous layer emits.
      if (previousWidth !== null && matrix.length !== previousWidth) return false;
      previousWidth = matrix[0].length;
    }

    // Every model name needs a corresponding output unit.
    return previousWidth !== null && previousWidth >= candidate.modelNames.length;
  }
}
|