@thispointon/kondi-chat 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +556 -0
- package/bin/kondi-chat +56 -0
- package/bin/kondi-chat.js +72 -0
- package/package.json +55 -0
- package/scripts/demo.tape +49 -0
- package/scripts/postinstall.cjs +103 -0
- package/src/audit/analytics.ts +261 -0
- package/src/audit/ledger.ts +253 -0
- package/src/audit/telemetry.ts +165 -0
- package/src/cli/backend.ts +675 -0
- package/src/cli/commands.ts +419 -0
- package/src/cli/help.ts +182 -0
- package/src/cli/submit-helpers.ts +159 -0
- package/src/cli/submit.ts +539 -0
- package/src/cli/wizard.ts +121 -0
- package/src/context/bootstrap.ts +138 -0
- package/src/context/budget.ts +100 -0
- package/src/context/manager.ts +666 -0
- package/src/context/memory.ts +160 -0
- package/src/context/preflight.ts +176 -0
- package/src/context/project-brain.ts +101 -0
- package/src/context/receipts.ts +108 -0
- package/src/context/skills.ts +154 -0
- package/src/context/symbol-index.ts +240 -0
- package/src/council/profiles.ts +137 -0
- package/src/council/tool.ts +138 -0
- package/src/council-engine/cli/council-artifacts.ts +230 -0
- package/src/council-engine/cli/council-config.ts +178 -0
- package/src/council-engine/cli/council-session-export.ts +116 -0
- package/src/council-engine/cli/kondi.ts +98 -0
- package/src/council-engine/cli/llm-caller.ts +229 -0
- package/src/council-engine/cli/localStorage-shim.ts +119 -0
- package/src/council-engine/cli/node-platform.ts +68 -0
- package/src/council-engine/cli/run-council.ts +481 -0
- package/src/council-engine/cli/run-pipeline.ts +772 -0
- package/src/council-engine/cli/session-export.ts +153 -0
- package/src/council-engine/configs/councils/analysis.json +101 -0
- package/src/council-engine/configs/councils/code-planning.json +86 -0
- package/src/council-engine/configs/councils/coding.json +89 -0
- package/src/council-engine/configs/councils/debate.json +97 -0
- package/src/council-engine/configs/councils/solo-claude.json +34 -0
- package/src/council-engine/configs/councils/solo-gpt.json +34 -0
- package/src/council-engine/council/coding-orchestrator.ts +1205 -0
- package/src/council-engine/council/context-bootstrap.ts +147 -0
- package/src/council-engine/council/context-inspection.ts +42 -0
- package/src/council-engine/council/context-store.ts +763 -0
- package/src/council-engine/council/deliberation-orchestrator.ts +2762 -0
- package/src/council-engine/council/factory.ts +164 -0
- package/src/council-engine/council/index.ts +201 -0
- package/src/council-engine/council/ledger-store.ts +438 -0
- package/src/council-engine/council/prompts.ts +1689 -0
- package/src/council-engine/council/storage-cleanup.ts +164 -0
- package/src/council-engine/council/store.ts +1110 -0
- package/src/council-engine/council/synthesis.ts +291 -0
- package/src/council-engine/council/types.ts +845 -0
- package/src/council-engine/council/validation.ts +613 -0
- package/src/council-engine/pipeline/build-detect.ts +73 -0
- package/src/council-engine/pipeline/executor.ts +1048 -0
- package/src/council-engine/pipeline/index.ts +9 -0
- package/src/council-engine/pipeline/install-detect.ts +84 -0
- package/src/council-engine/pipeline/memory-store.ts +182 -0
- package/src/council-engine/pipeline/output-parsers.ts +146 -0
- package/src/council-engine/pipeline/run-output.ts +149 -0
- package/src/council-engine/pipeline/session-import.ts +177 -0
- package/src/council-engine/pipeline/store.ts +753 -0
- package/src/council-engine/pipeline/test-detect.ts +82 -0
- package/src/council-engine/pipeline/types.ts +401 -0
- package/src/council-engine/services/deliberationSummary.ts +114 -0
- package/src/council-engine/tsconfig.json +16 -0
- package/src/council-engine/types/mcp.ts +122 -0
- package/src/council-engine/utils/filterTools.ts +73 -0
- package/src/engine/apply.ts +238 -0
- package/src/engine/checkpoints.ts +237 -0
- package/src/engine/consultants.ts +347 -0
- package/src/engine/diff.ts +171 -0
- package/src/engine/errors.ts +102 -0
- package/src/engine/git-tools.ts +246 -0
- package/src/engine/hooks.ts +181 -0
- package/src/engine/loop-guard.ts +155 -0
- package/src/engine/permissions.ts +293 -0
- package/src/engine/pipeline.ts +376 -0
- package/src/engine/sub-agents.ts +133 -0
- package/src/engine/task-card.ts +185 -0
- package/src/engine/task-router.ts +256 -0
- package/src/engine/task-store.ts +86 -0
- package/src/engine/tools.ts +783 -0
- package/src/engine/verify.ts +111 -0
- package/src/mcp/client.ts +225 -0
- package/src/mcp/config.ts +120 -0
- package/src/mcp/tool-manager.ts +192 -0
- package/src/mcp/types.ts +61 -0
- package/src/providers/llm-caller.ts +943 -0
- package/src/providers/rate-limiter.ts +238 -0
- package/src/router/NOTES.md +28 -0
- package/src/router/collector.ts +474 -0
- package/src/router/embeddings.ts +286 -0
- package/src/router/index.ts +299 -0
- package/src/router/intent-router.ts +225 -0
- package/src/router/nn-router.ts +205 -0
- package/src/router/profiles.ts +309 -0
- package/src/router/registry.ts +565 -0
- package/src/router/rules.ts +274 -0
- package/src/router/train.py +408 -0
- package/src/session/store.ts +211 -0
- package/src/test-utils/mock-llm.ts +39 -0
- package/src/types.ts +322 -0
- package/src/web/manager.ts +311 -0
|
@@ -0,0 +1,565 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model Registry — catalog of available models and their capabilities.
|
|
3
|
+
*
|
|
4
|
+
* Stored as JSON (despite the .yml extension) in .kondi-chat/models.yml, editable by the user
|
|
5
|
+
* and managed via /models commands. The router uses this to know
|
|
6
|
+
* what's available and how much it costs.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { readFileSync, writeFileSync, existsSync } from 'node:fs';
|
|
10
|
+
import { join } from 'node:path';
|
|
11
|
+
import { homedir } from 'node:os';
|
|
12
|
+
import type { ProviderId } from '../types.ts';
|
|
13
|
+
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
// Types
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Model capabilities are open-ended strings, not a fixed enum.
|
|
20
|
+
* Capabilities used by the default models: planning, reasoning, architecture, analysis, general, coding, fast-coding, refactoring, code-review, summarization, marketing, writing
|
|
21
|
+
* Users can add domain-specific ones: robot-orchestration, image-generation, etc.
|
|
22
|
+
* The router learns which capabilities matter through training data.
|
|
23
|
+
*/
|
|
24
|
+
export type ModelCapability = string; // plain string alias: any label type-checks, nothing is validated at the type level
|
|
25
|
+
|
|
26
|
+
/**
 * One row of the model catalog: identity, routing provider, advertised
 * capabilities, pricing, context size, and an on/off switch.
 */
export interface ModelEntry {
  /** Identifier sent to the provider API, e.g. "claude-sonnet-4-5-20250929"; unique within the registry. */
  id: string;

  /** Display name shown to the user. */
  name: string;

  /** Optional short handle for @mentions in chat, e.g. "claude", "gpt", "deep". */
  alias?: string;

  /** Which provider the request is routed to. */
  provider: ProviderId;

  /** Capability labels for this model, strongest first. */
  capabilities: Array<ModelCapability>;

  /** Price in USD for one million input tokens. */
  inputCostPer1M: number;

  /** Price in USD for one million output tokens. */
  outputCostPer1M: number;

  /** Size of the context window, in tokens. */
  contextWindow: number;

  /** Whether the model is currently switched on. */
  enabled: boolean;
}
|
|
46
|
+
|
|
47
|
+
// ---------------------------------------------------------------------------
|
|
48
|
+
// Default models
|
|
49
|
+
// ---------------------------------------------------------------------------
|
|
50
|
+
|
|
51
|
+
/**
 * Built-in catalog used when no models are configured.
 * Property order inside each entry is kept stable because entries are
 * serialized with JSON.stringify when the registry saves its config.
 */
const DEFAULT_MODELS: ModelEntry[] = [
  // ===== Planning & architecture =====
  {
    id: 'claude-opus-4-20250514', name: 'Claude Opus 4', alias: 'opus', provider: 'anthropic',
    capabilities: ['planning', 'reasoning', 'architecture', 'analysis'],
    inputCostPer1M: 15, outputCostPer1M: 75, contextWindow: 200_000,
    // Off by default; turn it on with `/models enable opus` if desired.
    enabled: false,
  },

  // ===== Open-ended questions & general tasks =====
  {
    id: 'gpt-5.4', name: 'GPT-5.4', alias: 'gpt', provider: 'openai',
    capabilities: ['planning', 'general', 'reasoning', 'marketing', 'writing', 'coding', 'analysis'],
    inputCostPer1M: 2.5, outputCostPer1M: 15, contextWindow: 1_000_000,
    enabled: true,
  },

  // ===== Gemini (free tier via Google AI Studio) =====
  {
    id: 'models/gemini-2.5-pro', name: 'Gemini 2.5 Pro', alias: 'gemini-pro', provider: 'google',
    capabilities: ['planning', 'reasoning', 'analysis', 'coding'],
    inputCostPer1M: 0, outputCostPer1M: 0, contextWindow: 1_000_000,
    enabled: true,
  },
  {
    id: 'models/gemini-2.5-flash', name: 'Gemini 2.5 Flash', alias: 'gemini', provider: 'google',
    capabilities: ['coding', 'fast-coding', 'general', 'summarization'],
    inputCostPer1M: 0, outputCostPer1M: 0, contextWindow: 1_000_000,
    enabled: true,
  },

  // ===== OpenAI, mid tier =====
  {
    id: 'gpt-5.4-mini', name: 'GPT-5.4 Mini', alias: 'mini', provider: 'openai',
    capabilities: ['general', 'marketing', 'writing', 'fast-coding'],
    inputCostPer1M: 0.75, outputCostPer1M: 4.5, contextWindow: 400_000,
    enabled: true,
  },

  // ===== OpenAI, cheap tier =====
  {
    id: 'gpt-5.4-nano', name: 'GPT-5.4 Nano', alias: 'nano', provider: 'openai',
    capabilities: ['summarization', 'fast-coding', 'general'],
    inputCostPer1M: 0.2, outputCostPer1M: 1.25, contextWindow: 400_000,
    enabled: true,
  },

  // ===== Code generation =====
  {
    id: 'deepseek-chat',
    // The deepseek-chat endpoint now serves V4 Flash (see the pricing table
    // in ledger.ts and commit 96d8ffe). Name, cost and context must stay in
    // sync with that ledger entry: the intent router shows these values to
    // the classifier.
    name: 'DeepSeek V4 Flash', alias: 'deep', provider: 'deepseek',
    capabilities: ['coding', 'fast-coding', 'refactoring'],
    inputCostPer1M: 0.14, outputCostPer1M: 0.28, contextWindow: 1_000_000,
    enabled: true,
  },

  // ===== Code review & analysis =====
  {
    id: 'claude-sonnet-4-5-20250929', name: 'Claude Sonnet 4.5', alias: 'claude', provider: 'anthropic',
    capabilities: ['code-review', 'analysis', 'reasoning', 'coding'],
    inputCostPer1M: 3, outputCostPer1M: 15, contextWindow: 200_000,
    enabled: true,
  },

  // ===== Summaries, compression, state updates =====
  {
    id: 'claude-haiku-4-5-20251001', name: 'Claude Haiku 4.5', alias: 'haiku', provider: 'anthropic',
    capabilities: ['summarization', 'fast-coding', 'general'],
    inputCostPer1M: 0.8, outputCostPer1M: 4, contextWindow: 200_000,
    enabled: true,
  },

  // ===== Z.AI (GLM family, OpenAI-compatible) =====
  {
    id: 'glm-5.1', name: 'GLM 5.1', alias: 'reason', provider: 'zai',
    capabilities: ['planning', 'reasoning', 'analysis', 'code-review'],
    inputCostPer1M: 1.4, outputCostPer1M: 4.4, contextWindow: 200_000,
    enabled: true,
  },
  {
    id: 'glm-4.6', name: 'GLM 4.6', alias: 'glm', provider: 'zai',
    capabilities: ['coding', 'fast-coding', 'general'],
    inputCostPer1M: 0.6, outputCostPer1M: 2.2, contextWindow: 200_000,
    enabled: true,
  },
  {
    id: 'glm-4.5-flash', name: 'GLM 4.5 Flash', alias: 'flash', provider: 'zai',
    capabilities: ['summarization', 'general'],
    inputCostPer1M: 0, outputCostPer1M: 0, contextWindow: 128_000,
    enabled: true,
  },
  {
    id: 'glm-4.5-air', name: 'GLM 4.5 Air', alias: 'glm-air', provider: 'zai',
    capabilities: ['fast-coding', 'general', 'summarization'],
    inputCostPer1M: 0.2, outputCostPer1M: 1.1, contextWindow: 128_000,
    enabled: false,
  },

  // ===== Local models (Ollama) =====
  {
    id: 'qwen2.5:3b', name: 'Qwen 2.5 3B', alias: 'qwen', provider: 'ollama',
    capabilities: ['general', 'fast-coding'],
    inputCostPer1M: 0, outputCostPer1M: 0, contextWindow: 32_000,
    enabled: true,
  },
  {
    id: 'phi3.5:3.8b', name: 'Phi 3.5', alias: 'phi', provider: 'ollama',
    capabilities: ['summarization', 'general', 'fast-coding'],
    inputCostPer1M: 0, outputCostPer1M: 0, contextWindow: 128_000,
    enabled: true,
  },
  {
    id: 'nemotron-3-nano:4b', name: 'Nemotron 3 Nano 4B', alias: 'nemo', provider: 'ollama',
    capabilities: ['reasoning', 'general', 'fast-coding'],
    inputCostPer1M: 0, outputCostPer1M: 0, contextWindow: 256_000,
    enabled: true,
  },
];
|
|
242
|
+
|
|
243
|
+
// ---------------------------------------------------------------------------
|
|
244
|
+
// Registry
|
|
245
|
+
// ---------------------------------------------------------------------------
|
|
246
|
+
|
|
247
|
+
/**
 * Runtime availability of a model as tracked by ModelRegistry:
 * 'unknown' while no health-check result has been recorded for it,
 * otherwise 'available' or 'unavailable' as set by checkHealth().
 */
export type ModelStatus = 'unknown' | 'available' | 'unavailable';
|
|
248
|
+
|
|
249
|
+
/**
 * In-memory catalog of models backed by a JSON config file.
 *
 * On construction the registry loads `<storageDir>/models.yml`, merges in
 * `~/.kondi-chat/models.yml`, falls back to the built-in defaults when both
 * are empty, and writes the result back to `<storageDir>/models.yml`.
 * Every mutation (`add`, `remove`, `enable`, `disable`) is saved immediately.
 * Health status is kept in memory only and is filled in by `checkHealth()`.
 */
export class ModelRegistry {
  /**
   * Environment variable holding the API key for each cloud provider.
   * Providers that are not listed here (e.g. `ollama`) have no key.
   */
  private static readonly API_KEY_ENV_VARS: ReadonlyMap<string, string> = new Map<string, string>([
    ['anthropic', 'ANTHROPIC_API_KEY'],
    ['openai', 'OPENAI_API_KEY'],
    ['deepseek', 'DEEPSEEK_API_KEY'],
    ['xai', 'XAI_API_KEY'],
    ['zai', 'ZAI_API_KEY'],
    ['google', 'GOOGLE_API_KEY'],
    ['nvidia-router', 'NVIDIA_API_KEY'],
  ]);

  private models: ModelEntry[] = [];
  private configPath: string;
  /** Runtime availability — not persisted, checked on demand */
  private statusMap: Map<string, { status: ModelStatus; error?: string }> = new Map();

  /**
   * @param storageDir Directory that holds (or will hold) `models.yml`.
   *   The directory must already exist: loading always ends with a save,
   *   and the write throws if the directory is missing.
   */
  constructor(storageDir: string) {
    this.configPath = join(storageDir, 'models.yml');
    this.load();
  }

  // -------------------------------------------------------------------------
  // Queries
  // -------------------------------------------------------------------------

  /** Every known model, enabled or not. Returns a new array holding the same entry objects. */
  getAll(): ModelEntry[] {
    return [...this.models];
  }

  /** Models whose `enabled` flag is set. */
  getEnabled(): ModelEntry[] {
    return this.models.filter(m => m.enabled);
  }

  /**
   * Enabled models that are not known to be unavailable.
   * Models that have never been health-checked (status 'unknown') are
   * included, so this works before `checkHealth()` has run.
   */
  getAvailable(): ModelEntry[] {
    return this.models.filter(m =>
      m.enabled && this.getStatus(m.id).status !== 'unavailable'
    );
  }

  /** Runtime status of a model; `{ status: 'unknown' }` when nothing has been recorded for `id`. */
  getStatus(id: string): { status: ModelStatus; error?: string } {
    return this.statusMap.get(id) ?? { status: 'unknown' };
  }

  /** Look up a model by exact ID, regardless of its enabled flag. */
  getById(id: string): ModelEntry | undefined {
    return this.models.find(m => m.id === id);
  }

  /**
   * Find an enabled model by its @mention alias. Case-insensitive, with
   * unambiguous prefix matching: `@gemi` resolves to `gemini` if it's the
   * only enabled alias starting with those letters.
   *
   * @returns The matching model, or undefined when the alias is empty, when
   *   nothing matches, or when the prefix matches several aliases (call
   *   `findAliasCandidates` to list them).
   */
  getByAlias(alias: string): ModelEntry | undefined {
    const lower = alias.toLowerCase();
    // An empty string is a prefix of every alias; without this guard a bare
    // "@" would resolve to a model whenever exactly one alias is enabled.
    if (lower === '') return undefined;

    const candidates: Array<{ model: ModelEntry; alias: string }> = [];
    for (const model of this.models) {
      if (model.alias && model.enabled) {
        candidates.push({ model, alias: model.alias.toLowerCase() });
      }
    }

    // 1. Exact match wins, even if it is also a prefix of other aliases.
    const exact = candidates.find(c => c.alias === lower);
    if (exact) return exact.model;

    // 2. Prefix match — only accepted when it is unique.
    const byPrefix = candidates.filter(c => c.alias.startsWith(lower));
    return byPrefix.length === 1 ? byPrefix[0].model : undefined;
  }

  /**
   * Return every enabled alias that starts with `prefix` (case-insensitive).
   * Used by error-message sites to report "did you mean X or Y?" on an
   * ambiguous @mention and by the TUI suggestion system for `@` autocomplete.
   * An empty prefix returns all enabled aliases.
   */
  findAliasCandidates(prefix: string): string[] {
    const lower = prefix.toLowerCase();
    const matches: string[] = [];
    for (const m of this.models) {
      if (m.alias && m.enabled && m.alias.toLowerCase().startsWith(lower)) {
        matches.push(m.alias);
      }
    }
    return matches;
  }

  /** Aliases of all enabled models, for display. */
  getAliases(): string[] {
    const aliases: string[] = [];
    for (const m of this.getEnabled()) {
      if (m.alias) aliases.push(m.alias);
    }
    return aliases;
  }

  /**
   * Models from `getAvailable()` that list `capability`, cheapest first.
   * Ordering uses the input-token price only; models with equal input price
   * keep their catalog order.
   */
  getByCapability(capability: ModelCapability): ModelEntry[] {
    return this.getAvailable()
      .filter(m => m.capabilities.includes(capability))
      .sort((a, b) => a.inputCostPer1M - b.inputCostPer1M);
  }

  /** First entry of `getByCapability(capability)`, i.e. the lowest input price; undefined if none. */
  getCheapest(capability: ModelCapability): ModelEntry | undefined {
    return this.getByCapability(capability)[0];
  }

  /**
   * Last entry of `getByCapability(capability)`, i.e. the highest input
   * price, which is used here as the proxy for "most capable"; undefined if none.
   */
  getBest(capability: ModelCapability): ModelEntry | undefined {
    const ranked = this.getByCapability(capability);
    return ranked[ranked.length - 1];
  }

  // -------------------------------------------------------------------------
  // Mutations
  // -------------------------------------------------------------------------

  /** Insert `entry`, or replace the existing entry with the same ID, then save. */
  add(entry: ModelEntry): void {
    const index = this.models.findIndex(m => m.id === entry.id);
    if (index >= 0) {
      this.models[index] = entry;
    } else {
      this.models.push(entry);
    }
    this.save();
  }

  /**
   * Delete the model with the given ID.
   * @returns true if something was removed (and saved), false if the ID was unknown.
   */
  remove(id: string): boolean {
    const remaining = this.models.filter(m => m.id !== id);
    if (remaining.length === this.models.length) return false;
    this.models = remaining;
    this.save();
    return true;
  }

  /**
   * Switch a model on and save.
   * @returns false if no model has that ID.
   */
  enable(id: string): boolean {
    return this.setEnabled(id, true);
  }

  /**
   * Switch a model off and save.
   * @returns false if no model has that ID.
   */
  disable(id: string): boolean {
    return this.setEnabled(id, false);
  }

  /** Shared implementation of enable/disable: set the flag and save when the ID is known. */
  private setEnabled(id: string, enabled: boolean): boolean {
    const model = this.models.find(m => m.id === id);
    if (!model) return false;
    model.enabled = enabled;
    this.save();
    return true;
  }

  // -------------------------------------------------------------------------
  // Health checks
  // -------------------------------------------------------------------------

  /**
   * Check availability of all enabled models and record the results.
   * - Ollama: the model must appear in the local Ollama model list
   * - Cloud providers: the provider's API key environment variable must be set
   *
   * @returns The registry's internal status map (the same Map instance on every call).
   */
  async checkHealth(): Promise<Map<string, { status: ModelStatus; error?: string }>> {
    // One request covers every Ollama model.
    const ollamaModels = await this.getOllamaModels();

    for (const m of this.getEnabled()) {
      if (m.provider === 'ollama') {
        if (ollamaModels === null) {
          this.statusMap.set(m.id, { status: 'unavailable', error: 'Ollama not running' });
          continue;
        }
        // NOTE(review): the base-name fallback also accepts a different tag of
        // the same model (e.g. "qwen2.5:7b" pulled, "qwen2.5:3b" configured).
        // Kept as-is; confirm whether that leniency is intended.
        const [base = m.id] = m.id.split(':');
        if (ollamaModels.has(m.id) || ollamaModels.has(base)) {
          this.statusMap.set(m.id, { status: 'available' });
        } else {
          this.statusMap.set(m.id, { status: 'unavailable', error: `Not pulled. Run: ollama pull ${m.id}` });
        }
        continue;
      }

      // Cloud provider — only the presence of an API key is checked.
      if (this.getApiKeyFor(m.provider)) {
        this.statusMap.set(m.id, { status: 'available' });
      } else {
        const envVar = this.getEnvVarFor(m.provider);
        this.statusMap.set(m.id, { status: 'unavailable', error: `No API key. Set ${envVar}` });
      }
    }

    return this.statusMap;
  }

  /**
   * Ask the local Ollama server (http://localhost:11434, 3 s timeout) which
   * models it has.
   *
   * @returns Model names both with and without their `:tag` suffix, or null
   *   when the request fails or returns a non-OK status.
   */
  private async getOllamaModels(): Promise<Set<string> | null> {
    try {
      const resp = await fetch('http://localhost:11434/api/tags', {
        signal: AbortSignal.timeout(3000),
      });
      if (!resp.ok) return null;

      const data: unknown = await resp.json();
      const listed = (data as { models?: unknown } | null)?.models;
      const names = new Set<string>();
      if (!Array.isArray(listed)) return names;

      for (const item of listed) {
        const name = (item as { name?: unknown } | null)?.name;
        // Skip malformed entries instead of failing the whole check, which
        // would be reported as "Ollama not running".
        if (typeof name !== 'string') continue;
        names.add(name);
        // Also add without tag (e.g., "phi3.5" from "phi3.5:3.8b")
        const [base = name] = name.split(':');
        names.add(base);
      }
      return names;
    } catch {
      return null;
    }
  }

  /** Value of the provider's API key environment variable; undefined for providers without one. */
  private getApiKeyFor(provider: ProviderId): string | undefined {
    const envVar = ModelRegistry.API_KEY_ENV_VARS.get(provider);
    return envVar === undefined ? undefined : process.env[envVar];
  }

  /** Name of the provider's API key environment variable; the generic 'API_KEY' for unlisted providers. */
  private getEnvVarFor(provider: ProviderId): string {
    return ModelRegistry.API_KEY_ENV_VARS.get(provider) ?? 'API_KEY';
  }

  /** Render the recorded health status of every enabled model, followed by a summary line. */
  formatHealth(): string {
    const lines: string[] = ['Model Health:'];
    const enabled = this.getEnabled();
    let available = 0;
    let unavailable = 0;

    for (const m of enabled) {
      const s = this.getStatus(m.id);
      let icon = '??';
      if (s.status === 'available') {
        icon = 'OK';
        available += 1;
      } else if (s.status === 'unavailable') {
        icon = 'FAIL';
        unavailable += 1;
      }
      const alias = m.alias ? ` @${m.alias}` : '';
      const error = s.error ? ` — ${s.error}` : '';
      lines.push(` [${icon.padEnd(4)}] ${m.name}${alias} (${m.provider})${error}`);
    }

    lines.push('');
    lines.push(`${available} available, ${unavailable} unavailable, ${enabled.length - available - unavailable} unchecked`);

    return lines.join('\n');
  }

  // -------------------------------------------------------------------------
  // Persistence — JSON content stored in a file named models.yml
  // -------------------------------------------------------------------------

  /**
   * Read a model list from `path`.
   * Returns an empty list when the file is missing, unreadable, not valid
   * JSON, or not a JSON array. Array items that are not objects with a
   * string `id` are dropped, because the rest of the class dereferences them.
   */
  private static readModelFile(path: string): ModelEntry[] {
    if (!existsSync(path)) return [];
    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(path, 'utf-8'));
    } catch {
      return []; // best effort: an unreadable or corrupt file counts as empty
    }
    // JSON.parse can legitimately return null, a number, an object, ...
    if (!Array.isArray(parsed)) return [];
    return parsed.filter(
      (m): m is ModelEntry =>
        typeof m === 'object' && m !== null && typeof (m as { id?: unknown }).id === 'string',
    );
  }

  /**
   * Populate `this.models` and write the result to the project config.
   *
   * Precedence: project-level models first, then user-level models
   * (~/.kondi-chat/models.yml) whose ID the project does not already have.
   * If the project has none, the user-level list is used; if that is empty
   * too, a copy of the built-in defaults is used.
   */
  private load(): void {
    const projectModels = ModelRegistry.readModelFile(this.configPath);
    const userModels = ModelRegistry.readModelFile(join(homedir(), '.kondi-chat', 'models.yml'));

    if (projectModels.length > 0) {
      const projectIds = new Set(projectModels.map(m => m.id));
      this.models = [...projectModels, ...userModels.filter(um => !projectIds.has(um.id))];
    } else if (userModels.length > 0) {
      this.models = userModels;
    } else {
      // Copy each entry: enable()/disable() mutate entries in place and must
      // not alter the shared defaults seen by other registry instances.
      this.models = DEFAULT_MODELS.map(m => ({ ...m, capabilities: [...m.capabilities] }));
    }
    this.save();
  }

  /** Write the current model list to the project config as pretty-printed JSON. */
  private save(): void {
    writeFileSync(this.configPath, JSON.stringify(this.models, null, 2));
  }

  /** Render the catalog for display: full details for enabled models, one line per disabled model. */
  format(): string {
    const lines: string[] = [];
    const enabled = this.getEnabled();
    const disabled = this.models.filter(m => !m.enabled);

    if (enabled.length > 0) {
      lines.push('Enabled models:');
      for (const m of enabled) {
        const alias = m.alias ? `@${m.alias}` : '(no alias)';
        lines.push(
          '',
          ` ${m.name} ${alias}`,
          ` ID: ${m.id}`,
          ` Provider: ${m.provider}`,
          ` Capabilities: ${m.capabilities.join(', ')}`,
          ` Cost: $${m.inputCostPer1M.toFixed(2)} in / $${m.outputCostPer1M.toFixed(2)} out per 1M tokens`,
          ` Context: ${(m.contextWindow / 1000).toFixed(0)}K tokens`,
        );
      }
    }

    if (disabled.length > 0) {
      lines.push('', 'Disabled:');
      for (const m of disabled) {
        lines.push(` ${m.name} — ${m.id} (${m.provider})`);
      }
    }

    return lines.join('\n');
  }
}
|