@goondocks/myco 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/dist/{chunk-QGJ2ZIUZ.js → chunk-25FY74AP.js} +56 -22
- package/dist/chunk-25FY74AP.js.map +1 -0
- package/dist/{chunk-2YBUL3IL.js → chunk-4WL5X7VS.js} +3 -3
- package/dist/{chunk-24DOZEUJ.js → chunk-ALBVNGCF.js} +591 -27
- package/dist/chunk-ALBVNGCF.js.map +1 -0
- package/dist/{chunk-E7OBRBCQ.js → chunk-CK24O5YQ.js} +12 -2
- package/dist/chunk-CK24O5YQ.js.map +1 -0
- package/dist/{chunk-2GSX3BK2.js → chunk-CPVXNRGW.js} +4 -4
- package/dist/{chunk-L25U7PIG.js → chunk-CQ4RKK67.js} +2 -2
- package/dist/{chunk-GDYYJTTT.js → chunk-DBMHUMG3.js} +3 -3
- package/dist/{chunk-5FNZ7AMX.js → chunk-IWBWZQK6.js} +2 -2
- package/dist/{chunk-MQSYSQ6T.js → chunk-JSK7L46L.js} +11 -6
- package/dist/{chunk-MQSYSQ6T.js.map → chunk-JSK7L46L.js.map} +1 -1
- package/dist/{chunk-KUMVJIJW.js → chunk-LDKXXKF6.js} +6 -10
- package/dist/{chunk-KUMVJIJW.js.map → chunk-LDKXXKF6.js.map} +1 -1
- package/dist/{chunk-2ZBB3MQT.js → chunk-PQWQC3RF.js} +444 -21
- package/dist/chunk-PQWQC3RF.js.map +1 -0
- package/dist/{chunk-5QWZT4AB.js → chunk-RNWALAFP.js} +2 -2
- package/dist/{chunk-3EM23DMD.js → chunk-RXJHB7W4.js} +2 -2
- package/dist/{chunk-GNR3QAER.js → chunk-RY76WEN3.js} +2 -2
- package/dist/{chunk-6BSDCZ5Q.js → chunk-WBLTISAK.js} +8 -3
- package/dist/chunk-WBLTISAK.js.map +1 -0
- package/dist/{chunk-ZMYNRTTD.js → chunk-WU4PCNIK.js} +4 -3
- package/dist/chunk-WU4PCNIK.js.map +1 -0
- package/dist/{chunk-YTANWAGE.js → chunk-XNAM6Z4O.js} +2 -2
- package/dist/{chunk-P3WO3N3I.js → chunk-YG6MLLGL.js} +19 -3
- package/dist/{chunk-P3WO3N3I.js.map → chunk-YG6MLLGL.js.map} +1 -1
- package/dist/{cli-K7SUTP7A.js → cli-EGWAINIE.js} +20 -20
- package/dist/{client-YJMNTITQ.js → client-FDKJ4BY7.js} +5 -5
- package/dist/{config-G5GGT5A6.js → config-HDUFDOQN.js} +3 -3
- package/dist/{curate-6T5NKVXK.js → curate-OHIJFBYF.js} +10 -11
- package/dist/{curate-6T5NKVXK.js.map → curate-OHIJFBYF.js.map} +1 -1
- package/dist/{detect-providers-S3M5TAMW.js → detect-providers-4U3ZPW5G.js} +3 -3
- package/dist/{digest-O35VHYFP.js → digest-I2XYCK2M.js} +11 -13
- package/dist/{digest-O35VHYFP.js.map → digest-I2XYCK2M.js.map} +1 -1
- package/dist/{init-TFLSATB3.js → init-ZO2XQT6U.js} +8 -8
- package/dist/{main-JEUQS3BY.js → main-XZ6X4BUX.js} +177 -40
- package/dist/main-XZ6X4BUX.js.map +1 -0
- package/dist/{rebuild-7SH5GSNX.js → rebuild-NAH4EW5B.js} +10 -11
- package/dist/{rebuild-7SH5GSNX.js.map → rebuild-NAH4EW5B.js.map} +1 -1
- package/dist/reprocess-6FOP37XS.js +79 -0
- package/dist/reprocess-6FOP37XS.js.map +1 -0
- package/dist/{restart-NLJLB52D.js → restart-WSA4JSE3.js} +6 -6
- package/dist/{search-2BVRF54H.js → search-QXJQUB35.js} +6 -6
- package/dist/{server-4AMZNP4F.js → server-VXN3CJ4Y.js} +14 -18
- package/dist/{server-4AMZNP4F.js.map → server-VXN3CJ4Y.js.map} +1 -1
- package/dist/{session-start-AZAF3DTE.js → session-start-KQ4KCQMZ.js} +9 -9
- package/dist/setup-digest-QNCM3PNQ.js +15 -0
- package/dist/setup-llm-EAOIUSPJ.js +15 -0
- package/dist/src/cli.js +4 -4
- package/dist/src/daemon/main.js +4 -4
- package/dist/src/hooks/post-tool-use.js +5 -5
- package/dist/src/hooks/session-end.js +5 -5
- package/dist/src/hooks/session-start.js +4 -4
- package/dist/src/hooks/stop.js +7 -7
- package/dist/src/hooks/user-prompt-submit.js +5 -5
- package/dist/src/mcp/server.js +4 -4
- package/dist/src/prompts/consolidation.md +2 -0
- package/dist/src/prompts/digest-7500.md +68 -0
- package/dist/{stats-MKDIZFIQ.js → stats-43OESUEB.js} +6 -6
- package/dist/ui/assets/index-Bk4X_8-Z.css +1 -0
- package/dist/ui/assets/index-D3SY7ZHY.js +299 -0
- package/dist/ui/index.html +2 -2
- package/dist/{verify-7DW7LAND.js → verify-IIAHBAAU.js} +6 -6
- package/dist/{version-RQLD7VBP.js → version-NKOECSVH.js} +4 -4
- package/package.json +1 -1
- package/dist/chunk-24DOZEUJ.js.map +0 -1
- package/dist/chunk-2ZBB3MQT.js.map +0 -1
- package/dist/chunk-3JCXYLHD.js +0 -33
- package/dist/chunk-3JCXYLHD.js.map +0 -1
- package/dist/chunk-6BSDCZ5Q.js.map +0 -1
- package/dist/chunk-B5UZSHQV.js +0 -250
- package/dist/chunk-B5UZSHQV.js.map +0 -1
- package/dist/chunk-E7OBRBCQ.js.map +0 -1
- package/dist/chunk-KC7ENQTN.js +0 -436
- package/dist/chunk-KC7ENQTN.js.map +0 -1
- package/dist/chunk-QGJ2ZIUZ.js.map +0 -1
- package/dist/chunk-UVGAVYWZ.js +0 -157
- package/dist/chunk-UVGAVYWZ.js.map +0 -1
- package/dist/chunk-ZMYNRTTD.js.map +0 -1
- package/dist/main-JEUQS3BY.js.map +0 -1
- package/dist/reprocess-Q4YH2ZBK.js +0 -268
- package/dist/reprocess-Q4YH2ZBK.js.map +0 -1
- package/dist/setup-digest-YLZZGSSR.js +0 -15
- package/dist/setup-llm-JOXBSLXC.js +0 -15
- package/dist/ui/assets/index-D37IoDXS.css +0 -1
- package/dist/ui/assets/index-DA61Ial2.js +0 -289
- /package/dist/{chunk-2YBUL3IL.js.map → chunk-4WL5X7VS.js.map} +0 -0
- /package/dist/{chunk-2GSX3BK2.js.map → chunk-CPVXNRGW.js.map} +0 -0
- /package/dist/{chunk-L25U7PIG.js.map → chunk-CQ4RKK67.js.map} +0 -0
- /package/dist/{chunk-GDYYJTTT.js.map → chunk-DBMHUMG3.js.map} +0 -0
- /package/dist/{chunk-5FNZ7AMX.js.map → chunk-IWBWZQK6.js.map} +0 -0
- /package/dist/{chunk-5QWZT4AB.js.map → chunk-RNWALAFP.js.map} +0 -0
- /package/dist/{chunk-3EM23DMD.js.map → chunk-RXJHB7W4.js.map} +0 -0
- /package/dist/{chunk-GNR3QAER.js.map → chunk-RY76WEN3.js.map} +0 -0
- /package/dist/{chunk-YTANWAGE.js.map → chunk-XNAM6Z4O.js.map} +0 -0
- /package/dist/{cli-K7SUTP7A.js.map → cli-EGWAINIE.js.map} +0 -0
- /package/dist/{client-YJMNTITQ.js.map → client-FDKJ4BY7.js.map} +0 -0
- /package/dist/{config-G5GGT5A6.js.map → config-HDUFDOQN.js.map} +0 -0
- /package/dist/{detect-providers-S3M5TAMW.js.map → detect-providers-4U3ZPW5G.js.map} +0 -0
- /package/dist/{init-TFLSATB3.js.map → init-ZO2XQT6U.js.map} +0 -0
- /package/dist/{restart-NLJLB52D.js.map → restart-WSA4JSE3.js.map} +0 -0
- /package/dist/{search-2BVRF54H.js.map → search-QXJQUB35.js.map} +0 -0
- /package/dist/{session-start-AZAF3DTE.js.map → session-start-KQ4KCQMZ.js.map} +0 -0
- /package/dist/{setup-digest-YLZZGSSR.js.map → setup-digest-QNCM3PNQ.js.map} +0 -0
- /package/dist/{setup-llm-JOXBSLXC.js.map → setup-llm-EAOIUSPJ.js.map} +0 -0
- /package/dist/{stats-MKDIZFIQ.js.map → stats-43OESUEB.js.map} +0 -0
- /package/dist/{verify-7DW7LAND.js.map → verify-IIAHBAAU.js.map} +0 -0
- /package/dist/{version-RQLD7VBP.js.map → version-NKOECSVH.js.map} +0 -0
package/dist/chunk-25FY74AP.js (renamed from chunk-QGJ2ZIUZ.js)

@@ -3,7 +3,7 @@ import {
   DAEMON_CLIENT_TIMEOUT_MS,
   EMBEDDING_REQUEST_TIMEOUT_MS,
   LLM_REQUEST_TIMEOUT_MS
-} from "./chunk-
+} from "./chunk-WBLTISAK.js";
 
 // src/intelligence/ollama.ts
 var ENDPOINT_GENERATE = "/api/generate";
@@ -32,7 +32,7 @@ var OllamaBackend = class _OllamaBackend {
     const body = {
       model: this.model,
       prompt,
-      stream:
+      stream: true,
       options
     };
     if (opts?.systemPrompt) {
@@ -54,8 +54,40 @@ var OllamaBackend = class _OllamaBackend {
       const errorBody = await response.text().catch(() => "");
       throw new Error(`Ollama summarize failed: ${response.status} ${errorBody.slice(0, 500)}`);
     }
-
-
+    return this.readStream(response);
+  }
+  /** Read an Ollama streaming response (newline-delimited JSON) and accumulate the result. */
+  async readStream(response) {
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let text = "";
+    let model = this.model;
+    let buffer = "";
+    try {
+      for (; ; ) {
+        const { done, value } = await reader.read();
+        if (done) break;
+        buffer += decoder.decode(value, { stream: true });
+        const lines = buffer.split("\n");
+        buffer = lines.pop() ?? "";
+        for (const line of lines) {
+          if (!line.trim()) continue;
+          const chunk = JSON.parse(line);
+          if (chunk.error) throw new Error(`Ollama stream error: ${chunk.error}`);
+          text += chunk.response ?? "";
+          if (chunk.model) model = chunk.model;
+        }
+      }
+      if (buffer.trim()) {
+        const chunk = JSON.parse(buffer);
+        if (chunk.error) throw new Error(`Ollama stream error: ${chunk.error}`);
+        text += chunk.response ?? "";
+        if (chunk.model) model = chunk.model;
+      }
+    } finally {
+      reader.releaseLock();
+    }
+    return { text, model };
   }
   async embed(text) {
     const response = await fetch(`${this.baseUrl}${ENDPOINT_EMBED}`, {
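Note: the sketch below is not part of the package. The stream: true change pairs with the new readStream helper: Ollama's /api/generate replies with newline-delimited JSON chunks that each carry a partial response string until a final done chunk. A minimal standalone version of the same accumulation pattern, assuming a local Ollama on its default port; the model name and function name are illustrative:

```ts
// Sketch only: accumulate an Ollama /api/generate NDJSON stream into one string.
// Assumes a local Ollama on the default port; the model name is illustrative.
interface GenerateChunk {
  model?: string;
  response?: string;
  done?: boolean;
  error?: string;
}

async function generateStreamed(prompt: string, model = "llama3.2"): Promise<string> {
  const res = await fetch("http://localhost:11434/api/generate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, prompt, stream: true }),
  });
  if (!res.ok || !res.body) throw new Error(`generate failed: ${res.status}`);

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let text = "";
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? ""; // keep any trailing partial line for the next read
      for (const line of lines) {
        if (!line.trim()) continue;
        const chunk = JSON.parse(line) as GenerateChunk;
        if (chunk.error) throw new Error(chunk.error);
        text += chunk.response ?? "";
      }
    }
    if (buffer.trim()) {
      const chunk = JSON.parse(buffer) as GenerateChunk;
      if (chunk.error) throw new Error(chunk.error);
      text += chunk.response ?? "";
    }
  } finally {
    reader.releaseLock();
  }
  return text;
}
```

This is the same split-on-newline buffering the bundled readStream uses, so JSON chunks that arrive split across network reads are still parsed whole.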
@@ -183,35 +215,37 @@ var LmStudioBackend = class _LmStudioBackend {
     return { embedding, model: data.model, dimensions: embedding.length };
   }
   /**
-   * Ensure a model instance is loaded
-   * Called every digest cycle
+   * Ensure a model instance is loaded and capture its ID for routing.
+   * Called every digest cycle so it recovers from idle TTL eviction.
    *
-   *
-   *
+   * Strategy: reuse ANY loaded instance of this model. Only load a new one
+   * when zero instances exist. This avoids the previous bug where strict
+   * config matching (context_length, offload_kv_cache_to_gpu) caused new
+   * instances to spawn every cycle — exhausting system resources.
    *
-   *
-   *
+   * context_length is set per-request on /api/v1/chat, so we don't need
+   * to match it at load time. Load-time-only params like
+   * offload_kv_cache_to_gpu are llama.cpp-specific and may not apply to
+   * all models (e.g., glm-4.7-flash has no KV cache setting).
    */
   async ensureLoaded(contextLength, gpuKvCache) {
-    const ctx = contextLength ?? this.contextWindow;
-    const kvCache = gpuKvCache ?? false;
     const instances = await this.getLoadedInstances();
-
-
-
-      if (matchesContext && matchesKvCache) {
-        this.instanceId = instance.id;
-        return;
-      }
+    if (instances.length > 0) {
+      this.instanceId = instances[0].id;
+      return;
     }
+    const ctx = contextLength ?? this.contextWindow;
     const body = {
       model: this.model,
-
-
+      // llama.cpp-specific — ignored by other engines (MLX, etc.)
+      flash_attention: true
     };
     if (ctx) {
       body.context_length = ctx;
     }
+    if (gpuKvCache) {
+      body.offload_kv_cache_to_gpu = true;
+    }
     const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_LOAD}`, {
       method: "POST",
       headers: { "Content-Type": "application/json" },
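Note: the sketch below is not part of the package. The new ensureLoaded flow above reduces to: list loaded instances through LM Studio's native models API, reuse the first one found, and only POST a load request when none exist. A condensed standalone version of that strategy, with endpoint paths and field names taken from the bundled code above; the base URL handling and thrown errors are illustrative:

```ts
// Sketch only: reuse any loaded LM Studio instance of a model, loading one if needed.
// Endpoints and field names mirror the bundled code above; everything else is illustrative.
interface LoadedInstance { id: string }
interface ModelEntry { key: string; loaded_instances: LoadedInstance[] }

async function ensureLoaded(baseUrl: string, model: string, contextLength?: number): Promise<string> {
  // 1. Ask the native models API which instances of this model are already loaded.
  const listRes = await fetch(`${baseUrl}/api/v1/models`);
  if (listRes.ok) {
    const data = (await listRes.json()) as { models: ModelEntry[] };
    const entry = data.models.find((m) => m.key === model);
    // 2. Reuse the first existing instance instead of spawning a new one per digest cycle.
    if (entry && entry.loaded_instances.length > 0) return entry.loaded_instances[0].id;
  }

  // 3. Nothing loaded: load one instance, passing context_length only as a load-time hint.
  const body: Record<string, unknown> = { model, flash_attention: true };
  if (contextLength) body.context_length = contextLength;

  const loadRes = await fetch(`${baseUrl}/api/v1/models/load`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });
  if (!loadRes.ok) throw new Error(`model load failed: ${loadRes.status}`);

  const result = (await loadRes.json()) as Record<string, unknown>;
  const id = (result.instance_id ?? result.id) as string | undefined;
  if (!id) throw new Error("load succeeded but no instance id was returned");
  return id;
}
```

The key difference from 0.6.1 is step 2: any existing instance is accepted, rather than requiring an exact context_length and KV-cache config match.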
@@ -273,4 +307,4 @@ export {
   OllamaBackend,
   LmStudioBackend
 };
-//# sourceMappingURL=chunk-
+//# sourceMappingURL=chunk-25FY74AP.js.map
package/dist/chunk-25FY74AP.js.map (new file; the single added line is an inline source map whose sourcesContent and mappings are elided here)

@@ -0,0 +1 @@
+{"version":3,"sources":["../src/intelligence/ollama.ts","../src/intelligence/lm-studio.ts"],"sourcesContent":[...],"mappings":"...","names":[]}
package/dist/chunk-4WL5X7VS.js (renamed from chunk-2YBUL3IL.js)

@@ -2,10 +2,10 @@ import { createRequire as __cr } from 'node:module'; const require = __cr(import
 import {
   LmStudioBackend,
   OllamaBackend
-} from "./chunk-
+} from "./chunk-25FY74AP.js";
 import {
   AgentRegistry
-} from "./chunk-
+} from "./chunk-RNWALAFP.js";
 
 // src/cli/shared.ts
 import fs from "fs";
@@ -83,4 +83,4 @@ export {
   VAULT_GITIGNORE,
   configureVaultEnv
 };
-//# sourceMappingURL=chunk-
+//# sourceMappingURL=chunk-4WL5X7VS.js.map