@goondocks/myco 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/.claude-plugin/marketplace.json +1 -1
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/dist/{chunk-QGJ2ZIUZ.js → chunk-25FY74AP.js} +56 -22
  4. package/dist/chunk-25FY74AP.js.map +1 -0
  5. package/dist/{chunk-2YBUL3IL.js → chunk-4WL5X7VS.js} +3 -3
  6. package/dist/{chunk-24DOZEUJ.js → chunk-ALBVNGCF.js} +591 -27
  7. package/dist/chunk-ALBVNGCF.js.map +1 -0
  8. package/dist/{chunk-E7OBRBCQ.js → chunk-CK24O5YQ.js} +12 -2
  9. package/dist/chunk-CK24O5YQ.js.map +1 -0
  10. package/dist/{chunk-2GSX3BK2.js → chunk-CPVXNRGW.js} +4 -4
  11. package/dist/{chunk-L25U7PIG.js → chunk-CQ4RKK67.js} +2 -2
  12. package/dist/{chunk-GDYYJTTT.js → chunk-DBMHUMG3.js} +3 -3
  13. package/dist/{chunk-5FNZ7AMX.js → chunk-IWBWZQK6.js} +2 -2
  14. package/dist/{chunk-MQSYSQ6T.js → chunk-JSK7L46L.js} +11 -6
  15. package/dist/{chunk-MQSYSQ6T.js.map → chunk-JSK7L46L.js.map} +1 -1
  16. package/dist/{chunk-KUMVJIJW.js → chunk-LDKXXKF6.js} +6 -10
  17. package/dist/{chunk-KUMVJIJW.js.map → chunk-LDKXXKF6.js.map} +1 -1
  18. package/dist/{chunk-2ZBB3MQT.js → chunk-PQWQC3RF.js} +444 -21
  19. package/dist/chunk-PQWQC3RF.js.map +1 -0
  20. package/dist/{chunk-5QWZT4AB.js → chunk-RNWALAFP.js} +2 -2
  21. package/dist/{chunk-3EM23DMD.js → chunk-RXJHB7W4.js} +2 -2
  22. package/dist/{chunk-GNR3QAER.js → chunk-RY76WEN3.js} +2 -2
  23. package/dist/{chunk-6BSDCZ5Q.js → chunk-WBLTISAK.js} +8 -3
  24. package/dist/chunk-WBLTISAK.js.map +1 -0
  25. package/dist/{chunk-ZMYNRTTD.js → chunk-WU4PCNIK.js} +4 -3
  26. package/dist/chunk-WU4PCNIK.js.map +1 -0
  27. package/dist/{chunk-YTANWAGE.js → chunk-XNAM6Z4O.js} +2 -2
  28. package/dist/{chunk-P3WO3N3I.js → chunk-YG6MLLGL.js} +19 -3
  29. package/dist/{chunk-P3WO3N3I.js.map → chunk-YG6MLLGL.js.map} +1 -1
  30. package/dist/{cli-K7SUTP7A.js → cli-EGWAINIE.js} +20 -20
  31. package/dist/{client-YJMNTITQ.js → client-FDKJ4BY7.js} +5 -5
  32. package/dist/{config-G5GGT5A6.js → config-HDUFDOQN.js} +3 -3
  33. package/dist/{curate-6T5NKVXK.js → curate-OHIJFBYF.js} +10 -11
  34. package/dist/{curate-6T5NKVXK.js.map → curate-OHIJFBYF.js.map} +1 -1
  35. package/dist/{detect-providers-S3M5TAMW.js → detect-providers-4U3ZPW5G.js} +3 -3
  36. package/dist/{digest-O35VHYFP.js → digest-I2XYCK2M.js} +11 -13
  37. package/dist/{digest-O35VHYFP.js.map → digest-I2XYCK2M.js.map} +1 -1
  38. package/dist/{init-TFLSATB3.js → init-ZO2XQT6U.js} +8 -8
  39. package/dist/{main-JEUQS3BY.js → main-XZ6X4BUX.js} +177 -40
  40. package/dist/main-XZ6X4BUX.js.map +1 -0
  41. package/dist/{rebuild-7SH5GSNX.js → rebuild-NAH4EW5B.js} +10 -11
  42. package/dist/{rebuild-7SH5GSNX.js.map → rebuild-NAH4EW5B.js.map} +1 -1
  43. package/dist/reprocess-6FOP37XS.js +79 -0
  44. package/dist/reprocess-6FOP37XS.js.map +1 -0
  45. package/dist/{restart-NLJLB52D.js → restart-WSA4JSE3.js} +6 -6
  46. package/dist/{search-2BVRF54H.js → search-QXJQUB35.js} +6 -6
  47. package/dist/{server-4AMZNP4F.js → server-VXN3CJ4Y.js} +14 -18
  48. package/dist/{server-4AMZNP4F.js.map → server-VXN3CJ4Y.js.map} +1 -1
  49. package/dist/{session-start-AZAF3DTE.js → session-start-KQ4KCQMZ.js} +9 -9
  50. package/dist/setup-digest-QNCM3PNQ.js +15 -0
  51. package/dist/setup-llm-EAOIUSPJ.js +15 -0
  52. package/dist/src/cli.js +4 -4
  53. package/dist/src/daemon/main.js +4 -4
  54. package/dist/src/hooks/post-tool-use.js +5 -5
  55. package/dist/src/hooks/session-end.js +5 -5
  56. package/dist/src/hooks/session-start.js +4 -4
  57. package/dist/src/hooks/stop.js +7 -7
  58. package/dist/src/hooks/user-prompt-submit.js +5 -5
  59. package/dist/src/mcp/server.js +4 -4
  60. package/dist/src/prompts/consolidation.md +2 -0
  61. package/dist/src/prompts/digest-7500.md +68 -0
  62. package/dist/{stats-MKDIZFIQ.js → stats-43OESUEB.js} +6 -6
  63. package/dist/ui/assets/index-Bk4X_8-Z.css +1 -0
  64. package/dist/ui/assets/index-D3SY7ZHY.js +299 -0
  65. package/dist/ui/index.html +2 -2
  66. package/dist/{verify-7DW7LAND.js → verify-IIAHBAAU.js} +6 -6
  67. package/dist/{version-RQLD7VBP.js → version-NKOECSVH.js} +4 -4
  68. package/package.json +1 -1
  69. package/dist/chunk-24DOZEUJ.js.map +0 -1
  70. package/dist/chunk-2ZBB3MQT.js.map +0 -1
  71. package/dist/chunk-3JCXYLHD.js +0 -33
  72. package/dist/chunk-3JCXYLHD.js.map +0 -1
  73. package/dist/chunk-6BSDCZ5Q.js.map +0 -1
  74. package/dist/chunk-B5UZSHQV.js +0 -250
  75. package/dist/chunk-B5UZSHQV.js.map +0 -1
  76. package/dist/chunk-E7OBRBCQ.js.map +0 -1
  77. package/dist/chunk-KC7ENQTN.js +0 -436
  78. package/dist/chunk-KC7ENQTN.js.map +0 -1
  79. package/dist/chunk-QGJ2ZIUZ.js.map +0 -1
  80. package/dist/chunk-UVGAVYWZ.js +0 -157
  81. package/dist/chunk-UVGAVYWZ.js.map +0 -1
  82. package/dist/chunk-ZMYNRTTD.js.map +0 -1
  83. package/dist/main-JEUQS3BY.js.map +0 -1
  84. package/dist/reprocess-Q4YH2ZBK.js +0 -268
  85. package/dist/reprocess-Q4YH2ZBK.js.map +0 -1
  86. package/dist/setup-digest-YLZZGSSR.js +0 -15
  87. package/dist/setup-llm-JOXBSLXC.js +0 -15
  88. package/dist/ui/assets/index-D37IoDXS.css +0 -1
  89. package/dist/ui/assets/index-DA61Ial2.js +0 -289
  90. /package/dist/{chunk-2YBUL3IL.js.map → chunk-4WL5X7VS.js.map} +0 -0
  91. /package/dist/{chunk-2GSX3BK2.js.map → chunk-CPVXNRGW.js.map} +0 -0
  92. /package/dist/{chunk-L25U7PIG.js.map → chunk-CQ4RKK67.js.map} +0 -0
  93. /package/dist/{chunk-GDYYJTTT.js.map → chunk-DBMHUMG3.js.map} +0 -0
  94. /package/dist/{chunk-5FNZ7AMX.js.map → chunk-IWBWZQK6.js.map} +0 -0
  95. /package/dist/{chunk-5QWZT4AB.js.map → chunk-RNWALAFP.js.map} +0 -0
  96. /package/dist/{chunk-3EM23DMD.js.map → chunk-RXJHB7W4.js.map} +0 -0
  97. /package/dist/{chunk-GNR3QAER.js.map → chunk-RY76WEN3.js.map} +0 -0
  98. /package/dist/{chunk-YTANWAGE.js.map → chunk-XNAM6Z4O.js.map} +0 -0
  99. /package/dist/{cli-K7SUTP7A.js.map → cli-EGWAINIE.js.map} +0 -0
  100. /package/dist/{client-YJMNTITQ.js.map → client-FDKJ4BY7.js.map} +0 -0
  101. /package/dist/{config-G5GGT5A6.js.map → config-HDUFDOQN.js.map} +0 -0
  102. /package/dist/{detect-providers-S3M5TAMW.js.map → detect-providers-4U3ZPW5G.js.map} +0 -0
  103. /package/dist/{init-TFLSATB3.js.map → init-ZO2XQT6U.js.map} +0 -0
  104. /package/dist/{restart-NLJLB52D.js.map → restart-WSA4JSE3.js.map} +0 -0
  105. /package/dist/{search-2BVRF54H.js.map → search-QXJQUB35.js.map} +0 -0
  106. /package/dist/{session-start-AZAF3DTE.js.map → session-start-KQ4KCQMZ.js.map} +0 -0
  107. /package/dist/{setup-digest-YLZZGSSR.js.map → setup-digest-QNCM3PNQ.js.map} +0 -0
  108. /package/dist/{setup-llm-JOXBSLXC.js.map → setup-llm-EAOIUSPJ.js.map} +0 -0
  109. /package/dist/{stats-MKDIZFIQ.js.map → stats-43OESUEB.js.map} +0 -0
  110. /package/dist/{verify-7DW7LAND.js.map → verify-IIAHBAAU.js.map} +0 -0
  111. /package/dist/{version-RQLD7VBP.js.map → version-NKOECSVH.js.map} +0 -0
@@ -12,7 +12,7 @@
  "source": {
  "source": "npm",
  "package": "@goondocks/myco",
- "version": "0.6.0"
+ "version": "0.6.2"
  },
  "description": "Collective agent intelligence — captures session knowledge and serves it back via MCP",
  "license": "MIT",
@@ -1,6 +1,6 @@
  {
  "name": "myco",
- "version": "0.6.1",
+ "version": "0.6.3",
  "description": "Collective agent intelligence — captures session knowledge and serves it back to your team via MCP",
  "author": {
  "name": "goondocks-co",
@@ -3,7 +3,7 @@ import {
  DAEMON_CLIENT_TIMEOUT_MS,
  EMBEDDING_REQUEST_TIMEOUT_MS,
  LLM_REQUEST_TIMEOUT_MS
- } from "./chunk-6BSDCZ5Q.js";
+ } from "./chunk-WBLTISAK.js";

  // src/intelligence/ollama.ts
  var ENDPOINT_GENERATE = "/api/generate";
@@ -32,7 +32,7 @@ var OllamaBackend = class _OllamaBackend {
  const body = {
  model: this.model,
  prompt,
- stream: false,
+ stream: true,
  options
  };
  if (opts?.systemPrompt) {
@@ -54,8 +54,40 @@ var OllamaBackend = class _OllamaBackend {
  const errorBody = await response.text().catch(() => "");
  throw new Error(`Ollama summarize failed: ${response.status} ${errorBody.slice(0, 500)}`);
  }
- const data = await response.json();
- return { text: data.response, model: data.model };
+ return this.readStream(response);
+ }
+ /** Read an Ollama streaming response (newline-delimited JSON) and accumulate the result. */
+ async readStream(response) {
+ const reader = response.body.getReader();
+ const decoder = new TextDecoder();
+ let text = "";
+ let model = this.model;
+ let buffer = "";
+ try {
+ for (; ; ) {
+ const { done, value } = await reader.read();
+ if (done) break;
+ buffer += decoder.decode(value, { stream: true });
+ const lines = buffer.split("\n");
+ buffer = lines.pop() ?? "";
+ for (const line of lines) {
+ if (!line.trim()) continue;
+ const chunk = JSON.parse(line);
+ if (chunk.error) throw new Error(`Ollama stream error: ${chunk.error}`);
+ text += chunk.response ?? "";
+ if (chunk.model) model = chunk.model;
+ }
+ }
+ if (buffer.trim()) {
+ const chunk = JSON.parse(buffer);
+ if (chunk.error) throw new Error(`Ollama stream error: ${chunk.error}`);
+ text += chunk.response ?? "";
+ if (chunk.model) model = chunk.model;
+ }
+ } finally {
+ reader.releaseLock();
+ }
+ return { text, model };
  }
  async embed(text) {
  const response = await fetch(`${this.baseUrl}${ENDPOINT_EMBED}`, {
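(For reference: with `stream: true`, Ollama's /api/generate returns newline-delimited JSON, one object per generated fragment carrying a partial `response`, ending with an object marked `done: true`. The sketch below mirrors the accumulation logic added in the hunk above as a standalone function; the `generateStreaming` name, the hard-coded base URL, and the chunk type are illustrative assumptions, not part of the package.)

```ts
// Minimal sketch: accumulate an Ollama NDJSON generate stream into one string.
// Assumes an Ollama server on localhost:11434 with the model already pulled.
interface OllamaStreamChunk {
  response?: string;
  model?: string;
  done?: boolean;
  error?: string;
}

async function generateStreaming(model: string, prompt: string): Promise<string> {
  const res = await fetch("http://localhost:11434/api/generate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ model, prompt, stream: true }),
  });
  if (!res.ok || !res.body) throw new Error(`generate failed: ${res.status}`);

  const reader = res.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";
  let text = "";
  try {
    for (;;) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      const lines = buffer.split("\n");
      buffer = lines.pop() ?? ""; // keep any partial trailing line for the next read
      for (const line of lines) {
        if (!line.trim()) continue;
        const chunk = JSON.parse(line) as OllamaStreamChunk;
        if (chunk.error) throw new Error(chunk.error);
        text += chunk.response ?? "";
      }
    }
  } finally {
    reader.releaseLock();
  }
  return text;
}
```

The buffer handling matters because a single network read can end mid-line; only complete lines are parsed, and the trailing partial line is carried into the next read (then flushed after the loop, as in the hunk above).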
@@ -183,35 +215,37 @@ var LmStudioBackend = class _LmStudioBackend {
  return { embedding, model: data.model, dimensions: embedding.length };
  }
  /**
- * Ensure a model instance is loaded with the desired settings.
- * Called every digest cycle (not cached) so it recovers from idle TTL eviction.
+ * Ensure a model instance is loaded and capture its ID for routing.
+ * Called every digest cycle so it recovers from idle TTL eviction.
  *
- * The load API is necessary to control offload_kv_cache_to_gpu a load-time
- * setting that cannot be set per-request via the chat API.
+ * Strategy: reuse ANY loaded instance of this model. Only load a new one
+ * when zero instances exist. This avoids the previous bug where strict
+ * config matching (context_length, offload_kv_cache_to_gpu) caused new
+ * instances to spawn every cycle — exhausting system resources.
  *
- * Multi-daemon safe: finds or loads our own compatible instance without
- * touching instances from other daemons/projects. Routes by instance ID.
+ * context_length is set per-request on /api/v1/chat, so we don't need
+ * to match it at load time. Load-time-only params like
+ * offload_kv_cache_to_gpu are llama.cpp-specific and may not apply to
+ * all models (e.g., glm-4.7-flash has no KV cache setting).
  */
  async ensureLoaded(contextLength, gpuKvCache) {
- const ctx = contextLength ?? this.contextWindow;
- const kvCache = gpuKvCache ?? false;
  const instances = await this.getLoadedInstances();
- for (const instance of instances) {
- const matchesContext = !ctx || instance.config.context_length === ctx;
- const matchesKvCache = instance.config.offload_kv_cache_to_gpu === kvCache;
- if (matchesContext && matchesKvCache) {
- this.instanceId = instance.id;
- return;
- }
+ if (instances.length > 0) {
+ this.instanceId = instances[0].id;
+ return;
  }
+ const ctx = contextLength ?? this.contextWindow;
  const body = {
  model: this.model,
- flash_attention: true,
- offload_kv_cache_to_gpu: kvCache
+ // llama.cpp-specific — ignored by other engines (MLX, etc.)
+ flash_attention: true
  };
  if (ctx) {
  body.context_length = ctx;
  }
+ if (gpuKvCache) {
+ body.offload_kv_cache_to_gpu = true;
+ }
  const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_LOAD}`, {
  method: "POST",
  headers: { "Content-Type": "application/json" },
@@ -273,4 +307,4 @@ export {
  OllamaBackend,
  LmStudioBackend
  };
- //# sourceMappingURL=chunk-QGJ2ZIUZ.js.map
+ //# sourceMappingURL=chunk-25FY74AP.js.map
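(The ensureLoaded rewrite in this chunk moves from strict config matching to a reuse-first strategy: accept any already-loaded instance of the model and only issue a load request when none exists. Below is a condensed TypeScript sketch of that flow; the endpoints and response fields are the ones visible in this diff, while `BASE_URL`, the standalone function shape, and the fallback return value are illustrative assumptions.)

```ts
// Sketch of the reuse-first load flow introduced in ensureLoaded above.
// Endpoints (/api/v1/models, /api/v1/models/load) and field names (key,
// loaded_instances, instance_id) are taken from this diff; everything else
// here is illustrative.
const BASE_URL = "http://localhost:1234";

interface LoadedInstance {
  id: string;
  config: { context_length: number; offload_kv_cache_to_gpu?: boolean };
}

async function ensureLoaded(
  model: string,
  contextLength?: number,
  gpuKvCache?: boolean,
): Promise<string> {
  // Reuse any instance that is already loaded, regardless of its config:
  // context_length can be set per-request on /api/v1/chat, so a mismatch
  // is no reason to spawn another instance.
  const listRes = await fetch(`${BASE_URL}/api/v1/models`);
  if (listRes.ok) {
    const data = (await listRes.json()) as {
      models: Array<{ key: string; loaded_instances: LoadedInstance[] }>;
    };
    const entry = data.models.find((m) => m.key === model);
    if (entry && entry.loaded_instances.length > 0) {
      return entry.loaded_instances[0].id;
    }
  }

  // Nothing loaded: request a fresh instance with our preferred settings.
  // flash_attention and offload_kv_cache_to_gpu are llama.cpp hints and may
  // be ignored by other engines.
  const body: Record<string, unknown> = { model, flash_attention: true };
  if (contextLength) body.context_length = contextLength;
  if (gpuKvCache) body.offload_kv_cache_to_gpu = true;

  const loadRes = await fetch(`${BASE_URL}/api/v1/models/load`, {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify(body),
  });
  if (!loadRes.ok) throw new Error(`LM Studio model load failed: ${loadRes.status}`);

  const result = (await loadRes.json()) as { instance_id?: string; id?: string };
  // Fall back to the model name if no instance ID is reported (illustrative choice).
  return result.instance_id ?? result.id ?? model;
}
```

Keeping the load body minimal is the point of the fix: per-request settings travel on /api/v1/chat instead, so config differences no longer trigger extra instances.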
@@ -0,0 +1 @@
+ {"version":3,"sources":["../src/intelligence/ollama.ts","../src/intelligence/lm-studio.ts"],"sourcesContent":["import type { LlmProvider, EmbeddingProvider, LlmResponse, EmbeddingResponse, LlmRequestOptions } from './llm.js';\nimport { LLM_REQUEST_TIMEOUT_MS, EMBEDDING_REQUEST_TIMEOUT_MS, DAEMON_CLIENT_TIMEOUT_MS } from '../constants.js';\n\ninterface OllamaConfig {\n model?: string;\n base_url?: string;\n context_window?: number;\n max_tokens?: number;\n // Legacy fields (ignored, kept for backward compat during migration)\n embedding_model?: string;\n summary_model?: string;\n}\n\n// Ollama API endpoints\nconst ENDPOINT_GENERATE = '/api/generate';\nconst ENDPOINT_EMBED = '/api/embed';\nconst ENDPOINT_TAGS = '/api/tags';\n\nexport class OllamaBackend implements LlmProvider, EmbeddingProvider {\n static readonly DEFAULT_BASE_URL = 'http://localhost:11434';\n readonly name = 'ollama';\n private baseUrl: string;\n private model: string;\n private defaultMaxTokens: number;\n private contextWindow: number | undefined;\n\n constructor(config?: OllamaConfig) {\n this.baseUrl = config?.base_url ?? OllamaBackend.DEFAULT_BASE_URL;\n this.model = config?.model ?? config?.summary_model ?? 'llama3.2';\n this.defaultMaxTokens = config?.max_tokens ?? 1024;\n this.contextWindow = config?.context_window;\n }\n\n async summarize(prompt: string, opts?: LlmRequestOptions): Promise<LlmResponse> {\n const maxTokens = opts?.maxTokens ?? this.defaultMaxTokens;\n\n // Send num_ctx from config or per-call override. Ollama reloads the model\n // on num_ctx changes, but consistent values (same num_ctx every call)\n // only cause one reload on first use. Without this, Ollama falls back to\n // its model default (often 2048), ignoring the user's configured context.\n const contextLength = opts?.contextLength ?? this.contextWindow;\n const options: Record<string, unknown> = { num_predict: maxTokens };\n if (contextLength) {\n options.num_ctx = contextLength;\n }\n\n const body: Record<string, unknown> = {\n model: this.model,\n prompt,\n stream: true,\n options,\n };\n\n // System prompt — sent as a separate field instead of concatenated into prompt\n if (opts?.systemPrompt) {\n body.system = opts.systemPrompt;\n }\n\n // Thinking control — false suppresses chain-of-thought for reasoning models\n if (opts?.reasoning) {\n body.think = opts.reasoning === 'off' ? false : opts.reasoning;\n }\n\n // Keep model loaded between requests (useful for digest cycles)\n if (opts?.keepAlive) {\n body.keep_alive = opts.keepAlive;\n }\n\n const response = await fetch(`${this.baseUrl}${ENDPOINT_GENERATE}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(body),\n signal: AbortSignal.timeout(opts?.timeoutMs ?? LLM_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n const errorBody = await response.text().catch(() => '');\n throw new Error(`Ollama summarize failed: ${response.status} ${errorBody.slice(0, 500)}`);\n }\n\n return this.readStream(response);\n }\n\n /** Read an Ollama streaming response (newline-delimited JSON) and accumulate the result. */\n private async readStream(response: Response): Promise<LlmResponse> {\n const reader = response.body!.getReader();\n const decoder = new TextDecoder();\n let text = '';\n let model = this.model;\n let buffer = '';\n\n try {\n for (;;) {\n const { done, value } = await reader.read();\n if (done) break;\n\n buffer += decoder.decode(value, { stream: true });\n const lines = buffer.split('\\n');\n buffer = lines.pop() ?? 
'';\n\n for (const line of lines) {\n if (!line.trim()) continue;\n const chunk = JSON.parse(line) as { response?: string; model?: string; error?: string };\n if (chunk.error) throw new Error(`Ollama stream error: ${chunk.error}`);\n text += chunk.response ?? '';\n if (chunk.model) model = chunk.model;\n }\n }\n\n // Process remaining buffer\n if (buffer.trim()) {\n const chunk = JSON.parse(buffer) as { response?: string; model?: string; error?: string };\n if (chunk.error) throw new Error(`Ollama stream error: ${chunk.error}`);\n text += chunk.response ?? '';\n if (chunk.model) model = chunk.model;\n }\n } finally {\n reader.releaseLock();\n }\n\n return { text, model };\n }\n\n async embed(text: string): Promise<EmbeddingResponse> {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_EMBED}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({\n model: this.model,\n input: text,\n }),\n signal: AbortSignal.timeout(EMBEDDING_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n throw new Error(`Ollama embed failed: ${response.status} ${response.statusText}`);\n }\n\n const data = await response.json() as { embeddings: number[][]; model: string };\n const embedding = data.embeddings[0];\n return { embedding, model: data.model, dimensions: embedding.length };\n }\n\n async isAvailable(): Promise<boolean> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_TAGS}`, {\n signal: AbortSignal.timeout(DAEMON_CLIENT_TIMEOUT_MS),\n });\n return response.ok;\n } catch {\n return false;\n }\n }\n\n /** List available models on this Ollama instance. */\n async listModels(timeoutMs?: number): Promise<string[]> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_TAGS}`, {\n signal: AbortSignal.timeout(timeoutMs ?? DAEMON_CLIENT_TIMEOUT_MS),\n });\n const data = await response.json() as { models: Array<{ name: string }> };\n return data.models.map((m) => m.name);\n } catch {\n return [];\n }\n }\n}\n","import type { LlmProvider, EmbeddingProvider, LlmResponse, EmbeddingResponse, LlmRequestOptions } from './llm.js';\nimport { LLM_REQUEST_TIMEOUT_MS, EMBEDDING_REQUEST_TIMEOUT_MS, DAEMON_CLIENT_TIMEOUT_MS } from '../constants.js';\n\ninterface LmStudioConfig {\n model?: string;\n base_url?: string;\n context_window?: number;\n max_tokens?: number;\n // Legacy fields\n embedding_model?: string;\n summary_model?: string;\n}\n\n// LM Studio API endpoints\nconst ENDPOINT_CHAT = '/api/v1/chat';\nconst ENDPOINT_MODELS_LOAD = '/api/v1/models/load';\nconst ENDPOINT_MODELS_LIST = '/v1/models';\nconst ENDPOINT_MODELS_NATIVE = '/api/v1/models';\nconst ENDPOINT_EMBEDDINGS = '/v1/embeddings';\n\n/** Shape of a loaded instance from the LM Studio native models API.\n * Config fields vary by engine — llama.cpp models include flash_attention\n * and offload_kv_cache_to_gpu, but other engines (MLX, etc.) may omit them. */\ninterface NativeLoadedInstance {\n id: string;\n config: {\n context_length: number;\n flash_attention?: boolean;\n offload_kv_cache_to_gpu?: boolean;\n };\n}\n\n/** Shape of a model entry from the LM Studio native models API. 
*/\ninterface NativeModelEntry {\n type: string;\n key: string;\n loaded_instances: NativeLoadedInstance[];\n}\n\nexport class LmStudioBackend implements LlmProvider, EmbeddingProvider {\n static readonly DEFAULT_BASE_URL = 'http://localhost:1234';\n readonly name = 'lm-studio';\n private baseUrl: string;\n private model: string;\n private instanceId: string | null = null;\n private contextWindow: number | undefined;\n private defaultMaxTokens: number;\n\n constructor(config?: LmStudioConfig) {\n this.baseUrl = config?.base_url ?? LmStudioBackend.DEFAULT_BASE_URL;\n this.model = config?.model ?? config?.summary_model ?? 'llama3.2';\n this.contextWindow = config?.context_window;\n this.defaultMaxTokens = config?.max_tokens ?? 1024;\n }\n\n /**\n * Generate text using LM Studio's native REST API (/api/v1/chat).\n * Routes to our specific instance by ID when available, with model name +\n * context_length as fallback. This ensures correct routing when multiple\n * daemons share the same LM Studio, and graceful degradation when our\n * instance is evicted by idle TTL.\n */\n async summarize(prompt: string, opts?: LlmRequestOptions): Promise<LlmResponse> {\n const maxTokens = opts?.maxTokens ?? this.defaultMaxTokens;\n const contextLength = opts?.contextLength ?? this.contextWindow;\n\n const body: Record<string, unknown> = {\n model: this.instanceId ?? this.model,\n input: prompt,\n max_output_tokens: maxTokens,\n store: false,\n };\n\n // Always send context_length — even when routing by instance ID.\n // If our instance was evicted and LM Studio auto-loads, this ensures\n // the replacement gets the correct context window.\n if (contextLength) {\n body.context_length = contextLength;\n }\n\n // System prompt — sent separately from user content\n if (opts?.systemPrompt) {\n body.system_prompt = opts.systemPrompt;\n }\n\n // Reasoning control — 'off' suppresses chain-of-thought for reasoning models\n if (opts?.reasoning) {\n body.reasoning = opts.reasoning;\n }\n\n const response = await fetch(`${this.baseUrl}${ENDPOINT_CHAT}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(body),\n signal: AbortSignal.timeout(opts?.timeoutMs ?? LLM_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n const errorBody = await response.text().catch(() => '');\n // If our instance was evicted, clear the ID so ensureLoaded\n // reloads on the next cycle instead of hitting a stale ID repeatedly\n if (response.status === 404 && this.instanceId) {\n this.instanceId = null;\n }\n throw new Error(`LM Studio summarize failed: ${response.status} ${errorBody.slice(0, 500)}`);\n }\n\n const data = await response.json() as {\n model_instance_id: string;\n output: Array<{ type: string; content: string }>;\n };\n const messageOutput = data.output.find((o) => o.type === 'message');\n const text = messageOutput?.content ?? 
'';\n return { text, model: data.model_instance_id };\n }\n\n /**\n * Generate embeddings using LM Studio's OpenAI-compatible endpoint.\n * (The native API doesn't have an embedding endpoint — OpenAI-compat is fine here.)\n */\n async embed(text: string): Promise<EmbeddingResponse> {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_EMBEDDINGS}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({\n model: this.model,\n input: text,\n }),\n signal: AbortSignal.timeout(EMBEDDING_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n throw new Error(`LM Studio embed failed: ${response.status}`);\n }\n\n const data = await response.json() as {\n data: Array<{ embedding: number[] }>;\n model: string;\n };\n const embedding = data.data[0].embedding;\n return { embedding, model: data.model, dimensions: embedding.length };\n }\n\n /**\n * Ensure a model instance is loaded and capture its ID for routing.\n * Called every digest cycle so it recovers from idle TTL eviction.\n *\n * Strategy: reuse ANY loaded instance of this model. Only load a new one\n * when zero instances exist. This avoids the previous bug where strict\n * config matching (context_length, offload_kv_cache_to_gpu) caused new\n * instances to spawn every cycle — exhausting system resources.\n *\n * context_length is set per-request on /api/v1/chat, so we don't need\n * to match it at load time. Load-time-only params like\n * offload_kv_cache_to_gpu are llama.cpp-specific and may not apply to\n * all models (e.g., glm-4.7-flash has no KV cache setting).\n */\n async ensureLoaded(contextLength?: number, gpuKvCache?: boolean): Promise<void> {\n // Query native API for existing loaded instances of this model\n const instances = await this.getLoadedInstances();\n\n if (instances.length > 0) {\n // Reuse the first available instance — don't reject over config differences.\n // context_length is set per-request; load-time params like kv_cache are\n // model-dependent and may not even appear in the instance config.\n this.instanceId = instances[0].id;\n return;\n }\n\n // No instances loaded — load one with our preferred settings.\n // These are hints; LM Studio silently ignores params that don't apply to the model's engine.\n const ctx = contextLength ?? this.contextWindow;\n const body: Record<string, unknown> = {\n model: this.model,\n // llama.cpp-specific — ignored by other engines (MLX, etc.)\n flash_attention: true,\n };\n if (ctx) {\n body.context_length = ctx;\n }\n if (gpuKvCache) {\n body.offload_kv_cache_to_gpu = true;\n }\n\n const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_LOAD}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(body),\n signal: AbortSignal.timeout(LLM_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n const errorBody = await response.text().catch(() => '');\n throw new Error(`LM Studio model load failed: ${response.status} ${errorBody.slice(0, 200)}`);\n }\n\n const loadResult = await response.json() as Record<string, unknown>;\n const id = (loadResult.instance_id ?? loadResult.id ?? 
loadResult.model_instance_id) as string | undefined;\n if (id) {\n this.instanceId = id;\n }\n }\n\n /**\n * Query the LM Studio native API for loaded instances of this model.\n * Returns an empty array if the API is unavailable or the model has no loaded instances.\n */\n private async getLoadedInstances(): Promise<NativeLoadedInstance[]> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_NATIVE}`, {\n signal: AbortSignal.timeout(DAEMON_CLIENT_TIMEOUT_MS),\n });\n if (!response.ok) return [];\n\n const data = await response.json() as { models: NativeModelEntry[] };\n const entry = data.models.find((m) => m.key === this.model);\n return entry?.loaded_instances ?? [];\n } catch {\n return [];\n }\n }\n\n async isAvailable(): Promise<boolean> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_LIST}`, {\n signal: AbortSignal.timeout(DAEMON_CLIENT_TIMEOUT_MS),\n });\n return response.ok;\n } catch {\n return false;\n }\n }\n\n /** List available models on this LM Studio instance. */\n async listModels(timeoutMs?: number): Promise<string[]> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_LIST}`, {\n signal: AbortSignal.timeout(timeoutMs ?? DAEMON_CLIENT_TIMEOUT_MS),\n });\n const data = await response.json() as { data: Array<{ id: string }> };\n return data.data.map((m) => m.id);\n } catch {\n return [];\n }\n }\n}\n"],"mappings":";;;;;;;;AAcA,IAAM,oBAAoB;AAC1B,IAAM,iBAAiB;AACvB,IAAM,gBAAgB;AAEf,IAAM,gBAAN,MAAM,eAAwD;AAAA,EACnE,OAAgB,mBAAmB;AAAA,EAC1B,OAAO;AAAA,EACR;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,QAAuB;AACjC,SAAK,UAAU,QAAQ,YAAY,eAAc;AACjD,SAAK,QAAQ,QAAQ,SAAS,QAAQ,iBAAiB;AACvD,SAAK,mBAAmB,QAAQ,cAAc;AAC9C,SAAK,gBAAgB,QAAQ;AAAA,EAC/B;AAAA,EAEA,MAAM,UAAU,QAAgB,MAAgD;AAC9E,UAAM,YAAY,MAAM,aAAa,KAAK;AAM1C,UAAM,gBAAgB,MAAM,iBAAiB,KAAK;AAClD,UAAM,UAAmC,EAAE,aAAa,UAAU;AAClE,QAAI,eAAe;AACjB,cAAQ,UAAU;AAAA,IACpB;AAEA,UAAM,OAAgC;AAAA,MACpC,OAAO,KAAK;AAAA,MACZ;AAAA,MACA,QAAQ;AAAA,MACR;AAAA,IACF;AAGA,QAAI,MAAM,cAAc;AACtB,WAAK,SAAS,KAAK;AAAA,IACrB;AAGA,QAAI,MAAM,WAAW;AACnB,WAAK,QAAQ,KAAK,cAAc,QAAQ,QAAQ,KAAK;AAAA,IACvD;AAGA,QAAI,MAAM,WAAW;AACnB,WAAK,aAAa,KAAK;AAAA,IACzB;AAEA,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,iBAAiB,IAAI;AAAA,MAClE,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MACzB,QAAQ,YAAY,QAAQ,MAAM,aAAa,sBAAsB;AAAA,IACvE,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK,EAAE,MAAM,MAAM,EAAE;AACtD,YAAM,IAAI,MAAM,4BAA4B,SAAS,MAAM,IAAI,UAAU,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,IAC1F;AAEA,WAAO,KAAK,WAAW,QAAQ;AAAA,EACjC;AAAA;AAAA,EAGA,MAAc,WAAW,UAA0C;AACjE,UAAM,SAAS,SAAS,KAAM,UAAU;AACxC,UAAM,UAAU,IAAI,YAAY;AAChC,QAAI,OAAO;AACX,QAAI,QAAQ,KAAK;AACjB,QAAI,SAAS;AAEb,QAAI;AACF,iBAAS;AACP,cAAM,EAAE,MAAM,MAAM,IAAI,MAAM,OAAO,KAAK;AAC1C,YAAI,KAAM;AAEV,kBAAU,QAAQ,OAAO,OAAO,EAAE,QAAQ,KAAK,CAAC;AAChD,cAAM,QAAQ,OAAO,MAAM,IAAI;AAC/B,iBAAS,MAAM,IAAI,KAAK;AAExB,mBAAW,QAAQ,OAAO;AACxB,cAAI,CAAC,KAAK,KAAK,EAAG;AAClB,gBAAM,QAAQ,KAAK,MAAM,IAAI;AAC7B,cAAI,MAAM,MAAO,OAAM,IAAI,MAAM,wBAAwB,MAAM,KAAK,EAAE;AACtE,kBAAQ,MAAM,YAAY;AAC1B,cAAI,MAAM,MAAO,SAAQ,MAAM;AAAA,QACjC;AAAA,MACF;AAGA,UAAI,OAAO,KAAK,GAAG;AACjB,cAAM,QAAQ,KAAK,MAAM,MAAM;AAC/B,YAAI,MAAM,MAAO,OAAM,IAAI,MAAM,wBAAwB,MAAM,KAAK,EAAE;AACtE,gBAAQ,MAAM,YAAY;AAC1B,YAAI,MAAM,MAAO,SAAQ,MAAM;AAAA,MACjC;AAAA,IACF,UAAE;AACA,aAAO,YAAY;AAAA,IACrB;AAEA,WAAO,EAAE,MAAM,MAAM;AAAA,EACvB;AAAA,EAEA,MAAM,MAAM,MAA0C;AACpD,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,cAAc,IAAI;AAAA,MAC/D,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK;AAAA,QACZ,O
AAO;AAAA,MACT,CAAC;AAAA,MACD,QAAQ,YAAY,QAAQ,4BAA4B;AAAA,IAC1D,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,IAAI,SAAS,UAAU,EAAE;AAAA,IAClF;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AACjC,UAAM,YAAY,KAAK,WAAW,CAAC;AACnC,WAAO,EAAE,WAAW,OAAO,KAAK,OAAO,YAAY,UAAU,OAAO;AAAA,EACtE;AAAA,EAEA,MAAM,cAAgC;AACpC,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,aAAa,IAAI;AAAA,QAC9D,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,MACtD,CAAC;AACD,aAAO,SAAS;AAAA,IAClB,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,WAAuC;AACtD,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,aAAa,IAAI;AAAA,QAC9D,QAAQ,YAAY,QAAQ,aAAa,wBAAwB;AAAA,MACnE,CAAC;AACD,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,aAAO,KAAK,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI;AAAA,IACtC,QAAQ;AACN,aAAO,CAAC;AAAA,IACV;AAAA,EACF;AACF;;;ACxJA,IAAM,gBAAgB;AACtB,IAAM,uBAAuB;AAC7B,IAAM,uBAAuB;AAC7B,IAAM,yBAAyB;AAC/B,IAAM,sBAAsB;AAqBrB,IAAM,kBAAN,MAAM,iBAA0D;AAAA,EACrE,OAAgB,mBAAmB;AAAA,EAC1B,OAAO;AAAA,EACR;AAAA,EACA;AAAA,EACA,aAA4B;AAAA,EAC5B;AAAA,EACA;AAAA,EAER,YAAY,QAAyB;AACnC,SAAK,UAAU,QAAQ,YAAY,iBAAgB;AACnD,SAAK,QAAQ,QAAQ,SAAS,QAAQ,iBAAiB;AACvD,SAAK,gBAAgB,QAAQ;AAC7B,SAAK,mBAAmB,QAAQ,cAAc;AAAA,EAChD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EASA,MAAM,UAAU,QAAgB,MAAgD;AAC9E,UAAM,YAAY,MAAM,aAAa,KAAK;AAC1C,UAAM,gBAAgB,MAAM,iBAAiB,KAAK;AAElD,UAAM,OAAgC;AAAA,MACpC,OAAO,KAAK,cAAc,KAAK;AAAA,MAC/B,OAAO;AAAA,MACP,mBAAmB;AAAA,MACnB,OAAO;AAAA,IACT;AAKA,QAAI,eAAe;AACjB,WAAK,iBAAiB;AAAA,IACxB;AAGA,QAAI,MAAM,cAAc;AACtB,WAAK,gBAAgB,KAAK;AAAA,IAC5B;AAGA,QAAI,MAAM,WAAW;AACnB,WAAK,YAAY,KAAK;AAAA,IACxB;AAEA,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,aAAa,IAAI;AAAA,MAC9D,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MACzB,QAAQ,YAAY,QAAQ,MAAM,aAAa,sBAAsB;AAAA,IACvE,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK,EAAE,MAAM,MAAM,EAAE;AAGtD,UAAI,SAAS,WAAW,OAAO,KAAK,YAAY;AAC9C,aAAK,aAAa;AAAA,MACpB;AACA,YAAM,IAAI,MAAM,+BAA+B,SAAS,MAAM,IAAI,UAAU,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,IAC7F;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AAIjC,UAAM,gBAAgB,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,SAAS;AAClE,UAAM,OAAO,eAAe,WAAW;AACvC,WAAO,EAAE,MAAM,OAAO,KAAK,kBAAkB;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,MAA0C;AACpD,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,mBAAmB,IAAI;AAAA,MACpE,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK;AAAA,QACZ,OAAO;AAAA,MACT,CAAC;AAAA,MACD,QAAQ,YAAY,QAAQ,4BAA4B;AAAA,IAC1D,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,IAAI,MAAM,2BAA2B,SAAS,MAAM,EAAE;AAAA,IAC9D;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AAIjC,UAAM,YAAY,KAAK,KAAK,CAAC,EAAE;AAC/B,WAAO,EAAE,WAAW,OAAO,KAAK,OAAO,YAAY,UAAU,OAAO;AAAA,EACtE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAgBA,MAAM,aAAa,eAAwB,YAAqC;AAE9E,UAAM,YAAY,MAAM,KAAK,mBAAmB;AAEhD,QAAI,UAAU,SAAS,GAAG;AAIxB,WAAK,aAAa,UAAU,CAAC,EAAE;AAC/B;AAAA,IACF;AAIA,UAAM,MAAM,iBAAiB,KAAK;AAClC,UAAM,OAAgC;AAAA,MACpC,OAAO,KAAK;AAAA;AAAA,MAEZ,iBAAiB;AAAA,IACnB;AACA,QAAI,KAAK;AACP,WAAK,iBAAiB;AAAA,IACxB;AACA,QAAI,YAAY;AACd,WAAK,0BAA0B;AAAA,IACjC;AAEA,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,oBAAoB,IAAI;AAAA,MACrE,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MACzB,QAAQ,YAAY,QAAQ,sBAAsB;AAAA,IACpD,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK,EAAE,MAAM,MAAM,EAAE;AACtD,YAAM,IAAI,MAAM,gCAAgC,SAAS,MAAM,IAAI,UAAU,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,IAC9F;AAEA,UAAM,aAAa,MAAM,SAAS,KAAK;AACvC,UAAM,KAAM,WAAW,eAAe,WAAW,MAAM,WAAW;AAClE,QAAI,IAAI;AACN,WAAK,aAAa;AAAA,IACpB;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,qBAAsD;AAClE,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,s
BAAsB,IAAI;AAAA,QACvE,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,MACtD,CAAC;AACD,UAAI,CAAC,SAAS,GAAI,QAAO,CAAC;AAE1B,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,YAAM,QAAQ,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,QAAQ,KAAK,KAAK;AAC1D,aAAO,OAAO,oBAAoB,CAAC;AAAA,IACrC,QAAQ;AACN,aAAO,CAAC;AAAA,IACV;AAAA,EACF;AAAA,EAEA,MAAM,cAAgC;AACpC,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,oBAAoB,IAAI;AAAA,QACrE,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,MACtD,CAAC;AACD,aAAO,SAAS;AAAA,IAClB,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,WAAuC;AACtD,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,oBAAoB,IAAI;AAAA,QACrE,QAAQ,YAAY,QAAQ,aAAa,wBAAwB;AAAA,MACnE,CAAC;AACD,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,aAAO,KAAK,KAAK,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IAClC,QAAQ;AACN,aAAO,CAAC;AAAA,IACV;AAAA,EACF;AACF;","names":[]}
@@ -2,10 +2,10 @@ import { createRequire as __cr } from 'node:module'; const require = __cr(import
  import {
  LmStudioBackend,
  OllamaBackend
- } from "./chunk-QGJ2ZIUZ.js";
+ } from "./chunk-25FY74AP.js";
  import {
  AgentRegistry
- } from "./chunk-5QWZT4AB.js";
+ } from "./chunk-RNWALAFP.js";

  // src/cli/shared.ts
  import fs from "fs";
@@ -83,4 +83,4 @@ export {
  VAULT_GITIGNORE,
  configureVaultEnv
  };
- //# sourceMappingURL=chunk-2YBUL3IL.js.map
+ //# sourceMappingURL=chunk-4WL5X7VS.js.map