@goondocks/myco 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96) hide show
  1. package/.claude-plugin/marketplace.json +1 -1
  2. package/.claude-plugin/plugin.json +1 -1
  3. package/README.md +5 -1
  4. package/dist/chunk-2AMAOSRF.js +105 -0
  5. package/dist/chunk-2AMAOSRF.js.map +1 -0
  6. package/dist/{chunk-I7PNZEBO.js → chunk-6LTNFMXO.js} +12 -1
  7. package/dist/{chunk-I7PNZEBO.js.map → chunk-6LTNFMXO.js.map} +1 -1
  8. package/dist/{chunk-2GJFTIWX.js → chunk-7KQB22DP.js} +2 -2
  9. package/dist/{chunk-JBD5KP5G.js → chunk-B6WVNDA5.js} +14 -2
  10. package/dist/chunk-B6WVNDA5.js.map +1 -0
  11. package/dist/chunk-FIA5NTRH.js +159 -0
  12. package/dist/chunk-FIA5NTRH.js.map +1 -0
  13. package/dist/{chunk-GFBG73P4.js → chunk-FIRMTYFH.js} +3 -3
  14. package/dist/{chunk-XCPQHC4X.js → chunk-HJG7Z6SJ.js} +2 -2
  15. package/dist/chunk-HL2S5QZG.js +385 -0
  16. package/dist/chunk-HL2S5QZG.js.map +1 -0
  17. package/dist/{chunk-WBT5DWGC.js → chunk-IURC35BF.js} +2 -2
  18. package/dist/{chunk-67R6EMYD.js → chunk-JI6M2L2W.js} +31 -52
  19. package/dist/chunk-JI6M2L2W.js.map +1 -0
  20. package/dist/{chunk-FPEDTLQ6.js → chunk-JJL6AMDA.js} +3 -101
  21. package/dist/chunk-JJL6AMDA.js.map +1 -0
  22. package/dist/chunk-KYL67SKZ.js +150 -0
  23. package/dist/chunk-KYL67SKZ.js.map +1 -0
  24. package/dist/{chunk-ZCBL5HER.js → chunk-ND4VK6C7.js} +2 -2
  25. package/dist/{chunk-V2OWD2VV.js → chunk-R6LQT3U7.js} +24 -146
  26. package/dist/chunk-R6LQT3U7.js.map +1 -0
  27. package/dist/{chunk-IYFKPSRP.js → chunk-RCV2I4AI.js} +3 -3
  28. package/dist/{chunk-BNIYWCST.js → chunk-X6TKHO22.js} +2 -2
  29. package/dist/{chunk-OUFSLZTX.js → chunk-ZWUFTOG3.js} +21 -9
  30. package/dist/chunk-ZWUFTOG3.js.map +1 -0
  31. package/dist/{cli-PMOFCZQL.js → cli-BLYNNKGJ.js} +24 -18
  32. package/dist/cli-BLYNNKGJ.js.map +1 -0
  33. package/dist/{client-5SUO2UYH.js → client-5GB4WVXE.js} +5 -5
  34. package/dist/curate-S4HOYWXA.js +231 -0
  35. package/dist/curate-S4HOYWXA.js.map +1 -0
  36. package/dist/{detect-providers-IRL2TTLK.js → detect-providers-BIHYFK5M.js} +3 -3
  37. package/dist/digest-7NKYXM6G.js +96 -0
  38. package/dist/digest-7NKYXM6G.js.map +1 -0
  39. package/dist/{init-NUF5UBUJ.js → init-HPQ77WWF.js} +5 -5
  40. package/dist/{main-2XEBVUR6.js → main-NFQ4II75.js} +253 -576
  41. package/dist/main-NFQ4II75.js.map +1 -0
  42. package/dist/{rebuild-E6YFIRYZ.js → rebuild-KQ6G2GZM.js} +8 -7
  43. package/dist/{rebuild-E6YFIRYZ.js.map → rebuild-KQ6G2GZM.js.map} +1 -1
  44. package/dist/{reprocess-7G7KQWCN.js → reprocess-ZL4HKTSC.js} +95 -24
  45. package/dist/reprocess-ZL4HKTSC.js.map +1 -0
  46. package/dist/{restart-ABW4ZK3P.js → restart-FYW662DR.js} +6 -6
  47. package/dist/{search-MPD7SFK6.js → search-E5JQMTXV.js} +6 -6
  48. package/dist/{server-NZLZRITH.js → server-TV3D35HZ.js} +38 -15
  49. package/dist/{server-NZLZRITH.js.map → server-TV3D35HZ.js.map} +1 -1
  50. package/dist/{session-start-YB4A4PZB.js → session-start-5MFEOVQ5.js} +6 -6
  51. package/dist/{setup-digest-K732MGOJ.js → setup-digest-DZAFIBEF.js} +5 -5
  52. package/dist/{setup-llm-XCCH5LYD.js → setup-llm-4BZM33YT.js} +5 -5
  53. package/dist/src/cli.js +4 -4
  54. package/dist/src/daemon/main.js +4 -4
  55. package/dist/src/hooks/post-tool-use.js +5 -5
  56. package/dist/src/hooks/session-end.js +5 -5
  57. package/dist/src/hooks/session-start.js +4 -4
  58. package/dist/src/hooks/stop.js +6 -6
  59. package/dist/src/hooks/stop.js.map +1 -1
  60. package/dist/src/hooks/user-prompt-submit.js +5 -5
  61. package/dist/src/mcp/server.js +4 -4
  62. package/dist/src/prompts/extraction.md +1 -1
  63. package/dist/src/prompts/summary.md +1 -11
  64. package/dist/src/prompts/supersession.md +32 -0
  65. package/dist/{stats-6G7SN5YZ.js → stats-ZIIJ2GB3.js} +5 -5
  66. package/dist/{verify-JFHQH55Z.js → verify-RACBFT2P.js} +4 -4
  67. package/dist/{version-5B2TWXQJ.js → version-HJTVNPOO.js} +4 -4
  68. package/package.json +1 -1
  69. package/skills/setup/SKILL.md +56 -28
  70. package/skills/setup/references/model-recommendations.md +49 -43
  71. package/dist/chunk-67R6EMYD.js.map +0 -1
  72. package/dist/chunk-FPEDTLQ6.js.map +0 -1
  73. package/dist/chunk-JBD5KP5G.js.map +0 -1
  74. package/dist/chunk-OUFSLZTX.js.map +0 -1
  75. package/dist/chunk-V2OWD2VV.js.map +0 -1
  76. package/dist/cli-PMOFCZQL.js.map +0 -1
  77. package/dist/main-2XEBVUR6.js.map +0 -1
  78. package/dist/reprocess-7G7KQWCN.js.map +0 -1
  79. /package/dist/{chunk-2GJFTIWX.js.map → chunk-7KQB22DP.js.map} +0 -0
  80. /package/dist/{chunk-GFBG73P4.js.map → chunk-FIRMTYFH.js.map} +0 -0
  81. /package/dist/{chunk-XCPQHC4X.js.map → chunk-HJG7Z6SJ.js.map} +0 -0
  82. /package/dist/{chunk-WBT5DWGC.js.map → chunk-IURC35BF.js.map} +0 -0
  83. /package/dist/{chunk-ZCBL5HER.js.map → chunk-ND4VK6C7.js.map} +0 -0
  84. /package/dist/{chunk-IYFKPSRP.js.map → chunk-RCV2I4AI.js.map} +0 -0
  85. /package/dist/{chunk-BNIYWCST.js.map → chunk-X6TKHO22.js.map} +0 -0
  86. /package/dist/{client-5SUO2UYH.js.map → client-5GB4WVXE.js.map} +0 -0
  87. /package/dist/{detect-providers-IRL2TTLK.js.map → detect-providers-BIHYFK5M.js.map} +0 -0
  88. /package/dist/{init-NUF5UBUJ.js.map → init-HPQ77WWF.js.map} +0 -0
  89. /package/dist/{restart-ABW4ZK3P.js.map → restart-FYW662DR.js.map} +0 -0
  90. /package/dist/{search-MPD7SFK6.js.map → search-E5JQMTXV.js.map} +0 -0
  91. /package/dist/{session-start-YB4A4PZB.js.map → session-start-5MFEOVQ5.js.map} +0 -0
  92. /package/dist/{setup-digest-K732MGOJ.js.map → setup-digest-DZAFIBEF.js.map} +0 -0
  93. /package/dist/{setup-llm-XCCH5LYD.js.map → setup-llm-4BZM33YT.js.map} +0 -0
  94. /package/dist/{stats-6G7SN5YZ.js.map → stats-ZIIJ2GB3.js.map} +0 -0
  95. /package/dist/{verify-JFHQH55Z.js.map → verify-RACBFT2P.js.map} +0 -0
  96. /package/dist/{version-5B2TWXQJ.js.map → version-HJTVNPOO.js.map} +0 -0
@@ -1,60 +1,79 @@
1
1
  # Model Recommendations
2
2
 
3
- Hardware-based guidance for choosing intelligence and embedding models during Myco setup.
3
+ Hardware-based guidance for choosing models during Myco setup. Myco uses three model tiers that load simultaneously in Ollama.
4
4
 
5
- ## Intelligence Model (LLM)
5
+ ## Three-Tier Architecture
6
6
 
7
- One model handles all intelligence tasks — hooks, extraction, summaries, and digest. Size for digestion, the most demanding task (largest context window). The same model runs at 8192 context for hooks and at the digest context window below for synthesis.
7
+ | Tier | Purpose | Speed vs Quality |
8
+ |------|---------|-----------------|
9
+ | **Embedding** | Vector search, semantic similarity | Dedicated small model, always loaded |
10
+ | **Processor** | Extraction, summarization, titles, classification | Speed matters — fast model, 8K context |
11
+ | **Digest** | Synthesize vault knowledge into tiered extracts | Quality matters — large model, up to 65K context |
8
12
 
9
- | RAM | Recommended Model | Digest Context Window |
10
- |-----|-------------------|-----------------------|
11
- | **64GB+** | `qwen3.5:35b` (MoE, recommended) | 65536 |
12
- | **32–64GB** | `qwen3.5:27b` | 32768 |
13
- | **16–32GB** | `qwen3.5:latest` (~10B) | 16384 |
14
- | **8–16GB** | `qwen3.5:4b` | 8192 |
13
+ The processor and digest can be the same model on smaller machines. On larger machines, splitting them gives the best speed/quality balance — processor tasks complete in seconds instead of minutes.
14
+
15
+ ## Recommended Configurations
16
+
17
+ | RAM | Processor Model | Digest Model | Digest Context | Inject Tier | Est. VRAM |
18
+ |-----|----------------|--------------|----------------|-------------|-----------|
19
+ | **64GB+** | `qwen3.5:latest` (~8B) | `qwen3.5:35b` (MoE) | 65536 | 3000 | ~35GB |
20
+ | **48GB** | `qwen3.5:latest` (~8B) | `qwen3.5:27b` | 32768 | 3000 | ~26GB |
21
+ | **32GB** | `qwen3.5:4b` | `qwen3.5:latest` (~8B) | 16384 | 1500 | ~11GB |
22
+ | **16GB** | `qwen3.5:4b` | `qwen3.5:4b` | 8192 | 1500 | ~6GB |
23
+
24
+ Embedding model (`bge-m3`, ~1.3GB) is included in all VRAM estimates.
25
+
26
+ When processor and digest use the same model (16GB tier), Ollama loads it once — no extra VRAM.
15
27
 
16
28
  ### Why Qwen 3.5?
17
29
 
18
30
  Qwen 3.5 models offer strong instruction-following and synthesis quality on local hardware. The MoE variant (`35b`) runs efficiently on 64GB+ systems because only a subset of parameters activate per token. Any instruction-tuned model that handles JSON output works — prefer what the user already has loaded, but recommend Qwen 3.5 for new setups.
19
31
 
20
- ### Pulling Models
32
+ ### Important: Reasoning Token Suppression
33
+
34
+ Qwen 3.5 models are reasoning models that generate `<think>` tokens before output. Myco automatically suppresses these tokens by passing `reasoning: 'off'` on all LLM calls. No user configuration is needed — this is handled in code via the `LLM_REASONING_MODE` constant.
35
+
36
+ ### Ollama Performance Settings
37
+
38
+ Recommend users add these to their Ollama service configuration for best performance:
39
+
40
+ ```
41
+ OLLAMA_FLASH_ATTENTION=1 # Required for KV cache quantization
42
+ OLLAMA_KV_CACHE_TYPE=q8_0 # Halves KV cache memory — makes large digest context affordable
43
+ ```
44
+
45
+ These are system-wide Ollama settings (launchd plist on macOS, systemd on Linux), not Myco-controlled.
46
+
47
+ ## Pulling Models
21
48
 
22
49
  **Ollama:**
23
50
  ```bash
24
- ollama pull qwen3.5 # pulls latest tag (~10B)
25
51
  ollama pull qwen3.5:4b # 4B variant
26
- ollama pull qwen3.5:27b # 27B variant
52
+ ollama pull qwen3.5:latest # latest variant (~8B)
27
53
  ollama pull qwen3.5:35b # 35B MoE variant
54
+ ollama pull bge-m3 # embedding model
28
55
  ```
29
56
 
30
- **LM Studio:** Search for `qwen3.5` in the model browser. Download the variant matching the RAM tier above.
57
+ **LM Studio:** Search for `qwen3.5` in the model browser. Download the variants matching the RAM tier above.
31
58
 
32
59
  ## Embedding Model
33
60
 
34
- Embedding models are separate from the intelligence model. Anthropic does not support embeddings — only Ollama and LM Studio provide embedding models.
61
+ The embedding model is separate from the intelligence models. Anthropic does not support embeddings — only Ollama and LM Studio provide embedding models.
35
62
 
36
- Recommended embedding models:
63
+ Recommended:
37
64
  - `bge-m3` — strong multilingual embeddings, good default
38
65
  - `nomic-embed-text` — lightweight alternative
39
66
 
40
- **Ollama:**
41
- ```bash
42
- ollama pull bge-m3
43
- ollama pull nomic-embed-text
44
- ```
45
-
46
- **LM Studio:** Filter the model list for names containing `text-embedding`. If none are available, search for and download an embedding model through the model browser.
47
-
48
67
  ## Inject Tier
49
68
 
50
- Controls how much pre-computed context the agent receives at session start. Agents can always request a different tier on-demand via the `myco_context` MCP tool.
69
+ Controls how much pre-computed context the agent receives at session start. All tiers are available regardless of local hardware — the local LLM can generate any tier. The default should be based on the **coding agent's context window**, not the local model.
51
70
 
52
- | RAM | Available Tiers | Default |
53
- |-----|-----------------|---------|
54
- | **64GB+** | 1500, 3000, 5000, 10000 | 3000 |
55
- | **32–64GB** | 1500, 3000, 5000 | 3000 |
56
- | **16–32GB** | 1500, 3000 | 1500 |
57
- | **8–16GB** | 1500 | 1500 |
71
+ | Agent Context Window | Default Tier | Rationale |
72
+ |---------------------|-------------|-----------|
73
+ | **1M+** (Opus 4.6) | 10000 | Rich context is cheap relative to the window |
74
+ | **200K** (Sonnet 4.6, Gemini) | 5000 | Good depth without crowding the agent's context |
75
+ | **128K** (GPT-4o, smaller models) | 3000 | Balanced — enough for key decisions and recent activity |
76
+ | **32K or less** | 1500 | Executive briefing only — preserve context for the task |
58
77
 
59
78
  ### Tier Descriptions
60
79
 
@@ -62,16 +81,3 @@ Controls how much pre-computed context the agent receives at session start. Agen
62
81
  - **3000** — team standup (recommended for most setups)
63
82
  - **5000** — deep onboarding
64
83
  - **10000** — institutional knowledge (richest, most context)
65
-
66
- ## Advanced: Separate Digestion Model
67
-
68
- The guided setup configures one intelligence model for all tasks. Power users who want a separate, larger model specifically for digest can configure it via CLI:
69
-
70
- ```bash
71
- node ${CLAUDE_PLUGIN_ROOT}/dist/src/cli.js setup-digest \
72
- --provider lm-studio \
73
- --model "qwen/qwen3.5-35b-a3b" \
74
- --context-window 65536
75
- ```
76
-
77
- This is not exposed in the guided setup to avoid resource exhaustion from running two large models simultaneously.
@@ -1 +0,0 @@
1
- {"version":3,"sources":["../src/intelligence/ollama.ts","../src/intelligence/lm-studio.ts"],"sourcesContent":["import type { LlmProvider, EmbeddingProvider, LlmResponse, EmbeddingResponse, LlmRequestOptions } from './llm.js';\nimport { estimateTokens, LLM_REQUEST_TIMEOUT_MS, EMBEDDING_REQUEST_TIMEOUT_MS, DAEMON_CLIENT_TIMEOUT_MS } from '../constants.js';\n\ninterface OllamaConfig {\n model?: string;\n base_url?: string;\n context_window?: number;\n max_tokens?: number;\n // Legacy fields (ignored, kept for backward compat during migration)\n embedding_model?: string;\n summary_model?: string;\n}\n\n// Ollama API endpoints\nconst ENDPOINT_GENERATE = '/api/generate';\nconst ENDPOINT_EMBED = '/api/embed';\nconst ENDPOINT_TAGS = '/api/tags';\n\nexport class OllamaBackend implements LlmProvider, EmbeddingProvider {\n static readonly DEFAULT_BASE_URL = 'http://localhost:11434';\n readonly name = 'ollama';\n private baseUrl: string;\n private model: string;\n private contextWindow: number;\n private defaultMaxTokens: number;\n\n constructor(config?: OllamaConfig) {\n this.baseUrl = config?.base_url ?? OllamaBackend.DEFAULT_BASE_URL;\n this.model = config?.model ?? config?.summary_model ?? 'llama3.2';\n this.contextWindow = config?.context_window ?? 8192;\n this.defaultMaxTokens = config?.max_tokens ?? 1024;\n }\n\n async summarize(prompt: string, opts?: LlmRequestOptions): Promise<LlmResponse> {\n const maxTokens = opts?.maxTokens ?? this.defaultMaxTokens;\n const contextLength = opts?.contextLength ?? 
this.contextWindow;\n const promptTokens = estimateTokens(prompt);\n const numCtx = Math.max(promptTokens + maxTokens, contextLength);\n\n const body: Record<string, unknown> = {\n model: this.model,\n prompt,\n stream: false,\n options: {\n num_ctx: numCtx,\n num_predict: maxTokens,\n },\n };\n\n // System prompt — sent as a separate field instead of concatenated into prompt\n if (opts?.systemPrompt) {\n body.system = opts.systemPrompt;\n }\n\n // Thinking control — false suppresses chain-of-thought for reasoning models\n if (opts?.reasoning) {\n body.think = opts.reasoning === 'off' ? false : opts.reasoning;\n }\n\n // Keep model loaded between requests (useful for digest cycles)\n if (opts?.keepAlive) {\n body.keep_alive = opts.keepAlive;\n }\n\n const response = await fetch(`${this.baseUrl}${ENDPOINT_GENERATE}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(body),\n signal: AbortSignal.timeout(opts?.timeoutMs ?? LLM_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n const errorBody = await response.text().catch(() => '');\n throw new Error(`Ollama summarize failed: ${response.status} ${errorBody.slice(0, 500)}`);\n }\n\n const data = await response.json() as { response: string; model: string };\n return { text: data.response, model: data.model };\n }\n\n async embed(text: string): Promise<EmbeddingResponse> {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_EMBED}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({\n model: this.model,\n input: text,\n }),\n signal: AbortSignal.timeout(EMBEDDING_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n throw new Error(`Ollama embed failed: ${response.status} ${response.statusText}`);\n }\n\n const data = await response.json() as { embeddings: number[][]; model: string };\n const embedding = data.embeddings[0];\n return { embedding, model: data.model, dimensions: embedding.length };\n }\n\n async isAvailable(): 
Promise<boolean> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_TAGS}`, {\n signal: AbortSignal.timeout(DAEMON_CLIENT_TIMEOUT_MS),\n });\n return response.ok;\n } catch {\n return false;\n }\n }\n\n /** List available models on this Ollama instance. */\n async listModels(timeoutMs?: number): Promise<string[]> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_TAGS}`, {\n signal: AbortSignal.timeout(timeoutMs ?? DAEMON_CLIENT_TIMEOUT_MS),\n });\n const data = await response.json() as { models: Array<{ name: string }> };\n return data.models.map((m) => m.name);\n } catch {\n return [];\n }\n }\n}\n","import type { LlmProvider, EmbeddingProvider, LlmResponse, EmbeddingResponse, LlmRequestOptions } from './llm.js';\nimport { LLM_REQUEST_TIMEOUT_MS, EMBEDDING_REQUEST_TIMEOUT_MS, DAEMON_CLIENT_TIMEOUT_MS } from '../constants.js';\n\ninterface LmStudioConfig {\n model?: string;\n base_url?: string;\n context_window?: number;\n max_tokens?: number;\n // Legacy fields\n embedding_model?: string;\n summary_model?: string;\n}\n\n// LM Studio API endpoints\nconst ENDPOINT_CHAT = '/api/v1/chat';\nconst ENDPOINT_MODELS_LOAD = '/api/v1/models/load';\nconst ENDPOINT_MODELS_UNLOAD = '/api/v1/models/unload';\nconst ENDPOINT_MODELS_LIST = '/v1/models';\nconst ENDPOINT_MODELS_NATIVE = '/api/v1/models';\nconst ENDPOINT_EMBEDDINGS = '/v1/embeddings';\n\n/** Shape of a loaded instance from the LM Studio native models API. */\ninterface NativeLoadedInstance {\n id: string;\n config: {\n context_length: number;\n flash_attention: boolean;\n offload_kv_cache_to_gpu: boolean;\n };\n}\n\n/** Shape of a model entry from the LM Studio native models API. 
*/\ninterface NativeModelEntry {\n type: string;\n key: string;\n loaded_instances: NativeLoadedInstance[];\n}\n\nexport class LmStudioBackend implements LlmProvider, EmbeddingProvider {\n static readonly DEFAULT_BASE_URL = 'http://localhost:1234';\n readonly name = 'lm-studio';\n private baseUrl: string;\n private model: string;\n private loadedInstanceId: string | null = null;\n private contextWindow: number | undefined;\n private defaultMaxTokens: number;\n\n constructor(config?: LmStudioConfig) {\n this.baseUrl = config?.base_url ?? LmStudioBackend.DEFAULT_BASE_URL;\n this.model = config?.model ?? config?.summary_model ?? 'llama3.2';\n this.contextWindow = config?.context_window;\n this.defaultMaxTokens = config?.max_tokens ?? 1024;\n }\n\n /**\n * Generate text using LM Studio's native REST API (/api/v1/chat).\n * Supports per-request context_length, reasoning control, and system_prompt.\n */\n async summarize(prompt: string, opts?: LlmRequestOptions): Promise<LlmResponse> {\n const maxTokens = opts?.maxTokens ?? this.defaultMaxTokens;\n\n const body: Record<string, unknown> = {\n model: this.loadedInstanceId ?? this.model,\n input: prompt,\n max_output_tokens: maxTokens,\n store: false,\n };\n\n // Only set context_length if we haven't pre-loaded the model\n // (pre-loaded models already have the correct context via ensureLoaded)\n if (!this.loadedInstanceId) {\n const contextLength = opts?.contextLength ?? 
this.contextWindow;\n if (contextLength) {\n body.context_length = contextLength;\n }\n }\n\n // System prompt — sent separately from user content\n if (opts?.systemPrompt) {\n body.system_prompt = opts.systemPrompt;\n }\n\n // Reasoning control — 'off' suppresses chain-of-thought for reasoning models\n if (opts?.reasoning) {\n body.reasoning = opts.reasoning;\n }\n\n const response = await fetch(`${this.baseUrl}${ENDPOINT_CHAT}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(body),\n signal: AbortSignal.timeout(opts?.timeoutMs ?? LLM_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n const errorBody = await response.text().catch(() => '');\n throw new Error(`LM Studio summarize failed: ${response.status} ${errorBody.slice(0, 500)}`);\n }\n\n const data = await response.json() as {\n model_instance_id: string;\n output: Array<{ type: string; content: string }>;\n };\n const messageOutput = data.output.find((o) => o.type === 'message');\n const text = messageOutput?.content ?? 
'';\n return { text, model: data.model_instance_id };\n }\n\n /**\n * Generate embeddings using LM Studio's OpenAI-compatible endpoint.\n * (The native API doesn't have an embedding endpoint — OpenAI-compat is fine here.)\n */\n async embed(text: string): Promise<EmbeddingResponse> {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_EMBEDDINGS}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({\n model: this.model,\n input: text,\n }),\n signal: AbortSignal.timeout(EMBEDDING_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n throw new Error(`LM Studio embed failed: ${response.status}`);\n }\n\n const data = await response.json() as {\n data: Array<{ embedding: number[] }>;\n model: string;\n };\n const embedding = data.data[0].embedding;\n return { embedding, model: data.model, dimensions: embedding.length };\n }\n\n /**\n * Ensure a model instance is loaded with the desired settings.\n * First checks for an existing compatible instance to reuse (prevents\n * accumulation across daemon restarts), then loads a new one only if needed.\n * Unloads incompatible instances of the same model to prevent resource exhaustion.\n */\n async ensureLoaded(contextLength?: number, gpuKvCache?: boolean): Promise<void> {\n const ctx = contextLength ?? this.contextWindow;\n const kvCache = gpuKvCache ?? 
false;\n\n // Query native API for existing loaded instances of this model\n const instances = await this.getLoadedInstances();\n\n // Check for a compatible instance we can reuse\n for (const instance of instances) {\n const matchesContext = !ctx || instance.config.context_length === ctx;\n const matchesKvCache = instance.config.offload_kv_cache_to_gpu === kvCache;\n if (matchesContext && matchesKvCache) {\n this.loadedInstanceId = instance.id;\n // Unload any incompatible instances (best effort, don't block on failure)\n await this.unloadIncompatibleInstances(instances, ctx, kvCache);\n return;\n }\n }\n\n // Unload incompatible instances before loading to free resources\n await this.unloadIncompatibleInstances(instances, ctx, kvCache);\n\n // No compatible instance found — load a new one\n const body: Record<string, unknown> = {\n model: this.model,\n flash_attention: true,\n offload_kv_cache_to_gpu: kvCache,\n };\n if (ctx) {\n body.context_length = ctx;\n }\n\n const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_LOAD}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify(body),\n signal: AbortSignal.timeout(LLM_REQUEST_TIMEOUT_MS),\n });\n\n if (!response.ok) {\n const errorBody = await response.text().catch(() => '');\n throw new Error(`LM Studio model load failed: ${response.status} ${errorBody.slice(0, 200)}`);\n }\n\n // Capture instance ID — LM Studio may return it under different field names\n const loadResult = await response.json() as Record<string, unknown>;\n const instanceId = (loadResult.id ?? loadResult.instance_id ?? 
loadResult.model_instance_id) as string | undefined;\n if (instanceId) {\n this.loadedInstanceId = instanceId;\n }\n }\n\n /**\n * Query the LM Studio native API for loaded instances of this model.\n * Returns an empty array if the API is unavailable or the model has no loaded instances.\n */\n private async getLoadedInstances(): Promise<NativeLoadedInstance[]> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_NATIVE}`, {\n signal: AbortSignal.timeout(DAEMON_CLIENT_TIMEOUT_MS),\n });\n if (!response.ok) return [];\n\n const data = await response.json() as { models: NativeModelEntry[] };\n const entry = data.models.find((m) => m.key === this.model);\n return entry?.loaded_instances ?? [];\n } catch {\n return [];\n }\n }\n\n /**\n * Unload instances of this model that don't match the desired settings.\n * Best-effort — failures are silently ignored to avoid blocking the load path.\n */\n private async unloadIncompatibleInstances(\n instances: NativeLoadedInstance[],\n contextLength: number | undefined,\n gpuKvCache: boolean,\n ): Promise<void> {\n for (const instance of instances) {\n const matchesContext = !contextLength || instance.config.context_length === contextLength;\n const matchesKvCache = instance.config.offload_kv_cache_to_gpu === gpuKvCache;\n if (!matchesContext || !matchesKvCache) {\n try {\n await fetch(`${this.baseUrl}${ENDPOINT_MODELS_UNLOAD}`, {\n method: 'POST',\n headers: { 'Content-Type': 'application/json' },\n body: JSON.stringify({ model: instance.id }),\n signal: AbortSignal.timeout(DAEMON_CLIENT_TIMEOUT_MS),\n });\n } catch {\n // Best effort — don't fail the load if cleanup fails\n }\n }\n }\n }\n\n async isAvailable(): Promise<boolean> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_LIST}`, {\n signal: AbortSignal.timeout(DAEMON_CLIENT_TIMEOUT_MS),\n });\n return response.ok;\n } catch {\n return false;\n }\n }\n\n /** List available models on this LM Studio instance. 
*/\n async listModels(timeoutMs?: number): Promise<string[]> {\n try {\n const response = await fetch(`${this.baseUrl}${ENDPOINT_MODELS_LIST}`, {\n signal: AbortSignal.timeout(timeoutMs ?? DAEMON_CLIENT_TIMEOUT_MS),\n });\n const data = await response.json() as { data: Array<{ id: string }> };\n return data.data.map((m) => m.id);\n } catch {\n return [];\n }\n }\n}\n"],"mappings":";;;;;;;;;AAcA,IAAM,oBAAoB;AAC1B,IAAM,iBAAiB;AACvB,IAAM,gBAAgB;AAEf,IAAM,gBAAN,MAAM,eAAwD;AAAA,EACnE,OAAgB,mBAAmB;AAAA,EAC1B,OAAO;AAAA,EACR;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,QAAuB;AACjC,SAAK,UAAU,QAAQ,YAAY,eAAc;AACjD,SAAK,QAAQ,QAAQ,SAAS,QAAQ,iBAAiB;AACvD,SAAK,gBAAgB,QAAQ,kBAAkB;AAC/C,SAAK,mBAAmB,QAAQ,cAAc;AAAA,EAChD;AAAA,EAEA,MAAM,UAAU,QAAgB,MAAgD;AAC9E,UAAM,YAAY,MAAM,aAAa,KAAK;AAC1C,UAAM,gBAAgB,MAAM,iBAAiB,KAAK;AAClD,UAAM,eAAe,eAAe,MAAM;AAC1C,UAAM,SAAS,KAAK,IAAI,eAAe,WAAW,aAAa;AAE/D,UAAM,OAAgC;AAAA,MACpC,OAAO,KAAK;AAAA,MACZ;AAAA,MACA,QAAQ;AAAA,MACR,SAAS;AAAA,QACP,SAAS;AAAA,QACT,aAAa;AAAA,MACf;AAAA,IACF;AAGA,QAAI,MAAM,cAAc;AACtB,WAAK,SAAS,KAAK;AAAA,IACrB;AAGA,QAAI,MAAM,WAAW;AACnB,WAAK,QAAQ,KAAK,cAAc,QAAQ,QAAQ,KAAK;AAAA,IACvD;AAGA,QAAI,MAAM,WAAW;AACnB,WAAK,aAAa,KAAK;AAAA,IACzB;AAEA,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,iBAAiB,IAAI;AAAA,MAClE,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MACzB,QAAQ,YAAY,QAAQ,MAAM,aAAa,sBAAsB;AAAA,IACvE,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK,EAAE,MAAM,MAAM,EAAE;AACtD,YAAM,IAAI,MAAM,4BAA4B,SAAS,MAAM,IAAI,UAAU,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,IAC1F;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AACjC,WAAO,EAAE,MAAM,KAAK,UAAU,OAAO,KAAK,MAAM;AAAA,EAClD;AAAA,EAEA,MAAM,MAAM,MAA0C;AACpD,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,cAAc,IAAI;AAAA,MAC/D,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK;AAAA,QACZ,OAAO;AAAA,MACT,CAAC;AAAA,MACD,QAAQ,YAAY,QAAQ,4BAA4B;AAAA,IAC1D,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,IAAI,SAAS,UAAU,EAAE;AAAA,IAClF;AAEA,UAAM,OAAO,MAAM,SAAS
,KAAK;AACjC,UAAM,YAAY,KAAK,WAAW,CAAC;AACnC,WAAO,EAAE,WAAW,OAAO,KAAK,OAAO,YAAY,UAAU,OAAO;AAAA,EACtE;AAAA,EAEA,MAAM,cAAgC;AACpC,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,aAAa,IAAI;AAAA,QAC9D,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,MACtD,CAAC;AACD,aAAO,SAAS;AAAA,IAClB,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,WAAuC;AACtD,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,aAAa,IAAI;AAAA,QAC9D,QAAQ,YAAY,QAAQ,aAAa,wBAAwB;AAAA,MACnE,CAAC;AACD,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,aAAO,KAAK,OAAO,IAAI,CAAC,MAAM,EAAE,IAAI;AAAA,IACtC,QAAQ;AACN,aAAO,CAAC;AAAA,IACV;AAAA,EACF;AACF;;;AC7GA,IAAM,gBAAgB;AACtB,IAAM,uBAAuB;AAC7B,IAAM,yBAAyB;AAC/B,IAAM,uBAAuB;AAC7B,IAAM,yBAAyB;AAC/B,IAAM,sBAAsB;AAmBrB,IAAM,kBAAN,MAAM,iBAA0D;AAAA,EACrE,OAAgB,mBAAmB;AAAA,EAC1B,OAAO;AAAA,EACR;AAAA,EACA;AAAA,EACA,mBAAkC;AAAA,EAClC;AAAA,EACA;AAAA,EAER,YAAY,QAAyB;AACnC,SAAK,UAAU,QAAQ,YAAY,iBAAgB;AACnD,SAAK,QAAQ,QAAQ,SAAS,QAAQ,iBAAiB;AACvD,SAAK,gBAAgB,QAAQ;AAC7B,SAAK,mBAAmB,QAAQ,cAAc;AAAA,EAChD;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,UAAU,QAAgB,MAAgD;AAC9E,UAAM,YAAY,MAAM,aAAa,KAAK;AAE1C,UAAM,OAAgC;AAAA,MACpC,OAAO,KAAK,oBAAoB,KAAK;AAAA,MACrC,OAAO;AAAA,MACP,mBAAmB;AAAA,MACnB,OAAO;AAAA,IACT;AAIA,QAAI,CAAC,KAAK,kBAAkB;AAC1B,YAAM,gBAAgB,MAAM,iBAAiB,KAAK;AAClD,UAAI,eAAe;AACjB,aAAK,iBAAiB;AAAA,MACxB;AAAA,IACF;AAGA,QAAI,MAAM,cAAc;AACtB,WAAK,gBAAgB,KAAK;AAAA,IAC5B;AAGA,QAAI,MAAM,WAAW;AACnB,WAAK,YAAY,KAAK;AAAA,IACxB;AAEA,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,aAAa,IAAI;AAAA,MAC9D,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MACzB,QAAQ,YAAY,QAAQ,MAAM,aAAa,sBAAsB;AAAA,IACvE,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK,EAAE,MAAM,MAAM,EAAE;AACtD,YAAM,IAAI,MAAM,+BAA+B,SAAS,MAAM,IAAI,UAAU,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,IAC7F;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AAIjC,UAAM,gBAAgB,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,SAAS,SAAS;AAClE,UAAM,OAAO,eAAe,WAAW;AACvC,WAAO,EAAE,MAAM,OAAO,KAAK,kBAAkB;AAAA,EAC/C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,MAA0C;AACpD,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,mBAA
mB,IAAI;AAAA,MACpE,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU;AAAA,QACnB,OAAO,KAAK;AAAA,QACZ,OAAO;AAAA,MACT,CAAC;AAAA,MACD,QAAQ,YAAY,QAAQ,4BAA4B;AAAA,IAC1D,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,IAAI,MAAM,2BAA2B,SAAS,MAAM,EAAE;AAAA,IAC9D;AAEA,UAAM,OAAO,MAAM,SAAS,KAAK;AAIjC,UAAM,YAAY,KAAK,KAAK,CAAC,EAAE;AAC/B,WAAO,EAAE,WAAW,OAAO,KAAK,OAAO,YAAY,UAAU,OAAO;AAAA,EACtE;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAM,aAAa,eAAwB,YAAqC;AAC9E,UAAM,MAAM,iBAAiB,KAAK;AAClC,UAAM,UAAU,cAAc;AAG9B,UAAM,YAAY,MAAM,KAAK,mBAAmB;AAGhD,eAAW,YAAY,WAAW;AAChC,YAAM,iBAAiB,CAAC,OAAO,SAAS,OAAO,mBAAmB;AAClE,YAAM,iBAAiB,SAAS,OAAO,4BAA4B;AACnE,UAAI,kBAAkB,gBAAgB;AACpC,aAAK,mBAAmB,SAAS;AAEjC,cAAM,KAAK,4BAA4B,WAAW,KAAK,OAAO;AAC9D;AAAA,MACF;AAAA,IACF;AAGA,UAAM,KAAK,4BAA4B,WAAW,KAAK,OAAO;AAG9D,UAAM,OAAgC;AAAA,MACpC,OAAO,KAAK;AAAA,MACZ,iBAAiB;AAAA,MACjB,yBAAyB;AAAA,IAC3B;AACA,QAAI,KAAK;AACP,WAAK,iBAAiB;AAAA,IACxB;AAEA,UAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,oBAAoB,IAAI;AAAA,MACrE,QAAQ;AAAA,MACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,MAC9C,MAAM,KAAK,UAAU,IAAI;AAAA,MACzB,QAAQ,YAAY,QAAQ,sBAAsB;AAAA,IACpD,CAAC;AAED,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,YAAY,MAAM,SAAS,KAAK,EAAE,MAAM,MAAM,EAAE;AACtD,YAAM,IAAI,MAAM,gCAAgC,SAAS,MAAM,IAAI,UAAU,MAAM,GAAG,GAAG,CAAC,EAAE;AAAA,IAC9F;AAGA,UAAM,aAAa,MAAM,SAAS,KAAK;AACvC,UAAM,aAAc,WAAW,MAAM,WAAW,eAAe,WAAW;AAC1E,QAAI,YAAY;AACd,WAAK,mBAAmB;AAAA,IAC1B;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,qBAAsD;AAClE,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,sBAAsB,IAAI;AAAA,QACvE,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,MACtD,CAAC;AACD,UAAI,CAAC,SAAS,GAAI,QAAO,CAAC;AAE1B,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,YAAM,QAAQ,KAAK,OAAO,KAAK,CAAC,MAAM,EAAE,QAAQ,KAAK,KAAK;AAC1D,aAAO,OAAO,oBAAoB,CAAC;AAAA,IACrC,QAAQ;AACN,aAAO,CAAC;AAAA,IACV;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAc,4BACZ,WACA,eACA,YACe;AACf,eAAW,YAAY,WAAW;AAChC,YAAM,iBAAiB,CAAC,iBAAiB,SAAS,OAAO,mBAAmB;AAC5E,YAAM,iBAAiB,SAAS,OAAO,4BAA4B;AACnE,UAAI,CAAC,kBAAkB,CAAC,gBAAgB;AACtC,YAAI;AACF,gBAAM,MAAM,GAAG,KAAK,OAAO,GAAG,sBAAsB,IAAI;
AAAA,YACtD,QAAQ;AAAA,YACR,SAAS,EAAE,gBAAgB,mBAAmB;AAAA,YAC9C,MAAM,KAAK,UAAU,EAAE,OAAO,SAAS,GAAG,CAAC;AAAA,YAC3C,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,UACtD,CAAC;AAAA,QACH,QAAQ;AAAA,QAER;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAM,cAAgC;AACpC,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,oBAAoB,IAAI;AAAA,QACrE,QAAQ,YAAY,QAAQ,wBAAwB;AAAA,MACtD,CAAC;AACD,aAAO,SAAS;AAAA,IAClB,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA,EAGA,MAAM,WAAW,WAAuC;AACtD,QAAI;AACF,YAAM,WAAW,MAAM,MAAM,GAAG,KAAK,OAAO,GAAG,oBAAoB,IAAI;AAAA,QACrE,QAAQ,YAAY,QAAQ,aAAa,wBAAwB;AAAA,MACnE,CAAC;AACD,YAAM,OAAO,MAAM,SAAS,KAAK;AACjC,aAAO,KAAK,KAAK,IAAI,CAAC,MAAM,EAAE,EAAE;AAAA,IAClC,QAAQ;AACN,aAAO,CAAC;AAAA,IACV;AAAA,EACF;AACF;","names":[]}