@goondocks/myco 0.4.3 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/dist/chunk-2AMAOSRF.js +105 -0
- package/dist/chunk-2AMAOSRF.js.map +1 -0
- package/dist/chunk-3F63SFZZ.js +381 -0
- package/dist/chunk-3F63SFZZ.js.map +1 -0
- package/dist/{chunk-WBT5DWGC.js → chunk-42R7KVAW.js} +2 -2
- package/dist/{chunk-GFBG73P4.js → chunk-5FIIK27E.js} +3 -3
- package/dist/{chunk-XCPQHC4X.js → chunk-6CAKKNGD.js} +2 -2
- package/dist/{chunk-I7PNZEBO.js → chunk-6LTNFMXO.js} +12 -1
- package/dist/{chunk-I7PNZEBO.js.map → chunk-6LTNFMXO.js.map} +1 -1
- package/dist/{chunk-V2OWD2VV.js → chunk-DKHYIA2V.js} +24 -146
- package/dist/chunk-DKHYIA2V.js.map +1 -0
- package/dist/{chunk-BNIYWCST.js → chunk-EQVQEFOA.js} +2 -2
- package/dist/{chunk-FPEDTLQ6.js → chunk-JJL6AMDA.js} +3 -101
- package/dist/chunk-JJL6AMDA.js.map +1 -0
- package/dist/{chunk-OUFSLZTX.js → chunk-KDWBZSOB.js} +21 -9
- package/dist/chunk-KDWBZSOB.js.map +1 -0
- package/dist/{chunk-67R6EMYD.js → chunk-OPO47BVS.js} +31 -52
- package/dist/chunk-OPO47BVS.js.map +1 -0
- package/dist/{chunk-IYFKPSRP.js → chunk-OSZRLHIJ.js} +3 -3
- package/dist/chunk-PD7LV22R.js +150 -0
- package/dist/chunk-PD7LV22R.js.map +1 -0
- package/dist/{chunk-JBD5KP5G.js → chunk-TDLQBGKA.js} +6 -2
- package/dist/chunk-TDLQBGKA.js.map +1 -0
- package/dist/{chunk-2GJFTIWX.js → chunk-TK2ZYIAL.js} +2 -2
- package/dist/{chunk-ZCBL5HER.js → chunk-XIIVIMFC.js} +2 -2
- package/dist/{cli-PMOFCZQL.js → cli-WOM4Z2Z4.js} +21 -18
- package/dist/cli-WOM4Z2Z4.js.map +1 -0
- package/dist/{client-5SUO2UYH.js → client-XCNF6NFT.js} +5 -5
- package/dist/{detect-providers-IRL2TTLK.js → detect-providers-CQSPTW2B.js} +3 -3
- package/dist/digest-WTS6S4XP.js +96 -0
- package/dist/digest-WTS6S4XP.js.map +1 -0
- package/dist/{init-NUF5UBUJ.js → init-VPLUEULI.js} +5 -5
- package/dist/{main-2XEBVUR6.js → main-OGXH6XWO.js} +230 -575
- package/dist/main-OGXH6XWO.js.map +1 -0
- package/dist/{rebuild-E6YFIRYZ.js → rebuild-Z4YUY6HT.js} +8 -7
- package/dist/{rebuild-E6YFIRYZ.js.map → rebuild-Z4YUY6HT.js.map} +1 -1
- package/dist/{reprocess-7G7KQWCN.js → reprocess-DMGPZTLC.js} +91 -20
- package/dist/reprocess-DMGPZTLC.js.map +1 -0
- package/dist/{restart-ABW4ZK3P.js → restart-QCQQ55KX.js} +6 -6
- package/dist/{search-MPD7SFK6.js → search-ACEFQOUW.js} +6 -6
- package/dist/{server-NZLZRITH.js → server-BQ3DWKZ6.js} +16 -14
- package/dist/{server-NZLZRITH.js.map → server-BQ3DWKZ6.js.map} +1 -1
- package/dist/{session-start-YB4A4PZB.js → session-start-BXRTKS4X.js} +6 -6
- package/dist/{setup-digest-K732MGOJ.js → setup-digest-EJXSQGZ5.js} +5 -5
- package/dist/{setup-llm-XCCH5LYD.js → setup-llm-P3MLWUDR.js} +5 -5
- package/dist/src/cli.js +4 -4
- package/dist/src/daemon/main.js +4 -4
- package/dist/src/hooks/post-tool-use.js +5 -5
- package/dist/src/hooks/session-end.js +5 -5
- package/dist/src/hooks/session-start.js +4 -4
- package/dist/src/hooks/stop.js +6 -6
- package/dist/src/hooks/stop.js.map +1 -1
- package/dist/src/hooks/user-prompt-submit.js +5 -5
- package/dist/src/mcp/server.js +4 -4
- package/dist/src/prompts/extraction.md +1 -1
- package/dist/src/prompts/summary.md +1 -11
- package/dist/{stats-6G7SN5YZ.js → stats-3FAP5FKV.js} +5 -5
- package/dist/{verify-JFHQH55Z.js → verify-3FTCOULE.js} +4 -4
- package/dist/{version-5B2TWXQJ.js → version-AL67JH7X.js} +4 -4
- package/package.json +1 -1
- package/skills/setup/SKILL.md +56 -28
- package/skills/setup/references/model-recommendations.md +49 -43
- package/dist/chunk-67R6EMYD.js.map +0 -1
- package/dist/chunk-FPEDTLQ6.js.map +0 -1
- package/dist/chunk-JBD5KP5G.js.map +0 -1
- package/dist/chunk-OUFSLZTX.js.map +0 -1
- package/dist/chunk-V2OWD2VV.js.map +0 -1
- package/dist/cli-PMOFCZQL.js.map +0 -1
- package/dist/main-2XEBVUR6.js.map +0 -1
- package/dist/reprocess-7G7KQWCN.js.map +0 -1
- package/dist/{chunk-WBT5DWGC.js.map → chunk-42R7KVAW.js.map} +0 -0
- package/dist/{chunk-GFBG73P4.js.map → chunk-5FIIK27E.js.map} +0 -0
- package/dist/{chunk-XCPQHC4X.js.map → chunk-6CAKKNGD.js.map} +0 -0
- package/dist/{chunk-BNIYWCST.js.map → chunk-EQVQEFOA.js.map} +0 -0
- package/dist/{chunk-IYFKPSRP.js.map → chunk-OSZRLHIJ.js.map} +0 -0
- package/dist/{chunk-2GJFTIWX.js.map → chunk-TK2ZYIAL.js.map} +0 -0
- package/dist/{chunk-ZCBL5HER.js.map → chunk-XIIVIMFC.js.map} +0 -0
- package/dist/{client-5SUO2UYH.js.map → client-XCNF6NFT.js.map} +0 -0
- package/dist/{detect-providers-IRL2TTLK.js.map → detect-providers-CQSPTW2B.js.map} +0 -0
- package/dist/{init-NUF5UBUJ.js.map → init-VPLUEULI.js.map} +0 -0
- package/dist/{restart-ABW4ZK3P.js.map → restart-QCQQ55KX.js.map} +0 -0
- package/dist/{search-MPD7SFK6.js.map → search-ACEFQOUW.js.map} +0 -0
- package/dist/{session-start-YB4A4PZB.js.map → session-start-BXRTKS4X.js.map} +0 -0
- package/dist/{setup-digest-K732MGOJ.js.map → setup-digest-EJXSQGZ5.js.map} +0 -0
- package/dist/{setup-llm-XCCH5LYD.js.map → setup-llm-P3MLWUDR.js.map} +0 -0
- package/dist/{stats-6G7SN5YZ.js.map → stats-3FAP5FKV.js.map} +0 -0
- package/dist/{verify-JFHQH55Z.js.map → verify-3FTCOULE.js.map} +0 -0
- package/dist/{version-5B2TWXQJ.js.map → version-AL67JH7X.js.map} +0 -0
@@ -1,60 +1,79 @@
 # Model Recommendations
 
-Hardware-based guidance for choosing
+Hardware-based guidance for choosing models during Myco setup. Myco uses three model tiers that load simultaneously in Ollama.
 
-##
+## Three-Tier Architecture
 
-
+| Tier | Purpose | Speed vs Quality |
+|------|---------|-----------------|
+| **Embedding** | Vector search, semantic similarity | Dedicated small model, always loaded |
+| **Processor** | Extraction, summarization, titles, classification | Speed matters — fast model, 8K context |
+| **Digest** | Synthesize vault knowledge into tiered extracts | Quality matters — large model, up to 65K context |
 
-
-
-
-
-
-
+The processor and digest can be the same model on smaller machines. On larger machines, splitting them gives the best speed/quality balance — processor tasks complete in seconds instead of minutes.
+
+## Recommended Configurations
+
+| RAM | Processor Model | Digest Model | Digest Context | Inject Tier | Est. VRAM |
+|-----|----------------|--------------|----------------|-------------|-----------|
+| **64GB+** | `qwen3.5:latest` (~8B) | `qwen3.5:35b` (MoE) | 65536 | 3000 | ~35GB |
+| **48GB** | `qwen3.5:latest` (~8B) | `qwen3.5:27b` | 32768 | 3000 | ~26GB |
+| **32GB** | `qwen3.5:4b` | `qwen3.5:latest` (~8B) | 16384 | 1500 | ~11GB |
+| **16GB** | `qwen3.5:4b` | `qwen3.5:4b` | 8192 | 1500 | ~6GB |
+
+Embedding model (`bge-m3`, ~1.3GB) is included in all VRAM estimates.
+
+When processor and digest use the same model (16GB tier), Ollama loads it once — no extra VRAM.
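To make the table concrete: a minimal sketch of how the 32GB row maps onto the `OllamaConfig` shape used by the package's Ollama provider (`src/intelligence/ollama.ts`). The two-instance wiring is illustrative only, not Myco's actual setup flow.

```ts
// Sketch: the 32GB-tier row expressed as OllamaBackend configs. The config
// field names (model, context_window) come from src/intelligence/ollama.ts;
// holding two separate backend instances is illustrative, not Myco's wiring.
import { OllamaBackend } from './intelligence/ollama.js';

const processor = new OllamaBackend({
  model: 'qwen3.5:4b',      // fast model, speed matters for extraction
  context_window: 8192,     // processor tasks run at 8K context
});

const digest = new OllamaBackend({
  model: 'qwen3.5:latest',  // ~8B, quality matters for digest
  context_window: 16384,    // digest context for the 32GB tier
});
```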
 
 ### Why Qwen 3.5?
 
 Qwen 3.5 models offer strong instruction-following and synthesis quality on local hardware. The MoE variant (`35b`) runs efficiently on 64GB+ systems because only a subset of parameters activate per token. Any instruction-tuned model that handles JSON output works — prefer what the user already has loaded, but recommend Qwen 3.5 for new setups.
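Since "handles JSON output" is the real requirement, here is a quick illustrative check, assuming the provider's `summarize(prompt, opts)` API as it appears in the package source (it resolves to `{ text, model }`); the prompt itself is arbitrary:

```ts
// Illustrative sanity check, not part of Myco: confirm a candidate model can
// return parseable JSON through the provider's summarize() API.
import { OllamaBackend } from './intelligence/ollama.js';

const candidate = new OllamaBackend({ model: 'qwen3.5:4b' });
const { text } = await candidate.summarize(
  'Reply with JSON only: {"ok": true}',
  { maxTokens: 64, reasoning: 'off' },
);

try {
  JSON.parse(text);  // instruction-tuned models should pass this reliably
} catch {
  console.warn('candidate model did not return clean JSON');
}
```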
 
-###
+### Important: Reasoning Token Suppression
+
+Qwen 3.5 models are reasoning models that generate `<think>` tokens before output. Myco automatically suppresses this via `reasoning: 'off'` on all LLM calls. No user configuration needed — this is handled in code via the `LLM_REASONING_MODE` constant.
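A condensed sketch of that mapping as the package's Ollama provider implements it: Ollama's `/api/generate` accepts a `think` field for reasoning models, and `reasoning: 'off'` becomes `think: false` on the request body.

```ts
// Condensed from the OllamaBackend.summarize() request construction in
// src/intelligence/ollama.ts: 'off' maps to think: false, which suppresses
// chain-of-thought (<think> tokens) on reasoning models.
type Reasoning = 'off' | boolean;

function buildGenerateBody(model: string, prompt: string, reasoning?: Reasoning) {
  const body: Record<string, unknown> = { model, prompt, stream: false };
  if (reasoning !== undefined) {
    body.think = reasoning === 'off' ? false : reasoning;
  }
  return body;
}
```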
+
+### Ollama Performance Settings
+
+Recommend users add these to their Ollama service configuration for best performance:
+
+```
+OLLAMA_FLASH_ATTENTION=1   # Required for KV cache quantization
+OLLAMA_KV_CACHE_TYPE=q8_0  # Halves KV cache memory — makes large digest context affordable
+```
+
+These are system-wide Ollama settings (launchd plist on macOS, systemd on Linux), not Myco-controlled.
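To see why `q8_0` makes 65K digest context affordable, a back-of-envelope estimator using the standard KV-cache formula (2 tensors x layers x KV heads x head dim x context x bytes per element). The model dimensions below are illustrative placeholders, not measured qwen3.5 specs:

```ts
// Rough KV-cache size estimate. Dimensions are illustrative placeholders;
// the point is the f16 vs q8_0 ratio, not the absolute numbers.
function kvCacheBytes(opts: {
  layers: number; kvHeads: number; headDim: number;
  contextLen: number; bytesPerElem: number; // 2 for f16, ~1 for q8_0
}) {
  const { layers, kvHeads, headDim, contextLen, bytesPerElem } = opts;
  return 2 /* K and V */ * layers * kvHeads * headDim * contextLen * bytesPerElem;
}

const dims = { layers: 40, kvHeads: 8, headDim: 128, contextLen: 65536 };
const f16 = kvCacheBytes({ ...dims, bytesPerElem: 2 }); // ~10.7 GB
const q8  = kvCacheBytes({ ...dims, bytesPerElem: 1 }); // ~5.4 GB, roughly half
```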
+
+## Pulling Models
 
 **Ollama:**
 ```bash
-ollama pull qwen3.5          # pulls latest tag (~10B)
 ollama pull qwen3.5:4b       # 4B variant
-ollama pull qwen3.5:
+ollama pull qwen3.5:latest   # latest variant (~8B)
 ollama pull qwen3.5:35b      # 35B MoE variant
+ollama pull bge-m3           # embedding model
 ```
 
-**LM Studio:** Search for `qwen3.5` in the model browser. Download the
+**LM Studio:** Search for `qwen3.5` in the model browser. Download the variants matching the RAM tier above.
 
 ## Embedding Model
 
-
+Separate from the intelligence models. Anthropic does not support embeddings — only Ollama and LM Studio provide embedding models.
 
-Recommended
+Recommended:
 - `bge-m3` — strong multilingual embeddings, good default
 - `nomic-embed-text` — lightweight alternative
 
-**Ollama:**
-```bash
-ollama pull bge-m3
-ollama pull nomic-embed-text
-```
-
-**LM Studio:** Filter the model list for names containing `text-embedding`. If none are available, search for and download an embedding model through the model browser.
-
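For reference, an embedding request against Ollama mirrors the `/api/embed` call in the package's provider source (`{ model, input }` in, `{ embeddings }` out); a self-contained sketch:

```ts
// Embedding request shape, mirroring the /api/embed call the package's
// Ollama provider makes. The base URL is Ollama's default.
async function embed(text: string): Promise<number[]> {
  const res = await fetch('http://localhost:11434/api/embed', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify({ model: 'bge-m3', input: text }),
  });
  if (!res.ok) throw new Error(`embed failed: ${res.status}`);
  const data = await res.json() as { embeddings: number[][] };
  return data.embeddings[0]; // bge-m3 returns 1024-dimensional vectors
}
```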
 ## Inject Tier
 
-Controls how much pre-computed context the agent receives at session start.
+Controls how much pre-computed context the agent receives at session start. All tiers are available regardless of local hardware — the local LLM can generate any tier. The default should be based on the **coding agent's context window**, not the local model.
 
-
-
-| **
-| **
-| **
-| **
+| Agent Context Window | Default Tier | Rationale |
+|---------------------|-------------|-----------|
+| **1M+** (Opus 4.6) | 10000 | Rich context is cheap relative to the window |
+| **200K** (Sonnet 4.6, Gemini) | 5000 | Good depth without crowding the agent's context |
+| **128K** (GPT-4o, smaller models) | 3000 | Balanced — enough for key decisions and recent activity |
+| **32K or less** | 1500 | Executive briefing only — preserve context for the task |
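A hypothetical helper (not a Myco API) encoding those defaults as a threshold lookup:

```ts
// Hypothetical helper, not part of the package: pick a default inject tier
// from the coding agent's context window, per the table above. Windows
// between the listed thresholds fall through to the next tier down.
function defaultInjectTier(agentContextTokens: number): number {
  if (agentContextTokens >= 1_000_000) return 10000; // 1M+ windows
  if (agentContextTokens >= 200_000) return 5000;
  if (agentContextTokens >= 128_000) return 3000;
  return 1500; // 32K or less: executive briefing only
}
```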
 
 ### Tier Descriptions
 
@@ -62,16 +81,3 @@ Controls how much pre-computed context the agent receives at session start. Agen
 - **3000** — team standup (recommended for most setups)
 - **5000** — deep onboarding
 - **10000** — institutional knowledge (richest, most context)
-
-## Advanced: Separate Digestion Model
-
-The guided setup configures one intelligence model for all tasks. Power users who want a separate, larger model specifically for digest can configure it via CLI:
-
-```bash
-node ${CLAUDE_PLUGIN_ROOT}/dist/src/cli.js setup-digest \
-  --provider lm-studio \
-  --model "qwen/qwen3.5-35b-a3b" \
-  --context-window 65536
-```
-
-This is not exposed in the guided setup to avoid resource exhaustion from running two large models simultaneously.
@@ -1 +0,0 @@
-{"version":3,"sources":["../src/intelligence/ollama.ts","../src/intelligence/lm-studio.ts"],"sourcesContent":[…],"mappings":"…","names":[]}